├── .gitattributes ├── .github └── workflows │ └── test.yml ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── examples ├── Cargo.toml └── src │ └── bin │ ├── add_image.rs │ ├── extract_page.rs │ └── form.rs ├── files ├── encrypted_aes_128.pdf ├── encrypted_aes_256.pdf ├── encrypted_aes_256_hardened.pdf ├── encrypted_rc4_rev2.pdf ├── encrypted_rc4_rev3.pdf ├── ep.pdf ├── ep2.pdf ├── example.pdf ├── example_annotation.pdf ├── formxobject.pdf ├── invalid │ ├── crash-121-1.pdf │ ├── crash-121-2.pdf │ ├── crash-121-3.pdf │ ├── crash-121-4.pdf │ ├── crash-122.pdf │ ├── crash-123.pdf │ ├── crash-124.pdf │ ├── crash-assertion-failure.pdf │ └── infinite-loop-103.pdf ├── jpeg.pdf ├── libreoffice.pdf ├── lossless.pdf ├── offset.pdf ├── password_protected │ ├── passwords_aes_128.pdf │ ├── passwords_aes_256.pdf │ ├── passwords_aes_256_hardened.pdf │ ├── passwords_rc4_rev2.pdf │ └── passwords_rc4_rev3.pdf ├── pdf-sample.pdf ├── xelatex-drawboard.pdf └── xelatex.pdf ├── pdf ├── Cargo.toml ├── examples │ ├── content.rs │ ├── metadata.rs │ ├── names.rs │ ├── other_page_content.rs │ └── read.rs ├── fuzz │ ├── .gitignore │ ├── Cargo.toml │ └── fuzz_targets │ │ └── parse.rs ├── src │ ├── any.rs │ ├── backend.rs │ ├── build.rs │ ├── content.rs │ ├── crypt.rs │ ├── data │ │ ├── t01_lzw+base85.txt │ │ └── t01_plain.txt │ ├── enc.rs │ ├── encoding.rs │ ├── error.rs │ ├── file.rs │ ├── font.rs │ ├── lib.rs │ ├── macros.rs │ ├── object │ │ ├── color.rs │ │ ├── function.rs │ │ ├── mod.rs │ │ ├── stream.rs │ │ └── types.rs │ ├── parser │ │ ├── lexer │ │ │ ├── mod.rs │ │ │ └── str.rs │ │ ├── mod.rs │ │ ├── parse_object.rs │ │ └── parse_xref.rs │ ├── path.rs │ ├── primitive.rs │ ├── repair.rs │ └── xref.rs └── tests │ ├── integration.rs │ ├── write.rs │ └── xref.rs └── pdf_derive ├── Cargo.toml └── src └── lib.rs /.gitattributes: -------------------------------------------------------------------------------- 1 | *.pdf binary 2 | 
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | on: push 2 | name: test 3 | jobs: 4 | tests: 5 | runs-on: ubuntu-latest 6 | steps: 7 | - name: Checkout Code 8 | uses: actions/checkout@v4 9 | 10 | - name: Cache cargo registry 11 | uses: actions/cache@v4 12 | with: 13 | path: | 14 | ~/.cargo/registry 15 | ~/.cargo/git 16 | target 17 | key: ${{ runner.os }}-cargo-registry-${{ hashFiles('**/Cargo.lock') }} 18 | 19 | - name: Install toolchain 20 | uses: actions-rs/toolchain@v1 21 | with: 22 | profile: minimal 23 | toolchain: stable 24 | 25 | - name: Run tests 26 | uses: actions-rs/cargo@v1 27 | with: 28 | command: test 29 | args: --workspace 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | **/*.rs.bk 3 | Cargo.lock 4 | **/*.orig 5 | **/perf.data 6 | **/perf.data.old 7 | 8 | !.travis.yml 9 | 10 | /fonts 11 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "pdf", 4 | "pdf_derive", 5 | "examples", 6 | ] 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright © 2020 The pdf-rs contributers. 
2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 4 | 5 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 6 | 7 | THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pdf-rs [![test](https://github.com/pdf-rs/pdf/actions/workflows/test.yml/badge.svg)](https://github.com/pdf-rs/pdf/actions/workflows/test.yml) 2 | Read, alter and write PDF files. 3 | 4 | Modifying and writing PDFs is still experimental. 5 | 6 | One easy way you can contribute is to add different PDF files to `tests/files` and see if they pass the tests (`cargo test`). 7 | 8 | Feel free to contribute with ideas, issues or code! Please join [us on Zulip](https://type.zulipchat.com/#narrow/stream/209232-pdf) if you have any questions or problems. 9 | 10 | # Workspace 11 | This repository uses a Cargo Workspace and default members. This means by default only the `pdf` library is build. 
12 | To build additional parts, pass `--package=read` to build the subcrate you are interested in (here the `read` example). 13 | 14 | # Examples 15 | Examples are located in `pdf/examples/` and can be executed using: 16 | 17 | ``` 18 | cargo run --example {content,metadata,names,read,text} -- 19 | ``` 20 | 21 | # Renderer and Viewer 22 | A library for rendering PDFs via [Pathfinder](https://github.com/servo/pathfinder) and minimal viewer can be found [here](https://github.com/pdf-rs/pdf_render). 23 | 24 | # Inspect 25 | There is a tool for visualizing a PDF file as an interactive hierarchy of primitives at [inspect-prim](https://github.com/pdf-rs/inspect-prim). Just clone and `cargo run`. 26 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pdf-examples" 3 | version = "0.1.0" 4 | edition = "2021" 5 | 6 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 7 | 8 | [dependencies] 9 | pdf = { path = "../pdf" } 10 | datasize = "0.2.13" 11 | clap = { version = "*", features = ["derive"] } 12 | image = "*" 13 | 14 | [[bin]] 15 | name = "extract_page" 16 | -------------------------------------------------------------------------------- /examples/src/bin/add_image.rs: -------------------------------------------------------------------------------- 1 | use std::{path::PathBuf, error::Error}; 2 | 3 | use pdf::{ 4 | file::FileOptions, 5 | object::*, 6 | primitive::Name, enc::{StreamFilter, DCTDecodeParams}, content::{Op, Matrix, Content}, 7 | }; 8 | 9 | use clap::Parser; 10 | use std::io::Cursor; 11 | use image::io::Reader as ImageReader; 12 | 13 | #[derive(Parser, Debug)] 14 | #[command(author, version, about, long_about = None)] 15 | struct Args { 16 | /// Input PDF file 17 | #[arg(short, long)] 18 | input: PathBuf, 19 | 20 | /// Input image file 21 | #[arg(long)] 22 | 
image: PathBuf, 23 | 24 | /// Page number to add the image to 25 | #[arg(short, long, default_value_t = 0)] 26 | page: u32, 27 | 28 | /// Output file 29 | #[arg(short, long)] 30 | output: PathBuf, 31 | } 32 | 33 | struct Point { 34 | x: f32, 35 | y: f32 36 | } 37 | struct Align { 38 | page_rel: f32, 39 | page_abs: f32, 40 | img_rel: f32, 41 | } 42 | 43 | fn main() -> Result<(), Box> { 44 | let args = Args::parse(); 45 | 46 | let img_data = std::fs::read(&args.image)?; 47 | let img = ImageReader::with_format(Cursor::new(&img_data), image::ImageFormat::Jpeg).decode()?; 48 | let image_dict = ImageDict { 49 | width: img.width(), 50 | height: img.height(), 51 | color_space: Some(ColorSpace::DeviceRGB), 52 | bits_per_component: Some(8), 53 | .. Default::default() 54 | }; 55 | let image = Stream::new_with_filters(image_dict, img_data, vec![StreamFilter::DCTDecode(DCTDecodeParams { color_transform: None})]); 56 | 57 | let mut file = FileOptions::cached().open(&args.input).unwrap(); 58 | let page = file.get_page(args.page).expect("no such page"); 59 | 60 | let resources = page.resources()?; 61 | let mut resources2: Resources = (**resources).clone(); 62 | 63 | let image_obj = XObject::Image(ImageXObject { inner: image }); 64 | let image_ref = file.create(image_obj)?; 65 | 66 | // assume that name did not exist 67 | let image_name = Name::from("MyImage"); 68 | resources2.xobjects.insert(image_name.clone(), image_ref.get_ref()); 69 | 70 | 71 | let mut ops = page.contents.as_ref().unwrap().operations(&file.resolver())?; 72 | 73 | let mm = 72.0 / 25.4; // one millimeter 74 | // bottom right corner of the page, but 5mm margin 75 | let h_align = Align { 76 | img_rel: -1.0, // move left by image width 77 | page_rel: 1.0, // move right by page width 78 | page_abs: -5.0 * mm, // 5,mm from the right edge 79 | }; 80 | let v_align = Align { 81 | img_rel: 0.0, 82 | page_rel: 0.0, 83 | page_abs: 5.0 * mm 84 | }; 85 | let dpi = 300.; 86 | 87 | let px_scale = 72. 
/ dpi; 88 | let media_box = page.media_box.unwrap(); 89 | let scale = Point { x: img.width() as f32 * px_scale , y: img.height() as f32 * px_scale }; 90 | let skew = Point { x: 0.0, y: 0.0 }; 91 | let page_size = Point { 92 | x: media_box.right - media_box.left, 93 | y: media_box.top - media_box.bottom 94 | }; 95 | let page_origin = Point { 96 | x: media_box.left, 97 | y: media_box.bottom 98 | }; 99 | 100 | let position = Point { 101 | x: page_origin.x + h_align.page_abs + h_align.img_rel * scale.x + h_align.page_rel * page_size.x, 102 | y: page_origin.y + v_align.page_abs + v_align.img_rel * scale.y + v_align.page_rel * page_size.y 103 | }; 104 | 105 | ops.append(&mut vec![ 106 | Op::Save, // ADD IMAGE START 107 | Op::Transform { matrix: Matrix{ // IMAGE MANIPULATION 108 | a: scale.x, d: scale.y, 109 | b: skew.x, c: skew.y, 110 | e: position.x, f: position.y, 111 | } }, 112 | Op::XObject {name: image_name}, // IMAGE 113 | Op::Restore, // ADD IMAGE STOP 114 | ]); 115 | 116 | let mut page2: Page = (*page).clone(); 117 | page2.contents = Some(Content::from_ops(ops)); 118 | page2.resources = Some(file.create(resources2)?.into()); 119 | 120 | file.update(page.get_ref().get_inner(), page2)?; 121 | 122 | file.save_to(&args.output)?; 123 | 124 | Ok(()) 125 | } 126 | -------------------------------------------------------------------------------- /examples/src/bin/extract_page.rs: -------------------------------------------------------------------------------- 1 | use std::path::PathBuf; 2 | 3 | use pdf::{ 4 | error::PdfError, 5 | file::FileOptions, 6 | object::*, 7 | build::*, 8 | primitive::{PdfString, Name}, content::{Op, Color, Cmyk, Matrix}, font::{Font, TFont, FontData}, 9 | }; 10 | 11 | use clap::Parser; 12 | 13 | #[derive(Parser, Debug)] 14 | #[command(author, version, about, long_about = None)] 15 | struct Args { 16 | /// Input file 17 | #[arg(short, long)] 18 | input: PathBuf, 19 | 20 | /// Page number 21 | #[arg(short, long, default_value_t = 0)] 22 | page: u32, 
23 | 24 | /// Output file 25 | #[arg(short, long)] 26 | output: PathBuf, 27 | } 28 | 29 | fn main() -> Result<(), PdfError> { 30 | let args = Args::parse(); 31 | 32 | let old_file = FileOptions::cached().open(&args.input).unwrap(); 33 | let old_page = old_file.get_page(args.page).expect("no such page"); 34 | 35 | let mut builder = PdfBuilder::new(FileOptions::cached()); 36 | 37 | let mut importer = Importer::new(old_file.resolver(), &mut builder.storage); 38 | let mut pages = Vec::new(); 39 | 40 | let mut new_page = PageBuilder::clone_page(&old_page, &mut importer)?; 41 | importer.finish().verify(&builder.storage.resolver())?; 42 | 43 | let font = Font { 44 | data: FontData::TrueType(TFont{ 45 | base_font: Some(Name::from("Helvetica")), 46 | first_char: None, 47 | font_descriptor: None, 48 | last_char: None, 49 | widths: None, 50 | }), 51 | encoding: Some(pdf::encoding::Encoding::standard()), 52 | name: None, 53 | subtype: pdf::font::FontType::TrueType, 54 | to_unicode: None, 55 | _other: Default::default() 56 | }; 57 | let font_name = Name::from("F42"); 58 | new_page.resources.fonts.insert(font_name.clone(), builder.storage.create(font)?.into()); 59 | 60 | new_page.ops.push(Op::BeginText); 61 | let label = format!("{} page {}", args.input.file_name().unwrap().to_string_lossy(), args.page).into_bytes(); 62 | let mut text_ops = vec![ 63 | Op::FillColor { color: Color::Cmyk(Cmyk { cyan: 0.0, magenta: 0.0, key: 1.0, yellow: 0.0})}, 64 | Op::BeginText, 65 | Op::SetTextMatrix { matrix: Matrix { a: 1.0, b: 0.0, c: 0.0, d: 1., e: 10., f: 10. }}, 66 | Op::TextFont { name: font_name.clone(), size: 20. 
}, 67 | Op::TextDraw { text: PdfString::new(label.into()) }, 68 | Op::EndText 69 | ]; 70 | new_page.ops.append(&mut text_ops); 71 | 72 | pages.push(new_page); 73 | 74 | let catalog = CatalogBuilder::from_pages(pages); 75 | 76 | let mut info = InfoDict::default(); 77 | info.title = Some(PdfString::from("test")); 78 | 79 | let data = builder.info(info).build(catalog)?; 80 | 81 | std::fs::write(&args.output, data)?; 82 | 83 | Ok(()) 84 | } 85 | -------------------------------------------------------------------------------- /examples/src/bin/form.rs: -------------------------------------------------------------------------------- 1 | extern crate pdf; 2 | 3 | use std::collections::HashMap; 4 | use std::env::args; 5 | 6 | use pdf::content::{FormXObject, Op, serialize_ops}; 7 | use pdf::error::PdfError; 8 | use pdf::file::{FileOptions, Log}; 9 | use pdf::font::{Font, FontData, TFont}; 10 | use pdf::object::*; 11 | use pdf::primitive::{PdfString, Primitive, Name}; 12 | 13 | fn run() -> Result<(), PdfError> { 14 | let path = args().nth(1).expect("no file given"); 15 | println!("read: {}", path); 16 | 17 | let mut file = FileOptions::cached().open(&path)?; 18 | let mut to_update_field: Option<_> = None; 19 | 20 | 21 | let font = Font { 22 | data: FontData::TrueType(TFont{ 23 | base_font: Some(Name::from("Helvetica")), 24 | first_char: None, 25 | font_descriptor: None, 26 | last_char: None, 27 | widths: None, 28 | }), 29 | encoding: Some(pdf::encoding::Encoding::standard()), 30 | name: None, 31 | subtype: pdf::font::FontType::TrueType, 32 | to_unicode: None, 33 | _other: Default::default() 34 | }; 35 | let font_name = Name::from("Helvetica"); 36 | let font = file.create(font)?; 37 | let mut fonts = HashMap::new(); 38 | fonts.insert("Helvetica".into(), font.into()); 39 | let resources = Resources { 40 | fonts, 41 | .. 
Default::default() 42 | }; 43 | let resources = file.create(resources)?; 44 | 45 | let page0 = file.get_page(0).unwrap(); 46 | let annots = page0.annotations.load(&file.resolver()).expect("can't load annotations"); 47 | for annot in &*annots { 48 | if let Some(ref a) = annot.appearance_streams { 49 | let normal = file.resolver().get(a.normal); 50 | if let Ok(normal) = normal { 51 | match *normal { 52 | AppearanceStreamEntry::Single(ref s) => { 53 | //dbg!(&s.stream.resources); 54 | 55 | let form_dict = FormDict { 56 | resources: Some(resources.clone().into()), 57 | .. (**s.stream).clone() 58 | }; 59 | 60 | let ops = vec![ 61 | Op::Save, 62 | Op::TextFont { name: font_name.clone(), size: 14.0 }, 63 | Op::TextDraw { text: PdfString::from("Hello World!") }, 64 | Op::EndText, 65 | Op::Restore 66 | ]; 67 | let stream = Stream::new(form_dict, serialize_ops(&ops)?); 68 | 69 | let normal2 = AppearanceStreamEntry::Single(FormXObject { stream }); 70 | 71 | file.update(a.normal.get_inner(), normal2)?; 72 | } 73 | _ => {} 74 | } 75 | } 76 | } 77 | } 78 | 79 | if let Some(ref forms) = file.get_root().forms { 80 | println!("Forms:"); 81 | for field in forms.fields.iter().take(1) { 82 | print!(" {:?} = ", field.name); 83 | match field.value { 84 | Primitive::String(ref s) => println!("{}", s.to_string_lossy()), 85 | Primitive::Integer(i) => println!("{}", i), 86 | Primitive::Name(ref s) => println!("{}", s), 87 | ref p => println!("{:?}", p), 88 | } 89 | 90 | if to_update_field.is_none() { 91 | to_update_field = Some(field.clone()); 92 | } 93 | } 94 | } 95 | 96 | if let Some(to_update_field) = to_update_field { 97 | println!("\nUpdating field:"); 98 | println!("{:?}\n", to_update_field); 99 | 100 | let text = "Hello World!"; 101 | let new_value: PdfString = PdfString::new(text.into()); 102 | let mut updated_field = (*to_update_field).clone(); 103 | updated_field.value = Primitive::String(new_value); 104 | 105 | //dbg!(&updated_field); 106 | 107 | let reference = file.update( 108 
| to_update_field.get_ref().get_inner(), 109 | updated_field, 110 | )?; 111 | 112 | file.save_to("output/out.pdf")?; 113 | 114 | println!("\nUpdated field:"); 115 | //println!("{:?}\n", reference); 116 | } 117 | 118 | Ok(()) 119 | } 120 | 121 | fn main() { 122 | if let Err(e) = run() { 123 | println!("{e}"); 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /files/encrypted_aes_128.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/encrypted_aes_128.pdf -------------------------------------------------------------------------------- /files/encrypted_aes_256.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/encrypted_aes_256.pdf -------------------------------------------------------------------------------- /files/encrypted_aes_256_hardened.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/encrypted_aes_256_hardened.pdf -------------------------------------------------------------------------------- /files/encrypted_rc4_rev2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/encrypted_rc4_rev2.pdf -------------------------------------------------------------------------------- /files/encrypted_rc4_rev3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/encrypted_rc4_rev3.pdf -------------------------------------------------------------------------------- /files/ep.pdf: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/ep.pdf -------------------------------------------------------------------------------- /files/ep2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/ep2.pdf -------------------------------------------------------------------------------- /files/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/example.pdf -------------------------------------------------------------------------------- /files/example_annotation.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/example_annotation.pdf -------------------------------------------------------------------------------- /files/formxobject.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/formxobject.pdf -------------------------------------------------------------------------------- /files/invalid/crash-121-1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-121-1.pdf -------------------------------------------------------------------------------- /files/invalid/crash-121-2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-121-2.pdf 
-------------------------------------------------------------------------------- /files/invalid/crash-121-3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-121-3.pdf -------------------------------------------------------------------------------- /files/invalid/crash-121-4.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-121-4.pdf -------------------------------------------------------------------------------- /files/invalid/crash-122.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-122.pdf -------------------------------------------------------------------------------- /files/invalid/crash-123.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-123.pdf -------------------------------------------------------------------------------- /files/invalid/crash-124.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-124.pdf -------------------------------------------------------------------------------- /files/invalid/crash-assertion-failure.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/invalid/crash-assertion-failure.pdf -------------------------------------------------------------------------------- /files/invalid/infinite-loop-103.pdf: 
-------------------------------------------------------------------------------- 1 | startxref%PDF- -------------------------------------------------------------------------------- /files/jpeg.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/jpeg.pdf -------------------------------------------------------------------------------- /files/libreoffice.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/libreoffice.pdf -------------------------------------------------------------------------------- /files/lossless.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/lossless.pdf -------------------------------------------------------------------------------- /files/offset.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/offset.pdf -------------------------------------------------------------------------------- /files/password_protected/passwords_aes_128.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/password_protected/passwords_aes_128.pdf -------------------------------------------------------------------------------- /files/password_protected/passwords_aes_256.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/password_protected/passwords_aes_256.pdf 
-------------------------------------------------------------------------------- /files/password_protected/passwords_aes_256_hardened.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/password_protected/passwords_aes_256_hardened.pdf -------------------------------------------------------------------------------- /files/password_protected/passwords_rc4_rev2.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/password_protected/passwords_rc4_rev2.pdf -------------------------------------------------------------------------------- /files/password_protected/passwords_rc4_rev3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/password_protected/passwords_rc4_rev3.pdf -------------------------------------------------------------------------------- /files/pdf-sample.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/pdf-sample.pdf -------------------------------------------------------------------------------- /files/xelatex-drawboard.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/xelatex-drawboard.pdf -------------------------------------------------------------------------------- /files/xelatex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/files/xelatex.pdf 
-------------------------------------------------------------------------------- /pdf/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pdf" 3 | version = "0.9.0" 4 | authors = ["Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln "] 5 | repository = "https://github.com/pdf-rs/pdf" 6 | readme = "../README.md" 7 | keywords = ["pdf"] 8 | license = "MIT" 9 | documentation = "https://docs.rs/pdf" 10 | edition = "2018" 11 | description = "PDF reader" 12 | 13 | [features] 14 | mmap = ["memmap2"] 15 | dump = ["tempfile"] 16 | threads = ["jpeg-decoder/default"] 17 | sync = [] 18 | cache = ["globalcache"] 19 | default = ["sync", "cache"] 20 | 21 | [dependencies] 22 | pdf_derive = { version = "0.2.0", path = "../pdf_derive" } 23 | snafu = "0.8.3" 24 | libflate = "2.0.0" 25 | deflate = "1.0.0" 26 | itertools = "0.13.0" 27 | memmap2 = { version = "0.9.4", optional = true } 28 | weezl = "0.1.4" 29 | once_cell = "1.5.2" 30 | log = "0.4.14" 31 | tempfile = { version = "3.2.0", optional = true } 32 | md5 = "0.7" 33 | jpeg-decoder = { version = "0.3.0", default-features = false } 34 | aes = "0.8.2" 35 | cbc = "0.1" 36 | stringprep = "0.1.2" 37 | sha2 = "0.10.2" 38 | fax = "0.2.0" 39 | euclid = { version = "0.22.7", optional = true } 40 | bitflags = "2.5" 41 | istring = { version = "0.3.3", features = ["std", "size"] } 42 | datasize = "0.2.13" 43 | globalcache = { version = "0.2.3", features = ["sync"], optional = true } 44 | indexmap = "2.1.0" 45 | 46 | [dev-dependencies] 47 | glob = "0.3.0" 48 | 49 | [lib] 50 | doctest = false 51 | 52 | [[example]] 53 | name = "content" 54 | 55 | [[example]] 56 | name = "metadata" 57 | 58 | [[example]] 59 | name = "names" 60 | 61 | [[example]] 62 | name = "read" 63 | 64 | [[example]] 65 | name = "other_page_content" 66 | -------------------------------------------------------------------------------- /pdf/examples/content.rs: 
-------------------------------------------------------------------------------- 1 | use std::env; 2 | use std::path::PathBuf; 3 | 4 | 5 | use pdf::error::PdfError; 6 | use pdf::content::*; 7 | use pdf::file::FileOptions; 8 | 9 | 10 | 11 | 12 | use pdf::object::*; 13 | use pdf::build::*; 14 | 15 | use pdf::primitive::PdfString; 16 | 17 | #[cfg(feature="cache")] 18 | fn main() -> Result<(), PdfError> { 19 | let path = PathBuf::from(env::args_os().nth(1).expect("no file given")); 20 | 21 | let mut builder = PdfBuilder::new(FileOptions::cached()); 22 | 23 | let mut pages = Vec::new(); 24 | 25 | let content = Content::from_ops(vec![ 26 | Op::MoveTo { p: Point { x: 100., y: 100. } }, 27 | Op::LineTo { p: Point { x: 100., y: 200. } }, 28 | Op::LineTo { p: Point { x: 200., y: 200. } }, 29 | Op::LineTo { p: Point { x: 200., y: 100. } }, 30 | Op::Close, 31 | Op::Stroke, 32 | ]); 33 | let mut new_page = PageBuilder::from_content(content, &NoResolve)?; 34 | new_page.media_box = Some(pdf::object::Rectangle { 35 | left: 0.0, 36 | top: 0.0, 37 | bottom: 400.0, 38 | right: 400.0 39 | }); 40 | let resources = Resources::default(); 41 | 42 | /* 43 | let font = Font { 44 | name: Some("Test".into()), 45 | subtype: pdf::font::FontType::TrueType, 46 | data: FontData::TrueType(TFont { 47 | base_font: None, 48 | 49 | }) 50 | } 51 | resources.fonts.insert("f1", font); 52 | */ 53 | 54 | new_page.resources = resources; 55 | pages.push(new_page); 56 | 57 | let catalog = CatalogBuilder::from_pages(pages); 58 | 59 | let mut info = InfoDict::default(); 60 | info.title = Some(PdfString::from("test")); 61 | 62 | let data = builder.info(info).build(catalog)?; 63 | 64 | std::fs::write(path, data)?; 65 | 66 | Ok(()) 67 | } 68 | -------------------------------------------------------------------------------- /pdf/examples/metadata.rs: -------------------------------------------------------------------------------- 1 | use std::env::args; 2 | 3 | use pdf::error::PdfError; 4 | use 
pdf::file::{FileOptions}; 5 | use pdf::object::{FieldDictionary, FieldType, Resolve}; 6 | 7 | /// extract and print a PDF's metadata 8 | #[cfg(feature="cache")] 9 | fn main() -> Result<(), PdfError> { 10 | let path = args() 11 | .nth(1) 12 | .expect("Please provide a file path to the PDF you want to explore."); 13 | 14 | let file = FileOptions::cached().open(&path).unwrap(); 15 | dbg!(file.version()); 16 | let resolver = file.resolver(); 17 | 18 | if let Some(ref info) = file.trailer.info_dict { 19 | dbg!(info); 20 | } 21 | 22 | let catalog = file.get_root(); 23 | dbg!(&catalog.version); 24 | 25 | if let Some(ref forms) = catalog.forms { 26 | for field in forms.fields.iter() { 27 | print_field(field, &resolver); 28 | } 29 | } 30 | 31 | Ok(()) 32 | } 33 | 34 | fn print_field(field: &FieldDictionary, resolve: &impl Resolve) { 35 | if field.typ == Some(FieldType::Signature) { 36 | println!("{:?}", field); 37 | } 38 | for &kid in field.kids.iter() { 39 | let child = resolve.get(kid).unwrap(); 40 | print_field(&child, resolve); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /pdf/examples/names.rs: -------------------------------------------------------------------------------- 1 | extern crate pdf; 2 | 3 | use std::env::args; 4 | use std::fmt; 5 | use std::collections::HashMap; 6 | use pdf::file::{FileOptions}; 7 | use pdf::object::*; 8 | use pdf::primitive::{Primitive, PdfString}; 9 | 10 | struct Indent(usize); 11 | impl fmt::Display for Indent { 12 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 13 | for _ in 0 .. 
self.0 { 14 | write!(f, " ")?; 15 | } 16 | Ok(()) 17 | } 18 | } 19 | 20 | fn walk_outline(r: &impl Resolve, mut node: RcRef, name_map: &impl Fn(&str) -> usize, page_map: &impl Fn(PlainRef) -> usize, depth: usize) { 21 | let indent = Indent(depth); 22 | loop { 23 | if let Some(ref title) = node.title { 24 | println!("{}title: {:?}", indent, title.to_string_lossy()); 25 | } 26 | if let Some(ref dest) = node.dest { 27 | match dest { 28 | Primitive::String(ref s) => { 29 | let name = s.to_string_lossy(); 30 | let page_nr = name_map(&name); 31 | println!("{}dest: {:?} -> page nr. {:?}", indent, name, page_nr); 32 | } 33 | Primitive::Array(ref a) => match a[0] { 34 | Primitive::Reference(r) => { 35 | let page_nr = page_map(r); 36 | println!("{}dest: {:?} -> page nr. {:?}", indent, a, page_nr); 37 | } 38 | _ => unimplemented!("invalid reference in array"), 39 | } 40 | _ => unimplemented!("invalid dest"), 41 | } 42 | } 43 | if let Some(Action::Goto(MaybeNamedDest::Direct(Dest { page: Some(page), ..}))) = node.action { 44 | let page_nr = page_map(page.get_inner()); 45 | println!("{}action -> page nr. 
{:?}", indent, page_nr); 46 | } 47 | if let Some(ref a) = node.se { 48 | println!("{} -> {:?}", indent, a); 49 | } 50 | if let Some(entry_ref) = node.first { 51 | let entry = r.get(entry_ref).unwrap(); 52 | walk_outline(r, entry, name_map, page_map, depth + 1); 53 | } 54 | if let Some(entry_ref) = node.next { 55 | node = r.get(entry_ref).unwrap(); 56 | continue; 57 | } 58 | 59 | break; 60 | } 61 | } 62 | 63 | #[cfg(feature="cache")] 64 | fn main() { 65 | let path = args().nth(1).expect("no file given"); 66 | println!("read: {}", path); 67 | 68 | let file = FileOptions::cached().open(&path).unwrap(); 69 | let resolver = file.resolver(); 70 | let catalog = file.get_root(); 71 | 72 | let mut pages_map: HashMap = HashMap::new(); 73 | 74 | let mut count = 0; 75 | let mut dests_cb = |key: &PdfString, val: &Option| { 76 | //println!("{:?} {:?}", key, val); 77 | if let Some(Dest { page: Some(page), ..}) = val { 78 | pages_map.insert(key.to_string_lossy(), page.get_inner()); 79 | } 80 | 81 | count += 1; 82 | }; 83 | 84 | if let Some(ref names) = catalog.names { 85 | if let Some(ref dests) = names.dests { 86 | dests.walk(&resolver, &mut dests_cb).unwrap(); 87 | } 88 | } 89 | 90 | let mut pages = HashMap::new(); 91 | fn add_tree(r: &impl Resolve, pages: &mut HashMap, tree: &PageTree, current_page: &mut usize) { 92 | for &node_ref in &tree.kids { 93 | let node = r.get(node_ref).unwrap(); 94 | match *node { 95 | PagesNode::Tree(ref tree) => { 96 | add_tree(r, pages, tree, current_page); 97 | } 98 | PagesNode::Leaf(ref _page) => { 99 | pages.insert(node_ref.get_inner(), *current_page); 100 | *current_page += 1; 101 | } 102 | } 103 | } 104 | } 105 | add_tree(&resolver, &mut pages, &catalog.pages, &mut 0); 106 | 107 | let get_page_nr = |name: &str| -> usize { 108 | let page = pages_map[name]; 109 | pages[&page] 110 | }; 111 | let page_nr = |r: PlainRef| -> usize { 112 | pages[&r] 113 | }; 114 | 115 | if let Some(ref outlines) = catalog.outlines { 116 | if let Some(entry_ref) = 
outlines.first { 117 | let entry = resolver.get(entry_ref).unwrap(); 118 | walk_outline(&resolver, entry, &get_page_nr, &page_nr, 0); 119 | } 120 | } 121 | 122 | println!("{} items", count); 123 | 124 | if let Some(ref labels) = catalog.page_labels { 125 | labels.walk(&resolver, &mut |page: i32, label| { 126 | println!("{page} -> {:?}", label); 127 | }); 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /pdf/examples/other_page_content.rs: -------------------------------------------------------------------------------- 1 | use pdf::content::ViewRect; 2 | use pdf::error::PdfError; 3 | use pdf::file::FileOptions; 4 | use pdf::object::Resolve; 5 | use pdf::primitive::{Dictionary, Primitive}; 6 | use std::env::args; 7 | 8 | /// Extract data from a page entry that is under "other". 9 | /// This example looks for stikethroughs in the annotations entry 10 | /// and returns a Vec for the bounds of the struckthrough text. 11 | #[cfg(feature="cache")] 12 | fn main() -> Result<(), PdfError> { 13 | let path = args() 14 | .nth(1) 15 | .expect("Please provide a file path to the PDF you want to explore."); 16 | 17 | let file = FileOptions::cached().open(&path).unwrap(); 18 | let resolver = file.resolver(); 19 | 20 | for (i, page) in file.pages().enumerate() { 21 | let page = page.unwrap(); 22 | let strikethroughs = annotation_strikethrough(&page.other, &resolver)?; 23 | println!( 24 | "Found {} strikethrough annotations on page {}.", 25 | strikethroughs.len(), 26 | i + 1 27 | ); 28 | for strikethrough in strikethroughs { 29 | println!(); 30 | println!("Struck text:"); 31 | println!("{:#?}", strikethrough.0); 32 | println!(); 33 | println!("Text spans {} lines", strikethrough.1.len()); 34 | println!(); 35 | println!("Strikethrough bounding boxes:"); 36 | for rect in strikethrough.1 { 37 | println!("{:#?}", rect); 38 | println!(); 39 | } 40 | println!(); 41 | println!(); 42 | } 43 | } 44 | 45 | Ok(()) 46 | } 47 | 48 | fn 
annotation_strikethrough( 49 | other_dict: &Dictionary, 50 | resolver: &impl Resolve, 51 | ) -> Result)>, PdfError> { 52 | let mut strikethroughs: Vec<(String, Vec)> = Vec::new(); 53 | 54 | if !other_dict.is_empty() { 55 | let annotations = other_dict.get("Annots".into()); 56 | if let Some(annotations) = annotations { 57 | let annotations_resolved = annotations.clone().resolve(resolver)?; 58 | let annotations_array = annotations_resolved.into_array()?; 59 | for annotation in annotations_array.iter() { 60 | let mut paths: Vec = Vec::new(); 61 | let annotation_resolved = annotation.clone().resolve(resolver)?; 62 | let annotation_dict = annotation_resolved.into_dictionary()?; 63 | 64 | // If you have multiline strikethrough "Rect" will be the bounding 65 | // box around all the strikethrough lines. 66 | // "QuadPoints" gives 8 points for each line that is struckthrough, 67 | // so if a single annotation involves text on two lines, QuadPoints 68 | // should have 16 values in it. It starts with bottom left and 69 | // runs counter-clockwise. 70 | let subtype = annotation_dict.get("Subtype".into()); 71 | if let Some(subtype) = subtype { 72 | let subtype = subtype.clone().into_name()?; 73 | if subtype.as_str() == "StrikeOut" { 74 | let rects = annotation_dict.get("QuadPoints".into()); 75 | let text = annotation_dict.get("Contents".into()); 76 | if let (Some(rects), Some(text)) = (rects, text) { 77 | let text = text.to_string()?; 78 | 79 | // Check multiples of 8. 
80 | let rects_array = rects.clone().into_array()?; 81 | if rects_array.len() % 8 == 0 { 82 | let rects: Vec> = 83 | rects_array.chunks(8).map(|chunk| chunk.to_vec()).collect(); 84 | 85 | for rect in rects { 86 | let mut quad_points: Vec = Vec::new(); 87 | for num in rect { 88 | let number = num.as_number()?; 89 | quad_points.push(number); 90 | } 91 | if quad_points.len() == 8 { 92 | paths.push(ViewRect { 93 | x: quad_points[0], 94 | y: quad_points[1], 95 | width: quad_points[2] - quad_points[0], 96 | height: quad_points[7] - quad_points[1], 97 | }); 98 | } 99 | } 100 | strikethroughs.push((text, paths)) 101 | } 102 | } 103 | } 104 | } 105 | } 106 | } 107 | } 108 | 109 | Ok(strikethroughs) 110 | } 111 | -------------------------------------------------------------------------------- /pdf/examples/read.rs: -------------------------------------------------------------------------------- 1 | extern crate pdf; 2 | 3 | use std::collections::HashMap; 4 | use std::env::args; 5 | use std::fs; 6 | use std::time::SystemTime; 7 | 8 | use pdf::enc::StreamFilter; 9 | use pdf::error::PdfError; 10 | use pdf::file::{FileOptions, Log}; 11 | use pdf::object::*; 12 | use pdf::primitive::Primitive; 13 | 14 | struct VerboseLog; 15 | impl Log for VerboseLog { 16 | fn load_object(&self, r: PlainRef) { 17 | println!("load {r:?}"); 18 | } 19 | fn log_get(&self, r: PlainRef) { 20 | println!("get {r:?}"); 21 | } 22 | } 23 | 24 | #[cfg(feature = "cache")] 25 | fn main() -> Result<(), PdfError> { 26 | let path = args().nth(1).expect("no file given"); 27 | println!("read: {}", path); 28 | let now = SystemTime::now(); 29 | 30 | let file = FileOptions::cached().log(VerboseLog).open(&path).unwrap(); 31 | let resolver = file.resolver(); 32 | 33 | if let Some(ref info) = file.trailer.info_dict { 34 | let title = info.title.as_ref().map(|p| p.to_string_lossy()); 35 | let author = info.author.as_ref().map(|p| p.to_string_lossy()); 36 | 37 | let descr = match (title, author) { 38 | (Some(title), None) 
=> title, 39 | (None, Some(author)) => format!("[no title] – {}", author), 40 | (Some(title), Some(author)) => format!("{} – {}", title, author), 41 | _ => "PDF".into(), 42 | }; 43 | println!("{}", descr); 44 | } 45 | 46 | let mut images: Vec<_> = vec![]; 47 | let mut fonts = HashMap::new(); 48 | 49 | for page in file.pages() { 50 | let page = page.unwrap(); 51 | let resources = page.resources().unwrap(); 52 | for (i, font) in resources.fonts.values().enumerate() { 53 | let font = font.load(&resolver)?; 54 | let name = match &font.name { 55 | Some(name) => name.as_str().into(), 56 | None => i.to_string(), 57 | }; 58 | fonts.insert(name, font.clone()); 59 | } 60 | images.extend( 61 | resources 62 | .xobjects 63 | .iter() 64 | .map(|(_name, &r)| resolver.get(r).unwrap()) 65 | .filter(|o| matches!(**o, XObject::Image(_))), 66 | ); 67 | } 68 | 69 | for (i, o) in images.iter().enumerate() { 70 | let img = match **o { 71 | XObject::Image(ref im) => im, 72 | _ => continue, 73 | }; 74 | let (mut data, filter) = img.raw_image_data(&resolver)?; 75 | let ext = match filter { 76 | Some(StreamFilter::DCTDecode(_)) => "jpeg", 77 | Some(StreamFilter::JBIG2Decode(_)) => "jbig2", 78 | Some(StreamFilter::JPXDecode) => "jp2k", 79 | Some(StreamFilter::FlateDecode(_)) => "png", 80 | Some(StreamFilter::CCITTFaxDecode(_)) => { 81 | data = fax::tiff::wrap(&data, img.width, img.height).into(); 82 | "tiff" 83 | } 84 | _ => continue, 85 | }; 86 | 87 | let fname = format!("extracted_image_{}.{}", i, ext); 88 | 89 | fs::write(fname.as_str(), data).unwrap(); 90 | println!("Wrote file {}", fname); 91 | } 92 | println!("Found {} image(s).", images.len()); 93 | 94 | for (name, font) in fonts.iter() { 95 | let fname = format!("font_{}", name); 96 | if let Some(Ok(data)) = font.embedded_data(&resolver) { 97 | fs::write(fname.as_str(), data).unwrap(); 98 | println!("Wrote file {}", fname); 99 | } 100 | } 101 | println!("Found {} font(s).", fonts.len()); 102 | 103 | if let Some(ref forms) = 
file.get_root().forms { 104 | println!("Forms:"); 105 | for field in forms.fields.iter() { 106 | print!(" {:?} = ", field.name); 107 | match field.value { 108 | Primitive::String(ref s) => println!("{}", s.to_string_lossy()), 109 | Primitive::Integer(i) => println!("{}", i), 110 | Primitive::Name(ref s) => println!("{}", s), 111 | ref p => println!("{:?}", p), 112 | } 113 | } 114 | } 115 | 116 | if let Ok(elapsed) = now.elapsed() { 117 | println!( 118 | "Time: {}s", 119 | elapsed.as_secs() as f64 + elapsed.subsec_nanos() as f64 * 1e-9 120 | ); 121 | } 122 | Ok(()) 123 | } 124 | -------------------------------------------------------------------------------- /pdf/fuzz/.gitignore: -------------------------------------------------------------------------------- 1 | 2 | target 3 | corpus 4 | artifacts 5 | -------------------------------------------------------------------------------- /pdf/fuzz/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | name = "pdf-fuzz" 4 | version = "0.0.0" 5 | authors = ["Automatically generated"] 6 | publish = false 7 | edition = "2018" 8 | 9 | [package.metadata] 10 | cargo-fuzz = true 11 | 12 | [dependencies] 13 | libfuzzer-sys = "0.4" 14 | 15 | [dependencies.pdf] 16 | path = ".." 
17 | 18 | # Prevent this from interfering with workspaces 19 | [workspace] 20 | members = ["."] 21 | 22 | [[bin]] 23 | name = "parse" 24 | path = "fuzz_targets/parse.rs" 25 | test = false 26 | doc = false 27 | -------------------------------------------------------------------------------- /pdf/fuzz/fuzz_targets/parse.rs: -------------------------------------------------------------------------------- 1 | #![no_main] 2 | use libfuzzer_sys::fuzz_target; 3 | 4 | fn harness(data: &[u8]) { 5 | if let Ok(file) = pdf::file::FileOptions::cached().load(data) { 6 | for idx in 0..file.num_pages() { 7 | let _ = file.get_page(idx); 8 | } 9 | } 10 | } 11 | 12 | fuzz_target!(|data: &[u8]| { 13 | let _ = harness(data); 14 | }); -------------------------------------------------------------------------------- /pdf/src/any.rs: -------------------------------------------------------------------------------- 1 | use std::any::TypeId; 2 | use std::rc::Rc; 3 | use std::sync::Arc; 4 | use datasize::DataSize; 5 | use crate::object::{Object}; 6 | use crate::error::{Result, PdfError}; 7 | 8 | pub trait AnyObject { 9 | fn type_name(&self) -> &'static str; 10 | fn type_id(&self) -> TypeId; 11 | fn size(&self) -> usize; 12 | } 13 | 14 | #[repr(transparent)] 15 | pub struct NoSize(T); 16 | impl AnyObject for NoSize { 17 | fn size(&self) -> usize { 18 | 0 19 | } 20 | fn type_id(&self) -> TypeId { 21 | TypeId::of::() 22 | } 23 | fn type_name(&self) -> &'static str { 24 | std::any::type_name::() 25 | } 26 | } 27 | 28 | #[repr(transparent)] 29 | pub struct WithSize(T); 30 | impl AnyObject for WithSize { 31 | fn size(&self) -> usize { 32 | datasize::data_size(&self.0) 33 | } 34 | fn type_id(&self) -> TypeId { 35 | TypeId::of::() 36 | } 37 | fn type_name(&self) -> &'static str { 38 | std::any::type_name::() 39 | } 40 | } 41 | 42 | #[derive(DataSize)] 43 | pub struct Any(Rc); 44 | 45 | impl Any { 46 | pub fn downcast(self) -> Result> 47 | where T: AnyObject + 'static 48 | { 49 | if TypeId::of::() == 
self.0.type_id() { 50 | unsafe { 51 | let raw: *const dyn AnyObject = Rc::into_raw(self.0); 52 | Ok(Rc::from_raw(raw as *const T)) 53 | } 54 | } else { 55 | Err(type_mismatch::(self.0.type_name())) 56 | } 57 | } 58 | pub fn new(rc: Rc) -> Any 59 | where WithSize: AnyObject, T: 'static 60 | { 61 | Any(unsafe { 62 | std::mem::transmute::, Rc>>(rc) 63 | } as _) 64 | } 65 | pub fn new_without_size(rc: Rc) -> Any 66 | where NoSize: AnyObject, T: 'static 67 | { 68 | Any(unsafe { 69 | std::mem::transmute::, Rc>>(rc) 70 | } as _) 71 | } 72 | pub fn type_name(&self) -> &'static str { 73 | self.0.type_name() 74 | } 75 | } 76 | 77 | #[derive(Clone, DataSize)] 78 | pub struct AnySync(Arc); 79 | 80 | #[cfg(feature="cache")] 81 | impl globalcache::ValueSize for AnySync { 82 | #[inline] 83 | fn size(&self) -> usize { 84 | self.0.size() 85 | } 86 | } 87 | 88 | impl AnySync { 89 | pub fn downcast(self) -> Result> 90 | where T: 'static 91 | { 92 | if TypeId::of::() == self.0.type_id() { 93 | unsafe { 94 | let raw: *const (dyn AnyObject+Sync+Send) = Arc::into_raw(self.0); 95 | Ok(Arc::from_raw(raw as *const T)) 96 | } 97 | } else { 98 | Err(type_mismatch::(self.0.type_name())) 99 | } 100 | } 101 | pub fn new(arc: Arc) -> AnySync 102 | where WithSize: AnyObject, T: Sync + Send + 'static 103 | { 104 | AnySync(unsafe { 105 | std::mem::transmute::, Arc>>(arc) 106 | } as _) 107 | } 108 | pub fn new_without_size(arc: Arc) -> AnySync 109 | where NoSize: AnyObject, T: Sync + Send + 'static 110 | { 111 | AnySync(unsafe { 112 | std::mem::transmute::, Arc>>(arc) 113 | } as _) 114 | } 115 | pub fn type_name(&self) -> &'static str { 116 | self.0.type_name() 117 | } 118 | } 119 | fn type_mismatch(name: &str) -> PdfError { 120 | PdfError::Other { msg: format!("expected {}, found {}", std::any::type_name::(), name) } 121 | } 122 | -------------------------------------------------------------------------------- /pdf/src/backend.rs: 
-------------------------------------------------------------------------------- 1 | use crate::error::*; 2 | use crate::parser::Lexer; 3 | use crate::parser::read_xref_and_trailer_at; 4 | use crate::xref::XRefTable; 5 | use crate::primitive::Dictionary; 6 | use crate::object::*; 7 | use std::ops::Deref; 8 | 9 | use std::ops::{ 10 | RangeFull, 11 | RangeFrom, 12 | RangeTo, 13 | Range, 14 | }; 15 | 16 | pub const MAX_ID: u32 = 1_000_000; 17 | 18 | pub trait Backend: Sized { 19 | fn read(&self, range: T) -> Result<&[u8]>; 20 | //fn write(&mut self, range: T) -> Result<&mut [u8]>; 21 | fn len(&self) -> usize; 22 | fn is_empty(&self) -> bool { 23 | self.len() == 0 24 | } 25 | 26 | /// Returns the offset of the beginning of the file, i.e., where the `%PDF-1.5` header is. 27 | /// (currently only used internally!) 28 | fn locate_start_offset(&self) -> Result { 29 | // Read from the beginning of the file, and look for the header. 30 | // Implementation note 13 in version 1.7 of the PDF reference says that Acrobat viewers 31 | // expect the header to be within the first 1KB of the file, so we do the same here. 32 | const HEADER: &[u8] = b"%PDF-"; 33 | let buf = t!(self.read(..std::cmp::min(1024, self.len()))); 34 | buf 35 | .windows(HEADER.len()) 36 | .position(|window| window == HEADER) 37 | .ok_or_else(|| PdfError::Other{ msg: "file header is missing".to_string() }) 38 | } 39 | 40 | /// Returns the value of startxref (currently only used internally!) 41 | fn locate_xref_offset(&self) -> Result { 42 | // locate the xref offset at the end of the file 43 | // `\nPOS\n%%EOF` where POS is the position encoded as base 10 integer. 44 | // u64::MAX has 20 digits + \n\n(2) + %%EOF(5) = 27 bytes max. 
45 | 46 | let mut lexer = Lexer::new(t!(self.read(..))); 47 | lexer.set_pos_from_end(0); 48 | t!(lexer.seek_substr_back(b"startxref")); 49 | t!(lexer.next()).to::() 50 | } 51 | 52 | /// Used internally by File, but could also be useful for applications that want to look at the raw PDF objects. 53 | fn read_xref_table_and_trailer(&self, start_offset: usize, resolve: &impl Resolve) -> Result<(XRefTable, Dictionary)> { 54 | let xref_offset = t!(self.locate_xref_offset()); 55 | let pos = t!(start_offset.checked_add(xref_offset).ok_or(PdfError::Invalid)); 56 | if pos >= self.len() { 57 | bail!("XRef offset outside file bounds"); 58 | } 59 | 60 | let mut lexer = Lexer::with_offset(t!(self.read(pos ..)), pos); 61 | 62 | let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve)); 63 | 64 | let highest_id = t!(trailer.get("Size") 65 | .ok_or_else(|| PdfError::MissingEntry {field: "Size".into(), typ: "XRefTable"})? 66 | .as_u32()); 67 | 68 | if highest_id > MAX_ID { 69 | bail!("too many objects"); 70 | } 71 | let mut refs = XRefTable::new(highest_id as ObjNr); 72 | for section in xref_sections { 73 | refs.add_entries_from(section)?; 74 | } 75 | 76 | let mut prev_trailer = { 77 | match trailer.get("Prev") { 78 | Some(p) => Some(t!(p.as_usize())), 79 | None => None 80 | } 81 | }; 82 | trace!("READ XREF AND TABLE"); 83 | let mut seen = vec![]; 84 | while let Some(prev_xref_offset) = prev_trailer { 85 | if seen.contains(&prev_xref_offset) { 86 | bail!("xref offsets loop"); 87 | } 88 | seen.push(prev_xref_offset); 89 | 90 | let pos = t!(start_offset.checked_add(prev_xref_offset).ok_or(PdfError::Invalid)); 91 | let mut lexer = Lexer::with_offset(t!(self.read(pos..)), pos); 92 | let (xref_sections, trailer) = t!(read_xref_and_trailer_at(&mut lexer, resolve)); 93 | 94 | for section in xref_sections { 95 | refs.add_entries_from(section)?; 96 | } 97 | 98 | prev_trailer = { 99 | match trailer.get("Prev") { 100 | Some(p) => { 101 | let prev = t!(p.as_usize()); 102 
| Some(prev) 103 | } 104 | None => None 105 | } 106 | }; 107 | } 108 | Ok((refs, trailer)) 109 | } 110 | } 111 | 112 | 113 | impl Backend for T where T: Deref { //+ DerefMut { 114 | fn read(&self, range: R) -> Result<&[u8]> { 115 | let r = t!(range.to_range(self.len())); 116 | Ok(&self[r]) 117 | } 118 | /* 119 | fn write(&mut self, range: R) -> Result<&mut [u8]> { 120 | let r = range.to_range(self.len())?; 121 | Ok(&mut self[r]) 122 | } 123 | */ 124 | fn len(&self) -> usize { 125 | (**self).len() 126 | } 127 | } 128 | 129 | /// `IndexRange` is implemented by Rust's built-in range types, produced 130 | /// by range syntax like `..`, `a..`, `..b` or `c..d`. 131 | pub trait IndexRange 132 | { 133 | /// Start index (inclusive) 134 | fn start(&self) -> Option; 135 | 136 | /// End index (exclusive) 137 | fn end(&self) -> Option; 138 | 139 | /// `len`: the size of whatever container that is being indexed 140 | fn to_range(&self, len: usize) -> Result> { 141 | match (self.start(), self.end()) { 142 | (None, None) => Ok(0 .. len), 143 | (Some(start), None) if start <= len => Ok(start .. len), 144 | (None, Some(end)) if end <= len => Ok(0 .. end), 145 | (Some(start), Some(end)) if start <= end && end <= len => Ok(start .. 
end), 146 | _ => Err(PdfError::ContentReadPastBoundary) 147 | } 148 | } 149 | } 150 | 151 | 152 | impl IndexRange for RangeFull { 153 | #[inline] 154 | fn start(&self) -> Option { None } 155 | #[inline] 156 | fn end(&self) -> Option { None } 157 | 158 | } 159 | 160 | impl IndexRange for RangeFrom { 161 | #[inline] 162 | fn start(&self) -> Option { Some(self.start) } 163 | #[inline] 164 | fn end(&self) -> Option { None } 165 | } 166 | 167 | impl IndexRange for RangeTo { 168 | #[inline] 169 | fn start(&self) -> Option { None } 170 | #[inline] 171 | fn end(&self) -> Option { Some(self.end) } 172 | } 173 | 174 | impl IndexRange for Range { 175 | #[inline] 176 | fn start(&self) -> Option { Some(self.start) } 177 | #[inline] 178 | fn end(&self) -> Option { Some(self.end) } 179 | } 180 | -------------------------------------------------------------------------------- /pdf/src/build.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | use std::collections::HashSet; 3 | use std::ops::Range; 4 | use std::sync::Arc; 5 | 6 | use datasize::DataSize; 7 | 8 | use crate::PdfError; 9 | use crate::any::AnySync; 10 | use crate::enc::StreamFilter; 11 | use crate::file::Cache; 12 | use crate::file::FileOptions; 13 | use crate::file::Log; 14 | use crate::file::Storage; 15 | use crate::file::Trailer; 16 | use crate::object::*; 17 | use crate::content::*; 18 | use crate::error::Result; 19 | use crate::parser::ParseFlags; 20 | use crate::primitive::Dictionary; 21 | use crate::primitive::Primitive; 22 | 23 | #[derive(Default)] 24 | pub struct PageBuilder { 25 | pub ops: Vec, 26 | pub media_box: Option, 27 | pub crop_box: Option, 28 | pub trim_box: Option, 29 | pub resources: Resources, 30 | pub rotate: i32, 31 | pub metadata: Option, 32 | pub lgi: Option, 33 | pub vp: Option, 34 | pub other: Dictionary, 35 | } 36 | impl PageBuilder { 37 | pub fn from_content(content: Content, resolve: &impl Resolve) -> Result { 38 | 
Ok(PageBuilder { 39 | ops: content.operations(resolve)?, 40 | .. PageBuilder::default() 41 | }) 42 | } 43 | pub fn from_page(page: &Page, resolve: &impl Resolve) -> Result { 44 | Ok(PageBuilder { 45 | ops: page.contents.as_ref().map(|c| c.operations(resolve)).transpose()?.unwrap_or_default(), 46 | media_box: Some(page.media_box()?), 47 | crop_box: Some(page.crop_box()?), 48 | trim_box: page.trim_box, 49 | resources: (**page.resources()?.data()).clone(), 50 | rotate: page.rotate, 51 | metadata: page.metadata.clone(), 52 | lgi: page.lgi.clone(), 53 | vp: page.vp.clone(), 54 | other: page.other.clone(), 55 | }) 56 | } 57 | pub fn clone_page(page: &Page, cloner: &mut impl Cloner) -> Result { 58 | let old_resources = &**page.resources()?.data(); 59 | 60 | let mut resources = Resources::default(); 61 | let ops = page.contents.as_ref() 62 | .map(|content| content.operations(cloner)).transpose()? 63 | .map(|ops| { 64 | ops.into_iter().map(|op| -> Result { 65 | deep_clone_op(&op, cloner, old_resources, &mut resources) 66 | }).collect() 67 | }) 68 | .transpose()? 
69 | .unwrap_or_default(); 70 | 71 | Ok(PageBuilder { 72 | ops, 73 | media_box: Some(page.media_box()?), 74 | crop_box: Some(page.crop_box()?), 75 | trim_box: page.trim_box, 76 | resources, 77 | rotate: page.rotate, 78 | metadata: page.metadata.deep_clone(cloner)?, 79 | lgi: page.lgi.deep_clone(cloner)?, 80 | vp: page.vp.deep_clone(cloner)?, 81 | other: page.other.deep_clone(cloner)?, 82 | }) 83 | } 84 | pub fn size(&mut self, width: f32, height: f32) { 85 | self.media_box = Some(Rectangle { 86 | top: 0., 87 | left: 0., 88 | bottom: height, 89 | right: width, 90 | }); 91 | } 92 | } 93 | 94 | pub struct CatalogBuilder { 95 | pages: Vec 96 | } 97 | impl CatalogBuilder { 98 | pub fn from_pages(pages: Vec) -> CatalogBuilder { 99 | CatalogBuilder { 100 | pages 101 | } 102 | } 103 | pub fn build(self, update: &mut impl Updater) -> Result { 104 | let kids_promise: Vec<_> = self.pages.iter() 105 | .map(|_page| update.promise::()) 106 | .collect(); 107 | let kids: Vec<_> = kids_promise.iter() 108 | .map(|p| Ref::new(p.get_inner())) 109 | .collect(); 110 | 111 | let tree = PagesRc::create(PageTree { 112 | parent: None, 113 | count: kids.len() as _, 114 | kids, 115 | resources: None, 116 | media_box: None, 117 | crop_box: None 118 | }, update)?; 119 | 120 | for (page, promise) in self.pages.into_iter().zip(kids_promise) { 121 | let content = Content::from_ops(page.ops); 122 | let resources = update.create(page.resources)?.into(); 123 | let page = Page { 124 | parent: tree.clone(), 125 | contents: Some(content), 126 | media_box: page.media_box, 127 | crop_box: page.crop_box, 128 | trim_box: page.trim_box, 129 | resources: Some(resources), 130 | rotate: page.rotate, 131 | metadata: page.metadata, 132 | lgi: page.lgi, 133 | vp: page.vp, 134 | other: page.other, 135 | annotations: Default::default(), 136 | }; 137 | update.fulfill(promise, PagesNode::Leaf(page))?; 138 | } 139 | 140 | Ok(Catalog { 141 | version: Some("1.7".into()), 142 | pages: tree, 143 | names: None, 144 | dests: 
None, 145 | metadata: None, 146 | outlines: None, 147 | struct_tree_root: None, 148 | forms: None, 149 | page_labels: None, 150 | }) 151 | } 152 | } 153 | 154 | pub struct PdfBuilder { 155 | pub storage: Storage, SC, OC, L>, 156 | pub info: Option, 157 | pub id: Option<[String; 2]>, 158 | 159 | } 160 | impl PdfBuilder 161 | where 162 | SC: Cache>>, 163 | OC: Cache, Arc>>, 164 | L: Log, 165 | { 166 | pub fn new(fileoptions: FileOptions<'_, SC, OC, L>) -> Self { 167 | let storage = fileoptions.storage(); 168 | PdfBuilder { 169 | storage, 170 | info: None, 171 | id: None 172 | } 173 | } 174 | pub fn info(mut self, info: InfoDict) -> Self { 175 | self.info = Some(info); 176 | self 177 | } 178 | pub fn id(mut self, a: String, b: String) -> Self { 179 | self.id = Some([a, b]); 180 | self 181 | } 182 | pub fn build(mut self, catalog: CatalogBuilder) -> Result> { 183 | let catalog = catalog.build(&mut self.storage)?; 184 | 185 | let mut trailer = Trailer { 186 | root: self.storage.create(catalog)?, 187 | encrypt_dict: None, 188 | size: 0, 189 | id: vec!["foo".into(), "bar".into()], 190 | info_dict: self.info, 191 | prev_trailer_pos: None, 192 | }; 193 | self.storage.save(&mut trailer)?; 194 | Ok(self.storage.into_inner()) 195 | } 196 | } 197 | pub struct Importer<'a, R, U> { 198 | resolver: R, 199 | map: HashMap, 200 | updater: &'a mut U, 201 | rcrefs: HashMap, 202 | // ptr of old -> (old, new) 203 | shared: HashMap, 204 | } 205 | 206 | pub struct ImporterMap { 207 | resolver: R, 208 | map: HashMap, 209 | } 210 | 211 | impl<'a, R, U> Importer<'a, R, U> { 212 | pub fn new(resolver: R, updater: &'a mut U) -> Self { 213 | Importer { 214 | resolver, 215 | updater, 216 | map: Default::default(), 217 | rcrefs: Default::default(), 218 | shared: Default::default(), 219 | } 220 | } 221 | } 222 | impl<'a, R: Resolve, U> Importer<'a, R, U> { 223 | pub fn finish(self) -> ImporterMap { 224 | ImporterMap { resolver: self.resolver, map: self.map } 225 | } 226 | } 227 | impl ImporterMap { 
228 | fn compare_dict(&self, a_dict: &Dictionary, b_dict: &Dictionary, new_resolve: &impl Resolve) -> Result { 229 | let mut same = true; 230 | let mut b_unvisited: HashSet<_> = b_dict.keys().collect(); 231 | for (a_key, a_val) in a_dict.iter() { 232 | if let Some(b_val) = b_dict.get(a_key) { 233 | if !self.compare_prim(a_val, b_val, new_resolve)? { 234 | println!("value for key {a_key} mismatch."); 235 | same = false; 236 | } 237 | b_unvisited.remove(a_key); 238 | } else { 239 | println!("missing key {a_key} in b."); 240 | same = false; 241 | } 242 | } 243 | for b_key in b_unvisited.iter() { 244 | println!("missing key {b_key} in a."); 245 | } 246 | Ok(same && !b_unvisited.is_empty()) 247 | } 248 | fn compare_prim(&self, a: &Primitive, b: &Primitive, new_resolve: &impl Resolve) -> Result { 249 | match (a, b) { 250 | (Primitive::Array(a_parts), Primitive::Array(b_parts)) => { 251 | if a_parts.len() != b_parts.len() { 252 | dbg!(a_parts, b_parts); 253 | println!("different length {} vs. {}", a_parts.len(), b_parts.len()); 254 | println!("a = {a_parts:?}"); 255 | println!("b = {b_parts:?}"); 256 | return Ok(false); 257 | } 258 | for (a, b) in a_parts.iter().zip(b_parts.iter()) { 259 | if !self.compare_prim(a, b, new_resolve)? { 260 | return Ok(false); 261 | } 262 | } 263 | Ok(true) 264 | } 265 | (Primitive::Dictionary(a_dict), Primitive::Dictionary(b_dict)) => { 266 | self.compare_dict(a_dict, b_dict, new_resolve) 267 | } 268 | (Primitive::Reference(r1), Primitive::Reference(r2)) => { 269 | match self.map.get(&r1) { 270 | Some(r) if r == r2 => Ok(true), 271 | _ => Ok(false) 272 | } 273 | } 274 | (Primitive::Stream(a_s), Primitive::Stream(b_s)) => { 275 | if !self.compare_dict(&a_s.info, &b_s.info, new_resolve)? 
{ 276 | println!("stream dicts differ"); 277 | return Ok(false) 278 | } 279 | let a_data = a_s.raw_data(&self.resolver)?; 280 | let b_data = b_s.raw_data(new_resolve)?; 281 | if a_data != b_data { 282 | println!("data differs."); 283 | return Ok(false) 284 | } 285 | Ok(true) 286 | } 287 | (Primitive::Integer(a), Primitive::Number(b)) => Ok(*a as f32 == *b), 288 | (Primitive::Number(a), Primitive::Integer(b)) => Ok(*a == *b as f32), 289 | (Primitive::Reference(a_ref), b) => { 290 | let a = self.resolver.resolve(*a_ref)?; 291 | self.compare_prim(&a, b, new_resolve) 292 | } 293 | (a, Primitive::Reference(b_ref)) => { 294 | let b = new_resolve.resolve(*b_ref)?; 295 | self.compare_prim(a, &b, new_resolve) 296 | } 297 | (ref a, ref b) => { 298 | if a == b { 299 | Ok(true) 300 | } else { 301 | println!("{a:?} != {b:?}"); 302 | Ok(false) 303 | } 304 | } 305 | } 306 | } 307 | pub fn verify(&self, new_resolve: &impl Resolve) -> Result { 308 | let mut same = true; 309 | for (&old_ref, &new_ref) in self.map.iter() { 310 | let old = self.resolver.resolve(old_ref)?; 311 | let new = new_resolve.resolve(new_ref)?; 312 | 313 | if !self.compare_prim(&old, &new, new_resolve)? 
{ 314 | same = false; 315 | } 316 | } 317 | Ok(same) 318 | } 319 | } 320 | 321 | impl<'a, R: Resolve, U> Resolve for Importer<'a, R, U> { 322 | fn get(&self, r: Ref) -> Result> { 323 | self.resolver.get(r) 324 | } 325 | fn get_data_or_decode(&self, id: PlainRef, range: Range, filters: &[StreamFilter]) -> Result> { 326 | self.resolver.get_data_or_decode(id, range, filters) 327 | } 328 | fn options(&self) -> &ParseOptions { 329 | self.resolver.options() 330 | } 331 | fn resolve(&self, r: PlainRef) -> Result { 332 | self.resolver.resolve(r) 333 | } 334 | fn resolve_flags(&self, r: PlainRef, flags: ParseFlags, depth: usize) -> Result { 335 | self.resolver.resolve_flags(r, flags, depth) 336 | } 337 | fn stream_data(&self, id: PlainRef, range: Range) -> Result> { 338 | self.resolver.stream_data(id, range) 339 | } 340 | } 341 | impl<'a, R, U: Updater> Updater for Importer<'a, R, U> { 342 | fn create(&mut self, obj: T) -> Result> { 343 | self.updater.create(obj) 344 | } 345 | fn fulfill(&mut self, promise: PromisedRef, obj: T) -> Result> { 346 | self.updater.fulfill(promise, obj) 347 | } 348 | fn promise(&mut self) -> PromisedRef { 349 | self.updater.promise() 350 | } 351 | fn update(&mut self, old: PlainRef, obj: T) -> Result> { 352 | self.updater.update(old, obj) 353 | } 354 | } 355 | impl<'a, R: Resolve, U: Updater> Cloner for Importer<'a, R, U> { 356 | fn clone_ref(&mut self, old: Ref) -> Result> { 357 | if let Some(&new_ref) = self.map.get(&old.get_inner()) { 358 | return Ok(Ref::new(new_ref)); 359 | } 360 | let obj = self.resolver.get(old)?; 361 | let clone = obj.deep_clone(self)?; 362 | 363 | let r = self.updater.create(clone)?; 364 | self.map.insert(old.get_inner(), r.get_ref().get_inner()); 365 | 366 | Ok(r.get_ref()) 367 | } 368 | fn clone_plainref(&mut self, old: PlainRef) -> Result { 369 | if let Some(&new_ref) = self.map.get(&old) { 370 | return Ok(new_ref); 371 | } 372 | let obj = self.resolver.resolve(old)?; 373 | let clone = obj.deep_clone(self)?; 374 | 375 
| let new = self.updater.create(clone)? 376 | .get_ref().get_inner(); 377 | 378 | self.map.insert(old, new); 379 | 380 | Ok(new) 381 | } 382 | fn clone_rcref(&mut self, old: &RcRef) -> Result> { 383 | let old_ref = old.get_ref().get_inner(); 384 | if let Some(&new_ref) = self.map.get(&old_ref) { 385 | let arc = self.rcrefs.get(&new_ref).unwrap().clone().downcast()?; 386 | return Ok(RcRef::new(new_ref, arc)); 387 | } 388 | 389 | let new = old.data().deep_clone(self)?; 390 | let new = self.updater.create::(new)?; 391 | self.rcrefs.insert(new.get_ref().get_inner(), AnySync::new(new.data().clone())); 392 | self.map.insert(old_ref, new.get_ref().get_inner()); 393 | 394 | Ok(new) 395 | } 396 | fn clone_shared(&mut self, old: &Shared) -> Result> { 397 | let key = &**old as *const T as usize; 398 | if let Some((old, new)) = self.shared.get(&key) { 399 | return new.clone().downcast(); 400 | } 401 | let new = Shared::new(old.as_ref().deep_clone(self)?); 402 | self.shared.insert(key, (AnySync::new_without_size(old.clone()), AnySync::new_without_size(new.clone()))); 403 | Ok(new) 404 | } 405 | } -------------------------------------------------------------------------------- /pdf/src/data/t01_lzw+base85.txt: -------------------------------------------------------------------------------- 1 | J..)6T`?p&c!Jnl@ 3 | RM]WM;jjH6Gnc75idkL5]+cPZKEBPWdR>FF(kj1_R%W_d 4 | &/jS!;iuad7h?[L-F$+]]0A3Ck*$I0KZ?;<)CJtqi65Xb 5 | Vc3\n5ua:Q/=0$W<#N3U;H,MQKqfg1?:lUpR;6oN[C2E4 6 | ZNr8Udn.'p+?#X+1>0Kuk$bCDF/(3fL5]Oq)^kJZ!C2H1 7 | 'TO]Rl?Q:&'<5&iP!$Rq;BXRecDN[IJB`,)o8XJOSJ9sD 8 | S]hQ;Rj@!ND)bD_q&C\g:inYC%)&u#:u,M6Bm%IY!Kb1+ 9 | ":aAa'S`ViJglLb8iG1p&i;eVoK&juJHs9%;Xomop"5KatWRT"JQ#qYuL, 11 | JD?M$0QP)lKn06l1apKDC@\qJ4B!!(5m+j.7F790m(Vj8 12 | 8l8Q:_CZ(Gm1%X\N1&u!FKHMB~> -------------------------------------------------------------------------------- /pdf/src/data/t01_plain.txt: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pdf-rs/pdf/f87d3f5e90bddc9df8e0e144b06f2727e7afcc7e/pdf/src/data/t01_plain.txt -------------------------------------------------------------------------------- /pdf/src/enc.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::many_single_char_names)] 2 | #![allow(dead_code)] // TODO 3 | 4 | use itertools::Itertools; 5 | 6 | use crate as pdf; 7 | use crate::error::*; 8 | use crate::object::{Object, Resolve, Stream}; 9 | use crate::primitive::{Primitive, Dictionary}; 10 | use std::convert::{TryFrom, TryInto}; 11 | use std::io::{Read, Write}; 12 | use once_cell::sync::OnceCell; 13 | use datasize::DataSize; 14 | 15 | 16 | #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] 17 | pub struct LZWFlateParams { 18 | #[pdf(key="Predictor", default="1")] 19 | pub predictor: i32, 20 | #[pdf(key="Colors", default="1")] 21 | pub n_components: i32, 22 | #[pdf(key="BitsPerComponent", default="8")] 23 | pub bits_per_component: i32, 24 | #[pdf(key="Columns", default="1")] 25 | pub columns: i32, 26 | #[pdf(key="EarlyChange", default="1")] 27 | pub early_change: i32, 28 | } 29 | impl Default for LZWFlateParams { 30 | fn default() -> LZWFlateParams { 31 | LZWFlateParams { 32 | predictor: 1, 33 | n_components: 1, 34 | bits_per_component: 8, 35 | columns: 1, 36 | early_change: 1 37 | } 38 | } 39 | } 40 | 41 | #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] 42 | pub struct DCTDecodeParams { 43 | // TODO The default value of ColorTransform is 1 if the image has three components and 0 otherwise. 44 | // 0: No transformation. 45 | // 1: If the image has three color components, transform RGB values to YUV before encoding and from YUV to RGB after decoding. 46 | // If the image has four components, transform CMYK values to YUVK before encoding and from YUVK to CMYK after decoding. 47 | // This option is ignored if the image has one or two color components. 
48 | #[pdf(key="ColorTransform")] 49 | pub color_transform: Option, 50 | } 51 | 52 | #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] 53 | pub struct CCITTFaxDecodeParams { 54 | #[pdf(key="K", default="0")] 55 | pub k: i32, 56 | 57 | #[pdf(key="EndOfLine", default="false")] 58 | pub end_of_line: bool, 59 | 60 | #[pdf(key="EncodedByteAlign", default="false")] 61 | pub encoded_byte_align: bool, 62 | 63 | #[pdf(key="Columns", default="1728")] 64 | pub columns: u32, 65 | 66 | #[pdf(key="Rows", default="0")] 67 | pub rows: u32, 68 | 69 | #[pdf(key="EndOfBlock", default="true")] 70 | pub end_of_block: bool, 71 | 72 | #[pdf(key="BlackIs1", default="false")] 73 | pub black_is_1: bool, 74 | 75 | #[pdf(key="DamagedRowsBeforeError", default="0")] 76 | pub damaged_rows_before_error: u32, 77 | } 78 | 79 | #[derive(Object, ObjectWrite, Debug, Clone, DataSize, DeepClone)] 80 | pub struct JBIG2DecodeParams { 81 | #[pdf(key="JBIG2Globals")] 82 | pub globals: Option> 83 | } 84 | #[derive(Debug, Clone, DataSize, DeepClone)] 85 | pub enum StreamFilter { 86 | ASCIIHexDecode, 87 | ASCII85Decode, 88 | LZWDecode (LZWFlateParams), 89 | FlateDecode (LZWFlateParams), 90 | JPXDecode, //Jpeg2k 91 | DCTDecode (DCTDecodeParams), 92 | CCITTFaxDecode (CCITTFaxDecodeParams), 93 | JBIG2Decode(JBIG2DecodeParams), 94 | Crypt, 95 | RunLengthDecode 96 | } 97 | impl StreamFilter { 98 | pub fn from_kind_and_params(kind: &str, params: Dictionary, r: &impl Resolve) -> Result { 99 | let params = Primitive::Dictionary (params); 100 | Ok( 101 | match kind { 102 | "ASCIIHexDecode" => StreamFilter::ASCIIHexDecode, 103 | "ASCII85Decode" => StreamFilter::ASCII85Decode, 104 | "LZWDecode" => StreamFilter::LZWDecode (LZWFlateParams::from_primitive(params, r)?), 105 | "FlateDecode" => StreamFilter::FlateDecode (LZWFlateParams::from_primitive(params, r)?), 106 | "JPXDecode" => StreamFilter::JPXDecode, 107 | "DCTDecode" => StreamFilter::DCTDecode (DCTDecodeParams::from_primitive(params, r)?), 108 | 
"CCITTFaxDecode" => StreamFilter::CCITTFaxDecode (CCITTFaxDecodeParams::from_primitive(params, r)?), 109 | "JBIG2Decode" => StreamFilter::JBIG2Decode(JBIG2DecodeParams::from_primitive(params, r)?), 110 | "Crypt" => StreamFilter::Crypt, 111 | "RunLengthDecode" => StreamFilter::RunLengthDecode, 112 | ty => bail!("Unrecognized filter type {:?}", ty), 113 | } 114 | ) 115 | } 116 | } 117 | 118 | #[inline] 119 | pub fn decode_nibble(c: u8) -> Option { 120 | match c { 121 | n @ b'0' ..= b'9' => Some(n - b'0'), 122 | a @ b'a' ..= b'h' => Some(a - b'a' + 0xa), 123 | a @ b'A' ..= b'H' => Some(a - b'A' + 0xA), 124 | _ => None 125 | } 126 | } 127 | 128 | #[inline] 129 | fn encode_nibble(c: u8) -> u8 { 130 | match c { 131 | 0 ..= 9 => b'0'+ c, 132 | 10 ..= 15 => b'a' - 10 + c, 133 | _ => unreachable!() 134 | } 135 | } 136 | 137 | 138 | pub fn decode_hex(data: &[u8]) -> Result> { 139 | let mut out = Vec::with_capacity(data.len() / 2); 140 | let pairs = data.iter().cloned() 141 | .take_while(|&b| b != b'>') 142 | .filter(|&b| !matches!(b, 0 | 9 | 10 | 12 | 13 | 32)) 143 | .tuples(); 144 | for (i, (high, low)) in pairs.enumerate() { 145 | if let (Some(low), Some(high)) = (decode_nibble(low), decode_nibble(high)) { 146 | out.push(high << 4 | low); 147 | } else { 148 | return Err(PdfError::HexDecode {pos: i * 2, bytes: [high, low]}) 149 | } 150 | } 151 | Ok(out) 152 | } 153 | pub fn encode_hex(data: &[u8]) -> Vec { 154 | let mut buf = Vec::with_capacity(data.len() * 2); 155 | for &b in data { 156 | buf.push(encode_nibble(b >> 4)); 157 | buf.push(encode_nibble(b & 0xf)); 158 | } 159 | buf 160 | } 161 | 162 | #[inline] 163 | fn sym_85(byte: u8) -> Option { 164 | match byte { 165 | b @ 0x21 ..= 0x75 => Some(b - 0x21), 166 | _ => None 167 | } 168 | } 169 | 170 | fn word_85([a, b, c, d, e]: [u8; 5]) -> Option<[u8; 4]> { 171 | fn s(b: u8) -> Option { sym_85(b).map(|n| n as u64) } 172 | let (a, b, c, d, e) = (s(a)?, s(b)?, s(c)?, s(d)?, s(e)?); 173 | let q = (((a * 85 + b) * 85 + c) * 85 + 
d) * 85 + e; 174 | // 85^5 > 256^4, the result might not fit in an u32. 175 | let r = u32::try_from(q).ok()?; 176 | Some(r.to_be_bytes()) 177 | } 178 | 179 | pub fn decode_85(data: &[u8]) -> Result> { 180 | let mut out = Vec::with_capacity((data.len() + 4) / 5 * 4); 181 | 182 | let mut stream = data.iter().cloned() 183 | .filter(|&b| !matches!(b, b' ' | b'\n' | b'\r' | b'\t')); 184 | 185 | let mut symbols = stream.by_ref() 186 | .take_while(|&b| b != b'~'); 187 | 188 | let (tail_len, tail) = loop { 189 | match symbols.next() { 190 | Some(b'z') => out.extend_from_slice(&[0; 4]), 191 | Some(a) => { 192 | let (b, c, d, e) = match (symbols.next(), symbols.next(), symbols.next(), symbols.next()) { 193 | (Some(b), Some(c), Some(d), Some(e)) => (b, c, d, e), 194 | (None, _, _, _) => break (1, [a, b'u', b'u', b'u', b'u']), 195 | (Some(b), None, _, _) => break (2, [a, b, b'u', b'u', b'u']), 196 | (Some(b), Some(c), None, _) => break (3, [a, b, c, b'u', b'u']), 197 | (Some(b), Some(c), Some(d), None) => break (4, [a, b, c, d, b'u']), 198 | }; 199 | out.extend_from_slice(&word_85([a, b, c, d, e]).ok_or(PdfError::Ascii85TailError)?); 200 | } 201 | None => break (0, [b'u'; 5]) 202 | } 203 | }; 204 | 205 | if tail_len > 0 { 206 | let last = word_85(tail).ok_or(PdfError::Ascii85TailError)?; 207 | out.extend_from_slice(&last[.. 
tail_len-1]); 208 | } 209 | 210 | match (stream.next(), stream.next()) { 211 | (Some(b'>'), None) => Ok(out), 212 | _ => Err(PdfError::Ascii85TailError) 213 | } 214 | } 215 | 216 | #[inline] 217 | fn divmod(n: u32, m: u32) -> (u32, u32) { 218 | (n / m, n % m) 219 | } 220 | 221 | #[inline] 222 | fn a85(n: u32) -> u8 { 223 | n as u8 + 0x21 224 | } 225 | 226 | #[inline] 227 | fn base85_chunk(c: [u8; 4]) -> [u8; 5] { 228 | let n = u32::from_be_bytes(c); 229 | let (n, e) = divmod(n, 85); 230 | let (n, d) = divmod(n, 85); 231 | let (n, c) = divmod(n, 85); 232 | let (a, b) = divmod(n, 85); 233 | 234 | [a85(a), a85(b), a85(c), a85(d), a85(e)] 235 | } 236 | 237 | fn encode_85(data: &[u8]) -> Vec { 238 | let mut buf = Vec::with_capacity((data.len() / 4) * 5 + 10); 239 | let mut chunks = data.chunks_exact(4); 240 | for chunk in chunks.by_ref() { 241 | let c: [u8; 4] = chunk.try_into().unwrap(); 242 | if c == [0; 4] { 243 | buf.push(b'z'); 244 | } else { 245 | buf.extend_from_slice(&base85_chunk(c)); 246 | } 247 | } 248 | 249 | let r = chunks.remainder(); 250 | if r.len() > 0 { 251 | let mut c = [0; 4]; 252 | c[.. r.len()].copy_from_slice(r); 253 | let out = base85_chunk(c); 254 | buf.extend_from_slice(&out[.. 
r.len() + 1]); 255 | } 256 | buf.extend_from_slice(b"~>"); 257 | buf 258 | } 259 | 260 | fn inflate_bytes_zlib(data: &[u8]) -> Result> { 261 | use libflate::zlib::Decoder; 262 | let mut decoder = Decoder::new(data)?; 263 | let mut decoded = Vec::new(); 264 | decoder.read_to_end(&mut decoded)?; 265 | Ok(decoded) 266 | } 267 | 268 | fn inflate_bytes(data: &[u8]) -> Result> { 269 | use libflate::deflate::Decoder; 270 | let mut decoder = Decoder::new(data); 271 | let mut decoded = Vec::new(); 272 | decoder.read_to_end(&mut decoded)?; 273 | Ok(decoded) 274 | } 275 | 276 | pub fn flate_decode(data: &[u8], params: &LZWFlateParams) -> Result> { 277 | 278 | let predictor = params.predictor as usize; 279 | let n_components = params.n_components as usize; 280 | let columns = params.columns as usize; 281 | let stride = columns * n_components; 282 | 283 | 284 | // First flate decode 285 | let decoded = { 286 | if let Ok(data) = inflate_bytes_zlib(data) { 287 | data 288 | } else if let Ok(data) = inflate_bytes(data) { 289 | data 290 | } else { 291 | dump_data(data); 292 | bail!("can't inflate"); 293 | } 294 | }; 295 | // Then unfilter (PNG) 296 | // For this, take the old out as input, and write output to out 297 | 298 | if predictor > 10 { 299 | let inp = decoded; // input buffer 300 | let rows = inp.len() / (stride+1); 301 | 302 | // output buffer 303 | let mut out = vec![0; rows * stride]; 304 | 305 | // Apply inverse predictor 306 | let null_vec = vec![0; stride]; 307 | 308 | let mut in_off = 0; // offset into input buffer 309 | 310 | let mut out_off = 0; // offset into output buffer 311 | let mut last_out_off = 0; // last offset to output buffer 312 | 313 | while in_off + stride < inp.len() { 314 | let predictor = PredictorType::from_u8(inp[in_off])?; 315 | in_off += 1; // +1 because the first byte on each row is predictor 316 | 317 | let row_in = &inp[in_off .. in_off + stride]; 318 | let (prev_row, row_out) = if out_off == 0 { 319 | (&null_vec[..], &mut out[out_off .. 
out_off+stride]) 320 | } else { 321 | let (prev, curr) = out.split_at_mut(out_off); 322 | (&prev[last_out_off ..], &mut curr[.. stride]) 323 | }; 324 | unfilter(predictor, n_components, prev_row, row_in, row_out); 325 | 326 | last_out_off = out_off; 327 | 328 | in_off += stride; 329 | out_off += stride; 330 | } 331 | Ok(out) 332 | } else { 333 | Ok(decoded) 334 | } 335 | } 336 | fn flate_encode(data: &[u8]) -> Vec { 337 | use libflate::deflate::Encoder; 338 | let mut encoded = Vec::new(); 339 | let mut encoder = Encoder::new(&mut encoded); 340 | encoder.write_all(data).unwrap(); 341 | encoded 342 | } 343 | 344 | pub fn dct_decode(data: &[u8], _params: &DCTDecodeParams) -> Result> { 345 | use jpeg_decoder::Decoder; 346 | let mut decoder = Decoder::new(data); 347 | let pixels = decoder.decode()?; 348 | Ok(pixels) 349 | } 350 | 351 | pub fn lzw_decode(data: &[u8], params: &LZWFlateParams) -> Result> { 352 | use weezl::{BitOrder, decode::Decoder}; 353 | let mut out = vec![]; 354 | 355 | let mut decoder = if params.early_change != 0 { 356 | Decoder::with_tiff_size_switch(BitOrder::Msb, 9) 357 | } else { 358 | Decoder::new(BitOrder::Msb, 9) 359 | }; 360 | 361 | decoder 362 | .into_stream(&mut out) 363 | .decode_all(data).status?; 364 | Ok(out) 365 | } 366 | fn lzw_encode(data: &[u8], params: &LZWFlateParams) -> Result> { 367 | use weezl::{BitOrder, encode::Encoder}; 368 | if params.early_change != 0 { 369 | bail!("encoding early_change != 0 is not supported"); 370 | } 371 | let mut compressed = vec![]; 372 | Encoder::new(BitOrder::Msb, 9) 373 | .into_stream(&mut compressed) 374 | .encode_all(data).status?; 375 | Ok(compressed) 376 | } 377 | 378 | pub fn fax_decode(data: &[u8], params: &CCITTFaxDecodeParams) -> Result> { 379 | use fax::{Color, decoder::{pels, decode_g4}}; 380 | 381 | if params.k < 0 { 382 | let columns = params.columns as usize; 383 | let rows = params.rows as usize; 384 | 385 | let height = if params.rows == 0 { None } else { Some(params.rows as u16)}; 
386 | let mut buf = Vec::with_capacity(columns * rows); 387 | decode_g4(data.iter().cloned(), columns as u16, height, |line| { 388 | buf.extend(pels(line, columns as u16).map(|c| match c { 389 | Color::Black => 0, 390 | Color::White => 255 391 | })); 392 | assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns); 393 | }).ok_or(PdfError::Other { msg: "faxdecode failed".into() })?; 394 | assert_eq!(buf.len() % columns, 0, "len={}, columns={}", buf.len(), columns); 395 | 396 | if rows != 0 && buf.len() != columns * rows { 397 | bail!("decoded length does not match (expected {rows}∙{columns}, got {})", buf.len()); 398 | } 399 | Ok(buf) 400 | } else { 401 | unimplemented!() 402 | } 403 | } 404 | 405 | pub fn run_length_decode(data: &[u8]) -> Result> { 406 | // Used as specification 407 | let mut buf = Vec::new(); 408 | let d = data; 409 | let mut c = 0; 410 | 411 | while c < data.len() { 412 | let length = d[c]; // length is first byte 413 | if length < 128 { 414 | let start = c + 1; 415 | let end = start + length as usize + 1; 416 | // copy _following_ length + 1 bytes literally 417 | buf.extend_from_slice(&d[start..end]); 418 | c = end; // move cursor to next run 419 | } else if length >= 129 { 420 | let copy = 257 - length as usize; // copy 2 - 128 times 421 | let b = d[c + 1]; // copied byte 422 | buf.extend(std::iter::repeat(b).take(copy)); 423 | c += 2; // move cursor to next run 424 | } else { 425 | break; // EOD 426 | } 427 | } 428 | 429 | Ok(buf) 430 | } 431 | 432 | pub type DecodeFn = dyn Fn(&[u8]) -> Result> + Sync + Send + 'static; 433 | static JPX_DECODER: OnceCell> = OnceCell::new(); 434 | static JBIG2_DECODER: OnceCell> = OnceCell::new(); 435 | 436 | pub fn set_jpx_decoder(f: Box) { 437 | let _ = JPX_DECODER.set(f); 438 | } 439 | pub fn set_jbig2_decoder(f: Box) { 440 | let _ = JBIG2_DECODER.set(f); 441 | } 442 | 443 | pub fn jpx_decode(data: &[u8]) -> Result> { 444 | JPX_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jp2k 
decoder not set".into()})?(data) 445 | } 446 | pub fn jbig2_decode(data: &[u8], globals: &[u8]) -> Result> { 447 | let data = [ 448 | // file header 449 | // &[0x97, 0x4A, 0x42, 0x32, 0x0D, 0x0A, 0x1A, 0x0A, 0x01, 0x00, 0x00, 0x00, 0x01], 450 | 451 | globals, 452 | data, 453 | 454 | // end of page 455 | &[0x00, 0x00, 0x00, 0x03, 0x31, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00], 456 | 457 | // end of stream 458 | &[0x00, 0x00, 0x00, 0x04, 0x33, 0x01, 0x00, 0x00, 0x00, 0x00], 459 | ].concat(); 460 | JBIG2_DECODER.get().ok_or_else(|| PdfError::Other { msg: "jbig2 decoder not set".into()})?(&data) 461 | } 462 | 463 | pub fn decode(data: &[u8], filter: &StreamFilter) -> Result> { 464 | match *filter { 465 | StreamFilter::ASCIIHexDecode => decode_hex(data), 466 | StreamFilter::ASCII85Decode => decode_85(data), 467 | StreamFilter::LZWDecode(ref params) => lzw_decode(data, params), 468 | StreamFilter::FlateDecode(ref params) => flate_decode(data, params), 469 | StreamFilter::RunLengthDecode => run_length_decode(data), 470 | StreamFilter::DCTDecode(ref params) => dct_decode(data, params), 471 | 472 | _ => bail!("unimplemented {filter:?}"), 473 | } 474 | } 475 | 476 | pub fn encode(data: &[u8], filter: &StreamFilter) -> Result> { 477 | match *filter { 478 | StreamFilter::ASCIIHexDecode => Ok(encode_hex(data)), 479 | StreamFilter::ASCII85Decode => Ok(encode_85(data)), 480 | StreamFilter::LZWDecode(ref params) => lzw_encode(data, params), 481 | StreamFilter::FlateDecode (ref _params) => Ok(flate_encode(data)), 482 | _ => unimplemented!(), 483 | } 484 | } 485 | 486 | /* 487 | * Predictor - copied and adapted from PNG crate.. 488 | */ 489 | 490 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 491 | #[repr(u8)] 492 | #[allow(dead_code)] 493 | pub enum PredictorType { 494 | NoFilter = 0, 495 | Sub = 1, 496 | Up = 2, 497 | Avg = 3, 498 | Paeth = 4 499 | } 500 | 501 | impl PredictorType { 502 | /// u8 -> Self. Temporary solution until Rust provides a canonical one. 
503 | pub fn from_u8(n: u8) -> Result { 504 | match n { 505 | 0 => Ok(PredictorType::NoFilter), 506 | 1 => Ok(PredictorType::Sub), 507 | 2 => Ok(PredictorType::Up), 508 | 3 => Ok(PredictorType::Avg), 509 | 4 => Ok(PredictorType::Paeth), 510 | n => Err(PdfError::IncorrectPredictorType {n}) 511 | } 512 | } 513 | } 514 | 515 | fn filter_paeth(a: u8, b: u8, c: u8) -> u8 { 516 | let ia = a as i16; 517 | let ib = b as i16; 518 | let ic = c as i16; 519 | 520 | let p = ia + ib - ic; 521 | 522 | let pa = (p - ia).abs(); 523 | let pb = (p - ib).abs(); 524 | let pc = (p - ic).abs(); 525 | 526 | if pa <= pb && pa <= pc { 527 | a 528 | } else if pb <= pc { 529 | b 530 | } else { 531 | c 532 | } 533 | } 534 | 535 | pub fn unfilter(filter: PredictorType, bpp: usize, prev: &[u8], inp: &[u8], out: &mut [u8]) { 536 | use self::PredictorType::*; 537 | let len = inp.len(); 538 | assert_eq!(len, out.len()); 539 | assert_eq!(len, prev.len()); 540 | if bpp > len { 541 | return; 542 | } 543 | 544 | match filter { 545 | NoFilter => { 546 | out[..len].copy_from_slice(&inp[..len]); 547 | } 548 | Sub => { 549 | out[..bpp].copy_from_slice(&inp[..bpp]); 550 | 551 | for i in bpp..len { 552 | out[i] = inp[i].wrapping_add(out[i - bpp]); 553 | } 554 | } 555 | Up => { 556 | for i in 0..len { 557 | out[i] = inp[i].wrapping_add(prev[i]); 558 | } 559 | } 560 | Avg => { 561 | for i in 0..bpp { 562 | out[i] = inp[i].wrapping_add(prev[i] / 2); 563 | } 564 | 565 | for i in bpp..len { 566 | out[i] = inp[i].wrapping_add( 567 | ((out[i - bpp] as i16 + prev[i] as i16) / 2) as u8 568 | ); 569 | } 570 | } 571 | Paeth => { 572 | for i in 0..bpp { 573 | out[i] = inp[i].wrapping_add( 574 | filter_paeth(0, prev[i], 0) 575 | ); 576 | } 577 | 578 | for i in bpp..len { 579 | out[i] = inp[i].wrapping_add( 580 | filter_paeth(out[i - bpp], prev[i], prev[i - bpp]) 581 | ); 582 | } 583 | } 584 | } 585 | } 586 | 587 | #[allow(unused)] 588 | pub fn filter(method: PredictorType, bpp: usize, previous: &[u8], current: &mut [u8]) 
{ 589 | use self::PredictorType::*; 590 | let len = current.len(); 591 | 592 | match method { 593 | NoFilter => (), 594 | Sub => { 595 | for i in (bpp..len).rev() { 596 | current[i] = current[i].wrapping_sub(current[i - bpp]); 597 | } 598 | } 599 | Up => { 600 | for i in 0..len { 601 | current[i] = current[i].wrapping_sub(previous[i]); 602 | } 603 | } 604 | Avg => { 605 | for i in (bpp..len).rev() { 606 | current[i] = current[i].wrapping_sub(current[i - bpp].wrapping_add(previous[i]) / 2); 607 | } 608 | 609 | for i in 0..bpp { 610 | current[i] = current[i].wrapping_sub(previous[i] / 2); 611 | } 612 | } 613 | Paeth => { 614 | for i in (bpp..len).rev() { 615 | current[i] = current[i].wrapping_sub(filter_paeth(current[i - bpp], previous[i], previous[i - bpp])); 616 | } 617 | 618 | for i in 0..bpp { 619 | current[i] = current[i].wrapping_sub(filter_paeth(0, previous[i], 0)); 620 | } 621 | } 622 | } 623 | } 624 | 625 | #[cfg(test)] 626 | mod tests { 627 | use super::*; 628 | 629 | #[test] 630 | fn base_85() { 631 | fn s(b: &[u8]) -> &str { std::str::from_utf8(b).unwrap() } 632 | 633 | let case = &b"hello world!"[..]; 634 | let encoded = encode_85(case); 635 | assert_eq!(s(&encoded), "BOu!rD]j7BEbo80~>"); 636 | let decoded = decode_85(&encoded).unwrap(); 637 | assert_eq!(case, &*decoded); 638 | /* 639 | assert_eq!( 640 | s(&decode_85( 641 | &lzw_decode( 642 | &decode_85(&include_bytes!("data/t01_lzw+base85.txt")[..]).unwrap(), 643 | &LZWFlateParams::default() 644 | ).unwrap() 645 | ).unwrap()), 646 | include_str!("data/t01_plain.txt") 647 | ); 648 | */ 649 | } 650 | 651 | #[test] 652 | fn run_length_decode_test() { 653 | let x = run_length_decode(&[254, b'a', 255, b'b', 2, b'c', b'b', b'c', 254, b'a', 128]).unwrap(); 654 | assert_eq!(b"aaabbcbcaaa", x.as_slice()); 655 | } 656 | } 657 | -------------------------------------------------------------------------------- /pdf/src/encoding.rs: -------------------------------------------------------------------------------- 1 | 
use std::collections::HashMap; 2 | use istring::SmallString; 3 | use crate as pdf; 4 | use crate::object::{Object, Resolve, ObjectWrite, DeepClone}; 5 | use crate::primitive::{Primitive, Dictionary}; 6 | use crate::error::{Result}; 7 | use datasize::DataSize; 8 | 9 | #[derive(Debug, Clone, DataSize)] 10 | pub struct Encoding { 11 | pub base: BaseEncoding, 12 | pub differences: HashMap, 13 | } 14 | 15 | #[derive(Object, ObjectWrite, Debug, Clone, Eq, PartialEq, DataSize)] 16 | pub enum BaseEncoding { 17 | StandardEncoding, 18 | SymbolEncoding, 19 | MacRomanEncoding, 20 | WinAnsiEncoding, 21 | MacExpertEncoding, 22 | #[pdf(name = "Identity-H")] 23 | IdentityH, 24 | None, 25 | 26 | #[pdf(other)] 27 | Other(String), 28 | } 29 | impl Object for Encoding { 30 | fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { 31 | match p { 32 | name @ Primitive::Name(_) => { 33 | Ok(Encoding { 34 | base: BaseEncoding::from_primitive(name, resolve)?, 35 | differences: HashMap::new(), 36 | }) 37 | } 38 | Primitive::Dictionary(mut dict) => { 39 | let base = match dict.remove("BaseEncoding") { 40 | Some(p) => BaseEncoding::from_primitive(p, resolve)?, 41 | None => BaseEncoding::None 42 | }; 43 | let mut gid = 0; 44 | let mut differences = HashMap::new(); 45 | if let Some(p) = dict.remove("Differences") { 46 | for part in p.resolve(resolve)?.into_array()? 
{
                        match part {
                            Primitive::Integer(code) => {
                                // an integer restarts the running code value
                                gid = code as u32;
                            }
                            Primitive::Name(name) => {
                                // a name takes the current code, which then advances
                                differences.insert(gid, name);
                                gid += 1;
                            }
                            _ => bail!("Unknown part primitive in dictionary: {:?}", part),
                        }
                    }
                }
                Ok(Encoding { base, differences })
            }
            Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve),
            Primitive::Stream(s) => Self::from_primitive(Primitive::Dictionary(s.info), resolve),
            _ => bail!("Unknown element: {:?}", p),
        }
    }
}
impl ObjectWrite for Encoding {
    /// Serialize as a bare base-encoding name when there are no differences,
    /// otherwise as a dictionary with /BaseEncoding and a /Differences array.
    fn to_primitive(&self, update: &mut impl pdf::object::Updater) -> Result<Primitive> {
        let base = self.base.to_primitive(update)?;
        if self.differences.is_empty() {
            return Ok(base);
        }

        let mut entries: Vec<_> = self.differences.iter().collect();
        entries.sort();

        // Build the /Differences array: a code is emitted only when the run
        // of consecutive codes breaks; each name implicitly increments it.
        let mut array = Vec::new();
        let mut prev: Option<u32> = None;
        for &(&gid, name) in entries.iter() {
            let contiguous = prev.map(|p| p + 1 == gid).unwrap_or(false);
            if !contiguous {
                array.push(Primitive::Integer(gid as i32));
            }
            array.push(Primitive::Name(name.clone()));
            prev = Some(gid);
        }

        let mut dict = Dictionary::new();
        dict.insert("BaseEncoding", base);
        dict.insert("Differences", Primitive::Array(array));
        Ok(Primitive::Dictionary(dict))
    }
}
impl Encoding {
    /// The default `StandardEncoding` with no differences.
    pub fn standard() -> Encoding {
        Encoding {
            base: BaseEncoding::StandardEncoding,
            differences: HashMap::new()
        }
    }
}
impl DeepClone for Encoding {
    fn deep_clone(&self, _cloner: &mut impl pdf::object::Cloner) -> Result<Self> {
        // Holds no object references, so a plain clone is already deep.
        Ok(self.clone())
    }
}
-------------------------------------------------------------------------------- /pdf/src/error.rs: --------------------------------------------------------------------------------
use crate::object::ObjNr;
use std::io;
use std::error::Error;
use crate::parser::ParseFlags;
5 | use std::sync::Arc; 6 | use datasize::{DataSize, data_size}; 7 | use snafu::ErrorCompat; 8 | 9 | #[derive(Debug, Snafu)] 10 | pub enum PdfError { 11 | // Syntax / parsing 12 | #[snafu(display("Unexpected end of file"))] 13 | EOF, 14 | 15 | #[snafu(display("Shared, caused by\n {}", source))] 16 | Shared { 17 | #[snafu(source)] 18 | source: Arc 19 | }, 20 | 21 | #[snafu(display("Not enough Operator arguments"))] 22 | NoOpArg, 23 | 24 | #[snafu(display("Error parsing from string, caused by\n {}", source))] 25 | Parse { 26 | #[snafu(source)] 27 | source: Box 28 | }, 29 | 30 | #[snafu(display("Invalid encoding, caused by\n {}", source))] 31 | Encoding { 32 | #[snafu(source)] 33 | source: Box 34 | }, 35 | 36 | #[snafu(display("Out of bounds: index {}, but len is {}", index, len))] 37 | Bounds { index: usize, len: usize }, 38 | 39 | #[snafu(display("Unexpected token '{}' at {} - expected '{}'", lexeme, pos, expected))] 40 | UnexpectedLexeme {pos: usize, lexeme: String, expected: &'static str}, 41 | 42 | #[snafu(display("Expecting an object, encountered {} at pos {}. Rest:\n{}\n\n((end rest))", first_lexeme, pos, rest))] 43 | UnknownType {pos: usize, first_lexeme: String, rest: String}, 44 | 45 | #[snafu(display("Unknown variant '{}' for enum {}", name, id))] 46 | UnknownVariant { id: &'static str, name: String }, 47 | 48 | #[snafu(display("'{}' not found.", word))] 49 | NotFound { word: String }, 50 | 51 | #[snafu(display("Cannot follow reference during parsing - no resolve fn given (most likely /Length of Stream)."))] 52 | Reference, // TODO: which one? 
53 | 54 | #[snafu(display("Erroneous 'type' field in xref stream - expected 0, 1 or 2, found {}", found))] 55 | XRefStreamType { found: u64 }, 56 | 57 | #[snafu(display("Parsing read past boundary of Contents."))] 58 | ContentReadPastBoundary, 59 | 60 | #[snafu(display("Primitive not allowed"))] 61 | PrimitiveNotAllowed { allowed: ParseFlags, found: ParseFlags }, 62 | 63 | ////////////////// 64 | // Encode/decode 65 | #[snafu(display("Hex decode error. Position {}, bytes {:?}", pos, bytes))] 66 | HexDecode {pos: usize, bytes: [u8; 2]}, 67 | 68 | #[snafu(display("Ascii85 tail error"))] 69 | Ascii85TailError, 70 | 71 | #[snafu(display("Failed to convert '{}' into PredictorType", n))] 72 | IncorrectPredictorType {n: u8}, 73 | 74 | ////////////////// 75 | // Dictionary 76 | #[snafu(display("Can't parse field {} of struct {}, caused by\n {}", field, typ, source))] 77 | FromPrimitive { 78 | typ: &'static str, 79 | field: &'static str, 80 | #[snafu(source)] 81 | source: Box 82 | }, 83 | 84 | #[snafu(display("Field /{} is missing in dictionary for type {}.", field, typ))] 85 | MissingEntry { 86 | typ: &'static str, 87 | field: String 88 | }, 89 | 90 | #[snafu(display("Expected to find value {} for key {}. Found {} instead.", value, key, found))] 91 | KeyValueMismatch { 92 | key: String, 93 | value: String, 94 | found: String, 95 | }, 96 | 97 | #[snafu(display("Expected dictionary /Type = {}. 
Found /Type = {}.", expected, found))] 98 | WrongDictionaryType { 99 | expected: String, 100 | found: String 101 | }, 102 | 103 | ////////////////// 104 | // Misc 105 | #[snafu(display("Tried to dereference free object nr {}.", obj_nr))] 106 | FreeObject {obj_nr: u64}, 107 | 108 | #[snafu(display("Tried to dereference non-existing object nr {}.", obj_nr))] 109 | NullRef {obj_nr: u64}, 110 | 111 | #[snafu(display("Expected primitive {}, found primitive {} instead.", expected, found))] 112 | UnexpectedPrimitive {expected: &'static str, found: &'static str}, 113 | /* 114 | WrongObjectType {expected: &'static str, found: &'static str} { 115 | description("Function called on object of wrong type.") 116 | display("Expected {}, found {}.", expected, found) 117 | } 118 | */ 119 | #[snafu(display("Object stream index out of bounds ({}/{}).", index, max))] 120 | ObjStmOutOfBounds {index: usize, max: usize}, 121 | 122 | #[snafu(display("Page out of bounds ({}/{}).", page_nr, max))] 123 | PageOutOfBounds {page_nr: u32, max: u32}, 124 | 125 | #[snafu(display("Page {} could not be found in the page tree.", page_nr))] 126 | PageNotFound {page_nr: u32}, 127 | 128 | #[snafu(display("Entry {} in xref table unspecified", id))] 129 | UnspecifiedXRefEntry {id: ObjNr}, 130 | 131 | #[snafu(display("Invalid password"))] 132 | InvalidPassword, 133 | 134 | #[snafu(display("Decryption failure"))] 135 | DecryptionFailure, 136 | 137 | #[snafu(display("JPEG Error, caused by\n {}", source))] 138 | Jpeg { 139 | #[snafu(source)] 140 | source: jpeg_decoder::Error 141 | }, 142 | 143 | #[snafu(display("IO Error, caused by\n {}", source))] 144 | Io { 145 | #[snafu(source)] 146 | source: io::Error 147 | }, 148 | 149 | #[snafu(display("{}", msg))] 150 | Other { msg: String }, 151 | 152 | #[snafu(display("NoneError at {}:{}:{}:{}", file, line, column, context))] 153 | NoneError { file: &'static str, line: u32, column: u32, context: Context }, 154 | 155 | #[snafu(display("Try at {}:{}:{}:{}, caused by\n 
{}", file, line, column, context, source))] 156 | Try { 157 | file: &'static str, 158 | line: u32, 159 | column: u32, 160 | context: Context, 161 | #[snafu(source)] 162 | source: Box 163 | }, 164 | 165 | #[snafu(display("PostScriptParseError"))] 166 | PostScriptParse, 167 | 168 | #[snafu(display("PostScriptExecError"))] 169 | PostScriptExec, 170 | 171 | #[snafu(display("UTF16 decode error"))] 172 | Utf16Decode, 173 | 174 | #[snafu(display("UTF8 decode error"))] 175 | Utf8Decode, 176 | 177 | #[snafu(display("CID decode error"))] 178 | CidDecode, 179 | 180 | #[snafu(display("Max nesting depth reached"))] 181 | MaxDepth, 182 | 183 | #[snafu(display("Invalid"))] 184 | Invalid, 185 | } 186 | impl PdfError { 187 | pub fn is_eof(&self) -> bool { 188 | match self { 189 | PdfError::EOF => true, 190 | PdfError::Try { ref source, .. } => source.is_eof(), 191 | _ => false 192 | } 193 | } 194 | } 195 | datasize::non_dynamic_const_heap_size!(PdfError, 0); 196 | 197 | #[cfg(feature="cache")] 198 | impl globalcache::ValueSize for PdfError { 199 | #[inline] 200 | fn size(&self) -> usize { 201 | data_size(self) 202 | } 203 | } 204 | 205 | #[derive(Debug)] 206 | pub struct Context(pub Vec<(&'static str, String)>); 207 | impl std::fmt::Display for Context { 208 | fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { 209 | for (i, &(key, ref val)) in self.0.iter().enumerate() { 210 | if i == 0 { 211 | writeln!(f)?; 212 | } 213 | writeln!(f, " {} = {}", key, val)?; 214 | } 215 | Ok(()) 216 | } 217 | } 218 | 219 | pub type Result = std::result::Result; 220 | 221 | impl From for PdfError { 222 | fn from(source: io::Error) -> PdfError { 223 | PdfError::Io { source } 224 | } 225 | } 226 | impl From for PdfError { 227 | fn from(msg: String) -> PdfError { 228 | PdfError::Other { msg } 229 | } 230 | } 231 | impl From> for PdfError { 232 | fn from(source: Arc) -> PdfError { 233 | PdfError::Shared { source } 234 | } 235 | } 236 | 237 | #[macro_export] 238 | macro_rules! 
try_opt { 239 | ($e:expr $(,$c:expr)*) => ( 240 | match $e { 241 | Some(v) => v, 242 | None => { 243 | let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); 244 | return Err($crate::PdfError::NoneError { 245 | file: file!(), 246 | line: line!(), 247 | column: column!(), 248 | context, 249 | }); 250 | } 251 | } 252 | ); 253 | } 254 | 255 | #[macro_export] 256 | macro_rules! t { 257 | ($e:expr $(,$c:expr)*) => { 258 | match $e { 259 | Ok(v) => v, 260 | Err(e) => { 261 | let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); 262 | return Err($crate::PdfError::Try { file: file!(), line: line!(), column: column!(), context, source: e.into() }) 263 | } 264 | } 265 | }; 266 | } 267 | 268 | #[macro_export] 269 | macro_rules! ctx { 270 | ($e:expr, $($c:expr),*) => { 271 | match $e { 272 | Ok(v) => Ok(v), 273 | Err(e) => { 274 | let context = $crate::error::Context(vec![ $( (stringify!($c), format!("{:?}", $c) ) ),* ]); 275 | Err($crate::PdfError::TryContext { file: file!(), line: line!(), column: column!(), context, source: e.into() }) 276 | } 277 | } 278 | }; 279 | } 280 | 281 | macro_rules! err_from { 282 | ($($st:ty),* => $variant:ident) => ( 283 | $( 284 | impl From<$st> for PdfError { 285 | fn from(e: $st) -> PdfError { 286 | PdfError::$variant { source: e.into() } 287 | } 288 | } 289 | )* 290 | ) 291 | } 292 | err_from!(std::str::Utf8Error, std::string::FromUtf8Error, std::string::FromUtf16Error, 293 | istring::FromUtf8Error, istring::FromUtf8Error => Encoding); 294 | err_from!(std::num::ParseIntError, std::string::ParseError => Parse); 295 | err_from!(jpeg_decoder::Error => Jpeg); 296 | 297 | macro_rules! other { 298 | ($($t:tt)*) => ($crate::PdfError::Other { msg: format!($($t)*) }) 299 | } 300 | 301 | macro_rules! err { 302 | ($e: expr) => ({ 303 | return Err($e); 304 | }) 305 | } 306 | macro_rules! 
bail { 307 | ($($t:tt)*) => { 308 | err!($crate::PdfError::Other { msg: format!($($t)*) }) 309 | } 310 | } 311 | macro_rules! unimplemented { 312 | () => (bail!("Unimplemented @ {}:{}", file!(), line!())) 313 | } 314 | 315 | #[cfg(not(feature = "dump"))] 316 | pub fn dump_data(_data: &[u8]) {} 317 | 318 | #[cfg(feature = "dump")] 319 | pub fn dump_data(data: &[u8]) { 320 | use std::io::Write; 321 | if let Some(path) = ::std::env::var_os("PDF_OUT") { 322 | let (mut file, path) = tempfile::Builder::new() 323 | .prefix("") 324 | .tempfile_in(path).unwrap() 325 | .keep().unwrap(); 326 | file.write_all(&data).unwrap(); 327 | info!("data written to {:?}", path); 328 | } else { 329 | info!("set PDF_OUT to an existing directory to dump stream data"); 330 | } 331 | } 332 | 333 | #[cfg(test)] 334 | mod tests { 335 | use super::PdfError; 336 | 337 | fn assert_send() {} 338 | 339 | fn assert_sync() {} 340 | 341 | #[test] 342 | fn error_is_send_and_sync() { 343 | // note that these checks happens at compile time, not when the test is run 344 | assert_send::(); 345 | assert_sync::(); 346 | } 347 | } 348 | -------------------------------------------------------------------------------- /pdf/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow(non_camel_case_types)] /* TODO temporary becaues of pdf_derive */ 2 | #![allow(unused_doc_comments)] // /* TODO temporary because of err.rs */ 3 | #![allow(clippy::len_zero, clippy::should_implement_trait, clippy::manual_map, clippy::from_over_into)] 4 | 5 | #[macro_use] extern crate pdf_derive; 6 | #[macro_use] extern crate snafu; 7 | #[macro_use] extern crate log; 8 | 9 | #[macro_use] 10 | pub mod error; 11 | pub mod object; 12 | pub mod xref; 13 | pub mod primitive; 14 | pub mod file; 15 | pub mod backend; 16 | pub mod content; 17 | pub mod parser; 18 | pub mod font; 19 | pub mod any; 20 | pub mod encoding; 21 | pub mod build; 22 | 23 | // mod content; 24 | pub mod enc; 25 | pub mod 
crypt; 26 | 27 | // pub use content::*; 28 | pub use crate::error::PdfError; 29 | -------------------------------------------------------------------------------- /pdf/src/macros.rs: -------------------------------------------------------------------------------- 1 | macro_rules! write_entry { 2 | ($out:expr, $key:tt, $val:expr) => { 3 | { 4 | $out.write(b" ")?; 5 | $key.serialize($out)?; 6 | $out.write(b" ")?; 7 | $val.serialize($out)?; 8 | $out.write(b"\n")?; 9 | } 10 | } 11 | } 12 | macro_rules! write_entrys { 13 | ($out:expr, $key:tt << $val:expr $(,)*) => { 14 | write_entry!($out, $key, $val); 15 | }; 16 | ($out:expr, $key:tt << $val:expr, $($rest:tt)*) => { 17 | { 18 | write_entry!($out, $key, $val); 19 | write_entrys!($out, $($rest)*); 20 | } 21 | }; 22 | ($out:expr, $key:tt ? << $val:expr $(,)*) => { 23 | match &$val { 24 | &Some(ref v) => write_entry!($out, $key, v), 25 | &None => {} 26 | } 27 | }; 28 | ($out:expr, $key:tt ? << $val:expr, $($rest:tt)*) => { 29 | { 30 | match &$val { 31 | &Some(ref v) => write_entry!($out, $key, v), 32 | &None => {} 33 | } 34 | write_entrys!($out, $($rest)*); 35 | } 36 | } 37 | } 38 | 39 | macro_rules! 
write_dict { 40 | ($out:expr, $($rest:tt)*) => { 41 | { 42 | write!($out, "<<\n")?; 43 | write_entrys!($out, $($rest)*); 44 | write!($out, ">>")?; 45 | } 46 | }; 47 | } 48 | 49 | 50 | -------------------------------------------------------------------------------- /pdf/src/object/color.rs: -------------------------------------------------------------------------------- 1 | use datasize::DataSize; 2 | use crate as pdf; 3 | use crate::object::*; 4 | use crate::error::*; 5 | 6 | #[derive(Object, Debug, DataSize, DeepClone, ObjectWrite)] 7 | pub struct IccInfo { 8 | #[pdf(key="N")] 9 | pub components: u32, 10 | 11 | #[pdf(key="Alternate")] 12 | pub alternate: Option>, 13 | 14 | #[pdf(key="Range")] 15 | pub range: Option>, 16 | 17 | #[pdf(key="Metadata")] 18 | pub metadata: Option>, 19 | } 20 | 21 | #[derive(Debug, Clone, DeepClone)] 22 | pub enum ColorSpace { 23 | DeviceGray, 24 | DeviceRGB, 25 | DeviceCMYK, 26 | DeviceN { names: Vec, alt: Box, tint: Function, attr: Option }, 27 | CalGray(Dictionary), 28 | CalRGB(Dictionary), 29 | CalCMYK(Dictionary), 30 | Indexed(Box, u8, Arc<[u8]>), 31 | Separation(Name, Box, Function), 32 | Icc(RcRef>), 33 | Pattern, 34 | Named(Name), 35 | Other(Vec) 36 | } 37 | impl DataSize for ColorSpace { 38 | const IS_DYNAMIC: bool = true; 39 | const STATIC_HEAP_SIZE: usize = 0; 40 | 41 | #[inline] 42 | fn estimate_heap_size(&self) -> usize { 43 | match *self { 44 | ColorSpace::DeviceGray | ColorSpace::DeviceRGB | ColorSpace::DeviceCMYK => 0, 45 | ColorSpace::DeviceN { ref names, ref alt, ref tint, ref attr } => { 46 | names.estimate_heap_size() + 47 | alt.estimate_heap_size() + 48 | tint.estimate_heap_size() + 49 | attr.estimate_heap_size() 50 | } 51 | ColorSpace::CalGray(ref d) | ColorSpace::CalRGB(ref d) | ColorSpace::CalCMYK(ref d) => { 52 | d.estimate_heap_size() 53 | } 54 | ColorSpace::Indexed(ref cs, _, ref data) => { 55 | cs.estimate_heap_size() + data.estimate_heap_size() 56 | } 57 | ColorSpace::Separation(ref name, ref cs, ref f) => { 
58 | name.estimate_heap_size() + cs.estimate_heap_size() + f.estimate_heap_size() 59 | } 60 | ColorSpace::Icc(ref s) => s.estimate_heap_size(), 61 | ColorSpace::Pattern => 0, 62 | ColorSpace::Other(ref v) => v.estimate_heap_size(), 63 | ColorSpace::Named(ref n) => n.estimate_heap_size() 64 | } 65 | } 66 | } 67 | 68 | fn get_index(arr: &[Primitive], idx: usize) -> Result<&Primitive> { 69 | arr.get(idx).ok_or(PdfError::Bounds { index: idx, len: arr.len() }) 70 | } 71 | 72 | impl Object for ColorSpace { 73 | fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { 74 | ColorSpace::from_primitive_depth(p, resolve, 5) 75 | } 76 | } 77 | impl ColorSpace { 78 | fn from_primitive_depth(p: Primitive, resolve: &impl Resolve, depth: usize) -> Result { 79 | let p = p.resolve(resolve)?; 80 | 81 | if let Ok(name) = p.as_name() { 82 | let cs = match name { 83 | "DeviceGray" => ColorSpace::DeviceGray, 84 | "DeviceRGB" => ColorSpace::DeviceRGB, 85 | "DeviceCMYK" => ColorSpace::DeviceCMYK, 86 | "Pattern" => ColorSpace::Pattern, 87 | name => ColorSpace::Named(name.into()), 88 | }; 89 | return Ok(cs); 90 | } 91 | let arr = t!(p.into_array()); 92 | let typ_p = t!(get_index(&arr, 0)).clone().resolve(resolve)?; 93 | let typ = t!(typ_p.as_name()); 94 | 95 | if depth == 0 { 96 | bail!("ColorSpace base recursion"); 97 | } 98 | match typ { 99 | "Indexed" => { 100 | let base = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 1)).clone(), resolve, depth-1))); 101 | let hival = t!(t!(get_index(&arr, 2)).as_u8()); 102 | let lookup = match t!(get_index(&arr, 3)) { 103 | &Primitive::Reference(r) => resolve.resolve(r)?, 104 | p => p.clone() 105 | }; 106 | let lookup = match lookup { 107 | Primitive::String(string) => { 108 | let data: Vec = string.into_bytes().into(); 109 | data.into() 110 | } 111 | Primitive::Stream(stream) => { 112 | let s: Stream::<()> = Stream::from_stream(stream, resolve)?; 113 | t!(s.data(resolve)) 114 | }, 115 | p => return 
Err(PdfError::UnexpectedPrimitive { 116 | expected: "String or Stream", 117 | found: p.get_debug_name() 118 | }) 119 | }; 120 | Ok(ColorSpace::Indexed(base, hival, lookup)) 121 | } 122 | "Separation" => { 123 | let name = t!(t!(get_index(&arr, 1)).clone().into_name()); 124 | let alternate = Box::new(t!(ColorSpace::from_primitive_depth(t!(get_index(&arr, 2)).clone(), resolve, depth-1))); 125 | let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve)); 126 | Ok(ColorSpace::Separation(name, alternate, tint)) 127 | } 128 | "ICCBased" => { 129 | let s = t!(RcRef::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)); 130 | Ok(ColorSpace::Icc(s)) 131 | } 132 | "DeviceN" => { 133 | let names = t!(Object::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)); 134 | let alt = t!(Object::from_primitive(t!(get_index(&arr, 2)).clone(), resolve)); 135 | let tint = t!(Function::from_primitive(t!(get_index(&arr, 3)).clone(), resolve)); 136 | let attr = arr.get(4).map(|p| Dictionary::from_primitive(p.clone(), resolve)).transpose()?; 137 | 138 | Ok(ColorSpace::DeviceN { names, alt, tint, attr}) 139 | } 140 | "CalGray" => { 141 | let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; 142 | Ok(ColorSpace::CalGray(dict)) 143 | } 144 | "CalRGB" => { 145 | let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; 146 | Ok(ColorSpace::CalRGB(dict)) 147 | } 148 | "CalCMYK" => { 149 | let dict = Dictionary::from_primitive(t!(get_index(&arr, 1)).clone(), resolve)?; 150 | Ok(ColorSpace::CalCMYK(dict)) 151 | } 152 | "Pattern" => { 153 | Ok(ColorSpace::Pattern) 154 | } 155 | _ => Ok(ColorSpace::Other(arr)) 156 | } 157 | } 158 | } 159 | impl ObjectWrite for ColorSpace { 160 | fn to_primitive(&self, update: &mut impl Updater) -> Result { 161 | match *self { 162 | ColorSpace::DeviceCMYK => Ok(Primitive::name("DeviceCMYK")), 163 | ColorSpace::DeviceRGB => Ok(Primitive::name("DeviceRGB")), 164 | ColorSpace::Indexed(ref 
base, hival, ref lookup) => { 165 | let base = base.to_primitive(update)?; 166 | let hival = Primitive::Integer(hival.into()); 167 | let lookup = if lookup.len() < 100 { 168 | PdfString::new((**lookup).into()).into() 169 | } else { 170 | Stream::new((), lookup.clone()).to_primitive(update)? 171 | }; 172 | Ok(Primitive::Array(vec![Primitive::name("Indexed"), base, hival, lookup])) 173 | } 174 | ref p => { 175 | dbg!(p); 176 | unimplemented!() 177 | } 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /pdf/src/object/function.rs: -------------------------------------------------------------------------------- 1 | use crate as pdf; 2 | use crate::object::*; 3 | use crate::error::*; 4 | use itertools::izip; 5 | use datasize::DataSize; 6 | 7 | #[derive(Object, Debug, Clone, ObjectWrite)] 8 | struct RawFunction { 9 | #[pdf(key="FunctionType")] 10 | function_type: u32, 11 | 12 | #[pdf(key="Domain")] 13 | domain: Vec, 14 | 15 | #[pdf(key="Range")] 16 | range: Option>, 17 | 18 | #[pdf(key="Size")] 19 | size: Option>, 20 | 21 | #[pdf(key="BitsPerSample")] 22 | _bits_per_sample: Option, 23 | 24 | #[pdf(key="Order", default="1")] 25 | order: u32, 26 | 27 | #[pdf(key="Encode")] 28 | encode: Option>, 29 | 30 | #[pdf(key="Decode")] 31 | decode: Option>, 32 | 33 | #[pdf(other)] 34 | other: Dictionary 35 | } 36 | 37 | #[derive(Object, Debug, Clone)] 38 | struct Function2 { 39 | #[pdf(key="C0")] 40 | c0: Option>, 41 | 42 | #[pdf(key="C1")] 43 | c1: Option>, 44 | 45 | #[pdf(key="N")] 46 | exponent: f32, 47 | } 48 | 49 | #[derive(Debug, Clone, DataSize)] 50 | pub enum Function { 51 | Sampled(SampledFunction), 52 | Interpolated(Vec), 53 | Stiching, 54 | Calculator, 55 | PostScript { func: PsFunc, domain: Vec, range: Vec }, 56 | } 57 | impl Function { 58 | pub fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> { 59 | match *self { 60 | Function::Sampled(ref func) => { 61 | func.apply(x, out) 62 | } 63 | 
Function::Interpolated(ref parts) => { 64 | if parts.len() != out.len() { 65 | bail!("incorrect output length: expected {}, found {}.", parts.len(), out.len()) 66 | } 67 | for (f, y) in parts.iter().zip(out) { 68 | *y = f.apply(x[0]); 69 | } 70 | Ok(()) 71 | } 72 | Function::PostScript { ref func, .. } => func.exec(x, out), 73 | _ => bail!("unimplemted function {:?}", self) 74 | } 75 | } 76 | pub fn input_dim(&self) -> usize { 77 | match *self { 78 | Function::PostScript { ref domain, .. } => domain.len() / 2, 79 | Function::Sampled(ref f) => f.input.len(), 80 | _ => panic!() 81 | } 82 | } 83 | pub fn output_dim(&self) -> usize { 84 | match *self { 85 | Function::PostScript { ref range, .. } => range.len() / 2, 86 | Function::Sampled(ref f) => f.output.len(), 87 | _ => panic!() 88 | } 89 | } 90 | } 91 | impl FromDict for Function { 92 | fn from_dict(dict: Dictionary, resolve: &impl Resolve) -> Result { 93 | use std::f32::INFINITY; 94 | let raw = RawFunction::from_dict(dict, resolve)?; 95 | match raw.function_type { 96 | 2 => { 97 | let f2 = Function2::from_dict(raw.other, resolve)?; 98 | 99 | let n_dim = match (raw.range.as_ref(), f2.c0.as_ref(), f2.c1.as_ref()) { 100 | (Some(range), _, _) => range.len() / 2, 101 | (_, Some(c0), _) => c0.len(), 102 | (_, _, Some(c1)) => c1.len(), 103 | _ => bail!("unknown dimensions") 104 | }; 105 | let mut parts = Vec::with_capacity(n_dim); 106 | let input_range = (raw.domain[0], raw.domain[1]); 107 | for dim in 0 .. 
n_dim { 108 | let output_range = ( 109 | raw.range.as_ref().and_then(|r| r.get(2*dim).cloned()).unwrap_or(-INFINITY), 110 | raw.range.as_ref().and_then(|r| r.get(2*dim+1).cloned()).unwrap_or(INFINITY) 111 | ); 112 | let c0 = f2.c0.as_ref().and_then(|c0| c0.get(dim).cloned()).unwrap_or(0.0); 113 | let c1 = f2.c1.as_ref().and_then(|c1| c1.get(dim).cloned()).unwrap_or(1.0); 114 | let exponent = f2.exponent; 115 | parts.push(InterpolatedFunctionDim { 116 | input_range, output_range, c0, c1, exponent 117 | }); 118 | } 119 | Ok(Function::Interpolated(parts)) 120 | }, 121 | i => { 122 | dbg!(raw); 123 | bail!("unsupported function type {}", i) 124 | } 125 | } 126 | } 127 | } 128 | impl Object for Function { 129 | fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { 130 | match p { 131 | Primitive::Dictionary(dict) => Self::from_dict(dict, resolve), 132 | Primitive::Stream(s) => { 133 | let stream = Stream::::from_stream(s, resolve)?; 134 | let data = stream.data(resolve)?; 135 | match stream.info.function_type { 136 | 4 => { 137 | let s = std::str::from_utf8(&data)?; 138 | let func = PsFunc::parse(s)?; 139 | let info = stream.info.info; 140 | Ok(Function::PostScript { func, domain: info.domain, range: info.range.unwrap() }) 141 | }, 142 | 0 => { 143 | let info = stream.info.info; 144 | let order = match info.order { 145 | 1 => Interpolation::Linear, 146 | 3 => Interpolation::Cubic, 147 | n => bail!("Invalid interpolation order {}", n), 148 | }; 149 | 150 | let size = try_opt!(info.size); 151 | let range = try_opt!(info.range); 152 | let encode = info.encode.unwrap_or_else(|| size.iter().flat_map(|&n| [0.0, (n-1) as f32]).collect()); 153 | let decode = info.decode.unwrap_or_else(|| range.clone()); 154 | 155 | Ok(Function::Sampled(SampledFunction { 156 | input: izip!(info.domain.chunks_exact(2), encode.chunks_exact(2), size.iter()).map(|(c, e, &s)| { 157 | SampledFunctionInput { 158 | domain: (c[0], c[1]), 159 | encode_offset: e[0], 160 | encode_scale: e[1], 
161 | size: s as usize, 162 | } 163 | }).collect(), 164 | output: decode.chunks_exact(2).map(|c| SampledFunctionOutput { 165 | offset: c[0], 166 | scale: (c[1] - c[0]) / 255., 167 | }).collect(), 168 | data, 169 | order, 170 | range, 171 | })) 172 | } 173 | ref p => bail!("found a function stream with type {:?}", p) 174 | } 175 | }, 176 | Primitive::Reference(r) => Self::from_primitive(resolve.resolve(r)?, resolve), 177 | _ => bail!("double indirection") 178 | } 179 | } 180 | } 181 | impl ObjectWrite for Function { 182 | fn to_primitive(&self, update: &mut impl Updater) -> Result { 183 | unimplemented!() 184 | /* 185 | let dict = match self { 186 | Function::Interpolated(parts) => { 187 | let first: &InterpolatedFunctionDim = try_opt!(parts.get(0)); 188 | let f2 = Function2 { 189 | c0: parts.iter().map(|p| p.c0).collect(), 190 | c1: parts.iter().map(|p| p.c0).collect(), 191 | exponent: first.exponent 192 | }; 193 | let f = RawFunction { 194 | function_type: 2, 195 | domain: vec![first.input_range.0, first.input_range.1], 196 | range: parts.iter().flat_map(|p| [p.output_range.0, p.output_range.1]).collect(), 197 | decode: None, 198 | encode: None, 199 | order 200 | }; 201 | 202 | } 203 | } 204 | */ 205 | } 206 | } 207 | impl DeepClone for Function { 208 | fn deep_clone(&self, cloner: &mut impl Cloner) -> Result { 209 | Ok(self.clone()) 210 | } 211 | } 212 | 213 | #[derive(Debug, Clone, DataSize)] 214 | struct SampledFunctionInput { 215 | domain: (f32, f32), 216 | encode_offset: f32, 217 | encode_scale: f32, 218 | size: usize, 219 | } 220 | impl SampledFunctionInput { 221 | fn map(&self, x: f32) -> (usize, usize, f32) { 222 | let x = x.clamp(self.domain.0, self.domain.1); 223 | let y = x.mul_add(self.encode_scale, self.encode_offset); 224 | (y.floor() as usize, self.size, y.fract()) 225 | } 226 | } 227 | 228 | #[derive(Debug, Clone, DataSize)] 229 | struct SampledFunctionOutput { 230 | offset: f32, 231 | scale: f32 232 | } 233 | impl SampledFunctionOutput { 234 | fn 
map(&self, x: f32) -> f32 { 235 | x.mul_add(self.scale, self.offset) 236 | } 237 | } 238 | 239 | #[derive(Debug, Clone, DataSize)] 240 | enum Interpolation { 241 | Linear, 242 | #[allow(dead_code)] // TODO 243 | Cubic, 244 | } 245 | 246 | #[derive(Debug, Clone, DataSize)] 247 | pub struct SampledFunction { 248 | input: Vec, 249 | output: Vec, 250 | data: Arc<[u8]>, 251 | order: Interpolation, 252 | range: Vec, 253 | } 254 | impl SampledFunction { 255 | fn apply(&self, x: &[f32], out: &mut [f32]) -> Result<()> { 256 | if x.len() != self.input.len() { 257 | bail!("input dimension mismatch {} != {}", x.len(), self.input.len()); 258 | } 259 | let n_out = out.len(); 260 | if out.len() * 2 != self.range.len() { 261 | bail!("output dimension mismatch 2 * {} != {}", out.len(), self.range.len()) 262 | } 263 | match x.len() { 264 | 1 => { 265 | match self.order { 266 | Interpolation::Linear => { 267 | let (i, _, s) = self.input[0].map(x[0]); 268 | let idx = i * n_out; 269 | 270 | for (o, &a) in out.iter_mut().zip(&self.data[idx..]) { 271 | *o = a as f32 * (1. - s); 272 | } 273 | for (o, &b) in out.iter_mut().zip(&self.data[idx + n_out..]) { 274 | *o += b as f32 * s; 275 | } 276 | } 277 | _ => unimplemented!() 278 | } 279 | } 280 | 2 => match self.order { 281 | Interpolation::Linear => { 282 | let (i0, s0, f0) = self.input[0].map(x[0]); 283 | let (i1, _, f1) = self.input[1].map(x[1]); 284 | let (j0, j1) = (i0+1, i1+1); 285 | let (g0, g1) = (1. - f0, 1. - f1); 286 | 287 | out.fill(0.0); 288 | let mut add = |i0, i1, f| { 289 | let idx = (i0 + s0 * i1) * n_out; 290 | 291 | if let Some(part) = self.data.get(idx .. 
idx+n_out) { 292 | for (o, &b) in out.iter_mut().zip(part) { 293 | *o += f * b as f32; 294 | } 295 | } 296 | }; 297 | 298 | add(i0, i1, g0 * g1); 299 | add(j0, i1, f0 * g1); 300 | add(i0, j1, g0 * f1); 301 | add(j0, j1, f0 * f1); 302 | } 303 | _ => unimplemented!() 304 | } 305 | 3 => match self.order { 306 | Interpolation::Linear => { 307 | let (i0, s0, f0) = self.input[0].map(x[0]); 308 | let (i1, s1, f1) = self.input[1].map(x[1]); 309 | let (i2, _, f2) = self.input[2].map(x[2]); 310 | let (j0, j1, j2) = (i0+1, i1+1, i2+1); 311 | let (g0, g1, g2) = (1. - f0, 1. - f1, 1. - f2); 312 | 313 | out.fill(0.0); 314 | let mut add = |i0, i1, i2, f| { 315 | let idx = (i0 + s0 * (i1 + s1 * i2)) * n_out; 316 | 317 | if let Some(part) = self.data.get(idx .. idx+n_out) { 318 | for (o, &b) in out.iter_mut().zip(part) { 319 | *o += f * b as f32; 320 | } 321 | } 322 | }; 323 | 324 | add(i0, i1, i2, g0 * g1 * g2); 325 | add(j0, i1, i2, f0 * g1 * g2); 326 | add(i0, j1, i2, g0 * f1 * g2); 327 | add(j0, j1, i2, f0 * f1 * g2); 328 | 329 | add(i0, i1, j2, g0 * g1 * f2); 330 | add(j0, i1, j2, f0 * g1 * f2); 331 | add(i0, j1, j2, g0 * f1 * f2); 332 | add(j0, j1, j2, f0 * f1 * f2); 333 | } 334 | _ => unimplemented!() 335 | } 336 | n => bail!("Order {}", n) 337 | } 338 | for (o, y) in self.output.iter().zip(out.iter_mut()) { 339 | *y = o.map(*y); 340 | } 341 | Ok(()) 342 | } 343 | } 344 | 345 | 346 | #[derive(Debug, Clone, DataSize)] 347 | pub struct InterpolatedFunctionDim { 348 | pub input_range: (f32, f32), 349 | pub output_range: (f32, f32), 350 | pub c0: f32, 351 | pub c1: f32, 352 | pub exponent: f32, 353 | } 354 | impl InterpolatedFunctionDim { 355 | pub fn apply(&self, x: f32) -> f32 { 356 | let y = self.c0 + x.powf(self.exponent) * (self.c1 - self.c0); 357 | let (y0, y1) = self.output_range; 358 | y.min(y1).max(y0) 359 | } 360 | } 361 | 362 | #[derive(Debug)] 363 | pub enum PostScriptError { 364 | StackUnderflow, 365 | IncorrectStackSize 366 | } 367 | #[derive(Debug, Clone, 
DataSize)] 368 | pub struct PsFunc { 369 | pub ops: Vec 370 | } 371 | 372 | macro_rules! op { 373 | ($stack:ident; $($v:ident),* => $($e:expr),*) => ( { 374 | $(let $v = $stack.pop().ok_or(PostScriptError::StackUnderflow)?;)* 375 | $($stack.push($e);)* 376 | } ) 377 | } 378 | 379 | impl PsFunc { 380 | fn exec_inner(&self, stack: &mut Vec) -> Result<(), PostScriptError> { 381 | for &op in &self.ops { 382 | match op { 383 | PsOp::Int(i) => stack.push(i as f32), 384 | PsOp::Value(v) => stack.push(v), 385 | PsOp::Dup => op!(stack; v => v, v), 386 | PsOp::Exch => op!(stack; b, a => b, a), 387 | PsOp::Add => op!(stack; b, a => a + b), 388 | PsOp::Sub => op!(stack; b, a => a - b), 389 | PsOp::Mul => op!(stack; b, a => a * b), 390 | PsOp::Abs => op!(stack; a => a.abs()), 391 | PsOp::Roll => { 392 | let j = stack.pop().ok_or(PostScriptError::StackUnderflow)? as isize; 393 | let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? as usize; 394 | let start = stack.len() - n; 395 | let slice = &mut stack[start..]; 396 | if j > 0 { 397 | slice.rotate_right(j as usize); 398 | } else { 399 | slice.rotate_left(-j as usize); 400 | } 401 | } 402 | PsOp::Index => { 403 | let n = stack.pop().ok_or(PostScriptError::StackUnderflow)? 
as usize; 404 | if n >= stack.len() { return Err(PostScriptError::StackUnderflow); } 405 | let val = stack[stack.len() - n - 1]; 406 | stack.push(val); 407 | } 408 | PsOp::Cvr => {} 409 | PsOp::Pop => { 410 | stack.pop().ok_or(PostScriptError::StackUnderflow)?; 411 | } 412 | } 413 | } 414 | Ok(()) 415 | } 416 | pub fn exec(&self, input: &[f32], output: &mut [f32]) -> Result<()> { 417 | let mut stack = Vec::with_capacity(10); 418 | stack.extend_from_slice(input); 419 | match self.exec_inner(&mut stack) { 420 | Ok(()) => {}, 421 | Err(_) => return Err(PdfError::PostScriptExec) 422 | } 423 | if output.len() != stack.len() { 424 | bail!("incorrect output length: expected {}, found {}.", stack.len(), output.len()) 425 | } 426 | output.copy_from_slice(&stack); 427 | Ok(()) 428 | } 429 | pub fn parse(s: &str) -> Result { 430 | let start = s.find('{').ok_or(PdfError::PostScriptParse)?; 431 | let end = s.rfind('}').ok_or(PdfError::PostScriptParse)?; 432 | 433 | let ops: Result, _> = s[start + 1 .. end].split_ascii_whitespace().map(PsOp::parse).collect(); 434 | Ok(PsFunc { ops: ops? 
}) 435 | } 436 | } 437 | 438 | #[derive(Copy, Clone, Debug, DataSize)] 439 | pub enum PsOp { 440 | Int(i32), 441 | Value(f32), 442 | Add, 443 | Sub, 444 | Abs, 445 | Mul, 446 | Dup, 447 | Exch, 448 | Roll, 449 | Index, 450 | Cvr, 451 | Pop, 452 | } 453 | impl PsOp { 454 | pub fn parse(s: &str) -> Result { 455 | if let Ok(i) = s.parse::() { 456 | Ok(PsOp::Int(i)) 457 | } else if let Ok(f) = s.parse::() { 458 | Ok(PsOp::Value(f)) 459 | } else { 460 | Ok(match s { 461 | "add" => PsOp::Add, 462 | "sub" => PsOp::Sub, 463 | "abs" => PsOp::Abs, 464 | "mul" => PsOp::Mul, 465 | "dup" => PsOp::Dup, 466 | "exch" => PsOp::Exch, 467 | "roll" => PsOp::Roll, 468 | "index" => PsOp::Index, 469 | "cvr" => PsOp::Cvr, 470 | "pop" => PsOp::Pop, 471 | _ => { 472 | bail!("unimplemented op {}", s); 473 | } 474 | }) 475 | } 476 | } 477 | } 478 | -------------------------------------------------------------------------------- /pdf/src/object/stream.rs: -------------------------------------------------------------------------------- 1 | use datasize::DataSize; 2 | 3 | use crate as pdf; 4 | use crate::object::*; 5 | use crate::primitive::*; 6 | use crate::error::*; 7 | use crate::parser::Lexer; 8 | use crate::enc::{StreamFilter, decode}; 9 | 10 | use std::ops::{Deref, Range}; 11 | use std::fmt; 12 | 13 | #[derive(Clone)] 14 | pub (crate) enum StreamData { 15 | Generated(Arc<[u8]>), 16 | Original(Range, PlainRef), 17 | } 18 | datasize::non_dynamic_const_heap_size!(StreamData, std::mem::size_of::()); 19 | 20 | /// Simple Stream object with only some additional entries from the stream dict (I). 
21 | #[derive(Clone, DataSize)] 22 | pub struct Stream { 23 | pub info: StreamInfo, 24 | pub (crate) inner_data: StreamData, 25 | } 26 | impl Stream { 27 | pub fn from_stream(s: PdfStream, resolve: &impl Resolve) -> Result { 28 | let PdfStream {info, inner} = s; 29 | let info = StreamInfo::::from_primitive(Primitive::Dictionary (info), resolve)?; 30 | let inner_data = match inner { 31 | StreamInner::InFile { id, file_range } => StreamData::Original(file_range, id), 32 | StreamInner::Pending { data } => StreamData::Generated(data) 33 | }; 34 | Ok(Stream { info, inner_data }) 35 | } 36 | 37 | /// the data is not compressed. the specified filters are to be applied when compressing the data 38 | pub fn new_with_filters(i: I, data: impl Into>, filters: Vec) -> Stream { 39 | Stream { 40 | info: StreamInfo { 41 | filters, 42 | file: None, 43 | file_filters: Vec::new(), 44 | info: i 45 | }, 46 | inner_data: StreamData::Generated(data.into()), 47 | } 48 | } 49 | pub fn new(i: I, data: impl Into>) -> Stream { 50 | Stream { 51 | info: StreamInfo { 52 | filters: Vec::new(), 53 | file: None, 54 | file_filters: Vec::new(), 55 | info: i 56 | }, 57 | inner_data: StreamData::Generated(data.into()), 58 | } 59 | } 60 | /// the data is already compressed with the specified filters 61 | pub fn from_compressed(i: I, data: impl Into>, filters: Vec) -> Stream { 62 | Stream { 63 | info: StreamInfo { 64 | filters: filters.clone(), 65 | file: None, 66 | file_filters: Vec::new(), 67 | info: i 68 | }, 69 | inner_data: StreamData::Generated(data.into()), 70 | } 71 | } 72 | 73 | pub fn data(&self, resolve: &impl Resolve) -> Result> { 74 | match self.inner_data { 75 | StreamData::Generated(ref data) => { 76 | let filters = &self.info.filters; 77 | if filters.len() == 0 { 78 | Ok(data.clone()) 79 | } else { 80 | use std::borrow::Cow; 81 | let mut data: Cow<[u8]> = (&**data).into(); 82 | for filter in filters { 83 | data = t!(decode(&data, filter), filter).into(); 84 | } 85 | Ok(data.into()) 86 | } 
87 | } 88 | StreamData::Original(ref file_range, id) => { 89 | resolve.get_data_or_decode(id, file_range.clone(), &self.info.filters) 90 | } 91 | } 92 | } 93 | 94 | pub fn len(&self) -> usize { 95 | match self.inner_data { 96 | StreamData::Generated(ref data) => data.len(), 97 | StreamData::Original(ref range, _) => range.len() 98 | } 99 | } 100 | } 101 | 102 | impl fmt::Debug for Stream { 103 | fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { 104 | write!(f, "Stream info={:?}, len={}", self.info.info, self.len()) 105 | } 106 | } 107 | 108 | impl Object for Stream { 109 | /// Convert primitive to Self 110 | fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result { 111 | let s = PdfStream::from_primitive(p, resolve)?; 112 | Stream::from_stream(s, resolve) 113 | } 114 | } 115 | impl Stream { 116 | pub fn to_pdf_stream(&self, update: &mut impl Updater) -> Result { 117 | let mut info = match self.info.info.to_primitive(update)? { 118 | Primitive::Dictionary(dict) => dict, 119 | Primitive::Null => Dictionary::new(), 120 | p => bail!("stream info has to be a dictionary (found {:?})", p) 121 | }; 122 | let mut params = None; 123 | if self.info.filters.len() > 0 { 124 | for f in self.info.filters.iter() { 125 | if let Some(para) = match f { 126 | StreamFilter::LZWDecode(ref p) => Some(p.to_primitive(update)?), 127 | StreamFilter::FlateDecode(ref p) => Some(p.to_primitive(update)?), 128 | StreamFilter::DCTDecode(ref p) => Some(p.to_primitive(update)?), 129 | StreamFilter::CCITTFaxDecode(ref p) => Some(p.to_primitive(update)?), 130 | StreamFilter::JBIG2Decode(ref p) => Some(p.to_primitive(update)?), 131 | _ => None 132 | } { 133 | assert!(params.is_none()); 134 | params = Some(para); 135 | } 136 | } 137 | let mut filters = self.info.filters.iter().map(|filter| match filter { 138 | StreamFilter::ASCIIHexDecode => "ASCIIHexDecode", 139 | StreamFilter::ASCII85Decode => "ASCII85Decode", 140 | StreamFilter::LZWDecode(ref _p) => "LZWDecode", 141 | 
// --- continuation: tail of `Stream::to_pdf_stream` (its head lies before this chunk) ---
// Map each StreamFilter variant to the canonical name used in the /Filter entry.
            StreamFilter::FlateDecode(ref _p) => "FlateDecode",
            StreamFilter::JPXDecode => "JPXDecode",
            StreamFilter::DCTDecode(ref _p) => "DCTDecode",
            StreamFilter::CCITTFaxDecode(ref _p) => "CCITTFaxDecode",
            StreamFilter::JBIG2Decode(ref _p) => "JBIG2Decode",
            StreamFilter::Crypt => "Crypt",
            StreamFilter::RunLengthDecode => "RunLengthDecode",
        })
        .map(|s| Primitive::Name(s.into()));
        // /Filter: omitted when there are no filters, a single name for one, an array for several.
        // NOTE(review): `Primitive::array::(filters, update)?` looks like a turbofish type
        // argument was lost in text extraction — confirm against the upstream source.
        match self.info.filters.len() {
            0 => {},
            1 => {
                info.insert("Filter", filters.next().unwrap().to_primitive(update)?);
            }
            _ => {
                info.insert("Filter", Primitive::array::(filters, update)?);
            }
        }
    }
    if let Some(para) = params {
        info.insert("DecodeParms", para);
    }

    // /Length is derived from wherever the bytes currently live: generated in
    // memory, or still residing in the original file.
    let inner = match self.inner_data {
        StreamData::Generated(ref data) => {
            info.insert("Length", Primitive::Integer(data.len() as _));
            StreamInner::Pending { data: data.clone() }
        },
        StreamData::Original(ref file_range, id) => {
            info.insert("Length", Primitive::Integer(file_range.len() as _));
            StreamInner::InFile { id, file_range: file_range.clone() }
        }
    };

    Ok(PdfStream { info, inner })
    }
}
impl ObjectWrite for Stream {
    /// Serialize this stream as a `Primitive::Stream`.
    fn to_primitive(&self, update: &mut impl Updater) -> Result {
        self.to_pdf_stream(update).map(Primitive::Stream)
    }
}
impl DeepClone for Stream {
    /// Clone the stream; data still residing in the source file is copied out via the
    /// cloner, so the clone always owns its bytes (`StreamData::Generated`).
    fn deep_clone(&self, cloner: &mut impl Cloner) -> Result {
        let data = match self.inner_data {
            StreamData::Generated(ref data) => data.clone(),
            StreamData::Original(ref range, id) => cloner.stream_data(id, range.clone())?
        };
        Ok(Stream {
            info: self.info.deep_clone(cloner)?,
            inner_data: StreamData::Generated(data),
        })
    }
}
impl Deref for Stream {
    type Target = StreamInfo;
    fn deref(&self) -> &StreamInfo {
        &self.info
    }
}


/// General stream type. `I` is the additional information to be read from the stream dict.
// NOTE(review): generic parameter lists (presumably `StreamInfo<I>`) appear to have been
// stripped by the text extraction below — confirm against the upstream source.
#[derive(Debug, Clone, DataSize, DeepClone)]
pub struct StreamInfo {
    // General dictionary entries
    /// Filters that the `data` is currently encoded with (corresponds to both `/Filter` and
    /// `/DecodeParms` in the PDF specs), constructed in `from_primitive()`.
    pub filters: Vec,

    /// Eventual file containing the stream contents
    pub file: Option,
    /// Filters to apply to external file specified in `file`.
    pub file_filters: Vec,

    // TODO:
    /*
    /// Filters to apply to external file specified in `file`.
    #[pdf(key="FFilter")]
    file_filters: Vec,
    #[pdf(key="FDecodeParms")]
    file_decode_parms: Vec,
    /// Number of bytes in the decoded stream
    #[pdf(key="DL")]
    dl: Option,
    */
    // Specialized dictionary entries
    pub info: I,
}

impl Deref for StreamInfo {
    type Target = I;
    fn deref(&self) -> &I {
        &self.info
    }
}

impl Default for StreamInfo {
    fn default() -> StreamInfo {
        StreamInfo {
            filters: Vec::new(),
            file: None,
            file_filters: Vec::new(),
            info: I::default(),
        }
    }
}
impl StreamInfo {
    /*
    /// If the stream is not encoded, this is a no-op. `decode()` should be called whenever it's uncertain
    /// whether the stream is encoded.
    pub fn encode(&mut self, _filter: StreamFilter) {
        // TODO this should add the filter to `self.filters` and encode the data with the given
        // filter
        unimplemented!();
    }*/
    /// Borrow the filter chain currently applied to the stream data.
    pub fn get_filters(&self) -> &[StreamFilter] {
        &self.filters
    }
}
impl Object for StreamInfo {
    /// Build a `StreamInfo` from a stream dictionary, pairing each /Filter entry
    /// with its /DecodeParms dictionary by position (missing parms default to empty).
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result {
        let mut dict = Dictionary::from_primitive(p, resolve)?;

        // /Length is required to be present, but the value itself is not kept here.
        let _length = usize::from_primitive(
            dict.remove("Length").ok_or(PdfError::MissingEntry{ typ: "StreamInfo", field: "Length".into() })?,
            resolve)?;

        let filters = Vec::::from_primitive(
            dict.remove("Filter").unwrap_or(Primitive::Null),
            resolve)?;

        let decode_params = Vec::>::from_primitive(
            dict.remove("DecodeParms").unwrap_or(Primitive::Null),
            resolve)?;

        let file = Option::::from_primitive(
            dict.remove("F").unwrap_or(Primitive::Null),
            resolve)?;

        let file_filters = Vec::::from_primitive(
            dict.remove("FFilter").unwrap_or(Primitive::Null),
            resolve)?;

        let file_decode_params = Vec::::from_primitive(
            dict.remove("FDecodeParms").unwrap_or(Primitive::Null),
            resolve)?;


        let mut new_filters = Vec::new();
        let mut new_file_filters = Vec::new();

        // Zip each filter with its (optional) decode-parameter dictionary by index.
        for (i, filter) in filters.iter().enumerate() {
            let params = match decode_params.get(i) {
                Some(Some(params)) => params.clone(),
                _ => Dictionary::default(),
            };
            new_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
        }
        for (i, filter) in file_filters.iter().enumerate() {
            let params = match file_decode_params.get(i) {
                Some(params) => params.clone(),
                None => Dictionary::default(),
            };
            new_file_filters.push(StreamFilter::from_kind_and_params(filter, params, resolve)?);
        }

        Ok(StreamInfo {
            // General
filters: new_filters,
            file,
            file_filters: new_file_filters,
            // Special
            info: T::from_primitive(Primitive::Dictionary (dict), resolve)?,
        })
    }
}

/// Dictionary entries specific to object streams (`/Type /ObjStm`).
#[derive(Object, Default, Debug, DataSize)]
#[pdf(Type = "ObjStm")]
pub struct ObjStmInfo {
    #[pdf(key = "N")]
    /// Number of compressed objects in the stream.
    pub num_objects: usize,

    #[pdf(key = "First")]
    /// The byte offset in the decoded stream, of the first compressed object.
    pub first: usize,

    #[pdf(key = "Extends")]
    /// A reference to an eventual ObjectStream which this ObjectStream extends.
    pub extends: Option>>,
}

/// A parsed object stream: a compressed container holding several objects.
#[derive(DataSize)]
pub struct ObjectStream {
    /// Byte offset of each object. Index is the object number.
    offsets: Vec,
    /// The object number of this object.
    _id: ObjNr,

    inner: Stream
}

impl Object for ObjectStream {
    /// Parse the stream, then lex the `N` (object number, offset) integer pairs
    /// that preface the decoded data to build the offset table.
    fn from_primitive(p: Primitive, resolve: &impl Resolve) -> Result {
        let stream: Stream = Stream::from_primitive(p, resolve)?;

        let mut offsets = Vec::new();
        {
            debug!("parsing stream");
            let data = stream.data(resolve)?;
            let mut lexer = Lexer::new(&data);
            for _ in 0..(stream.info.num_objects as ObjNr) {
                let _obj_nr = lexer.next()?.to::()?;
                let offset = lexer.next()?.to::()?;
                offsets.push(offset);
            }
        }

        Ok(ObjectStream {
            offsets,
            _id: 0, // TODO
            inner: stream
        })
    }
}

impl ObjectStream {
    /// Return the decoded stream data together with the byte range occupied by the
    /// object at `index`; the last object runs to the end of the data.
    pub fn get_object_slice(&self, index: usize, resolve: &impl Resolve) -> Result<(Arc<[u8]>, Range)> {
        if index >= self.offsets.len() {
            err!(PdfError::ObjStmOutOfBounds {index, max: self.offsets.len()});
        }
        let start = self.inner.info.first + self.offsets[index];
        let data = self.inner.data(resolve)?;
        let end = if index == self.offsets.len() - 1 {
            data.len()
        } else {
            self.inner.info.first + self.offsets[index + 1]
        };

        Ok((data, start..end))
    }
    /// Returns the number of contained objects
    pub fn n_objects(&self) -> usize {
        self.offsets.len()
    }
    /// Decoded stream contents (helper, currently unused by callers in view).
    pub fn _data(&self, resolve: &impl Resolve) -> Result> {
        self.inner.data(resolve)
    }
}
-------------------------------------------------------------------------------- /pdf/src/parser/lexer/mod.rs: --------------------------------------------------------------------------------
/// Lexing an input file, in the sense of breaking it up into substrings based on delimiters and
/// whitespace.

use std::str::FromStr;
use std::ops::{Range, Deref, RangeFrom};
use std::borrow::Cow;

use crate::error::*;
use crate::primitive::Name;

mod str;
pub use self::str::{StringLexer, HexStringLexer};


/// `Lexer` has functionality to jump around and traverse the PDF lexemes of a string in any direction.
#[derive(Copy, Clone)]
#[allow(dead_code)]
pub struct Lexer<'a> {
    // byte index of the cursor within `buf`
    pos: usize,
    buf: &'a [u8],
    // offset of `buf`'s first byte within the whole file (used for Substr file ranges)
    file_offset: usize,
}

// find the position where condition(data[pos-1]) == false and condition(data[pos]) == true
#[inline]
fn boundary_rev(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize {
    match data[..
pos].iter().rposition(|&b| !condition(b)) {
        Some(start) => start + 1,
        None => 0
    }
}

// find the position where condition(data[pos-1]) == true and condition(data[pos]) == false
#[inline]
fn boundary(data: &[u8], pos: usize, condition: impl Fn(u8) -> bool) -> usize {
    match data[pos ..].iter().position(|&b| !condition(b)) {
        Some(start) => pos + start,
        None => data.len()
    }
}

// PDF whitespace class: NUL, space, CR, LF, tab.
#[inline]
fn is_whitespace(b: u8) -> bool {
    matches!(b, 0 | b' ' | b'\r' | b'\n' | b'\t')
}
// Complement of a predicate, for use with `boundary` / `boundary_rev`.
#[inline]
fn not(f: impl Fn(T) -> bool) -> impl Fn(T) -> bool {
    move |t| !f(t)
}
impl<'a> Lexer<'a> {
    pub fn new(buf: &'a [u8]) -> Lexer<'a> {
        Lexer {
            pos: 0,
            buf,
            file_offset: 0
        }
    }
    /// Like `new`, but records where `buf` starts within the underlying file.
    pub fn with_offset(buf: &'a [u8], file_offset: usize) -> Lexer<'a> {
        Lexer {
            pos: 0,
            buf,
            file_offset
        }
    }

    /// Returns next lexeme. Lexer moves to the next byte after the lexeme. (needs to be tested)
    #[allow(clippy::should_implement_trait)]
    pub fn next(&mut self) -> Result> {
        let (lexeme, pos) = self.next_word()?;
        self.pos = pos;
        Ok(lexeme)
    }

    /// consume the whitespace sequence following the stream start
    pub fn next_stream(&mut self) -> Result<()> {
        let pos = self.skip_whitespace(self.pos)?;
        if !self.buf[pos ..].starts_with(b"stream") {
            // deliberately lenient: a missing 'stream' keyword is tolerated here
            // bail!("next token isn't 'stream'");
        }

        // The 'stream' keyword must be followed by LF or CRLF (never a bare CR).
        let &b0 = self.buf.get(pos + 6).ok_or(PdfError::EOF)?;
        if b0 == b'\n' {
            self.pos = pos + 7;
        } else if b0 == b'\r' {
            let &b1 = self.buf.get(pos + 7).ok_or(PdfError::EOF)?;
            if b1 != b'\n' {
                bail!("invalid whitespace following 'stream'");
            }
            self.pos = pos + 8;
        } else {
            bail!("invalid whitespace");
        }
        Ok(())
    }
    /// Gives previous lexeme. Lexer moves to the first byte of this lexeme. (needs to be tested)
    pub fn back(&mut self) -> Result> {
        // first reverse until we find non-whitespace
        let end_pos = boundary_rev(self.buf, self.pos, is_whitespace);
        // then reverse over the word itself to find its start
        let start_pos = boundary_rev(self.buf, end_pos, not(is_whitespace));
        self.pos = start_pos;

        Ok(self.new_substr(start_pos .. end_pos))
    }

    /// Look at the next lexeme. Will return empty substr if the next character is EOF.
    pub fn peek(&self) -> Result> {
        match self.next_word() {
            Ok((substr, _)) => Ok(substr),
            Err(PdfError::EOF) => Ok(self.new_substr(self.pos..self.pos)),
            Err(e) => Err(e),
        }

    }

    /// Returns `Ok` if the next lexeme matches `expected` - else `Err`.
    pub fn next_expect(&mut self, expected: &'static str) -> Result<()> {
        let word = self.next()?;
        if word.equals(expected.as_bytes()) {
            Ok(())
        } else {
            Err(PdfError::UnexpectedLexeme {
                pos: self.pos,
                lexeme: word.to_string(),
                expected
            })
        }
    }

    /// skip whitespaces and return the position of the first non-whitespace character
    #[inline]
    fn skip_whitespace(&self, pos: usize) -> Result {
        // Move away from eventual whitespace
        let pos = boundary(self.buf, pos, is_whitespace);
        if pos >= self.buf.len() {
            Err(PdfError::EOF)
        } else {
            Ok(pos)
        }
    }

    /// Used by next, peek and back - returns substring and new position
    /// If forward, places pointer at the next non-whitespace character.
    /// If backward, places pointer at the start of the current word.
    // TODO ^ backward case is actually not tested or.. thought about that well.
    fn next_word(&self) -> Result<(Substr<'a>, usize)> {
        if self.pos == self.buf.len() {
            return Err(PdfError::EOF);
        }
        let mut pos = self.skip_whitespace(self.pos)?;
        // Skip comments: '%' runs to the end of its line.
        while self.buf.get(pos) == Some(&b'%') {
            pos += 1;
            if let Some(off) = self.buf[pos..].iter().position(|&b| b == b'\n') {
                pos += off+1;
            }

            // Move away from eventual whitespace
            pos = self.skip_whitespace(pos)?;
        }

        let start_pos = pos;

        // If first character is delimiter, this lexeme only contains that character.
        //  - except << and >> which go together, and / which marks the start of a
        //    name token.
        if self.is_delimiter(pos) {
            if self.buf[pos] == b'/' {
                // name token: '/' plus all following regular characters
                pos = self.advance_pos(pos)?;
                while !self.is_whitespace(pos) && !self.is_delimiter(pos) {
                    match self.advance_pos(pos) {
                        Ok(p) => pos = p,
                        Err(_) => break,
                    }
                }
                return Ok((self.new_substr(start_pos..pos), pos));
            }

            // '<<' and '>>' are two-byte lexemes: consume one extra byte
            if let Some(slice) = self.buf.get(pos..=pos+1) {
                if slice == b"<<" || slice == b">>" {
                    pos = self.advance_pos(pos)?;
                }
            }

            pos = self.advance_pos(pos)?;
            return Ok((self.new_substr(start_pos..pos), pos));
        }

        // Read to past the end of lexeme
        while !self.is_whitespace(pos) && !self.is_delimiter(pos) {
            match self.advance_pos(pos) {
                Ok(p) => pos = p,
                Err(_) => break,
            }
        }
        let result = self.new_substr(start_pos..pos);

        // Move away from whitespace again
        //pos = self.skip_whitespace(pos)?;
        Ok((result, pos))
    }

    /// Just a helper for next_word.
205 | #[inline] 206 | fn advance_pos(&self, pos: usize) -> Result { 207 | if pos < self.buf.len() { 208 | Ok(pos + 1) 209 | } else { 210 | Err(PdfError::EOF) 211 | } 212 | } 213 | 214 | #[inline] 215 | pub fn next_as(&mut self) -> Result 216 | where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static 217 | { 218 | self.next().and_then(|word| word.to::()) 219 | } 220 | 221 | #[inline] 222 | pub fn get_pos(&self) -> usize { 223 | self.pos 224 | } 225 | 226 | #[inline] 227 | pub fn new_substr(&self, mut range: Range) -> Substr<'a> { 228 | // if the range is backward, fix it 229 | // start is inclusive, end is exclusive. keep that in mind 230 | if range.start > range.end { 231 | let new_end = range.start + 1; 232 | range.start = range.end + 1; 233 | range.end = new_end; 234 | } 235 | 236 | Substr { 237 | file_offset: self.file_offset + range.start, 238 | slice: &self.buf[range], 239 | } 240 | } 241 | 242 | /// Just a helper function for set_pos, set_pos_from_end and offset_pos. 243 | #[inline] 244 | pub fn set_pos(&mut self, wanted_pos: usize) -> Substr<'a> { 245 | let new_pos = wanted_pos.min(self.buf.len()); 246 | let range = if self.pos < new_pos { 247 | self.pos..new_pos 248 | } else { 249 | new_pos..self.pos 250 | }; 251 | self.pos = new_pos; 252 | self.new_substr(range) 253 | } 254 | 255 | /// Returns the substr between the old and new positions 256 | #[inline] 257 | pub fn set_pos_from_end(&mut self, new_pos: usize) -> Substr<'a> { 258 | self.set_pos(self.buf.len().saturating_sub(new_pos).saturating_sub(1)) 259 | } 260 | /// Returns the substr between the old and new positions 261 | #[inline] 262 | pub fn offset_pos(&mut self, offset: usize) -> Substr<'a> { 263 | self.set_pos(self.pos.wrapping_add(offset)) 264 | } 265 | 266 | /// Moves pos to start of next line. Returns the skipped-over substring. 
267 | #[allow(dead_code)] 268 | pub fn seek_newline(&mut self) -> Substr{ 269 | let start = self.pos; 270 | while self.buf[self.pos] != b'\n' 271 | && self.incr_pos() { } 272 | self.incr_pos(); 273 | 274 | self.new_substr(start..self.pos) 275 | } 276 | 277 | 278 | // TODO: seek_substr and seek_substr_back should use next() or back()? 279 | /// Moves pos to after the found `substr`. Returns Substr with traversed text if `substr` is found. 280 | #[allow(dead_code)] 281 | pub fn seek_substr(&mut self, substr: impl AsRef<[u8]>) -> Option> { 282 | // 283 | let substr = substr.as_ref(); 284 | let start = self.pos; 285 | let mut matched = 0; 286 | loop { 287 | if self.pos >= self.buf.len() { 288 | return None 289 | } 290 | if self.buf[self.pos] == substr[matched] { 291 | matched += 1; 292 | } else { 293 | matched = 0; 294 | } 295 | if matched == substr.len() { 296 | break; 297 | } 298 | self.pos += 1; 299 | } 300 | self.pos += 1; 301 | Some(self.new_substr(start..(self.pos - substr.len()))) 302 | } 303 | 304 | //TODO perhaps seek_substr_back should, like back(), move to the first letter of the substr. 305 | /// Searches for string backward. Moves to after the found `substr`, returns the traversed 306 | /// Substr if found. 307 | pub fn seek_substr_back(&mut self, substr: &[u8]) -> Result> { 308 | let end = self.pos; 309 | match self.buf[.. end].windows(substr.len()).rposition(|w| w == substr) { 310 | Some(start) => { 311 | self.pos = start + substr.len(); 312 | Ok(self.new_substr(self.pos .. end)) 313 | } 314 | None => Err(PdfError::NotFound {word: String::from_utf8_lossy(substr).into() }) 315 | } 316 | } 317 | 318 | /// Read and return slice of at most n bytes. 
319 | #[allow(dead_code)] 320 | pub fn read_n(&mut self, n: usize) -> Substr<'a> { 321 | let start_pos = self.pos; 322 | self.pos += n; 323 | if self.pos >= self.buf.len() { 324 | self.pos = self.buf.len() - 1; 325 | } 326 | if start_pos < self.buf.len() { 327 | self.new_substr(start_pos..self.pos) 328 | } else { 329 | self.new_substr(0..0) 330 | } 331 | } 332 | 333 | /// Returns slice from current position to end. 334 | #[inline] 335 | pub fn get_remaining_slice(&self) -> &'a [u8] { 336 | &self.buf[self.pos..] 337 | } 338 | 339 | /// for debugging 340 | pub fn ctx(&self) -> Cow { 341 | String::from_utf8_lossy(&self.buf[self.pos.saturating_sub(40)..self.buf.len().min(self.pos+40)]) 342 | } 343 | 344 | #[inline] 345 | fn incr_pos(&mut self) -> bool { 346 | if self.pos >= self.buf.len() - 1 { 347 | false 348 | } else { 349 | self.pos += 1; 350 | true 351 | } 352 | } 353 | #[inline] 354 | fn is_whitespace(&self, pos: usize) -> bool { 355 | self.buf.get(pos).map(|&b| is_whitespace(b)).unwrap_or(false) 356 | } 357 | 358 | #[inline] 359 | fn is_delimiter(&self, pos: usize) -> bool { 360 | self.buf.get(pos).map(|b| b"()<>[]{}/%".contains(b)).unwrap_or(false) 361 | } 362 | 363 | } 364 | 365 | 366 | 367 | /// A slice from some original string - a lexeme. 368 | #[derive(Copy, Clone, Debug)] 369 | pub struct Substr<'a> { 370 | slice: &'a [u8], 371 | file_offset: usize, 372 | } 373 | impl<'a> Substr<'a> { 374 | pub fn new + ?Sized>(data: &'a T, file_offset: usize) -> Self { 375 | Substr { slice: data.as_ref(), file_offset } 376 | } 377 | // to: &S -> U. Possibly expensive conversion. 378 | // as: &S -> &U. Cheap borrow conversion 379 | // into: S -> U. Cheap ownership transfer conversion. 
380 | 381 | #[allow(clippy::inherent_to_string)] 382 | pub fn to_string(&self) -> String { 383 | String::from_utf8_lossy(self.as_slice()).into() 384 | } 385 | pub fn to_name(&self) -> Result { 386 | Ok(Name(std::str::from_utf8(self.as_slice())?.into())) 387 | } 388 | pub fn to_vec(&self) -> Vec { 389 | self.slice.to_vec() 390 | } 391 | pub fn to(&self) -> Result 392 | where T: FromStr, T::Err: std::error::Error + Send + Sync + 'static 393 | { 394 | std::str::from_utf8(self.slice)?.parse::().map_err(|e| PdfError::Parse { source: e.into() }) 395 | } 396 | pub fn is_integer(&self) -> bool { 397 | if self.slice.len() == 0 { 398 | return false; 399 | } 400 | let mut slice = self.slice; 401 | if slice[0] == b'-' { 402 | if slice.len() < 2 { 403 | return false; 404 | } 405 | slice = &slice[1..]; 406 | } 407 | is_int(slice) 408 | } 409 | pub fn is_real_number(&self) -> bool { 410 | self.real_number().is_some() 411 | } 412 | pub fn real_number(&self) -> Option { 413 | if self.slice.len() == 0 { 414 | return None; 415 | } 416 | let mut slice = self.slice; 417 | if slice[0] == b'-' { 418 | if slice.len() < 2 { 419 | return None; 420 | } 421 | slice = &slice[1..]; 422 | } 423 | if let Some(i) = slice.iter().position(|&b| b == b'.') { 424 | if !is_int(&slice[..i]) { 425 | return None; 426 | } 427 | slice = &slice[i+1..]; 428 | } 429 | if let Some(len) = slice.iter().position(|&b| !b.is_ascii_digit()) { 430 | if len == 0 { 431 | return None; 432 | } 433 | let end = self.slice.len() - slice.len() + len; 434 | Some(Substr { 435 | file_offset: self.file_offset, 436 | slice: &self.slice[..end] 437 | }) 438 | } else { 439 | Some(*self) 440 | } 441 | } 442 | 443 | pub fn as_slice(&self) -> &'a [u8] { 444 | self.slice 445 | } 446 | pub fn as_str(&self) -> Result<&str> { 447 | std::str::from_utf8(self.slice).map_err(|e| PdfError::Parse { source: e.into() }) 448 | } 449 | 450 | pub fn equals(&self, other: impl AsRef<[u8]>) -> bool { 451 | self.slice == other.as_ref() 452 | } 453 | 454 | 
pub fn reslice(&self, range: RangeFrom) -> Substr<'a> { 455 | Substr { 456 | file_offset: self.file_offset + range.start, 457 | slice: &self.slice[range], 458 | } 459 | } 460 | 461 | pub fn file_range(&self) -> Range { 462 | self.file_offset .. self.file_offset + self.slice.len() 463 | } 464 | } 465 | 466 | #[inline] 467 | fn is_int(b: &[u8]) -> bool { 468 | b.iter().all(|&b| b.is_ascii_digit()) 469 | } 470 | impl<'a> Deref for Substr<'a> { 471 | type Target = [u8]; 472 | fn deref(&self) -> &[u8] { 473 | self.as_slice() 474 | } 475 | } 476 | impl<'a> PartialEq<&[u8]> for Substr<'a> { 477 | fn eq(&self, rhs: &&[u8]) -> bool { 478 | self.equals(rhs) 479 | } 480 | } 481 | 482 | impl<'a> PartialEq<&str> for Substr<'a> { 483 | fn eq(&self, rhs: &&str) -> bool { 484 | self.equals(rhs.as_bytes()) 485 | } 486 | } 487 | 488 | #[cfg(test)] 489 | mod tests { 490 | use super::*; 491 | 492 | #[test] 493 | fn test_boundary_rev() { 494 | assert_eq!(boundary_rev(b" hello", 3, not(is_whitespace)), 1); 495 | assert_eq!(boundary_rev(b" hello", 3, is_whitespace), 3); 496 | } 497 | 498 | #[test] 499 | fn test_boundary() { 500 | assert_eq!(boundary(b" hello ", 3, not(is_whitespace)), 6); 501 | assert_eq!(boundary(b" hello ", 3, is_whitespace), 3); 502 | assert_eq!(boundary(b"01234 7orld", 5, is_whitespace), 7); 503 | assert_eq!(boundary(b"01234 7orld", 7, is_whitespace), 7); 504 | assert_eq!(boundary(b"q\n", 1, is_whitespace), 2); 505 | } 506 | 507 | #[test] 508 | fn test_substr() { 509 | assert!(Substr::new("123", 0).is_real_number()); 510 | assert!(Substr::new("123.", 0).is_real_number()); 511 | assert!(Substr::new("123.45", 0).is_real_number()); 512 | assert!(Substr::new(".45", 0).is_real_number()); 513 | assert!(Substr::new("-.45", 0).is_real_number()); 514 | assert!(!Substr::new("123.45", 0).is_integer()); 515 | assert!(Substr::new("123", 0).is_integer()); 516 | } 517 | } 518 | -------------------------------------------------------------------------------- 
/pdf/src/parser/lexer/str.rs: --------------------------------------------------------------------------------
use std::iter::Iterator;
use crate::error::*;

/// A lexer for PDF strings. Breaks the string up into single characters (`u8`)
/// It's also possible to get the number of indices of the original array that was traversed by the
/// Iterator.
///
/// ```
/// let mut string: Vec = Vec::new();
/// let bytes_traversed = {
///     let mut string_lexer = StringLexer::new(lexer.get_remaining_slice());
///     for character in string_lexer.iter() {
///         let character = character?;
///         string.push(character);
///     }
///     string_lexer.get_offset() as i64
/// };
/// // bytes_traversed now holds the number of bytes in the original array traversed.
/// ```
///
#[derive(Clone)]
pub struct StringLexer<'a> {
    pos: usize, // points to next byte
    nested: i32, // How far in () we are nested
    buf: &'a [u8],
}

impl<'a> StringLexer<'a> {
    /// `buf` should start right after the `(` delimiter, and may span all the way to EOF. StringLexer
    /// will determine the end of the string.
    pub fn new(buf: &'a [u8]) -> StringLexer<'a> {
        StringLexer {
            pos: 0,
            nested: 0,
            buf,
        }
    }
    pub fn iter<'b>(&'b mut self) -> StringLexerIter<'a, 'b> {
        StringLexerIter {lexer: self}
    }
    /// Get offset/pos from start of string
    pub fn get_offset(&self) -> usize {
        self.pos
    }

    /// (mostly just used by Iterator, but might be useful)
    /// Decode the next byte of the string; `None` marks the terminating `)`.
    pub fn next_lexeme(&mut self) -> Result> {
        let c = self.next_byte()?;
        match c {
            b'\\' => {
                // escape sequence
                let c = self.next_byte()?;
                Ok(
                    match c {
                        b'n' => Some(b'\n'),
                        b'r' => Some(b'\r'),
                        b't' => Some(b'\t'),
                        b'b' => Some(b'\x08'),
                        b'f' => Some(b'\x0c'),
                        b'(' => Some(b'('),
                        b')' => Some(b')'),
                        b'\n' => {
                            // ignore end-of-line marker
                            if let Ok(b'\r') = self.peek_byte() {
                                let _ = self.next_byte();
                            }
                            self.next_lexeme()?
                        }
                        b'\r' => {
                            // ignore end-of-line marker
                            if let Ok(b'\n') = self.peek_byte() {
                                let _ = self.next_byte();
                            }
                            self.next_lexeme()?
                        }
                        b'\\' => Some(b'\\'),

                        _ => {
                            // octal escape: un-read the byte, then read up to three octal digits
                            self.back()?;
                            let _start = self.get_offset();
                            let mut char_code: u16 = 0;

                            // A character code must follow. 1-3 numbers.
for _ in 0..3 {
                                let c = self.peek_byte()?;
                                if (b'0'..=b'7').contains(&c) {
                                    self.next_byte()?;
                                    char_code = char_code * 8 + (c - b'0') as u16;
                                } else {
                                    break;
                                }
                            }
                            // overflow past one byte is silently truncated (see octal_escape test)
                            Some(char_code as u8)
                        }
                    }
                )
            },

            b'(' => {
                // balanced parentheses are part of the string itself
                self.nested += 1;
                Ok(Some(b'('))
            },
            b')' => {
                self.nested -= 1;
                if self.nested < 0 {
                    // unbalanced ')': this terminates the string
                    Ok(None)
                } else {
                    Ok(Some(b')'))
                }
            },

            c => Ok(Some(c))

        }
    }

    fn next_byte(&mut self) -> Result {
        if self.pos < self.buf.len() {
            self.pos += 1;
            Ok(self.buf[self.pos-1])
        } else {
            Err(PdfError::EOF)
        }
    }
    fn back(&mut self) -> Result<()> {
        if self.pos > 0 {
            self.pos -= 1;
            Ok(())
        } else {
            Err(PdfError::EOF)
        }
    }
    fn peek_byte(&mut self) -> Result {
        if self.pos < self.buf.len() {
            Ok(self.buf[self.pos])
        } else {
            Err(PdfError::EOF)
        }
    }
}

// "'a is valid for at least 'b"
pub struct StringLexerIter<'a: 'b, 'b> {
    lexer: &'b mut StringLexer<'a>,
}

impl<'a, 'b> Iterator for StringLexerIter<'a, 'b> {
    type Item = Result;
    fn next(&mut self) -> Option> {
        match self.lexer.next_lexeme() {
            Err(e) => Some(Err(e)),
            Ok(Some(s)) => Some(Ok(s)),
            Ok(None) => None,
        }
    }
}

/// Lexer for hexadecimal PDF strings.
pub struct HexStringLexer<'a> {
    pos: usize, // points to next byte
    buf: &'a [u8],
}

impl<'a> HexStringLexer<'a> {
    /// `buf` should start right after the `<` delimiter, and may span all the way to EOF.
    /// HexStringLexer will determine the end of the string.
    pub fn new(buf: &'a [u8]) -> HexStringLexer<'a> {
        HexStringLexer { pos: 0, buf }
    }

    pub fn iter<'b>(&'b mut self) -> HexStringLexerIter<'a, 'b> {
        HexStringLexerIter { lexer: self }
    }

    /// Get offset/position from start of string
    pub fn get_offset(&self) -> usize {
        self.pos
    }

    // Whitespace between hex digits is allowed and skipped.
    fn next_non_whitespace_char(&mut self) -> Result {
        let mut byte = self.read_byte()?;
        while byte == b' ' || byte == b'\t' || byte == b'\n' || byte == b'\r' || byte == b'\x0c' {
            byte = self.read_byte()?;
        }
        Ok(byte)
    }

    /// Decode the next byte (two hex digits); `None` marks the terminating `>`.
    /// A trailing odd digit is padded with a zero low nibble.
    pub fn next_hex_byte(&mut self) -> Result> {
        let c1 = self.next_non_whitespace_char()?;
        let high_nibble: u8 = match c1 {
            b'0' ..= b'9' => c1 - b'0',
            b'A' ..= b'F' => c1 - b'A' + 0xA,
            b'a' ..= b'f' => c1 - b'a' + 0xA,
            b'>' => return Ok(None),
            _ => return Err(PdfError::HexDecode {
                pos: self.pos,
                bytes: [c1, self.peek_byte().unwrap_or(0)]
            }),
        };
        let c2 = self.next_non_whitespace_char()?;
        let low_nibble: u8 = match c2 {
            b'0' ..= b'9' => c2 - b'0',
            b'A' ..= b'F' => c2 - b'A' + 0xA,
            b'a' ..= b'f' => c2 - b'a' + 0xA,
            b'>' => {
                // odd number of digits: un-read '>' so the next call terminates
                self.back()?;
                0
            }
            _ => return Err(PdfError::HexDecode {
                pos: self.pos,
                bytes: [c1, c2]
            }),
        };
        Ok(Some((high_nibble << 4) | low_nibble))
    }

    fn read_byte(&mut self) -> Result {
        if self.pos < self.buf.len() {
            self.pos += 1;
            Ok(self.buf[self.pos - 1])
        } else {
            Err(PdfError::EOF)
        }
    }

    fn back(&mut self) -> Result<()> {
        if self.pos > 0 {
            self.pos -= 1;
            Ok(())
        } else {
            Err(PdfError::EOF)
        }
    }

    fn peek_byte(&mut self) -> Result {
        if self.pos < self.buf.len() {
            Ok(self.buf[self.pos])
        } else {
            Err(PdfError::EOF)
        }
    }
}

pub struct HexStringLexerIter<'a: 'b, 'b> {
    lexer: &'b mut HexStringLexer<'a>,
}

impl<'a, 'b> Iterator for HexStringLexerIter<'a, 'b> {
    type Item = Result;

    fn next(&mut self) -> Option> {
        match self.lexer.next_hex_byte() {
            Err(e) => Some(Err(e)),
            Ok(Some(s)) => Some(Ok(s)),
            Ok(None) => None,
        }
    }
}

#[cfg(test)]
mod tests {
    use crate::error::Result;
    use crate::parser::lexer::{HexStringLexer, StringLexer};

    #[test]
    fn tests() {
        let vec = b"a\\nb\\rc\\td\\(f/)\\\\hei)";
        let mut lexer = StringLexer::new(vec);
        let lexemes: Vec = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(lexemes, b"a\nb\rc\td(f/");
    }

    #[test]
    fn string_split_lines() {
        {
            let data = b"These \\\ntwo strings \\\nare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
        {
            let data = b"These \\\rtwo strings \\\rare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
        {
            let data = b"These \\\r\ntwo strings \\\r\nare the same.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"These two strings are the same.");
        }
    }

    #[test]
    fn octal_escape() {
        {
            let data = b"This string contains\\245two octal characters\\307.)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, &b"This string contains\xa5two octal characters\xc7."[..]);
        }
        {
            let data = b"\\0053)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"\x053");
        }
        {
            let data = b"\\053)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"+");
        }
        {
            let data = b"\\53)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"+");
        }
        {
            // overflow is ignored
            let data = b"\\541)";
            let mut lexer = StringLexer::new(data);
            let result: Vec = lexer.iter().map(Result::unwrap).collect();
            assert_eq!(result, b"a");
        }
    }

    #[test]
    fn hex_test() {
        let input = b"901FA3>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(
            result,
            vec![
                b'\x90',
                b'\x1f',
                b'\xa3',
            ]
        );

        let input = b"901FA>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(
            result,
            vec![
                b'\x90',
                b'\x1f',
                b'\xa0',
            ]
        );

        let input = b"1 9F\t5\r\n4\x0c62a>";
        let mut lexer = HexStringLexer::new(input);
        let result: Vec = lexer.iter().map(Result::unwrap).collect();
        assert_eq!(
            result,
            vec![
                b'\x19',
                b'\xf5',
                b'\x46',
                b'\x2a',
            ]
        );
    }
}
-------------------------------------------------------------------------------- /pdf/src/parser/mod.rs: --------------------------------------------------------------------------------
//! Basic functionality for parsing a PDF file.
2 | 3 | mod lexer; 4 | mod parse_object; 5 | mod parse_xref; 6 | 7 | pub use self::lexer::*; 8 | pub use self::parse_object::*; 9 | pub use self::parse_xref::*; 10 | 11 | use crate::error::*; 12 | use crate::primitive::StreamInner; 13 | use crate::primitive::{Primitive, Dictionary, PdfStream, PdfString}; 14 | use crate::object::{ObjNr, GenNr, PlainRef, Resolve}; 15 | use crate::crypt::Decoder; 16 | use bitflags::bitflags; 17 | use istring::{SmallBytes, SmallString, IBytes}; 18 | 19 | const MAX_DEPTH: usize = 20; 20 | 21 | 22 | bitflags! { 23 | #[repr(transparent)] 24 | #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] 25 | pub struct ParseFlags: u16 { 26 | const INTEGER = 1 << 0; 27 | const STREAM = 1 << 1; 28 | const DICT = 1 << 2; 29 | const NUMBER = 1 << 3; 30 | const NAME = 1 << 4; 31 | const ARRAY = 1 << 5; 32 | const STRING = 1 << 6; 33 | const BOOL = 1 << 7; 34 | const NULL = 1 << 8; 35 | const REF = 1 << 9; 36 | const ANY = (1 << 10) - 1; 37 | } 38 | } 39 | 40 | 41 | pub struct Context<'a> { 42 | pub decoder: Option<&'a Decoder>, 43 | pub id: PlainRef, 44 | } 45 | impl<'a> Context<'a> { 46 | pub fn decrypt<'buf>(&self, data: &'buf mut [u8]) -> Result<&'buf [u8]> { 47 | if let Some(decoder) = self.decoder { 48 | decoder.decrypt(self.id, data) 49 | } else { 50 | Ok(data) 51 | } 52 | } 53 | #[cfg(test)] 54 | fn fake() -> Self { 55 | Context { 56 | decoder: None, 57 | id: PlainRef { id: 0, gen: 0 } 58 | } 59 | } 60 | } 61 | 62 | /// Can parse stream but only if its dictionary does not contain indirect references. 63 | /// Use `parse_stream` if this is insufficient. 64 | pub fn parse(data: &[u8], r: &impl Resolve, flags: ParseFlags) -> Result { 65 | parse_with_lexer(&mut Lexer::new(data), r, flags) 66 | } 67 | 68 | /// Recursive. Can parse stream but only if its dictionary does not contain indirect references. 69 | /// Use `parse_stream` if this is not sufficient. 
70 | pub fn parse_with_lexer(lexer: &mut Lexer, r: &impl Resolve, flags: ParseFlags) -> Result { 71 | parse_with_lexer_ctx(lexer, r, None, flags, MAX_DEPTH) 72 | } 73 | 74 | fn parse_dictionary_object(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, max_depth: usize) -> Result { 75 | let mut dict = Dictionary::default(); 76 | loop { 77 | // Expect a Name (and Object) or the '>>' delimiter 78 | let token = t!(lexer.next()); 79 | if token.starts_with(b"/") { 80 | let key = token.reslice(1..).to_name()?; 81 | let obj = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth)); 82 | dict.insert(key, obj); 83 | } else if token.equals(b">>") { 84 | break; 85 | } else { 86 | err!(PdfError::UnexpectedLexeme{ pos: lexer.get_pos(), lexeme: token.to_string(), expected: "/ or >>"}); 87 | } 88 | } 89 | Ok(dict) 90 | } 91 | 92 | fn parse_stream_object(dict: Dictionary, lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result { 93 | t!(lexer.next_stream()); 94 | 95 | let length = match dict.get("Length") { 96 | Some(&Primitive::Integer(n)) if n >= 0 => n as usize, 97 | Some(&Primitive::Reference(reference)) => t!(t!(r.resolve_flags(reference, ParseFlags::INTEGER, 1)).as_usize()), 98 | Some(other) => err!(PdfError::UnexpectedPrimitive { expected: "unsigned Integer or Reference", found: other.get_debug_name() }), 99 | None => err!(PdfError::MissingEntry { typ: "", field: "Length".into() }), 100 | }; 101 | 102 | let stream_substr = lexer.read_n(length); 103 | 104 | if stream_substr.len() != length { 105 | err!(PdfError::EOF) 106 | } 107 | 108 | // Finish 109 | t!(lexer.next_expect("endstream")); 110 | 111 | Ok(PdfStream { 112 | inner: StreamInner::InFile { 113 | id: ctx.id, 114 | file_range: stream_substr.file_range(), 115 | }, 116 | info: dict, 117 | }) 118 | } 119 | 120 | #[inline] 121 | fn check(flags: ParseFlags, allowed: ParseFlags) -> Result<(), PdfError> { 122 | if !flags.intersects(allowed) { 123 | return Err(PdfError::PrimitiveNotAllowed { allowed, 
found: flags }); 124 | } 125 | Ok(()) 126 | } 127 | 128 | /// Recursive. Can parse stream but only if its dictionary does not contain indirect references. 129 | /// Use `parse_stream` if this is not sufficient. 130 | pub fn parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result { 131 | let pos = lexer.get_pos(); 132 | match _parse_with_lexer_ctx(lexer, r, ctx, flags, max_depth) { 133 | Ok(r) => Ok(r), 134 | Err(e) => { 135 | lexer.set_pos(pos); 136 | Err(e) 137 | } 138 | } 139 | } 140 | fn _parse_with_lexer_ctx(lexer: &mut Lexer, r: &impl Resolve, ctx: Option<&Context>, flags: ParseFlags, max_depth: usize) -> Result { 141 | 142 | let input = lexer.get_remaining_slice(); 143 | let first_lexeme = t!(lexer.next(), std::str::from_utf8(input)); 144 | 145 | let obj = if first_lexeme.equals(b"<<") { 146 | check(flags, ParseFlags::DICT)?; 147 | 148 | if max_depth == 0 { 149 | return Err(PdfError::MaxDepth); 150 | } 151 | let dict = t!(parse_dictionary_object(lexer, r, ctx, max_depth-1)); 152 | // It might just be the dictionary in front of a stream. 
153 | if t!(lexer.peek()).equals(b"stream") { 154 | let ctx = ctx.ok_or(PdfError::PrimitiveNotAllowed { allowed: ParseFlags::STREAM, found: flags })?; 155 | Primitive::Stream(t!(parse_stream_object(dict, lexer, r, ctx))) 156 | } else { 157 | Primitive::Dictionary(dict) 158 | } 159 | } else if first_lexeme.is_integer() { 160 | // May be Integer or Reference 161 | check(flags, ParseFlags::INTEGER | ParseFlags::REF)?; 162 | 163 | // First backup position 164 | let pos_bk = lexer.get_pos(); 165 | 166 | let second_lexeme = t!(lexer.next()); 167 | if second_lexeme.is_integer() { 168 | let third_lexeme = t!(lexer.next()); 169 | if third_lexeme.equals(b"R") { 170 | // It is indeed a reference to an indirect object 171 | check(flags, ParseFlags::REF)?; 172 | Primitive::Reference (PlainRef { 173 | id: t!(first_lexeme.to::()), 174 | gen: t!(second_lexeme.to::()), 175 | }) 176 | } else { 177 | check(flags, ParseFlags::INTEGER)?; 178 | // We are probably in an array of numbers - it's not a reference anyway 179 | lexer.set_pos(pos_bk); // (roll back the lexer first) 180 | Primitive::Integer(t!(first_lexeme.to::())) 181 | } 182 | } else { 183 | check(flags, ParseFlags::INTEGER)?; 184 | // It is but a number 185 | lexer.set_pos(pos_bk); // (roll back the lexer first) 186 | Primitive::Integer(t!(first_lexeme.to::())) 187 | } 188 | } else if let Some(s) = first_lexeme.real_number() { 189 | check(flags, ParseFlags::NUMBER)?; 190 | // Real Number 191 | Primitive::Number (t!(s.to::(), s.to_string())) 192 | } else if first_lexeme.starts_with(b"/") { 193 | check(flags, ParseFlags::NAME)?; 194 | // Name 195 | 196 | let mut rest: &[u8] = &first_lexeme.reslice(1..); 197 | let s = if rest.contains(&b'#') { 198 | let mut s = IBytes::new(); 199 | while let Some(idx) = rest.iter().position(|&b| b == b'#') { 200 | use crate::enc::decode_nibble; 201 | use std::convert::TryInto; 202 | let [hi, lo]: [u8; 2] = rest.get(idx+1 .. 
idx+3).ok_or(PdfError::EOF)?.try_into().unwrap(); 203 | let byte = match (decode_nibble(lo), decode_nibble(hi)) { 204 | (Some(low), Some(high)) => low | high << 4, 205 | _ => return Err(PdfError::HexDecode { pos: idx, bytes: [hi, lo] }), 206 | }; 207 | s.extend_from_slice(&rest[..idx]); 208 | s.push(byte); 209 | rest = &rest[idx+3..]; 210 | } 211 | s.extend_from_slice(rest); 212 | SmallBytes::from(s.as_slice()) 213 | } else { 214 | SmallBytes::from(rest) 215 | }; 216 | 217 | Primitive::Name(SmallString::from_utf8(s)?) 218 | } else if first_lexeme.equals(b"[") { 219 | check(flags, ParseFlags::ARRAY)?; 220 | if max_depth == 0 { 221 | return Err(PdfError::MaxDepth); 222 | } 223 | let mut array = Vec::new(); 224 | // Array 225 | loop { 226 | // Exit if closing delimiter 227 | if lexer.peek()?.equals(b"]") { 228 | break; 229 | } 230 | 231 | let element = t!(parse_with_lexer_ctx(lexer, r, ctx, ParseFlags::ANY, max_depth-1)); 232 | array.push(element); 233 | } 234 | t!(lexer.next()); // Move beyond closing delimiter 235 | 236 | Primitive::Array (array) 237 | } else if first_lexeme.equals(b"(") { 238 | check(flags, ParseFlags::STRING)?; 239 | let mut string = IBytes::new(); 240 | 241 | let bytes_traversed = { 242 | let mut string_lexer = StringLexer::new(lexer.get_remaining_slice()); 243 | for character in string_lexer.iter() { 244 | string.push(t!(character)); 245 | } 246 | string_lexer.get_offset() 247 | }; 248 | // Advance to end of string 249 | lexer.offset_pos(bytes_traversed); 250 | // decrypt it 251 | if let Some(ctx) = ctx { 252 | string = t!(ctx.decrypt(&mut string)).into(); 253 | } 254 | Primitive::String (PdfString::new(string)) 255 | } else if first_lexeme.equals(b"<") { 256 | check(flags, ParseFlags::STRING)?; 257 | let mut string = IBytes::new(); 258 | 259 | let bytes_traversed = { 260 | let mut hex_string_lexer = HexStringLexer::new(lexer.get_remaining_slice()); 261 | for byte in hex_string_lexer.iter() { 262 | string.push(t!(byte)); 263 | } 264 | 
hex_string_lexer.get_offset() 265 | }; 266 | // Advance to end of string 267 | lexer.offset_pos(bytes_traversed); 268 | 269 | // decrypt it 270 | if let Some(ctx) = ctx { 271 | string = t!(ctx.decrypt(&mut string)).into(); 272 | } 273 | Primitive::String (PdfString::new(string)) 274 | } else if first_lexeme.equals(b"true") { 275 | check(flags, ParseFlags::BOOL)?; 276 | Primitive::Boolean (true) 277 | } else if first_lexeme.equals(b"false") { 278 | check(flags, ParseFlags::BOOL)?; 279 | Primitive::Boolean (false) 280 | } else if first_lexeme.equals(b"null") { 281 | check(flags, ParseFlags::NULL)?; 282 | Primitive::Null 283 | } else { 284 | err!(PdfError::UnknownType {pos: lexer.get_pos(), first_lexeme: first_lexeme.to_string(), rest: lexer.read_n(50).to_string()}); 285 | }; 286 | 287 | // trace!("Read object"; "Obj" => format!("{}", obj)); 288 | 289 | Ok(obj) 290 | } 291 | 292 | 293 | pub fn parse_stream(data: &[u8], resolve: &impl Resolve, ctx: &Context) -> Result { 294 | parse_stream_with_lexer(&mut Lexer::new(data), resolve, ctx) 295 | } 296 | 297 | 298 | fn parse_stream_with_lexer(lexer: &mut Lexer, r: &impl Resolve, ctx: &Context) -> Result { 299 | let first_lexeme = t!(lexer.next()); 300 | 301 | let obj = if first_lexeme.equals(b"<<") { 302 | let dict = t!(parse_dictionary_object(lexer, r, None, MAX_DEPTH)); 303 | // It might just be the dictionary in front of a stream. 
304 | if t!(lexer.peek()).equals(b"stream") { 305 | let ctx = Context { 306 | decoder: None, 307 | id: ctx.id 308 | }; 309 | t!(parse_stream_object(dict, lexer, r, &ctx)) 310 | } else { 311 | err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "Dictionary" }); 312 | } 313 | } else { 314 | err!(PdfError::UnexpectedPrimitive { expected: "Stream", found: "something else" }); 315 | }; 316 | 317 | Ok(obj) 318 | } 319 | 320 | #[cfg(test)] 321 | mod tests { 322 | #[test] 323 | fn dict_with_empty_name_as_value() { 324 | use crate::object::NoResolve; 325 | use super::{ParseFlags, Context}; 326 | { 327 | let data = b"<>>>"; 328 | let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap(); 329 | let dict = primitive.into_dictionary().unwrap(); 330 | 331 | assert_eq!(dict.len(), 1); 332 | let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap(); 333 | assert_eq!(app_dict.len(), 1); 334 | let name = app_dict.get("Name").unwrap().as_name().unwrap(); 335 | assert_eq!(name, ""); 336 | } 337 | 338 | { 339 | let data = b"<>>>stream\nendstream\n"; 340 | let stream = super::parse_stream(data, &NoResolve, &Context::fake()).unwrap(); 341 | let dict = stream.info; 342 | 343 | assert_eq!(dict.len(), 2); 344 | let app_dict = dict.get("App").unwrap().clone().into_dictionary().unwrap(); 345 | assert_eq!(app_dict.len(), 1); 346 | let name = app_dict.get("Name").unwrap().as_name().unwrap(); 347 | assert_eq!(name, ""); 348 | } 349 | } 350 | 351 | #[test] 352 | fn dict_with_empty_name_as_key() { 353 | use crate::object::NoResolve; 354 | use super::{ParseFlags, Context}; 355 | 356 | { 357 | let data = b"<>"; 358 | let primitive = super::parse(data, &NoResolve, ParseFlags::DICT).unwrap(); 359 | let dict = primitive.into_dictionary().unwrap(); 360 | 361 | assert_eq!(dict.len(), 1); 362 | assert!(dict.get("").unwrap().as_bool().unwrap()); 363 | } 364 | 365 | { 366 | let data = b"<>stream\nendstream\n"; 367 | let stream = super::parse_stream(data, 
&NoResolve, &Context::fake()).unwrap(); 368 | let dict = stream.info; 369 | 370 | assert_eq!(dict.len(), 2); 371 | assert!(dict.get("").unwrap().as_bool().unwrap()); 372 | } 373 | } 374 | 375 | #[test] 376 | fn empty_array() { 377 | use crate::object::NoResolve; 378 | use super::ParseFlags; 379 | 380 | let data = b"[]"; 381 | let primitive = super::parse(data, &NoResolve, ParseFlags::ARRAY).unwrap(); 382 | let array = primitive.into_array().unwrap(); 383 | assert!(array.is_empty()); 384 | } 385 | 386 | #[test] 387 | fn compact_array() { 388 | use crate::object::NoResolve; 389 | use crate::primitive::{Primitive, PdfString}; 390 | use super::lexer::Lexer; 391 | use super::*; 392 | let mut lx = Lexer::new(b"[(Complete L)20(egend for Physical and P)20(olitical Maps)]TJ"); 393 | assert_eq!(parse_with_lexer(&mut lx, &NoResolve, ParseFlags::ANY).unwrap(), 394 | Primitive::Array(vec![ 395 | Primitive::String(PdfString::new("Complete L".into())), 396 | Primitive::Integer(20), 397 | Primitive::String(PdfString::new("egend for Physical and P".into())), 398 | Primitive::Integer(20), 399 | Primitive::String(PdfString::new("olitical Maps".into())) 400 | ]) 401 | ); 402 | assert_eq!(lx.next().unwrap().as_str().unwrap(), "TJ"); 403 | assert!(lx.next().unwrap_err().is_eof()); 404 | } 405 | } 406 | -------------------------------------------------------------------------------- /pdf/src/parser/parse_object.rs: -------------------------------------------------------------------------------- 1 | // Considering whether to impl Object and IndirectObject here. 2 | // 3 | 4 | use crate::parser::{lexer::*, MAX_DEPTH}; 5 | use crate::error::*; 6 | use crate::primitive::{Primitive, PdfStream}; 7 | use crate::parser::{parse_with_lexer_ctx, parse_stream_with_lexer, Context, ParseFlags}; 8 | use crate::object::*; 9 | use crate::crypt::Decoder; 10 | 11 | /// Parses an Object starting at the current position of `lexer`. 
Almost as 12 | /// `Reader::parse_object`, but this function does not take `Reader`, at the expense that it 13 | /// cannot dereference 14 | 15 | pub fn parse_indirect_object(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>, flags: ParseFlags) -> Result<(PlainRef, Primitive)> { 16 | let id = PlainRef { 17 | id: t!(lexer.next()).to::()?, 18 | gen: t!(lexer.next()).to::()?, 19 | }; 20 | lexer.next_expect("obj")?; 21 | 22 | let ctx = Context { 23 | decoder, 24 | id, 25 | }; 26 | let obj = t!(parse_with_lexer_ctx(lexer, r, Some(&ctx), flags, MAX_DEPTH)); 27 | 28 | if r.options().allow_missing_endobj { 29 | let pos = lexer.get_pos(); 30 | if let Err(e) = lexer.next_expect("endobj") { 31 | warn!("error parsing obj {} {}: {:?}", id.id, id.gen, e); 32 | lexer.set_pos(pos); 33 | } 34 | } else { 35 | t!(lexer.next_expect("endobj")); 36 | } 37 | 38 | Ok((id, obj)) 39 | } 40 | pub fn parse_indirect_stream(lexer: &mut Lexer, r: &impl Resolve, decoder: Option<&Decoder>) -> Result<(PlainRef, PdfStream)> { 41 | let id = PlainRef { 42 | id: t!(lexer.next()).to::()?, 43 | gen: t!(lexer.next()).to::()?, 44 | }; 45 | lexer.next_expect("obj")?; 46 | 47 | let ctx = Context { 48 | decoder, 49 | id, 50 | }; 51 | let stm = t!(parse_stream_with_lexer(lexer, r, &ctx)); 52 | 53 | t!(lexer.next_expect("endobj")); 54 | 55 | Ok((id, stm)) 56 | } 57 | -------------------------------------------------------------------------------- /pdf/src/parser/parse_xref.rs: -------------------------------------------------------------------------------- 1 | use crate::error::*; 2 | use crate::parser::lexer::Lexer; 3 | use crate::xref::{XRef, XRefSection, XRefInfo}; 4 | use crate::primitive::{Primitive, Dictionary}; 5 | use crate::object::*; 6 | use crate::parser::{parse_with_lexer, ParseFlags}; 7 | use crate::parser::parse_object::{parse_indirect_stream}; 8 | use std::convert::TryInto; 9 | 10 | // Just the part of Parser which reads xref sections from xref stream. 
11 | /// Takes `&mut &[u8]` so that it can "consume" data as it reads 12 | fn parse_xref_section_from_stream(first_id: u32, mut num_entries: usize, width: &[usize], data: &mut &[u8], resolve: &impl Resolve) -> Result { 13 | let mut entries = Vec::new(); 14 | let [w0, w1, w2]: [usize; 3] = width.try_into().map_err(|_| other!("invalid xref length array"))?; 15 | if num_entries * (w0 + w1 + w2) > data.len() { 16 | if resolve.options().allow_xref_error { 17 | warn!("not enough xref data. truncating."); 18 | num_entries = data.len() / (w0 + w1 + w2); 19 | } else { 20 | bail!("not enough xref data"); 21 | } 22 | } 23 | for _ in 0..num_entries { 24 | // println!("{:?}", &data[.. width.iter().map(|&i| i as usize).sum()]); 25 | // TODO Check if width[i] are 0. Use default values from the PDF references. 26 | let _type = if w0 == 0 { 27 | 1 28 | } else { 29 | read_u64_from_stream(w0, data)? 30 | }; 31 | let field1 = read_u64_from_stream(w1, data)?; 32 | let field2 = read_u64_from_stream(w2, data)?; 33 | 34 | let entry = 35 | match _type { 36 | 0 => XRef::Free {next_obj_nr: field1 as ObjNr, gen_nr: field2 as GenNr}, 37 | 1 => XRef::Raw {pos: field1 as usize, gen_nr: field2 as GenNr}, 38 | 2 => XRef::Stream {stream_id: field1 as ObjNr, index: field2 as usize}, 39 | _ => return Err(PdfError::XRefStreamType {found: _type}), // TODO: Should actually just be seen as a reference to the null object 40 | }; 41 | entries.push(entry); 42 | } 43 | Ok(XRefSection { 44 | first_id, 45 | entries, 46 | }) 47 | } 48 | /// Helper to read an integer with a certain amount of bytes `width` from stream. 
49 | fn read_u64_from_stream(width: usize, data: &mut &[u8]) -> Result { 50 | if width > std::mem::size_of::() { 51 | return Err(PdfError::Other { msg: format!("xref stream entry has invalid width {}", width) }); 52 | } 53 | if width > data.len() { 54 | return Err(PdfError::Other { msg: format!("xref stream entry has width {} but only {} bytes left to read", width, data.len()) }); 55 | } 56 | let mut result = 0; 57 | for i in (0..width).rev() { 58 | let base = 8 * i; // (width, 0] 59 | let c: u8 = data[0]; 60 | *data = &data[1..]; // Consume byte 61 | result += u64::from(c) << base; 62 | } 63 | Ok(result) 64 | } 65 | 66 | 67 | /// Reads xref sections (from stream) and trailer starting at the position of the Lexer. 68 | pub fn parse_xref_stream_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { 69 | let xref_stream = t!(parse_indirect_stream(lexer, resolve, None)).1; 70 | let trailer = if t!(lexer.next()) == "trailer" { 71 | let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT)); 72 | t!(trailer.into_dictionary()) 73 | } else { 74 | xref_stream.info.clone() 75 | }; 76 | 77 | let xref_stream = t!(Stream::::from_primitive(Primitive::Stream(xref_stream), resolve)); 78 | let mut data_left = &*t!(xref_stream.data(resolve)); 79 | 80 | let width = &xref_stream.w; 81 | 82 | let index = &xref_stream.index; 83 | 84 | if index.len() % 2 != 0 { 85 | return Err(PdfError::Other { msg: format!("xref stream has {} elements which is not an even number", index.len()) }); 86 | } 87 | 88 | let mut sections = Vec::new(); 89 | for (first_id, num_objects) in index.chunks_exact(2).map(|c| (c[0], c[1])) { 90 | let section = t!(parse_xref_section_from_stream(first_id, num_objects as usize, width, &mut data_left, resolve)); 91 | sections.push(section); 92 | } 93 | 94 | Ok((sections, trailer)) 95 | } 96 | 97 | 98 | /// Reads xref sections (from table) and trailer starting at the position of the Lexer. 
99 | pub fn parse_xref_table_and_trailer(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { 100 | let mut sections = Vec::new(); 101 | 102 | // Keep reading subsections until we hit `trailer` 103 | while lexer.peek()? != "trailer" { 104 | let start_id = t!(lexer.next_as::()); 105 | let num_ids = t!(lexer.next_as::()); 106 | 107 | let mut section = XRefSection::new(start_id); 108 | 109 | for i in 0..num_ids { 110 | let w1 = t!(lexer.next()); 111 | if w1 == "trailer" { 112 | return Err(PdfError::Other { msg: format!("xref table declares {} entries, but only {} follow.", num_ids, i) }); 113 | } 114 | let w2 = t!(lexer.next()); 115 | let w3 = t!(lexer.next()); 116 | if w3 == "f" { 117 | section.add_free_entry(t!(w1.to::()), t!(w2.to::())); 118 | } else if w3 == "n" { 119 | section.add_inuse_entry(t!(w1.to::()), t!(w2.to::())); 120 | } else { 121 | return Err(PdfError::UnexpectedLexeme {pos: lexer.get_pos(), lexeme: w3.to_string(), expected: "f or n"}); 122 | } 123 | } 124 | sections.push(section); 125 | } 126 | 127 | t!(lexer.next_expect("trailer")); 128 | let trailer = t!(parse_with_lexer(lexer, resolve, ParseFlags::DICT)); 129 | let trailer = t!(trailer.into_dictionary()); 130 | 131 | Ok((sections, trailer)) 132 | } 133 | 134 | pub fn read_xref_and_trailer_at(lexer: &mut Lexer, resolve: &impl Resolve) -> Result<(Vec, Dictionary)> { 135 | let next_word = t!(lexer.next()); 136 | if next_word == "xref" { 137 | // Read classic xref table 138 | parse_xref_table_and_trailer(lexer, resolve) 139 | } else { 140 | // Read xref stream 141 | lexer.back()?; 142 | parse_xref_stream_and_trailer(lexer, resolve) 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /pdf/src/path.rs: -------------------------------------------------------------------------------- 1 | use mint::Point2; 2 | type Point = Point2; 3 | 4 | pub enum FillMode { 5 | NonZero, 6 | EvenOdd 7 | } 8 | 9 | struct PathBuilder { 10 | out: W, 11 | 
current: Point 12 | } 13 | impl PathBuilder { 14 | pub fn new

(writer: W, start: P) -> PathBuilder 15 | where P: Into 16 | { 17 | PathBuilder { 18 | out: writer, 19 | current: start 20 | } 21 | } 22 | 23 | /// Begin a new subpath by moving the current point to `p`, 24 | /// omitting any connecting line segment. If 25 | /// the previous path construction operator in the current path 26 | /// was also m, the new m overrides it; no vestige of the 27 | /// previous m operation remains in the path. 28 | pub fn move

(&mut self, p: P) { 29 | let p = p.into(); 30 | writeln!(self.out, "{} {} m", p.x, p.y); 31 | self.current = p; 32 | } 33 | /// Append a straight line segment from the current point to the 34 | /// point `p`. The new current point shall be `p`. 35 | pub fn line

(&mut self, p: P) { 36 | let p = p.into(); 37 | writeln!(self.out, "{} {} l", p.x, p.y); 38 | self.current = p; 39 | } 40 | 41 | /// Append a quadratic Bézier curve to the current path. 42 | /// The curve shall extend from the current point to the point ´p´, 43 | /// using `c` as the Bézier control point. 44 | /// The new current point shall be `p`. 45 | /// 46 | /// NOTE: The quadratic Bézier curve is translated into a cubic Bézier curve, 47 | /// since PDF does not allow the former. 48 | pub fn quadratic

(&mut self, c: P, p: P) { 49 | let (p1, p2) = (p1.into(), p2.into()); 50 | let c1 = (2./3.) * c + (1./3.) * self.current; 51 | let c2 = (2./3.) * c + (1./3.) * p; 52 | writen!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y); 53 | self.current = p; 54 | } 55 | 56 | /// Append a cubic Bézier curve to the current path. 57 | /// The curve shall extend from the current point to the point ´p´, 58 | /// using `c1` and `c2` as the Bézier control points. 59 | /// The new current point shall be `p`. 60 | pub fn cubic

(&mut self, c1: P, c2: P, p: P) { 61 | let (c1, c2, p) = (c1.into(), c2.into(), p.into()); 62 | if Some(c1) == self.current { 63 | writeln!(self.out, "{} {} {} {} v", c2.x, c2.y, p.x, p.y); 64 | } else if Some(c2) == self.current { 65 | writeln!(self.out, "{} {} {} {} y", c1.x, c1.y, p.x, p.y); 66 | } else { 67 | writen!(self.out, "{} {} {} {} {} {} c", c1.x, c1.y, c2.x, c2.y, p.x, p.y); 68 | } 69 | self.current = p; 70 | } 71 | 72 | pub fn close(&mut self) { 73 | writeln!(self.out, "h"); 74 | } 75 | 76 | pub fn fill(&mut self, mode: FillMode) { 77 | match mode { 78 | FillMode::NonZero => writeln!(out, "f"), 79 | FillMode::EvenOdd => writeln!(out, "f*") 80 | } 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /pdf/src/repair.rs: -------------------------------------------------------------------------------- 1 | 2 | fn build_xref_table() { 3 | warn!("can't read xref table: {:?}", e); 4 | let start_offset = t!(backend.locate_start_offset()); 5 | let mut lexer = Lexer::new(t!(backend.read(..))); 6 | let mut objects = Vec::new(); 7 | 8 | (|| -> Result<()> { loop { 9 | let offset = lexer.get_pos(); 10 | let w1 = t!(lexer.next()); 11 | let w2 = t!(lexer.next()); 12 | let w3 = t!(lexer.next_expect("obj")); 13 | try_opt!(lexer.seek_substr("endobj")); 14 | 15 | objects.push((t!(w1.to::()), t!(w2.to::()), offset)); 16 | }})(); 17 | 18 | objects.sort_unstable(); 19 | let mut first_id = objects.first().map(|&(n, _, _)| n).unwrap_or(0); 20 | let mut last_id = objects.last().map(|&(n, _, _)| n).unwrap_or(0); 21 | let mut xref = XRefTable::new(1 + last_id - first_id); 22 | for &(obj_nr, gen_nr, offset) in objects.iter() { 23 | for n in first_id + 1 .. 
obj_nr { 24 | xref.push(XRef::Free { next_obj_nr: obj_nr, gen_nr: 0 }); 25 | } 26 | if obj_nr == last_id { 27 | warn!("duplicate obj_nr {}", obj_nr); 28 | continue; 29 | } 30 | xref.push(XRef::Raw { 31 | pos: offset - start_offset, 32 | gen_nr 33 | }); 34 | last_id = obj_nr; 35 | } 36 | 37 | return t!(Err(e)); 38 | } 39 | 40 | fn build_catalog() { 41 | 42 | } 43 | -------------------------------------------------------------------------------- /pdf/src/xref.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::{Debug, Formatter}; 2 | use crate::error::*; 3 | use crate::object::*; 4 | use crate as pdf; 5 | use datasize::DataSize; 6 | 7 | /////////////////////////// 8 | // Cross-reference table // 9 | /////////////////////////// 10 | 11 | #[derive(Copy, Clone, Debug)] 12 | pub enum XRef { 13 | /// Not currently used. 14 | Free { 15 | next_obj_nr: ObjNr, 16 | gen_nr: GenNr 17 | }, 18 | 19 | /// In use. 20 | Raw { 21 | pos: usize, 22 | gen_nr: GenNr 23 | }, 24 | /// In use and compressed inside an Object Stream 25 | Stream { 26 | stream_id: ObjNr, 27 | index: usize, 28 | }, 29 | 30 | Promised, 31 | 32 | Invalid 33 | } 34 | 35 | impl XRef { 36 | pub fn get_gen_nr(&self) -> GenNr { 37 | match *self { 38 | XRef::Free {gen_nr, ..} 39 | | XRef::Raw {gen_nr, ..} => gen_nr, 40 | XRef::Stream { .. } => 0, // TODO I think these always have gen nr 0? 41 | _ => panic!() 42 | } 43 | } 44 | } 45 | 46 | 47 | /// Runtime lookup table of all objects 48 | #[derive(Clone)] 49 | pub struct XRefTable { 50 | // None means that it's not specified, and should result in an error if used 51 | // Thought: None could also mean Free? 
52 | entries: Vec 53 | } 54 | 55 | 56 | impl XRefTable { 57 | pub fn new(num_objects: ObjNr) -> XRefTable { 58 | let mut entries = Vec::new(); 59 | entries.resize(num_objects as usize, XRef::Invalid); 60 | entries.push(XRef::Free { next_obj_nr: 0, gen_nr: 0xffff }); 61 | XRefTable { 62 | entries, 63 | } 64 | } 65 | 66 | pub fn iter(&self) -> impl Iterator + '_ { 67 | self.entries.iter().enumerate() 68 | .filter(|(_, xref)| matches!(xref, XRef::Raw { .. } | XRef::Stream { .. } )) 69 | .map(|(i, _)| i as u32) 70 | } 71 | 72 | pub fn get(&self, id: ObjNr) -> Result { 73 | match self.entries.get(id as usize) { 74 | Some(&entry) => Ok(entry), 75 | None => Err(PdfError::UnspecifiedXRefEntry {id}), 76 | } 77 | } 78 | pub fn set(&mut self, id: ObjNr, r: XRef) { 79 | self.entries[id as usize] = r; 80 | } 81 | pub fn len(&self) -> usize { 82 | self.entries.len() 83 | } 84 | pub fn is_empty(&self) -> bool { 85 | self.entries.is_empty() 86 | } 87 | pub fn push(&mut self, new_entry: XRef) { 88 | self.entries.push(new_entry); 89 | } 90 | pub fn num_entries(&self) -> usize { 91 | self.entries.len() 92 | } 93 | pub fn max_field_widths(&self) -> (u64, u64) { 94 | let mut max_a = 0; 95 | let mut max_b = 0; 96 | for &e in &self.entries { 97 | let (a, b) = match e { 98 | XRef::Raw { pos, gen_nr } => (pos as u64, gen_nr), 99 | XRef::Free { next_obj_nr, gen_nr } => (next_obj_nr, gen_nr), 100 | XRef::Stream { stream_id, index } => (stream_id, index as u64), 101 | _ => continue 102 | }; 103 | max_a = max_a.max(a); 104 | max_b = max_b.max(b); 105 | } 106 | (max_a, max_b) 107 | } 108 | 109 | pub fn add_entries_from(&mut self, section: XRefSection) -> Result<()> { 110 | for (i, &entry) in section.entries() { 111 | if let Some(dst) = self.entries.get_mut(i) { 112 | // Early return if the entry we have has larger or equal generation number 113 | let should_be_updated = match *dst { 114 | XRef::Raw { gen_nr: gen, .. } | XRef::Free { gen_nr: gen, .. 
} 115 | => entry.get_gen_nr() > gen, 116 | XRef::Stream { .. } | XRef::Invalid 117 | => true, 118 | x => bail!("found {:?}", x) 119 | }; 120 | if should_be_updated { 121 | *dst = entry; 122 | } 123 | } 124 | } 125 | Ok(()) 126 | } 127 | 128 | pub fn write_stream(&self, size: usize) -> Result> { 129 | let (max_a, max_b) = self.max_field_widths(); 130 | let a_w = byte_len(max_a); 131 | let b_w = byte_len(max_b); 132 | 133 | let mut data = Vec::with_capacity((1 + a_w + b_w) * size); 134 | for &x in self.entries.iter().take(size) { 135 | let (t, a, b) = match x { 136 | XRef::Free { next_obj_nr, gen_nr } => (0, next_obj_nr, gen_nr), 137 | XRef::Raw { pos, gen_nr } => (1, pos as u64, gen_nr), 138 | XRef::Stream { stream_id, index } => (2, stream_id, index as u64), 139 | x => bail!("invalid xref entry: {:?}", x) 140 | }; 141 | data.push(t); 142 | data.extend_from_slice(&a.to_be_bytes()[8 - a_w ..]); 143 | data.extend_from_slice(&b.to_be_bytes()[8 - b_w ..]); 144 | } 145 | let info = XRefInfo { 146 | size: size as u32, 147 | index: vec![0, size as u32], 148 | prev: None, 149 | w: vec![1, a_w, b_w], 150 | }; 151 | 152 | Ok(Stream::new(info, data)) 153 | } 154 | } 155 | 156 | fn byte_len(n: u64) -> usize { 157 | (64 + 8 - 1 - n.leading_zeros()) as usize / 8 + (n == 0) as usize 158 | } 159 | 160 | impl Debug for XRefTable { 161 | fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { 162 | for (i, entry) in self.entries.iter().enumerate() { 163 | match *entry { 164 | XRef::Free {next_obj_nr, gen_nr} => { 165 | writeln!(f, "{:4}: {:010} {:05} f", i, next_obj_nr, gen_nr)? 166 | }, 167 | XRef::Raw {pos, gen_nr} => { 168 | writeln!(f, "{:4}: {:010} {:05} n", i, pos, gen_nr)? 169 | }, 170 | XRef::Stream {stream_id, index} => { 171 | writeln!(f, "{:4}: in stream {}, index {}", i, stream_id, index)? 172 | }, 173 | XRef::Promised => { 174 | writeln!(f, "{:4}: Promised?", i)? 175 | }, 176 | XRef::Invalid => { 177 | writeln!(f, "{:4}: Invalid!", i)? 
178 | } 179 | } 180 | } 181 | Ok(()) 182 | } 183 | } 184 | 185 | /// As found in PDF files 186 | #[derive(Debug)] 187 | pub struct XRefSection { 188 | pub first_id: u32, 189 | pub entries: Vec, 190 | } 191 | 192 | 193 | impl XRefSection { 194 | pub fn new(first_id: u32) -> XRefSection { 195 | XRefSection { 196 | first_id, 197 | entries: Vec::new(), 198 | } 199 | } 200 | pub fn add_free_entry(&mut self, next_obj_nr: ObjNr, gen_nr: GenNr) { 201 | self.entries.push(XRef::Free{next_obj_nr, gen_nr}); 202 | } 203 | pub fn add_inuse_entry(&mut self, pos: usize, gen_nr: GenNr) { 204 | self.entries.push(XRef::Raw{pos, gen_nr}); 205 | } 206 | pub fn entries(&self) -> impl Iterator { 207 | self.entries.iter().enumerate().map(move |(i, e)| (i + self.first_id as usize, e)) 208 | } 209 | } 210 | 211 | 212 | #[derive(Object, ObjectWrite, Debug, DataSize)] 213 | #[pdf(Type = "XRef")] 214 | pub struct XRefInfo { 215 | // XRefStream fields 216 | #[pdf(key = "Size")] 217 | pub size: u32, 218 | 219 | // 220 | #[pdf(key = "Index", default = "vec![0, size]")] 221 | /// Array of pairs of integers for each subsection, (first object number, number of entries). 222 | /// Default value (assumed when None): `(0, self.size)`. 223 | pub index: Vec, 224 | 225 | #[pdf(key = "Prev")] 226 | prev: Option, 227 | 228 | #[pdf(key = "W")] 229 | pub w: Vec, 230 | } 231 | 232 | // read_xref_table 233 | // read_xref_stream 234 | // read_xref_and_trailer_at 235 | -------------------------------------------------------------------------------- /pdf/tests/integration.rs: -------------------------------------------------------------------------------- 1 | use std::str; 2 | use std::path::{Path, PathBuf}; 3 | use pdf::file::FileOptions; 4 | use pdf::object::*; 5 | use pdf::parser::{parse, ParseFlags}; 6 | use glob::glob; 7 | 8 | macro_rules! 
run { 9 | ($e:expr) => ( 10 | match $e { 11 | Ok(v) => v, 12 | Err(e) => { 13 | panic!("{}", e); 14 | } 15 | } 16 | ) 17 | } 18 | 19 | fn files() -> PathBuf { 20 | Path::new(env!("CARGO_MANIFEST_DIR")).parent().unwrap().join("files") 21 | } 22 | fn file_path(s: &str) -> PathBuf { 23 | files().join(s) 24 | } 25 | fn dir_pdfs(path: PathBuf) -> impl Iterator { 26 | path.read_dir().unwrap() 27 | .filter_map(|r| r.ok()) 28 | .map(|e| e.path()) 29 | .filter(|p| p.extension().map(|e| e == "pdf").unwrap_or(false)) 30 | } 31 | 32 | #[test] 33 | fn open_file() { 34 | let _ = run!(FileOptions::uncached().open(file_path("example.pdf"))); 35 | #[cfg(all(feature = "mmap", feature = "cache"))] 36 | let _ = run!({ 37 | use memmap2::Mmap; 38 | let file = std::fs::File::open(file_path!("example.pdf")).expect("can't open file"); 39 | let mmap = unsafe { Mmap::map(&file).expect("can't mmap file") }; 40 | FileOptions::cached().load(mmap) 41 | }); 42 | } 43 | 44 | #[cfg(feature="cache")] 45 | #[test] 46 | fn read_pages() { 47 | for path in dir_pdfs(files()) { 48 | println!("\n == Now testing `{}` ==", path.to_str().unwrap()); 49 | 50 | let path = path.to_str().unwrap(); 51 | let file = run!(FileOptions::cached().open(path)); 52 | for i in 0 .. file.num_pages() { 53 | println!("Read page {}", i); 54 | let _ = file.get_page(i); 55 | } 56 | } 57 | } 58 | 59 | #[test] 60 | fn user_password() { 61 | for path in dir_pdfs(file_path("password_protected")) { 62 | println!("\n\n == Now testing `{}` ==\n", path.to_str().unwrap()); 63 | 64 | let path = path.to_str().unwrap(); 65 | let file = run!(FileOptions::uncached().password(b"userpassword").open(path)); 66 | for i in 0 .. 
file.num_pages() { 67 | println!("\nRead page {}", i); 68 | let _ = file.get_page(i); 69 | } 70 | } 71 | } 72 | 73 | #[test] 74 | fn owner_password() { 75 | for path in dir_pdfs(file_path("password_protected")) { 76 | println!("\n\n == Now testing `{}` ==\n", path.to_str().unwrap()); 77 | 78 | let path = path.to_str().unwrap(); 79 | let file = run!(FileOptions::uncached().password(b"ownerpassword").open(path)); 80 | for i in 0 .. file.num_pages() { 81 | println!("\nRead page {}", i); 82 | let _ = file.get_page(i); 83 | } 84 | } 85 | } 86 | 87 | // Test for invalid PDFs found by fuzzing. 88 | // We don't care if they give an Err or Ok, as long as they don't panic. 89 | #[cfg(feature="cache")] 90 | #[test] 91 | fn invalid_pdfs() { 92 | for path in dir_pdfs(file_path("invalid")) { 93 | let path = path.to_str().unwrap(); 94 | println!("\n\n == Now testing `{}` ==\n", path); 95 | 96 | match FileOptions::cached().open(path) { 97 | Ok(file) => { 98 | for i in 0 .. file.num_pages() { 99 | let _ = file.get_page(i); 100 | } 101 | } 102 | Err(_) => { 103 | continue; 104 | } 105 | } 106 | } 107 | } 108 | 109 | #[cfg(feature="cache")] 110 | #[test] 111 | fn parse_objects_from_stream() { 112 | use pdf::object::NoResolve; 113 | let file = run!(FileOptions::cached().open(file_path("xelatex.pdf"))); 114 | let resolver = file.resolver(); 115 | 116 | // .. 
we know that object 13 of that file is an ObjectStream
    // FIX: the generic parameter was stripped in extraction — the annotation
    // must name the target type so `resolver.get` knows what to decode.
    let obj_stream: RcRef<ObjectStream> = run!(resolver.get(Ref::new(PlainRef { id: 13, gen: 0 })));
    for i in 0..obj_stream.n_objects() {
        let (data, range) = run!(obj_stream.get_object_slice(i, &resolver));
        let slice = &data[range];
        println!("Object slice #{}: {}\n", i, str::from_utf8(slice).unwrap());
        run!(parse(slice, &NoResolve, ParseFlags::ANY));
    }
}

// TODO test decoding

--------------------------------------------------------------------------------
/pdf/tests/write.rs:
--------------------------------------------------------------------------------
// TODO: commented out to make it compile
/*
extern crate pdf;

use pdf::file::File;
use pdf::types::*;
use pdf::stream::ObjectStream;

fn main() {
    let mut file = File::new(Vec::new());

    let page_tree_promise = file.promise();
    let mut page_tree = PageTree::root();
    let mut page = Page::new((&page_tree_promise).into());
    page.media_box = Some(Rect {
        left: 0.,
        right: 100.,
        top: 0.,
        bottom: 200.
    });

    // create the content stream
    let content = ObjectStream::new(&mut file);

    // add stream to file
    let content_ref = file.add(content);

    page_tree.add(file.add(PagesNode::Page(page)).unwrap());

    let catalog = Catalog::new(file.fulfill(page_tree_promise, page_tree).unwrap());

    let catalog_ref = file.add(catalog).unwrap();
    file.finish(catalog_ref);
}
*/

--------------------------------------------------------------------------------
/pdf/tests/xref.rs:
--------------------------------------------------------------------------------
use pdf::file::FileOptions;

// Regression tests: these malformed inputs must fail cleanly (no hang, no panic).
#[test]
fn infinite_loop_invalid_file() {
    assert!(FileOptions::uncached().load(b"startxref%PDF-".as_ref()).is_err());
}

#[test]
fn ending_angle_bracket() {
    assert!(FileOptions::uncached().load(b"%PDF-startxref>".as_ref()).is_err());
    assert!(FileOptions::uncached().load(b"%PDF-startxref<".as_ref()).is_err());
}

--------------------------------------------------------------------------------
/pdf_derive/Cargo.toml:
--------------------------------------------------------------------------------
[package]
name = "pdf_derive"
version = "0.2.0"
# NOTE(review): the second author's e-mail was lost in extraction (angle-bracket
# content stripped) — restore it from upstream history.
authors = ["Erlend Langseth <3rlendhl@gmail.com>", "Sebastian Köln "]
homepage = "https://github.com/pdf-rs"
repository = "https://github.com/pdf-rs/pdf_derive"
description = "helper for pdf-rs."
license = "MIT"
edition = "2018"

[dependencies]
syn = { version = "2", features = ["full", "extra-traits"] }
proc-macro2 = "1.0.24"
quote = "1"

[lib]
proc-macro = true
--------------------------------------------------------------------------------