├── SafeRapidPdf ├── Globals.cs ├── Document │ ├── PdfDate.cs │ ├── PdfFont.cs │ ├── PdfNameTree.cs │ ├── PdfNumberTree.cs │ ├── PdfArtBox.cs │ ├── PdfCropBox.cs │ ├── PdfTrimBox.cs │ ├── PdfMediaBox.cs │ ├── PdfBleedBox.cs │ ├── PdfRotate.cs │ ├── PdfCount.cs │ ├── PdfBaseObject.cs │ ├── PdfContents.cs │ ├── PdfCatalog.cs │ ├── PdfRectangle.cs │ ├── PdfPageTree.cs │ └── PdfPage.cs ├── assembly.cs ├── Logical │ └── PdfStructure.cs ├── Services │ ├── IIndirectReferenceResolver.cs │ └── IndirectReferenceResolver.cs ├── SafeRapidPdf.csproj ├── Parsing │ ├── ParsingException.cs │ ├── UnexpectedTokenException.cs │ └── Lexer.cs ├── Filters │ └── IFilter.cs ├── Objects │ ├── PdfNull.cs │ ├── PdfData.cs │ ├── PdfTrailer.cs │ ├── PdfComment.cs │ ├── PdfKeyValuePair.cs │ ├── PdfStartXRef.cs │ ├── PdfBoolean.cs │ ├── PdfName.cs │ ├── PdfArray.cs │ ├── PdfHexadecimalString.cs │ ├── PdfIndirectObject.cs │ ├── PdfNumeric.cs │ ├── PdfIndirectReference.cs │ ├── PdfXRef.cs │ ├── PdfLiteralString.cs │ ├── PdfObject.cs │ ├── PdfXRefSection.cs │ ├── PdfXRefEntry.cs │ ├── PdfDictionary.cs │ ├── PdfFile.cs │ └── PdfStream.cs ├── PdfObjectType.cs ├── IPdfObject.cs ├── Attributes │ └── ParameterTypeAttribute.cs └── PdfDocument.cs ├── SafeRapidPdf.UnitTests ├── testdata │ ├── 1.pdf │ └── 3.pdf ├── Objects │ └── PdfNumericTests.cs ├── File │ ├── PdfFileTests.cs │ ├── PdfStreamTests.cs │ ├── PdfDocumentTests.cs │ └── PdfXRefTests.cs ├── Util │ └── StringExtensions.cs └── SafeRapidPdf.UnitTests.csproj ├── .github ├── dependabot.yml ├── PULL_REQUEST_TEMPLATE.md ├── workflows │ └── dotnetcore.yml └── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── .vscode ├── settings.json ├── launch.json └── tasks.json ├── .gitignore ├── PdfInfoTool ├── Command │ ├── Dump.cs │ └── Show.cs ├── Program.cs ├── PdfInfoTool.csproj └── Options.cs ├── .editorconfig ├── .devcontainer └── devcontainer.json ├── README.md ├── LICENSE.md └── SafeRapidPdf.sln /SafeRapidPdf/Globals.cs: 
-------------------------------------------------------------------------------- 1 | global using System; 2 | global using System.Collections.Generic; 3 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfDate.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Document; 2 | 3 | public class PdfDate 4 | { 5 | } 6 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfFont.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Document; 2 | 3 | public class PdfFont 4 | { 5 | } 6 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfNameTree.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Document; 2 | 3 | public class PdfNameTree 4 | { 5 | } 6 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfNumberTree.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Document; 2 | 3 | public class PdfNumberTree 4 | { 5 | } 6 | -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/testdata/1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Color-Of-Code/SafeRapidPdf/HEAD/SafeRapidPdf.UnitTests/testdata/1.pdf -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/testdata/3.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Color-Of-Code/SafeRapidPdf/HEAD/SafeRapidPdf.UnitTests/testdata/3.pdf -------------------------------------------------------------------------------- 
/SafeRapidPdf/assembly.cs: -------------------------------------------------------------------------------- 1 | using System.Runtime.CompilerServices; 2 | [assembly: InternalsVisibleTo(assemblyName: "SafeRapidPdf.UnitTests")] 3 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: nuget 4 | directory: "/" 5 | schedule: 6 | interval: weekly 7 | open-pull-requests-limit: 10 8 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | #### Short description of what this resolves: 2 | 3 | 4 | #### Changes proposed in this pull request: 5 | 6 | - 7 | - 8 | - 9 | 10 | **Fixes**: # 11 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "omnisharp.enableRoslynAnalyzers": true, 3 | "csharp.maxProjectFileCountForDiagnosticAnalysis": 10000, 4 | "omnisharp.analyzeOpenDocumentsOnly": false 5 | } 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.suo 2 | /PdfInfoTool/obj 3 | /PdfInfoTool/bin 4 | /SafeRapidPdf/obj 5 | /SafeRapidPdf/bin 6 | /SafeRapidPdf.UnitTests/bin 7 | /SafeRapidPdf.UnitTests/obj 8 | /TestData/ 9 | *.psess 10 | *.vsp 11 | /.vs/ -------------------------------------------------------------------------------- /PdfInfoTool/Command/Dump.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | namespace PdfInfoTool; 3 | 4 | internal static partial class Command 5 | { 6 | internal static int 
RunDumpAndReturnExitCode(DumpOptions opts) 7 | { 8 | throw new NotImplementedException(); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /SafeRapidPdf/Logical/PdfStructure.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Document; 2 | 3 | namespace SafeRapidPdf.Logical; 4 | 5 | public class PdfStructure : PdfBaseObject 6 | { 7 | public PdfStructure() 8 | : base(PdfObjectType.Structure) 9 | { 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /SafeRapidPdf/Services/IIndirectReferenceResolver.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Services; 4 | 5 | public interface IIndirectReferenceResolver 6 | { 7 | PdfXRef XRef { get; } 8 | 9 | PdfIndirectObject GetObject(int objectNumber, int generationNumber); 10 | } 11 | -------------------------------------------------------------------------------- /PdfInfoTool/Program.cs: -------------------------------------------------------------------------------- 1 | using CommandLine; 2 | using PdfInfoTool; 3 | 4 | return Parser.Default.ParseArguments(args) 5 | .MapResult( 6 | (DumpOptions opts) => Command.RunDumpAndReturnExitCode(opts), 7 | (ShowOptions opts) => Command.RunShowAndReturnExitCode(opts), 8 | _ => 1); 9 | -------------------------------------------------------------------------------- /SafeRapidPdf/SafeRapidPdf.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | net8.0 5 | true 6 | true 7 | AllEnabledByDefault 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfArtBox.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | /// 
6 | /// Extent of the page’s meaningful content 7 | /// 8 | public sealed class PdfArtBox : PdfRectangle 9 | { 10 | public PdfArtBox(PdfArray box) 11 | : base(PdfObjectType.ArtBox, box) 12 | { 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfCropBox.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | /// 6 | /// visible region of default user space 7 | /// 8 | public sealed class PdfCropBox : PdfRectangle 9 | { 10 | public PdfCropBox(PdfArray box) 11 | : base(PdfObjectType.CropBox, box) 12 | { 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfTrimBox.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | /// 6 | /// intended dimensions of the finished page after trimming 7 | /// 8 | public sealed class PdfTrimBox : PdfRectangle 9 | { 10 | public PdfTrimBox(PdfArray box) 11 | : base(PdfObjectType.TrimBox, box) 12 | { 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /SafeRapidPdf/Parsing/ParsingException.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Parsing; 2 | 3 | public class ParsingException : Exception 4 | { 5 | public ParsingException(string message) 6 | : base(message) { } 7 | 8 | public ParsingException() 9 | { 10 | } 11 | 12 | public ParsingException(string message, Exception innerException) 13 | : base(message, innerException) 14 | { 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfMediaBox.cs: -------------------------------------------------------------------------------- 1 | using 
SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | /// 6 | /// boundaries of the physical medium on which the page is 7 | /// intended to be displayed or printed 8 | /// 9 | public sealed class PdfMediaBox : PdfRectangle 10 | { 11 | public PdfMediaBox(PdfArray box) 12 | : base(PdfObjectType.MediaBox, box) 13 | { 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /.github/workflows/dotnetcore.yml: -------------------------------------------------------------------------------- 1 | name: .NET Core 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | 8 | runs-on: ubuntu-latest 9 | 10 | steps: 11 | - uses: actions/checkout@v3 12 | - name: Setup .NET Core 13 | uses: actions/setup-dotnet@v3 14 | with: 15 | dotnet-version: 8.0.x 16 | - name: Build with dotnet 17 | run: | 18 | dotnet build 19 | dotnet test /p:CollectCoverage=true 20 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfBleedBox.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | /// 6 | /// region to which the contents of the page should be clipped 7 | /// when output in a production environment 8 | /// 9 | public sealed class PdfBleedBox : PdfRectangle 10 | { 11 | public PdfBleedBox(PdfArray box) 12 | : base(PdfObjectType.BleedBox, box) 13 | { 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /SafeRapidPdf/Filters/IFilter.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Filters; 4 | 5 | public interface IFilter 6 | { 7 | /// 8 | /// Decodes the input buffer and returns a decoded output buffer 9 | /// 10 | /// 11 | /// 12 | /// 13 | byte[] Decode(byte[] input, PdfDictionary options); 14 | } 15 | 
-------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfRotate.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | public sealed class PdfRotate : PdfBaseObject 6 | { 7 | internal PdfRotate(PdfNumeric value) 8 | : base(PdfObjectType.Rotate) 9 | { 10 | Value = value.ToInt32(); 11 | } 12 | 13 | public int Value { get; } 14 | 15 | public override string ToString() 16 | { 17 | return $"Rotate {Value} degrees"; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfNull.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Objects; 2 | 3 | public sealed class PdfNull : PdfObject 4 | { 5 | public static readonly PdfNull Null = new(); 6 | 7 | private PdfNull() 8 | : base(PdfObjectType.Null) 9 | { 10 | } 11 | 12 | internal static PdfNull Parse(Parsing.Lexer lexer) 13 | { 14 | lexer.Expects("null"); 15 | return Null; 16 | } 17 | 18 | public override string ToString() 19 | { 20 | return "null"; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfCount.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | 3 | namespace SafeRapidPdf.Document; 4 | 5 | public sealed class PdfCount : PdfBaseObject 6 | { 7 | public PdfCount(PdfNumeric count) 8 | : base(PdfObjectType.Count) 9 | { 10 | ArgumentNullException.ThrowIfNull(count); 11 | 12 | Value = count.ToInt32(); 13 | } 14 | 15 | public int Value { get; } 16 | 17 | public override string ToString() 18 | { 19 | return $"Count : {Value}"; 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfBaseObject.cs: 
-------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Document; 2 | 3 | public abstract class PdfBaseObject : IPdfObject 4 | { 5 | protected PdfBaseObject(PdfObjectType type) 6 | { 7 | ObjectType = type; 8 | } 9 | 10 | public PdfObjectType ObjectType { get; } 11 | 12 | public bool IsContainer { get; protected set; } 13 | 14 | public string Text => ToString(); 15 | 16 | public virtual IReadOnlyList Items 17 | => !IsContainer 18 | ? null 19 | : throw new NotImplementedException(); 20 | } 21 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfData.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Objects; 2 | 3 | public sealed class PdfData : PdfObject 4 | { 5 | private PdfData(byte[] data) 6 | : base(PdfObjectType.Data) 7 | { 8 | Data = data; 9 | } 10 | 11 | public byte[] Data { get; } 12 | 13 | internal static PdfData Parse(Parsing.Lexer lexer, int length) 14 | { 15 | byte[] data = lexer.ReadBytes(length); 16 | return new PdfData(data); 17 | } 18 | 19 | public override string ToString() 20 | { 21 | return "Raw data"; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfTrailer.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Parsing; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfTrailer : PdfDictionary 6 | { 7 | private PdfTrailer(PdfDictionary dictionary) 8 | : base(dictionary, PdfObjectType.Trailer) 9 | { 10 | } 11 | 12 | internal static new PdfTrailer Parse(Lexer lexer) 13 | { 14 | lexer.Expects("<<"); 15 | var dictionary = PdfDictionary.Parse(lexer); 16 | return new PdfTrailer(dictionary); 17 | } 18 | 19 | public override string ToString() 20 | { 21 | return "trailer"; 22 | } 23 | } 24 | 
-------------------------------------------------------------------------------- /PdfInfoTool/PdfInfoTool.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Exe 5 | net8.0 6 | true 7 | true 8 | AllEnabledByDefault 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | -------------------------------------------------------------------------------- /SafeRapidPdf/Parsing/UnexpectedTokenException.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Parsing; 2 | 3 | public class UnexpectedTokenException : ParsingException 4 | { 5 | public UnexpectedTokenException(string expectedToken, string actualToken) 6 | : base($"Expected '{expectedToken}'. Was '{actualToken}'") { } 7 | 8 | public UnexpectedTokenException() 9 | { 10 | } 11 | 12 | public UnexpectedTokenException(string message) 13 | : base(message) 14 | { 15 | } 16 | 17 | public UnexpectedTokenException(string message, System.Exception innerException) 18 | : base(message, innerException) 19 | { 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | 5 | --- 6 | 7 | **Is your feature request related to a problem? Please describe.** 8 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 9 | 10 | **Describe the solution you'd like** 11 | A clear and concise description of what you want to happen. 12 | 13 | **Describe alternatives you've considered** 14 | A clear and concise description of any alternative solutions or features you've considered. 15 | 16 | **Additional context** 17 | Add any other context or sketches about the feature request here. 
18 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfComment.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Objects; 2 | 3 | /// 4 | /// Comments start with % and end on EOL char (CR or LF) 5 | /// 6 | public sealed class PdfComment : PdfObject 7 | { 8 | private readonly string _text; 9 | 10 | private PdfComment(string text) 11 | : base(PdfObjectType.Comment) 12 | { 13 | _text = text; 14 | } 15 | 16 | public bool IsEOF => _text == "%EOF"; 17 | 18 | internal static PdfComment Parse(Parsing.Lexer lexer) 19 | { 20 | return new PdfComment(lexer.ReadUntilEol()); 21 | } 22 | 23 | public override string ToString() 24 | { 25 | return $"%{_text}"; 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 1. Take the following PDF / a PDF with these features: xxx 13 | 2. Run xxx with parameters yyy 14 | 3. See error 15 | 16 | It is very helpful if you can provide an example PDF. 17 | 18 | **Expected behavior** 19 | A clear and concise description of what you expected to happen. 20 | 21 | **Environment** 22 | - OS: [e.g. Ubuntu 18.04, Windows 10] 23 | - .NET core version 24 | 25 | **Additional context** 26 | Add any other context about the problem here. 
27 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfKeyValuePair.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | /// 6 | /// Object not described in the specification but eases use and 7 | /// implementation in .NET 8 | /// 9 | public sealed class PdfKeyValuePair : PdfObject 10 | { 11 | public PdfKeyValuePair(PdfName key, PdfObject value) 12 | : base(PdfObjectType.KeyValuePair) 13 | { 14 | IsContainer = true; 15 | Key = key; 16 | Value = value; 17 | } 18 | 19 | public PdfName Key { get; } 20 | 21 | public PdfObject Value { get; } 22 | 23 | public override IReadOnlyList Items => new[] { Value }; 24 | 25 | public override string ToString() 26 | { 27 | return Key.Text; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfStartXRef.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Parsing; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfStartXRef : PdfObject 6 | { 7 | private PdfStartXRef(PdfNumeric value) 8 | : base(PdfObjectType.StartXRef) 9 | { 10 | IsContainer = true; 11 | Numeric = value; 12 | } 13 | 14 | public PdfNumeric Numeric { get; } 15 | 16 | public override IReadOnlyList Items => new[] { Numeric }; 17 | 18 | public static PdfStartXRef Parse(Lexer lexer) 19 | { 20 | ArgumentNullException.ThrowIfNull(lexer); 21 | 22 | var n = PdfNumeric.Parse(lexer); 23 | return new PdfStartXRef(n); 24 | } 25 | 26 | public override string ToString() 27 | { 28 | return "startxref"; 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /SafeRapidPdf/PdfObjectType.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf; 2 | 3 | public enum 
PdfObjectType 4 | { 5 | // low level objects 6 | Null, 7 | Boolean, 8 | Comment, 9 | Numeric, 10 | HexadecimalString, 11 | LiteralString, 12 | Name, 13 | Data, 14 | 15 | IndirectObject, 16 | IndirectReference, 17 | 18 | Array, 19 | Dictionary, 20 | KeyValuePair, 21 | 22 | Stream, 23 | 24 | XRef, 25 | XRefSection, 26 | XRefEntry, 27 | 28 | Trailer, 29 | 30 | StartXRef, 31 | 32 | // high level objects 33 | File, 34 | Structure, 35 | 36 | Catalog, 37 | PageTree, 38 | Page, 39 | 40 | ArtBox, 41 | BleedBox, 42 | CropBox, 43 | MediaBox, 44 | TrimBox, 45 | 46 | Rotate, 47 | 48 | Count, 49 | 50 | Contents 51 | } 52 | -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/Objects/PdfNumericTests.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | using Xunit; 3 | 4 | namespace SafeRapidPdf.UnitTests.Objects; 5 | 6 | public class PdfNumericTests 7 | { 8 | [Fact] 9 | public void ToInt32Tests() 10 | { 11 | Assert.Equal(1, PdfNumeric.Parse("1").ToInt32()); 12 | Assert.Equal(1000, PdfNumeric.Parse("1000").ToInt32()); 13 | } 14 | 15 | [Fact] 16 | public void CanCastImplictlyToDouble() 17 | { 18 | Assert.Equal(1.1d, PdfNumeric.Parse("1.1")); 19 | Assert.Equal(1000d, PdfNumeric.Parse("1000")); 20 | } 21 | 22 | [Fact] 23 | public void IsRealTests() 24 | { 25 | Assert.True(PdfNumeric.Parse("1.1").IsReal); 26 | } 27 | 28 | [Fact] 29 | public void IsIntergerTests() 30 | { 31 | Assert.True(PdfNumeric.Parse("1").IsInteger); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /SafeRapidPdf/IPdfObject.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace SafeRapidPdf; 4 | 5 | /// 6 | /// Interface common to all PDF objects high or low level. 
7 | /// The presence of this interface eases the implementation 8 | /// of code crawling through all objects. 9 | /// 10 | public interface IPdfObject 11 | { 12 | /// 13 | /// Returns the type of this object 14 | /// 15 | PdfObjectType ObjectType { get; } 16 | 17 | /// 18 | /// Description of this object 19 | /// 20 | string Text { get; } 21 | 22 | /// 23 | /// Does this object have descendants 24 | /// 25 | bool IsContainer { get; } 26 | 27 | /// 28 | /// The children of this object 29 | /// 30 | IReadOnlyList Items { get; } 31 | } 32 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfBoolean.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Parsing; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfBoolean : PdfObject 6 | { 7 | public static readonly PdfBoolean True = new(true); 8 | public static readonly PdfBoolean False = new(false); 9 | 10 | private PdfBoolean(bool value) 11 | : base(PdfObjectType.Boolean) 12 | { 13 | Value = value; 14 | } 15 | 16 | public bool Value { get; } 17 | 18 | public static PdfBoolean Parse(string token) 19 | { 20 | return token switch 21 | { 22 | "true" => True, 23 | "false" => False, 24 | _ => throw new ParsingException($"Expected true or false. Was {token}."), 25 | }; 26 | } 27 | 28 | public override string ToString() 29 | { 30 | return Value ? 
"true" : "false"; 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfName.cs: -------------------------------------------------------------------------------- 1 | using System.Text.RegularExpressions; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfName : PdfObject 6 | { 7 | private readonly string _rawName; 8 | 9 | private PdfName(string name) 10 | : base(PdfObjectType.Name) 11 | { 12 | _rawName = name; 13 | } 14 | 15 | public string Name 16 | // decode "#xx" escapes: '#' followed by exactly two HEXADECIMAL digits (e.g. "#2F" is '/'); anything else stays as written 17 | => Regex.Replace(_rawName, @"#([0-9A-Fa-f]{2})", x => 18 | { 19 | byte val = Convert.ToByte(x.Groups[1].Value, 16); 20 | return ((char)val).ToString(); 21 | }); 22 | 23 | internal static PdfName Parse(Parsing.Lexer lexer) 24 | { 25 | string name = lexer.ReadToken(); 26 | return new PdfName(name); 27 | } 28 | 29 | public override string ToString() 30 | { 31 | return Name; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfArray.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Parsing; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfArray : PdfObject 6 | { 7 | private readonly List _items; 8 | 9 | private PdfArray(List items) 10 | : base(PdfObjectType.Array) 11 | { 12 | IsContainer = true; 13 | _items = items; 14 | } 15 | 16 | public override IReadOnlyList Items => _items; 17 | 18 | public static PdfArray Parse(Lexer lexer) 19 | { 20 | ArgumentNullException.ThrowIfNull(lexer); 21 | 22 | var list = new List(); 23 | PdfObject value; 24 | while ((value = ParseAny(lexer, "]")) != null) 25 | { 26 | list.Add(value); 27 | } 28 | return new PdfArray(list); 29 | } 30 | 31 | public override string ToString() 32 | { 33 | return "[...]"; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- 
/SafeRapidPdf/Document/PdfContents.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | 3 | using SafeRapidPdf.Objects; 4 | 5 | namespace SafeRapidPdf.Document; 6 | 7 | public sealed class PdfContents : PdfBaseObject 8 | { 9 | public PdfContents(IPdfObject obj) 10 | : base(PdfObjectType.Contents) 11 | { 12 | IsContainer = true; 13 | 14 | if (obj is PdfIndirectReference reference) 15 | { 16 | obj = reference.ReferencedObject.Object; 17 | } 18 | 19 | Streams = obj is PdfArray array 20 | ? array.Items 21 | : obj is PdfStream stream 22 | ? (new[] { stream }) 23 | : throw new InvalidDataException("Contents must be either a stream or an array of streams"); 24 | } 25 | 26 | public IReadOnlyList Streams { get; } 27 | 28 | public override IReadOnlyList Items => Streams; 29 | 30 | public override string ToString() 31 | { 32 | return "Contents"; 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /SafeRapidPdf/Attributes/ParameterTypeAttribute.cs: -------------------------------------------------------------------------------- 1 | namespace SafeRapidPdf.Attributes; 2 | 3 | [AttributeUsage(AttributeTargets.Property)] 4 | public sealed class ParameterTypeAttribute : Attribute 5 | { 6 | public ParameterTypeAttribute( 7 | bool required, 8 | bool inheritable = false, 9 | string version = "", 10 | bool obsolete = false) 11 | { 12 | Required = required; 13 | Inheritable = inheritable; 14 | Version = version; 15 | Obsolete = obsolete; 16 | } 17 | 18 | /// 19 | /// Required or Optional 20 | /// 21 | public bool Required { get; } 22 | 23 | /// 24 | /// Inheritable attribute 25 | /// 26 | public bool Inheritable { get; } 27 | 28 | /// 29 | /// PDF version from which this parameter is allowed 30 | /// 31 | public string Version { get; } 32 | 33 | /// 34 | /// Was this parameter obsoleted? 
35 | /// 36 | public bool Obsolete { get; } 37 | } 38 | -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | // Use IntelliSense to find out which attributes exist for C# debugging 6 | // Use hover for the description of the existing attributes 7 | // For further information visit https://github.com/OmniSharp/omnisharp-vscode/blob/master/debugger-launchjson.md 8 | "name": ".NET Core Launch (console)", 9 | "type": "coreclr", 10 | "request": "launch", 11 | "preLaunchTask": "build", 12 | // If you have changed target frameworks, make sure to update the program path. 13 | "program": "${workspaceFolder}/PdfInfoTool/bin/Debug/net8.0/PdfInfoTool.dll", 14 | "args": [], 15 | "cwd": "${workspaceFolder}/PdfInfoTool", 16 | // For more information about the 'console' field, see https://aka.ms/VSCode-CS-LaunchJson-Console 17 | "console": "internalConsole", 18 | "stopAtEntry": false 19 | }, 20 | { 21 | "name": ".NET Core Attach", 22 | "type": "coreclr", 23 | "request": "attach" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /PdfInfoTool/Options.cs: -------------------------------------------------------------------------------- 1 | using CommandLine; 2 | 3 | namespace PdfInfoTool; 4 | 5 | 6 | internal interface IOptions 7 | { 8 | [Option('v', "verbose", 9 | HelpText = "Verbose message output.")] 10 | bool Verbose { get; set; } 11 | 12 | [Option('q', "quiet", 13 | HelpText = "Suppresses summary messages.")] 14 | bool Quiet { get; set; } 15 | 16 | [Value(0, MetaName = "input pdf file", 17 | HelpText = "Input pdf file to be processed.", 18 | Required = true)] 19 | string FileName { get; set; } 20 | } 21 | 22 | [Verb("dump", HelpText = "Dump an object out.")] 23 | internal class DumpOptions : IOptions 24 | { 25 | public bool Verbose 
{ get; set; } 26 | public bool Quiet { get; set; } 27 | public string FileName { get; set; } 28 | } 29 | 30 | [Verb("show", HelpText = "Show object contents in a human readable way.")] 31 | internal class ShowOptions : IOptions 32 | { 33 | public bool Verbose { get; set; } 34 | public bool Quiet { get; set; } 35 | public string FileName { get; set; } 36 | 37 | [Value(1, MetaName = "Type of object to display", 38 | HelpText = "Type: xref.", 39 | Required = true)] 40 | public string What { get; set; } 41 | } 42 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = space 5 | indent_size = 4 6 | end_of_line = lf 7 | charset = utf-8 8 | trim_trailing_whitespace = true 9 | insert_final_newline = true 10 | 11 | [*.cs] 12 | dotnet_diagnostic.IDE0008.severity = none # use explicit type instead of var 13 | dotnet_diagnostic.IDE0011.severity = none # add braces to 'if' statement 14 | dotnet_diagnostic.IDE0005.severity = suggestion # using directive is unnecessary 15 | dotnet_diagnostic.IDE0049.severity = suggestion # name can be simplified 16 | dotnet_diagnostic.IDE0055.severity = suggestion # fix formatting 17 | dotnet_diagnostic.IDE0130.severity = error # namespace match directory structure 18 | 19 | dotnet_diagnostic.CA1062.severity = error # null check 20 | dotnet_diagnostic.CA1303.severity = none # literal string, introduce a resource table 21 | dotnet_diagnostic.CA1707.severity = none # _ in method name (common in unit tests) 22 | 23 | # IDE0160: Convert to file-scoped namespace 24 | csharp_style_namespace_declarations = file_scoped:warning 25 | 26 | dotnet_analyzer_diagnostic.severity = warning 27 | 28 | dotnet_code_quality.ca1711.allowed_suffixes = Dictionary|Stream 29 | 30 | [*.AssemblyInfo.cs] 31 | generated_code = true 32 | 
--------------------------------------------------------------------------------
/SafeRapidPdf/Objects/PdfHexadecimalString.cs:
--------------------------------------------------------------------------------
using System.Text;

namespace SafeRapidPdf.Objects;

/// <summary>
/// A PDF hexadecimal string. The hex digits are stored exactly as they were read
/// (padded to an even count); they are not decoded into bytes or characters.
/// </summary>
public sealed class PdfHexadecimalString : PdfObject
{
    private readonly string _text;

    private PdfHexadecimalString(string hexString)
        : base(PdfObjectType.HexadecimalString)
    {
        _text = hexString;
    }

    /// <summary>
    /// Concatenates the tokens returned by the lexer until a ">" token is read.
    /// </summary>
    /// <param name="lexer">The lexer the tokens are read from.</param>
    /// <returns>The object holding the collected hex digits.</returns>
    internal static PdfHexadecimalString Parse(Parsing.Lexer lexer)
    {
        var digits = new StringBuilder();
        for (string token = lexer.ReadToken(); token != ">"; token = lexer.ReadToken())
        {
            _ = digits.Append(token);
        }

        // An odd number of digits is completed with a trailing '0'.
        bool hasOddLength = (digits.Length % 2) != 0;
        if (hasOddLength)
        {
            _ = digits.Append('0');
        }

        return new PdfHexadecimalString(digits.ToString());
    }

    /// <summary>Returns the stored hex digits.</summary>
    public override string ToString() => _text;
}
--------------------------------------------------------------------------------
/.vscode/tasks.json:
--------------------------------------------------------------------------------
1 | { 2 | "version": "2.0.0", 3 | "tasks": [ 4 | { 5 | "label": "build", 6 | "command": "dotnet", 7 | "type": "process", 8 | "args": [ 9 | "build", 10 | "${workspaceFolder}/PdfInfoTool/PdfInfoTool.csproj", 11 | "/property:GenerateFullPaths=true", 12 | "/consoleloggerparameters:NoSummary" 13 | ], 14 | "problemMatcher": "$msCompile" 15 | }, 16 | { 17 | "label": "publish", 18 | "command": "dotnet", 19 | "type": "process", 20 | "args": [ 21 | "publish", 22 | "${workspaceFolder}/PdfInfoTool/PdfInfoTool.csproj", 23 | 
"/property:GenerateFullPaths=true", 24 | "/consoleloggerparameters:NoSummary" 25 | ], 26 | "problemMatcher": "$msCompile" 27 | }, 28 | { 29 | "label": "watch", 30 | "command": "dotnet", 31 | "type": "process", 32 | "args": [ 33 | "watch", 34 | "run", 35 | "--project", 36 | "${workspaceFolder}/PdfInfoTool/PdfInfoTool.csproj" 37 | ], 38 | "problemMatcher": "$msCompile" 39 | } 40 | ] 41 | } -------------------------------------------------------------------------------- /.devcontainer/devcontainer.json: -------------------------------------------------------------------------------- 1 | // For format details, see https://aka.ms/devcontainer.json. For config options, see the 2 | // README at: https://github.com/devcontainers/templates/tree/main/src/dotnet 3 | { 4 | "name": "C# (.NET)", 5 | // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile 6 | "image": "mcr.microsoft.com/devcontainers/dotnet:1-8.0-bookworm", 7 | 8 | // Features to add to the dev container. More info: https://containers.dev/features. 9 | // "features": {}, 10 | 11 | // Use 'forwardPorts' to make a list of ports inside the container available locally. 12 | // "forwardPorts": [5000, 5001], 13 | // "portsAttributes": { 14 | // "5001": { 15 | // "protocol": "https" 16 | // } 17 | // } 18 | 19 | "postCreateCommand": "dotnet restore", 20 | "customizations": { 21 | "vscode": { 22 | "extensions": [ 23 | "eamodio.gitlens", 24 | "ms-dotnettools.csharp", 25 | "ms-dotnettools.csdevkit", 26 | "EditorConfig.EditorConfig", 27 | "formulahendry.dotnet-test-explorer", 28 | "ryanluker.vscode-coverage-gutters", 29 | "urbanoanderson.vscode-coverlet" 30 | ] 31 | } 32 | } 33 | 34 | // Configure tool-specific properties. 35 | // "customizations": {}, 36 | 37 | // Uncomment to connect as root instead. More info: https://aka.ms/dev-containers-non-root. 
38 | // "remoteUser": "root" 39 | } 40 | -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/File/PdfFileTests.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | using SafeRapidPdf.Parsing; 3 | using SafeRapidPdf.UnitTests.Util; 4 | 5 | using Xunit; 6 | 7 | namespace SafeRapidPdf.UnitTests.File; 8 | 9 | public class PdfFileTests 10 | { 11 | [Theory] 12 | [InlineData( 13 | """ 14 | %PDF- 15 | trailer<>>>>> 16 | %%EOF 17 | """ 18 | )] 19 | public void Parsing_TinyFile(string pdf) 20 | { 21 | var r = PdfFile.Parse(pdf.ToStream()); 22 | Assert.True(r.Items.Count == 3); 23 | } 24 | 25 | [Theory] 26 | [InlineData( 27 | """ 28 | %PDF- 29 | trailer<>>>>> 30 | """ 31 | )] 32 | public void Parsing_TinyFile_Without_EOF_YieldsException(string pdf) 33 | { 34 | var exception = Assert.Throws(() => 35 | { 36 | _ = PdfFile.Parse(pdf.ToStream()); 37 | }); 38 | Assert.Equal("End of file reached without EOF marker", exception.Message); 39 | } 40 | 41 | [Theory] 42 | [InlineData( 43 | """ 44 | Not a PDF 45 | """ 46 | )] 47 | public void Parsing_Non_Pdf_Yields_Exception(string pdf) 48 | { 49 | _ = Assert.Throws(() => 50 | { 51 | _ = PdfFile.Parse(pdf.ToStream()); 52 | }); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfCatalog.cs: -------------------------------------------------------------------------------- 1 | using System.Linq; 2 | 3 | using SafeRapidPdf.Objects; 4 | 5 | namespace SafeRapidPdf.Document; 6 | 7 | public sealed class PdfCatalog : PdfBaseObject 8 | { 9 | private readonly List _items = new(); 10 | 11 | public PdfCatalog(PdfDictionary catalog) 12 | : base(PdfObjectType.Catalog) 13 | { 14 | ArgumentNullException.ThrowIfNull(catalog); 15 | 16 | IsContainer = true; 17 | catalog.ExpectsType("Catalog"); 18 | 19 | foreach (PdfKeyValuePair pair in catalog.Items.Cast()) 20 | { 21 | 
switch (pair.Key.Text) 22 | { 23 | case "Type": // skip Type Catalog 24 | break; 25 | case "Pages": 26 | Pages = new PdfPageTree((PdfIndirectReference)catalog["Pages"]); 27 | break; 28 | default: 29 | _items.Add(pair); 30 | break; 31 | } 32 | } 33 | } 34 | 35 | public PdfPageTree Pages { get; } 36 | 37 | public override IReadOnlyList Items 38 | { 39 | get 40 | { 41 | var list = new List(_items.Count + 1); 42 | list.AddRange(_items); 43 | list.Add(Pages); 44 | return list; 45 | } 46 | } 47 | 48 | public override string ToString() 49 | { 50 | return "/"; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/Util/StringExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Text; 4 | using SafeRapidPdf.Parsing; 5 | 6 | namespace SafeRapidPdf.UnitTests.Util; 7 | 8 | 9 | public static class StringExtensions 10 | { 11 | public static Stream ToStream(this string input) 12 | { 13 | byte[] byteArray = Encoding.UTF8.GetBytes(input); 14 | return new MemoryStream(byteArray); 15 | } 16 | 17 | // used to inject a lexer into low level parsers from a string 18 | public static Lexer ToLexer(this string input) 19 | { 20 | return new Lexer(input.ToStream(), true); 21 | } 22 | 23 | // used to inject a lexer into low level parsers from a byte array 24 | public static Lexer ToLexer(this byte[] input) 25 | { 26 | var s = new MemoryStream(input); 27 | return new Lexer(s, true); 28 | } 29 | 30 | public static Lexer Base64ToLexer(this string input) 31 | { 32 | var bytes = Convert.FromBase64String(input); 33 | var s = new MemoryStream(bytes); 34 | return new Lexer(s, true); 35 | } 36 | 37 | public static string ToHexString(this byte[] ba) 38 | { 39 | if (ba == null) throw new ArgumentNullException(nameof(ba)); 40 | 41 | var hex = new StringBuilder(ba.Length * 2); 42 | foreach (byte b in ba) 43 | { 44 | _ = 
hex.AppendFormat(System.Globalization.CultureInfo.InvariantCulture, "{0:x2}", b); 45 | } 46 | return hex.ToString(); 47 | } 48 | } 49 | 
--------------------------------------------------------------------------------
/SafeRapidPdf/Objects/PdfIndirectObject.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;
using System.Globalization;
using SafeRapidPdf.Parsing;

namespace SafeRapidPdf.Objects;

/// <summary>
/// An indirect object: a wrapped object labelled with an object number and a
/// generation number, written as "N G obj ... endobj".
/// </summary>
public sealed class PdfIndirectObject : PdfObject
{
    private PdfIndirectObject(int objectNumber, int generationNumber, IPdfObject obj)
        : base(PdfObjectType.IndirectObject)
    {
        IsContainer = true;

        ObjectNumber = objectNumber;
        GenerationNumber = generationNumber;
        Object = obj;
    }

    public int ObjectNumber { get; }

    public int GenerationNumber { get; }

    /// <summary>The object found between "obj" and "endobj".</summary>
    public IPdfObject Object { get; }

    /// <summary>A one-element list holding <see cref="Object"/>.</summary>
    public override IReadOnlyList<IPdfObject> Items
    {
        get { return new[] { Object }; }
    }

    /// <summary>
    /// Reads the object number token, then continues with
    /// <see cref="Parse(Lexer, int)"/>.
    /// </summary>
    internal static PdfIndirectObject Parse(Lexer lexer)
    {
        string numberToken = lexer.ReadToken();
        return Parse(lexer, int.Parse(numberToken, CultureInfo.InvariantCulture));
    }

    /// <summary>
    /// Reads the generation number, the "obj" keyword, the wrapped object and the
    /// closing "endobj" keyword for an object whose number is already known.
    /// </summary>
    internal static PdfIndirectObject Parse(Lexer lexer, int objectNumber)
    {
        string generationToken = lexer.ReadToken();
        int generation = int.Parse(generationToken, CultureInfo.InvariantCulture);

        lexer.Expects("obj");
        PdfObject wrapped = ParseAny(lexer);
        lexer.Expects("endobj");

        return new PdfIndirectObject(objectNumber, generation, wrapped);
    }

    public override string ToString() => $"{ObjectNumber} {GenerationNumber} obj";
}
--------------------------------------------------------------------------------
/SafeRapidPdf.UnitTests/SafeRapidPdf.UnitTests.csproj:
--------------------------------------------------------------------------------
1 | 2 | 3 | 4 | net8.0 5 | true 6 | true 7 | AllEnabledByDefault 8 | 9 | 10 | 11 | 12 | 
runtime; build; native; contentfiles; analyzers; buildtransitive 13 | all 14 | 15 | 16 | runtime; build; native; contentfiles; analyzers; buildtransitive 17 | all 18 | 19 | 20 | 21 | 22 | all 23 | runtime; build; native; contentfiles; analyzers 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 
--------------------------------------------------------------------------------
/SafeRapidPdf/Objects/PdfNumeric.cs:
--------------------------------------------------------------------------------
using System.Globalization;

namespace SafeRapidPdf.Objects;

/// <summary>
/// A PDF numeric object. The token is kept as text and converted on demand with
/// the invariant culture.
/// </summary>
public sealed class PdfNumeric : PdfObject
{
    private readonly string _text;

    private PdfNumeric(string text)
        : base(PdfObjectType.Numeric)
    {
        _text = text;
    }

    /// <summary>True when the token contains no decimal point.</summary>
    public bool IsInteger => !IsReal;

    /// <summary>True when the token contains a decimal point.</summary>
    // Ordinal search: '.' is a literal character here, not culture-dependent text.
    public bool IsReal => _text.Contains('.', StringComparison.Ordinal);

    /// <summary>
    /// Converts to <see cref="double"/>; a null reference converts to 0.
    /// </summary>
    public static implicit operator double(PdfNumeric numeric)
    {
        return numeric is null
            ? default
            : numeric.ToDouble();
    }

    /// <summary>Wraps the next token returned by the lexer.</summary>
    internal static PdfNumeric Parse(Parsing.Lexer lexer)
    {
        return new PdfNumeric(lexer.ReadToken());
    }

    /// <summary>Wraps an already extracted token. The token is not validated here.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
    public static PdfNumeric Parse(string token)
    {
        ArgumentNullException.ThrowIfNull(token);

        return new PdfNumeric(token);
    }

    public long ToInt64()
    {
        return long.Parse(_text, CultureInfo.InvariantCulture);
    }

    public int ToInt32()
    {
        return int.Parse(_text, CultureInfo.InvariantCulture);
    }

    public decimal ToDecimal()
    {
        return decimal.Parse(_text, CultureInfo.InvariantCulture);
    }

    public double ToDouble()
    {
        return double.Parse(_text, CultureInfo.InvariantCulture);
    }

    /// <summary>Returns the token exactly as it was read.</summary>
    public override string ToString()
    {
        return _text;
    }
}
--------------------------------------------------------------------------------
/SafeRapidPdf/Objects/PdfIndirectReference.cs:
--------------------------------------------------------------------------------
1 | using System.Globalization; 2 | using SafeRapidPdf.Services; 3 | 4 | namespace SafeRapidPdf.Objects; 5 | 6 | /// 7 | /// Immutable type 8 | /// 9 | public sealed class PdfIndirectReference : PdfObject 10 | { 11 | private PdfIndirectReference(int objectNumber, int generationNumber) 12 | : base(PdfObjectType.IndirectReference) 13 | { 14 | ObjectNumber = objectNumber; 15 | GenerationNumber = generationNumber; 16 | } 17 | 18 | public int ObjectNumber { get; } 19 | 20 | public int GenerationNumber { get; } 21 | 22 | public PdfIndirectObject ReferencedObject 23 | => Resolver.GetObject(ObjectNumber, GenerationNumber); 24 | 25 | internal IIndirectReferenceResolver Resolver { get; set; } 26 | 27 | public T Dereference() 28 | where T : class 29 | { 30 | return ReferencedObject.Object as T; 31 | } 32 | 33 | internal static PdfIndirectReference Parse(Parsing.Lexer lexer) 34 | { 35 | int objectNumber = int.Parse(lexer.ReadToken(), 
CultureInfo.InvariantCulture); 36 | return Parse(lexer, objectNumber); 37 | } 38 | 39 | internal static PdfIndirectReference Parse(Parsing.Lexer lexer, int objectNumber) 40 | { 41 | int generationNumber = int.Parse(lexer.ReadToken(), CultureInfo.InvariantCulture); 42 | lexer.Expects("R"); 43 | return new PdfIndirectReference(objectNumber, generationNumber); 44 | } 45 | 46 | public override string ToString() 47 | { 48 | return $"{ObjectNumber} {GenerationNumber} R"; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfRectangle.cs: -------------------------------------------------------------------------------- 1 | using System.Drawing; 2 | using System.IO; 3 | 4 | using SafeRapidPdf.Objects; 5 | 6 | namespace SafeRapidPdf.Document; 7 | 8 | public abstract class PdfRectangle : PdfBaseObject 9 | { 10 | protected PdfRectangle(PdfObjectType type, PdfArray box) 11 | : base(type) 12 | { 13 | ArgumentNullException.ThrowIfNull(box); 14 | 15 | if (box.Items.Count != 4) 16 | { 17 | throw new InvalidDataException("A rectangle must have 4 values!"); 18 | } 19 | 20 | Llx = (PdfNumeric)box.Items[0]; 21 | Lly = (PdfNumeric)box.Items[1]; 22 | Urx = (PdfNumeric)box.Items[2]; 23 | Ury = (PdfNumeric)box.Items[3]; 24 | } 25 | 26 | public PdfNumeric Llx { get; } // lower left x 27 | 28 | public PdfNumeric Lly { get; } // lower left y 29 | 30 | public PdfNumeric Urx { get; } // upper right x 31 | 32 | public PdfNumeric Ury { get; } // upper right y 33 | 34 | public double X => Llx; 35 | 36 | public double Y => Lly; 37 | 38 | public double Width => Urx - Llx; 39 | 40 | public double Height => Ury - Lly; 41 | 42 | public RectangleF ToPixels() 43 | { 44 | // NOTE: PDF dimensions are in points (1/72 in) 45 | const double ptToPxRatio = 4.0d / 3.0d; // 1.333 46 | 47 | return new RectangleF( 48 | x: (float)(X * ptToPxRatio), 49 | y: (float)(Y * ptToPxRatio), 50 | width: (float)(Width * ptToPxRatio), 51 | height: 
(float)(Height * ptToPxRatio) 52 | ); 53 | } 54 | 55 | public override string ToString() 56 | { 57 | return $"{ObjectType} [{Llx}; {Lly}; {Urx}; {Ury}]"; 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /SafeRapidPdf.UnitTests/File/PdfStreamTests.cs: -------------------------------------------------------------------------------- 1 | using SafeRapidPdf.Objects; 2 | using SafeRapidPdf.UnitTests.Util; 3 | using Xunit; 4 | 5 | namespace SafeRapidPdf.UnitTests.File; 6 | 7 | public class PdfStreamTests 8 | { 9 | [Fact] 10 | public void Decode_FlateDecode_PngPredictor_Up() 11 | { 12 | //Contents: 13 | // 711 0 obj 14 | // <> 16 | // /Filter /FlateDecode 17 | // /ID[<8826D7FB49D6D5A039A65154F309C2AB><99F0493FB972254F87B930756174CF99>] 18 | // /Index[703 14] 19 | // /Info 702 0 R 20 | // /Length 58 21 | // /Prev 6238004 22 | // /Root 704 0 R 23 | // /Size 717 24 | // /Type /XRef 25 | // /W[1 2 1]>> 26 | // stream ... 27 | //endstream 28 | //endobj 29 | var base64XrefStream = 30 | @"NzExIDAgb2JqDTw8L0RlY29kZVBhcm1zPDwvQ29sdW1ucyA0L1ByZWRpY3RvciAxMj4+L0ZpbHRl 31 | ci9GbGF0ZURlY29kZS9JRFs8ODgyNkQ3RkI0OUQ2RDVBMDM5QTY1MTU0RjMwOUMyQUI+PDk5RjA0 32 | OTNGQjk3MjI1NEY4N0I5MzA3NTYxNzRDRjk5Pl0vSW5kZXhbNzAzIDE0XS9JbmZvIDcwMiAwIFIv 33 | TGVuZ3RoIDU4L1ByZXYgNjIzODAwNC9Sb290IDcwNCAwIFIvU2l6ZSA3MTcvVHlwZS9YUmVmL1db 34 | MSAyIDFdPj5zdHJlYW0NCmjeYmJkEGBgYmDuBRIMoUCCcSOIUAQRS4EEVyuQYNkDJN6cYmBiZPID 35 | qWNgRCL+/xf6CxBgAO9WCPMNCmVuZHN0cmVhbQ1lbmRvYmo="; 36 | var xrefStream = PdfObject.ParseAny(base64XrefStream.Base64ToLexer()) as PdfIndirectObject; 37 | PdfStream pdfStream = xrefStream.Object as PdfStream; 38 | var data = pdfStream.Decode(); 39 | 40 | string hex = data.ToHexString(); 41 | // known good result: 42 | Assert.Equal("0100100001039d000103f2000104a3000105c400010669000110ee000114aa00010074000202c2000202c2010202c2020202c2030101d400", hex); 43 | } 44 | } 45 | 
--------------------------------------------------------------------------------
/SafeRapidPdf/PdfDocument.cs:
--------------------------------------------------------------------------------
using System.Collections.Generic;
using System.IO;
using SafeRapidPdf.Document;
using SafeRapidPdf.Objects;

namespace SafeRapidPdf;

/// <summary>
/// Represents the document structure of a PDF document.
/// </summary>
public class PdfDocument
{
    private readonly PdfFile _file;

    /// <summary>
    /// Builds the document view from a parsed file by resolving the "Root" entry
    /// of the first trailer into a <see cref="PdfCatalog"/>.
    /// </summary>
    /// <param name="file">The parsed PDF file.</param>
    /// <exception cref="ArgumentNullException"><paramref name="file"/> is null.</exception>
    /// <exception cref="InvalidDataException">The file contains no trailer.</exception>
    public PdfDocument(PdfFile file)
    {
        ArgumentNullException.ThrowIfNull(file);

        _file = file;

        // NOTE: Linearized documents may have multiple trailers. We use the first.
        foreach (var item in _file.Items)
        {
            if (item is PdfTrailer trailer)
            {
                var root = (PdfIndirectReference)trailer["Root"];

                Root = new PdfCatalog(root.Dereference<PdfDictionary>());

                break;
            }
        }

        if (Root is null)
        {
            throw new InvalidDataException("Missing trailer");
        }
    }

    /// <summary>The document catalog referenced by the trailer.</summary>
    public PdfCatalog Root { get; }

    /// <summary>Parses the stream as a PDF file and wraps it in a document.</summary>
    public static PdfDocument Load(Stream stream)
    {
        return new PdfDocument(PdfFile.Parse(stream));
    }

    /// <summary>
    /// Lazily enumerates every object of type <see cref="PdfObjectType.Page"/>
    /// found by walking the containers below <see cref="Root"/>.
    /// </summary>
    public IEnumerable<PdfPage> GetPages()
    {
        return GetPages(Root.Items);
    }

    public override string ToString()
    {
        return "Document";
    }

    // Depth-first walk: yields a page, then descends into it if it is a container.
    private IEnumerable<PdfPage> GetPages(IReadOnlyList<IPdfObject> objects)
    {
        if (objects is null)
        {
            yield break;
        }

        foreach (var o in objects)
        {
            if (o.ObjectType == PdfObjectType.Page)
            {
                yield return (PdfPage)o;
            }

            if (o.IsContainer && o.Items != null)
            {
                foreach (var page in GetPages(o.Items))
                {
                    yield return page;
                }
            }
        }
    }
}
--------------------------------------------------------------------------------
/SafeRapidPdf.UnitTests/File/PdfDocumentTests.cs:
--------------------------------------------------------------------------------
1 | using System.IO; 2 | using System.Linq; 3 | 
using SafeRapidPdf.Objects; 4 | 5 | using Xunit; 6 | 7 | namespace SafeRapidPdf.UnitTests.File; 8 | 9 | public class PdfDocumentTests 10 | { 11 | [Fact] 12 | public void CanAccessThroughLoad() 13 | { 14 | using var stream = System.IO.File.OpenRead(GetTestDataFilePath("3.pdf")); // "%PDF-1.5 15 | 16 | var pdf = PdfDocument.Load(stream); 17 | 18 | var pages = pdf.GetPages().ToArray(); 19 | 20 | var page = Assert.Single(pages); 21 | 22 | Assert.Equal(10, page.Items.Count); 23 | 24 | Assert.Equal("ArtBox [0.0; 0.0; 1920.0; 1080.0]", page.ArtBox.ToString()); 25 | Assert.Equal("BleedBox [0.0; 0.0; 1920.0; 1080.0]", page.BleedBox.ToString()); 26 | Assert.Null(page.CropBox); 27 | Assert.Equal("MediaBox [0.0; 0.0; 1920.0; 1080.0]", page.MediaBox.ToString()); 28 | Assert.Null(page.Rotate); 29 | } 30 | 31 | [Fact] 32 | public void CanExtractPages() 33 | { 34 | var file = PdfFile.Parse(GetTestDataFilePath("1.pdf")); 35 | 36 | Assert.Equal("%PDF-1.3", file.Version.ToString()); 37 | Assert.Equal(PdfObjectType.File, file.ObjectType); 38 | 39 | var pdf = new PdfDocument(file); 40 | 41 | var pages = pdf.GetPages().ToArray(); 42 | 43 | Assert.Equal(3, pages.Length); 44 | 45 | Assert.Null(pages[0].ArtBox); 46 | Assert.Null(pages[0].BleedBox); 47 | Assert.Null(pages[0].CropBox); 48 | Assert.Equal("MediaBox [0; 0; 612; 792]", pages[0].MediaBox.ToString()); 49 | Assert.Null(pages[0].Rotate); 50 | 51 | var mediaBox = pages[0].MediaBox.ToPixels(); 52 | 53 | Assert.Equal(0, mediaBox.X); 54 | Assert.Equal(0, mediaBox.Y); 55 | Assert.Equal(816, mediaBox.Width); 56 | Assert.Equal(1056, mediaBox.Height); 57 | } 58 | 59 | private static string GetTestDataFilePath(string name) 60 | { 61 | var baseDirectory = new DirectoryInfo(Directory.GetCurrentDirectory()).Parent.Parent.Parent; 62 | 63 | return Path.Combine(baseDirectory.FullName, "testdata", name); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /SafeRapidPdf/Document/PdfPageTree.cs: 
--------------------------------------------------------------------------------
using System.IO;

using SafeRapidPdf.Attributes;
using SafeRapidPdf.Objects;

namespace SafeRapidPdf.Document;

/// <summary>
/// A page tree node: a "Pages" dictionary whose "Kids" are pages or further
/// page tree nodes.
/// </summary>
public sealed class PdfPageTree : PdfPage
{
    /// <summary>Creates a page tree node without a parent.</summary>
    public PdfPageTree(PdfIndirectReference pages)
        : this(pages, null)
    {
    }

    /// <summary>
    /// Creates a page tree node from the referenced "Pages" dictionary and builds
    /// all of its kids recursively.
    /// </summary>
    /// <param name="pages">Reference to the "Pages" dictionary.</param>
    /// <param name="parent">The enclosing page tree node, or null.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pages"/> is null.</exception>
    /// <exception cref="InvalidDataException">
    /// The dictionary has no "Kids" entry, or a kid is neither a "Page" nor a "Pages" node.
    /// </exception>
    public PdfPageTree(PdfIndirectReference pages, PdfPageTree parent)
        : base(pages, parent, PdfObjectType.PageTree)
    {
        ArgumentNullException.ThrowIfNull(pages);

        IsContainer = true;
        var pageTree = pages.Dereference<PdfDictionary>();
        pageTree.ExpectsType("Pages");

        foreach (PdfKeyValuePair pair in pageTree.Items)
        {
            switch (pair.Key.Text)
            {
                case "Type": // skip Type Pages
                    break;
                case "Kids":
                    var kids = (PdfArray)pair.Value;
                    Kids = new List<IPdfObject>();
                    foreach (PdfIndirectReference item in kids.Items)
                    {
                        var dic = item.Dereference<PdfDictionary>();
                        string type = dic["Type"].Text;
                        switch (type)
                        {
                            case "Pages":
                                Kids.Add(new PdfPageTree(item, this));
                                break;
                            case "Page":
                                Kids.Add(new PdfPage(item, this));
                                break;
                            default:
                                throw new InvalidDataException("Content of Kids in a Page Tree Node must be either a Page or another Page Tree Node");
                        }
                    }
                    break;
                case "Count":
                    Count = new PdfCount((PdfNumeric)pair.Value);
                    Add(Count);
                    break;
                default:
                    HandleKeyValuePair(pair);
                    break;
            }
        }

        // Kids is only assigned when the dictionary has a "Kids" entry; report a
        // missing entry explicitly instead of handing null to AddRange.
        if (Kids is null)
        {
            throw new InvalidDataException("A Page Tree Node must have a Kids entry");
        }

        AddRange(Kids);
    }

    [ParameterType(required: true, inheritable: false)]
    public PdfCount Count { get; }

    [ParameterType(required: true, inheritable: false)]
    private List<IPdfObject> Kids { get; set; }

    public override string ToString()
    {
        return $"Page Tree Node ({Count} kids)";
    }
}
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 
SafeRapidPdf 2 | 3 | ## CI-Status 4 | 5 | [![Action Status](https://github.com/Color-Of-Code/SafeRapidPdf/workflows/.NET%20Core/badge.svg)](https://github.com/Color-Of-Code/SafeRapidPdf/actions) 6 | 7 | ## Introduction 8 | 9 | There is already a very good PDF parser and generator: [itextsharp](https://itextpdf.com/). 10 | But it doesn't focus on parsing and its licensing model makes it inappropriate for some purposes. 11 | This library, designed and developed from scratch, is provided under the liberal MIT license (refer to the License section for details). 12 | 13 | The focus of the library is on reading and parsing, not on writing. 14 | 15 | The goals followed are: 16 | 17 | - parsing and analyzing PDF contents (virus check for example) 18 | - integrity of parsing (document scans from start to end gathering all objects) 19 | - no quirks, invalid PDFs are not parsed 20 | - allow extraction of text and images at a very low level 21 | 22 | This library is not intended for the following purposes: 23 | 24 | - rendering a PDF 25 | - modifying a PDF 26 | - generating a PDF 27 | 28 | ## File structure 29 | 30 | This library attempts to provide a quick yet reliable parser for PDF files. It focuses 31 | on an integral parsing of the whole PDF into its primitive objects. 32 | 33 | - Strings 34 | - Numeric values 35 | - Booleans 36 | - Streams 37 | - Arrays 38 | - Dictionaries 39 | - Indirect Objects 40 | - Indirect References 41 | - Cross Reference sections 42 | 43 | ## Document structure 44 | 45 | The interpretation layer then allows a decomposition into pages and images among other 46 | high-level objects. 47 | 48 | - Cross reference table 49 | - Root 50 | - Pages 51 | - Graphics 52 | - Text 53 | - Fonts 54 | 55 | The library is not interested in rendering the PDF; only the informative parts are 56 | extracted, such as the position and size of text and graphics. 
57 | 58 | ## Online resources 59 | 60 | - Wikipedia explanations on [the PDF format](https://en.wikipedia.org/wiki/Portable_Document_Format) 61 | - A python library with similar goals: [pdf-parser](https://blog.didierstevens.com/programs/pdf-tools/) 62 | 63 | It is recommended to read the specification of the PDF language 1.7 for a deeper insight. 64 | 65 | ## Testing 66 | 67 | Unit tests are written in XUnit and code coverage is done thanks to Coverlet 68 | 69 | ```bash 70 | # for vscode integrated report 71 | dotnet test --collect:"XPlat Code Coverage" 72 | 73 | # msbuild report 74 | dotnet test /p:CollectCoverage=true 75 | ``` 76 | 77 | ## Authors 78 | 79 | The SafeRapidPdf contributors: 80 | 81 | - Jaap de Haan (initiator) 82 | 83 | ## License 84 | 85 | The MIT license (Refer to the [LICENSE.md](https://github.com/jdehaan/SafeRapidPdf/blob/master/LICENSE.md) file) 86 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfXRef.cs: -------------------------------------------------------------------------------- 1 | using System.Collections.Generic; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | public sealed class PdfXRef : PdfObject 6 | { 7 | private readonly IList _sections; 8 | private readonly Dictionary _offsets = new(); 9 | 10 | private PdfXRef(IList sections) 11 | : base(PdfObjectType.XRef) 12 | { 13 | IsContainer = true; 14 | 15 | _sections = sections; 16 | 17 | // create the access table 18 | foreach (var section in _sections) 19 | { 20 | foreach (var entryItem in section.Items) 21 | { 22 | var entry = (PdfXRefEntry)entryItem; 23 | 24 | if (entry.InUse) 25 | { 26 | string key = BuildKey(entry.ObjectNumber, entry.GenerationNumber); 27 | _offsets.Add(key, entry.Offset); 28 | } 29 | } 30 | } 31 | } 32 | 33 | public override IReadOnlyList Items 34 | { 35 | get 36 | { 37 | var items = new IPdfObject[_sections.Count]; 38 | 39 | for (var i = 0; i < items.Length; i++) 40 | { 41 | items[i] = _sections[i]; 42 | 
} 43 | 44 | return items; 45 | } 46 | } 47 | 48 | /// 49 | /// Parse an uncompressed xref dictionary 50 | /// 51 | /// 52 | /// The parsed PdfXRef 53 | internal static PdfXRef Parse(Parsing.Lexer lexer) 54 | { 55 | var sections = new List(); 56 | string token = lexer.PeekToken1(); 57 | while (token != null && char.IsAsciiDigit(token[0])) 58 | { 59 | sections.Add(PdfXRefSection.Parse(lexer)); 60 | token = lexer.PeekToken1(); 61 | } 62 | return new PdfXRef(sections); 63 | } 64 | 65 | /// 66 | /// Parse the xref table out of a compressed stream 67 | /// 68 | /// 69 | /// The parsed PdfXRef 70 | public static PdfXRef Parse(params PdfStream[] xrefStream) 71 | { 72 | if (xrefStream is null) 73 | { 74 | throw new System.ArgumentNullException(nameof(xrefStream)); 75 | } 76 | 77 | var sections = new List(xrefStream.Length); 78 | foreach (var pdfStream in xrefStream) 79 | { 80 | sections.Add(PdfXRefSection.Parse(pdfStream)); 81 | } 82 | return new PdfXRef(sections); 83 | } 84 | 85 | public long GetOffset(int objectNumber, int generationNumber) 86 | { 87 | string key = BuildKey(objectNumber, generationNumber); 88 | return _offsets[key]; 89 | } 90 | 91 | public static string BuildKey(int objectNumber, int generationNumber) 92 | { 93 | return $"{objectNumber:0000000000}_{generationNumber:00000}"; 94 | } 95 | 96 | public override string ToString() 97 | { 98 | return "xref"; 99 | } 100 | } 101 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Unless otherwise stated in the source file, the code is release under the MIT License 2 | 3 | Copyright (c) 2011-2024 SafeRapidPdf contributors 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | 23 | 24 | Integrated 3rdparty source 25 | ========================== 26 | 27 | Tango icon set (Public Domain) 28 | ------------------------------ 29 | 30 | The icons are taken from the Tango Desktop Library project 31 | http://tango.freedesktop.org/ 32 | 33 | The icons were used in the WPF UI. At the moment the UI is removed 34 | 35 | ZLIB.NET 36 | -------- 37 | 38 | This software uses Zlib.Net which comes with its own license: 39 | 40 | Copyright (c) 2006, ComponentAce 41 | http://www.componentace.com 42 | All rights reserved. 43 | 44 | Redistribution and use in source and binary forms, with or without modification, 45 | are permitted provided that the following conditions are met: 46 | 47 | Redistributions of source code must retain the above copyright notice, this list 48 | of conditions and the following disclaimer. 49 | Redistributions in binary form must reproduce the above copyright notice, this 50 | list of conditions and the following disclaimer in the documentation and/or 51 | other materials provided with the distribution. 
52 | Neither the name of ComponentAce nor the names of its contributors may be used 53 | to endorse or promote products derived from this software without specific 54 | prior written permission. 55 | 56 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 57 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 58 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 59 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE 60 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 62 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 63 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 64 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 | 67 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfLiteralString.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | namespace SafeRapidPdf.Objects; 4 | 5 | /// 6 | /// A literal string is written as an arbitrary number of characters enclosed in 7 | /// parentheses. Any characters may appear in a string except unbalanced 8 | /// parentheses and the backslash, which must be treated specially. Balanced pairs of 9 | /// parentheses within a string require no special treatment. 
///
public sealed class PdfLiteralString : PdfObject
{
    // Text of the string with all escape sequences already decoded.
    private readonly string _text;

    private PdfLiteralString(string text)
        : base(PdfObjectType.LiteralString)
    {
        _text = text;
    }

    /// <summary>
    /// Reads a literal string from <paramref name="lexer"/> up to the closing
    /// parenthesis that balances the opening one, decoding escape sequences.
    /// </summary>
    /// <remarks>
    /// The nesting depth starts at zero, so the opening "(" must already have
    /// been consumed before this method is called. Balanced, unescaped
    /// parentheses inside the string are kept as part of the text.
    /// Escape handling follows ISO 32000-1, section 7.3.4.2.
    /// </remarks>
    /// <param name="lexer">Character source positioned just after the opening parenthesis.</param>
    /// <returns>The decoded literal string.</returns>
    internal static PdfLiteralString Parse(Parsing.Lexer lexer)
    {
        int parenthesisCount = 0;
        var sb = new StringBuilder();
        char c = lexer.ReadChar();

        // The string ends at the first ')' seen while no nested '(' is open.
        while (parenthesisCount != 0 || c != ')')
        {
            switch (c)
            {
                case '(':
                    parenthesisCount++;
                    _ = sb.Append(c);
                    break;

                case ')':
                    parenthesisCount--;
                    _ = sb.Append(c);
                    break;

                case '\\':
                    // Escaped parentheses are handled here and therefore never
                    // change the nesting depth.
                    AppendEscapeSequence(lexer, sb);
                    break;

                default:
                    _ = sb.Append(c);
                    break;
            }

            c = lexer.ReadChar();
        }

        return new PdfLiteralString(sb.ToString());
    }

    /// <summary>
    /// Decodes the escape sequence that follows an already consumed backslash
    /// and appends the resulting character, if any, to <paramref name="sb"/>.
    /// </summary>
    private static void AppendEscapeSequence(Parsing.Lexer lexer, StringBuilder sb)
    {
        char c = lexer.ReadChar();
        switch (c)
        {
            case 'n':
                _ = sb.Append('\n');
                break;
            case 'r':
                _ = sb.Append('\r');
                break;
            case 't':
                _ = sb.Append('\t');
                break;
            case 'b':
                _ = sb.Append('\b');
                break;
            case 'f':
                _ = sb.Append('\f');
                break;

            case '\\':
            case ')':
            case '(':
                _ = sb.Append(c);
                break;

            case '\r':
                // Backslash followed by an end-of-line marker is a line
                // continuation and produces no character. For a CR LF pair
                // the LF belongs to the same marker and is skipped as well.
                if (lexer.ReadChar() != '\n')
                {
                    lexer.Putc();
                }

                break;

            case '\n':
                // Line continuation with a bare LF.
                break;

            default:
                if (IsOctalDigit(c))
                {
                    AppendOctalCharacter(lexer, sb, c);
                }
                else
                {
                    // Not a known escape: the backslash is ignored and the
                    // character itself is kept.
                    _ = sb.Append(c);
                }

                break;
        }
    }

    /// <summary>
    /// Decodes a <c>\ddd</c> character code made of one to three octal digits,
    /// the first of which (<paramref name="firstDigit"/>) was already read.
    /// </summary>
    private static void AppendOctalCharacter(Parsing.Lexer lexer, StringBuilder sb, char firstDigit)
    {
        int value = firstDigit - '0';

        for (int digits = 1; digits < 3; digits++)
        {
            char next = lexer.ReadChar();
            if (!IsOctalDigit(next))
            {
                // Shorter code: the character belongs to the string itself,
                // so hand it back to the lexer.
                lexer.Putc();
                break;
            }

            value = (value * 8) + (next - '0');
        }

        // Three octal digits can encode up to 511; the overflow above one
        // byte is ignored.
        _ = sb.Append((char)(value & 0xFF));
    }

    private static bool IsOctalDigit(char c) => c is >= '0' and <= '7';

    /// <summary>Returns the decoded text of the string.</summary>
    public override string ToString()
    {
        return _text;
    }
}

--------------------------------------------------------------------------------
/SafeRapidPdf.sln:
-------------------------------------------------------------------------------- 1 | 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio 15 4 | VisualStudioVersion = 15.0.26124.0 5 | MinimumVisualStudioVersion = 15.0.26124.0 6 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SafeRapidPdf", "SafeRapidPdf\SafeRapidPdf.csproj", "{03A63E93-46CA-4669-914B-C66F744C121C}" 7 | EndProject 8 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "SafeRapidPdf.UnitTests", "SafeRapidPdf.UnitTests\SafeRapidPdf.UnitTests.csproj", "{A656A7F3-7767-4312-839B-EC73502079EC}" 9 | EndProject 10 | Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "PdfInfoTool", "PdfInfoTool\PdfInfoTool.csproj", "{BA94BF23-C865-4678-8F90-E94B31FBC204}" 11 | EndProject 12 | Global 13 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 14 | Debug|Any CPU = Debug|Any CPU 15 | Debug|x64 = Debug|x64 16 | Debug|x86 = Debug|x86 17 | Release|Any CPU = Release|Any CPU 18 | Release|x64 = Release|x64 19 | Release|x86 = Release|x86 20 | EndGlobalSection 21 | GlobalSection(SolutionProperties) = preSolution 22 | HideSolutionNode = FALSE 23 | EndGlobalSection 24 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 25 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 26 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|Any CPU.Build.0 = Debug|Any CPU 27 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|x64.ActiveCfg = Debug|x64 28 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|x64.Build.0 = Debug|x64 29 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|x86.ActiveCfg = Debug|x86 30 | {03A63E93-46CA-4669-914B-C66F744C121C}.Debug|x86.Build.0 = Debug|x86 31 | {03A63E93-46CA-4669-914B-C66F744C121C}.Release|Any CPU.ActiveCfg = Release|Any CPU 32 | {03A63E93-46CA-4669-914B-C66F744C121C}.Release|Any CPU.Build.0 = Release|Any CPU 33 | {03A63E93-46CA-4669-914B-C66F744C121C}.Release|x64.ActiveCfg = Release|x64 34 | 
{03A63E93-46CA-4669-914B-C66F744C121C}.Release|x64.Build.0 = Release|x64 35 | {03A63E93-46CA-4669-914B-C66F744C121C}.Release|x86.ActiveCfg = Release|x86 36 | {03A63E93-46CA-4669-914B-C66F744C121C}.Release|x86.Build.0 = Release|x86 37 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 38 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|Any CPU.Build.0 = Debug|Any CPU 39 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|x64.ActiveCfg = Debug|x64 40 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|x64.Build.0 = Debug|x64 41 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|x86.ActiveCfg = Debug|x86 42 | {A656A7F3-7767-4312-839B-EC73502079EC}.Debug|x86.Build.0 = Debug|x86 43 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|Any CPU.ActiveCfg = Release|Any CPU 44 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|Any CPU.Build.0 = Release|Any CPU 45 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|x64.ActiveCfg = Release|x64 46 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|x64.Build.0 = Release|x64 47 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|x86.ActiveCfg = Release|x86 48 | {A656A7F3-7767-4312-839B-EC73502079EC}.Release|x86.Build.0 = Release|x86 49 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 50 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|Any CPU.Build.0 = Debug|Any CPU 51 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|x64.ActiveCfg = Debug|x64 52 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|x64.Build.0 = Debug|x64 53 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|x86.ActiveCfg = Debug|x86 54 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Debug|x86.Build.0 = Debug|x86 55 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|Any CPU.ActiveCfg = Release|Any CPU 56 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|Any CPU.Build.0 = Release|Any CPU 57 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|x64.ActiveCfg = Release|x64 58 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|x64.Build.0 = Release|x64 59 | 
{BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|x86.ActiveCfg = Release|x86 60 | {BA94BF23-C865-4678-8F90-E94B31FBC204}.Release|x86.Build.0 = Release|x86 61 | EndGlobalSection 62 | EndGlobal 63 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfObject.cs: -------------------------------------------------------------------------------- 1 | using System.Text; 2 | 3 | using SafeRapidPdf.Parsing; 4 | 5 | namespace SafeRapidPdf.Objects; 6 | 7 | public abstract class PdfObject : IPdfObject 8 | { 9 | protected PdfObject(PdfObjectType type) 10 | { 11 | ObjectType = type; 12 | } 13 | 14 | public PdfObjectType ObjectType { get; } 15 | 16 | public bool IsContainer { get; protected set; } 17 | 18 | public string Text => ToString(); 19 | 20 | public virtual IReadOnlyList Items 21 | => !IsContainer 22 | ? null 23 | : throw new NotImplementedException(); 24 | 25 | internal static PdfObject ParseAny(Lexer lexer) 26 | { 27 | return ParseAny(lexer, string.Empty); 28 | } 29 | 30 | internal static PdfObject ParseAny(Lexer lexer, string endToken) 31 | { 32 | string token = lexer.ReadToken(); 33 | if (token is null) 34 | return null; 35 | 36 | switch (token) 37 | { 38 | case "null": return PdfNull.Null; // null object 39 | 40 | case "true": 41 | case "false": 42 | return PdfBoolean.Parse(token); 43 | 44 | case "/": return PdfName.Parse(lexer); 45 | case "%": return PdfComment.Parse(lexer); 46 | case "<": return PdfHexadecimalString.Parse(lexer); 47 | case "(": return PdfLiteralString.Parse(lexer); 48 | 49 | case "xref": 50 | return PdfXRef.Parse(lexer); 51 | 52 | case "trailer": 53 | return PdfTrailer.Parse(lexer); 54 | 55 | case "<<": 56 | var dic = PdfDictionary.Parse(lexer); 57 | 58 | // check for stream and combine put dictionary into stream object 59 | token = lexer.PeekToken1(); 60 | 61 | if (token == "stream") 62 | { 63 | return PdfStream.Parse(dic, lexer); 64 | } 65 | 66 | return dic; 67 | 68 | case "[": return 
PdfArray.Parse(lexer); 69 | 70 | case "startxref": 71 | return PdfStartXRef.Parse(lexer); 72 | 73 | case ")": 74 | case ">": 75 | case ">>": 76 | case "]": 77 | case "}": 78 | case "stream": 79 | case "endstream": 80 | case "endobj": 81 | if (endToken == token) 82 | { 83 | return null; // expected end 84 | } 85 | 86 | throw new ParsingException("Out of sync"); 87 | 88 | default: 89 | // must be an integer or double value 90 | PdfNumeric num = PdfNumeric.Parse(token); 91 | if (num.IsInteger) 92 | { 93 | string token2 = lexer.PeekToken2(); 94 | switch (token2) 95 | { 96 | case "obj": 97 | return PdfIndirectObject.Parse(lexer, num.ToInt32()); 98 | 99 | case "R": 100 | PdfIndirectReference ir = PdfIndirectReference.Parse(lexer, num.ToInt32()); 101 | ir.Resolver = lexer.IndirectReferenceResolver; 102 | 103 | return ir; 104 | default: 105 | // ignore; 106 | return num; 107 | } 108 | } 109 | else 110 | { 111 | return num; 112 | } 113 | } 114 | 115 | throw new ParsingException("Could not read object"); 116 | } 117 | 118 | internal static PdfObject ParseAny(PdfStream stream) 119 | { 120 | byte[] decodedBytes = stream.Decode(); 121 | _ = Encoding.UTF8.GetString(decodedBytes); 122 | 123 | // contents are not always pdf objects... 
124 | // var s = new MemoryStream(decodedBytes); 125 | // var parser = new LexicalParser(s, true); 126 | // return PdfObject.ParseAny(parser); 127 | return null; 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfXRefSection.cs: -------------------------------------------------------------------------------- 1 | using System.Globalization; 2 | 3 | using SafeRapidPdf.Parsing; 4 | 5 | namespace SafeRapidPdf.Objects; 6 | 7 | public sealed class PdfXRefSection : PdfObject 8 | { 9 | private readonly IPdfObject[] _entries; 10 | 11 | private PdfXRefSection(int firstId, int size, IPdfObject[] entries) 12 | : base(PdfObjectType.XRefSection) 13 | { 14 | IsContainer = true; 15 | 16 | FirstId = firstId; 17 | Size = size; 18 | _entries = entries; 19 | } 20 | 21 | public int FirstId { get; } 22 | 23 | public int Size { get; } 24 | 25 | public override IReadOnlyList Items => _entries; 26 | 27 | internal static PdfXRefSection Parse(PdfStream pdfStream) 28 | { 29 | var dictionary = pdfStream.StreamDictionary; 30 | var type = (PdfName)dictionary["Type"]; 31 | if (type.Name != "XRef") 32 | { 33 | throw new ParsingException("A stream of type XRef is expected"); 34 | } 35 | 36 | // W[1 2 1] (4 columns) 37 | // W[1 3 1] (5 columns, larger indexes) 38 | var w = (PdfArray)dictionary["W"]; 39 | int firstId = 0; 40 | int size = 0; 41 | 42 | if (dictionary.TryGetValue("Index", out IPdfObject indexObject)) 43 | { 44 | var index = (PdfArray)indexObject; 45 | firstId = ((PdfNumeric)index.Items[0]).ToInt32(); 46 | size = ((PdfNumeric)index.Items[1]).ToInt32(); 47 | } 48 | else if (dictionary.TryGetValue("Size", out IPdfObject sizeObject)) 49 | { 50 | size = ((PdfNumeric)sizeObject).ToInt32(); 51 | } 52 | 53 | int items = w.Items.Count; 54 | 55 | // for xref this shall always be 3 56 | if (items != 3) 57 | { 58 | throw new ParsingException("The W[] parameter must contain 3 columns for an XRef"); 59 | } 60 | int[] sizes = 
new int[w.Items.Count]; 61 | int bytesPerEntry = 0; 62 | for (int i = 0; i < items; i++) 63 | { 64 | sizes[i] = ((PdfNumeric)w.Items[i]).ToInt32(); 65 | bytesPerEntry += sizes[i]; 66 | } 67 | var decodedXRef = pdfStream.Decode(); 68 | // Use W[...] to build up the xref 69 | int rowCount = decodedXRef.Length / bytesPerEntry; 70 | if (size != rowCount) 71 | { 72 | throw new ParsingException("The number of refs inside the Index value must match the actual refs count present in the stream"); 73 | } 74 | 75 | var entries = new IPdfObject[rowCount]; 76 | 77 | for (int row = 0; row < rowCount; row++) 78 | { 79 | var entry = PdfXRefEntry.Parse(firstId + row, decodedXRef, sizes, row, bytesPerEntry); 80 | entries[row] = entry; 81 | } 82 | 83 | return new PdfXRefSection(firstId, size, entries); 84 | } 85 | 86 | internal static PdfXRefSection Parse(Lexer lexer) 87 | { 88 | int firstId = int.Parse(lexer.ReadToken(), CultureInfo.InvariantCulture); 89 | int size = int.Parse(lexer.ReadToken(), CultureInfo.InvariantCulture); 90 | 91 | var entries = new IPdfObject[size]; 92 | 93 | for (int i = 0; i < size; i++) 94 | { 95 | var entry = PdfXRefEntry.Parse(firstId + i, lexer); 96 | 97 | // first entry must be free and have a gen 65535 98 | // head of the linked list of free objects 99 | 100 | if (i == 0) 101 | { 102 | if (entry.GenerationNumber != 65535) 103 | { 104 | throw new ParsingException($"The first xref entry must have generation number 65535. 
Was {entry.GenerationNumber}"); 105 | } 106 | 107 | if (entry.InUse) 108 | { 109 | throw new ParsingException($"The first xref entry must be free"); 110 | } 111 | } 112 | 113 | entries[i] = entry; 114 | } 115 | 116 | return new PdfXRefSection(firstId, size, entries); 117 | } 118 | 119 | public override string ToString() 120 | { 121 | return $"{FirstId} {Size}"; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfXRefEntry.cs: -------------------------------------------------------------------------------- 1 | using System.Globalization; 2 | using SafeRapidPdf.Parsing; 3 | 4 | namespace SafeRapidPdf.Objects; 5 | 6 | public sealed class PdfXRefEntry : PdfObject 7 | { 8 | private PdfXRefEntry(int objectNumber, int generationNumber, long offset, char type) 9 | : base(PdfObjectType.XRefEntry) 10 | { 11 | ObjectNumber = objectNumber; 12 | GenerationNumber = generationNumber; 13 | Offset = offset; 14 | EntryType = type; 15 | } 16 | 17 | public int ObjectNumber { get; } 18 | 19 | public int GenerationNumber { get; } 20 | 21 | public char EntryType { get; } 22 | 23 | public long Offset { get; } 24 | 25 | // 'f': free (deleted objects) 26 | // 'n': in use 27 | // 'o': in use (compressed in stream) 28 | public bool InUse => EntryType != 'f'; 29 | 30 | internal static PdfXRefEntry Parse(int objectNumber, Lexer lexer) 31 | { 32 | string offsetS = lexer.ReadToken(); 33 | if (offsetS.Length != 10) 34 | { 35 | throw new ParsingException("Expected 10 digits for offset in xref"); 36 | } 37 | long offset = long.Parse(offsetS, CultureInfo.InvariantCulture); 38 | 39 | string generationS = lexer.ReadToken(); 40 | if (generationS.Length != 5) 41 | { 42 | throw new ParsingException("Expected 5 digits for generation in xref"); 43 | } 44 | int generationNumber = int.Parse(generationS, CultureInfo.InvariantCulture); 45 | 46 | string inuse = lexer.ReadToken(); 47 | if (inuse is not "f" and not "n") 48 | { 49 | throw 
new ParsingException($"xref flag must be 'f' or 'n'. Was {inuse}"); 50 | } 51 | 52 | char entryType = (inuse == "f") ? 'f' : 'n'; 53 | 54 | return new PdfXRefEntry(objectNumber, generationNumber, offset, entryType); 55 | } 56 | 57 | internal static PdfXRefEntry Parse(int objectNumber, byte[] decodedXRef, int[] sizes, int row, int bytesPerEntry) 58 | { 59 | int position = 0; 60 | long[] result = new long[3]; 61 | for (int column = 0; column < 3; column++) 62 | { 63 | long v = 0; 64 | for (int bytes = 0; bytes < sizes[column]; bytes++) 65 | { 66 | var b = decodedXRef[(row * bytesPerEntry) + position]; 67 | v = (v * 256) + b; 68 | position++; 69 | } 70 | result[column] = v; 71 | } 72 | 73 | // Meaning of types and fields within an xref stream 74 | // type field 75 | char entryType; 76 | long offset; 77 | int generationNumber; 78 | switch (result[0]) 79 | { 80 | // 0 0 = f 81 | // 2 -> object number of next free object 82 | // 3 -> generation number (if used again) 83 | case 0: 84 | entryType = 'f'; 85 | offset = result[1]; 86 | generationNumber = (int)result[2]; 87 | break; 88 | 89 | // 1 1 = n (uncompressed) 90 | // 2 -> byte offset in file 91 | // 3 -> generation number 92 | case 1: 93 | entryType = 'n'; 94 | offset = result[1]; 95 | generationNumber = (int)result[2]; 96 | break; 97 | 98 | // 2 1 = n (compressed) 99 | // 2 -> object number where the data is stored 100 | // 3 -> index of object in the stream 101 | case 2: 102 | entryType = 'o'; 103 | 104 | // TODO: access the file at that position and decode 105 | offset = result[1]; // object 106 | generationNumber = (int)result[2]; // index 107 | break; 108 | default: 109 | throw new ParsingException($"Invalid type numeric id inside xref item: {result[0]}"); 110 | } 111 | 112 | return new PdfXRefEntry(objectNumber, generationNumber, offset, entryType); 113 | } 114 | 115 | public override string ToString() 116 | { 117 | return $"{Offset:0000000000} {GenerationNumber:00000} {EntryType}"; 118 | } 119 | } 120 | 
-------------------------------------------------------------------------------- /PdfInfoTool/Command/Show.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using SafeRapidPdf; 3 | using SafeRapidPdf.Objects; 4 | 5 | namespace PdfInfoTool; 6 | 7 | internal static partial class Command 8 | { 9 | internal static int RunShowAndReturnExitCode(ShowOptions opts) 10 | { 11 | var file = PdfFile.Parse(opts.FileName); 12 | Console.WriteLine("PDF Version: {0}", file.Version); 13 | 14 | if (opts.What == "xref") 15 | { 16 | // this is the information coming from the interpreted xref 17 | var xref = file.XRef; 18 | foreach (var section in xref.Items) 19 | { 20 | foreach (var entry in section.Items) 21 | { 22 | var xrefEntry = entry as PdfXRefEntry; 23 | var o = xrefEntry.ObjectNumber; 24 | var obj = o.ToString("D5", System.Globalization.CultureInfo.InvariantCulture); 25 | if (opts.Verbose) 26 | { 27 | var type = "ref ObjStm"; 28 | if (xrefEntry.EntryType != 'o') 29 | { 30 | var g = xrefEntry.GenerationNumber; 31 | try 32 | { 33 | var refObject = file.GetObject(o, g); 34 | var objectType = refObject.Object.ObjectType; 35 | if (objectType == PdfObjectType.Stream) 36 | { 37 | var stream = refObject.Object as PdfStream; 38 | if (stream.StreamDictionary.TryGetValue("Type", out IPdfObject contentType)) 39 | { 40 | if (contentType.Text == "XObject") 41 | { 42 | _ = stream.StreamDictionary.TryGetValue("Subtype", out contentType); 43 | type = $"Stream(XObject: {contentType})"; 44 | } 45 | else 46 | { 47 | type = $"Stream({contentType})"; 48 | } 49 | } 50 | else 51 | { 52 | //var sObject = PdfObject.ParseAny(stream); 53 | type = $"Stream(?)"; 54 | } 55 | } 56 | else if (objectType == PdfObjectType.Dictionary) 57 | { 58 | var dictionary = refObject.Object as PdfDictionary; 59 | type = dictionary.TryGetValue("Type", out IPdfObject contentType) 60 | ? 
contentType.ToString() 61 | : dictionary.Items.Count > 0 && 62 | dictionary.Items[0].Text == "Linearized" 63 | ? "Linearization Parameter" 64 | : $"Dictionary(?)"; 65 | } 66 | else 67 | { 68 | type = objectType.ToString(); 69 | } 70 | } 71 | catch 72 | { 73 | type = $"Not found {o} {g}"; 74 | } 75 | } 76 | 77 | Console.WriteLine($"{obj}: {entry} - {type}"); 78 | } 79 | else 80 | { 81 | Console.WriteLine($"{obj}: {entry}"); 82 | } 83 | } 84 | } 85 | } 86 | // the information coming from the objects itselves 87 | /* 88 | foreach (var item in file.Items) 89 | { 90 | var type = item.ObjectType; 91 | if (item.ObjectType == PdfObjectType.IndirectObject) 92 | { 93 | var iobj = item as PdfIndirectObject; 94 | type = iobj.Object.ObjectType; 95 | } 96 | Console.WriteLine(" - {0}: {1}", item, type); 97 | } 98 | */ 99 | return 0; 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfDictionary.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | 3 | using SafeRapidPdf.Parsing; 4 | 5 | namespace SafeRapidPdf.Objects; 6 | 7 | /// 8 | /// A PDF Dictionary type, a collection of named objects 9 | /// 10 | public class PdfDictionary : PdfObject 11 | { 12 | private readonly IList _dictionary; 13 | 14 | private PdfDictionary(IList dictionary) 15 | : base(PdfObjectType.Dictionary) 16 | { 17 | IsContainer = true; 18 | _dictionary = dictionary; 19 | } 20 | 21 | protected PdfDictionary(PdfDictionary dictionary, PdfObjectType type) 22 | : base(type) 23 | { 24 | ArgumentNullException.ThrowIfNull(dictionary); 25 | 26 | IsContainer = true; 27 | _dictionary = dictionary._dictionary; 28 | } 29 | 30 | public IPdfObject this[string name] 31 | => TryGetValue(name, out IPdfObject value) 32 | ? 
value 33 | : throw new KeyNotFoundException(name + " was not found in PdfDictionary"); 34 | 35 | public void ExpectsType(string name) 36 | { 37 | var type = (PdfName)this["Type"]; 38 | if (type.Name != name) 39 | { 40 | throw new ParsingException($"Expected {name}, but got {type.Name}"); 41 | } 42 | } 43 | 44 | public static PdfDictionary Parse(Lexer lexer) 45 | { 46 | ArgumentNullException.ThrowIfNull(lexer); 47 | 48 | var dictionaryItems = new List(); 49 | 50 | PdfObject obj; 51 | 52 | while ((obj = ParseAny(lexer, ">>")) != null) 53 | { 54 | if (obj is PdfName name) 55 | { 56 | PdfObject value = ParseAny(lexer); 57 | 58 | dictionaryItems.Add(new PdfKeyValuePair(name, value)); 59 | } 60 | else 61 | { 62 | throw new ParsingException("The first item of a pair inside a dictionary must be a PDF name object"); 63 | } 64 | } 65 | 66 | return new PdfDictionary(dictionaryItems); 67 | } 68 | 69 | public bool TryGetValue(string key, out IPdfObject value) 70 | { 71 | foreach (PdfKeyValuePair pair in _dictionary) 72 | { 73 | if (pair.Key.Text == key) 74 | { 75 | value = pair.Value; 76 | 77 | return true; 78 | } 79 | } 80 | 81 | value = null; 82 | 83 | return false; 84 | } 85 | 86 | /// 87 | /// Automatically dereference indirect references or returns the Pdf object 88 | /// after checking that it is of the expected type 89 | /// 90 | /// The type of the object to resolve 91 | /// The name to resolve 92 | /// The resolved type 93 | public T Resolve(string name) 94 | where T : class 95 | { 96 | IPdfObject value = this[name]; 97 | 98 | return value is PdfIndirectReference reference 99 | ? reference.Dereference() 100 | : value is T t 101 | ? t 102 | : throw new InvalidDataException($"Expected type '{typeof(T)}' resolving '{name}'. 
Was {value.GetType()}'."); 103 | } 104 | 105 | public IEnumerable Keys 106 | { 107 | get 108 | { 109 | foreach (PdfKeyValuePair pair in _dictionary) 110 | { 111 | yield return pair.Key.Text; 112 | } 113 | } 114 | } 115 | 116 | public IEnumerable Values 117 | { 118 | get 119 | { 120 | foreach (PdfKeyValuePair pair in _dictionary) 121 | { 122 | yield return pair.Value; 123 | } 124 | } 125 | } 126 | 127 | public override IReadOnlyList Items 128 | { 129 | get 130 | { 131 | var result = new IPdfObject[_dictionary.Count]; 132 | 133 | for (int i = 0; i < result.Length; i++) 134 | { 135 | result[i] = _dictionary[i]; 136 | } 137 | 138 | return result; 139 | } 140 | } 141 | 142 | public string Type 143 | { 144 | get 145 | { 146 | if (TryGetValue("Type", out IPdfObject typeObject)) 147 | { 148 | PdfName type = (PdfName)typeObject; 149 | 150 | return type.Name; 151 | } 152 | 153 | return null; 154 | } 155 | } 156 | 157 | public override string ToString() 158 | { 159 | return Type != null ? $"<<...>> ({Type})" : "<<...>>"; 160 | } 161 | } 162 | -------------------------------------------------------------------------------- /SafeRapidPdf/Services/IndirectReferenceResolver.cs: -------------------------------------------------------------------------------- 1 | using System.Globalization; 2 | 3 | using SafeRapidPdf.Objects; 4 | using SafeRapidPdf.Parsing; 5 | 6 | namespace SafeRapidPdf.Services; 7 | 8 | internal class IndirectReferenceResolver : IIndirectReferenceResolver 9 | { 10 | private readonly Lexer _lexer; 11 | private PdfDictionary _linearizationHeader; 12 | private long startXRef; 13 | 14 | public IndirectReferenceResolver(Lexer lexer) 15 | { 16 | _lexer = lexer; 17 | 18 | _lexer.PushPosition(0); 19 | 20 | TryParseLinearizationHeader(); 21 | 22 | // in the case of linearized PDFs there are additional linearized structures added 23 | // to the PDF. 
Otherwise we take the non linearized approach 24 | if (_linearizationHeader != null) 25 | { 26 | RetrieveXRefLinearized(); 27 | } 28 | else 29 | { 30 | RetrieveXRef(); 31 | } 32 | 33 | _lexer.PopPosition(); 34 | } 35 | 36 | public PdfXRef XRef { get; private set; } 37 | 38 | private void TryParseLinearizationHeader() 39 | { 40 | _linearizationHeader = null; 41 | 42 | try 43 | { 44 | // fetch the first object we see and try to parse it 45 | var o = PdfObject.ParseAny(_lexer); 46 | 47 | while (o.ObjectType == PdfObjectType.Comment) 48 | { 49 | o = PdfObject.ParseAny(_lexer); 50 | } 51 | 52 | if (o.ObjectType == PdfObjectType.IndirectObject) 53 | { 54 | var d = ((PdfIndirectObject)o).Object; 55 | if (d.ObjectType == PdfObjectType.Dictionary) 56 | { 57 | var dict = (PdfDictionary)d; 58 | var linearizedVersion = dict["Linearized"].Text; 59 | if (!string.IsNullOrWhiteSpace(linearizedVersion)) 60 | { 61 | _linearizationHeader = dict; 62 | } 63 | } 64 | } 65 | } 66 | catch 67 | { 68 | // ignore... 
I know bad style 69 | // in this case the linearization header is assumed to not have been found 70 | } 71 | } 72 | 73 | public PdfIndirectObject GetObject(int objectNumber, int generationNumber) 74 | { 75 | // entry from XRef 76 | _lexer.PushPosition(XRef.GetOffset(objectNumber, generationNumber)); 77 | 78 | // load the object if it was not yet found 79 | var obj = PdfIndirectObject.Parse(_lexer); 80 | _lexer.PopPosition(); 81 | return obj; 82 | } 83 | 84 | private void RetrieveXRefLinearized() 85 | { 86 | // if we get here we can read the next object as the first xref 87 | // use the linearized header to jump to the main table /T offset 88 | // parse the xref there too 89 | var firstPageXRef = (PdfIndirectObject)PdfObject.ParseAny(_lexer); 90 | var mainXRefPosition = (PdfNumeric)_linearizationHeader["T"]; 91 | 92 | _lexer.PushPosition(mainXRefPosition.ToInt64()); 93 | 94 | var mainXRef = (PdfIndirectObject)PdfObject.ParseAny(_lexer); 95 | 96 | _lexer.PopPosition(); 97 | 98 | XRef = PdfXRef.Parse((PdfStream)firstPageXRef.Object, (PdfStream)mainXRef.Object); 99 | } 100 | 101 | // returns true if an xref was found false otherwise 102 | private void RetrieveXRef() 103 | { 104 | XRef = null; 105 | 106 | // only necessary if not linearized 107 | startXRef = RetrieveStartXRef(); 108 | 109 | // if the xref was not found, early exit 110 | if (startXRef == -1) 111 | return; 112 | 113 | _lexer.PushPosition(startXRef); 114 | 115 | var token = _lexer.ReadToken(); 116 | if (token == "xref") 117 | { 118 | // we have an uncompressed xref table 119 | XRef = PdfXRef.Parse(_lexer); 120 | } 121 | else 122 | { 123 | // maybe there is no xref 124 | } 125 | _lexer.PopPosition(); 126 | } 127 | 128 | private long RetrieveStartXRef() 129 | { 130 | long position = -100; // look from end, might go wrong for very small documents 131 | position = Math.Max(position, -_lexer.Size); // avoid underflow 132 | _lexer.PushPosition(position); 133 | 134 | // determine StartXRef 135 | long result = -1; 
136 | string t; 137 | do 138 | { 139 | t = _lexer.ReadToken(); 140 | } 141 | while (t is not null and not "startxref"); 142 | 143 | if (t == "startxref") 144 | { 145 | result = long.Parse(_lexer.ReadToken(), CultureInfo.InvariantCulture); 146 | } 147 | 148 | _lexer.PopPosition(); 149 | return result; 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /SafeRapidPdf/Objects/PdfFile.cs: -------------------------------------------------------------------------------- 1 | using System.ComponentModel; 2 | using System.Diagnostics; 3 | using System.IO; 4 | using System.Linq; 5 | 6 | using SafeRapidPdf.Parsing; 7 | using SafeRapidPdf.Services; 8 | 9 | namespace SafeRapidPdf.Objects; 10 | 11 | /// 12 | /// Represents the physical structure of a PDF. Contains the objects present 13 | /// in the file and allows direct retrieval of indirect references. 14 | /// The file itself is considered as a PDF object. 15 | /// 16 | public class PdfFile : IPdfObject, IIndirectReferenceResolver 17 | { 18 | private readonly Dictionary _indirectObjects; 19 | 20 | private PdfFile(IReadOnlyList objects) 21 | { 22 | Items = objects; 23 | 24 | // build up the fast object lookup dictionary 25 | _indirectObjects = new Dictionary(); 26 | 27 | foreach (var obj in Items.OfType()) 28 | { 29 | InsertObject(obj); 30 | } 31 | 32 | SetResolver(this); 33 | } 34 | 35 | /// 36 | /// Gets the parsing time in seconds 37 | /// 38 | public double ParsingTime { get; private set; } 39 | 40 | public string Version => Items[0].ToString(); 41 | 42 | public IReadOnlyList Items { get; private set; } 43 | 44 | public string Text => "File"; 45 | 46 | public bool IsContainer => true; 47 | 48 | public PdfObjectType ObjectType => PdfObjectType.File; 49 | 50 | public PdfXRef XRef { get; private set; } 51 | 52 | private void SetResolver(IPdfObject obj) 53 | { 54 | if (obj.IsContainer) 55 | { 56 | foreach (IPdfObject item in obj.Items) 57 | { 58 | if (item is 
PdfIndirectReference iref) 59 | { 60 | iref.Resolver = this; 61 | } 62 | else 63 | { 64 | SetResolver(item); 65 | } 66 | } 67 | } 68 | } 69 | 70 | public static PdfFile Parse(Stream reader, EventHandler progress = null) 71 | { 72 | progress?.Invoke(null, new ProgressChangedEventArgs(0, null)); 73 | 74 | var watch = Stopwatch.StartNew(); 75 | 76 | var lexer = new Lexer(reader); 77 | 78 | lexer.Expects("%"); // Ensure the first byte matches the PDF marker 79 | 80 | var objects = new List(); 81 | 82 | PdfComment comment = PdfComment.Parse(lexer); 83 | 84 | if (!comment.Text.StartsWith("%PDF-", StringComparison.Ordinal)) 85 | { 86 | throw new ParsingException("PDF header missing"); 87 | } 88 | 89 | objects.Add(comment); 90 | 91 | bool lastObjectWasOEF = false; 92 | 93 | while (true) 94 | { 95 | var obj = PdfObject.ParseAny(lexer); 96 | 97 | if (obj is null) 98 | { 99 | if (lastObjectWasOEF) 100 | { 101 | break; 102 | } 103 | else 104 | { 105 | throw new ParsingException("End of file reached without EOF marker"); 106 | } 107 | } 108 | 109 | objects.Add(obj); 110 | 111 | progress?.Invoke(null, new ProgressChangedEventArgs(lexer.Percentage, null)); 112 | 113 | lastObjectWasOEF = false; 114 | if (obj is PdfComment cmt) 115 | { 116 | if (cmt.IsEOF) 117 | { 118 | // a linearized or updated document might contain several EOF markers 119 | lastObjectWasOEF = true; 120 | } 121 | } 122 | } 123 | 124 | progress?.Invoke(null, new ProgressChangedEventArgs(100, null)); 125 | watch.Stop(); 126 | 127 | return new PdfFile(objects) 128 | { 129 | ParsingTime = watch.Elapsed.TotalSeconds, 130 | // copy over xref 131 | XRef = lexer.IndirectReferenceResolver.XRef 132 | }; 133 | } 134 | 135 | public static PdfFile Parse(string pdfFilePath, EventHandler progress = null) 136 | { 137 | using Stream reader = File.Open(pdfFilePath, FileMode.Open, FileAccess.Read, FileShare.Read); 138 | return Parse(reader, progress); 139 | } 140 | 141 | private void InsertObject(PdfIndirectObject obj) 142 | { 143 
using System.IO;

using SafeRapidPdf.Attributes;
using SafeRapidPdf.Objects;

namespace SafeRapidPdf.Document;

/// <summary>
/// A page node built from the entries of a page dictionary.
/// Entries handled in <see cref="HandleKeyValuePair"/> are exposed as typed
/// properties; every other entry is kept as its raw key/value pair in
/// <see cref="Items"/>.
/// </summary>
public class PdfPage : PdfBaseObject
{
    private readonly List<IPdfObject> _items = new();

    /// <summary>
    /// Dereferences <paramref name="pages"/>, checks that the target is of
    /// type "Page" and processes each of its entries.
    /// </summary>
    /// <param name="pages">Indirect reference to the page dictionary.</param>
    /// <param name="parent">The page tree node this page hangs below.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pages"/> is null.</exception>
    /// <exception cref="InvalidDataException">An entry has an unexpected value.</exception>
    public PdfPage(PdfIndirectReference pages, PdfPageTree parent)
        : this(pages, parent, PdfObjectType.Page)
    {
        ArgumentNullException.ThrowIfNull(pages);

        IsContainer = true;

        // NOTE(review): the generic argument is reconstructed from how the
        // result is used (ExpectsType / Items of key-value pairs) - confirm
        // against PdfIndirectReference.Dereference.
        var page = pages.Dereference<PdfDictionary>();

        page.ExpectsType("Page");

        foreach (PdfKeyValuePair pair in page.Items)
        {
            HandleKeyValuePair(pair);
        }
    }

    /// <summary>
    /// Stores the object/generation numbers of <paramref name="pages"/> and
    /// the parent node without reading any dictionary entry.
    /// </summary>
    /// <param name="pages">Indirect reference identifying this node.</param>
    /// <param name="parent">The parent node.</param>
    /// <param name="type">Object type forwarded to the base class.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pages"/> is null.</exception>
    protected PdfPage(PdfIndirectReference pages, PdfPageTree parent, PdfObjectType type)
        : base(type)
    {
        ArgumentNullException.ThrowIfNull(pages);

        GenerationNumber = pages.GenerationNumber;
        ObjectNumber = pages.ObjectNumber;
        Parent = parent;
    }

    /// <summary>Appends one child object to <see cref="Items"/>.</summary>
    protected void Add(IPdfObject item)
    {
        _items.Add(item);
    }

    /// <summary>Appends several child objects to <see cref="Items"/>.</summary>
    protected void AddRange(IEnumerable<IPdfObject> collection)
    {
        _items.AddRange(collection);
    }

    protected int GenerationNumber { get; }

    protected int ObjectNumber { get; }

    // excepted in root node
    [ParameterType(required: true, inheritable: false)]
    public PdfPageTree Parent { get; }

    // public PdfDate LastModified { get; private set; }

    public PdfDictionary Resources { get; private set; }

    [ParameterType(required: true, inheritable: true)]
    public PdfMediaBox MediaBox { get; private set; }

    [ParameterType(required: false, inheritable: true)]
    public PdfCropBox CropBox { get; private set; }

    [ParameterType(required: false, inheritable: false, version: "1.3")]
    public PdfBleedBox BleedBox { get; private set; }

    [ParameterType(required: false, inheritable: false, version: "1.3")]
    public PdfTrimBox TrimBox { get; private set; }

    [ParameterType(required: false, inheritable: false, version: "1.3")]
    public PdfArtBox ArtBox { get; private set; }

    // public PdfDictionary BoxColorInfo { get; private set; }

    [ParameterType(required: false, inheritable: false)]
    public PdfContents Contents { get; private set; }

    [ParameterType(required: false, inheritable: true)]
    public PdfRotate Rotate { get; private set; }

    // Entries that are not modelled yet; they end up in Items as raw pairs.
    // public PdfDictionary Group { get; private set; }
    // public PdfStream Thumb { get; private set; }
    // public PdfArray B { get; private set; }
    // public PdfNumeric Dur { get; private set; }
    // public PdfDictionary Trans { get; private set; }
    // public PdfArray Annots { get; private set; }
    // public PdfDictionary AA { get; private set; }
    // public PdfStream Metadata { get; private set; }
    // public PdfDictionary PieceInfo { get; private set; }
    // public PdfNumeric StructParents { get; private set; }
    // public PdfStream ID { get; private set; }
    // public PdfNumeric PZ { get; private set; }
    // public PdfDictionary SeparationInfo { get; private set; }
    // public PdfName Tabs { get; private set; }
    // public PdfName TemplateInstantiated { get; private set; }
    // public PdfDictionary PresSteps { get; private set; }
    // public PdfNumeric UserUnit { get; private set; }
    // public PdfDictionary VP { get; private set; }

    public override IReadOnlyList<IPdfObject> Items => _items;

    /// <summary>
    /// Interprets one dictionary entry: known keys are converted into their
    /// typed wrapper (and added to <see cref="Items"/>), "Type" is skipped,
    /// "Parent" is only validated, anything else is stored unchanged.
    /// </summary>
    /// <param name="pair">The dictionary entry to process.</param>
    /// <exception cref="ArgumentNullException"><paramref name="pair"/> is null.</exception>
    /// <exception cref="InvalidDataException">
    /// The value of a known key has an unexpected type, or the "Parent" entry
    /// does not match the parent this node was created with.
    /// </exception>
    protected void HandleKeyValuePair(PdfKeyValuePair pair)
    {
        ArgumentNullException.ThrowIfNull(pair);

        switch (pair.Key.Text)
        {
            case "Type": // skip type Page
                break;
            case "ArtBox":
                ArtBox = new PdfArtBox(RequireValue<PdfArray>(pair));
                _items.Add(ArtBox);
                break;
            case "BleedBox":
                BleedBox = new PdfBleedBox(RequireValue<PdfArray>(pair));
                _items.Add(BleedBox);
                break;
            case "CropBox":
                CropBox = new PdfCropBox(RequireValue<PdfArray>(pair));
                _items.Add(CropBox);
                break;
            case "MediaBox":
                MediaBox = new PdfMediaBox(RequireValue<PdfArray>(pair));
                _items.Add(MediaBox);
                break;
            case "TrimBox":
                TrimBox = new PdfTrimBox(RequireValue<PdfArray>(pair));
                _items.Add(TrimBox);
                break;
            case "Rotate":
                Rotate = new PdfRotate(RequireValue<PdfNumeric>(pair));
                _items.Add(Rotate);
                break;
            case "Contents":
                Contents = new PdfContents(pair.Value);
                _items.Add(Contents);
                break;
            case "Parent":
                var parent = RequireValue<PdfIndirectReference>(pair);

                // A node created without a parent cannot validate the entry;
                // report it as bad data instead of failing on a null access.
                if (Parent is null)
                {
                    throw new InvalidDataException("Unexpected parent entry on a node without parent!");
                }
                if (parent.ObjectNumber != Parent.ObjectNumber)
                {
                    throw new InvalidDataException("Unexpected not matching parent object number!");
                }
                if (parent.GenerationNumber != Parent.GenerationNumber)
                {
                    throw new InvalidDataException("Unexpected not matching parent generation number!");
                }
                // ignore entry (parent is shown through the hierarchy)
                break;
            default:
                _items.Add(pair);
                break;
        }
    }

    public override string ToString() => "Page";

    // Returns the value of the entry as T, or reports malformed input with
    // the same exception type used for the other validation errors here.
    private static T RequireValue<T>(PdfKeyValuePair pair)
        where T : class
    {
        return pair.Value as T
            ?? throw new InvalidDataException(
                $"Entry '{pair.Key.Text}' must be of type {typeof(T).Name}");
    }
}
using System.Globalization;
using System.IO;
using System.IO.Compression;

using SafeRapidPdf.Parsing;

namespace SafeRapidPdf.Objects;

/// <summary>
/// A stream object: the stream dictionary plus the raw (still encoded) data
/// that was read between the <c>stream</c> and <c>endstream</c> keywords.
/// </summary>
public sealed class PdfStream : PdfObject
{
    private PdfStream(PdfDictionary dictionary, PdfData data)
        : base(PdfObjectType.Stream)
    {
        IsContainer = true;
        StreamDictionary = dictionary;
        Data = data;
    }

    public PdfDictionary StreamDictionary { get; }

    public PdfData Data { get; }

    /// <summary>
    /// The entries of the stream dictionary followed by the data object.
    /// A new list is built on every access.
    /// </summary>
    public override IReadOnlyList<IPdfObject> Items
    {
        get
        {
            var list = new List<IPdfObject>(StreamDictionary.Items.Count + 1);
            list.AddRange(StreamDictionary.Items);
            list.Add(Data);
            return list;
        }
    }

    // Predictor 1: the decompressed bytes are already the final data.
    private static byte[] FlateDecodeWithPredictorNone(int _, byte[] decompressed)
    {
        return decompressed;
    }

    // Predictor 12: every source row is one tag byte followed by `columns`
    // bytes; each output byte is the source byte plus the byte directly above
    // it in the previous output row (zero for the first row). Only rows tagged
    // 2 (Up) are supported. Trailing bytes that do not fill a whole row are
    // ignored.
    private static byte[] FlateDecodeWithPredictorPngUp(int columns, byte[] decompressed)
    {
        int stride = columns + 1; // we have an additional predictor byte in the source
        int rows = decompressed.Length / stride;
        var output = new byte[rows * columns];

        for (int r = 0; r < rows; r++)
        {
            int source = r * stride;
            if (decompressed[source] != 2)
            {
                throw new NotImplementedException("Only up predictor is supported at the moment");
            }

            int target = r * columns;
            for (int i = 0; i < columns; i++)
            {
                // the previous row lives in the output buffer itself
                int above = r == 0 ? 0 : output[target - columns + i];
                output[target + i] = (byte)(decompressed[source + 1 + i] + above);
            }
        }

        return output;
    }

    private static byte[] FlateDecodeWithPredictor(int predictor, int columns, byte[] input)
    {
        // now we have to handle the predictors...
        return predictor switch
        {
            //1 = default: no prediction
            1 => FlateDecodeWithPredictorNone(columns, input),
            //12 = PNG prediction (on encoding, PNG Up on all rows)
            12 => FlateDecodeWithPredictorPngUp(columns, input),
            _ => throw new NotImplementedException($"Sorry at the moment predictor {predictor} is not implemented. Please make a feature request on https://github.com/jdehaan/SafeRapidPdf/issues. Ideally provide an example pdf."),
        };
    }

    // Skips the two ZLIB header bytes and inflates the remaining data.
    private static byte[] Inflate(byte[] compressed)
    {
        using var input = new MemoryStream(compressed);

        // Read the ZLIB header
        _ = input.ReadByte();
        _ = input.ReadByte();

        using var inflater = new DeflateStream(input, CompressionMode.Decompress);
        using var output = new MemoryStream();
        inflater.CopyTo(output);
        return output.ToArray();
    }

    /// <summary>
    /// Returns the stream data with its filter applied. Without a "Filter"
    /// entry the raw data is returned as-is. Only "FlateDecode" is supported,
    /// with predictor 1 (none) or 12 (PNG Up).
    /// </summary>
    /// <returns>The decoded bytes.</returns>
    /// <exception cref="NotImplementedException">
    /// The filter or predictor is not supported, or Columns is not positive.
    /// </exception>
    public byte[] Decode()
    {
        if (!StreamDictionary.TryGetValue("Filter", out IPdfObject? filter))
        {
            // filter is optional
            // no filter provided= return the data as-is
            return Data.Data;
        }

        // TODO: multiple filter in order can be specified
        //       (other filters such as DCTDecode / JPEG are not handled yet)
        if (filter.Text != "FlateDecode")
        {
            throw new NotImplementedException("Implement Filter: " + filter.Text);
        }

        byte[] decompressed = Inflate(Data.Data);

        // set defaults
        int predictor = 1; // no prediction
        int columns = 1;

        if (StreamDictionary.TryGetValue("DecodeParms", out var decodeParams))
        {
            var parameters = (PdfDictionary)decodeParams;

            // Both entries are optional: keep the default when one is absent
            // instead of failing on the missing key.
            if (parameters.TryGetValue("Columns", out var columnsValue))
            {
                columns = ((PdfNumeric)columnsValue).ToInt32();
            }
            if (parameters.TryGetValue("Predictor", out var predictorValue))
            {
                predictor = ((PdfNumeric)predictorValue).ToInt32();
            }
        }

        if (columns <= 0)
        {
            throw new NotImplementedException("The sample count must be greater than 0");
        }

        return FlateDecodeWithPredictor(predictor, columns, decompressed);
    }

    /// <summary>
    /// Reads a stream body: the <c>stream</c> keyword, its end-of-line marker,
    /// "Length" bytes of data and the <c>endstream</c> keyword.
    /// </summary>
    /// <param name="dictionary">The stream dictionary parsed just before.</param>
    /// <param name="lexer">Lexer positioned at the <c>stream</c> keyword.</param>
    /// <exception cref="ArgumentNullException">An argument is null.</exception>
    /// <exception cref="ParsingException">
    /// The end-of-line marker is wrong, "Length" is missing or negative, or an
    /// indirect "Length" cannot be resolved.
    /// </exception>
    internal static PdfStream Parse(PdfDictionary dictionary, Lexer lexer)
    {
        ArgumentNullException.ThrowIfNull(dictionary);
        ArgumentNullException.ThrowIfNull(lexer);

        lexer.Expects("stream");
        char eol = lexer.ReadChar();

        // the keyword is followed by either \r\n or a single \n
        if (eol == '\r')
        {
            eol = lexer.ReadChar();
        }

        if (eol != '\n')
        {
            throw new ParsingException($@"Stream must end with either \r\n or \n. Was '{eol}'");
        }

        IPdfObject lengthObject = dictionary["Length"] ??
            throw new ParsingException("Stream dictionary is missing 'Length' entry");

        int length;
        if (lengthObject is PdfIndirectReference reference)
        {
            // A lexer created without resolver cannot look the length up.
            var resolver = lexer.IndirectReferenceResolver ??
                throw new ParsingException("Cannot resolve indirect stream 'Length' without a reference resolver");

            PdfIndirectObject lenobj = resolver
                .GetObject(reference.ObjectNumber, reference.GenerationNumber);

            length = ((PdfNumeric)lenobj.Object).ToInt32();
        }
        else
        {
            length = int.Parse(lengthObject.ToString(), CultureInfo.InvariantCulture);
        }

        if (length < 0)
        {
            throw new ParsingException($"Stream 'Length' must not be negative. Was {length}");
        }

        var data = PdfData.Parse(lexer, length);
        lexer.Expects("endstream");

        return new PdfStream(dictionary, data);
    }

    public override string ToString()
    {
        return "stream";
    }
}
using System.Collections.Generic;
using System.IO;
using System.Text;

using SafeRapidPdf.Services;

namespace SafeRapidPdf.Parsing;

/// <summary>
/// Splits a seekable stream into tokens. Keeps up to two peeked tokens and a
/// single pushed-back byte as look-ahead.
/// </summary>
public class Lexer
{
    // The tables are indexed with (byte value + 1) so that -1 (end of stream)
    // maps to slot 0.
    private static readonly bool[] _regularTable = new bool[257];
    private static readonly bool[] _whitespaceTable = new bool[257];
    private static readonly bool[] _delimiterTable = new bool[257];
    private readonly Stream _reader;
    private readonly Stack<long> _positions = new();
    private string _peekedToken;
    private string _peekedToken2;
    private int _byteRead = -1;

    static Lexer()
    {
        for (int c = 0; c < 257; c++)
        {
            _regularTable[c] = IsRegular(c - 1);
            _whitespaceTable[c] = IsWhitespace(c - 1);
            _delimiterTable[c] = IsDelimiter(c - 1);
        }
    }

    /// <summary>
    /// Creates a lexer positioned at the start of <paramref name="stream"/>.
    /// </summary>
    /// <param name="stream">Seekable stream to read from.</param>
    /// <param name="withoutResolver">
    /// When true no <see cref="IndirectReferenceResolver"/> is created.
    /// </param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is null.</exception>
    public Lexer(Stream stream, bool withoutResolver = false)
    {
        ArgumentNullException.ThrowIfNull(stream);

        _reader = stream;
        _ = _reader.Seek(0, SeekOrigin.End);
        Size = _reader.Position;
        _ = _reader.Seek(0, SeekOrigin.Begin);

        if (!withoutResolver)
        {
            IndirectReferenceResolver = new IndirectReferenceResolver(this);
        }
    }

    public IIndirectReferenceResolver IndirectReferenceResolver { get; private set; }

    /// <summary>
    /// Stream position relative to <see cref="Size"/> in percent; 100 for an
    /// empty stream (nothing is left to read).
    /// </summary>
    public int Percentage => Size == 0 ? 100 : (int)(_reader.Position * 100 / Size);

    /// <summary>Length of the stream as measured by the constructor.</summary>
    public long Size { get; }

    /// <summary>Reads the next token and requires it to equal <paramref name="expectedToken"/>.</summary>
    /// <exception cref="UnexpectedTokenException">A different token was read.</exception>
    public void Expects(string expectedToken)
    {
        string actualToken = ReadToken();

        if (actualToken != expectedToken)
        {
            throw new UnexpectedTokenException(expectedToken, actualToken);
        }
    }

    /// <summary>
    /// Peeks the next token; when it is an integer a second token is peeked
    /// too and returned if it is "obj" or "R". Otherwise the first peeked
    /// token is returned. Nothing is consumed.
    /// </summary>
    public string PeekToken2()
    {
        _peekedToken ??= ReadTokenInternal();

        if (IsInteger(_peekedToken))
        {
            _peekedToken2 ??= ReadTokenInternal();

            // should be "obj" or "R"
            string token = _peekedToken2;
            if (token is "obj" or "R")
            {
                return token;
            }
        }
        return _peekedToken;
    }

    /// <summary>Returns the next token without consuming it.</summary>
    public string PeekToken1()
    {
        _peekedToken ??= ReadTokenInternal();

        return _peekedToken;
    }

    /// <summary>
    /// Returns the next token, serving peeked tokens first. Returns null at
    /// the end of the stream.
    /// </summary>
    public string ReadToken()
    {
        if (_peekedToken != null)
        {
            string peekedToken = _peekedToken;
            _peekedToken = _peekedToken2;
            _peekedToken2 = null;
            return peekedToken;
        }
        return ReadTokenInternal();
    }

    /// <summary>
    /// Reads characters up to (not including) the next end-of-line byte or the
    /// end of the stream; the terminating byte itself is consumed.
    /// </summary>
    public string ReadUntilEol()
    {
        var sb = new StringBuilder();

        while (true)
        {
            int c = ReadByte();

            if (IsEol(c))
            {
                break;
            }

            _ = sb.Append((char)c);
        }

        return sb.ToString();
    }

    /// <summary>
    /// Reads exactly <paramref name="length"/> bytes from the underlying
    /// stream.
    /// </summary>
    /// <remarks>
    /// NOTE(review): this reads straight from the stream and neither returns
    /// nor clears a pending pushed-back byte or peeked tokens - confirm that
    /// callers only use it when no look-ahead is pending.
    /// </remarks>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="length"/> is negative.</exception>
    /// <exception cref="ParsingException">Fewer bytes are available.</exception>
    public byte[] ReadBytes(int length)
    {
        if (length < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(length), length, "The amount of bytes must not be negative");
        }

        // Reject impossible lengths before allocating a buffer for them.
        if (length > _reader.Length - _reader.Position)
        {
            throw new ParsingException("Could not read the full amount of bytes");
        }

        byte[] buffer = new byte[length];

        // A single Read call may legitimately return fewer bytes than asked
        // for, so keep reading until the buffer is full or the stream ends.
        int total = 0;
        while (total < length)
        {
            int read = _reader.Read(buffer, total, length - total);
            if (read <= 0)
            {
                throw new ParsingException("Could not read the full amount of bytes");
            }
            total += read;
        }

        return buffer;
    }

    /// <summary>Reads one byte (honouring the pushed-back byte) as a char.</summary>
    public char ReadChar()
    {
        return (char)ReadByte();
    }

    // Returns the pushed-back byte when there is one, else the next stream byte
    // (-1 at the end of the stream).
    private int ReadByte()
    {
        if (_byteRead != -1)
        {
            int result = _byteRead;
            _byteRead = -1;
            return result;
        }

        return _reader.ReadByte();
    }

    private string ReadTokenInternal()
    {
        int b = SkipWhitespaces();
        if (b == -1)
        {
            return null;
        }

        int c = b;
        switch (c)
        {
            case '%': return "%";
            case '/': return "/";
            case '[': return "[";
            case ']': return "]";
            case '(': return "(";
            case ')': return ")";
            case '<':
                b = ReadByte();
                if (b == '<')
                {
                    return "<<";
                }
                _byteRead = b;
                return "<";
            case '>':
                b = ReadByte();
                if (b == '>')
                {
                    return ">>";
                }
                _byteRead = b;
                return ">";
            default:
                break;
        }

        string token = ParseToken(c);
        if (!string.IsNullOrEmpty(token))
        {
            return token;
        }

        // an empty token is only acceptable at the end of the stream
        return ReadByte() == -1
            ? null
            : throw new ParsingException("Token may not be empty");
    }

    // Collects either a single delimiter or a run of regular characters that
    // starts with b; the first byte after the token is pushed back.
    private string ParseToken(int b)
    {
        var token = new StringBuilder();
        if (_delimiterTable[b + 1])
        {
            _ = token.Append((char)b);
            b = ReadByte();
        }
        else
        {
            while (_regularTable[b + 1])
            {
                _ = token.Append((char)b);
                b = ReadByte();
            }
        }
        _byteRead = b;

        return token.ToString();
    }

    /// <summary>
    /// Skip whitespaces and return the first non-whitespace char
    /// </summary>
    /// <returns>The first non-whitespace byte, or -1 at the end of the stream.</returns>
    private int SkipWhitespaces()
    {
        int c;
        do
        {
            c = ReadByte();
        } while (_whitespaceTable[c + 1]);

        return c;
    }

    /// <summary>Moves the underlying stream one byte backwards.</summary>
    public void Putc()
    {
        _ = _reader.Seek(-1, SeekOrigin.Current);
    }

    // Culture-independent check whether the token parses as a 32-bit integer.
    private static bool IsInteger(string token)
    {
        return int.TryParse(
            token,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out _);
    }

    /// <summary>
    /// Whitespace as defined by PDF
    /// </summary>
    /// <param name="b">Byte value, or -1 for end of stream.</param>
    /// <returns>True for NUL, TAB, LF, FF, CR and SPACE.</returns>
    public static bool IsWhitespace(int b)
    {
        return b is <= 32 and // shortcut everything > 32 => most cases
            (32 or 10 or 12 or 13 or 9 or 0);
    }

    /// <summary>
    /// Regular char as defined by PDF
    /// </summary>
    /// <param name="b">Byte value, or -1 for end of stream.</param>
    /// <returns>True when b is neither whitespace, delimiter nor -1.</returns>
    public static bool IsRegular(int b)
    {
        return !IsWhitespace(b) && !IsDelimiter(b) && b != -1;
    }

    /// <summary>
    /// Delimiter char as defined by PDF
    /// </summary>
    /// <param name="b">Byte value, or -1 for end of stream.</param>
    /// <returns>True for one of ( ) &lt; &gt; [ ] { } / %.</returns>
    public static bool IsDelimiter(int b)
    {
        // 37 40 41 47 60 62 91 93 123 125
        return
            b is '/' or       // 47
            '<' or '>' or     // 60 62
            '[' or ']' or     // 91 93
            '(' or ')' or     // 40 41
            '{' or '}' or     // 123 125
            '%';              // 37
    }

    /// <summary>
    /// End of line as defined by PDF
    /// </summary>
    /// <param name="b">Byte value, or -1 for end of stream.</param>
    /// <returns>True for LF, CR and -1.</returns>
    public static bool IsEol(int b)
    {
        // -1 was added to catch %%EOF without CR or LF
        return b is 10 or 13 or (-1);
    }

    /// <summary>
    /// Remembers the current stream position and seeks to
    /// <paramref name="newPosition"/> (relative to the end when negative).
    /// All look-ahead is discarded.
    /// </summary>
    public void PushPosition(long newPosition)
    {
        _positions.Push(_reader.Position);
        _ = newPosition < 0
            ? _reader.Seek(newPosition, SeekOrigin.End)
            : _reader.Seek(newPosition, SeekOrigin.Begin);

        ResetLookahead();
    }

    /// <summary>
    /// Seeks back to the position saved by the matching
    /// <see cref="PushPosition"/>. All look-ahead is discarded.
    /// </summary>
    public void PopPosition()
    {
        _ = _reader.Seek(_positions.Pop(), SeekOrigin.Begin);

        ResetLookahead();
    }

    // Look-ahead belongs to the position it was read at; after a seek neither
    // the peeked tokens nor the pushed-back byte may be served any more.
    private void ResetLookahead()
    {
        _peekedToken = null;
        _peekedToken2 = null;
        _byteRead = -1;
    }
}
ci9GbGF0ZURlY29kZS9JRFs8ODgyNkQ3RkI0OUQ2RDVBMDM5QTY1MTU0RjMwOUMyQUI+PDk5RjA0 49 | OTNGQjk3MjI1NEY4N0I5MzA3NTYxNzRDRjk5Pl0vSW5kZXhbNzAzIDE0XS9JbmZvIDcwMiAwIFIv 50 | TGVuZ3RoIDU4L1ByZXYgNjIzODAwNC9Sb290IDcwNCAwIFIvU2l6ZSA3MTcvVHlwZS9YUmVmL1db 51 | MSAyIDFdPj5zdHJlYW0NCmjeYmJkEGBgYmDuBRIMoUCCcSOIUAQRS4EEVyuQYNkDJN6cYmBiZPID 52 | qWNgRCL+/xf6CxBgAO9WCPMNCmVuZHN0cmVhbQ1lbmRvYmo="; 53 | 54 | var xrefStream = PdfObject.ParseAny(base64XrefStream.Base64ToLexer()) as PdfIndirectObject; 55 | PdfStream pdfStream = xrefStream.Object as PdfStream; 56 | var data = pdfStream.Decode(); 57 | 58 | string hex = data.ToHexString(); 59 | 60 | // known good result: 61 | Assert.Equal("0100100001039d000103f2000104a3000105c400010669000110ee000114aa00010074000202c2000202c2010202c2020202c2030101d400", hex); 62 | 63 | // this yields now a decoded buffer that needs to get decoded further using PNG algos 64 | // W[1 2 1] (4 columns) 65 | // W[1 3 1] (5 columns, larger indexes) 66 | 67 | // needed to resolve the values for refs encoded with 2 68 | var base64Object706 = @"NzA2IDAgb2JqDTw8L0ZpbHRlci9GbGF0ZURlY29kZS9GaXJzdCAzMC9MZW5ndGggMTkzL04gNC9U 69 | eXBlL09ialN0bT4+c3RyZWFtDQpo3kSOwQ6CMBBEf2W/wG0BARPSRFAJBwKxHkwIh1qrUcES6EH/ 70 | 3gIaTzO78zKZgDpAIKAuUOpZ9YC6rtUlUD+EKMJEN7rnnZBqPAZ/YgnsrQ8n3nrGcPsyKTfCjFTK 71 | 6dQwJ2WvJVemwnKzw6wVVxXXeCxOdyWNhbN2hMkMM1ZhliSxGNQZAhKO37pCrjrRC3PTT4wbIR+/ 72 | DRZZTYjtKUq4iGZQ1uRAka+/J8+BLIiDh3en/itRd3PO2EeAAQDI4UbhDQplbmRzdHJlYW0NZW5k 73 | b2JqDQ=="; 74 | 75 | // this contains the rest of the refs 76 | var base64Object559 = @"NTU5IDAgb2JqDTw8L0RlY29kZVBhcm1zPDwvQ29sdW1ucyA1L1ByZWRpY3RvciAxMj4+L0ZpbHRl 77 | ci9GbGF0ZURlY29kZS9JRFs8ODgyNkQ3RkI0OUQ2RDVBMDM5QTY1MTU0RjMwOUMyQUI+PDk5RjA0 78 | OTNGQjk3MjI1NEY4N0I5MzA3NTYxNzRDRjk5Pl0vSW5mbyA3MDIgMCBSL0xlbmd0aCAxNTQ2L1Jv 79 | b3QgNzA0IDAgUi9TaXplIDcwMy9UeXBlL1hSZWYvV1sxIDMgMV0+PnN0cmVhbQ0KaN7sl2lsVVUQ 80 | x+855769rxstLaGlaHFBlKUIQsoSQC2C0igmiGiCKShScQcEUYIYYqIsSuIWEUvEWEMrYixqgsao 81 | 
RMVIsKjgRkoENSqicaks1s7vkDf3iyR85334ZzJn7sycWc+zgfyssX0PBzYITItgMBn6fuUUgfZK 82 | OJXILBEcNBHOasG6BuiFgtVrBQuTgrGrkH9ZMN6/G832aiR/Vr6d0o1u51z434FefoOgW47MA/Lt 83 | 1HGC478QTvZu9H8sOPgRON7DQtXsLu7G2LYV3ZjoEBm7vl00tO2X05lFgj1f4Kvjajf5S85i4O6D 84 | vgda0GSGCI7mFpkqoQ+tgb5QMDFcOFsnwRkMB4tH70V/P7ViLTpHdWM4eT38w3oa7tHTOmJljkW+ 85 | /R68BJyHJLby8CpvChrEt7AP+TL/RDQTPdsoXrnZZCpGJGcJpyODZCwn7/I7hG6pkNOD4o85ch2n 86 | /6rO2FR8oCpsDYiMuzbiIVHK+1Q07MX/xGd8y00bJDImdRmcLYJbiE+GukosxtaLZLZVJOf72hsL 87 | voLMeE7hBLWCG8nFQOyWzRBs3C2chUboO36icvZqNZo/BdNfCpZWYF3yaCo2Ct3rcsE+i5C8CCve 88 | rr/1XdzxFrXuboMmAhn6YtXD0L7O3xbNi5+hbn12nHBW30CUkJzka3KA2orH0bw00qd86xbp3c2H 89 | +IMVezP6z4DPfU29WNk3E/77cLaDdcJvvx7rQ4VOjsNb0WY6qTRTq9GONWtm7Xnw/d2HwemCfg3+ 90 | BuWHo/mW+eCyObuBeV2w4BxOv0FyBHypxvQMPDFDItbx0z7ILcS6vVHq3zQdFE7rpZEa9vIlyPuv 91 | vB7fQb4f6W7zjnqV7STa+2R6rJPaMwd+JBpk2TJzQuZGsJVvm3ViGCTTElWzcppg9unclDuRxxFz 92 | sEW9WT9tAq1w25fTp+AMB6tUv30W7AGO5JTpZ9FsVuq8sn6ytYF0sfkkMmf2RLJPr9mzBXtQXZYe 93 | MeSigG9DIhMy5cJZRKOc+m9EvpSI7eL0D/w/BP8l4r8JbWM4pb9Cqtfu1zkWNEdOn9PbOerW0aGu 94 | FT2+ur6iVqlw8yYcqSITevkj4BP0bE/B6Wyx/g8JVtBTldeI/AKfi2n4TzSqF0fsMjfcu/jjZ7L3 95 | kCpy1I97VT0MqVW7DZpKcGAKb5NMAyf9FeTjefgk6CcGE6xkFTGXaWPSZfDxMByrU9RLhldDDwR9 96 | rM4FL+CUenM+F9S/wwdHBl2bRslwd/OtZjxGHSZbtHfCUWSc3bEEnb1lwpgCNsgEpmj2JnQOUPkw 97 | Bacr4gMc+znRZmK4v6EPQPeH5tRRpTGf91+hH41oZtcnplMDPnqbNBcn+pGOtu/BeQy6STsoLx9E 98 | PhwUiaT3mQng2LmuHWQap86CXpCbLaZqM/Tv8JmEQaT7LK8gS2zDPuSduxTjTyGTOU02XaHufbuD 99 | OJD9+Gadh3HeQkl2RKIADtGOUW+uVN9sjm+rlwkO+Fr8nLAb/p3w2W4VP5BTYu7okfgH0CuwgmSc 100 | WW3JwhjeQs6/DPeBzIQEkUzhW8rXHpFMLWOP3IoV7tuD2ivCSskV3B2f3Ufs6OeJCe+6dD0a0BN/ 101 | HBleC2lmXRn7IuTVkfQvAZntpqEp9+oLSpgqxVRdAZsiNVtflfYtkCwn6OgEfsZ36+vRbyU/W7LM 102 | OjdL95eftymftfl43lvfpSF9WnW+eFXJLu7HG6+KCVMOXcS0TDD/a5iN+cyHOHdMk4WQvZmR6jJn 103 | NqjmuN9lZDDRoH104kX9W6QLmFQ19dpfJdw9w3wonaPxd3ji/uLbN6Dz0On/BSBve/EWIobJZt1Z 104 | fpf57RaOj7wNmAn56E8elU09kV1mUrqdQ95XdqlG2N6e+99havl/kZUMmnk7tGKHkmXDG76BCjFM 105 | 
v4Asj2RrD1urbw83V2mPCb+7mQ8B9VnAtDf0rOm+u2natZNTHwfZk6eCYSfIt7yrA24d4HMQi2Aa 106 | xG7AGy+gHwOqNygG6eigNCJZru8rszzCz4voPFWfT+NpPDmadSc9Paa07fofmeP/CTAAUeNQwA0K 107 | ZW5kc3RyZWFtDWVuZG9iag0="; 108 | xrefStream = PdfObject.ParseAny(base64Object559.Base64ToLexer()) as PdfIndirectObject; 109 | pdfStream = xrefStream.Object as PdfStream; 110 | data = pdfStream.Decode(); 111 | 112 | hex = data.ToHexString(); 113 | var expectedResult = 114 | "0000000000010220f0000102219d00010221e80001022266000102231300010234c0000102361100" + 115 | "0102372e00010237aa00010266f20001026781000102aee4000102af5b000102d2ee000102e2f600" + 116 | "0102e748000102e7f7000102ed20000103b343000103b42f000103b4de000103b62c000106859600" + 117 | "0106867a00010687290001068dcb000106904e00010692d0000107e51100010827e8000108345c00" + 118 | "0108392800010869b40001087605000108771500010877c40001087a0800010d378e000114181a00" + 119 | "0116b7ed0001176dcf000117cde0000117e386000117e483000117e532000117ed1f000117efa100" + 120 | "0117f21e000117f496000117f70e000118023f0001193f620001194a8100011a381100011a434500" + 121 | "011a4a7b00011b01c500011b0cf500011b13c800011c0e4300011c0f6700011c101600011c121800" + 122 | "011c155400012060f300012061e30001206292000120666a00012069a6000124b092000124b18e00" + 123 | "0124b23d000124b424000124b669000124b8d6000124bb06000124c729000124d377000124d79600" + 124 | "0128f5b3000128f6ac000128f75b000128fb32000128fd9d00012900020001291007000129156b00" + 125 | "012af67600012af77b00012af82a00012d060b00012db32700012e996300012f93bf00012f94bd00" + 126 | "012f956c00012f9abf00012f9d3b00012f9f6d00012fa1c900012fa42300012fa69000012fa92000" + 127 | "012fb5f20001308ed900013095ae0001309a86000130fdaa00013106f30001310ba5000131bda700" + 128 | "0131c80a000131cf84000131d02c000131d0da0001324522000132466100013247120001324e5400" + 129 | "01324e930001324ece000132f304000133219400013338ef000133a3c30001341ac40001348baf00" + 130 | "0134bb88000134bc74000134bd69000134c743000134dc5f000134e7a9000135034e0001351c9a00" + 131 | 
"01353a130001353b4a0001353bfb000135427700013544ea000135475600013547910001354a0000" + 132 | "01354a5a00013555d1000135e35c000135eec6000135ef850001366921000136761a000136771d00" + 133 | "0137067e00013712090001375c060001375d310001375de200013763e8000137666700013766b200" + 134 | "013767ab0001376a240001376a630001376b2c0001376d960001376fff0001377c200001377d0b00" + 135 | "01377e5a0001385dba0001386a7f0001386b450001386c8c0001393fe90001394b1c00013a535d00" + 136 | "013a5e2600013b566b00013b57a600013b585700013b5d0000013b5f6d00013b619700013b620300" + 137 | "013b623800013b643700013b64ec00013b658e00013b65c300013b690000013b6e9300013b71a000" + 138 | "013b72e700013b739f00013b82c600013b87a300013b8bdb00013b8ce6000145e727000145e85800" + 139 | "0145e909000145ee66000145f0ea000145fd140001476344000148042a000148b270000148b37500" + 140 | "0148b426000148b93a000148bb97000148bdc8000148beaf000148c0d3000148c14b000148c20c00" + 141 | "0148c519000148d211000148d8f000014d756a00014e5a5300014e65c600014e679200014e6b3800" + 142 | "014e6bef00014e6c9800014e6e1a00014e6f0300014e79c900014f062000015013ba000150150800" + 143 | "015015b90001504d2100015050160001505203000150540300015055ab00015057cb0001505a6400" + 144 | "01505b9a0001505cb90001505e3b00015060d900015062ec000150642500015065b1000150676200" + 145 | "015068ef0001506a670001506c0d0001506dc30001506ea40001506f720001507062000150713a00" + 146 | "015072000001507302000150752800015088a70001508aa80001508b850001509a660001509e7300" + 147 | "0150a2a7000150a60b000150b123000150b88e000150baa3000150c074000150c468000150c75600" + 148 | "0150c900000150ceac000150cfea000150d3c4000150d70f000150d9f1000150db36000150dbdf00" + 149 | "0150dfb9000150e359000150e592000150e87d000150eb69000150ee17000150ef16000150eff100" + 150 | "0150fb5a000150fc14000150fccc00015100b700015103b10001510648000151245e0001517d0b00" + 151 | "0151a693000151c2be000151df16000152557e00015258d50001525e9b0001528115000152840000" + 152 | "0152875b0001528a1e0001528cf60001528fd00001529301000152962500015299d80001529cc400" + 153 
| "0152a005000152a2c2000152a6ab000152a994000152b26f000152bac9000152bdfc000152cb6500" + 154 | "0152cffd000152d338000152d67a000152ea08000152f954000153036b00015306d30001530a1200" + 155 | "01530da200015310e4000153143800015317660001531a9f0001531dc700015320f4000153244200" + 156 | "015327360001532a4a0001532d250001533059000153330f00015336090001533760000153383e00" + 157 | "0153391b0001533ec300015346700001534b840001534fc00001535a97000153d68b000153f1f500" + 158 | "0154005a0001544333000154509a00015453c500015457010001545b0a0001545e09000154611d00" + 159 | "0154642600015466fc0001546c680001546f5f0001547544000154786d0001547e43000154813300" + 160 | "0154861f000154890e0001548e9f00015492db000154962c0001549d85000154a9c0000154ac6c00" + 161 | "0154af1f000154b0c8000154b289000154b44d000154b5df000154b780000154b91e000154c52c00" + 162 | "0154d1df000154d50e000154d83c000154db45000154de3d000154e139000154e40c000154e70900" + 163 | "0154f02e000154f3a4000154f455000155130500015514f700015517210001551799000155193f00" + 164 | "01551b470001551d2500015521430001552f3200015541c400015551e50001555b270001555e3700" + 165 | "0155620f00015564da0001556af100015570a100015574d90001557a5c00015582bd00015589cc00" + 166 | "01558ff700015594b400015597c900015598760001559b8d000155be0e000155e9ea0001562cbe00" + 167 | "01562f30000156524c0001566e34000156795d0001567c73000156823b00015685c10001568d3300" + 168 | "015693c600015695b2000156d32f000156d6ad000156d78c000156d899000156dfa7000156e82a00" + 169 | "0156f157000156f585000156fe0600015706740001571106000157243a0001573502000157494f00" + 170 | "01575364000157562e0001576dd20001577d46000157879500015790c30001579659000157992e00" + 171 | "0157a3ba000157ba72000157bec5000157c60d000157c76a0001582a0b0001582d4f000158419400" + 172 | "015853a8000158622c0001586b91000158713300015873f100015875e90001587cc3000158833100" + 173 | "0158890500015889b40001588a6c0001588b26000158983e0001589ba2000158a035000158a2e600" + 174 | "0158abf6000158af6b000158bf86000158ccfa000158d08a000158efb60001590c2000015930bf00" + 
175 | "01594f85000159672400015978f50001597fe2000159b120000159bf7a000159c506000159cf8c00" + 176 | "0159d3c1000159deb300015a001600015a04a600015a0a0300015a0ceb00015a134e00015a164500" + 177 | "015a16f400015a18e500015a198e00015a1bbf00015a4d0e00015a503700015a64f500015a6f4300" + 178 | "015a84ab00015a877500015a8a6200015a8d5800015a8e1100015a911d00015a91ca00015aa21800" + 179 | "015aa43100015abdd300015ac57c00015ac62400015ac91900015ac9ca00015acd0c00015acf7900" + 180 | "015ad1e000015adfaa00015ae7a500015c2fc400015c30cd00015c317e00015c352300015c37a200" + 181 | "015c3a0600015c3c7600015c4d2300015d88b200015d953600015e020100015e0d2a00015e406e00" + 182 | "015e417f00015ea41500015ea53d00015ea5a200015ede1c00015f13af00015f13e400015f164e00" + 183 | "015f168300015f16b800015f1d6f00015f1e3300015f1ec500015f2d2000015f2e2800015f2f3400" + 184 | "02000003000200000f000200000f010200000f020200000f030200000f040200000f050200000f06" + 185 | "0200000f070200000f080200001300020000170002000022000200002b000200003c000200004100" + 186 | "0200004600020000500002000058000200005e00020000740002000088000200009a00020000b100" + 187 | "020000c600020000ce00020000e60002000188000200020b0002000213000200021f000200022900" + 188 | "02000229010200022902020002290302000229040200022905020002290602000229070200022908" + 189 | "0200022909020002290a020002290b020002290c020002290d020002290e020002290f0200022910" + 190 | "02000229110200022912020002291302000229140200022915020002291602000229170200022918" + 191 | "0200022919020002291a020002291b020002291c020002291d020002291e020002291f0200022920" + 192 | "02000229210200022922020002292302000229240200022925020002292602000229270200022928" + 193 | "0200022929020002292a020002292b020002292c020002292d020002292e020002292f0200022930" + 194 | "02000229310200022932020002293302000229340200022935020002293602000229370200022938" + 195 | "0200022939020002293a020002293b020002293c020002293d020002293e020002293f0200022940" + 196 | "02000229410200022942020002294302000229440200022945020002294602000229470200022948" 
+ 197 | "0200022949020002294a020002294b020002294c020002294d020002294e020002294f0200022950" + 198 | "02000229510200022952020002295302000229540200022955020002295602000229570200022958" + 199 | "0200022959020002295a020002295b020002295c020002295d020002295e020002295f0200022960" + 200 | "0200022961020002296202000229630200022a000200022a010200022a020200022a030200022a04" + 201 | "0200022b000200022b010200022d000200022d010200022d020200022d030200022e00"; 202 | Assert.Equal(expectedResult, hex); 203 | } 204 | 205 | } 206 | --------------------------------------------------------------------------------