├── .gitattributes ├── .gitignore ├── DocumentLayoutAnalysis ├── DlaViewer │ ├── App.xaml │ ├── App.xaml.cs │ ├── AssemblyInfo.cs │ ├── CustomPlotController.cs │ ├── Distribution.cs │ ├── DlaViewer.csproj │ ├── MainViewModel.cs │ ├── MainWindow.xaml │ ├── MainWindow.xaml.cs │ ├── PdfDocumentModel.cs │ └── PdfPageModel.cs ├── DocumentLayoutAnalysis.sln ├── DocumentLayoutAnalysis │ ├── App.config │ ├── DocumentLayoutAnalysis.csproj │ ├── Export │ │ └── Scemas │ │ │ ├── alto-4-1.xsd │ │ │ ├── pagecontent.xsd │ │ │ └── tei_ocr.rng │ ├── MarkedContentTest.cs │ ├── PageExportExample.cs │ ├── Program.cs │ ├── Properties │ │ └── AssemblyInfo.cs │ ├── RamerDouglasPeucker.cs │ ├── Resources │ │ ├── README.md │ │ └── Samples │ │ │ ├── 104-7-3.pdf │ │ │ ├── APISmap1.1.old.xml │ │ │ ├── APISmap1.1.xml │ │ │ ├── APISmap1.1_bw_raw.png │ │ │ ├── APISmap1.1_raw.png │ │ │ ├── APISmap1.1default.xml │ │ │ ├── APISmap1.pdf │ │ │ ├── Random 2 Columns Lists Chart.1.alto.xml │ │ │ ├── Random 2 Columns Lists Chart.1.xml │ │ │ ├── Random 2 Columns Lists Chart.1_bw_raw.png │ │ │ ├── Random 2 Columns Lists Chart.1_raw.png │ │ │ ├── Random 2 Columns Lists Chart.pdf │ │ │ ├── Random 2 Columns Lists Chart_PDF-A.pdf │ │ │ ├── excel.pdf │ │ │ ├── excel_2.pdf │ │ │ ├── oxyplot.pdf │ │ │ ├── oxyplot_2.pdf │ │ │ ├── oxyplot_3.pdf │ │ │ ├── oxyplot_4.pdf │ │ │ ├── oxyplot_5.pdf │ │ │ ├── oxyplot_6.pdf │ │ │ ├── oxyplot_7.pdf │ │ │ ├── rotated block2.1.xml │ │ │ ├── rotated block2.1_bw_raw.png │ │ │ ├── rotated block2.1_raw.png │ │ │ └── rotated block2.pdf │ ├── doc │ │ ├── docstrum example 1.png │ │ ├── docstrum example 2.png │ │ ├── nearest neighbour word example v2.png │ │ ├── rxyc example.png │ │ ├── viewer.png │ │ ├── xy cut.gif │ │ └── xy cut_small.gif │ └── packages.config └── ImageConverter │ ├── ImageConverter.csproj │ ├── PdfImageConverter.cs │ ├── Properties │ └── AssemblyInfo.cs │ ├── README.md │ ├── Structures.cs │ ├── x64 │ └── libmupdf.dll │ └── x86 │ └── libmupdf.dll └── README.md 
/.gitattributes: -------------------------------------------------------------------------------- 1 | ############################################################################### 2 | # Set default behavior to automatically normalize line endings. 3 | ############################################################################### 4 | * text=auto 5 | 6 | ############################################################################### 7 | # Set default behavior for command prompt diff. 8 | # 9 | # This is needed for earlier builds of msysgit that do not have it on by 10 | # default for csharp files. 11 | # Note: This is only used by the command line 12 | ############################################################################### 13 | #*.cs diff=csharp 14 | 15 | ############################################################################### 16 | # Set the merge driver for project and solution files 17 | # 18 | # Merging from the command prompt will add diff markers to the files if there 19 | # are conflicts (Merging from VS is not affected by the settings below, in VS 20 | # the diff markers are never inserted). Diff markers may cause the following 21 | # file extensions to fail to load in VS. An alternative would be to treat 22 | # these files as binary and thus will always conflict and require user 23 | # intervention with every merge. 
To do so, just uncomment the entries below 24 | ############################################################################### 25 | #*.sln merge=binary 26 | #*.csproj merge=binary 27 | #*.vbproj merge=binary 28 | #*.vcxproj merge=binary 29 | #*.vcproj merge=binary 30 | #*.dbproj merge=binary 31 | #*.fsproj merge=binary 32 | #*.lsproj merge=binary 33 | #*.wixproj merge=binary 34 | #*.modelproj merge=binary 35 | #*.sqlproj merge=binary 36 | #*.wwaproj merge=binary 37 | 38 | ############################################################################### 39 | # behavior for image files 40 | # 41 | # image files are treated as binary by default. 42 | ############################################################################### 43 | #*.jpg binary 44 | #*.png binary 45 | #*.gif binary 46 | 47 | ############################################################################### 48 | # diff behavior for common document formats 49 | # 50 | # Convert binary document formats to text before diffing them. This feature 51 | # is only available from the command line. Turn it on by uncommenting the 52 | # entries below. 53 | ############################################################################### 54 | #*.doc diff=astextplain 55 | #*.DOC diff=astextplain 56 | #*.docx diff=astextplain 57 | #*.DOCX diff=astextplain 58 | #*.dot diff=astextplain 59 | #*.DOT diff=astextplain 60 | #*.pdf diff=astextplain 61 | #*.PDF diff=astextplain 62 | #*.rtf diff=astextplain 63 | #*.RTF diff=astextplain 64 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.suo 8 | *.user 9 | *.userosscache 10 | *.sln.docstates 11 | 12 | # User-specific files (MonoDevelop/Xamarin Studio) 13 | *.userprefs 14 | 15 | # Build results 16 | [Dd]ebug/ 17 | [Dd]ebugPublic/ 18 | [Rr]elease/ 19 | [Rr]eleases/ 20 | x64/ 21 | x86/ 22 | bld/ 23 | [Bb]in/ 24 | [Oo]bj/ 25 | [Ll]og/ 26 | 27 | # Visual Studio 2015/2017 cache/options directory 28 | .vs/ 29 | # Uncomment if you have tasks that create the project's static files in wwwroot 30 | #wwwroot/ 31 | 32 | # Visual Studio 2017 auto generated files 33 | Generated\ Files/ 34 | 35 | # MSTest test Results 36 | [Tt]est[Rr]esult*/ 37 | [Bb]uild[Ll]og.* 38 | 39 | # NUNIT 40 | *.VisualState.xml 41 | TestResult.xml 42 | 43 | # Build Results of an ATL Project 44 | [Dd]ebugPS/ 45 | [Rr]eleasePS/ 46 | dlldata.c 47 | 48 | # Benchmark Results 49 | BenchmarkDotNet.Artifacts/ 50 | 51 | # .NET Core 52 | project.lock.json 53 | project.fragment.lock.json 54 | artifacts/ 55 | **/Properties/launchSettings.json 56 | 57 | # StyleCop 58 | StyleCopReport.xml 59 | 60 | # Files built by Visual Studio 61 | *_i.c 62 | *_p.c 63 | *_i.h 64 | *.ilk 65 | *.meta 66 | *.obj 67 | *.iobj 68 | *.pch 69 | *.pdb 70 | *.ipdb 71 | *.pgc 72 | *.pgd 73 | *.rsp 74 | *.sbr 75 | *.tlb 76 | *.tli 77 | *.tlh 78 | *.tmp 79 | *.tmp_proj 80 | *.log 81 | *.vspscc 82 | *.vssscc 83 | .builds 84 | *.pidb 85 | *.svclog 86 | *.scc 87 | 88 | # Chutzpah Test files 89 | _Chutzpah* 90 | 91 | # Visual C++ cache files 92 | ipch/ 93 | *.aps 94 | *.ncb 95 | *.opendb 96 | *.opensdf 97 | *.sdf 98 | *.cachefile 99 | *.VC.db 100 | *.VC.VC.opendb 101 | 102 | # Visual Studio profiler 103 | *.psess 104 | *.vsp 105 | *.vspx 106 | *.sap 107 | 108 | # Visual Studio Trace Files 109 | *.e2e 110 | 111 | # TFS 2012 Local Workspace 112 | $tf/ 113 | 114 | # Guidance Automation Toolkit 115 | *.gpState 116 | 117 | # ReSharper is a .NET coding 
add-in 118 | _ReSharper*/ 119 | *.[Rr]e[Ss]harper 120 | *.DotSettings.user 121 | 122 | # JustCode is a .NET coding add-in 123 | .JustCode 124 | 125 | # TeamCity is a build add-in 126 | _TeamCity* 127 | 128 | # DotCover is a Code Coverage Tool 129 | *.dotCover 130 | 131 | # AxoCover is a Code Coverage Tool 132 | .axoCover/* 133 | !.axoCover/settings.json 134 | 135 | # Visual Studio code coverage results 136 | *.coverage 137 | *.coveragexml 138 | 139 | # NCrunch 140 | _NCrunch_* 141 | .*crunch*.local.xml 142 | nCrunchTemp_* 143 | 144 | # MightyMoose 145 | *.mm.* 146 | AutoTest.Net/ 147 | 148 | # Web workbench (sass) 149 | .sass-cache/ 150 | 151 | # Installshield output folder 152 | [Ee]xpress/ 153 | 154 | # DocProject is a documentation generator add-in 155 | DocProject/buildhelp/ 156 | DocProject/Help/*.HxT 157 | DocProject/Help/*.HxC 158 | DocProject/Help/*.hhc 159 | DocProject/Help/*.hhk 160 | DocProject/Help/*.hhp 161 | DocProject/Help/Html2 162 | DocProject/Help/html 163 | 164 | # Click-Once directory 165 | publish/ 166 | 167 | # Publish Web Output 168 | *.[Pp]ublish.xml 169 | *.azurePubxml 170 | # Note: Comment the next line if you want to checkin your web deploy settings, 171 | # but database connection strings (with potential passwords) will be unencrypted 172 | *.pubxml 173 | *.publishproj 174 | 175 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 176 | # checkin your Azure Web App publish settings, but sensitive information contained 177 | # in these scripts will be unencrypted 178 | PublishScripts/ 179 | 180 | # NuGet Packages 181 | *.nupkg 182 | # The packages folder can be ignored because of Package Restore 183 | **/[Pp]ackages/* 184 | # except build/, which is used as an MSBuild target. 
185 | !**/[Pp]ackages/build/ 186 | # Uncomment if necessary; however, generally it will be regenerated when needed 187 | #!**/[Pp]ackages/repositories.config 188 | # NuGet v3's project.json files produce more ignorable files 189 | *.nuget.props 190 | *.nuget.targets 191 | 192 | # Microsoft Azure Build Output 193 | csx/ 194 | *.build.csdef 195 | 196 | # Microsoft Azure Emulator 197 | ecf/ 198 | rcf/ 199 | 200 | # Windows Store app package directories and files 201 | AppPackages/ 202 | BundleArtifacts/ 203 | Package.StoreAssociation.xml 204 | _pkginfo.txt 205 | *.appx 206 | 207 | # Visual Studio cache files 208 | # files ending in .cache can be ignored 209 | *.[Cc]ache 210 | # but keep track of directories ending in .cache 211 | !*.[Cc]ache/ 212 | 213 | # Others 214 | ClientBin/ 215 | ~$* 216 | *~ 217 | *.dbmdl 218 | *.dbproj.schemaview 219 | *.jfm 220 | *.pfx 221 | *.publishsettings 222 | orleans.codegen.cs 223 | 224 | # Including strong name files can present a security risk 225 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 226 | #*.snk 227 | 228 | # Since there are multiple workflows, uncomment next line to ignore bower_components 229 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 230 | #bower_components/ 231 | 232 | # RIA/Silverlight projects 233 | Generated_Code/ 234 | 235 | # Backup & report files from converting an old project file 236 | # to a newer Visual Studio version. 
Backup files are not needed, 237 | # because we have git ;-) 238 | _UpgradeReport_Files/ 239 | Backup*/ 240 | UpgradeLog*.XML 241 | UpgradeLog*.htm 242 | ServiceFabricBackup/ 243 | *.rptproj.bak 244 | 245 | # SQL Server files 246 | *.mdf 247 | *.ldf 248 | *.ndf 249 | 250 | # Business Intelligence projects 251 | *.rdl.data 252 | *.bim.layout 253 | *.bim_*.settings 254 | *.rptproj.rsuser 255 | 256 | # Microsoft Fakes 257 | FakesAssemblies/ 258 | 259 | # GhostDoc plugin setting file 260 | *.GhostDoc.xml 261 | 262 | # Node.js Tools for Visual Studio 263 | .ntvs_analysis.dat 264 | node_modules/ 265 | 266 | # Visual Studio 6 build log 267 | *.plg 268 | 269 | # Visual Studio 6 workspace options file 270 | *.opt 271 | 272 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 273 | *.vbw 274 | 275 | # Visual Studio LightSwitch build output 276 | **/*.HTMLClient/GeneratedArtifacts 277 | **/*.DesktopClient/GeneratedArtifacts 278 | **/*.DesktopClient/ModelManifest.xml 279 | **/*.Server/GeneratedArtifacts 280 | **/*.Server/ModelManifest.xml 281 | _Pvt_Extensions 282 | 283 | # Paket dependency manager 284 | .paket/paket.exe 285 | paket-files/ 286 | 287 | # FAKE - F# Make 288 | .fake/ 289 | 290 | # JetBrains Rider 291 | .idea/ 292 | *.sln.iml 293 | 294 | # CodeRush 295 | .cr/ 296 | 297 | # Python Tools for Visual Studio (PTVS) 298 | __pycache__/ 299 | *.pyc 300 | 301 | # Cake - Uncomment if you are using it 302 | # tools/** 303 | # !tools/packages.config 304 | 305 | # Tabs Studio 306 | *.tss 307 | 308 | # Telerik's JustMock configuration file 309 | *.jmconfig 310 | 311 | # BizTalk build output 312 | *.btp.cs 313 | *.btm.cs 314 | *.odx.cs 315 | *.xsd.cs 316 | 317 | # OpenCover UI analysis results 318 | OpenCover/ 319 | 320 | # Azure Stream Analytics local run output 321 | ASALocalRun/ 322 | 323 | # MSBuild Binary and Structured Log 324 | *.binlog 325 | 326 | # NVidia Nsight GPU debugger configuration file 327 | *.nvuser 328 | 329 | # MFractors 
(Xamarin productivity tool) working folder 330 | .mfractor/ 331 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/App.xaml: -------------------------------------------------------------------------------- 1 |  6 | 7 | 8 | 9 | 10 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/App.xaml.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Collections.Generic; 3 | using System.Configuration; 4 | using System.Data; 5 | using System.Linq; 6 | using System.Threading.Tasks; 7 | using System.Windows; 8 | 9 | namespace DlaViewer 10 | { 11 | /// 12 | /// Interaction logic for App.xaml 13 | /// 14 | public partial class App : Application 15 | { 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/AssemblyInfo.cs: -------------------------------------------------------------------------------- 1 | using System.Windows; 2 | 3 | [assembly: ThemeInfo( 4 | ResourceDictionaryLocation.None, //where theme specific resource dictionaries are located 5 | //(used if a resource is not found in the page, 6 | // or application resource dictionaries) 7 | ResourceDictionaryLocation.SourceAssembly //where the generic resource dictionary is located 8 | //(used if a resource is not found in the page, 9 | // app, or any theme specific resource dictionaries) 10 | )] 11 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/CustomPlotController.cs: -------------------------------------------------------------------------------- 1 | namespace DlaViewer 2 | { 3 | using OxyPlot; 4 | 5 | public class CustomPlotController : PlotController 6 | { 7 | private readonly OxyModifierKeys _zoomOxyModifierKeys = OxyModifierKeys.Control; 8 | 9 | public 
CustomPlotController() : base() 10 | { 11 | this.BindMouseDown(OxyMouseButton.Left, PanZoomAt); 12 | this.BindMouseEnter(OxyPlot.PlotCommands.HoverSnapTrack); 13 | this.BindMouseDown(OxyMouseButton.Left, _zoomOxyModifierKeys, OxyPlot.PlotCommands.ZoomRectangle); 14 | this.BindMouseDown(OxyMouseButton.Left, OxyModifierKeys.None, 2, OxyPlot.PlotCommands.ResetAt); 15 | 16 | this.UnbindMouseDown(OxyMouseButton.Middle); 17 | this.UnbindMouseDown(OxyMouseButton.Right); 18 | this.UnbindKeyDown(OxyKey.C, OxyModifierKeys.Control | OxyModifierKeys.Alt); 19 | this.UnbindKeyDown(OxyKey.R, OxyModifierKeys.Control | OxyModifierKeys.Alt); 20 | this.UnbindKeyDown(OxyKey.Up); 21 | this.UnbindKeyDown(OxyKey.Down); 22 | this.UnbindKeyDown(OxyKey.Left); 23 | this.UnbindKeyDown(OxyKey.Right); 24 | 25 | this.UnbindKeyDown(OxyKey.Up, OxyModifierKeys.Control); 26 | this.UnbindKeyDown(OxyKey.Down, OxyModifierKeys.Control); 27 | this.UnbindKeyDown(OxyKey.Left, OxyModifierKeys.Control); 28 | this.UnbindKeyDown(OxyKey.Right, OxyModifierKeys.Control); 29 | this.UnbindMouseWheel(); 30 | } 31 | 32 | private static readonly IViewCommand PanZoomAt = new DelegatePlotCommand( 33 | (view, controller, args) => controller.AddMouseManipulator(view, new PanZoomManipulator(view), args)); 34 | } 35 | 36 | public class PanZoomManipulator : MouseManipulator 37 | { 38 | public PanZoomManipulator(IPlotView plotView) : base(plotView) 39 | { } 40 | 41 | private ScreenPoint PreviousPosition { get; set; } 42 | private DataPoint PreviousPositionShortTerm { get; set; } 43 | private bool IsPanEnabled { get; set; } 44 | 45 | public override void Completed(OxyMouseEventArgs e) 46 | { 47 | base.Completed(e); 48 | 49 | if (!this.IsPanEnabled) 50 | { 51 | return; 52 | } 53 | 54 | this.View.SetCursorType(CursorType.Default); 55 | e.Handled = true; 56 | } 57 | 58 | public override void Delta(OxyMouseEventArgs e) 59 | { 60 | base.Delta(e); 61 | if (this.PreviousPosition.Equals(e.Position)) 62 | { 63 | e.Handled = true; 64 | 
return; 65 | } 66 | 67 | if (!this.IsPanEnabled) 68 | { 69 | e.Handled = true; 70 | return; 71 | } 72 | 73 | DataPoint current = this.InverseTransform(e.Position.X, e.Position.Y); 74 | const double inScale = 1.03; 75 | const double outScale = 0.97; 76 | 77 | if (this.XAxis != null && this.YAxis != null) 78 | { 79 | // this is pan 80 | this.XAxis.Pan(this.PreviousPosition, e.Position); 81 | this.YAxis.Pan(this.PreviousPosition, e.Position); 82 | } 83 | else 84 | { 85 | double scale; 86 | // this is zoom 87 | if (this.YAxis != null && this.YAxis.IsZoomEnabled) 88 | { 89 | if (this.PreviousPositionShortTerm.Y - current.Y > 0) 90 | { 91 | scale = outScale; 92 | } 93 | else if (this.PreviousPositionShortTerm.Y - current.Y < 0) 94 | { 95 | scale = inScale; 96 | } 97 | else 98 | { 99 | scale = 1; 100 | } 101 | 102 | PreviousPositionShortTerm = this.InverseTransform(e.Position.X, e.Position.Y); 103 | this.YAxis.ZoomAt(scale, current.Y); 104 | } 105 | 106 | if (this.XAxis != null && this.XAxis.IsZoomEnabled) 107 | { 108 | if (this.PreviousPositionShortTerm.X - current.X > 0) 109 | { 110 | scale = inScale; 111 | } 112 | else if (this.PreviousPositionShortTerm.X - current.X < 0) 113 | { 114 | scale = outScale; 115 | } 116 | else 117 | { 118 | scale = 1; 119 | } 120 | PreviousPositionShortTerm = this.InverseTransform(e.Position.X, e.Position.Y); 121 | this.XAxis.ZoomAt(scale, current.X); 122 | } 123 | } 124 | this.PlotView.InvalidatePlot(false); 125 | this.PreviousPosition = e.Position; 126 | e.Handled = true; 127 | } 128 | 129 | public override void Started(OxyMouseEventArgs e) 130 | { 131 | base.Started(e); 132 | this.PreviousPosition = e.Position; 133 | 134 | this.IsPanEnabled = (this.XAxis != null && this.XAxis.IsPanEnabled) 135 | || (this.YAxis != null && this.YAxis.IsPanEnabled); 136 | 137 | if (this.IsPanEnabled) 138 | { 139 | if (this.XAxis != null && this.YAxis != null) 140 | { 141 | this.View.SetCursorType(CursorType.Pan); 142 | } 143 | else if (this.XAxis == null && 
this.YAxis != null) 144 | { 145 | this.View.SetCursorType(CursorType.ZoomVertical); 146 | } 147 | else if (this.XAxis != null && this.YAxis == null) 148 | { 149 | this.View.SetCursorType(CursorType.ZoomHorizontal); 150 | } 151 | e.Handled = true; 152 | } 153 | } 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/Distribution.cs: -------------------------------------------------------------------------------- 1 | namespace DlaViewer 2 | { 3 | using OxyPlot; 4 | using OxyPlot.Axes; 5 | using OxyPlot.Series; 6 | using System; 7 | using System.Collections.Generic; 8 | using System.Linq; 9 | using UglyToad.PdfPig.DocumentLayoutAnalysis; 10 | 11 | public class Distribution 12 | { 13 | public IReadOnlyList Values { get; } 14 | 15 | public double Mode { get; } 16 | 17 | public double Average { get; } 18 | 19 | public int BinLength { get; } 20 | 21 | public Dictionary> Bins { get; set; } 22 | 23 | public Distribution(IEnumerable values, int binLength = 1) 24 | { 25 | if (binLength <= 0) 26 | { 27 | throw new ArgumentException(); 28 | } 29 | 30 | Values = values.Select(x => Math.Round(x, 5)).ToList(); 31 | Average = Values.Average(); 32 | Mode = Values.Mode(); 33 | BinLength = binLength; 34 | 35 | var max = (int)Math.Ceiling(Values.Max()); 36 | if (max == 0) 37 | { 38 | max = binLength; 39 | } 40 | else 41 | { 42 | binLength = binLength > max ? 
max : binLength; 43 | } 44 | 45 | var bins = Enumerable.Range(0, (int)Math.Ceiling(max / (double)binLength) + 1) 46 | .Select(x => x * binLength) 47 | .ToDictionary(x => x, _ => new List()); 48 | 49 | foreach (var value in Values) 50 | { 51 | var key = bins.Keys.ElementAt((int)Math.Floor(value / binLength)); 52 | bins[key].Add(value); 53 | } 54 | 55 | Bins = bins; 56 | } 57 | 58 | public PlotModel GetPlotModel(string title = "", double titleFontSize = 12) 59 | { 60 | var distPlotModel = new PlotModel { Title = title, IsLegendVisible = true, TitleFontSize = titleFontSize }; 61 | distPlotModel.Axes.Add(new LinearAxis { Position = AxisPosition.Left, Minimum = 0.0 }); 62 | 63 | var histoSeries = new HistogramSeries() { FillColor = OxyColors.Blue, StrokeThickness = 1, StrokeColor = OxyColors.Black, RenderInLegend = false }; 64 | double peakDist = double.NegativeInfinity; 65 | foreach (var bin in Bins) 66 | { 67 | double pct = bin.Value.Count / (double)Values.Count; 68 | if (pct > peakDist) peakDist = pct; 69 | double start = bin.Key; 70 | double end = bin.Key + BinLength - 10e-7; 71 | histoSeries.Items.Add(new HistogramItem(start, end, pct, bin.Value.Count)); 72 | } 73 | distPlotModel.Series.Add(histoSeries); 74 | 75 | // plot average 76 | ScatterSeries averageSeries = new ScatterSeries() { MarkerType = MarkerType.Diamond, MarkerFill = OxyColors.Red, MarkerSize = 5, Title = "Average" }; 77 | averageSeries.Points.Add(new ScatterPoint(Average, peakDist / 2.0)); 78 | distPlotModel.Series.Add(averageSeries); 79 | 80 | if (!double.IsNaN(Mode)) 81 | { 82 | // plot mode 83 | ScatterSeries modeSeries = new ScatterSeries() { MarkerType = MarkerType.Circle, MarkerFill = OxyColors.Orange, MarkerSize = 5, Title = "Mode" }; 84 | modeSeries.Points.Add(new ScatterPoint(Mode, peakDist / 2.0)); 85 | distPlotModel.Series.Add(modeSeries); 86 | } 87 | 88 | return distPlotModel; 89 | } 90 | } 91 | 92 | /* 93 | public class DistributionBin 94 | { 95 | /// 96 | /// Included 97 | /// 98 | 
public int Start { get; } 99 | 100 | /// 101 | /// Excluded 102 | /// 103 | public int End { get; } 104 | 105 | public double Average; 106 | 107 | public double Mode; 108 | 109 | public DistributionBin(int start, int end, IEnumerable values = null) 110 | { 111 | 112 | } 113 | } 114 | */ 115 | } 116 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/DlaViewer.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | WinExe 5 | netcoreapp3.1 6 | true 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | -------------------------------------------------------------------------------- /DocumentLayoutAnalysis/DlaViewer/MainViewModel.cs: -------------------------------------------------------------------------------- 1 | namespace DlaViewer 2 | { 3 | using ImageConverter; 4 | using OxyPlot; 5 | using OxyPlot.Annotations; 6 | using OxyPlot.Axes; 7 | using OxyPlot.Series; 8 | using System; 9 | using System.Collections.Generic; 10 | using System.Collections.ObjectModel; 11 | using System.ComponentModel; 12 | using System.IO; 13 | using System.Linq; 14 | using UglyToad.PdfPig.Core; 15 | using static UglyToad.PdfPig.Core.PdfSubpath; 16 | 17 | public class MainViewModel : INotifyPropertyChanged, IDisposable 18 | { 19 | public PlotController CustomController { get; } 20 | 21 | public string PdfPigVersion { get; set; } 22 | 23 | private PdfImageConverter _pdfImageConverter; 24 | private PdfDocumentModel _pdfDocumentModel; 25 | 26 | private PlotModel _pagePlotModel; 27 | private IList _currentSeries; 28 | private PlotModel _heightHistoPlotModel; 29 | private PlotModel _widthtHistoPlotModel; 30 | 31 | private int _numberOfPages; 32 | private int _currentPageNumber; 33 | 34 | public ObservableCollection WordExtractorList { get; } 35 | public ObservableCollection PageSegmenterList { get; } 36 | 37 | private PdfPageModel _pdfPageModel; 38 | 39 | private Type 
_wordExtractor; 40 | public Type WordExtractor 41 | { 42 | get 43 | { 44 | return _wordExtractor; 45 | } 46 | 47 | set 48 | { 49 | _wordExtractor = value; 50 | SetWordExtractor(value); 51 | this.RaisePropertyChanged(nameof(WordExtractor)); 52 | } 53 | } 54 | 55 | public void SetWordExtractor(Type wordExtractor) 56 | { 57 | if (_pdfPageModel != null && wordExtractor != null) 58 | { 59 | _pdfPageModel.SetWordExtractor(wordExtractor); 60 | 61 | if (IsDisplayWords) 62 | { 63 | DisplayWords(); 64 | } 65 | 66 | if (IsDisplayTextLines) 67 | { 68 | DisplayTextLines(); 69 | } 70 | 71 | if (IsDisplayTextBlocks) 72 | { 73 | DisplayTextBlocks(); 74 | } 75 | } 76 | } 77 | 78 | private Type _pageSegmenter; 79 | public Type PageSegmenter 80 | { 81 | get 82 | { 83 | return _pageSegmenter; 84 | } 85 | 86 | set 87 | { 88 | _pageSegmenter = value; 89 | SetPageSegmenter(value); 90 | this.RaisePropertyChanged(nameof(PageSegmenter)); 91 | } 92 | } 93 | 94 | public void SetPageSegmenter(Type pageSegmenter) 95 | { 96 | if (_pdfPageModel != null && pageSegmenter != null) 97 | { 98 | _pdfPageModel.SetPageSegmenter(pageSegmenter); 99 | if (IsDisplayTextLines) 100 | { 101 | DisplayTextLines(); 102 | } 103 | 104 | if (IsDisplayTextBlocks) 105 | { 106 | DisplayTextBlocks(); 107 | } 108 | } 109 | } 110 | 111 | public void HidePagePlotModel() 112 | { 113 | _currentSeries = PagePlotModel?.Series.ToList(); 114 | PagePlotModel?.Series.Clear(); 115 | } 116 | 117 | public void ShowPagePlotModel() 118 | { 119 | if (_currentSeries != null) 120 | { 121 | _currentSeries.ToList().ForEach(s => PagePlotModel.Series.Add(s)); 122 | PagePlotModel.InvalidatePlot(true); 123 | this.RaisePropertyChanged(nameof(PagePlotModel)); 124 | } 125 | } 126 | 127 | public PlotModel PagePlotModel 128 | { 129 | get 130 | { 131 | return _pagePlotModel; 132 | } 133 | 134 | private set 135 | { 136 | _pagePlotModel = value; 137 | this.RaisePropertyChanged(nameof(PagePlotModel)); 138 | } 139 | } 140 | 141 | public PlotModel 
HeightHistoPlotModel 142 | { 143 | get 144 | { 145 | return _heightHistoPlotModel; 146 | } 147 | 148 | private set 149 | { 150 | _heightHistoPlotModel = value; 151 | this.RaisePropertyChanged(nameof(HeightHistoPlotModel)); 152 | } 153 | } 154 | 155 | public PlotModel WidthHistoPlotModel 156 | { 157 | get 158 | { 159 | return _widthtHistoPlotModel; 160 | } 161 | 162 | private set 163 | { 164 | _widthtHistoPlotModel = value; 165 | this.RaisePropertyChanged(nameof(WidthHistoPlotModel)); 166 | } 167 | } 168 | 169 | public OxyImage PageImage { get; private set; } 170 | 171 | public int CurrentPageNumber 172 | { 173 | get 174 | { 175 | return _currentPageNumber; 176 | } 177 | 178 | set 179 | { 180 | if (value > NumberOfPages || value < 1) 181 | { 182 | return; 183 | } 184 | 185 | _currentPageNumber = value; 186 | LoadPage(_currentPageNumber); 187 | this.RaisePropertyChanged(nameof(CurrentPageNumber)); 188 | } 189 | } 190 | 191 | public int NumberOfPages 192 | { 193 | get 194 | { 195 | return _numberOfPages; 196 | } 197 | 198 | private set 199 | { 200 | _numberOfPages = value; 201 | this.RaisePropertyChanged(nameof(NumberOfPages)); 202 | } 203 | } 204 | 205 | private bool _clipPaths; 206 | public bool ClipPaths 207 | { 208 | get 209 | { 210 | return _clipPaths; 211 | } 212 | 213 | set 214 | { 215 | if (value == _clipPaths) return; 216 | _clipPaths = value; 217 | if (!string.IsNullOrEmpty(_pdfPath)) 218 | { 219 | _pdfDocumentModel = PdfDocumentModel.Open(_pdfPath, _clipPaths); 220 | LoadPage(CurrentPageNumber); 221 | } 222 | this.RaisePropertyChanged(nameof(ClipPaths)); 223 | } 224 | } 225 | 226 | private bool _removeDuplicateLetters; 227 | public bool RemoveDuplicateLetters 228 | { 229 | get 230 | { 231 | return _removeDuplicateLetters; 232 | } 233 | 234 | set 235 | { 236 | if (value == _removeDuplicateLetters) return; 237 | _removeDuplicateLetters = value; 238 | 239 | if (_pdfPageModel != null) 240 | { 241 | 
_pdfPageModel.SetRemoveDuplicateLetters(_removeDuplicateLetters); 242 | } 243 | 244 | if (IsDisplayLetters) 245 | { 246 | DisplayLetters(); 247 | } 248 | 249 | if (IsDisplayWords) 250 | { 251 | DisplayWords(); 252 | } 253 | 254 | if (IsDisplayTextLines) 255 | { 256 | DisplayTextLines(); 257 | } 258 | 259 | if (IsDisplayTextBlocks) 260 | { 261 | DisplayTextBlocks(); 262 | } 263 | 264 | this.RaisePropertyChanged(nameof(RemoveDuplicateLetters)); 265 | } 266 | } 267 | 268 | bool _isDisplayLetters; 269 | public bool IsDisplayLetters 270 | { 271 | get 272 | { 273 | return _isDisplayLetters; 274 | } 275 | 276 | set 277 | { 278 | if (value == _isDisplayLetters) return; 279 | _isDisplayLetters = value; 280 | 281 | if (_isDisplayLetters) 282 | { 283 | DisplayLetters(); 284 | } 285 | else 286 | { 287 | HideLetters(); 288 | } 289 | 290 | this.RaisePropertyChanged(nameof(IsDisplayLetters)); 291 | } 292 | } 293 | 294 | 295 | bool _isDisplayWords; 296 | public bool IsDisplayWords 297 | { 298 | get 299 | { 300 | return _isDisplayWords; 301 | } 302 | 303 | set 304 | { 305 | if (value == _isDisplayWords) return; 306 | _isDisplayWords = value; 307 | 308 | if (_isDisplayWords) 309 | { 310 | DisplayWords(); 311 | } 312 | else 313 | { 314 | HideWords(); 315 | } 316 | 317 | this.RaisePropertyChanged(nameof(IsDisplayWords)); 318 | } 319 | } 320 | 321 | bool _isDisplayTextLines; 322 | public bool IsDisplayTextLines 323 | { 324 | get 325 | { 326 | return _isDisplayTextLines; 327 | } 328 | 329 | set 330 | { 331 | if (value == _isDisplayTextLines) return; 332 | _isDisplayTextLines = value; 333 | 334 | if (_isDisplayTextLines) 335 | { 336 | DisplayTextLines(); 337 | } 338 | else 339 | { 340 | HideTextLines(); 341 | } 342 | 343 | this.RaisePropertyChanged(nameof(IsDisplayTextLines)); 344 | } 345 | } 346 | 347 | bool _isDisplayTextBlocks; 348 | public bool IsDisplayTextBlocks 349 | { 350 | get 351 | { 352 | return _isDisplayTextBlocks; 353 | } 354 | 355 | set 356 | { 357 | if (value == 
_isDisplayTextBlocks) return; 358 | _isDisplayTextBlocks = value; 359 | 360 | if (_isDisplayTextBlocks) 361 | { 362 | DisplayTextBlocks(); 363 | } 364 | else 365 | { 366 | HideTextBlocks(); 367 | } 368 | 369 | this.RaisePropertyChanged(nameof(IsDisplayTextBlocks)); 370 | } 371 | } 372 | 373 | bool _isDisplayPaths; 374 | public bool IsDisplayPaths 375 | { 376 | get 377 | { 378 | return _isDisplayPaths; 379 | } 380 | 381 | set 382 | { 383 | if (value == _isDisplayPaths) return; 384 | _isDisplayPaths = value; 385 | 386 | if (_isDisplayPaths) 387 | { 388 | DisplayPaths(); 389 | } 390 | else 391 | { 392 | HidePaths(); 393 | } 394 | 395 | this.RaisePropertyChanged(nameof(IsDisplayPaths)); 396 | } 397 | } 398 | 399 | bool _isDisplayImages; 400 | public bool IsDisplayImages 401 | { 402 | get 403 | { 404 | return _isDisplayImages; 405 | } 406 | 407 | set 408 | { 409 | if (value == _isDisplayImages) return; 410 | _isDisplayImages = value; 411 | 412 | if (_isDisplayImages) 413 | { 414 | DisplayImages(); 415 | } 416 | else 417 | { 418 | HideImages(); 419 | } 420 | 421 | this.RaisePropertyChanged(nameof(IsDisplayImages)); 422 | } 423 | } 424 | 425 | /// 426 | /// Initializes a new instance of the class. 
///
        public MainViewModel()
        {
            CustomController = new CustomPlotController();

            // NOTE(review): ObservableCollection is a generic type; its type argument appears to have
            // been lost from this listing (angle-bracket content stripped). Confirm it against the
            // WordExtractorList / PageSegmenterList property declarations before compiling.
            WordExtractorList = new ObservableCollection(PdfDocumentModel.GetWordExtractors());
            PageSegmenterList = new ObservableCollection(PdfDocumentModel.GetPageSegmenters());
        }

        /// <summary>
        /// Raised by <see cref="RaisePropertyChanged"/> with the name of the property that changed.
        /// </summary>
        public event PropertyChangedEventHandler PropertyChanged;

        /// <summary>
        /// Notifies subscribers of <see cref="PropertyChanged"/>; does nothing when there are none.
        /// </summary>
        /// <param name="property">Name of the property that changed.</param>
        protected void RaisePropertyChanged(string property)
        {
            PropertyChanged?.Invoke(this, new PropertyChangedEventArgs(property));
        }

        // Path of the document passed to the last successful extension check in OpenDocument.
        private string _pdfPath;

        /// <summary>
        /// Opens the PDF at <paramref name="path"/>: creates the image converter and the document
        /// model, publishes the page count and PdfPig version, and selects page 1.
        /// </summary>
        /// <param name="path">
        /// File to open. Silently ignored unless its extension is ".pdf" (compared case-insensitively).
        /// </param>
        public void OpenDocument(string path)
        {
            // Ordinal, case-insensitive: "FILE.PDF" is as valid as "file.pdf".
            if (!string.Equals(Path.GetExtension(path), ".pdf", StringComparison.OrdinalIgnoreCase))
            {
                return;
            }

            var previousConverter = _pdfImageConverter;

            _pdfPath = path;
            _pdfImageConverter = new PdfImageConverter(_pdfPath);

            // Release the converter of the previously opened document only once its replacement
            // exists, so a failing constructor leaves the old one in place and usable.
            previousConverter?.Dispose();

            _pdfDocumentModel = PdfDocumentModel.Open(_pdfPath, ClipPaths);
            NumberOfPages = _pdfDocumentModel.NumberOfPages;
            PdfPigVersion = _pdfDocumentModel.PdfPigVersion;
            CurrentPageNumber = 1;
        }

        /// <summary>
        /// Loads page <paramref name="pageNo"/>: applies the current duplicate-letter, word-extractor
        /// and page-segmenter settings to it, rebuilds the two histogram plot models and the page plot
        /// model (axes plus the rendered page as a half-transparent background), then redraws every
        /// overlay whose IsDisplay* flag is set.
        /// </summary>
        /// <param name="pageNo">Page number handed to the document model and the image converter.</param>
        /// <returns>
        /// <c>false</c> when no document is open or the document model returns no page; otherwise <c>true</c>.
        /// </returns>
        private bool LoadPage(int pageNo)
        {
            if (_pdfDocumentModel == null) return false;

            _pdfPageModel = _pdfDocumentModel.GetPage(pageNo);
            if (_pdfPageModel == null) return false;

            _pdfPageModel.SetRemoveDuplicateLetters(_removeDuplicateLetters);
            _pdfPageModel.SetWordExtractor(WordExtractor);
            _pdfPageModel.SetPageSegmenter(PageSegmenter);

            var pageInfoModel = _pdfPageModel.GetPageInfo();

            // Letter size histograms; left null when the page info has no distribution.
            HeightHistoPlotModel = pageInfoModel.HeightDistribution?.GetPlotModel("Letters height distribution");
            WidthHistoPlotModel = pageInfoModel.WidthDistribution?.GetPlotModel("Letters width distribution");

            // Page plot, axes spanning the page size.
            var pagePlotModel = new PlotModel { IsLegendVisible = false };
            pagePlotModel.Axes.Add(new LinearAxis { Position = AxisPosition.Left, Minimum = 0, Maximum = _pdfPageModel.Height });
            pagePlotModel.Axes.Add(new LinearAxis { Position = AxisPosition.Bottom, Minimum = 0, Maximum = _pdfPageModel.Width });

            // Background image. The former debug copy of the stream to "image.jpg" in the working
            // directory was dropped: it ran on every page load and made the load fail whenever that
            // directory was not writable.
            // NOTE(review): the second argument (2) is presumably a zoom/scale factor - confirm
            // against PdfImageConverter.GetPageStream.
            using (var stream = _pdfImageConverter.GetPageStream(pageNo, 2))
            {
                stream.Position = 0;
                PageImage = new OxyImage(stream);
            }

            var cropBounds = _pdfPageModel.CropBox.Bounds;
            pagePlotModel.Annotations.Add(new ImageAnnotation
            {
                ImageSource = PageImage,
                Opacity = 0.5,
                X = new PlotLength(cropBounds.BottomLeft.X, PlotLengthUnit.Data),
                Y = new PlotLength(cropBounds.BottomLeft.Y, PlotLengthUnit.Data),
                Width = new PlotLength(cropBounds.Width, PlotLengthUnit.Data),
                Height = new PlotLength(cropBounds.Height, PlotLengthUnit.Data),
                HorizontalAlignment = HorizontalAlignment.Left,
                VerticalAlignment = VerticalAlignment.Bottom
            });

            this.PagePlotModel = pagePlotModel;

            if (IsDisplayLetters) DisplayLetters();
            if (IsDisplayWords) DisplayWords();
            if (IsDisplayTextLines) DisplayTextLines();
            if (IsDisplayTextBlocks) DisplayTextBlocks();
            if (IsDisplayPaths) DisplayPaths();
            if (IsDisplayImages) DisplayImages();

            return true;
        }

        /// <summary>
        /// Removes every series of the current page plot whose tag is the string <paramref name="tag"/>.
        /// The caller must have checked that <c>PagePlotModel</c> is not null.
        /// </summary>
        private void RemoveSeriesByTag(string tag)
        {
            // "as" instead of a cast: a series carrying a non-string tag is skipped, not fatal.
            // ToList() snapshots the matches so the collection can be modified while looping.
            foreach (var series in PagePlotModel.Series.Where(s => (s.Tag as string) == tag).ToList())
            {
                PagePlotModel.Series.Remove(series);
            }
        }

        /// <summary>
        /// Removes the overlay identified by <paramref name="tag"/> and refreshes the plot.
        /// Does nothing when there is no page plot.
        /// </summary>
        private void HideSeries(string tag)
        {
            if (PagePlotModel == null) return;

            RemoveSeriesByTag(tag);
            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>
        /// Builds a solid line series tracing the outline of <paramref name="bbox"/>:
        /// bottom-left, bottom-right, top-right, top-left and back to bottom-left.
        /// </summary>
        private static LineSeries CreateRectangleSeries(string tag, string title, OxyColor color, PdfRectangle bbox)
        {
            var outline = new LineSeries { Tag = tag, Title = title, LineStyle = LineStyle.Solid, Color = color };
            outline.Points.Add(PdfDocumentModel.ToDataPoint(bbox.BottomLeft));
            outline.Points.Add(PdfDocumentModel.ToDataPoint(bbox.BottomRight));
            outline.Points.Add(PdfDocumentModel.ToDataPoint(bbox.TopRight));
            outline.Points.Add(PdfDocumentModel.ToDataPoint(bbox.TopLeft));
            outline.Points.Add(PdfDocumentModel.ToDataPoint(bbox.BottomLeft));
            return outline;
        }

        /// <summary>
        /// Redraws the letter overlay: one blue outline per letter glyph rectangle.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        public void DisplayLetters()
        {
            // LoadPage can leave _pdfPageModel null while an older plot model is still set.
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("letter");

            foreach (var letter in _pdfPageModel.GetLetters())
            {
                PagePlotModel.Series.Add(
                    CreateRectangleSeries("letter", GetShorterText(letter.Value), OxyColors.Blue, letter.GlyphRectangle));
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the letter overlay and refreshes the plot.</summary>
        public void HideLetters()
        {
            HideSeries("letter");
        }

        /// <summary>
        /// Redraws the word overlay: one red outline per word bounding box.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        public void DisplayWords()
        {
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("word");

            foreach (var word in _pdfPageModel.GetWords())
            {
                PagePlotModel.Series.Add(
                    CreateRectangleSeries("word", GetShorterText(word.Text), OxyColors.Red, word.BoundingBox));
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the word overlay and refreshes the plot.</summary>
        public void HideWords()
        {
            HideSeries("word");
        }

        /// <summary>
        /// Redraws the text-line overlay: one orange-red outline per text line of every text block.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        public void DisplayTextLines()
        {
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("textline");

            foreach (var line in _pdfPageModel.GetTextBlocks().SelectMany(b => b.TextLines))
            {
                PagePlotModel.Series.Add(
                    CreateRectangleSeries("textline", GetShorterText(line.Text), OxyColors.OrangeRed, line.BoundingBox));
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the text-line overlay and refreshes the plot.</summary>
        public void HideTextLines()
        {
            HideSeries("textline");
        }

        /// <summary>
        /// Redraws the text-block overlay: one brown outline per text block bounding box.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        public void DisplayTextBlocks()
        {
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("textblock");

            foreach (var block in _pdfPageModel.GetTextBlocks())
            {
                PagePlotModel.Series.Add(
                    CreateRectangleSeries("textblock", GetShorterText(block.Text), OxyColors.Brown, block.BoundingBox));
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the text-block overlay and refreshes the plot.</summary>
        public void HideTextBlocks()
        {
            HideSeries("textblock");
        }

        /// <summary>
        /// Redraws the path overlay: one yellow line series per sub-path of every PDF path, with
        /// Bezier curves approximated by line segments.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        /// <exception cref="ArgumentException">A sub-path contains a command of an unhandled type.</exception>
        public void DisplayPaths()
        {
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("pdfpath");

            foreach (var path in _pdfPageModel.GetPdfPaths())
            {
                // The title depends on the path only, so it is built once for all its sub-paths.
                string title = ("path: " + (path.IsStroked ? "stroked " + (path.StrokeColor?.ToRGBValues()).ToString() : "") +
                                           (path.IsFilled ? "filled " + (path.FillColor?.ToRGBValues()).ToString() : "") +
                                           (path.IsClipping ? "clipping" : "")
                               ).Trim();

                foreach (var sp in path)
                {
                    var pathSeries = new LineSeries { Tag = "pdfpath", Title = title, LineStyle = LineStyle.Solid, Color = OxyColors.Yellow };

                    // Location of the most recent Move; a Close command draws back to it.
                    PdfPoint first = PdfPoint.Origin;
                    foreach (var c in sp.Commands)
                    {
                        if (c is Move m)
                        {
                            first = m.Location;
                            pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(first));
                        }
                        else if (c is Line l)
                        {
                            pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(l.From));
                            pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(l.To));
                        }
                        else if (c is BezierCurve bc)
                        {
                            foreach (var segment in bc.ToLines(10))
                            {
                                pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(segment.From));
                                pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(segment.To));
                            }
                        }
                        else if (c is Close)
                        {
                            pathSeries.Points.Add(PdfDocumentModel.ToDataPoint(first));
                        }
                        else
                        {
                            throw new ArgumentException("Unsupported path command: " + (c?.GetType().Name ?? "null"));
                        }
                    }

                    PagePlotModel.Series.Add(pathSeries);
                }
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the path overlay and refreshes the plot.</summary>
        public void HidePaths()
        {
            HideSeries("pdfpath");
        }

        /// <summary>
        /// Redraws the image overlay: one yellow-green outline per image bounds.
        /// Does nothing when no page plot or no page model is available.
        /// </summary>
        public void DisplayImages()
        {
            if (PagePlotModel == null || _pdfPageModel == null) return;

            RemoveSeriesByTag("image");

            foreach (var image in _pdfPageModel.GetImages())
            {
                PagePlotModel.Series.Add(CreateRectangleSeries("image", "image", OxyColors.YellowGreen, image.Bounds));
            }

            PagePlotModel.InvalidatePlot(true);
        }

        /// <summary>Removes the image overlay and refreshes the plot.</summary>
        public void HideImages()
        {
            HideSeries("image");
        }

        /// <summary>
        /// Shortens a series title: text of up to 25 characters (or null) is returned unchanged,
        /// longer text is cut to its first 22 characters followed by "...".
        /// </summary>
        private string GetShorterText(string text)
        {
            if (text == null || text.Length <= 25) return text;
            return text.Substring(0, 22) + "...";
        }

        /// <summary>
        /// Releases the image converter, if one was created. Safe to call when no document was
        /// opened and safe to call more than once.
        /// </summary>
        public void Dispose()
        {
            _pdfImageConverter?.Dispose();
            _pdfImageConverter = null;
            // other dispose
        }
    }
}
--------------------------------------------------------------------------------
/DocumentLayoutAnalysis/DlaViewer/MainWindow.xaml:
--------------------------------------------------------------------------------
1 |  10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 |