├── Kopie.png ├── sample.html ├── README.md ├── Export-TextFromPDF.ps1 ├── Join-PDFFiles.ps1 ├── WriteText2PDF.ps1 ├── ConvertFrom-HtmlToPDF.ps1 ├── Get-PDFInfo.ps1 ├── Set-PDFMetadata.ps1 ├── Set-WatermarkToPDF.ps1 └── iText7 └── New-PDFFile.ps1 /Kopie.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/datenteiler/PDFHacks/HEAD/Kopie.png -------------------------------------------------------------------------------- /sample.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | Convert this HTML to PDF 7 | 8 | 9 |

This is an important message!

10 |


11 |

This is 12 | an important 13 | 14 | sample text. 15 | 16 |

17 | 18 | 19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# PDF Hacks with PowerShell and iText

iText is for creating, consuming and manipulating PDF files and iTextSharp is the .NET library for iText 5.
These PowerShell scripts need iTextSharp to run. So first things first: Get iTextSharp e.g. from Nuget:
https://www.nuget.org/packages/iTextSharp/

Rename the file from .nupkg to .zip, unpack the itextsharp.dll and copy it to the path of the script.

The script in iText7 shows how it works to create a PDF file with PowerShell and the new iText 7, because iText 5 or iTextSharp is EOL, and has been replaced. Only security fixes will be added. It is highly recommended to use iText 7 for new projects, and to consider moving existing projects from iTextSharp to iText 7.

--------------------------------------------------------------------------------
/Export-TextFromPDF.ps1:
--------------------------------------------------------------------------------
function Export-TextFromPDF
{
<#
.Synopsis
   Export text from a PDF file
.DESCRIPTION
   Uses the iTextSharp parser PdfTextExtractor to get only the text from a given PDF.
   The text of all pages is concatenated in page order (no separator is inserted)
   and emitted as one string per input file.
.PARAMETER Name
   File name of the PDF, relative to the current location ($pwd). Alias: File.
.EXAMPLE
   Export-TextFromPDF -Name YourFile.pdf
.OUTPUTS
   System.String
#>
    [CmdletBinding()]
    Param
    (
        # Insert filename
        [String]
        [Alias('File')]
        [Parameter(
            Mandatory,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            Position=0)
        ]
        $Name
    )

    Begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
    }
    Process
    {
        # Pipeline-bound parameters are only populated in Process, so the reader
        # must be opened here rather than in Begin.
        $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $(Join-Path $pwd $Name)
        try
        {
            # Start from an empty string so a $text variable inherited from a
            # parent scope can never leak into the result.
            $text = ''

            # Page numbers are 1-based, so the loop has to include NumberOfPages itself.
            for ($i = 1; $i -le $reader.NumberOfPages; $i++)
            {
                $text = $text + [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, $i)
            }

            $text
        }
        finally
        {
            # Release the file handle even when extraction fails
            $reader.Close()
        }
    }
}

--------------------------------------------------------------------------------
/Join-PDFFiles.ps1:
--------------------------------------------------------------------------------
function Join-PDFFiles
{
<#
.Synopsis
   Join PDF files to one PDF
.DESCRIPTION
   Join PDF files to one PDF with iTextSharp. The pages of every input file are
   appended to the output file in the order the files are received.
.PARAMETER Filenames
   One or more PDF files. Relative names are resolved against the current location ($pwd).
.PARAMETER Output
   Name of the joined PDF, created (or overwritten) relative to the current location ($pwd).
.EXAMPLE
   Join-PDFFiles -Filenames $(gci *.pdf) -Output "JoinedPDFs.pdf"
#>
    param
    (
        [string[]]
        [Parameter(Mandatory,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            Position=0)
        ]
        $Filenames,

        [String]
        [Parameter(Mandatory,
            Position=1)]
        $Output
    )

    begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
        $doc = New-Object iTextSharp.text.Document
        $fs = [System.IO.FileStream]::new($(Join-Path $pwd $Output), [System.IO.FileMode]::Create)
        $writer = New-Object iTextSharp.text.pdf.PdfCopy($doc, $fs)
        $doc.Open()
    }
    process
    {
        foreach ($filename in $Filenames)
        {
            # .NET resolves relative paths against the process directory, which is
            # usually not $pwd. Resolve them the same way as the output file and the DLL.
            if ([System.IO.Path]::IsPathRooted($filename))
            {
                $path = $filename
            }
            else
            {
                $path = Join-Path $pwd $filename
            }

            $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $path
            try
            {
                $reader.ConsolidateNamedDestinations()

                for ($i = 1; $i -le $reader.NumberOfPages; $i++)
                {
                    $page = $writer.GetImportedPage($reader, $i)
                    $writer.AddPage($page)
                }
            }
            finally
            {
                $reader.Close()
            }
        }
    }
    end
    {
        $writer.Close()
        $doc.Close()
        # Disposing an already closed stream is a no-op; this only guarantees the handle is released
        $fs.Dispose()
    }
}

--------------------------------------------------------------------------------
/WriteText2PDF.ps1:
--------------------------------------------------------------------------------
# Writes a title, a heading and a block of sample text to TextPDF.pdf in the
# current location ($pwd) using iTextSharp.
Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
$doc = New-Object iTextSharp.text.Document
# File.Create truncates an existing file. OpenWrite would keep the old bytes
# beyond the new end of file and leave a corrupt PDF when overwriting a longer one.
$stream = [IO.File]::Create($(Join-Path $pwd "TextPDF.pdf"))
try
{
    # The writer only has to be attached to the document; it is not used directly
    $writer = [itextsharp.text.pdf.PdfWriter]::GetInstance($doc, $stream)
    [void]$doc.AddTitle("The Title")
    [void]$doc.AddSubject("A Text")
    [void]$doc.AddAuthor("Christian Imhorst")
    $doc.Open()

    # Set fonts. Use the FontFactory constants for the font names:
    # NOTE(review): the literal "HELVETICA_BOLD" does not look like a registered
    # font name, so the bold style was presumably never applied - confirm against iTextSharp.
    $title = [iTextSharp.text.FontFactory]::GetFont([iTextSharp.text.FontFactory]::HELVETICA_BOLD, 28, [iTextSharp.text.BaseColor]::DARK_GRAY)
    $heading = [iTextSharp.text.FontFactory]::GetFont([iTextSharp.text.FontFactory]::HELVETICA_BOLD, 18, [iTextSharp.text.BaseColor]::BLACK)
    $standard = [iTextSharp.text.FontFactory]::GetFont([iTextSharp.text.FontFactory]::HELVETICA, 12, [iTextSharp.text.BaseColor]::BLACK)

    $p = New-Object iTextSharp.text.Paragraph
    [void]$p.Add([iTextSharp.text.Paragraph]::new("The Title", $title))
    # Chunk is the smallest part of text that can be added to a document
    [void]$p.Add([iTextSharp.text.Chunk]::NEWLINE)
    [void]$p.Add([iTextSharp.text.Paragraph]::new("This is a heading", $heading))
    [void]$p.Add([iTextSharp.text.Chunk]::NEWLINE)
    [void]$p.Add([iTextSharp.text.Paragraph]::new("Hello World!

Lorem ipsum dolor sit amet, consetetur sadipscing elitr, sed diam nonumy eirmod tempor
invidunt ut labore et dolore magna aliquyam erat, sed diam voluptua. At vero eos et
accusam et justo duo dolores et ea rebum. Stet clita kasd gubergren, no sea takimata
sanctus est Lorem ipsum dolor sit amet.", $standard))
    [void]$doc.Add($p)
    $doc.Dispose()
}
finally
{
    $stream.Close()
}

--------------------------------------------------------------------------------
/ConvertFrom-HtmlToPDF.ps1:
--------------------------------------------------------------------------------
function ConvertFrom-HtmlToPDF
{
<#
.Synopsis
   Convert your HTML page to PDF
.DESCRIPTION
   Convert your HTML page to PDF with iTextSharp. The HTML file is parsed with the
   built-in HTMLWorker and the resulting PDF is written to the output file.
.PARAMETER HTML
   Name of the HTML file, relative to the current location ($pwd).
.PARAMETER Output
   Name of the PDF file to write, relative to the current location ($pwd).
.EXAMPLE
   ConvertFrom-HtmlToPDF -HTML sample.html -Output sample.pdf
#>
    Param
    (
        # Insert your HTML
        [Parameter(Mandatory,
            ValueFromPipelineByPropertyName,
            ValueFromPipeline,
            Position=0)]
        $HTML,

        # PDF file out
        [String]
        [Parameter(Mandatory,
            Position=1)]
        $Output
    )

    Begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
    }
    Process
    {
        # $HTML may arrive via the pipeline and is therefore only bound in Process
        $example_html = $(Get-Content $(Join-Path $pwd $HTML) | Out-String)

        $doc = New-Object iTextSharp.text.Document
        $memoryStream = New-Object System.IO.MemoryStream
        $null = [itextsharp.text.pdf.PdfWriter]::GetInstance($doc, $memoryStream)

        # HTMLWorker doesn't read a string directly but instead needs a TextReader
        $sr = New-Object System.IO.StringReader($example_html)
        try
        {
            $doc.Open()

            # Use the built-in HTMLWorker to parse the HTML.
            # Only inline CSS is supported.
            $htmlWorker = New-Object iTextSharp.text.html.simpleparser.HTMLWorker($doc)
            $htmlWorker.Parse($sr)

            $doc.Close()

            # MemoryStream.ToArray() still works after the document closed the stream
            $bytes = $memoryStream.ToArray()
            [System.IO.File]::WriteAllBytes($(Join-Path $pwd $Output), $bytes)
        }
        finally
        {
            $sr.Dispose()
            $memoryStream.Dispose()
        }
    }
    End {}
}

--------------------------------------------------------------------------------
/Get-PDFInfo.ps1:
--------------------------------------------------------------------------------
function Get-PDFInfo
{
<#
.Synopsis
   Get infos from a PDF file
.DESCRIPTION
   Uses the iTextSharp PdfReader class to get infos from a given PDF: the entries of
   the document info dictionary plus NumberOfPages, FileLength, PdfVersion,
   IsEncrypted and the PageSize of the first page. The entries are emitted sorted by name.
.PARAMETER Name
   File name of the PDF, relative to the current location ($pwd). Alias: File.
.PARAMETER Javascript
   Also include the JavaScript reported by the reader.
.EXAMPLE
   Get-PDFInfo -Name YourFile.pdf
#>
    [CmdletBinding()]
    Param
    (
        # Insert filename
        [String]
        [Alias('File')]
        [Parameter(
            Mandatory,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            Position=0)
        ]
        $Name,

        [switch]$Javascript
    )

    Begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
    }
    Process
    {
        # Pipeline-bound $Name is only available in Process
        $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $(Join-Path $pwd $Name)
        try
        {
            # Copy the reader's info entries into a hashtable of our own, so that
            # non-string values (dates, numbers, rectangles) can be stored in it
            # regardless of the collection type the reader returns.
            $rawInfo = $reader.Info
            $info = @{}
            foreach ($key in $rawInfo.Keys)
            {
                $info[$key] = $rawInfo[$key]
            }

            # Change DateTime to human-readable format. Not every PDF carries both
            # dates, so only convert entries that exist and contain a 14-digit timestamp.
            foreach ($dateKey in 'ModDate', 'CreationDate')
            {
                if ($info.ContainsKey($dateKey) -and $info[$dateKey] -match '\d{14}')
                {
                    $info[$dateKey] = [datetime]::ParseExact($Matches[0], "yyyyMMddHHmmss", $null)
                }
            }

            # Add more keys to the hashtable:
            $info['NumberOfPages'] = $reader.NumberOfPages
            $info['FileLength'] = $reader.FileLength
            $info['PdfVersion'] = $reader.PdfVersion
            $info['IsEncrypted'] = $reader.IsEncrypted()
            $info['PageSize'] = $reader.GetPageSize(1)

            # Maybe the PDF contains JavaScript?
            if ($Javascript)
            {
                $info['JavaScript'] = $reader.JavaScript
            }

            $info.GetEnumerator() | Sort-Object -Property Name
        }
        finally
        {
            $reader.Close()
        }
    }
}

--------------------------------------------------------------------------------
/Set-PDFMetadata.ps1:
--------------------------------------------------------------------------------
function Set-PDFMetadata
{
<#
.Synopsis
   Set or change metadata in your PDF file
.DESCRIPTION
   Set or change metadata in your PDF file with iTextSharp. The input file is not
   modified; a copy with the merged metadata is written to the output file.
.PARAMETER File
   Name of the input PDF, relative to the current location ($pwd).
.PARAMETER Output
   Name of the output PDF, relative to the current location ($pwd).
.PARAMETER Metadata
   Hashtable with the metadata to set. Existing keys are overwritten, new keys are added.
.EXAMPLE
   Set-PDFMetadata -File Input.pdf -Output Output_neu.pdf -Metadata @{"Author" = "Christian Imhorst"}
.EXAMPLE
   Set-PDFMetadata -File Input.pdf -Output Output_neu.pdf -Metadata @{"Author" = "Christian Imhorst"; "Creator" = "PowerShell"; "Conference" = "PSConfEU2019"; "Hashtag" = "#PSConfEU2019"}
#>
    Param
    (
        # Insert filename
        [String]
        [Parameter(
            Mandatory,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            Position=0)
        ]
        $File,

        # Name of the output file
        [String]
        [Parameter(
            Mandatory,
            Position=1)
        ]
        $Output,

        # Hashtable with your metadata: @{"Author" = "Christian Imhorst"}
        [hashtable]
        [Parameter(
            Mandatory,
            Position=2)
        ]
        $Metadata
    )

    Begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")
    }
    Process
    {
        # Pipeline-bound $File is only available in Process
        $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $(Join-Path $pwd $File)
        $fs = $null
        $stamper = $null
        try
        {
            $fs = [System.IO.FileStream]::new($(Join-Path $pwd $Output), [System.IO.FileMode]::Create, [System.IO.FileAccess]::Write, [System.IO.FileShare]::None)
            $stamper = New-Object iTextSharp.text.pdf.PdfStamper($reader, $fs)

            $info = $reader.Info
            foreach ($key in $Metadata.Keys)
            {
                # Index assignment adds or replaces the entry and, unlike Remove(),
                # does not write a boolean to the output stream.
                $info[$key] = $Metadata[$key]
            }

            $stamper.MoreInfo = $info
        }
        finally
        {
            # Dispose in reverse order of creation; the stamper writes the file when it is disposed
            if ($stamper) { $stamper.Dispose() }
            if ($fs) { $fs.Dispose() }
            $reader.Dispose()
        }
    }
    End {}
}

--------------------------------------------------------------------------------
/Set-WatermarkToPDF.ps1:
--------------------------------------------------------------------------------
function Set-WatermarkToPDF
{
<#
.Synopsis
   Set a watermark to a PDF
.DESCRIPTION
   You can set a given watermark from an image file to a PDF.
   The image is placed over the content of every page at the given absolute position.
   Output is a new PDF with a watermark.
.PARAMETER Name
   Name of the input PDF, relative to the current location ($pwd).
.PARAMETER Output
   Name of the output PDF. Relative names are resolved against the current location ($pwd).
.PARAMETER Watermark
   Image file with the watermark. Relative names are resolved against the current
   location ($pwd); rooted paths and absolute URIs are passed on unchanged.
.PARAMETER SetAbsolutePositionXY
   X and Y position of the watermark; at least two values are required.
.EXAMPLE
   Set-WatermarkToPDF -Name My.pdf -Output My_Copy.pdf -Watermark watermark.png -SetAbsolutePositionXY 0,600 # 100,300

#>
    Param
    (
        # Insert filename
        [String]
        [Parameter(
            Mandatory,
            ValueFromPipeline,
            ValueFromPipelineByPropertyName,
            Position=0)
        ]
        $Name,

        [String]
        [Parameter(
            Mandatory,
            Position=1)
        ]
        $Output,

        # File with the watermark
        [String]
        [Parameter(
            Mandatory,
            Position=2)
        ]
        $Watermark,

        # Set absolute position of the watermark
        [int[]]
        [Parameter(
            Mandatory,
            Position = 3)
        ]
        $SetAbsolutePositionXY
    )

    Begin
    {
        Add-Type -Path $(Join-Path $pwd "itextsharp.dll")

        if ($SetAbsolutePositionXY.Count -lt 2)
        {
            throw "SetAbsolutePositionXY needs two values: X,Y"
        }

        # .NET resolves relative paths against the process directory, which usually
        # differs from $pwd. Resolve relative names against $pwd like the input file,
        # but leave rooted paths and absolute URIs as they are.
        $resolvePath = {
            param($path)
            if ([Uri]::IsWellFormedUriString($path, [UriKind]::Absolute) -or [System.IO.Path]::IsPathRooted($path))
            {
                $path
            }
            else
            {
                Join-Path $pwd $path
            }
        }
        $outputPath = & $resolvePath $Output
        $watermarkPath = & $resolvePath $Watermark

        $img = [iTextSharp.text.Image]::GetInstance($watermarkPath)
        $img.SetAbsolutePosition($SetAbsolutePositionXY[0], $SetAbsolutePositionXY[1])
    }
    Process
    {
        # Pipeline-bound $Name is only available in Process
        $reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $(Join-Path $pwd $Name)
        $memoryStream = New-Object System.IO.MemoryStream
        try
        {
            $pdfStamper = New-Object iTextSharp.text.pdf.PdfStamper($reader, $memoryStream)

            # Pages are 1-based; draw the image over the existing content of each page
            for ($i = 1; $i -le $reader.NumberOfPages; $i++)
            {
                $myWaterMark = $pdfStamper.GetOverContent($i)
                $myWaterMark.AddImage($img)
            }

            $pdfStamper.FormFlattening = $true
            # Disposing the stamper finishes the PDF in the memory stream
            $pdfStamper.Dispose()

            $bytes = $memoryStream.ToArray()
            [System.IO.File]::WriteAllBytes($outputPath, $bytes)
        }
        finally
        {
            $memoryStream.Dispose()
            $reader.Dispose()
        }
    }
    End {}
}

--------------------------------------------------------------------------------
/iText7/New-PDFFile.ps1:
--------------------------------------------------------------------------------
function New-PDFFile
{
<#
.Synopsis
   Write title and text to a PDF file
.DESCRIPTION
   You can parse a single line of text, more lines of text or a whole text file to a PDF.
   The title becomes the first paragraph; every element of -Text becomes a paragraph of its own.
.PARAMETER Name
   Name of the PDF to create, relative to the current location ($pwd).
.PARAMETER Title
   Title, written in Helvetica 16.
.PARAMETER Text
   One or more blocks of text, written in Times Roman 11.
.EXAMPLE
   New-PDFFile -Name New.pdf -Title "My Title" -Text "This is my text"
.EXAMPLE
   New-PDFFile -Name New.pdf -Title "My Lorem" -Text (Get-Content lorem.txt | Out-String)

.NOTES
   iText7 has a couple of dependencies. It depends on:

   Common.Logging version 3.4.1
   https://www.nuget.org/packages/Common.Logging/3.4.1

   Which depends on Common.Logging.Core also version 3.4.1
   https://www.nuget.org/packages/Common.Logging.Core/3.4.1

   And iText.Kernel.dll also needs Portable.BouncyCastle in version 1.8.1.3
   https://www.nuget.org/packages/Portable.BouncyCastle/1.8.1.3

   Download the Nuget package and rename .nupkg to .zip. Then you can extract
   the .dll files from the .zip archive.
#>
    param
    (
        [String]
        [Parameter(Mandatory)]
        $Name,

        [String]
        [Parameter(Mandatory)]
        $Title,

        [String[]]
        [Parameter(Mandatory)]
        $Text
    )

    begin
    {
        Add-Type -Path $(Join-Path $pwd "lib\Common.Logging.Core.dll")
        Add-Type -Path $(Join-Path $pwd "lib\Common.Logging.dll")
        Add-Type -Path $(Join-Path $pwd "lib\BouncyCastle.Crypto.dll")
        Add-Type -Path $(Join-Path $pwd "lib\itext.io.dll")
        Add-Type -Path $(Join-Path $pwd "lib\itext.kernel.dll")
        Add-Type -Path $(Join-Path $pwd "lib\itext.layout.dll")

        [string]$Filename = $(Join-Path $pwd $Name)
    }
    process
    {
        $pdfWriter = [iText.Kernel.Pdf.PdfWriter]::new($Filename)
        $pdf = [iText.Kernel.Pdf.PdfDocument]::new($pdfWriter)
        $doc = [iText.Layout.Document]::new($pdf, [iText.Kernel.Geom.PageSize]::A4)
        try
        {
            $doc.SetMargins(36,36,36,36)

            $heading1 = [iText.Kernel.Font.PdfFontFactory]::CreateFont([iText.IO.Font.FontConstants]::HELVETICA)
            $font = [iText.Kernel.Font.PdfFontFactory]::CreateFont([iText.IO.Font.FontConstants]::TIMES_ROMAN)

            # Add() returns the element it was called on; discard it so that nothing
            # leaks into the function's output.
            $myTitle = [iText.Layout.Element.Text]::new($Title).SetFont($heading1).SetFontSize(16)
            $theTitle = [iText.Layout.Element.Paragraph]::new()
            [void]$theTitle.Add($myTitle)
            [void]$doc.Add($theTitle)

            # One paragraph per text block. Building a single Text element from the
            # whole array and adding it once per element would repeat the complete
            # text as many times as there are elements.
            foreach ($block in $Text)
            {
                $myText = [iText.Layout.Element.Text]::new($block).SetFont($font).SetFontSize(11)
                $paragraph = [iText.Layout.Element.Paragraph]::new()
                [void]$paragraph.Add($myText)
                [void]$doc.Add($paragraph)
            }
        }
        finally
        {
            # Closing the layout document also closes the underlying PdfDocument and writer
            $doc.Close()
        }
    }
    end {}
}

--------------------------------------------------------------------------------