├── .github └── workflows │ └── tests.yml ├── LICENSE ├── README.md ├── composer.json └── src ├── Command.php ├── DataFields.php ├── FdfFile.php ├── InfoFields.php ├── InfoFile.php ├── Pdf.php └── XfdfFile.php /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | on: pull_request 3 | jobs: 4 | phpunit: 5 | name: PHP ${{ matrix.php }} 6 | runs-on: ubuntu-latest 7 | strategy: 8 | matrix: 9 | php: 10 | - "5.3" 11 | - "5.4" 12 | - "5.5" 13 | - "5.6" 14 | - "7.0" 15 | - "7.1" 16 | - "7.2" 17 | - "7.3" 18 | - "7.4" 19 | - "8.0" 20 | steps: 21 | - name: Checkout 22 | uses: actions/checkout@v2 23 | 24 | - name: Install pdftk 25 | run: | 26 | cd /tmp 27 | sudo wget http://mirrors.kernel.org/ubuntu/pool/universe/p/pdftk-java/pdftk-java_3.0.9-1_all.deb 28 | sudo apt install -y -q ./pdftk-java_3.0.9-1_all.deb 29 | pdftk --version 30 | 31 | - name: Install PHP 32 | uses: shivammathur/setup-php@v2 33 | with: 34 | php-version: ${{ matrix.php }} 35 | tools: composer:v2 36 | 37 | - name: Update composer 38 | run: composer self-update 39 | 40 | - name: Get composer cache directory 41 | id: composer-cache 42 | run: echo "::set-output name=dir::$(composer config cache-files-dir)" 43 | 44 | - name: Cache composer cache 45 | uses: actions/cache@v2 46 | with: 47 | path: ${{ steps.composer-cache.outputs.dir }} 48 | key: ${{ runner.os }}-composer-${{ hashFiles('**/composer.json') }} 49 | restore-keys: ${{ runner.os }}-composer- 50 | 51 | - name: Install composer packages 52 | run: composer update --prefer-dist --no-interaction --no-progress --optimize-autoloader --ansi 53 | 54 | - name: Run phpunit 55 | run: vendor/bin/phpunit --color=always 56 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Michael Härtl 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | php-pdftk 2 | ========= 3 | 4 | [![GitHub Tests](https://github.com/mikehaertl/php-pdftk/workflows/Tests/badge.svg)](https://github.com/mikehaertl/php-pdftk/actions) 5 | [![Packagist Version](https://img.shields.io/packagist/v/mikehaertl/php-pdftk?label=version)](https://packagist.org/packages/mikehaertl/php-pdftk) 6 | [![Packagist Downloads](https://img.shields.io/packagist/dt/mikehaertl/php-pdftk)](https://packagist.org/packages/mikehaertl/php-pdftk) 7 | [![GitHub license](https://img.shields.io/github/license/mikehaertl/php-pdftk)](https://github.com/mikehaertl/php-pdftk/blob/master/LICENSE) 8 | [![Packagist PHP Version Support](https://img.shields.io/packagist/php-v/mikehaertl/php-pdftk)](https://packagist.org/packages/mikehaertl/php-pdftk) 9 | 10 | A PDF conversion and form utility based on pdftk. 11 | 12 | ## Features 13 | 14 | *php-pdftk* brings the full power of `pdftk` to PHP - and more. 15 | 16 | * Fill forms, either from a XFDF/FDF file or from a data array (UTF-8 safe for unflattened forms, requires pdftk 2.x !) 17 | * Create XFDF or FDF files from PHP arrays (UTF-8 safe!) 18 | * Create FDF files from filled PDF forms 19 | * Combine pages from several PDF files into a new PDF file 20 | * Split a PDF into one file per page 21 | * Add background or overlay PDFs 22 | * Read out meta data about PDF and form fields 23 | * Set passwords and permissions 24 | * Remove passwords 25 | 26 | ## Requirements 27 | 28 | * The `pdftk` command must be installed and working on your system 29 | * This library is written for pdftk 2.x versions. You should be able to use it with pdftk 1.x but not all methods will work there. 30 | For details consult the man page of pdftk on your system. 
31 | * There is a [known issue](https://github.com/mikehaertl/php-pdftk/issues/150) 32 | on Ubuntu if you installed the `pdftk` package from snap. This version has 33 | no permission to write to the `/tmp` directory. You can either set another 34 | temporary directory as described below or use another package. For Ubuntu 35 | 18.10 there's also a `pdftk-java` package available via apt which should work 36 | fine. You can also install this package on Ubuntu 18.04 if you download it 37 | manually. Also check [this answer](https://askubuntu.com/a/1028983/175814) 38 | on askubuntu. 39 | 40 | > **Note:** The pdftk version from the alternative PPA `ppa:malteworld/ppa` is 41 | > no longer available. The author instead now points to his answer on askubuntu 42 | > linked above. 43 | 44 | ## Installation 45 | 46 | You should use [composer](https://getcomposer.org/) to install this library. 47 | 48 | ``` 49 | composer require mikehaertl/php-pdftk 50 | ``` 51 | 52 | ## Examples 53 | 54 | ### Create instance for PDF files 55 | 56 | There are several ways to tell the `Pdf` instance which file(s) it should use. 57 | Some files may also require a password or need an alias to be used as a handle 58 | in some operations (e.g. cat or shuffle). 59 | 60 | > **Note:** In version 2.x of pdftk a handle can be one or more upper case letters. 61 | 62 | ```php 63 | // Create an instance for a single file 64 | $pdf = new Pdf('/path/to/form.pdf'); 65 | 66 | // Alternatively add files later. Handles are autogenerated in this case. 
67 | $pdf = new Pdf(); 68 | $pdf->addFile('/path/to/file1.pdf'); 69 | $pdf->addFile('/path/to/file2.pdf'); 70 | 71 | // Add files with own handle 72 | $pdf = new Pdf(); 73 | $pdf->addFile('/path/to/file1.pdf', 'A'); 74 | $pdf->addFile('/path/to/file2.pdf', 'B'); 75 | // Add file with handle and password 76 | $pdf->addFile('/path/to/file3.pdf', 'C', 'secret*password'); 77 | 78 | // Shortcut to pass all files to the constructor 79 | $pdf = new Pdf([ 80 | 'A' => ['/path/to/file1.pdf', 'secret*password1'], 81 | 'B' => ['/path/to/file2.pdf', 'secret*password2'], 82 | ]); 83 | ``` 84 | 85 | ### Operations 86 | 87 | Please consult the `pdftk` man page for each operation to find out how each operation works 88 | in detail and which options are available. 89 | 90 | For all operations you can either save the PDF locally through `saveAs($name)` or send it to the 91 | browser with `send()`. If you pass a filename to `send($name)` the client browser will open a download 92 | dialogue whereas without a filename it will usually display the PDF inline. 93 | 94 | **IMPORTANT: You can always only perform *one* of the following operations on a single PDF instance. 95 | Below you can find a workaround if you need multiple operations.** 96 | 97 | #### Fill Form 98 | 99 | Fill a PDF form with data from a PHP array or an XFDF/FDF file. 
100 | 101 | ```php 102 | use mikehaertl\pdftk\Pdf; 103 | 104 | // Fill form with data array 105 | $pdf = new Pdf('/full/path/to/form.pdf'); 106 | $result = $pdf->fillForm([ 107 | 'name'=>'ÄÜÖ äüö мирано čárka', 108 | 'nested.name' => 'valX', 109 | ]) 110 | ->needAppearances() 111 | ->saveAs('filled.pdf'); 112 | 113 | // Always check for errors 114 | if ($result === false) { 115 | $error = $pdf->getError(); 116 | } 117 | 118 | // Fill form from FDF 119 | $pdf = new Pdf('form.pdf'); 120 | $result = $pdf->fillForm('data.xfdf') 121 | ->saveAs('filled.pdf'); 122 | if ($result === false) { 123 | $error = $pdf->getError(); 124 | } 125 | ``` 126 | 127 | **Note:** When filling in UTF-8 data, you should always add the `needAppearances()` option. 128 | This will make sure, that the PDF reader takes care of using the right fonts for rendering, 129 | something that pdftk can't do for you. Also note that `flatten()` doesn't really work well 130 | if you have special characters in your data. 131 | 132 | If you use `pdftk-java` >= 3.3.0 and the embedded font does not support UTF-8 133 | characters you can also replace it with a local font: 134 | 135 | ```php 136 | use mikehaertl\pdftk\Pdf; 137 | 138 | // Fill form with data array 139 | $pdf = new Pdf('/full/path/to/form.pdf'); 140 | $result = $pdf->fillForm($data) 141 | ->replacementFont('/usr/share/fonts/dejavu/DejaVuSans.ttf') 142 | ->saveAs('filled.pdf'); 143 | ``` 144 | 145 | #### Create a XFDF/FDF file from a PHP array 146 | 147 | This is a bonus feature that is not available from `pdftk`. 148 | 149 | ```php 150 | use mikehaertl\pdftk\XfdfFile; 151 | use mikehaertl\pdftk\FdfFile; 152 | 153 | $xfdf = new XfdfFile(['name' => 'Jürgen мирано']); 154 | $xfdf->saveAs('/path/to/data.xfdf'); 155 | 156 | $fdf = new FdfFile(['name' => 'Jürgen мирано']); 157 | $fdf->saveAs('/path/to/data.fdf'); 158 | ``` 159 | 160 | #### Cat 161 | 162 | Assemble a PDF from pages from one or more PDF files. 
163 | 164 | ```php 165 | use mikehaertl\pdftk\Pdf; 166 | 167 | // Extract pages 1-5 and 7,4,9 into a new file 168 | $pdf = new Pdf('/path/to/my.pdf'); 169 | $result = $pdf->cat(1, 5) 170 | ->cat([7, 4, 9]) 171 | ->saveAs('/path/to/new.pdf'); 172 | if ($result === false) { 173 | $error = $pdf->getError(); 174 | } 175 | 176 | // Combine pages from several files 177 | $pdf = new Pdf([ 178 | 'A' => '/path/file1.pdf', // A is alias for file1.pdf 179 | 'B' => ['/path/file2.pdf','pass**word'], // B is alias for file2.pdf 180 | 'C' => ['/path/file3.pdf','secret**pw'], // C is alias for file3.pdf 181 | ]); 182 | $result = $pdf->cat(1, 5, 'A') // pages 1-5 from A 183 | ->cat(3, null, 'B') // page 3 from B 184 | ->cat(7, 'end', 'B', null, 'east') // pages 7-end from B, rotated East 185 | ->cat('end',3,'A','even') // even pages 3-end in reverse order from A 186 | ->cat([2,3,7], 'C') // pages 2,3 and 7 from C 187 | ->saveAs('/path/new.pdf'); 188 | if ($result === false) { 189 | $error = $pdf->getError(); 190 | } 191 | ``` 192 | 193 | #### Shuffle 194 | 195 | Like `cat()` but create "*streams*" and fill the new PDF with one page from each 196 | stream at a time. 197 | 198 | ```php 199 | use mikehaertl\pdftk\Pdf; 200 | 201 | $pdf = new Pdf([ 202 | 'A' => '/path/file1.pdf', // A is alias for file1.pdf 203 | 'B' => '/path/file2.pdf', // B is alias for file2.pdf 204 | ]); 205 | 206 | // new.pdf will have pages A1, B3, A2, B4, A3, B5, ... 207 | $result = $pdf->shuffle(1, 5, 'A') // pages 1-5 from A 208 | ->shuffle(3, 8, 'B') // pages 3-8 from B 209 | ->saveAs('/path/new.pdf'); 210 | if ($result === false) { 211 | $error = $pdf->getError(); 212 | } 213 | ``` 214 | 215 | #### Burst 216 | 217 | Split a PDF file into one file per page. 
218 | 219 | ```php 220 | use mikehaertl\pdftk\Pdf; 221 | 222 | $pdf = new Pdf('/path/my.pdf'); 223 | $result = $pdf->burst('/path/page_%d.pdf'); // Supply a printf() pattern 224 | if ($result === false) { 225 | $error = $pdf->getError(); 226 | } 227 | ``` 228 | 229 | #### Add background PDF 230 | 231 | Add another PDF file as background. 232 | 233 | ```php 234 | use mikehaertl\pdftk\Pdf; 235 | 236 | // Set background from another PDF (first page repeated) 237 | $pdf = new Pdf('/path/my.pdf'); 238 | $result = $pdf->background('/path/back.pdf') 239 | ->saveAs('/path/watermarked.pdf'); 240 | if ($result === false) { 241 | $error = $pdf->getError(); 242 | } 243 | 244 | // Set background from another PDF (one page each) 245 | $pdf = new Pdf('/path/my.pdf'); 246 | $result = $pdf->multiBackground('/path/back_pages.pdf') 247 | ->saveAs('/path/watermarked.pdf'); 248 | if ($result === false) { 249 | $error = $pdf->getError(); 250 | } 251 | ``` 252 | 253 | #### Add overlay PDF 254 | 255 | Add another PDF file as overlay. 256 | 257 | ```php 258 | use mikehaertl\pdftk\Pdf; 259 | 260 | // Stamp with another PDF (first page repeated) 261 | $pdf = new Pdf('/path/my.pdf'); 262 | $result = $pdf->stamp('/path/overlay.pdf') 263 | ->saveAs('/path/stamped.pdf'); 264 | if ($result === false) { 265 | $error = $pdf->getError(); 266 | } 267 | 268 | // Stamp with another PDF (one page each) 269 | $pdf = new Pdf('/path/my.pdf'); 270 | $result = $pdf->multiStamp('/path/overlay_pages.pdf') 271 | ->saveAs('/path/stamped.pdf'); 272 | if ($result === false) { 273 | $error = $pdf->getError(); 274 | } 275 | ``` 276 | 277 | #### Attach Files 278 | 279 | Add file attachments to the document or to a specific page. 
280 | 281 | ```php 282 | use mikehaertl\pdftk\Pdf; 283 | 284 | $files = [ 285 | '/path/to/file1', 286 | '/path/to/file2', 287 | ]; 288 | 289 | // Add files at the document level 290 | $pdf = new Pdf('/path/my.pdf'); 291 | $result = $pdf->attachFiles($files) 292 | ->saveAs('/path/withfiles.pdf'); 293 | if ($result === false) { 294 | $error = $pdf->getError(); 295 | } 296 | 297 | // Add files to a specific page 298 | $pdf = new Pdf('/path/my.pdf'); 299 | $page = 7; 300 | $result = $pdf->attachFiles($files, $page) 301 | ->saveAs('/path/withfiles.pdf'); 302 | if ($result === false) { 303 | $error = $pdf->getError(); 304 | } 305 | ``` 306 | #### Unpack Files 307 | 308 | Copy file attachments from a PDF to the given directory. 309 | 310 | ```php 311 | use mikehaertl\pdftk\Pdf; 312 | 313 | $pdf = new Pdf('/path/my.pdf'); 314 | $result = $pdf->unpackFiles('/path/to/dir'); 315 | if ($result === false) { 316 | $error = $pdf->getError(); 317 | } 318 | ``` 319 | 320 | #### Generate FDF 321 | 322 | Create a FDF file from a given filled PDF form. 323 | 324 | ```php 325 | use mikehaertl\pdftk\Pdf; 326 | 327 | // Create FDF from PDF 328 | $pdf = new Pdf('/path/form.pdf'); 329 | $result = $pdf->generateFdfFile('/path/data.fdf'); 330 | if ($result === false) { 331 | $error = $pdf->getError(); 332 | } 333 | ``` 334 | 335 | #### Get PDF data 336 | 337 | Read out metadata or form field information from a PDF file. 
338 | 339 | ```php 340 | use mikehaertl\pdftk\Pdf; 341 | 342 | // Get data 343 | $pdf = new Pdf('/path/my.pdf'); 344 | $data = $pdf->getData(); 345 | if ($data === false) { 346 | $error = $pdf->getError(); 347 | } 348 | 349 | // Get form data fields 350 | $pdf = new Pdf('/path/my.pdf'); 351 | $data = $pdf->getDataFields(); 352 | if ($data === false) { 353 | $error = $pdf->getError(); 354 | } 355 | 356 | // Get data as string 357 | echo $data; 358 | $txt = (string) $data; 359 | $txt = $data->__toString(); 360 | 361 | // Get data as array 362 | $arr = (array) $data; 363 | $arr = $data->__toArray(); 364 | $field1 = $data[0]['Field1']; 365 | ``` 366 | 367 | #### How to perform more than one operation on a PDF 368 | 369 | As stated above, you can only perform one of the preceding operations on a single PDF instance. 370 | If you need more than one operation you can feed one `Pdf` instance into another: 371 | 372 | ```php 373 | use mikehaertl\pdftk\Pdf; 374 | 375 | // Extract pages 1-5 and 7,4,9 into a new file 376 | $pdf = new Pdf('/path/my.pdf'); 377 | $pdf->cat(1, 5) 378 | ->cat([7, 4, 9]); 379 | 380 | // We now use the above PDF as source file for a new PDF 381 | $pdf2 = new Pdf($pdf); 382 | $result = $pdf2->fillForm(['name' => 'ÄÜÖ äüö мирано čárka']) 383 | ->needAppearances() 384 | ->saveAs('/path/filled.pdf'); 385 | if ($result === false) { 386 | $error = $pdf->getError(); 387 | } 388 | ``` 389 | 390 | ### Options 391 | 392 | You can combine the above operations with one or more of the following options. 393 | 394 | ```php 395 | use mikehaertl\pdftk\Pdf; 396 | 397 | $pdf = new Pdf('/path/my.pdf'); 398 | 399 | $result = $pdf->allow('AllFeatures') // Change permissions 400 | ->flatten() // Merge form data into document (doesn't work well with UTF-8!) 
401 | ->compress($value) // Compress/Uncompress 402 | ->keepId('first') // Keep first/last Id of combined files 403 | ->dropXfa() // Drop newer XFA form from PDF 404 | ->dropXmp() // Drop newer XMP data from PDF 405 | ->needAppearances() // Make clients create appearance for form fields 406 | ->setPassword($pw) // Set owner password 407 | ->setUserPassword($pw) // Set user password 408 | ->passwordEncryption(128) // Set password encryption strength 409 | ->saveAs('new.pdf'); 410 | if ($result === false) { 411 | $error = $pdf->getError(); 412 | } 413 | 414 | // Example: Fill PDF form and merge form data into PDF 415 | // Fill form with data array 416 | $pdf = new Pdf('/path/form.pdf'); 417 | $result = $pdf->fillForm(['name' => 'My Name']) 418 | ->flatten() 419 | ->saveAs('/path/filled.pdf'); 420 | if ($result === false) { 421 | $error = $pdf->getError(); 422 | } 423 | 424 | // Example: Remove password from a PDF 425 | $pdf = new Pdf; 426 | $result = $pdf->addFile('/path/my.pdf', null, 'some**password') 427 | ->saveAs('/path/new.pdf'); 428 | if ($result === false) { 429 | $error = $pdf->getError(); 430 | } 431 | ``` 432 | 433 | ### Shell Command 434 | 435 | The class uses [php-shellcommand](https://github.com/mikehaertl/php-shellcommand) to execute 436 | `pdftk`. 
You can pass `$options` for its `Command` class as second argument to the constructor: 437 | 438 | ```php 439 | use mikehaertl\pdftk\Pdf; 440 | 441 | $pdf = new Pdf('/path/my.pdf', [ 442 | 'command' => '/some/other/path/to/pdftk', 443 | // or on most Windows systems: 444 | // 'command' => 'C:\Program Files (x86)\PDFtk\bin\pdftk.exe', 445 | 'useExec' => true, // May help on Windows systems if execution fails 446 | ]); 447 | ``` 448 | 449 | #### Solve issues with UTF-8 characters in filenames or infofile content 450 | 451 | If you have files with UTF-8 encoded characters in their filename or if you 452 | pass an infofile with such characters to `updateInfo()` you should supply the 453 | correct locale when executing `pdftk`. You can therefore add these options: 454 | 455 | ```php 456 | $pdf = new Pdf($file, [ 457 | 'locale' => 'en_US.utf8', 458 | 'procEnv' => [ 459 | 'LANG' => 'en_US.utf-8', 460 | ], 461 | ]); 462 | ``` 463 | 464 | > **Note:** You need to ensure that the locale you set here is available on 465 | > your system. On Linux you can check with `locale -a` which locales are 466 | > installed. [This article](https://wiki.archlinux.org/title/locale) explains 467 | > the concept in more detail. 468 | 469 | 470 | 471 | ### Temporary File 472 | 473 | Internally a temporary file is created via [php-tmpfile](https://github.com/mikehaertl/php-tmpfile). 474 | You can also access that file directly, e.g. 
if you neither want to send or save the 475 | file but only need the binary PDF content: 476 | 477 | ```php 478 | use mikehaertl\pdftk\Pdf; 479 | 480 | $pdf = new Pdf('/path/my.pdf'); 481 | $result = $pdf->fillForm(['name' => 'My Name']) 482 | ->execute(); 483 | if ($result === false) { 484 | $error = $pdf->getError(); 485 | } 486 | $content = file_get_contents( (string) $pdf->getTmpFile() ); 487 | ``` 488 | 489 | If you have permission issues you may have to set a directory where your 490 | `pdftk` command can write to: 491 | 492 | ```php 493 | use mikehaertl\pdftk\Pdf; 494 | 495 | $pdf = new Pdf('/path/my.pdf'); 496 | $pdf->tempDir = '/home/john/temp'; 497 | ``` 498 | 499 | ## API 500 | 501 | Please consult the source files for a full documentation of each method. 502 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "mikehaertl/php-pdftk", 3 | "description": "A PDF conversion and form utility based on pdftk.", 4 | "keywords": ["pdf", "pdftk"], 5 | "type": "library", 6 | "license": "MIT", 7 | "authors": [ 8 | { 9 | "name": "Michael Haertl", 10 | "email": "haertl.mike@gmail.com" 11 | } 12 | ], 13 | "require": { 14 | "php": ">=5.3.0", 15 | "mikehaertl/php-shellcommand": "^1.6.3", 16 | "mikehaertl/php-tmpfile": "^1.1.0" 17 | }, 18 | "require-dev": { 19 | "phpunit/phpunit": ">4.0 <9.4" 20 | }, 21 | "autoload": { 22 | "psr-4": { 23 | "mikehaertl\\pdftk\\": "src/" 24 | } 25 | }, 26 | "autoload-dev": { 27 | "psr-4": { 28 | "tests\\": "tests" 29 | } 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/Command.php: -------------------------------------------------------------------------------- 1 | 13 | * @license http://www.opensource.org/licenses/MIT 14 | */ 15 | class Command extends BaseCommand 16 | { 17 | /** 18 | * @var string the pdftk binary 19 | */ 20 | protected 
$_command = 'pdftk'; 21 | 22 | /** 23 | * @var array list of input files to process as array('name' => $filename, 24 | * 'password' => $pw) indexed by handle 25 | */ 26 | protected $_files = array(); 27 | 28 | /** 29 | * @var array list of command options, either strings or array with 30 | * arguments to addArg() 31 | */ 32 | protected $_options = array(); 33 | 34 | /** 35 | * @var string the operation to perform 36 | */ 37 | protected $_operation; 38 | 39 | /** 40 | * @var string|array operation arguments, e.g. a list of page ranges or a 41 | * filename or tmp file instance 42 | */ 43 | protected $_operationArgument = array(); 44 | 45 | /** 46 | * @var bool whether to force escaping of the operation argument e.g. for 47 | * filenames 48 | */ 49 | protected $_escapeOperationArgument = false; 50 | 51 | /** 52 | * @param string $name the PDF file to add for processing 53 | * @param string $handle one or more uppercase letters A..Z to reference 54 | * this file later. 55 | * @param string|null $password the owner (or user) password if any 56 | * @return Command the command instance for method chaining 57 | * @throws \Exception 58 | */ 59 | public function addFile($name, $handle, $password = null) 60 | { 61 | $this->checkExecutionStatus(); 62 | $file = array( 63 | 'name' => $name, 64 | 'password' => $password, 65 | ); 66 | $this->_files[$handle] = $file; 67 | return $this; 68 | } 69 | 70 | /** 71 | * @param string $option the pdftk option to add 72 | * @param string|File|null $argument the argument to add, either string, 73 | * File instance or null if none 74 | * @param null|bool whether to escape the option. Default is null meaning 75 | * use Command default setting. 76 | * @return Command the command instance for method chaining 77 | */ 78 | public function addOption($option, $argument = null, $escape = null) 79 | { 80 | $this->_options[] = $argument === null ? 
$option : array($option, $argument, $escape); 81 | return $this; 82 | } 83 | 84 | /** 85 | * @param string $operation the operation to perform 86 | * @return Command the command instance for method chaining 87 | */ 88 | public function setOperation($operation) 89 | { 90 | $this->checkExecutionStatus(); 91 | $this->_operation = $operation; 92 | return $this; 93 | } 94 | 95 | /** 96 | * @return string|null the current operation or null if none set 97 | */ 98 | public function getOperation() 99 | { 100 | return $this->_operation; 101 | } 102 | 103 | /** 104 | * @param string $value the operation argument 105 | * @param bool $escape whether to escape the operation argument 106 | * @return Command the command instance for method chaining 107 | */ 108 | public function setOperationArgument($value, $escape = false) 109 | { 110 | $this->checkExecutionStatus(); 111 | $this->_operationArgument = $value; 112 | $this->_escapeOperationArgument = $escape; 113 | return $this; 114 | } 115 | 116 | /** 117 | * @return string|array|null the current operation argument as string or 118 | * array or null if none set 119 | */ 120 | public function getOperationArgument() 121 | { 122 | // Typecast to string in case we have a File instance as argument 123 | return is_array($this->_operationArgument) ? $this->_operationArgument : (string) $this->_operationArgument; 124 | } 125 | 126 | /** 127 | * @return int the number of files added to the command 128 | */ 129 | public function getFileCount() 130 | { 131 | return count($this->_files); 132 | } 133 | 134 | /** 135 | * Add a page range as used by some operations 136 | * 137 | * @param int|string|array $start the start page number or an array of page 138 | * numbers. If an array, the other arguments will be ignored. $start can 139 | * also be bigger than $end for pages in reverse order. 
140 | * @param int|string|null $end the end page number or null for single page 141 | * (or list if $start is an array) 142 | * @param string|null $handle the handle of the file to use. Can be null if 143 | * only a single file was added. 144 | * @param string|null $qualifier the page number qualifier, either 'even' 145 | * or 'odd' or null for none 146 | * @param string $rotation the rotation to apply to the pages. 147 | * @return Command the command instance for method chaining 148 | */ 149 | public function addPageRange($start, $end = null, $handle = null, $qualifier = null, $rotation = null) 150 | { 151 | $this->checkExecutionStatus(); 152 | if (is_array($start)) { 153 | if ($handle !== null) { 154 | $start = array_map(function ($p) use ($handle) { 155 | return $handle . $p; 156 | }, $start); 157 | } 158 | $range = implode(' ', $start); 159 | } else { 160 | $range = $handle . $start; 161 | if ($end) { 162 | $range .= '-' . $end; 163 | } 164 | $range .= $qualifier . $rotation; 165 | } 166 | $this->_operationArgument[] = $range; 167 | return $this; 168 | } 169 | 170 | /** 171 | * @param string|null $filename the filename to add as 'output' option or 172 | * null if none 173 | * @return bool whether the command was executed successfully 174 | */ 175 | public function execute($filename = null) 176 | { 177 | $this->checkExecutionStatus(); 178 | $this->processInputFiles(); 179 | $this->processOperation(); 180 | $this->processOptions($filename); 181 | return parent::execute(); 182 | } 183 | 184 | /** 185 | * Process input PDF files and create respective command arguments 186 | */ 187 | protected function processInputFiles() 188 | { 189 | $passwords = array(); 190 | foreach ($this->_files as $handle => $file) { 191 | $this->addArg($handle . 
'=', $file['name']); 192 | if ($file['password'] !== null) { 193 | $passwords[$handle] = $file['password']; 194 | } 195 | } 196 | if ($passwords !== array()) { 197 | $this->addArg('input_pw'); 198 | foreach ($passwords as $handle => $password) { 199 | $this->addArg($handle . '=', $password); 200 | } 201 | } 202 | } 203 | 204 | /** 205 | * Process options and create respective command arguments 206 | * @param string|null $filename if provided an 'output' option will be 207 | * added 208 | */ 209 | protected function processOptions($filename = null) 210 | { 211 | // output must be first option after operation 212 | if ($filename !== null) { 213 | $this->addArg('output', $filename, true); 214 | } 215 | foreach ($this->_options as $option) { 216 | if (is_array($option)) { 217 | $this->addArg($option[0], $option[1], $option[2]); 218 | } else { 219 | $this->addArg($option); 220 | } 221 | } 222 | } 223 | 224 | /** 225 | * Process operation and create respective command arguments 226 | */ 227 | protected function processOperation() 228 | { 229 | if ($this->_operation !== null) { 230 | $value = $this->_operationArgument ? $this->_operationArgument : null; 231 | if ($value instanceof TmpFile) { 232 | $value = (string) $value; 233 | } 234 | $this->addArg($this->_operation, $value, $this->_escapeOperationArgument); 235 | } 236 | } 237 | 238 | /** 239 | * Ensure that the command was not executed yet. Throws exception 240 | * otherwise. 
241 | * @throws \Exception 242 | */ 243 | protected function checkExecutionStatus() 244 | { 245 | if ($this->getExecuted()) { 246 | throw new \Exception('Operation was already executed'); 247 | } 248 | } 249 | } 250 | -------------------------------------------------------------------------------- /src/DataFields.php: -------------------------------------------------------------------------------- 1 | 12 | * @author Michael Härtl 13 | * @license http://www.opensource.org/licenses/MIT 14 | */ 15 | class DataFields extends ArrayObject 16 | { 17 | private $_string; 18 | private $_array; 19 | 20 | /** 21 | * DataFields constructor. 22 | * 23 | * @param string $input 24 | * @param int $flags 25 | * @param string $iterator_class 26 | */ 27 | public function __construct($input = null, $flags = 0, $iterator_class = "ArrayIterator") 28 | { 29 | $this->_string = $input ?: ''; 30 | $this->_array = self::parse($this->_string); 31 | 32 | return parent::__construct($this->_array, $flags, $iterator_class); 33 | } 34 | 35 | /** 36 | * @return string 37 | */ 38 | public function __toString() 39 | { 40 | return $this->_string; 41 | } 42 | 43 | /** 44 | * @return array 45 | */ 46 | public function __toArray() 47 | { 48 | return $this->_array; 49 | } 50 | 51 | /** 52 | * Parse the output of dump_data_fields into an array. 53 | * 54 | * The string to parse can either be a single block of `Xyz:value` lines 55 | * or a set of such blocks, separated by and starting with `---`. 56 | * 57 | * 58 | * Here's an example: 59 | * 60 | * ``` 61 | * --- 62 | * FieldType: Text 63 | * FieldName: Text1 64 | * FieldFlags: 0 65 | * FieldValue: University of Missouri : Ray-Holland 66 | * extended line value 67 | * FieldValueDefault: University of Missouri : Ray-Holland 68 | * extended line2 value 69 | * FieldJustification: Left 70 | * FieldMaxLength: 99 71 | * --- 72 | * FieldType: Text 73 | * FieldName: Text2 74 | * ... 75 | * ... 
76 | * ``` 77 | * 78 | * @param $input the string to parse 79 | * @return array the parsed result 80 | */ 81 | public static function parse($input) 82 | { 83 | if (strncmp('---', $input, 3) === 0) { 84 | // Split blocks only if '---' is followed by 'FieldType' 85 | $blocks = preg_split( 86 | '/^---(\r\n|\n|\r)(?=FieldType:)/m', 87 | substr($input, 3) 88 | ); 89 | return array_map('\mikehaertl\pdftk\DataFields::parseBlock', $blocks); 90 | } else { 91 | return self::parseBlock($input); 92 | } 93 | } 94 | 95 | /** 96 | * Parses a block of this form: 97 | * 98 | * ``` 99 | * Name1: Value1 100 | * Name2: Value2 101 | * Name3: Value3 102 | * ... 103 | * ``` 104 | * 105 | * @param string $block the block to parse 106 | * @return array the parsed block values indexed by respective names 107 | */ 108 | public static function parseBlock($block) 109 | { 110 | $data = array(); 111 | $lines = preg_split("/(\r\n|\n|\r)/", trim($block)); 112 | $continueKey = null; 113 | foreach ($lines as $n => $line) { 114 | if ($continueKey !== null) { 115 | $data[$continueKey] .= "\n" . $line; 116 | if (!self::lineContinues($lines, $n, $continueKey)) { 117 | $continueKey = null; 118 | } 119 | } elseif (preg_match('/([^:]*): ?(.*)/', $line, $match)) { 120 | $key = $match[1]; 121 | $value = $match[2]; 122 | // Convert multiple keys like 'FieldStateOption' or 'FieldValue' 123 | // from Choice fields to array 124 | if (isset($data[$key])) { 125 | $data[$key] = (array) $data[$key]; 126 | $data[$key][] = $value; 127 | } else { 128 | $data[$key] = $value; 129 | } 130 | if (self::lineContinues($lines, $n, $key)) { 131 | $continueKey = $key; 132 | } 133 | } 134 | } 135 | return $data; 136 | } 137 | 138 | /** 139 | * Checks whether the value for the given line number continues on the next 140 | * line, i.e. is a multiline string. 141 | * 142 | * This can be the case for 'FieldValue' and 'FieldValueDefault' keys. 
To 143 | * find the end of the string we don't simply test for /^Field/, as this 144 | * would also match multiline strings where a line starts with 'Field'. 145 | * 146 | * Instead we assume that the string is always followed by one of these 147 | * keys: 148 | * 149 | * - 'FieldValue:' 150 | * - 'FieldValueDefault:' 151 | * - 'FieldJustification:' 152 | * 153 | * @param array $lines all lines of the block 154 | * @param int $n the 0-based index of the current line 155 | * @param string $key the key for the value. Only 'FieldValue' and 156 | * 'FieldValueDefault' can span multiple lines 157 | * @return bool whether the value continues in line n + 1 158 | */ 159 | protected static function lineContinues($lines, $n, $key) 160 | { 161 | return 162 | in_array($key, array('FieldValue', 'FieldValueDefault')) && 163 | array_key_exists($n + 1, $lines) && 164 | !preg_match('/^Field(Value|ValueDefault|Justification):/', $lines[$n + 1]); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/FdfFile.php: -------------------------------------------------------------------------------- 1 | 13 | * @license http://www.opensource.org/licenses/MIT 14 | */ 15 | class FdfFile extends File 16 | { 17 | // FDF file header 18 | const FDF_HEADER = <<> >> 26 | endobj 27 | trailer 28 | <> 29 | %%EOF 30 | FDF; 31 | 32 | /** 33 | * Constructor 34 | * 35 | * @param array $data the form data as name => value 36 | * @param string|null $suffix the optional suffix for the tmp file 37 | * @param string|null $prefix the optional prefix for the tmp file. If null 38 | * 'php_tmpfile_' is used. 39 | * @param string|null $directory directory where the file should be 40 | * created. Autodetected if not provided. 41 | * @param string|null $encoding of the data. Default is 'UTF-8'. 
42 | */ 43 | public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8') 44 | { 45 | if ($directory === null) { 46 | $directory = self::getTempDir(); 47 | } 48 | $suffix = '.fdf'; 49 | $prefix = 'php_pdftk_fdf_'; 50 | 51 | $this->_fileName = tempnam($directory, $prefix); 52 | $newName = $this->_fileName . $suffix; 53 | rename($this->_fileName, $newName); 54 | $this->_fileName = $newName; 55 | 56 | if (!function_exists('mb_convert_encoding')) { 57 | throw new \Exception('MB extension required.'); 58 | } 59 | 60 | $fields = ''; 61 | foreach ($data as $key => $value) { 62 | // Create UTF-16BE string encode as ASCII hex 63 | // See http://blog.tremily.us/posts/PDF_forms/ 64 | $utf16Value = mb_convert_encoding($value, 'UTF-16BE', $encoding); 65 | 66 | /* Also create UTF-16BE encoded key, this allows field names containing 67 | * german umlauts and most likely many other "special" characters. 68 | * See issue #17 (https://github.com/mikehaertl/php-pdftk/issues/17) 69 | */ 70 | $utf16Key = mb_convert_encoding($key, 'UTF-16BE', $encoding); 71 | 72 | // Escape parenthesis 73 | $utf16Value = strtr($utf16Value, array('(' => '\\(', ')' => '\\)')); 74 | $fields .= "<>\n"; 75 | } 76 | 77 | // Use fwrite, since file_put_contents() messes around with character encoding 78 | $fp = fopen($this->_fileName, 'w'); 79 | fwrite($fp, self::FDF_HEADER); 80 | fwrite($fp, $fields); 81 | fwrite($fp, self::FDF_FOOTER); 82 | fclose($fp); 83 | } 84 | } 85 | -------------------------------------------------------------------------------- /src/InfoFields.php: -------------------------------------------------------------------------------- 1 | 11 | * @license http://www.opensource.org/licenses/MIT 12 | */ 13 | class InfoFields extends ArrayObject 14 | { 15 | private $_string; 16 | 17 | private $_array; 18 | 19 | /** 20 | * InfoFields constructor. 
21 | * 22 | * @param string $input 23 | * @param int $flags 24 | * @param string $iterator_class 25 | */ 26 | public function __construct($input = null, $flags = 0, $iterator_class = "ArrayIterator") 27 | { 28 | $this->_string = $input ?: ''; 29 | $this->_array = $this->parseData($this->_string); 30 | 31 | return parent::__construct($this->_array, $flags, $iterator_class); 32 | } 33 | 34 | /** 35 | * @return string 36 | */ 37 | public function __toString() 38 | { 39 | return $this->_string; 40 | } 41 | 42 | /** 43 | * @return array 44 | */ 45 | public function __toArray() 46 | { 47 | return $this->_array; 48 | } 49 | 50 | /** 51 | * Parse the output of dump_data into something usable. 52 | * 53 | * The expected string looks similar to this: 54 | * 55 | * InfoBegin 56 | * InfoKey: Creator 57 | * InfoValue: Adobe Acrobat Pro DC 15.0 58 | * InfoBegin 59 | * InfoKey: Producer 60 | * InfoValue: XYZ 61 | * PdfID0: 1fdce9ed1153ab4c973334b512a67997 62 | * PdfID1: c7acc878cda02ad7bb401fa8080a8929 63 | * NumberOfPages: 11 64 | * BookmarkBegin 65 | * BookmarkTitle: First bookmark 66 | * BookmarkLevel: 1 67 | * BookmarkPageNumber: 1 68 | * BookmarkBegin 69 | * BookmarkTitle: Second bookmark 70 | * BookmarkLevel: 1 71 | * BookmarkPageNumber: 2 72 | * 73 | * @param $dataString 74 | * @return array 75 | */ 76 | private function parseData($dataString) 77 | { 78 | $output = array(); 79 | foreach (explode(PHP_EOL, $dataString) as $line) { 80 | $trimmedLine = trim($line); 81 | // Parse blocks of the form: 82 | // AbcBegin 83 | // AbcData1: Value1 84 | // AbcData2: Value2 85 | // AbcBegin 86 | // AbcData1: Value3 87 | // AbcData2: Value4 88 | // ... 89 | if (preg_match('/^(\w+)Begin$/', $trimmedLine, $matches)) { 90 | // Previous group ended - if any - so add it to output 91 | if (!empty($group) && !empty($groupData)) { 92 | $output[$group][] = $groupData; 93 | } 94 | // Now start next group 95 | $group = $matches[1]; // Info, PageMedia, ... 
96 | if (!isset($output[$group])) { 97 | $output[$group] = array(); 98 | } 99 | $groupData = array(); 100 | continue; 101 | } 102 | if (!empty($group)) { 103 | // Check for AbcData1: Value1 104 | if (preg_match("/^$group(\w+): ?(.*)$/", $trimmedLine, $matches)) { 105 | $groupData[$matches[1]] = $matches[2]; 106 | continue; 107 | } else { 108 | // Something else, so group ended 109 | if (!empty($groupData)) { 110 | $output[$group][] = $groupData; 111 | $groupData = array(); 112 | } 113 | $group = null; 114 | } 115 | } 116 | if (preg_match('/([^:]*): ?(.*)/', $trimmedLine, $matches)) { 117 | $output[$matches[1]] = $matches[2]; 118 | } 119 | } 120 | // There could be a final group left if it was not followed by another 121 | // line in the loop 122 | if (!empty($group) && !empty($groupData)) { 123 | $output[$group][] = $groupData; 124 | } 125 | 126 | // Info group is a list of ['Key' => 'x', 'Value' => 'y'], so 127 | // convert it to ['x' => 'y', ...] 128 | if (isset($output['Info'])) { 129 | $data = array(); 130 | foreach ($output['Info'] as $infoGroup) { 131 | if (isset($infoGroup['Key'], $infoGroup['Value'])) { 132 | $data[$infoGroup['Key']] = $infoGroup['Value']; 133 | } 134 | } 135 | $output['Info'] = $data; 136 | } 137 | return $output; 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/InfoFile.php: -------------------------------------------------------------------------------- 1 | 14 | * @license http://www.opensource.org/licenses/MIT 15 | */ 16 | class InfoFile extends File 17 | { 18 | /** 19 | * @var string[] list of valid keys for the document information directory of 20 | * the PDF. These will be converted into `InfoBegin... InfoKey... InvoValue` 21 | * blocks on the output. 
22 | * 23 | * See section 14.3.3 in https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf 24 | */ 25 | public static $documentInfoFields = array( 26 | 'Title', 27 | 'Author', 28 | 'Subject', 29 | 'Keywords', 30 | 'Creator', 31 | 'Producer', 32 | 'CreationDate', 33 | 'ModDate', 34 | 'Trapped', 35 | ); 36 | 37 | /** 38 | * Constructor 39 | * 40 | * @param array|InfoFields $data the data in this format: 41 | * ``` 42 | * [ 43 | * 'Info' => [ 44 | * 'Title' => '...', 45 | * 'Author' => '...', 46 | * 'Subject' => '...', 47 | * 'Keywords' => '...', 48 | * 'Creator' => '...', 49 | * 'Producer' => '...', 50 | * 'CreationDate' => '...', 51 | * 'ModDate' => '...', 52 | * 'Trapped' => '...', 53 | * ], 54 | * 'Bookmark' => [ 55 | * [ 56 | * 'Title' => '...', 57 | * 'Level' => ..., 58 | * 'PageNumber' => ..., 59 | * ], 60 | * ], 61 | * 'PageMedia' => [ ... ], 62 | * 'PageLabel' => [ ... ], 63 | * // ... 64 | * ] 65 | * ``` 66 | * This is the same format as the InfoFields object that is returned 67 | * by `getData()` if you cast it to an array. You can also pass such an 68 | * (optionally modified) object as input. Some fields like 'NumberOfPages' 69 | * or 'PdfID0' are ignored as those are not part of the PDF's metadata. 70 | * All array elements are optional. 71 | * @param string|null $suffix the optional suffix for the tmp file 72 | * @param string|null $suffix the optional prefix for the tmp file. If null 73 | * 'php_tmpfile_' is used. 74 | * @param string|null $directory directory where the file should be 75 | * created. Autodetected if not provided. 76 | * @param string|null $encoding of the data. Default is 'UTF-8'. If the 77 | * data has another encoding it will be converted to UTF-8. This requires 78 | * the mbstring extension to be installed. 
79 | * @throws Exception on invalid data format or if mbstring extension is 80 | * missing and data must be converted 81 | */ 82 | public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8') 83 | { 84 | if ($suffix === null) { 85 | $suffix = '.txt'; 86 | } 87 | if ($prefix === null) { 88 | $prefix = 'php_pdftk_info_'; 89 | } 90 | if ($directory === null) { 91 | $directory = self::getTempDir(); 92 | } 93 | 94 | $tempName = tempnam($directory, $prefix); 95 | $newName = $tempName . $suffix; 96 | rename($tempName, $newName); 97 | $this->_fileName = $newName; 98 | 99 | if ($encoding !== 'UTF-8' && !function_exists('mb_convert_encoding')) { 100 | throw new Exception('mbstring extension required.'); 101 | } 102 | 103 | $fields = ''; 104 | $normalizedData = self::normalize($data); 105 | 106 | foreach ($normalizedData as $block => $items) { 107 | $fields .= self::renderBlock($block, $items, $encoding); 108 | } 109 | 110 | // Use fwrite, since file_put_contents() messes around with character encoding 111 | $fp = fopen($this->_fileName, 'w'); 112 | fwrite($fp, $fields); 113 | fclose($fp); 114 | } 115 | 116 | /** 117 | * Normalize the input data 118 | * 119 | * This also converts data from the legacy format (<0.13.0) to the new 120 | * input format described in the constructor. 
121 | * 122 | * @param array $data the data to normalize 123 | * @return array a normalized array in the format described in the constructor 124 | */ 125 | private static function normalize($data) 126 | { 127 | $normalized = array(); 128 | foreach ($data as $key => $value) { 129 | if (in_array($key, self::$documentInfoFields)) { 130 | $normalized['Info'][$key] = $value; 131 | } elseif (is_array($value)) { 132 | if (!isset($normalized[$key])) { 133 | $normalized[$key] = array(); 134 | } 135 | $normalized[$key] = array_merge($normalized[$key], $value); 136 | } 137 | } 138 | return $normalized; 139 | } 140 | 141 | /** 142 | * Render a set of block fields 143 | * 144 | * @param string $block like 'Info', 'Bookmark', etc. 145 | * @param array $items the field items to render 146 | * @param string $encoding the encoding of the item data 147 | * @return string the rendered fields 148 | */ 149 | private static function renderBlock($block, $items, $encoding) 150 | { 151 | $fields = ''; 152 | foreach ($items as $key => $value) { 153 | if ($block === 'Info') { 154 | $fields .= self::renderField($block, $key, $value, $encoding, true); 155 | } else { 156 | $fields .= "{$block}Begin\n"; 157 | foreach ($value as $subKey => $subValue) { 158 | $fields .= self::renderField($block, $subKey, $subValue, $encoding, false); 159 | } 160 | } 161 | } 162 | return $fields; 163 | } 164 | 165 | /** 166 | * Render a field in a given input block 167 | * 168 | * @param string $prefix the prefix to use for the field 169 | * @param string $key the field key 170 | * @param string $value the field value 171 | * @param string $encoding the endoding of key and value 172 | * @param bool $isInfo whether it's an 'Info' field 173 | * @return string the rendered field 174 | */ 175 | private static function renderField($prefix, $key, $value, $encoding, $isInfo) 176 | { 177 | if ($encoding !== 'UTF-8') { 178 | $value = mb_convert_encoding($value, 'UTF-8', $encoding); 179 | $key = mb_convert_encoding($key, 
'UTF-8', $encoding); 180 | $value = defined('ENT_XML1') ? htmlspecialchars($key, ENT_XML1, 'UTF-8') : htmlspecialchars($key); 181 | $key = defined('ENT_XML1') ? htmlspecialchars($value, ENT_XML1, 'UTF-8') : htmlspecialchars($value); 182 | } 183 | if ($isInfo) { 184 | return "InfoBegin\nInfoKey: $key\nInfoValue: $value\n"; 185 | } else { 186 | return "{$prefix}{$key}: $value\n"; 187 | } 188 | 189 | } 190 | } 191 | -------------------------------------------------------------------------------- /src/Pdf.php: -------------------------------------------------------------------------------- 1 | 16 | * @license http://www.opensource.org/licenses/MIT 17 | */ 18 | class Pdf 19 | { 20 | // The prefix for temporary files 21 | const TMP_PREFIX = 'tmp_php_pdftk_'; 22 | 23 | /** 24 | * @var bool whether to ignore any errors if some non-empty output file was 25 | * still created. Default is false. 26 | */ 27 | public $ignoreWarnings = false; 28 | 29 | /** 30 | * @var null|string an optional directory where temporary files should be 31 | * created. If left empty the directory is autodetected. 32 | */ 33 | public $tempDir; 34 | 35 | /** 36 | * @var File the temporary output file 37 | */ 38 | protected $_tmpFile; 39 | 40 | /** 41 | * @var string the content type of the tmp output 42 | */ 43 | protected $_tmpOutputContentType = 'application/pdf'; 44 | 45 | /** 46 | * @var Command the command instance that executes pdftk 47 | */ 48 | protected $_command; 49 | 50 | /** 51 | * @var int a counter for autogenerated handles 52 | */ 53 | protected $_handle = 0; 54 | 55 | /** 56 | * @var string the error message 57 | */ 58 | protected $_error = ''; 59 | 60 | /** 61 | * @var string|null the output filename. If null (default) a tmp file is 62 | * used as output. If false, no output option is added at all. 
63 | */ 64 | protected $_output; 65 | 66 | /** 67 | * @var string the PDF data as returned from getData() 68 | */ 69 | protected $_data; 70 | protected $_data_utf8; 71 | 72 | /** 73 | * @var DataFields the PDF form field data as returned from getDataFields() 74 | */ 75 | protected $_dataFields; 76 | protected $_dataFields_utf8; 77 | 78 | /** 79 | * @var Pdf[]|null if the input was an instance, we keep a reference here, 80 | * so that it won't get unlinked before this object gets destroyed 81 | */ 82 | protected $_pdfs; 83 | 84 | /** 85 | * @param string|Pdf|array $pdf a pdf filename or Pdf instance or an array 86 | * of filenames/instances indexed by a handle. The array values can also 87 | * be arrays of the form array($filename, $password) if some files are 88 | * password protected. 89 | * @param array $options Options to pass to set on the Command instance, 90 | * e.g. the pdftk binary path 91 | */ 92 | public function __construct($pdf = null, $options = array()) 93 | { 94 | $command = $this->getCommand(); 95 | if ($options !== array()) { 96 | $command->setOptions($options); 97 | } 98 | if (is_string($pdf) || $pdf instanceof Pdf) { 99 | $this->addFile($pdf); 100 | } elseif (is_array($pdf)) { 101 | foreach ($pdf as $handle => $file) { 102 | if (is_array($file)) { 103 | $this->addFile($file[0], $handle, $file[1]); 104 | } else { 105 | $this->addFile($file, $handle); 106 | } 107 | } 108 | } 109 | } 110 | 111 | /** 112 | * @param string|Pdf $name the PDF filename or Pdf instance to add for 113 | * processing 114 | * @param string|null $handle one or more uppercase letters A..Z to 115 | * reference this file later. 
If no handle is provided, an internal handle 116 | * is autocreated, consuming the range Z..A 117 | * @param string|null $password the owner (or user) password if any 118 | * @return Pdf the pdf instance for method chaining 119 | */ 120 | public function addFile($name, $handle = null, $password = null) 121 | { 122 | if ($handle === null || is_numeric($handle)) { 123 | $handle = $this->nextHandle(); 124 | } 125 | if ($name instanceof Pdf) { 126 | // Keep a reference to the object to prevent unlinking 127 | $this->_pdfs[] = $name; 128 | if (!$name->getCommand()->getExecuted()) { 129 | // @todo: Catch errors! 130 | $name->execute(); 131 | } 132 | $name = (string) $name->getTmpFile(); 133 | } 134 | $this->getCommand()->addFile($name, $handle, $password); 135 | return $this; 136 | } 137 | 138 | /** 139 | * Assemble (catenate) pages from the input files. 140 | * 141 | * Values for rotation are (in degrees): north: 0, east: 90, south: 180, 142 | * west: 270, left: -90, right: +90, down: +180. left, right and down make 143 | * relative adjustments to a page's rotation. Note: Older pdftk versions 144 | * use N, E, S, W, L, R, and D instead. 145 | * 146 | * Example: 147 | * 148 | * $pdf = new Pdf; 149 | * $pdf->addFile('file1.pdf', 'A') 150 | * ->addFile('file2.pdf', 'B') 151 | * ->cat(array(1,3),'B')) // pages 1 and 3 of file B 152 | * ->cat(1, 5, 'A', 'odd') // pages 1, 3, 5 of file A 153 | * ->cat('end', 5, 'B') // pages 5 to end of file B in reverse order 154 | * ->cat(null, null, 'B', 'east') // All pages from file B rotated by 90 degree 155 | * ->saveAs('out.pdf'); 156 | * or 157 | * $files = ['file1.pdf', 'file2.pdf', 'file3.pdf']; 158 | * $pdf = new Pdf($files); 159 | * $pdf->cat() // all files, all pages 160 | * ->saveAs('out.pdf'); 161 | * 162 | * @param int|string|array|null $start the start page number or an array of page 163 | * numbers. If an array, the other arguments will be ignored. $start can 164 | * also be bigger than $end for pages in reverse order. 
If $start is null all 165 | * pages of all files will be added. 166 | * @param int|string|null $end the end page number or null for single page 167 | * (or list if $start is an array) 168 | * @param string|null $handle the handle of the file to use. Can be null if 169 | * only a single file was added. 170 | * @param string|null $qualifier the page number qualifier, either 'even' 171 | * or 'odd' or null for none 172 | * @param string $rotation the rotation to apply to the pages. 173 | * @return Pdf the pdf instance for method chaining 174 | */ 175 | public function cat($start = null, $end = null, $handle = null, $qualifier = null, $rotation = null) 176 | { 177 | $this->getCommand() 178 | ->setOperation('cat') 179 | ->addPageRange($start, $end, $handle, $qualifier, $rotation); 180 | return $this; 181 | } 182 | 183 | /** 184 | * Shuffle pages from the input files. 185 | * 186 | * This works the same as cat(), but each call to this method creates a 187 | * "stream" of pages. The outfile will be assembled by adding one page from 188 | * each stream at a time. 189 | * 190 | * Example: 191 | * 192 | * $pdf = new Pdf; 193 | * $pdf1 = $pdf->addFile('file1.pdf'); 194 | * $pdf->shuffle($pdf1, array(1,3,2)) 195 | * ->shuffle($pdf1, array(4,5,9) 196 | * ->saveAs('out.pdf'); 197 | * 198 | * This will give the page order 1, 4, 3, 5, 2, 9 in the out.pdf 199 | * 200 | * @param string $handle the handle of the input file to use 201 | * @param int|array $start the start page number or an array of page 202 | * numbers. 203 | * @param int|null $end the end page number or null for single page (or 204 | * list if $start is an array) 205 | * @param string|null $qualifier the page number qualifier, either 'even' 206 | * or 'odd' or null for none 207 | * @param string $rotation the rotation to apply to the pages. See cat() 208 | * for more details. 
209 | * @return Pdf the pdf instance for method chaining 210 | */ 211 | public function shuffle($start, $end = null, $handle = null, $qualifier = null, $rotation = null) 212 | { 213 | $this->getCommand() 214 | ->setOperation('shuffle') 215 | ->addPageRange($start, $end, $handle, $qualifier, $rotation); 216 | return $this; 217 | } 218 | 219 | /** 220 | * Split the PDF document into pages 221 | * 222 | * @param string|null $filepattern the output name in sprintf format or 223 | * null for default 'pg_%04d.pdf' 224 | * @return bool whether the burst operation was successful 225 | */ 226 | public function burst($filepattern = null) 227 | { 228 | $this->constrainSingleFile(); 229 | $this->getCommand()->setOperation('burst'); 230 | $this->_output = $filepattern === null ? 'pg_%04d.pdf' : $filepattern; 231 | return $this->execute(); 232 | } 233 | 234 | /** 235 | * Attach files to the PDF 236 | * 237 | * @param array $files the list of full paths to the files to attach 238 | * @param string $toPage the page to add the attachment to. If omitted the 239 | * files are attached at the document level. 240 | * @return bool whether the operation was successful 241 | */ 242 | public function attachFiles($files, $toPage = null) 243 | { 244 | $this->constrainSingleFile(); 245 | if ($toPage !== null) { 246 | $files[] = 'to_page'; 247 | $files[] = $toPage; 248 | } 249 | $this->getCommand() 250 | ->setOperation('attach_files') 251 | ->setOperationArgument($files, true); 252 | return $this; 253 | } 254 | 255 | /** 256 | * Copy all attachments from the PDF to the given directory 257 | * 258 | * @param string|null $dir the output directory 259 | * @return bool whether the operation was successful 260 | */ 261 | public function unpackFiles($dir = null) 262 | { 263 | $this->constrainSingleFile(); 264 | $this->getCommand()->setOperation('unpack_files'); 265 | $this->_output = $dir; 266 | return $this->execute(); 267 | } 268 | 269 | /** 270 | * Generate the FDF file for a single PDF file. 
271 | * 272 | * @param string $name name of the FDF file 273 | * @return bool whether the pdf is generated successful 274 | */ 275 | public function generateFdfFile($name) 276 | { 277 | $this->constrainSingleFile(); 278 | $this->getCommand()->setOperation('generate_fdf'); 279 | $this->_output = $name; 280 | return $this->execute(); 281 | } 282 | 283 | /** 284 | * Fill a PDF form 285 | * 286 | * @param string|array $data either a XFDF/FDF filename or an array with 287 | * form field data (name => value) 288 | * @param string $encoding the encoding of the data. Default is 'UTF-8'. 289 | * @param bool $dropXfa whether to drop XFA forms (see dropXfa()). Default 290 | * is true. 291 | * @param string $format the file format to use for form filling when 292 | * passing an array in `$data`. This can be `xfdf` or `fdf`. `xfdf` should 293 | * give best results so you should not have to change the default. 294 | * @return Pdf the pdf instance for method chaining 295 | */ 296 | public function fillForm($data, $encoding = 'UTF-8', $dropXfa = true, $format = 'xfdf') 297 | { 298 | $this->constrainSingleFile(); 299 | if (is_array($data)) { 300 | $className = '\mikehaertl\pdftk\\' . ($format === 'xfdf' ? 'XfdfFile' : 'FdfFile'); 301 | $data = new $className($data, null, null, $this->tempDir, $encoding); 302 | } 303 | $this->getCommand() 304 | ->setOperation('fill_form') 305 | ->setOperationArgument($data, true); 306 | 307 | if ($dropXfa) { 308 | $this->dropXfa(); 309 | } 310 | return $this; 311 | } 312 | 313 | /** 314 | * Update meta data of PDF 315 | * 316 | * @param string|array $data either a InfoFile filename or an array with 317 | * form field data (name => value) 318 | * @param string the encoding of the data. Default is 'UTF-8'. 
319 | * @return Pdf the pdf instance for method chaining 320 | */ 321 | public function updateInfo($data, $encoding = 'UTF-8') 322 | { 323 | $this->constrainSingleFile(); 324 | if (is_array($data) || $data instanceof InfoFields) { 325 | $data = new InfoFile($data, null, null, $this->tempDir, $encoding); 326 | } 327 | $this->getCommand() 328 | ->setOperation($encoding == 'UTF-8' ? 'update_info_utf8' : 'update_info') 329 | ->setOperationArgument($data, true); 330 | 331 | return $this; 332 | } 333 | 334 | /** 335 | * Apply a PDF as watermark to the background of a single PDF file. 336 | * 337 | * The PDF file must have a transparent background for the watermark to be 338 | * visible. 339 | * 340 | * @param string $file name of the background PDF file. Only the first page 341 | * is used. 342 | * @return Pdf the pdf instance for method chaining 343 | */ 344 | public function background($file) 345 | { 346 | $this->constrainSingleFile(); 347 | $this->getCommand() 348 | ->setOperation('background') 349 | ->setOperationArgument($file, true); 350 | return $this; 351 | } 352 | 353 | /** 354 | * Apply multiple PDF pages as watermark to the corresponding pages of a 355 | * single PDF file. 356 | * 357 | * If $file has fewer pages than the PDF file then the last page is 358 | * repeated as background. 359 | * 360 | * @param string $file name of the background PDF file. 361 | * @return Pdf the pdf instance for method chaining 362 | */ 363 | public function multiBackground($file) 364 | { 365 | $this->getCommand() 366 | ->setOperation('multibackground') 367 | ->setOperationArgument($file, true); 368 | return $this; 369 | } 370 | 371 | /** 372 | * Add $file as overlay to a single PDF file. 373 | * 374 | * The $file should have a transparent background. 375 | * 376 | * @param string $file name of the PDF file to add as overlay. Only the 377 | * first page is used. 
378 | * @return Pdf the pdf instance for method chaining 379 | */ 380 | public function stamp($file) 381 | { 382 | $this->constrainSingleFile(); 383 | $this->getCommand() 384 | ->setOperation('stamp') 385 | ->setOperationArgument($file, true); 386 | return $this; 387 | } 388 | 389 | /** 390 | * Add multiple pages from $file as overlay to the corresponding pages of a 391 | * single PDF file. 392 | * 393 | * If $file has fewer pages than the PDF file then the last page is 394 | * repeated as overlay. 395 | * 396 | * @param string $file name of the PDF file to add as overlay 397 | * @return Pdf the pdf instance for method chaining 398 | */ 399 | public function multiStamp($file) 400 | { 401 | $this->getCommand() 402 | ->setOperation('multistamp') 403 | ->setOperationArgument($file, true); 404 | return $this; 405 | } 406 | 407 | /** 408 | * @param bool $utf8 whether to dump the data UTF-8 encoded. Default is 409 | * true. 410 | * @return InfoFields|bool meta data about the PDF or false on failure 411 | */ 412 | public function getData($utf8 = true) 413 | { 414 | $property = $utf8 ? '_data_utf8' : '_data'; 415 | if ($this->$property === null) { 416 | $command = $this->getCommand(); 417 | $command->setOperation($utf8 ? 'dump_data_utf8' : 'dump_data'); 418 | if (!$command->execute()) { 419 | $this->_error = $command->getError(); 420 | return false; 421 | } else { 422 | $this->$property = new InfoFields(trim($command->getOutput())); 423 | } 424 | } 425 | return $this->$property; 426 | } 427 | 428 | /** 429 | * @param bool $utf8 whether to dump the data UTF-8 encoded. Default is 430 | * true. 431 | * @return DataFields|bool data about the PDF form fields or false on 432 | * failure 433 | */ 434 | public function getDataFields($utf8 = true) 435 | { 436 | $property = $utf8 ? '_dataFields_utf8' : '_dataFields'; 437 | if ($this->$property === null) { 438 | $command = $this->getCommand(); 439 | $command->setOperation($utf8 ? 
'dump_data_fields_utf8' : 'dump_data_fields'); 440 | if (!$command->execute()) { 441 | $this->_error = $command->getError(); 442 | return false; 443 | } else { 444 | $this->$property = new DataFields(trim($command->getOutput())); 445 | } 446 | } 447 | return $this->$property; 448 | } 449 | 450 | /** 451 | * Set PDF permissions 452 | * 453 | * 454 | * @param string|null $permissions list of space separated permissions or 455 | * null for none. The available permissions are Printing, DegradedPrinting, 456 | * ModifyContents, Assembly, CopyContents, ScreenReaders, 457 | * ModifyAnnotations, FillIn, AllFeatures. 458 | * @return Pdf the pdf instance for method chaining 459 | */ 460 | public function allow($permissions = null) 461 | { 462 | $this->getCommand() 463 | ->addOption('allow', $permissions, false); 464 | return $this; 465 | } 466 | 467 | /** 468 | * Flatten the PDF form fields values into a single PDF file. 469 | * 470 | * @return Pdf the pdf instance for method chaining 471 | */ 472 | public function flatten() 473 | { 474 | $this->getCommand() 475 | ->addOption('flatten'); 476 | return $this; 477 | } 478 | 479 | /** 480 | * Restore/remove compression 481 | * 482 | * @param bool $compress whether to restore (default) or remove the 483 | * compression 484 | * @return Pdf the pdf instance for method chaining 485 | */ 486 | public function compress($compress = true) 487 | { 488 | $this->getCommand() 489 | ->addOption($compress ? 'compress' : 'uncompress'); 490 | return $this; 491 | } 492 | 493 | /** 494 | * When combining multiple PDFs, use either the first or last ID in the 495 | * output. If not called, a new ID is created. 496 | * 497 | * @param string $id, either 'first' (default) or 'last' 498 | * @return Pdf the pdf instance for method chaining 499 | */ 500 | public function keepId($id = 'first') 501 | { 502 | $this->getCommand() 503 | ->addOption($id === 'first' ? 
'keep_first_id' : 'keep_final_id'); 504 | return $this; 505 | } 506 | 507 | /** 508 | * Set need_appearances flag in PDF 509 | * 510 | * This flag makes sure, that a PDF reader takes care of rendering form 511 | * field content, even if it contains non ASCII characters. You should 512 | * always use this option if you fill in forms e.g. with Unicode 513 | * characters. You can't combine this option with flatten() though! 514 | * 515 | * @return Pdf the pdf instance for method chaining 516 | */ 517 | public function needAppearances() 518 | { 519 | $this->getCommand() 520 | ->addOption('need_appearances'); 521 | return $this; 522 | } 523 | 524 | /** 525 | * Drop XFA data from forms created with newer Acrobat. 526 | * 527 | * Newer PDF forms contain both, the newer XFA and the older AcroForm form 528 | * fields. PDF readers can use both, but will prefer XFA if present. Since 529 | * pdftk can only fill in AcroForm data you should always add this option 530 | * when filling in forms with pdftk. 531 | * 532 | * @return Pdf the pdf instance for method chaining 533 | */ 534 | public function dropXfa() 535 | { 536 | $this->getCommand() 537 | ->addOption('drop_xfa'); 538 | return $this; 539 | } 540 | 541 | /** 542 | * Drop XMP meta data 543 | * 544 | * Newer PDFs can contain both, new style XMP data and old style info 545 | * directory. PDF readers can use both, but will prefer XMP if present. 546 | * Since pdftk can only update the info directory you should always add 547 | * this option when updating PDF info. 
548 | * 549 | * @return Pdf the pdf instance for method chaining 550 | */ 551 | public function dropXmp() 552 | { 553 | $this->getCommand() 554 | ->addOption('drop_xmp'); 555 | return $this; 556 | } 557 | 558 | /** 559 | * @param string $password the owner password to set on the output PDF 560 | * @return Pdf the pdf instance for method chaining 561 | */ 562 | public function setPassword($password) 563 | { 564 | $this->getCommand() 565 | ->addOption('owner_pw', $password, true); 566 | return $this; 567 | } 568 | 569 | /** 570 | * @param string $password the user password to set on the output PDF 571 | * @return Pdf the pdf instance for method chaining 572 | */ 573 | public function setUserPassword($password) 574 | { 575 | $this->getCommand() 576 | ->addOption('user_pw', $password, true); 577 | return $this; 578 | } 579 | 580 | /** 581 | * @param int $strength the password encryption strength. Default is 128 582 | * @return Pdf the pdf instance for method chaining 583 | */ 584 | public function passwordEncryption($strength = 128) 585 | { 586 | $this->getCommand() 587 | ->addOption($strength == 128 ? 'encrypt_128bit' : 'encrypt_40bit'); 588 | return $this; 589 | } 590 | 591 | /** 592 | * Replace embedded font with a local font when filling a form. 593 | * 594 | * This option is only available for pdftk-java >= 3.3.0. It is useful when 595 | * filling a form with non-ASCII text that is not supported by the fonts 596 | * included in the input PDF. 597 | * 598 | * @param string $fontName the path to the font or the name of a font family. 
599 | * @return Pdf the pdf instance for method chaining 600 | */ 601 | public function replacementFont($path) 602 | { 603 | $this->getCommand() 604 | ->addOption('replacement_font', $path); 605 | return $this; 606 | } 607 | 608 | /** 609 | * Execute the operation and save the output file 610 | * 611 | * @param string $name of output file 612 | * @return bool whether the PDF could be processed and saved 613 | */ 614 | public function saveAs($name) 615 | { 616 | if (!$this->getCommand()->getExecuted() && !$this->execute()) { 617 | return false; 618 | } 619 | $tmpFile = (string) $this->getTmpFile(); 620 | if (!copy($tmpFile, $name)) { 621 | $this->_error = "Could not copy PDF from tmp location '$tmpFile' to '$name'"; 622 | return false; 623 | } 624 | return true; 625 | } 626 | 627 | /** 628 | * Send PDF to client, either inline or as download (triggers PDF creation) 629 | * 630 | * @param string|null $filename the filename to send. If empty, the PDF is 631 | * streamed inline. 632 | * @param bool $inline whether to force inline display of the PDF, even if 633 | * filename is present. 634 | * @param array $headers a list of additional HTTP headers to send in the 635 | * response as an array. The array keys are the header names like 636 | * 'Cache-Control' and the array values the header value strings to send. 637 | * Each array value can also be another array of strings if the same header 638 | * should be sent multiple times. This can also be used to override 639 | * automatically created headers like 'Expires' or 'Content-Length'. To suppress 640 | * automatically created headers, `false` can also be used as header value. 
* @return bool whether PDF was created successfully
     */
    public function send($filename = null, $inline = false, $headers = array())
    {
        $alreadyExecuted = $this->getCommand()->getExecuted();
        if (!$alreadyExecuted && !$this->execute()) {
            return false;
        }

        $tmpFile = $this->getTmpFile();
        $tmpFile->send($filename, $this->_tmpOutputContentType, $inline, $headers);

        return true;
    }

    /**
     * Get the raw PDF contents (triggers PDF creation).
     *
     * @return string|bool the PDF content as a string or `false` if the PDF
     * wasn't created successfully.
     */
    public function toString()
    {
        $alreadyExecuted = $this->getCommand()->getExecuted();
        if (!$alreadyExecuted && !$this->execute()) {
            return false;
        }

        $path = $this->getTmpFile()->getFileName();

        return file_get_contents($path);
    }

    /**
     * Get the command instance, creating it on first access.
     *
     * @return Command the command instance that executes pdftk
     */
    public function getCommand()
    {
        if ($this->_command !== null) {
            return $this->_command;
        }

        $this->_command = new Command();

        return $this->_command;
    }

    /**
     * Get the temporary output file, creating it on first access.
     *
     * @return File the temporary output file instance
     */
    public function getTmpFile()
    {
        if ($this->_tmpFile !== null) {
            return $this->_tmpFile;
        }

        $this->_tmpFile = new File('', '.pdf', self::TMP_PREFIX, $this->tempDir);

        return $this->_tmpFile;
    }

    /**
     * Get the last error recorded on this instance.
     *
     * @return string the error message or an empty string if none
     */
    public function getError()
    {
        return $this->_error;
    }

    /**
     * Execute the pdftk command and store the output file to a temporary
     * location or $this->_output if set. You should probably never call this
     * method unless you only need a temporary PDF file as result.
*
     * @return bool whether the command was executed successfully
     */
    public function execute()
    {
        $command = $this->getCommand();
        if ($command->getExecuted()) {
            return false;
        }

        // An output of `false` means no output file is passed to the command.
        // Otherwise the configured output is used, falling back to the
        // temporary file.
        $filename = null;
        if ($this->_output !== false) {
            $filename = $this->_output ? $this->_output : (string) $this->getTmpFile();
        }

        if ($command->execute($filename)) {
            return true;
        }

        $this->_error = $command->getError();

        // A failed command is only tolerated if it still produced a non-empty
        // output file and warnings should be ignored.
        if ($filename && (!file_exists($filename) || filesize($filename) === 0 || !$this->ignoreWarnings)) {
            return false;
        }

        return true;
    }

    /**
     * Make sure, that only one file is present
     *
     * @throws \Exception if the command has more than one file
     */
    protected function constrainSingleFile()
    {
        $fileCount = $this->getCommand()->getFileCount();
        if ($fileCount > 1) {
            throw new \Exception('This operation can only process single files');
        }
    }

    /**
     * @return string the next handle in the series A, B, C, ... Z, AA, AB...
     */
    protected function nextHandle()
    {
        // N.B. Multi-character handles are only available in pdftk 1.45+

        $index = $this->_handle++;

        // Start at 'A' and apply PHP's string increment once per handle that
        // was already handed out.
        $handle = 'A';
        for ($step = 0; $step < $index; $step++) {
            $handle++;
        }

        return $handle;
    }
}

-------------------------------------------------------------------------------- /src/XfdfFile.php: -------------------------------------------------------------------------------- 1 | field value 17 | * 'Firstname' => 'John', 18 | * 19 | * // Hierarchical/nested fields in dot notation 20 | * 'Address.Street' => 'Some Street', 21 | * 'Address.City' => 'Any City', 22 | * 23 | * // Multi value fields 24 | * 'Pets' => ['Cat', 'Mouse'], 25 | * ] 26 | * ``` 27 | * 28 | * This will result in the following XML structure (header/footer omitted): 29 | * 30 | * ``` 31 | * 32 | * John 33 | * 34 | * 35 | * 36 | * Some Street 37 | * 38 | * 39 | * Any City 40 | * 41 | * 42 | * 43 | * Cat 44 | * Mouse 45 | * 46 | * ``` 47 | * 48 | * @author Tomas Holy 49 | * @author Michael Härtl 50 | * @license http://www.opensource.org/licenses/MIT 51 | */ 52 | class XfdfFile extends File 53 | { 54 | // XFDF file header 55 | const XFDF_HEADER = << 57 | 58 | 59 | 60 | FDF; 61 | 62 | // XFDF file footer 63 | const XFDF_FOOTER = << 65 | 66 | 67 | FDF; 68 |
    /**
     * Constructor
     *
     * @param array $data the form data as name => value
     * @param string|null $suffix the optional suffix for the tmp file. If null
     * '.xfdf' is used.
     * @param string|null $prefix the optional prefix for the tmp file. If null
     * 'php_pdftk_xfdf_' is used.
     * @param string|null $directory directory where the file should be
     * created. Autodetected if not provided.
     * @param string|null $encoding of the data. Default is 'UTF-8'.
* @throws \Exception if the temporary file could not be created or opened
     */
    public function __construct($data, $suffix = null, $prefix = null, $directory = null, $encoding = 'UTF-8')
    {
        if ($directory === null) {
            $directory = self::getTempDir();
        }
        if ($suffix === null) {
            $suffix = '.xfdf';
        }
        if ($prefix === null) {
            $prefix = 'php_pdftk_xfdf_';
        }

        $tempfile = tempnam($directory, $prefix);
        if ($tempfile === false) {
            // Without this check `false . $suffix` would silently become a
            // relative file name like '.xfdf' in the working directory.
            throw new \Exception("Could not create a temporary file in '$directory'");
        }
        // tempnam() can not create files with a suffix, so the file is renamed
        $this->_fileName = $tempfile . $suffix;
        rename($tempfile, $this->_fileName);

        $fields = $this->parseData($data, $encoding);
        $this->writeXml($fields);
    }

    /**
     * Parses an array of key/value data into a nested array structure.
     *
     * The data may use keys in dot notation (#55). Values can also be arrays in
     * case of multi value fields (#148). To make both distinguishable in the
     * result array keys that represent field names are prefixed with `_`. This
     * also allows for numeric field names (#260).
     *
     * For example an array like this:
     *
     * ```
     * [
     *     'a' => 'value a',
     *     'b.x' => 'value b.x',
     *     'b.y' => 'value b.y',
     *
     *     'c.0' => 'value c.0',
     *     'c.1' => 'value c.1',
     *
     *     'd' => ['m1', 'm2'],
     * ]
     * ```
     *
     * Will become:
     *
     * ```
     * [
     *     '_a' => 'value a',
     *     '_b' => [
     *         '_x' => 'value b.x',
     *         '_y' => 'value b.y',
     *     ],
     *     '_c' => [
     *         '_0' => 'value c.0',
     *         '_1' => 'value c.1',
     *     ],
     *     '_d' => [
     *         // notice the missing underscore in the keys
     *         0 => 'm1',
     *         1 => 'm2',
     *     ],
     * ]
     * ```
     *
     * Keys and values are only converted if `$encoding` is not 'UTF-8' and
     * `mb_convert_encoding()` is available.
     *
     * @param mixed $data the data to parse
     * @param string $encoding the encoding of the data
     * @return array the result array in UTF-8 encoding with dot keys converted
     * to nested arrays
     */
    protected function parseData($data, $encoding)
    {
        $convert = $encoding !== 'UTF-8' && function_exists('mb_convert_encoding');
        $result = array();
        foreach ($data as $key => $value) {
            if ($convert) {
                $key = mb_convert_encoding($key, 'UTF-8', $encoding);
                $value = $this->convertToUtf8($value, $encoding);
            }
            if (strpos($key, '.') === false) {
                $result['_' . $key] = $value;
                continue;
            }

            // Walk down the dot separated path, creating missing levels
            $target = &$result;
            $keyParts = explode('.', $key);
            $lastPart = array_pop($keyParts);
            foreach ($keyParts as $part) {
                if (!isset($target['_' . $part])) {
                    $target['_' . $part] = array();
                }
                $target = &$target['_' . $part];
            }
            $target['_' . $lastPart] = $value;
            // Break the reference so it can't leak into later iterations
            unset($target);
        }
        return $result;
    }

    /**
     * Converts a value to UTF-8. Array values (multi value fields) are
     * converted element by element, because `mb_convert_encoding()` only
     * accepts arrays since PHP 7.2.
     *
     * @param mixed $value the value or array of values to convert
     * @param string $encoding the encoding of the value
     * @return mixed the converted value or array of converted values
     */
    private function convertToUtf8($value, $encoding)
    {
        if (is_array($value)) {
            foreach ($value as $index => $item) {
                $value[$index] = $this->convertToUtf8($item, $encoding);
            }
            return $value;
        }
        return mb_convert_encoding($value, 'UTF-8', $encoding);
    }

    /**
     * Write the given fields to an XML file
     *
     * @param array $fields the fields in a nested array structure
     * @throws \Exception if the file could not be opened for writing
     */
    protected function writeXml($fields)
    {
        // Use fwrite, since file_put_contents() messes around with character encoding
        $fp = fopen($this->_fileName, 'w');
        if ($fp === false) {
            throw new \Exception("Could not open XFDF file '{$this->_fileName}' for writing");
        }
        fwrite($fp, self::XFDF_HEADER);
        $this->writeFields($fp, $fields);
        fwrite($fp, self::XFDF_FOOTER);
        fclose($fp);
    }

    /**
     * Write the fields to the given filepointer
     *
     * @param resource $fp the file pointer to write to
     * @param mixed[] $fields an array of field values as returned by
     * `parseData()`.
*/
    protected function writeFields($fp, $fields)
    {
        foreach ($fields as $key => $value) {
            // Strip the `_` prefix that parseData() adds to field names
            $key = $this->xmlEncode(substr($key, 1));
            fwrite($fp, "<field name=\"$key\">\n");
            if (!is_array($value)) {
                $value = array($value);
            }
            if (array_key_exists(0, $value)) {
                // Numeric keys: single or multi-value field
                foreach ($value as $val) {
                    $val = $this->xmlEncode($val);
                    fwrite($fp, "<value>$val</value>\n");
                }
            } else {
                // String keys: nested/hierarchical fields
                $this->writeFields($fp, $value);
            }
            fwrite($fp, "</field>\n");
        }
    }

    /**
     * Encode a value for use in element content or in a double quoted
     * attribute of a XML document.
     *
     * @param string|null $value the value to encode
     * @return string|null the value correctly encoded for use in a XML document
     */
    protected function xmlEncode($value)
    {
        if ($value === null) {
            return null;
        }
        // ENT_XML1 alone leaves double quotes unescaped. ENT_COMPAT is needed
        // because field names are written into a double quoted attribute. It
        // also matches the default flags of the fallback branch.
        return defined('ENT_XML1') ?
            htmlspecialchars($value, ENT_COMPAT | ENT_XML1, 'UTF-8') :
            htmlspecialchars($value);
    }
}
--------------------------------------------------------------------------------