├── .gitignore ├── LICENSE ├── README.md ├── composer.json ├── fpdm.php └── src ├── ex-array.php ├── ex-fdf.php ├── ex.pdf ├── export ├── cache │ ├── data.fdf │ └── pdf_flatten.pdf ├── fdf │ ├── fdf.php │ └── forge_fdf.php └── pdf │ ├── pdftk.php │ └── pdftk.txt ├── fields.fdf ├── filters ├── FilterASCII85.php ├── FilterASCIIHex.php ├── FilterFlate.php ├── FilterLZW.php └── FilterStandard.php ├── fpdm.php ├── lib └── url.php └── template.pdf /.gitignore: -------------------------------------------------------------------------------- 1 | composer.phar 2 | /vendor/ 3 | 4 | # Commit your application's lock file http://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file 5 | # You may choose to ignore a library lock file http://getcomposer.org/doc/02-libraries.md#lock-file 6 | # composer.lock 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 codeshell 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PDF Form Filling with FPDM 2 | 3 | ## Package 4 | 5 | The FPDM class allows you to fill out PDF forms, i.e. populate fields of a PDF file. It is **developed by Olivier Plathey**, author of the [FPDF Library](http://www.fpdf.org/), and has been released as [Script 93](http://www.fpdf.org/en/script/script93.php). 6 | 7 | I created this repository for the following reasons: 8 | 9 | - make the current FPDM source available via [composer](https://packagist.org/packages/tmw/fpdm), autoload via classmaps 10 | - bugfixing 11 | - FIX: compatibility issues with PHP 7.x [e376dc1](https://github.com/codeshell/fpdm/commit/e376dc157655ded24c61e098199586f3325d63c1) v2.9.1 12 | - FIX: filling forms in multiple files (wrong buffer usage, invalid offsets) [e376dc1](https://github.com/codeshell/fpdm/commit/e376dc157655ded24c61e098199586f3325d63c1) v2.9.1 13 | - FIX: convert ASCII object names to utf-8 [1eddba7](https://github.com/codeshell/fpdm/commit/1eddba76f610690821e8c0b3753df337a6cf65f7) v2.9.2 14 | - improvements (changes to the original codebase are prefixed with `//FIX: change description` and ended with `//ENDFIX`) 15 | - ADD: support for checkboxes (disabled by default, activate with `$pdf->useCheckboxParser = true;`) [0375dd9](https://github.com/codeshell/fpdm/commit/0375dd95f05fd2d8d32d9ae1ab882fa0895b07b3) v2.9.2 16 | 17 | ## Version 18 | 19 | Based on version 2.9 (2017-05-11) available from [fpdf.org/en/script/script93.php](http://www.fpdf.org/en/script/script93.php). 
20 | 21 | _Note: If you find that a new version has been hosted on fpdf.org, please do not hesitate to drop me [a short note](https://github.com/codeshell/fpdm/issues) to make sure I do not miss it out._ 22 | 23 | This repository only contains the separate php class written for form filling (FPD**M**). If you are looking for a repository containing the main FPD**F** Library, please head over to [github.com/Setasign/FPDF](https://github.com/Setasign/FPDF). 24 | 25 | Once again, all credits to Olivier Plathey for providing an easy to use script for form filling in addition to his FPDF library! 26 | 27 | ## Installation 28 | 29 | ### Composer 30 | 31 | The preferred way of making FPDM available in your app is to install it via composer with 32 | 33 | `composer require tmw/fpdm` 34 | 35 | ## Usage 36 | 37 | ### Composer (autoload) 38 | 39 | [autoload](https://getcomposer.org/doc/01-basic-usage.md#autoloading) FPDM class files by adding this to your code: 40 | 41 | `require 'vendor/autoload.php';` 42 | 43 | ### Standalone Script (legacy) 44 | 45 | Load the top level entry point by calling 46 | 47 | `require_once '/absolute/path/to/fpdm.php';` 48 | 49 | or 50 | 51 | `require_once './relative/path/to/fpdm.php';` 52 | 53 | ## Customization to original code 54 | 55 | ### classmaps vs. psr-4 (or: legacy code vs modern frameworks à la Laravel) 56 | 57 | Autoloading classes with [namespaces](https://www.php.net/manual/en/language.namespaces.basics.php) and following [PSR-4: Autoloader](https://www.php-fig.org/psr/psr-4/) would be desirable. Especially reducing the risk of naming conflicts by using vendor namespaces. 58 | 59 | However, FPDM has been around for a long time and as such is used in many projects that use non-namespaced code (I refer to them as legacy projects). Legacy projects instantiate FPDM by calling `$mypdf = new FPDM()` which is unqualified but defaults to the global namespace with non-namespaced code. 
60 | 61 | Using psr-4 would autoload the class to a subnamespace (e.g. \codeshell\fpdm\FPDM) instead of the global namespace (e.g. \FPDM) thus breaking any legacy code no matter if it used `new FPDM()` or `new \FPDM()`. 62 | 63 | __Classmaps are a compromise.__ They allow taking advantage of composers autoloading and dependency management. Yet classes are added to the global namespace. Legacy projects can switch to composer without having to refactor their code. __Newer projects (e.g. utilizing frameworks like laravel, that heavily rely on namespaces) can still use legacy classes__ by using the fully qualified name (in this case the class name prefixed with global prefix operator as in `new \FPDM()`). 64 | 65 | That's my reasoning for using classmaps over psr-4 for FPDM. Please let me know if there are use cases where classmaps won't work with modern frameworks. 66 | 67 | ### Checkboxes 68 | 69 | I added support for checkboxes. The feature is not heavily tested but works for me. Can be enabled with `useCheckboxParser = true` like so: 70 | 71 | ```php 72 | 'anything that evaluates to true.', // checkbox will be checked; Careful, that includes ANY non-empty string (even "no" or "unchecked") 75 | 'another_checkbox' => false, // checkbox will be UNchecked; empty string or 0 work as well 76 | ); 77 | 78 | $pdf = new FPDM('template.pdf'); 79 | $pdf->useCheckboxParser = true; // Checkbox parsing is ignored (default FPDM behaviour) unless enabled with this setting 80 | $pdf->Load($fields, true); 81 | $pdf->Merge(); 82 | $pdf->Output(); 83 | ``` 84 | 85 | You don't have to figure out the technical names of checkbox states. They are retrieved during the parsing process. 86 | 87 | ## Original Info Page 88 | _Everything below is mirrored from http://www.fpdf.org/en/script/script93.php ._ 89 | 90 | ### Information 91 | 92 | Author: Olivier 93 | 94 | License: FPDF 95 | 96 | ### Description 97 | 98 | This script allows to merge data into a PDF form. 
Given a template PDF with text fields, it's 99 | possible to inject values in two different ways: 100 | 101 | - from a PHP array 102 | - from an FDF file 103 | 104 | The resulting document is produced by the Output() method, which works the same as for FPDF. 105 | 106 | Note: if your template PDF is not compatible with this script, you can process it with 107 | [PDFtk](https://www.pdflabs.com/tools/pdftk-server/) this way: 108 | 109 | `pdftk modele.pdf output modele2.pdf` 110 | 111 | Then try again with modele2.pdf. 112 | 113 | ### Example 114 | 115 | This example shows how to merge data from an array: 116 | 117 | ```php 118 | 'My name', 126 | 'address' => 'My address', 127 | 'city' => 'My city', 128 | 'phone' => 'My phone number' 129 | ); 130 | 131 | $pdf = new FPDM('template.pdf'); 132 | $pdf->Load($fields, false); // second parameter: false if field values are in ISO-8859-1, true if UTF-8 133 | $pdf->Merge(); 134 | $pdf->Output(); 135 | ?> 136 | ``` 137 | 138 | View the result [here](http://www.fpdf.org/en/script/ex93.pdf). 
139 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "tmw/fpdm", 3 | "description": "PDF form filling using FPDM Class written by FPDF author Olivier", 4 | "type": "library", 5 | "license": "MIT", 6 | "authors": [ 7 | { 8 | "name": "Olivier Plathey", 9 | "email": "oliver@fpdf.org", 10 | "homepage": "http://www.fpdf.org/", 11 | "role": "Author" 12 | }, 13 | { 14 | "name": "codeshell", 15 | "role": "Developer", 16 | "homepage": "https://github.com/codeshell/" 17 | } 18 | ], 19 | "homepage": "https://github.com/codeshell/fpdm", 20 | "autoload": { 21 | "classmap": [ 22 | "src/fpdm.php", 23 | "src/filters/" 24 | ] 25 | }, 26 | "repositories": [ 27 | { 28 | "type": "git", 29 | "url": "https://github.com/codeshell/fpdm", 30 | "name": "GitHub" 31 | } 32 | ], 33 | "require": { 34 | "php": ">=5.3.0" 35 | }, 36 | "keywords": [ 37 | "FPDM", 38 | "PDF", 39 | "forms", 40 | "fields", 41 | "fill", 42 | "populate", 43 | "FPDF" 44 | ], 45 | "support": { 46 | "issues": "https://github.com/codeshell/fpdm/issues", 47 | "source": "https://github.com/codeshell/fpdm/tree/master" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /fpdm.php: -------------------------------------------------------------------------------- 1 | 'My name', 11 | 'address' => 'My address', 12 | 'city' => 'My city', 13 | 'phone' => 'My phone number' 14 | ); 15 | 16 | $pdf = new FPDM('template.pdf'); 17 | $pdf->Load($fields, false); // second parameter: false if field values are in ISO-8859-1, true if UTF-8 18 | $pdf->Merge(); 19 | $pdf->Output(); 20 | ?> 21 | -------------------------------------------------------------------------------- /src/ex-fdf.php: -------------------------------------------------------------------------------- 1 | Merge(); 11 | $pdf->Output(); 12 | ?> 13 | 
-------------------------------------------------------------------------------- /src/ex.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/ex.pdf -------------------------------------------------------------------------------- /src/export/cache/data.fdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/export/cache/data.fdf -------------------------------------------------------------------------------- /src/export/cache/pdf_flatten.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/export/cache/pdf_flatten.pdf -------------------------------------------------------------------------------- /src/export/fdf/fdf.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/export/fdf/fdf.php -------------------------------------------------------------------------------- /src/export/fdf/forge_fdf.php: -------------------------------------------------------------------------------- 1 | value pair to $fdf_data_strings. 8 | 9 | For check boxes and radio buttons, add field values 10 | as a name => value pair to $fdf_data_names. Typically, 11 | true and false correspond to the (case sensitive) 12 | names "Yes" and "Off". 
/**
 * Escapes a value for use inside a PDF literal string, i.e. between "(" and ")".
 *
 * Parentheses and backslashes are prefixed with a backslash; every byte outside
 * the printable ASCII range 32..126 is written as a three-digit octal code.
 *
 * @param string $ss raw value; cast to string first so numeric values and
 *                   integer array keys are escaped byte by byte as well
 * @return string the escaped value
 */
function escape_pdf_string( $ss )
{
    $ss = (string) $ss;
    $ss_esc = '';
    $ss_len = strlen( $ss );

    for( $ii = 0; $ii < $ss_len; ++$ii ) {
        // Square brackets: the curly-brace offset syntax ($ss{$ii}) is a
        // fatal error as of PHP 8.0.
        $char = $ss[$ii];
        $code = ord( $char );

        if( $code == 0x28 ||      // open paren
            $code == 0x29 ||      // close paren
            $code == 0x5c )       // backslash
        {
            $ss_esc .= chr( 0x5c ).$char; // escape the character w/ backslash
        }
        else if( $code < 32 || 126 < $code ) {
            $ss_esc .= sprintf( "\\%03o", $code ); // use an octal code
        }
        else {
            $ss_esc .= $char;
        }
    }

    return $ss_esc;
}


/**
 * Escapes a value for use as a PDF name object (the part after the "/").
 *
 * Every byte below 33 or above 126, and the hash mark itself, is written as
 * "#" followed by its two-digit lowercase hex code.
 *
 * @param string $ss raw name; cast to string first
 * @return string the escaped name
 */
function escape_pdf_name( $ss )
{
    $ss = (string) $ss;
    $ss_esc = '';
    $ss_len = strlen( $ss );

    for( $ii = 0; $ii < $ss_len; ++$ii ) {
        $char = $ss[$ii];
        $code = ord( $char );

        if( $code < 33 || 126 < $code ||
            $code == 0x23 )       // hash mark
        {
            $ss_esc .= sprintf( "#%02x", $code ); // use a hex code
        }
        else {
            $ss_esc .= $char;
        }
    }

    return $ss_esc;
}


/**
 * Builds the visibility and read-only flag entries for one FDF field.
 *
 * @param mixed $key             field name (array key of the data arrays)
 * @param Array $fields_hidden   field names to hide, already normalized to strings
 * @param Array $fields_readonly field names to make read-only, already normalized to strings
 * @return String "/SetF 2 " or "/ClrF 2 " followed by "/SetFf 1 " or "/ClrFf 1 "
 */
function forge_fdf_field_flags( $key, $fields_hidden, $fields_readonly )
{
    // PHP turns numeric-looking array keys into integers; compare as strings
    // so that strict matching still finds them in the name lists.
    $name = (string) $key;

    $flags  = in_array( $name, $fields_hidden, true )   ? "/SetF 2 "  : "/ClrF 2 ";
    $flags .= in_array( $name, $fields_readonly, true ) ? "/SetFf 1 " : "/ClrFf 1 ";

    return $flags;
}


/**
 * Generates the fdf code
 *
 * Usage notes:
 *  - Flags are only emitted while walking $fdf_data_strings and $fdf_data_names,
 *    so a name listed in $fields_hidden or $fields_readonly has no effect
 *    unless it is also a key of one of those two arrays.
 *  - Every listed field gets an explicit set or clear entry for both flags;
 *    a field that is not listed in $fields_hidden / $fields_readonly gets the
 *    "clear" variant.
 *  - CR and LF are spelled out in hex (CR == \x0d, LF == \x0a) because PDF can
 *    be particular about them.
 *
 *@param String $pdf_form_url: a string containing a URL path to a PDF file on the
 *              server; written as the /F entry when non-empty.
 *@param Array $fdf_data_strings: string data (text fields, combo boxes, list boxes)
 *              of the form key=>val; where the field name is the key, and the
 *              field's value is in val.
 *@param Array $fdf_data_names: name data (check boxes, radio buttons) of the form
 *              key=>val, e.g. the names "Yes" and "Off". Optional.
 *@param Array $fields_hidden: names of fields to hide. Optional.
 *@param Array $fields_readonly: names of fields to make read-only. Optional.
 *@return String
 **/
function forge_fdf( $pdf_form_url,
                    $fdf_data_strings,
                    $fdf_data_names = array(),
                    $fields_hidden = array(),
                    $fields_readonly = array() )
{
    // Normalize the name lists once so that every lookup can be a strict
    // string comparison (loose in_array() matches e.g. "1e1" against "10").
    $hidden   = array_map( 'strval', (array) $fields_hidden );
    $readonly = array_map( 'strval', (array) $fields_readonly );

    $fdf = "%FDF-1.2\x0d%\xe2\xe3\xcf\xd3\x0d\x0a"; // header
    $fdf.= "1 0 obj\x0d<< "; // open the Root dictionary
    $fdf.= "\x0d/FDF << "; // open the FDF dictionary
    $fdf.= "/Fields [ "; // open the form Fields array

    // string data, used for text fields, combo boxes and list boxes
    foreach( (array) $fdf_data_strings as $key => $value ) {
        $fdf.= "<< /V (".escape_pdf_string($value).")".
            "/T (".escape_pdf_string($key).") ".
            forge_fdf_field_flags( $key, $hidden, $readonly ).
            ">> \x0d";
    }

    // name data, used for checkboxes and radio buttons
    // (e.g., /Yes and /Off for true and false)
    foreach( (array) $fdf_data_names as $key => $value ) {
        $fdf.= "<< /V /".escape_pdf_name($value).
            " /T (".escape_pdf_string($key).") ".
            forge_fdf_field_flags( $key, $hidden, $readonly ).
            ">> \x0d";
    }

    $fdf.= "] \x0d"; // close the Fields array

    // the PDF form filename or URL, if given
    if( $pdf_form_url ) {
        $fdf.= "/F (".escape_pdf_string($pdf_form_url).") \x0d";
    }

    $fdf.= ">> \x0d"; // close the FDF dictionary
    $fdf.= ">> \x0dendobj\x0d"; // close the Root dictionary

    // trailer; note the "1 0 R" reference to "1 0 obj" above
    $fdf.= "trailer\x0d<<\x0d/Root 1 0 R \x0d\x0d>>\x0d";
    $fdf.= "%%EOF\x0d\x0a";

    return $fdf;
}
$descriptorspec, $pipes); 111 | } 112 | 113 | if (is_resource($process)) { 114 | 115 | if(PHP5_ENGINE) { 116 | $err=stream_get_contents($pipes[2]); 117 | }else { //Php4 118 | $err= ""; 119 | while (($str = fgets($pipes[2], 4096))) { 120 | $err.= "$str\n"; 121 | } 122 | } 123 | 124 | fclose($pipes[2]); 125 | 126 | //Its important to close the pipes before proc_close call to avoid dead locks 127 | $return_value = proc_close($process); 128 | 129 | }else { 130 | $err="No more resource to execute the command"; 131 | } 132 | 133 | }else { 134 | $err="Sorry but pdftk binary is not provided / Cette fonctionnalite requiere pdftk non fourni ici
    "; 135 | $err.="
  1. download it from / telecharger ce dernier a partir de
    pdflabs
    "; 136 | $err.="
  2. copy the executable in this directory / Copier l'executable dans
    $dircmd
    " ; 137 | $err.="
  3. set \$cmd to match binary name in / configurer \$cmd pour qu'il corresponde dans le fichier
    ".__file__."
"; 138 | } 139 | 140 | if($err) { 141 | $ret=array("success"=> false,"return"=>$err); 142 | }else 143 | $ret=array("success"=> true,"return"=>$pdf_out); 144 | 145 | return $ret; 146 | } 147 | 148 | ?> -------------------------------------------------------------------------------- /src/export/pdf/pdftk.txt: -------------------------------------------------------------------------------- 1 | 2 | pdftk 1.43 a Handy Tool for Manipulating PDF Documents 3 | Copyright (C) 2003-10, Sid Steward - Please Visit: www.pdftk.com 4 | This is free software; see the source code for copying conditions. There is 5 | NO warranty, not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 6 | 7 | SYNOPSIS 8 | pdftk 9 | [input_pw ] 10 | [ ] 11 | [output ] 12 | [encrypt_40bit | encrypt_128bit] 13 | [allow ] 14 | [owner_pw ] 15 | [user_pw ] 16 | [flatten] [compress | uncompress] 17 | [keep_first_id | keep_final_id] [drop_xfa] 18 | [verbose] [dont_ask | do_ask] 19 | Where: 20 | may be empty, or: 21 | [cat | attach_files | unpack_files | burst | 22 | fill_form | background | stamp | generate_fdf | 23 | multibackground | multistamp | 24 | dump_data | dump_data_fields | update_info] 25 | 26 | For Complete Help: pdftk --help 27 | 28 | DESCRIPTION 29 | If PDF is electronic paper, then pdftk is an electronic staple-remover, 30 | hole-punch, binder, secret-decoder-ring, and X-Ray-glasses. Pdftk is a 31 | simple tool for doing everyday things with PDF documents. 
Use it to: 32 | 33 | * Merge PDF Documents 34 | * Split PDF Pages into a New Document 35 | * Rotate PDF Documents or Pages 36 | * Decrypt Input as Necessary (Password Required) 37 | * Encrypt Output as Desired 38 | * Fill PDF Forms with X/FDF Data and/or Flatten Forms 39 | * Generate FDF Data Stencil from PDF Forms 40 | * Apply a Background Watermark or a Foreground Stamp 41 | * Report PDF Metrics such as Metadata and Bookmarks 42 | * Update PDF Metadata 43 | * Attach Files to PDF Pages or the PDF Document 44 | * Unpack PDF Attachments 45 | * Burst a PDF Document into Single Pages 46 | * Uncompress and Re-Compress Page Streams 47 | * Repair Corrupted PDF (Where Possible) 48 | 49 | OPTIONS 50 | A summary of options is included below. 51 | 52 | --help, -h 53 | Show summary of options. 54 | 55 | 56 | A list of the input PDF files. If you plan to combine these PDFs 57 | (without using handles) then list files in the order you want 58 | them combined. Use - to pass a single PDF into pdftk via stdin. 59 | Input files can be associated with handles, where a handle is a 60 | single, upper-case letter: 61 | 62 | = 63 | 64 | Handles are often omitted. They are useful when specifying PDF 65 | passwords or page ranges, later. 66 | 67 | For example: A=input1.pdf B=input2.pdf 68 | 69 | [input_pw ] 70 | Input PDF owner passwords, if necessary, are associated with 71 | files by using their handles: 72 | 73 | = 74 | 75 | If handles are not given, then passwords are associated with 76 | input files by order. 77 | 78 | Most pdftk features require that encrypted input PDF are accom- 79 | panied by the ~owner~ password. If the input PDF has no owner 80 | password, then the user password must be given, instead. If the 81 | input PDF has no passwords, then no password should be given. 82 | 83 | When running in do_ask mode, pdftk will prompt you for a pass- 84 | word if the supplied password is incorrect or none was given. 
85 | 86 | [ ] 87 | If this optional argument is omitted, then pdftk runs in 'fil- 88 | ter' mode. Filter mode takes only one PDF input and creates a 89 | new PDF after applying all of the output options, like encryp- 90 | tion and compression. 91 | 92 | Available operations are: cat, attach_files, unpack_files, 93 | burst, fill_form, background, stamp, multibackground, multi- 94 | stamp, dump_data, dump_data_fields, generate_fdf, update_info. 95 | Some operations takes additional arguments, described below. 96 | 97 | cat [] 98 | Catenates pages from input PDFs to create a new PDF. Page 99 | order in the new PDF is specified by the order of the given 100 | page ranges. Page ranges are described like this: 101 | 102 | [[-[]]][] 104 | 105 | Where the handle identifies one of the input PDF files, and 106 | the beginning and ending page numbers are one-based refer- 107 | ences to pages in the PDF file, and the qualifier can be even 108 | or odd, and the page rotation can be N, S, E, W, L, R, or D. 109 | 110 | If the handle is omitted from the page range, then the pages 111 | are taken from the first input PDF. 112 | 113 | The even qualifier causes pdftk to use only the even-numbered 114 | PDF pages, so 1-6even yields pages 2, 4 and 6 in that order. 115 | 6-1even yields pages 6, 4 and 2 in that order. 116 | 117 | The odd qualifier works similarly to the even. 118 | 119 | The page rotation setting can cause pdftk to rotate pages and 120 | documents. Each option sets the page rotation as follows (in 121 | degrees): N: 0, E: 90, S: 180, W: 270, L: -90, R: +90, D: 122 | +180. L, R, and D make relative adjustments to a page's rota- 123 | tion. 124 | 125 | If no arguments are passed to cat, then pdftk combines all 126 | input PDFs in the order they were given to create the output. 127 | 128 | NOTES: 129 | * may be less than . 130 | * The keyword end may be used to reference the final page of 131 | a document instead of a page number. 
132 | * Reference a single page by omitting the ending page number. 133 | * The handle may be used alone to represent the entire PDF 134 | document, e.g., B1-end is the same as B. 135 | 136 | Page Range Examples w/o Handles: 137 | 1-endE - rotate entire document 90 degrees 138 | 5 11 20 139 | 5-25oddW - take odd pages in range, rotate 90 degrees 140 | 6-1 141 | 142 | Page Range Examples Using Handles: 143 | Say A=in1.pdf B=in2.pdf, then: 144 | A1-21 145 | Bend-1odd 146 | A72 147 | A1-21 Beven A72 148 | AW - rotate entire document 90 degrees 149 | B 150 | A2-30evenL - take the even pages from the range, remove 90 151 | degrees from each page's rotation 152 | A A 153 | AevenW AoddE 154 | AW BW BD 155 | attach_files [to_page ] 157 | Packs arbitrary files into a PDF using PDF's file attachment 158 | features. More than one attachment may be listed after 159 | attach_files. Attachments are added at the document level 160 | unless the optional to_page option is given, in which case 161 | the files are attached to the given page number (the first 162 | page is 1, the final page is end). For example: 163 | 164 | pdftk in.pdf attach_files table1.html table2.html to_page 6 165 | output out.pdf 166 | 167 | unpack_files 168 | Copies all of the attachments from the input PDF into the 169 | current folder or to an output directory given after output. 170 | For example: 171 | 172 | pdftk report.pdf unpack_files output ~/atts/ 173 | 174 | or, interactively: 175 | 176 | pdftk report.pdf unpack_files output PROMPT 177 | 178 | burst Splits a single, input PDF document into individual pages. 179 | Also creates a report named doc_data.txt which is the same as 180 | the output from dump_data. If the output section is omitted, 181 | then PDF pages are named: pg_%04d.pdf, e.g.: pg_0001.pdf, 182 | pg_0002.pdf, etc. To name these pages yourself, supply a 183 | printf-styled format string via the output section. 
For 184 | example, if you want pages named: page_01.pdf, page_02.pdf, 185 | etc., pass output page_%02d.pdf to pdftk. Encryption can be 186 | applied to the output by appending output options such as 187 | owner_pw, e.g.: 188 | 189 | pdftk in.pdf burst owner_pw foopass 190 | 191 | fill_form 192 | Fills the single input PDF's form fields with the data from 193 | an FDF file, XFDF file or stdin. Enter the data filename 194 | after fill_form, or use - to pass the data via stdin, like 195 | so: 196 | 197 | pdftk form.pdf fill_form data.fdf output form.filled.pdf 198 | 199 | After filling a form, the form fields remain interactive 200 | unless you also use the flatten output option. flatten merges 201 | the form fields with the PDF pages. You can use flatten 202 | alone, too, but only on a single PDF: 203 | 204 | pdftk form.pdf fill_form data.fdf output out.pdf flatten 205 | 206 | or: 207 | 208 | pdftk form.filled.pdf output out.pdf flatten 209 | 210 | If the input FDF file includes Rich Text formatted data in 211 | addition to plain text, then the Rich Text data is packed 212 | into the form fields as well as the plain text. Pdftk also 213 | sets a flag that cues Acrobat/Reader to generate new field 214 | appearances based on the Rich Text data. That way, when the 215 | user opens the PDF, the viewer will create the Rich Text 216 | fields on the spot. If the user's PDF viewer does not sup- 217 | port Rich Text, then the user will see the plain text data 218 | instead. If you flatten this form before Acrobat has a 219 | chance to create (and save) new field appearances, then the 220 | plain text field data is what you'll see. 221 | 222 | background 223 | Applies a PDF watermark to the background of a single input 224 | PDF. Pass the background PDF's filename after background 225 | like so: 226 | 227 | pdftk in.pdf background back.pdf output out.pdf 228 | 229 | Pdftk uses only the first page from the background PDF and 230 | applies it to every page of the input PDF. 
This page is 231 | scaled and rotated as needed to fit the input page. You can 232 | use - to pass a background PDF into pdftk via stdin. 233 | 234 | If the input PDF does not have a transparent background (such 235 | as a PDF created from page scans) then the resulting back- 236 | ground won't be visible -- use the stamp feature instead. 237 | 238 | multibackground 239 | Same as the background feature, but applies each page of the 240 | background PDF to the corresponding page of the input PDF. 241 | If the input PDF has more pages than the stamp PDF, then the 242 | final stamp page is repeated across these remaining pages in 243 | the input PDF. 244 | 245 | stamp 246 | This behaves just like the background feature except it over- 247 | lays the stamp PDF page on top of the input PDF document's 248 | pages. This works best if the stamp PDF page has a transpar- 249 | ent background. 250 | 251 | multistamp 252 | Same as the stamp feature, but applies each page of the back- 253 | ground PDF to the corresponding page of the input PDF. If 254 | the input PDF has more pages than the stamp PDF, then the 255 | final stamp page is repeated across these remaining pages in 256 | the input PDF. 257 | 258 | dump_data 259 | Reads a single, input PDF file and reports various statis- 260 | tics, metadata, bookmarks (a/k/a outlines), and page labels 261 | to the given output filename or (if no output is given) to 262 | stdout. Does not create a new PDF. 263 | 264 | dump_data_fields 265 | Reads a single, input PDF file and reports form field statis- 266 | tics to the given output filename or (if no output is given) 267 | to stdout. Does not create a new PDF. 268 | 269 | generate_fdf 270 | Reads a single, input PDF file and generates a FDF file suit- 271 | able for fill_form out of it to the given output filename or 272 | (if no output is given) to stdout. Does not create a new 273 | PDF. 
274 | 275 | update_info 276 | Changes the metadata stored in a single PDF's Info dictionary 277 | to match the input data file. The input data file uses the 278 | same syntax as the output from dump_data. This does not 279 | change the metadata stored in the PDF's XMP stream, if it has 280 | one. For example: 281 | 282 | pdftk in.pdf update_info in.info output out.pdf 283 | 284 | [output ] 285 | The output PDF filename may not be set to the name of an input 286 | filename. Use - to output to stdout. When using the dump_data 287 | operation, use output to set the name of the output data file. 288 | When using the unpack_files operation, use output to set the 289 | name of an output directory. When using the burst operation, 290 | you can use output to control the resulting PDF page filenames 291 | (described above). 292 | 293 | [encrypt_40bit | encrypt_128bit] 294 | If an output PDF user or owner password is given, output PDF 295 | encryption strength defaults to 128 bits. This can be overrid- 296 | den by specifying encrypt_40bit. 297 | 298 | [allow ] 299 | Permissions are applied to the output PDF only if an encryption 300 | strength is specified or an owner or user password is given. If 301 | permissions are not specified, they default to 'none,' which 302 | means all of the following features are disabled. 303 | 304 | The permissions section may include one or more of the following 305 | features: 306 | 307 | Printing 308 | Top Quality Printing 309 | 310 | DegradedPrinting 311 | Lower Quality Printing 312 | 313 | ModifyContents 314 | Also allows Assembly 315 | 316 | Assembly 317 | 318 | CopyContents 319 | Also allows ScreenReaders 320 | 321 | ScreenReaders 322 | 323 | ModifyAnnotations 324 | Also allows FillIn 325 | 326 | FillIn 327 | 328 | AllFeatures 329 | Allows the user to perform all of the above, and top 330 | quality printing. 
331 | 332 | [owner_pw ] 333 | 334 | [user_pw ] 335 | If an encryption strength is given but no passwords are sup- 336 | plied, then the owner and user passwords remain empty, which 337 | means that the resulting PDF may be opened and its security 338 | parameters altered by anybody. 339 | 340 | [compress | uncompress] 341 | These are only useful when you want to edit PDF code in a text 342 | editor like vim or emacs. Remove PDF page stream compression by 343 | applying the uncompress filter. Use the compress filter to 344 | restore compression. 345 | 346 | [flatten] 347 | Use this option to merge an input PDF's interactive form fields 348 | (and their data) with the PDF's pages. Only one input PDF may be 349 | given. Sometimes used with the fill_form operation. 350 | 351 | [keep_first_id | keep_final_id] 352 | When combining pages from multiple PDFs, use one of these 353 | options to copy the document ID from either the first or final 354 | input document into the new output PDF. Otherwise pdftk creates 355 | a new document ID for the output PDF. When no operation is 356 | given, pdftk always uses the ID from the (single) input PDF. 357 | 358 | [drop_xfa] 359 | If your input PDF is a form created using Acrobat 7 or Adobe 360 | Designer, then it probably has XFA data. Filling such a form 361 | using pdftk yields a PDF with data that fails to display in 362 | Acrobat 7 (and 6?). The workaround solution is to remove the 363 | form's XFA data, either before you fill the form using pdftk or 364 | at the time you fill the form. Using this option causes pdftk to 365 | omit the XFA data from the output PDF form. 366 | 367 | This option is only useful when running pdftk on a single input 368 | PDF. When assembling a PDF from multiple inputs using pdftk, 369 | any XFA data in the input is automatically omitted. 370 | 371 | [verbose] 372 | By default, pdftk runs quietly. Append verbose to the end and it 373 | will speak up. 
374 | 375 | [dont_ask | do_ask] 376 | Depending on the compile-time settings (see ASK_ABOUT_WARNINGS), 377 | pdftk might prompt you for further input when it encounters a 378 | problem, such as a bad password. Override this default behavior 379 | by adding dont_ask (so pdftk won't ask you what to do) or do_ask 380 | (so pdftk will ask you what to do). 381 | 382 | When running in dont_ask mode, pdftk will over-write files with 383 | its output without notice. 384 | 385 | EXAMPLES 386 | 387 | 388 | Decrypt a PDF 389 | pdftk secured.pdf input_pw foopass output unsecured.pdf 390 | 391 | Encrypt a PDF using 128-bit strength (the default), withhold all permissions 392 | (the default) 393 | pdftk 1.pdf output 1.128.pdf owner_pw foopass 394 | 395 | Same as above, except password 'baz' must also be used to open output 396 | PDF 397 | pdftk 1.pdf output 1.128.pdf owner_pw foo user_pw baz 398 | 399 | Same as above, except printing is allowed (once the PDF is open) 400 | pdftk 1.pdf output 1.128.pdf owner_pw foo user_pw baz allow printing 401 | 402 | Join in1.pdf and in2.pdf into a new PDF, out1.pdf 403 | pdftk in1.pdf in2.pdf cat output out1.pdf 404 | or (using handles): 405 | pdftk A=in1.pdf B=in2.pdf cat A B output out1.pdf 406 | or (using wildcards): 407 | pdftk *.pdf cat output combined.pdf 408 | 409 | Remove 'page 13' from in1.pdf to create out1.pdf 410 | pdftk in1.pdf cat 1-12 14-end output out1.pdf 411 | or: 412 | pdftk A=in1.pdf cat A1-12 A14-end output out1.pdf 413 | 414 | Apply 40-bit encryption to output, revoking all permissions (the 415 | default). Set the owner PW to 'foopass'. 416 | pdftk 1.pdf 2.pdf cat output 3.pdf encrypt_40bit owner_pw foopass 417 | 418 | Join two files, one of which requires the password 'foopass'. The output 419 | is not encrypted.
420 | pdftk A=secured.pdf 2.pdf input_pw A=foopass cat output 3.pdf 421 | 422 | Uncompress PDF page streams for editing the PDF in a text editor (e.g., 423 | vim, emacs) 424 | pdftk doc.pdf output doc.unc.pdf uncompress 425 | 426 | Repair a PDF's corrupted XREF table and stream lengths, if possible 427 | pdftk broken.pdf output fixed.pdf 428 | 429 | Burst a single PDF document into pages and dump its data to 430 | doc_data.txt 431 | pdftk in.pdf burst 432 | 433 | Burst a single PDF document into encrypted pages. Allow low-quality 434 | printing 435 | pdftk in.pdf burst owner_pw foopass allow DegradedPrinting 436 | 437 | Write a report on PDF document metadata and bookmarks to report.txt 438 | pdftk in.pdf dump_data output report.txt 439 | 440 | Rotate the first PDF page to 90 degrees clockwise 441 | pdftk in.pdf cat 1E 2-end output out.pdf 442 | 443 | Rotate an entire PDF document to 180 degrees 444 | pdftk in.pdf cat 1-endS output out.pdf 445 | 446 | NOTES 447 | The pdftk home page permalink is: 448 | http://www.pdflabs.com/tools/pdftk-the-pdf-toolkit/ 449 | The easy-to-remember shortcut is: www.pdftk.com 450 | 451 | AUTHOR 452 | Sid Steward (sid.steward at pdflabs dot com) maintains pdftk. Please 453 | email him with questions or bug reports. Include pdftk in the subject 454 | line to ensure successful delivery. Thank you. 
455 | -------------------------------------------------------------------------------- /src/fields.fdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/fields.fdf -------------------------------------------------------------------------------- /src/filters/FilterASCII85.php: -------------------------------------------------------------------------------- 1 | ORD_u) { 63 | return $this->error('Illegal character in ASCII85Decode.'); 64 | } 65 | 66 | $chn[$state++] = $ch - ORD_exclmark; 67 | 68 | if ($state == 5) { 69 | $state = 0; 70 | $r = 0; 71 | for ($j = 0; $j < 5; ++$j) 72 | $r = $r * 85 + $chn[$j]; 73 | $out .= chr($r >> 24); 74 | $out .= chr($r >> 16); 75 | $out .= chr($r >> 8); 76 | $out .= chr($r); 77 | } 78 | } 79 | $r = 0; 80 | 81 | if ($state == 1) 82 | return $this->error('Illegal length in ASCII85Decode.'); 83 | if ($state == 2) { 84 | $r = $chn[0] * 85 * 85 * 85 * 85 + ($chn[1]+1) * 85 * 85 * 85; 85 | $out .= chr($r >> 24); 86 | } 87 | else if ($state == 3) { 88 | $r = $chn[0] * 85 * 85 * 85 * 85 + $chn[1] * 85 * 85 * 85 + ($chn[2]+1) * 85 * 85; 89 | $out .= chr($r >> 24); 90 | $out .= chr($r >> 16); 91 | } 92 | else if ($state == 4) { 93 | $r = $chn[0] * 85 * 85 * 85 * 85 + $chn[1] * 85 * 85 * 85 + $chn[2] * 85 * 85 + ($chn[3]+1) * 85 ; 94 | $out .= chr($r >> 24); 95 | $out .= chr($r >> 16); 96 | $out .= chr($r >> 8); 97 | } 98 | 99 | return $out; 100 | } 101 | 102 | function encode($in) { 103 | return $this->error("ASCII85 encoding not implemented."); 104 | } 105 | } 106 | } 107 | 108 | unset($__tmp); -------------------------------------------------------------------------------- /src/filters/FilterASCIIHex.php: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/filters/FilterASCIIHex.php 
-------------------------------------------------------------------------------- /src/filters/FilterFlate.php: -------------------------------------------------------------------------------- 1 | = 1.0.9! 5 | // 6 | 7 | $__tmp = version_compare(phpversion(), "5") == -1 ? array('FilterFlateDecode') : array('FilterFlateDecode', false); 8 | if (!call_user_func_array('class_exists', $__tmp)) { 9 | 10 | 11 | if(isset($FPDM_FILTERS)) array_push($FPDM_FILTERS,"FlateDecode"); 12 | 13 | class FilterFlate { 14 | 15 | var $data = null; 16 | var $dataLength = 0; 17 | 18 | function error($msg) { 19 | die($msg); 20 | } 21 | 22 | /** 23 | * Method to decode GZIP compressed data. 24 | * 25 | * @param string data The compressed data. 26 | * @return uncompressed data 27 | */ 28 | function decode($data) { 29 | 30 | $this->data = $data; 31 | $this->dataLength = strlen($data); 32 | 33 | // uncompress 34 | $data=gzuncompress($data); 35 | 36 | if(!$data) $this->error("FilterFlateDecode: invalid stream data."); 37 | 38 | return $data; 39 | } 40 | 41 | 42 | function encode($in) { 43 | return gzcompress($in, 9); 44 | } 45 | } 46 | 47 | } 48 | //unset $__tmp; 49 | ?> -------------------------------------------------------------------------------- /src/filters/FilterLZW.php: -------------------------------------------------------------------------------- 1 | error('LZW flavour not supported.'); 51 | } 52 | 53 | $this->initsTable(); 54 | 55 | $this->data = $data; 56 | $this->dataLength = strlen($data); 57 | 58 | // Initialize pointers 59 | $this->bytePointer = 0; 60 | $this->bitPointer = 0; 61 | 62 | $this->nextData = 0; 63 | $this->nextBits = 0; 64 | 65 | $oldCode = 0; 66 | 67 | $string = ''; 68 | $uncompData = ''; 69 | 70 | while (($code = $this->getNextCode()) != 257) { 71 | if ($code == 256) { 72 | $this->initsTable(); 73 | $code = $this->getNextCode(); 74 | 75 | if ($code == 257) { 76 | break; 77 | } 78 | 79 | $uncompData .= $this->sTable[$code]; 80 | $oldCode = $code; 81 | 82 | } else { 
83 | 84 | if ($code < $this->tIdx) { 85 | $string = $this->sTable[$code]; 86 | $uncompData .= $string; 87 | 88 | $this->addStringToTable($this->sTable[$oldCode], $string[0]); 89 | $oldCode = $code; 90 | } else { 91 | $string = $this->sTable[$oldCode]; 92 | $string = $string.$string[0]; 93 | $uncompData .= $string; 94 | 95 | $this->addStringToTable($string); 96 | $oldCode = $code; 97 | } 98 | } 99 | } 100 | 101 | return $uncompData; 102 | } 103 | 104 | 105 | /** 106 | * Initialize the string table. 107 | */ 108 | function initsTable() { 109 | $this->sTable = array(); 110 | 111 | for ($i = 0; $i < 256; $i++) 112 | $this->sTable[$i] = chr($i); 113 | 114 | $this->tIdx = 258; 115 | $this->bitsToGet = 9; 116 | } 117 | 118 | /** 119 | * Add a new string to the string table. 120 | */ 121 | function addStringToTable ($oldString, $newString='') { 122 | $string = $oldString.$newString; 123 | 124 | // Add this new String to the table 125 | $this->sTable[$this->tIdx++] = $string; 126 | 127 | if ($this->tIdx == 511) { 128 | $this->bitsToGet = 10; 129 | } else if ($this->tIdx == 1023) { 130 | $this->bitsToGet = 11; 131 | } else if ($this->tIdx == 2047) { 132 | $this->bitsToGet = 12; 133 | } 134 | } 135 | 136 | // Returns the next 9, 10, 11 or 12 bits 137 | function getNextCode() { 138 | if ($this->bytePointer == $this->dataLength) { 139 | return 257; 140 | } 141 | 142 | $this->nextData = ($this->nextData << 8) | (ord($this->data[$this->bytePointer++]) & 0xff); 143 | $this->nextBits += 8; 144 | 145 | if ($this->nextBits < $this->bitsToGet) { 146 | $this->nextData = ($this->nextData << 8) | (ord($this->data[$this->bytePointer++]) & 0xff); 147 | $this->nextBits += 8; 148 | } 149 | 150 | $code = ($this->nextData >> ($this->nextBits - $this->bitsToGet)) & $this->andTable[$this->bitsToGet-9]; 151 | $this->nextBits -= $this->bitsToGet; 152 | 153 | return $code; 154 | } 155 | 156 | function encode($in) { 157 | $this->error("LZW encoding not implemented."); 158 | } 159 | } 160 | } 161 | 
162 | unset($__tmp); -------------------------------------------------------------------------------- /src/filters/FilterStandard.php: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/fpdm.php: -------------------------------------------------------------------------------- 1 | "/\/AS\s+\/(\w+)$/", 41 | "name"=>"/\/(\w+)/", 42 | // "/AP_D_SingleLine"=>"/\/D\s+\/(\w+)\s+\d+\s+\d+\s+R\s+\/(\w+)$/", 43 | //ENDFIX 44 | "/Type"=>"/\/Type\s+\/(\w+)$/", 45 | "/Subtype" =>"/^\/Subtype\s+\/(\w+)$/" 46 | ); 47 | 48 | //Major stream filters come from FPDI's stuff but I've added some :) 49 | if (!defined('FPDM_DIRECT')) { 50 | $FPDM_FILTERS = array("LZWDecode", "ASCIIHexDecode", "ASCII85Decode", "FlateDecode", "Standard" ); 51 | } 52 | // require_once("filters/FilterASCIIHex.php"); 53 | // require_once("filters/FilterASCII85.php"); 54 | // require_once("filters/FilterFlate.php"); 55 | // require_once("filters/FilterLZW.php"); 56 | // require_once("filters/FilterStandard.php"); 57 | 58 | 59 | $__tmp = version_compare(phpversion(), "5") == -1 ? array('FPDM') : array('FPDM', false); 60 | if (!call_user_func_array('class_exists', $__tmp)) { 61 | 62 | 63 | define('FPDM_VERSION',2.9); 64 | 65 | define('FPDM_INVALID',0); 66 | define('FPDM_STATIC',1); 67 | define('FPDM_COMMON',2); 68 | define('FPDM_VERBOSE',3); 69 | define('FPDM_CACHE',dirname(__FILE__).'/export/cache/'); //cache directory for fdf temporary files needed by pdftk. 70 | define('FPDM_PASSWORD_MAX_LEN',15); //Security to prevent shell overflow. 
71 | 72 | class FPDM { 73 | //@@@@@@@@@ 74 | var $useCheckboxParser = false; //boolean: allows activation of custom checkbox parser (not available in original FPDM source) 75 | 76 | var $pdf_source = ''; //string: full pathname to the input pdf , a form file 77 | var $fdf_source = ''; //string: full pathname to the input fdf , a form data file 78 | var $pdf_output = ''; //string: full pathname to the resulting filled pdf 79 | 80 | var $pdf_entries = array(); //array: Holds the content of the pdf file as array 81 | var $fdf_content = ''; //string: holds the content of the fdf file 82 | var $fdf_parse_needed = false;//boolean: false will use $fields data else extract data from fdf content 83 | var $value_entries = array(); //array: a map of values to faliclitate access and changes 84 | 85 | var $positions = array(); //array, stores what object id is at a given position n ($positions[n]=) 86 | 87 | var $offsets = array(); //array of offsets for objects, index is the object's id, starting at 1 88 | var $pointer = 0; //integer, Current line position in the pdf file during the parsing 89 | 90 | var $shifts = array(); //array, Shifts of objects in the order positions they appear in the pdf, starting at 0. 
91 | var $shift = 0; //integer, Global shift file size due to object values size changes 92 | 93 | var $streams = ''; //Holds streams configuration found during parsing 94 | var $streams_filter = ''; //Regexp to decode filter streams 95 | 96 | var $safe_mode = false; //boolean, if set, ignore previous offsets do no calculations for the new xref table, seek pos directly in file 97 | var $check_mode = false; //boolean, Use this to track offset calculations errors in corrupteds pdfs files for sample 98 | var $halt_mode = false; //if true, stops when offset error is encountered 99 | 100 | var $info = array(); //array, holds the info properties 101 | var $fields = array(); //array that holds fields-Data parsed from FDF 102 | 103 | var $verbose = false; //boolean , a debug flag to decide whether or not to show internal process 104 | var $verbose_level = 1; //integer default is 1 and if greater than 3, shows internal parsing as well 105 | 106 | var $support = ''; //string set to 'native' for fpdm or 'pdftk' for pdf toolkit 107 | var $flatten_mode = false; //if true, flatten field data as text and remove form fields (NOT YET SUPPORTED BY FPDM) 108 | var $compress_mode = false; //boolean , pdftk feature only to compress streams 109 | var $uncompress_mode = false; //boolean pdftk feature only to uncompress streams 110 | var $security = array(); //Array holding securtity settings 111 | //(password owner nad user, encrypt (set to 40 or 128 or 0), allow ] see pdfk help 112 | 113 | var $needAppearancesTrue = false; //boolean, indicates if /NeedAppearances is already set to true 114 | var $isUTF8 = false; //boolean (true for UTF-8, false for ISO-8859-1) 115 | 116 | /** 117 | * Constructor 118 | * 119 | *@example Common use: 120 | *@param string $pdf_source Source-Filename 121 | *@param string $fdf_source Source-Filename 122 | *@param boolean $verbose , optional false per default 123 | */ 124 | function __construct() { 125 | //============== 126 | 127 | $args=func_get_args(); 128 
| $num_args=func_num_args(); 129 | 130 | $FDF_FILE=($num_args>=FPDM_COMMON); 131 | $VERBOSE_FLAG=($num_args>=FPDM_VERBOSE); 132 | 133 | $verbose=false; 134 | 135 | //We are not joking here, let's have a polymorphic constructor! 136 | switch($num_args) { 137 | case FPDM_INVALID: 138 | $this->Error("Invalid instantiation of FPDM, requires at least one param"); 139 | break; 140 | case FPDM_STATIC: 141 | if($args[0] =='[_STATIC_]') break; //static use, caller is anonymous function defined in _set_field_value 142 | //else this is the pdf_source then, fdf content is loaded using Load() function 143 | default: 144 | case FPDM_VERBOSE: //Use the verbose value provided 145 | if($VERBOSE_FLAG) $verbose=$args[2]; 146 | case FPDM_COMMON: //Common use 147 | $this->pdf_source = $args[0];//Blank pdf form 148 | 149 | if($FDF_FILE) { 150 | $this->fdf_source = $args[1];//Holds the data of the fields to fill the form 151 | $this->fdf_parse_needed=true; 152 | } 153 | 154 | //calculation and map 155 | $this->offsets=array(); 156 | $this->pointer=0; 157 | $this->shift=0; 158 | $this->shifts=array(); 159 | $this->n=0; 160 | 161 | //Stream filters 162 | $filters=$this->getFilters("|"); 163 | $this->streams_filter="/(\/($filters))+/"; 164 | //$this->dumpContent($this->streams_filter); 165 | 166 | $this->info=array(); 167 | 168 | //Debug modes 169 | $this->verbose=$verbose; 170 | $this->verbose_level=($verbose&&is_int($verbose)) ? $verbose : 1; 171 | $this->safe_mode=false; 172 | $this->check_mode=false; //script will takes much more time if you do so 173 | $this->halt_mode=true; 174 | 175 | $this->support='native'; //may ne overriden 176 | $this->security=array('password'=>array('owner'=>null,'user'=>null),'encrypt'=>0,'allow'=>array()); 177 | 178 | //echo "
filesize:".filesize($this->pdf_source); 179 | $this->load_file('PDF'); 180 | 181 | if($FDF_FILE) $this->load_file('FDF'); 182 | 183 | } 184 | } 185 | 186 | /** 187 | *Loads a form data to be merged 188 | * 189 | *@note this overrides fdf input source if it was previously defined 190 | *@access public 191 | *@param string|array $fdf_data a FDF file content or $pdf_data an array containing the values for the fields to change 192 | **/ 193 | function Load($data,$isUTF8=false) { 194 | //------------------------ 195 | $this->isUTF8 = $isUTF8; 196 | $this->load_file('FDF',$data); 197 | } 198 | 199 | /** 200 | *Loads a file according to its type 201 | * 202 | *@access private 203 | *@param string type 'PDF' or 'FDF' 204 | *@param String|array content the data content of FDF files only or directly the fields values as array 205 | **/ 206 | function load_file($type,$content=NULL) { 207 | //------------------------------------ 208 | switch($type) { 209 | case "PDF" : 210 | if($content) 211 | $this->Error("load_file do not accept PDF content, only FDF content sorry"); 212 | else 213 | $this->pdf_entries = $this->getEntries($this->pdf_source,'PDF'); 214 | break; 215 | case "FDF" : 216 | if(!is_null($content)) { 217 | if(is_array($content)) { 218 | $this->fields=$content; 219 | $this->fdf_parse_needed=false; 220 | //$this->dumpEntries($content,"PDF fields content"); 221 | } else if(is_string($content)){ //String 222 | $this->fdf_content = $content; //TODO: check content 223 | $this->fdf_parse_needed=true; 224 | } else 225 | $this->Error('Invalid content type for this FDF file!'); 226 | } else { 227 | $this->fdf_content = $this->getContent($this->fdf_source,'FDF'); 228 | $this->fdf_parse_needed=true; 229 | } 230 | break; 231 | default: 232 | $this->Error("Invalid file type $type"); 233 | } 234 | } 235 | 236 | /** 237 | *Set a mode and play with your power debug toys 238 | * 239 | *@access public 240 | *@note for big boys only coz it may hurt 241 | *@param string $mode a choice 
between 'safe','check','verbose','halt' or 'verbose_level' 242 | *@param string|int $value an integer for verbose_level 243 | **/ 244 | function set_modes($mode,$value) { 245 | //------------------------------- 246 | switch($mode) { 247 | case 'safe': 248 | $this->safe_mode=$value; 249 | break; 250 | case 'check': 251 | $this->check_mode=$value; 252 | break; 253 | case 'flatten': 254 | $this->flatten_mode=$value; 255 | break; 256 | case 'compress_mode': 257 | $this->compress_mode=$value; 258 | if($value) $this->uncompress_mode=false; 259 | break; 260 | case 'uncompress_mode': 261 | $this->uncompress_mode=$value; 262 | if($value) $this->compress_mode=false; 263 | break; 264 | case 'verbose': 265 | $this->verbose=$value; 266 | break; 267 | case 'halt': 268 | $this->halt_mode=$value; 269 | break; 270 | case 'verbose_level': 271 | $this->verbose_level=$value; 272 | break; 273 | default: 274 | $this->Error("set_modes error, Invalid mode '$mode'"); 275 | } 276 | } 277 | 278 | /** 279 | *Retrieves informations of the pdf 280 | * 281 | *@access public 282 | *@note To track PDF versions and so on... 283 | *@param Boolean output 284 | **/ 285 | function Info($asArray=false) { 286 | //---------------------- 287 | $info=$this->info; 288 | $info["Reader"]=($this->support == "native") ? 'FPDF-Merge '.FPDM_VERSION: $this->support; 289 | $info["Fields"]=$this->fields; 290 | $info["Modes"]=array( 291 | 'safe'=>($this->safe_mode)? 'Yes' :'No', 292 | 'check'=>($this->check_mode) ? 'Yes': 'No', 293 | 'flatten'=>($this->flatten_mode) ? 'Yes': 'No', 294 | 'compress_mode'=>($this->compress_mode) ? 'Yes': 'No', 295 | 'uncompress_mode'=>($this->uncompress_mode) ? 
'Yes': 'No', 296 | 'verbose'=>$this->verbose, 297 | 'verbose_level'=>$this->verbose_level, 298 | 'halt'=>$this->halt_mode 299 | ); 300 | if($asArray) { 301 | return $info; 302 | } else { 303 | $this->dumpEntries($info); 304 | } 305 | } 306 | 307 | /** 308 | *Changes the support 309 | * 310 | *@access public 311 | *@internal fixes xref table offsets 312 | *@note special playskool toy for Christmas dedicated to my impatient fanclub (Grant, Kris, nejck,...) 313 | *@param String support Allow to use external support that has more advanced features (ie 'pdftk') 314 | **/ 315 | function Plays($cool) { 316 | //---------------------- 317 | if($cool=='pdftk') //Use a coolest support as .. 318 | $this->support='pdftk';//..Per DeFinition This is Kool! 319 | else 320 | $this->support='native'; 321 | } 322 | 323 | /** 324 | *Fixes a corrupted PDF file 325 | * 326 | *@access public 327 | *@internal fixes xref table offsets 328 | *@note Real work is not made here but by Merge that should be launched after to complete the work 329 | **/ 330 | function Fix() { 331 | //--------------- 332 | if(!$this->fields) $this->fields=array(); //Default: No field data 333 | $this->set_modes('check',true); //Compare xref table offsets with objects offsets in the pdf file 334 | $this->set_modes('halt',false); //Do no stop on errors so fix is applied during merge process 335 | } 336 | 337 | //######## pdftk's output configuration ####### 338 | 339 | /** 340 | *Decides to use the compress filter to restore compression. 341 | *@note This is only useful when you want to repack PDF that was previously edited in a text editor like vim or emacs. 342 | **/ 343 | function Compress() { 344 | //------------------- 345 | $this->set_modes('compress',true); 346 | $this->support="pdftk"; 347 | } 348 | 349 | /** 350 | *Decides to remove PDF page stream compression by applying the uncompress filter. 351 | *@note This is only useful when you want to edit PDF code in a text editor like vim or emacs. 
352 | **/ 353 | function Uncompress() { 354 | //--------------------- 355 | $this->set_modes('uncompress',true); 356 | $this->support="pdftk"; 357 | } 358 | /** 359 | *Activates the flatten output to remove form from pdf file keeping field datas. 360 | **/ 361 | function Flatten() { 362 | //----------------- 363 | $this->set_modes('flatten',true); 364 | $this->support="pdftk"; 365 | } 366 | 367 | /*** 368 | *Defines a password type 369 | *@param String type , 'owner' or 'user' 370 | **/ 371 | function Password($type,$code) { 372 | //------------------------------ 373 | switch($type) { 374 | case 'owner': 375 | case 'user': 376 | $this->security["password"]["$type"]=$code; 377 | break; 378 | default: 379 | $this->Error("Unsupported password type ($type), specify 'owner' or 'user' instead."); 380 | } 381 | $this->support="pdftk"; 382 | } 383 | 384 | 385 | /** 386 | *Defines the encrytion to the given bits 387 | *@param integer $bits 0, 40 or 128 388 | **/ 389 | function Encrypt($bits) { 390 | //----------------------- 391 | switch($bits) { 392 | case 0: 393 | case 40: 394 | case 128: 395 | $this->security["encrypt"]=$bits; 396 | break; 397 | default: 398 | $this->Error("Unsupported encrypt value of $bits, only 0, 40 and 128 are supported"); 399 | } 400 | $this->support="pdftk"; 401 | } 402 | 403 | /** 404 | *Allow permissions 405 | * 406 | *@param Array permmissions If no arg is given, show help. 407 | * Permissions are applied to the output PDF only if an encryption 408 | * strength is specified or an owner or user password is given. If 409 | * permissions are not specified, they default to 'none,' which 410 | * means all of the following features are disabled. 
*
		 * The permissions section may include one or more of the following
		 * features:
		 *
		 * Printing
		 *   Top Quality Printing
		 *
		 * DegradedPrinting
		 *   Lower Quality Printing
		 *
		 * ModifyContents
		 *   Also allows Assembly
		 *
		 * Assembly
		 *
		 * CopyContents
		 *   Also allows ScreenReaders
		 *
		 * ScreenReaders
		 *
		 * ModifyAnnotations
		 *   Also allows FillIn
		 *
		 * FillIn
		 *
		 * AllFeatures
		 *   Allows the user to perform all of the above, and top
		 *   quality printing.
		 **/
		function Allow($permissions=null) {
		//--------------------------
			$perms_help=array(
				'Printing'=>'Top Quality Printing',
				'DegradedPrinting'=>'Lower Quality Printing',
				'ModifyContents' =>'Also allows Assembly',
				'Assembly' => '',
				'CopyContents' => 'Also allows ScreenReaders',
				'ScreenReaders' => '',
				'ModifyAnnotations'=>'Also allows FillIn',
				'FillIn'=>'',
				'AllFeatures'=> "All above"
			);
			if(is_null($permissions)) {
				//No argument: only print the list of known permission names
				echo "\nInfo Allow permissions:\n";
				print_r($perms_help);
			}else {
				if(is_string($permissions)) $permissions=array($permissions);
				$perms=array_keys($perms_help);
				//Unknown permission names are silently dropped
				$this->security["allow"]=array_intersect($permissions, $perms);
				//Permissions are serialized for pdftk in Output()
				$this->support="pdftk";
			}
		}

		//#############################

		/**
		 *Merge FDF file with a PDF file
		 *
		 *In "native" support mode the field values are written into the parsed
		 *pdf lines, then the xref table and the xref start pointer are fixed.
		 *In any other mode nothing is done here (pdftk's job is done in Output).
		 *
		 *@access public
		 *@note files has been provided during the instantiation of this class
		 *@internal flatten mode is not yet supported
		 *@param Boolean flatten Optional, false by default, if true will use pdftk (requires a shell) to flatten the pdf form
		 **/
		function Merge($flatten=false) {
		//------------------------------

			if($flatten) $this->Flatten();

			if($this->support == "native") {

				if($this->fdf_parse_needed) {
					$fields=$this->parseFDFContent();
				}else {
					$fields=$this->fields;
				}

				$count_fields=count($fields);

				if($this->verbose&&($count_fields==0))
					$this->dumpContent("The FDF content has either no field data or parsing may failed","FDF parser: ");

				$fields_value_definition_lines=array();

				//Filled by parsePDFEntries with the lines where field values are defined
				$count_entries=$this->parsePDFEntries($fields_value_definition_lines);

				if($count_entries) {

					$this->value_entries=$fields_value_definition_lines;
					if($this->verbose) {
						$this->dumpContent("$count_entries Field entry values found for $count_fields field values to fill","Merge info: ");
					}

					//==== Alterate work is made here: change values ============
					if($count_fields) {
						foreach($fields as $name => $value) {
							$this->set_field_value("current",$name,$value);
						}
					}
					//===========================================================

					//===== Cross refs/size fixes (offsets calculations for objects have been previously be done in set_field_value) =======

					//Update cross reference table to match object size changes
					$this->fix_xref_table();

					//update the pointer to the cross reference table
					$this->fix_xref_start();

				}else
					$this->Error("PDF file is empty!");

			} //else pdftk's job is done in Output, not here.
		}

		/**
		 *Warns verbose/output conflicts
		 *
		 *@access private
		 *@param string $dest a output destination
		 **/
		function Close($dest) {
		//----------------
			$this->Error("Output: Verbose mode should be desactivated, it is incompatible with this output mode $dest");
		}

		/**
		 *Get current pdf content (without any offset fixes)
		 *
		 *@access private
		 *@param String pdf_file, if given, the content of that file is used as buffer instead of the parsed lines
		 *@return string buffer the pdf content
		 **/
		function get_buffer($pdf_file=''){
		//---------------------
			if($pdf_file == '') {
				$buffer=implode("\n",$this->pdf_entries);
			}else {
				$buffer=$this->getContent($pdf_file,'PDF');
				//@unlink($pdf_file);
			}
			return $buffer;
		}


		/**
		 *Output PDF to some destination
		 *
		 *In "pdftk" support mode the FDF data is merged by pdftk first and the
		 *file it produced is the content that gets sent. Otherwise the lines
		 *changed by Merge() are sent.
		 *
		 *@access public
		 *@note reproduces the fpdf's behavior
		 *@param string dest the destination: I (inline), D (download), F (file) or S (string)
		 *@param string name the filename
		 *@return string the pdf content for destination S, an empty string otherwise
		 **/
		function Output($dest='', $name=''){
		//-----------------------------------

			$pdf_file='';

			if($this->support == "pdftk") {
				//As PDFTK can only merge FDF files not data directly,
				require_once("lib/url.php"); //we will need a url support because relative urls for pdf inside fdf files are not supported by PDFTK...
				require_once("export/fdf/fdf.php"); //...conjointly with my patched/bridged forge_fdf that provides fdf file generation support from array data.
				require_once("export/pdf/pdftk.php");//Of course don't forget to bridge to PDFTK!

				$tmp_file=false;
				$pdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->pdf_source)); //string: full pathname to the input pdf , a form file

				if($this->fdf_source) { //FDF file provided
					$fdf_file=resolve_path(fix_path(dirname(__FILE__).'/'.$this->fdf_source));
				}else {

					$pdf_url=getUrlfromDir($pdf_file); //Normaly http scheme not local file

					if($this->fdf_parse_needed) { //fdf source was provided
						$pdf_data=$this->parseFDFContent();
					}else { //fields data was provided as an array, we have to generate the fdf file
						$pdf_data=$this->fields;
					}

					$fdf_file=fix_path(FPDM_CACHE)."fields".rnunid().".fdf";
					$tmp_file=true;
					$ret=output_fdf($pdf_url,$pdf_data,$fdf_file);
					if(!$ret["success"])
						$this->Error("Output failed as something goes wrong (Pdf was $pdf_url)\nduring internal FDF generation of file $fdf_file,\nReason is given by {$ret['return']}");
				}

				//Serializes security options (not deeply tested)
				//NOTE(review): these values are concatenated unescaped; if pdftk() builds a shell command from them they should be escaped there - confirm.
				$security='';
				if(!is_null($this->security["password"]["owner"])) $security.=' owner_pw "'.substr($this->security["password"]["owner"],0,FPDM_PASSWORD_MAX_LEN).'"';
				if(!is_null($this->security["password"]["user"])) $security.=' user_pw "'.substr($this->security["password"]["user"],0,FPDM_PASSWORD_MAX_LEN).'"';
				if($this->security["encrypt"]!=0) $security.=' encrypt_'.$this->security["encrypt"].'bit';
				if(count($this->security["allow"])>0) {
					$permissions=$this->security["allow"];
					$security.=' allow ';
					foreach($permissions as $permission)
						$security.=' '.$permission;
				}

				//Serialize output modes
				$output_modes='';
				if($this->flatten_mode) $output_modes.=' flatten';
				if($this->compress_mode) $output_modes.=' compress';
				if($this->uncompress_mode) $output_modes.=' uncompress';

				$ret=pdftk($pdf_file,$fdf_file,array("security"=>$security,"output_modes"=>$output_modes));

				if($tmp_file) @unlink($fdf_file); //Clear cache

				if($ret["success"]) {
					$pdf_file=$ret["return"];
				}else
					$this->Error($ret["return"]);
			}

			//Build the content once. $pdf_file is empty in native mode (the parsed
			//lines are used) and names the file produced by pdftk otherwise.
			$buffer=$this->get_buffer($pdf_file);

			$dest=strtoupper($dest);
			if($dest=='')
			{
				if($name=='')
				{
					$name='doc.pdf';
					$dest='I';
				}
				else
					$dest='F';
			}

			//Abort to avoid to polluate output
			if($this->verbose&&(($dest=='I')||($dest=='D'))) {
				$this->Close($dest);
			}

			switch($dest)
			{
				case 'I':
					//Send to standard output
					if(ob_get_length())
						$this->Error('Some data has already been output, can\'t send PDF file');
					if(php_sapi_name()!='cli')
					{
						//We send to a browser
						header('Content-Type: application/pdf');
						if(headers_sent())
							$this->Error('Some data has already been output, can\'t send PDF file');
						header('Content-Length: '.strlen($buffer));
						header('Content-Disposition: inline; filename="'.$name.'"');
						header('Cache-Control: private, max-age=0, must-revalidate');
						header('Pragma: public');
						ini_set('zlib.output_compression','0');
					}
					echo $buffer;
					break;
				case 'D':
					//Download file
					if(ob_get_length())
						$this->Error('Some data has already been output, can\'t send PDF file');
					header('Content-Type: application/x-download');
					if(headers_sent())
						$this->Error('Some data has already been output, can\'t send PDF file');
					header('Content-Length: '.strlen($buffer));
					header('Content-Disposition: attachment; filename="'.$name.'"');

					header("Expires: Mon, 26 Jul 1997 05:00:00 GMT"); // Date in the past
					header("Last-Modified: " . gmdate("D, d M Y H:i:s") . " GMT"); // always modified
					header("Cache-Control: no-store, no-cache, must-revalidate, max-age=0"); // HTTP/1.1
					header("Cache-Control: post-check=0, pre-check=0", false);

					header('Cache-Control: private, max-age=0, must-revalidate');
					header('Pragma: public,no-cache');
					ini_set('zlib.output_compression','0');
					echo $buffer;
					break;
				case 'F':
					//Save to local file
					if($this->verbose) $this->dumpContent("Write file $name","Output");
					$f=fopen($name,'wb');
					if(!$f)
						$this->Error('Unable to create output file: '.$name.' (currently opened under Acrobat Reader?)');

					fwrite($f,$buffer,strlen($buffer));
					fclose($f);
					break;
				case 'S':
					//Return as a string
					return $buffer;
				default:
					$this->Error('Incorrect output destination: '.$dest);
			}
			return '';
		}


		/**
		 *Decodes and returns the binary form of a field hexified value
		 *
		 *@param string value the hexified string
		 *@return string call the binary string
		 **/
		function pdf_decode_field_value($value) {
		//----------------------------------------
			$call=$this->static_method_call('_hex2bin',$value);
			return $call;
		}

		/**
		 *Encodes and returns the hexadecimal form of a field binary value
		 *
		 *@param string value the binary string
		 *@return string call the hexified string
		 **/
		function pdf_encode_field_value($value) {
		//---------------------------------------
			$value=$this->static_method_call('_bin2hex',$value);
			return $value;
		}


		/**
		 *Universal Php4/5 static call helper
		 *
		 *Any argument given after $method is forwarded to the called method.
		 *
		 *@param String $method a name of a method belonging to this class
		 *@return mixed the return value of the called method
		 **/
		function static_method_call($method) {
		//---------------------------------------------

			$params_call=func_get_args();
			array_shift($params_call); //drops $method, keeps the forwarded arguments

			return call_user_func_array(array($this,$method),$params_call);
		}

		/**
		 *Changes a field value that can be in hex <> or binary form ()
		 *
		 *@param $matches the regexp matches of the line that contains the value to change
		 *@param String $value the new value for the field property
		 *@return string the rebuilt "/KEY <value>" or "/KEY (value)" text
		 **/
		function replace_value($matches,$value) {
		//----------------------------------------------

			array_shift($matches); //drops the full match, keeps: key, opening delimiter, value, closing delimiter

			if(($value!='')&&($matches[1]=="<")) //Value must be hexified..
				$value=$this->pdf_encode_field_value($value);

			$matches[2]=$value;
			$value_type_code=$matches[0]; //Should be V, DV or TU
			$matches[0]="/".$value_type_code." ";

			$value=implode("",$matches);
			return $value;
		}

		/**
		 *Core to change the value of a field property, inline.
		 *
		 *@access private
		 *@param int $line the line where the field property value is defined in the pdf file
		 *@param string $value the new value to set
		 *@return int $shift the size change of the field property value
		 **/
		function _set_field_value($line,$value) {
		//----------------------------------------

			$verbose_set=($this->verbose&&($this->verbose_level>1));
			//get the line content
			$CurLine =$this->pdf_entries[$line];

			$OldLen=strlen($CurLine);

			$field_regexp='/^\/(\w+)\s?(\<|\()([^\)\>]*)(\)|\>)/';

			if(preg_match($field_regexp,$CurLine)) {
				//A closure replaces the former create_function() call, which is gone
				//in PHP 8 and evaluated the raw value as part of generated code.
				$self=$this;
				$new_value=(string)$value;
				//modify it according to the new value $value
				$CurLine = preg_replace_callback(
					$field_regexp,
					function($matches) use ($self,$new_value) {
						return $self->replace_value($matches,$new_value);
					},
					$CurLine
				);
			}else {
				if($verbose_set) echo("\nWARNING:".htmlentities("Can not access to the value: $CurLine using regexp $field_regexp"));
			}

			$NewLen=strlen($CurLine);
			$Shift=$NewLen-$OldLen;
			$this->shift=$this->shift+$Shift;

			//Saves
			$this->pdf_entries[$line]=$CurLine;

			return $Shift;
		}

		/**
		 *Hexifies a value, converting it to UTF-16BE with a BOM first when isUTF8 is set
		 *
		 *@param string $str the value to encode
		 *@return string the hexified value
		 **/
		function _encode_value($str) {
			if($this->isUTF8)
				$str="\xFE\xFF".iconv('UTF-8','UTF-16BE',$str);
			return $this->_bin2hex($str);
		}

		/**
		 *Sets the /V value on a line, always written in hex form
		 *
		 *@param int $line the line to change
		 *@param string $value the new value to set
		 *@param boolean $append true to append a new /V entry to the line, false to replace the existing one
		 *@return int the size change of the line
		 **/
		function _set_field_value2($line,$value,$append) {
			$CurLine=$this->pdf_entries[$line];
			$OldLen=strlen($CurLine);

			if($append)
			{
				$CurLine .= ' /V <'.$this->_encode_value($value).'>';
			}
			else
			{
				if(preg_match('#/V\s?[<(]([^>)]*)[>)]#', $CurLine, $a, PREG_OFFSET_CAPTURE))
				{
					$len=strlen($a[1][0]);
					$pos1=$a[1][1];
					$pos2=$pos1+$len;
					//-1/+1 also drop the old delimiters, which may have been ( )
					$CurLine=substr($CurLine,0,$pos1-1).'<'.$this->_encode_value($value).'>'.substr($CurLine,$pos2+1);
				}
				else
					$this->Error('/V not found');
			}

			$NewLen=strlen($CurLine);
			$Shift=$NewLen-$OldLen;
			$this->shift=$this->shift+$Shift;
			$this->pdf_entries[$line]=$CurLine;
			return $Shift;
		}


		/**
		 *Changes the value of a field property, inline.
*
		 *Tooltips are delegated to set_field_tooltip and checkboxes (when the
		 *checkbox parser is enabled) to set_field_checkbox. Any other type,
		 *'default' included, changes the current value (/V).
		 *
		 *@param string $type supported values for type are 'default' , 'current' or 'tooltip'
		 *@param string $name name of the field annotation to change the value
		 *@param string $value the new value to set
		 **/
		function set_field_value($type,$name,$value) {
		//------------------------------------

			//Get the line(s) of the misc field values
			if(isset($this->value_entries["$name"])) {

				$object_id=$this->value_entries["$name"]["infos"]["object"];

				if($type=="tooltip") {

					$offset_shift=$this->set_field_tooltip($name,$value);

				} elseif ($this->useCheckboxParser && isset($this->value_entries["$name"]['infos']['checkbox_state'])) { //FIX: set checkbox value
					$offset_shift=$this->set_field_checkbox($name, $value);
				//ENDFIX
				} else {
					//Replace the existing /V entry, or append one to the line holding the field name
					if(isset($this->value_entries[$name]["values"]["current"]))
						$offset_shift=$this->_set_field_value2($this->value_entries[$name]["values"]["current"],$value,false);
					else
						$offset_shift=$this->_set_field_value2($this->value_entries[$name]["infos"]["name_line"],$value,true);
				}

				//offset size shift will affect the next objects offsets taking into accound the order they appear in the file--
				$this->apply_offset_shift_from_object($object_id,$offset_shift);

			} else
				$this->Error("field $name not found");

		}


		/**
		 *Changes the tooltip value of a field property, inline.
		 *
		 *@param string $name name of the field annotation to change the value
		 *@param string $value the new value to set
		 *@return int offset_shift the size variation
		 **/
		function set_field_tooltip($name,$value) {
		//------------------------------------
			$offset_shift=0;
			$verbose_set=($this->verbose&&($this->verbose_level>1));

			//Get the line(s) of the misc field values
			if(isset($this->value_entries["$name"])) {
				$field_tooltip_line=$this->value_entries["$name"]["infos"]["tooltip"];
				if($field_tooltip_line) {
					if($verbose_set) echo "\nChange tooltip of the field $name at line $field_tooltip_line to value [$value]";
					$offset_shift=$this->_set_field_value($field_tooltip_line,$value);
				}else {
					if($verbose_set) echo "\nChange toolpip value aborted, the field $name has no tooltip definition.";
				}
			} else
				$this->Error("set_field_tooltip failed as the field $name does not exist");
			return $offset_shift;
		}

		//FIX: parse checkbox definition
		/**
		 *Changes the checkbox state.
		 *
		 *The state line is rewritten as "/AS /<state>", where <state> is the
		 *parsed "yes" name when $value is truthy and the "no" name otherwise.
		 *
		 *@param string $name name of the field to change the state
		 *@param string $value the new state to set
		 *@return int offset_shift the size variation
		 **/
		public function set_field_checkbox($name, $value)
		{
			//------------------------------------
			$offset_shift=0;
			$verbose_set=($this->verbose&&($this->verbose_level>1));
			//Get the line(s) of the misc field values
			if (isset($this->value_entries["$name"])) {
				if (isset($this->value_entries["$name"]["infos"]["checkbox_state_line"])
				&& isset($this->value_entries["$name"]["infos"]["checkbox_no"])
				&& isset($this->value_entries["$name"]["infos"]["checkbox_yes"])) {
					$field_checkbox_line=$this->value_entries["$name"]["infos"]["checkbox_state_line"];
					if ($field_checkbox_line) {
						if ($verbose_set) {
							echo "\nChange checkbox of the field $name at line $field_checkbox_line to value [$value]";
						}
						$state = $this->value_entries["$name"]["infos"]["checkbox_no"];
						if ($value) {
							$state = $this->value_entries["$name"]["infos"]["checkbox_yes"];
						}
						$CurLine =$this->pdf_entries[$field_checkbox_line];
						$OldLen=strlen($CurLine);
						$CurLine = '/AS /'.$state;
						$NewLen=strlen($CurLine);
						$Shift=$NewLen-$OldLen;
						$this->shift=$this->shift+$Shift;
						//Saves
						$this->pdf_entries[$field_checkbox_line]=$CurLine;
						return $Shift;
					} else {
						if ($verbose_set) {
							echo "\nChange checkbox value aborted, parsed checkbox definition incomplete.";
						}
					}
				} else {
					if ($verbose_set) {
						echo "\nChange checkbox value aborted, the field $name has no checkbox definition.";
					}
				}
			} else {
				$this->Error("set_field_checkbox failed as the field $name does not exist");
			}
			return $offset_shift;
		}
		//ENDFIX

		/**
		 *Dumps the line entries
		 *
		 *@note for debug purposes
		 *@access private
		 *@param array entries the content to dump
		 *@param string tag an optional tag to highlight
		 *@param boolean halt decides to stop or not this script
		 **/
		function dumpEntries($entries,$tag="",$halt=false) {
		//------------------------------------------------------------
			if($tag) echo "\n\n$tag\n\n\n";
			if($entries) {
				echo "\n";
				echo htmlentities(print_r($entries,true));
				echo "\n";
			}
			if($halt) exit();
		}


		/**
		 *Dumps the string content
		 *
		 *@note for debug purposes
		 *@access private
		 *@param string content the content to dump
		 *@param string tag an optional tag to highlight
		 *@param boolean halt decides to stop or not this script
		 **/
		function dumpContent($content,$tag="",$halt=false) {
		//--------------------------------------------------
			if($tag) echo "\n\n$tag\n\n";
			if($content) {
				echo "\n";
				echo htmlentities($content);
				echo "\n";
			}
			if($halt) exit();
		}

		/**
		 *Retrieves the content of a file as a string
		 *
		 *A file that cannot be opened or is empty is reported through Error().
		 *PDF content using object streams, linearization or incremental updates
		 *is rejected, and needAppearancesTrue is set from the content.
		 *
		 *@access private
		 *@param string $filename the filename of the file
		 *@param string $filetype the type of file as info
		 *@return string $content
		 **/
		function getContent($filename,$filetype) {
		//----------------------------------------
			$content=false;
			$handle=fopen($filename,'rb');
			if($handle!==false) {
				$size=filesize($filename);
				//fread() refuses a zero length, an empty file is reported just below
				if($size>0) $content=fread($handle,$size);
				fclose($handle);
			}

			if (!$content)
				$this->Error(sprintf('Cannot open '.$filetype.' file %s !', $filename));

			if($filetype=='PDF')
			{
				$text=(string)$content;
				$start = substr($text, 0, 2048);
				if(strpos($start, '/ObjStm')!==false)
					$this->Error('Object streams are not supported');
				if(strpos($start, '/Linearized')!==false)
					$this->Error('Fast Web View mode is not supported');
				$end = substr($text, -512);
				if(strpos($end, '/Prev')!==false)
					$this->Error('Incremental updates are not supported');
				$this->needAppearancesTrue = (strpos($text, '/NeedAppearances true')!==false);
			}

			return $content;
		}

		/**
		 *Retrieves the content of a file as an array of lines entries
		 *
		 *@access private
		 *@param string $filename the filename of the file
		 *@param string $filetype the type of file as info
		 *@return array $entries the content split on "\n"
		 **/
		function getEntries($filename,$filetype) {
		//----------------------------------------
			$content=$this->getContent($filename,$filetype);
			$entries=explode("\n",$content);

			return $entries;
		}

/**
		 *Retrieves a binary string from its hexadecimal representation
		 *
		 *@access private
		 *@param string $hexString the hexified string
		 *@return string $bin a binary string, or FALSE when the input has an odd length or a non hexadecimal character
		 **/
		function _hex2bin ($hexString)
		{
			$hexLength=strlen($hexString);
			// only hex numbers is allowed
			if ($hexLength % 2 != 0 || preg_match("/[^\da-fA-F]/",$hexString)) return FALSE;

			//Input is validated above, each pair of digits becomes one byte
			return pack('H*',$hexString);
		}


		/**
		 *Encodes a binary string to its hexadecimal representation
		 *
		 *@access private
		 *@param string $str a binary string
		 *@return string $hex the hexified string, upper case, two digits per byte (empty for an empty input)
		 **/
		function _bin2hex($str) {
		//----------------------
			//The former do/while loop read $str[0] even for an empty string and
			//returned "00" for it.
			return strtoupper(bin2hex((string)$str));
		}


		/**
		 * Extracts the map object for the xref table
		 * @note PDF lines should have been previouly been parsed to make this work
		 * @return array a map that holds the xrefstart infos and values
		 */
		function get_xref_table() {
		//------------------------
			return $this->value_entries['$_XREF_$'];
		}

		/**
		 * Extracts the offset of the xref table
		 * @note PDF lines should have been previouly been parsed to make this work
		 * @return int the xrefstart value
		 */
		function get_xref_start() {
		//------------------------
			return $this->value_entries['$_XREF_$']["infos"]["start"]["pointer"];
		}


		/**
		 * Extracts the line where the offset of the xref table is stored
		 * @note PDF lines should have been previouly been parsed to make this work
		 * 
@return int the wished line number
		 */
		function get_xref_start_line() {
		//-------------------------------
			return $this->value_entries['$_XREF_$']["infos"]["start"]["line"];
		}

		/**
		 * Calculates the offset of the xref table
		 *
		 * @return int the parsed xrefstart value plus the accumulated size shift
		 */
		function get_xref_start_value() {
		//-------------------------------
			$size_shift=$this->shift;
			$xref_start=$this->get_xref_start();
			return $xref_start+$size_shift;
		}


		/**
		 * Read the offset of the xref table directly from file content
		 *
		 * @note the content is the one returned by $this->get_buffer()
		 * @return int the wished xrefstart offset value
		 */
		function read_xref_start_value() {
		//------------------------------
			$buffer=$this->get_buffer();
			$chunks = preg_split('/\bxref\b/', $buffer, -1, PREG_SPLIT_OFFSET_CAPTURE);
			return intval($chunks[1][1])-4; //-4 , relative to end of xref
		}


		/**
		 * Calculates the new offset/xref for this object id by applying the offset_shift due to value changes
		 *
		 * @note uses $this->positions, $this->offsets and $this->shifts
		 * @param int $object_id an object id, a integer value starting from 1
		 * @return int the wished offset
		 */
		function get_offset_object_value($object_id) {
		//--------------------------------------------

			//!NOTE: xref table is ordered by object id (position's object is not defined linearly in the pdf !)
			$positions=$this->_get_positions_ordered();
			//Makes it 0 indexed as object id starts from 1 and positions starts from 0
			$offsets=$this->_get_offsets_starting_from_zero();
			//Shifts are already 0 indexed, don't change.
			$shifts=$this->shifts;

			$p=$positions[$object_id];
			$offset=$offsets[$p];
			$shift=$shifts[$p]; //size shift of the object due to value changes
			return $offset+$shift;
		}


		/**
		 * Reads the offset of an object directly from file content
		 *
		 * @note the content is the one returned by $this->get_buffer()
		 * @param int $object_id an object id, a integer value starting from 1
		 * @return int the wished offset
		 */
		function read_offset_object_value($object_id) {
		//------------------------------
			$buffer=$this->get_buffer();
			$previous_object_footer='';//'endobj' or comment;
			//'\n' is kept as two characters here, the regexp engine turns it into a line feed
			$object_header=$previous_object_footer.'\n'.$object_id.' 0 obj';
			$chars = preg_split('/'.$object_header.'/', $buffer, -1, PREG_SPLIT_OFFSET_CAPTURE);
			//+2 compensates the two characters of '\n' counted by strlen
			$offset=intval($chars[1][1])-strlen($object_header)+strlen($previous_object_footer)+2;
			return $offset;
		}


		/**
		 * Fix the offset of the xref table
		 *
		 * The value is calculated (unless safe_mode), read from the content
		 * (safe_mode or check_mode), or both compared in check_mode where the
		 * value read wins on mismatch.
		 */
		function fix_xref_start() {
		//-------------------------

			$pdf_entries=&$this->pdf_entries;
			$verbose_fix=($this->verbose&&($this->verbose_level>1));
			$calculate_xrefstart_value=((!$this->safe_mode)||$this->check_mode);
			$extract_xrefstart_value_from_file=($this->safe_mode||$this->check_mode);

			if($calculate_xrefstart_value) {
				$xref_start_value_calculated=$this->get_xref_start_value(); //get computed value from old one
				if(!$this->safe_mode) $xref_start_value=$xref_start_value_calculated;
			}

			if($extract_xrefstart_value_from_file) {
				$xref_start_value_safe=$this->read_xref_start_value();//read direct from new file content
				if($this->safe_mode) $xref_start_value=$xref_start_value_safe;
			}

			if($this->check_mode) { //Compared calculated value with position value read direct from file
				if($xref_start_value_calculated != $xref_start_value_safe) {
					if($verbose_fix) echo "\nxrefstart's value must be $xref_start_value_safe calculated is $xref_start_value_calculated.Don't worry, FPDFM-merge will fix it for you.\n";
					$xref_start_value=$xref_start_value_safe; //Overrides with the good value
					if($this->halt_mode)
						$this->Error("Halt on error mode enabled, aborting. Use \$pdf->set_modes('halt',false); to disable this mode and go further fixing corrupted pdf.");
				} else {
					if($verbose_fix) echo "\nxrefstart's value for the file is correct and vaults $xref_start_value";
				}
			}

			//updates xrefstart's value
			$xref_start_line=$this->get_xref_start_line();
			$pdf_entries[$xref_start_line]="$xref_start_value";
		}

		/**
		 * Get the offsets table 0 indexed
		 *
		 * @return array $offsets
		 */
		function _get_offsets_starting_from_zero() {
		//-------------------------------------------
			$offsets=$this->offsets;
			return array_values($offsets);
		}

		/**
		 * Sorts the position array by key
		 *
		 * @return array $positions the ordered positions
		 */
		function _get_positions_ordered() {
		//--------------------------------
			$positions=$this->positions;
			ksort($positions);
			return $positions;
		}

		/**
		 * Fix the xref table by rebuilding its offsets entries
		 *
		 * Each entry is calculated (unless safe_mode), read from the content
		 * (safe_mode or check_mode), or both compared in check_mode where the
		 * value read wins on mismatch.
		 */
		function fix_xref_table() {
		//------------------------

			$xref_table=$this->get_xref_table();
			$xLen=$xref_table["infos"]["count"];
			$pdf_entries=&$this->pdf_entries;

			//Do some checks
			$offsets=$this->offsets;
			$oLen=count($offsets);

			if($xLen == $oLen) { //...to rectify xref entries

				//jump over len and header, this is the first entry with n
				$first_xref_entry_line=$xref_table["infos"]["line"]+3;

				//Same verbosity rule as fix_xref_start (was testing $this->verbose>1)
				$verbose_fix=($this->verbose&&($this->verbose_level>1));
				$calculate_offset_value=((!$this->safe_mode)||$this->check_mode);
				$extract_offset_value_from_file=($this->safe_mode||$this->check_mode);

				for($i=0;$i<$xLen;$i++) {

					$obj_id=$i+1;

					//Try two way to retrieve xref offset value of an object of the given id

					if($calculate_offset_value) {
						$offset_value_calculated=$this->get_offset_object_value($obj_id);
						if(!$this->safe_mode) $offset_value=$offset_value_calculated;
					}

					if($extract_offset_value_from_file) {
						$offset_value_read=$this->read_offset_object_value($obj_id);
						if($this->safe_mode) $offset_value=$offset_value_read;
					}

					if($this->check_mode) {
						if($offset_value_calculated != $offset_value_read) {
							if($verbose_fix) echo "\nOffset for object $obj_id read is $offset_value_read, calculated $offset_value_calculated";
							$offset_value=$offset_value_read; //overrides to fix bad values
							if($this->halt_mode) $this->Error("\nOffset for object $obj_id read is $offset_value_read, calculated $offset_value_calculated");
						}else {
							if($verbose_fix) echo "\nOffset for object $obj_id is correct and vaults $offset_value";
						}
					}
					$pdf_entries[$first_xref_entry_line+$i]=sprintf('%010d 00000 n ',$offset_value);
				}

			}else {
				//Congratulations you won the corrupted Error Prize
				$this->Error("Number of objects ($oLen) differs with number of xrefs ($xLen), something , pdf xref table is corrupted :(");
			}

		}


		/**
		 * Applies a shift offset $shift from the object whose id is given as param
		 *
		 * @note offset shift will affect the next objects taking into accound the order they appear in the file
		 * @access public
		 * @param int object_id the id whose size shift has changed
		 * @param int offset_shift the shift value to use
		 */
		function apply_offset_shift_from_object($object_id,$offset_shift) {
		//---------------------------------------------------------
			//get the position of object
			$object_pos=$this->positions[$object_id];
			//get the next object position
			$next_object_pos=$object_pos+1;
			//Applies offset change to next following objects
			$this->_apply_offset_shift($next_object_pos,$offset_shift);
		}

		/**
		 * Applies a shift offset $shift starting at the index $from to the shifts array
		 *
		 * @access private
		 * @param int from the index to start apply the shift
		 * @param int shift the shift value to use
		 */
		function _apply_offset_shift($from,$shift) {
		//------------------------------------------
			$offsets=&$this->shifts;

			foreach($offsets as $key=>$value) {
				if($key>=$from) {
					$offsets[$key]=$value+$shift;
				}
			}

		}

		/**
		 * Decodes a PDF value according to the encoding
		 *
		 * @access public
		 * @param string $encoding the
encoding to use for decoding the value, only 'hex' is supported 1418 | * @param string value a value to decode 1419 | * @return string the value decoded 1420 | */
		function decodeValue($encoding,$value) {
			//----------------------------------------------
			//Anything but 'hex' is handed back as it came in
			return ($encoding=="hex") ? $this->pdf_decode_field_value($value) : $value;
		}

		/**
		 * Lists the names of the supported filters
		 *
		 * @note reads the global $FPDM_FILTERS array
		 * @param String $sep the separator placed between the filter names, default is '|'
		 * @return String the supported filter names joined by $sep
		 **/
		function getFilters($sep="|") {
			//---------------------
			global $FPDM_FILTERS;
			$supported=implode($sep,$FPDM_FILTERS);
			return $supported;
		}
 1441 | 1442 | 1443 | /** 1444 | *Get a filter by name 1445 | * 1446 | *@param name a string matching one of the supported default filters (marked with +) * 1447 | *Without parameters: 1448 | *+ ASCIIHexDecode : Decodes data encoded in an ASCII hexadecimal representation, reproducing the original binary data. 1449 | *+ ASCII85Decode : Decodes data encoded in an ASCII base-85 representation, reproducing the original binary data. 1450 | * RunLengthDecode : Decompresses data encoded using a byte-oriented run-length encoding algorithm, reproducing the original text or binary data (typically monochrome image data, or any data that contains frequent long runs of a single byte value). 1451 | * JPXDecode : (PDF 1.5) Decompresses data encoded using the wavelet-based JPEG2000 standard, reproducing the original image data. 1452 | *With parameter(s): 1453 | *+ LZWDecode : Decompresses data encoded using the LZW (Lempel-Ziv-Welch) adaptive compression method, reproducing the original text or binary data. 1454 | *+ FlateDecode (PDF 1.2): Decompresses data encoded using the zlib/deflate compression method, reproducing the original text or binary data.
1455 | * CCITTFaxDecode : Decompresses data encoded using the CCITT facsimile standard, reproducing the original data (typically monochrome image data at 1 bit per pixel). 1456 | * JBIG2Decode (PDF 1.4) :Decompresses data encoded using the JBIG2 standard, reproducing the original monochrome (1 bit per pixel) image data (or an approximation of that data). 1457 | * DCTDecode : Decompresses data encoded using a DCT (discrete cosine transform) technique based on the JPEG standard, reproducing image sample data that approximates the original data. 1458 | * Crypt (PDF 1.5) :Decrypts data encrypted by a security handler, reproducing the data as it was before encryption. 1459 | *@return the wished filter class to access the stream 1460 | **/
		function getFilter($name) {
			//---------------------
			//Each supported name maps to exactly one filter class
			switch($name) {
				case "LZWDecode":
					return new FilterLZW();
				case "ASCIIHexDecode":
					return new FilterASCIIHex();
				case "ASCII85Decode":
					return new FilterASCII85();
				case "FlateDecode":
					return new FilterFlate();
				case "Standard": //Raw
					return new FilterStandard();
			}

			$this->Error("getFilter cannot open stream of object because filter '{$name}' is not supported, sorry.");

			//Error() as defined in this class ends the script; the explicit null
			//keeps the return value defined should it ever return
			return null;
		}
 1487 | 1488 | 1489 | //========= Stream manipulation stuff (alpha, not used by now!)
================

		/**
		 * Detect if the stream has a textual content
		 *
		 * @access public
		 * @param string $stream_content the string content of the stream
		 * @return int|false the preg_match() result: 1 when a Td .. Tj text sequence is found, 0 when not, false on regexp failure
		 */
		function is_text_stream($stream_content) {
			//--------------------------------------
			return preg_match("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream_content);
		}

		/**
		 * changes the text value of a text stream
		 *
		 * NOTE(review): $is_text_stream, $stream_content and $obj are read below but
		 * never assigned in this method, so the guard is always false and the method
		 * currently does nothing. The parser in this file stores length, start, end,
		 * content and filters in the stream definition, but no object id, and the
		 * decoded content only lives in a local of the parser. Decide where those
		 * three values come from before relying on this method.
		 *
		 * @access public
		 * @param array $stream the stream definition retrieved during PDF parsing
		 * @param string $value the new text value
		 */
		function change_stream_value($stream,$value) {
			//--------------------------------------------

			$entries=&$this->pdf_entries;

			$verbose_parsing=($this->verbose&&($this->verbose_level>3));

			if($is_text_stream) {

				$OldLen=$stream["length"]["value"];
				$lMin=$stream["start"];
				$lMax=$stream["end"];

				$stream_content=$this->_set_text_value($stream_content,$value);
				$NewLen=strlen($stream_content);

				for($l=$lMin;$l<=$lMax;$l++) {

					if($l==$lMin) {
						//The whole new content is stored on the first line of the stream
						$entries[$lMin]=$stream_content;

						//Update the length
						$stream_def_line=$stream["length"]["line"];
						$stream_def=$entries[$stream_def_line];

						$stream_def=preg_replace("/\/Length\s*(\d+)/",'/Length '.$NewLen,$stream_def);

						$entries[$stream_def_line]=$stream_def;

						//update the filter type...
						$stream_def_line=$stream["filters"]["line"];
						$stream_def=$entries[$stream_def_line];
						if($verbose_parsing) {
							echo "\n";
							echo htmlentities(print_r($stream_def,true));
							echo "\n";
						}

						//...to filter Standard
						$stream_def=preg_replace($this->streams_filter,'/Standard ',$stream_def);

						$entries[$stream_def_line]=$stream_def;

						//Update the shift
						$size_shift=$NewLen-$OldLen;
						$this->apply_offset_shift_from_object($obj,$size_shift);

					}else if($lMin!=$lMax) { //was $lmin, a variable that is never defined
						//the remaining lines of the old stream are dropped
						unset($entries[$l]);
					}
				}

				if($verbose_parsing) {
					var_dump($stream_content);
				}
			}
		}

		/**
		 * Overrides the value found between Td and Tj, the delimiters <> or () are kept
		 *
		 * @note core method
		 * @access private
		 * @param string $stream the content of the stream
		 * @param string $value the new text value
		 * @return string the content with the value replaced
		 */
		function _set_text_value($stream,$value) {
			//---------------------------------------
			//chunks: [0] text before, [1] "Td <" or "Td (", [2] the value, [3] "> Tj" or ") Tj", then the rest
			$chunks=preg_split("/(\s*Td\s+[\<\(])([^\>\)]+)([\>\)]\s+Tj)/",$stream,0,PREG_SPLIT_DELIM_CAPTURE);
			$chunks[2]=$value;
			//separator first: the (array, separator) order is rejected by PHP 8
			return implode('',$chunks);
		}


		//================================

		/**
		 * Applies the regexp registered for $name in the global $FPDM_REGEXPS to a line,
		 * then falls back to the concatenated form /name/value
		 *
		 * @param string $name the definition name, used as key of $FPDM_REGEXPS
		 * @param string $line the line to search
		 * @param array $match receives the preg_match() captures
		 * @return int|false the result of the last preg_match() tried
		 */
		function _extract_pdf_definition_value($name,$line,&$match) {
			//-----------------------------------------------------------
			global $FPDM_REGEXPS;
			$found=preg_match($FPDM_REGEXPS["$name"],$line,$match);
			if(!$found) { //value is concatenated with name: /name/value
				$found=preg_match("/".preg_quote($name,'/')."\/(\w+)/",$line,$match);
			}
			return $found;
		}
 1596 | 1597 | function extract_pdf_definition_value($name,$line,&$match) { 1598 | //----------------------------------------------------------- 1599 | global $FPDM_REGEXPS; 1600 | if(array_key_exists($name,$FPDM_REGEXPS)) { 1601 | $value=$this->_extract_pdf_definition_value($name,$line,$match); 1602 | }else 1603 | $this->Error("extract_pdf_definition_value() does not support definition '$name'"); 1604 | 1605 | /*if($name=="/Type") { 1606 |
if(preg_match("/\//",$line,$foo)) { 1607 | var_dump($match); 1608 | die("Decoding $name value in line ".htmlentities($line)); 1609 | } 1610 | }*/ 1611 | return $value; 1612 | } 1613 | 1614 | 1615 | /** 1616 | * Parses the line entries of a PDF (read from $this->pdf_entries) 1617 | * 1618 | * @access public 1619 | * @param array $lines filled by reference: one entry per named Widget field, plus a '$_XREF_$' entry describing the xref table 1620 | * @return integer the number of entries stored in $lines 1621 | */ 1622 | function parsePDFEntries(&$lines){ 1623 | //-------------------------------- 1624 | 1625 | $entries=&$this->pdf_entries; 1626 | 1627 | $CountLines = count($entries); 1628 | 1629 | $Counter=0; 1630 | $obj=0; //this is an invalid object id, we use it to know if we are into an object 1631 | //FIX: parse checkbox definition 1632 | $ap_d_yes=''; 1633 | $ap_d_no=''; 1634 | $ap_line=0; 1635 | $ap_d_line=0; 1636 | $as=''; 1637 | //ENDFIX 1638 | $type=''; 1639 | $subtype=''; 1640 | $name=''; 1641 | $value=''; 1642 | $default_maxLen=0; //No limit 1643 | $default_tooltip_line=0; //Tooltip is optional as it may not be defined 1644 | $xref_table=0; 1645 | $trailer_table=0; 1646 | $n=0; //Position of an object, in the order it is declared in the pdf file 1647 | $stream=array(); 1648 | $id_def=false; //true when parsing/decoding trailer ID 1649 | $id_single_line_def=false; //true when the two ID chunks are on the same line 1650 | $id_multi_line_def=false; //true or OpenOffice 3.2 1651 | $creator=''; 1652 | $producer=''; 1653 | $creationDate=''; 1654 | 1655 | $verbose_parsing=($this->verbose&&($this->verbose_level>3)); 1656 | $verbose_decoding=($this->verbose&&($this->verbose_level>4)); 1657 | 1658 | if($this->verbose) $this->dumpContent("Starting to parse $CountLines entries","PDF parse"); 1659 | 1660 | while ( $Counter < $CountLines ){ 1661 | 1662 | $CurLine = $entries[$Counter]; 1663 | 1664 | if($verbose_parsing) $this->dumpContent($CurLine,"====Parsing Line($Counter)"); 1665 | if(!$xref_table) { 1666 | 1667 | //Header of an object?
1668 | if(preg_match("/^(\d+) (\d+) obj/",$CurLine,$match)) { 1669 | $obj=intval($match[1]); 1670 | $this->offsets[$obj]=$this->pointer; 1671 | $this->positions[$obj]=$n; 1672 | $this->shifts[$n]=0; 1673 | $n++; 1674 | if($verbose_parsing) $this->dumpContent($CurLine,"====Opening object($obj) at line $Counter"); 1675 | $object=array(); 1676 | $object["values"]=array(); 1677 | $object["constraints"]=array(); 1678 | $object["constraints"]["maxlen"]=$default_maxLen; 1679 | $object["infos"]=array(); 1680 | $object["infos"]["object"]=intval($obj); 1681 | $object["infos"]["tooltip"]=$default_tooltip_line; 1682 | 1683 | } else { 1684 | 1685 | //Object has been opened 1686 | if($obj) { 1687 | 1688 | //Footer of an object? 1689 | if(preg_match("/endobj/",$CurLine,$match)) { 1690 | if($verbose_parsing) $this->dumpContent("","====Closing object($obj) at line $Counter"); 1691 | 1692 | //We process fields here, save only Annotations texts that are supported by now 1693 | if($subtype=="Widget") { 1694 | 1695 | if($name != '') { 1696 | $lines["$name"]=$object; 1697 | if($verbose_parsing) $this->dumpContent("$type $subtype (obj id=$obj) is a text annotation of name '$name', saves it."); 1698 | }//else 1699 | // $this->Error("$type $subtype (obj id=$obj) is a text annotation without a name, this cannot be."); 1700 | 1701 | 1702 | $values=$object["values"]; 1703 | 1704 | //Sanity values checks, watchdog. 
1705 | // if(!array_key_exists("current",$values)) $this->Error("Cannot find value (/V) for field $name"); 1706 | // if(!array_key_exists("default",$values)) $this->Error("Cannot find default value (/DV) for field $name"); 1707 | 1708 | }else 1709 | if($verbose_parsing) $this->dumpContent("Object $type $subtype (obj id=$obj) is not supported"); 1710 | 1711 | 1712 | $object=null; 1713 | $obj=0; 1714 | //FIX: parse checkbox definition 1715 | $ap_d_yes=''; 1716 | $ap_d_no=''; 1717 | $ap_line=0; 1718 | $ap_d_line=0; 1719 | $as=''; 1720 | //ENDFIX 1721 | $type=''; 1722 | $subtype=''; 1723 | $name=''; 1724 | $value=''; 1725 | $maxLen=0; 1726 | 1727 | } else { 1728 | 1729 | if(preg_match("/\/Length\s*(\d+)/",$CurLine,$match)) { 1730 | $stream["length"]=array("line"=>$Counter,"value"=>$match[1]); 1731 | $stream["start"]=0; 1732 | $stream["end"]=0; 1733 | $stream["content"]=''; 1734 | if($verbose_parsing) $this->dumpContent($CurLine,"->Stream filter length definition({$match[1]}) for object($obj) at line $Counter"); 1735 | } 1736 | 1737 | //Handles single filter /Filter /filter_type as well as well as filter chains such as /Filter [/filter_type1 /filter_type2 .../filter_typeN] 1738 | if(preg_match_all($this->streams_filter,$CurLine,$matches)) { 1739 | 1740 | //$this->dumpContent($this->streams_filter); 1741 | /*$stream_filter=$match[1]; 1742 | $stream_filter=trim(preg_replace('/(<<|\/Length\s*\d+|>>)/', '', $stream_filter),' '); 1743 | $stream_filters=preg_split('/\s*\//',$stream_filter); 1744 | array_shift($stream_filters);*/ 1745 | $stream_filters=$matches[2]; 1746 | $stream["filters"]=array("line"=>$Counter, "type"=>$stream_filters); 1747 | if($verbose_parsing) { 1748 | //var_dump($stream_filters); 1749 | $stream_filter=implode(" ",$stream_filters); 1750 | $this->dumpContent($CurLine,"->Stream filter type definition($stream_filter) for object($obj) at line $Counter"); 1751 | } 1752 | } 1753 | 1754 | if(array_key_exists("length",$stream)) { //length is mandatory 1755 | 
1756 | if(preg_match("/\b(stream|endstream)\b/",$CurLine,$match)) { 1757 | 1758 | if(!array_key_exists("filters",$stream)) {//filter type is optional, if none is given, its standard 1759 | 1760 | $stream["filters"]=array("type"=>array("Standard")); 1761 | if($verbose_parsing) { 1762 | var_dump($stream); 1763 | $this->dumpContent($CurLine,"->No stream filter type definition for object($obj) was found, setting it to 'Standard'"); 1764 | } 1765 | } 1766 | 1767 | 1768 | if($match[1] == "stream") { 1769 | if($verbose_parsing) $this->dumpContent($CurLine,"->Opening stream for object($obj) at line $Counter"); 1770 | $stream["start"]=$Counter+1; 1771 | }else { 1772 | $stream["end"]=$Counter-1; 1773 | 1774 | $stream["content"]=implode("\n",array_slice($entries,$stream["start"],$stream["end"]-$stream["start"]+1)); 1775 | 1776 | 1777 | 1778 | $filters=$stream["filters"]["type"]; 1779 | $f=count($filters); 1780 | $stream_content=$stream["content"]; 1781 | 1782 | //var_dump($filters); 1783 | 1784 | //$filters_type=$filters["type"]; 1785 | 1786 | //now process the stream, ie unpack it if needed 1787 | //by decoding in the reverse order the streams have been encoded 1788 | //This is done by applying decode using the filters in the order given by /Filter. 1789 | foreach($filters as $filter_name) { 1790 | 1791 | $stream_filter=$this->getFilter($filter_name); 1792 | $stream_content=$stream_filter->decode($stream_content); 1793 | if($verbose_decoding) { 1794 | echo "
Stream decoded using filter '$filter_name':[
";
1795 | 													var_dump($stream_content); //todo : manipulate this content and adjust offsets.
1796 | 													echo "
]
"; 1797 | } 1798 | } 1799 | 1800 | if($verbose_parsing) { 1801 | $this->dumpEntries($stream); 1802 | 1803 | echo ""; 1804 | if($this->is_text_stream($stream_content)) { 1805 | echo "Stream text unfiltered:[
";
1806 | 												} else {
1807 | 													echo "Stream unfiltered:[
";
1808 | 												}
1809 | 												var_dump($stream_content); 
1810 | 												echo "
]
"; 1811 | $this->dumpContent($CurLine,"->Closing stream for object($obj) at line $Counter"); 1812 | } 1813 | 1814 | $stream=array(); 1815 | } 1816 | }else if($stream["start"]>0){ 1817 | //stream content line that will be processed on endstream... 1818 | } 1819 | 1820 | } else { 1821 | 1822 | /* 1823 | Producer 1824 | /CreationDate (D:20101225151810+01'00')>> 1825 | */ 1826 | if(($creator=='')&&preg_match("/\/Creator\<([^\>]+)\>/",$CurLine,$values)) { 1827 | $creator=$this->decodeValue("hex",$values[1]); 1828 | if($verbose_parsing) echo("Creator read ($creator)"); 1829 | $this->info["Creator"]=$creator; 1830 | } 1831 | 1832 | if(($producer=='')&&preg_match("/\/Producer\<([^\>]+)\>/",$CurLine,$values)) { 1833 | $producer=$this->decodeValue("hex",$values[1]); 1834 | if($verbose_parsing) echo("Producer read ($producer)"); 1835 | $this->info["Producer"]=$producer; 1836 | } 1837 | 1838 | if(($creationDate=='')&&preg_match("/\/CreationDate\(([^\)]+)\)/",$CurLine,$values)) { 1839 | $creationDate=$values[1]; 1840 | if($verbose_parsing) echo("Creation date read ($creationDate)"); 1841 | $this->info["CreationDate"]=$creationDate; 1842 | } 1843 | 1844 | //=== DEFINITION ==== 1845 | //preg_match("/^\/Type\s+\/(\w+)$/",$CurLine,$match) 1846 | $match=array(); 1847 | //FIX: parse checkbox definition 1848 | if($this->useCheckboxParser && ('' == $ap_d_yes || '' == $ap_d_no || '' == $as)) { 1849 | if (!$ap_line && '/AP' == substr($CurLine, 0, 3)) { 1850 | if ($verbose_parsing) { 1851 | echo("
Found AP Line '$Counter'"); 1852 | } 1853 | $ap_line = $Counter; 1854 | } elseif (!$ap_d_line && '/D' == substr($CurLine, 0, 2)) { 1855 | if ($verbose_parsing) { 1856 | echo("
Found D Line '$Counter'"); 1857 | } 1858 | $ap_d_line = $Counter; 1859 | } elseif (($ap_line==$Counter-4)&&($ap_d_line==$Counter-2)&&($ap_d_yes=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) { 1860 | $ap_d_yes=$match[1]; 1861 | if ($verbose_parsing) { 1862 | echo("
Object's checkbox_yes is '$ap_d_yes'"); 1863 | } 1864 | $object["infos"]["checkbox_yes"]=$ap_d_yes; 1865 | } elseif (($ap_line==$Counter-5)&&($ap_d_line==$Counter-3)&&($ap_d_no=='')&&$this->extract_pdf_definition_value("name", $CurLine, $match)) { 1866 | $ap_d_no=$match[1]; 1867 | if ($verbose_parsing) { 1868 | echo("
Object's checkbox_no is '$ap_d_no'"); 1869 | } 1870 | $object["infos"]["checkbox_no"]=$ap_d_no; 1871 | } elseif (($as=='')&&$this->extract_pdf_definition_value("/AS", $CurLine, $match)) { 1872 | $as=$match[1]; 1873 | if ($verbose_parsing) { 1874 | echo("
Object's AS is '$as'"); 1875 | } 1876 | $object["infos"]["checkbox_state"]=$as; 1877 | $object["infos"]["checkbox_state_line"]=$Counter; 1878 | } 1879 | } 1880 | //ENDFIX 1881 | if(($type=='')||($subtype=='')||($name=="")) { 1882 | 1883 | if(($type=='')&&$this->extract_pdf_definition_value("/Type",$CurLine,$match)) { 1884 | 1885 | if($match[1]!='Border') { 1886 | $type=$match[1]; 1887 | if($verbose_parsing) echo("
Object's type is '$type'"); 1888 | } 1889 | 1890 | } 1891 | if(($subtype=='')&&$this->extract_pdf_definition_value("/Subtype",$CurLine,$match)) { 1892 | 1893 | $subtype=$match[1]; 1894 | if($verbose_parsing) echo("
Object's subType is '$subtype'"); 1895 | 1896 | } 1897 | if(($name=="")&&preg_match("/^\/T\s?\((.+)\)\s*$/",$this->_protectContentValues($CurLine),$match)) { 1898 | 1899 | $name=$this->_unprotectContentValues($match[1]); 1900 | //FIX: convert ASCII object names to utf-8 1901 | // don't use utf8_encode($name) yet, it's core function since php 7.2 1902 | $name = mb_convert_encoding($name, 'UTF-8', 'ASCII'); 1903 | //ENDFIX 1904 | if($verbose_parsing) echo ("Object's name is '$name'"); 1905 | 1906 | $object["infos"]["name"]=$name; //Keep a track 1907 | $object["infos"]["name_line"]=$Counter; 1908 | 1909 | //$this->dumpContent(" Name [$name]"); 1910 | } 1911 | 1912 | }// else { 1913 | 1914 | //=== CONTENT ==== 1915 | 1916 | //$this->dumpContent($CurLine); 1917 | //=== Now, start the serious work , read DV, V Values and eventually TU 1918 | //note if(preg_match_all("/^\/(V|DV)\s+(\<|\))([^\)\>]+)(\)|\>)/",$CurLine,$matches)) { 1919 | //do not work as all is encoded on the same line... 1920 | if(preg_match("/^\/(V|DV|TU)\s+([\<\(])/",$CurLine,$def)) { 1921 | 1922 | //get an human readable format of value type and encoding 1923 | 1924 | if($def[1] == "TU") { 1925 | $valuetype="info"; 1926 | $object["infos"]["tooltip"]=$Counter; 1927 | } else { 1928 | $valuetype=($def[1] == "DV") ? "default" : "current"; 1929 | $object["values"]["$valuetype"]=$Counter; //Set a marker to process lately 1930 | } 1931 | 1932 | $encoding=($def[2]=="<") ? 
"hex" : "plain"; 1933 | 1934 | if(preg_match("/^\/(V|DV|TU)\s+(\<|\)|\()([^\)\>]*)(\)|\>\))/",$CurLine,$values)) { 1935 | $value=$values[3]; 1936 | $value=$this->decodeValue($encoding,$value); 1937 | }else 1938 | $value=''; 1939 | 1940 | if($verbose_parsing) 1941 | $this->dumpContent("$type $subtype (obj id=$obj) has $encoding $valuetype value [$value] at line $Counter"); 1942 | 1943 | 1944 | }else if(preg_match("/^\/MaxLen\s+(\d+)/",$CurLine,$values)) { 1945 | $maxLen=$values[1]; 1946 | $object["constraints"]["maxlen"]=intval($maxLen); 1947 | } else 1948 | if($verbose_parsing) echo("WARNING: definition ignored"); 1949 | 1950 | if(substr($CurLine,0,7)=='/Fields' && !$this->needAppearancesTrue) { 1951 | $CurLine='/NeedAppearances true '.$CurLine; 1952 | $entries[$Counter]=$CurLine; 1953 | } 1954 | 1955 | //TODO: Fetch the XObject..and change Td <> Tj 1956 | /* if(preg_match("/^\/AP/",$CurLine,$values)) { 1957 | //die("stop"); 1958 | $CurLine=''; //clear link to Xobject 1959 | $entries[$Counter]=$CurLine; 1960 | }*/ 1961 | 1962 | // } 1963 | 1964 | } 1965 | 1966 | 1967 | } 1968 | 1969 | } 1970 | 1971 | //~~~~~Xref table header? 
~~~~~~ 1972 | if(preg_match("/\bxref\b/",$CurLine,$match)) { 1973 | 1974 | $xref_table=1; 1975 | if($verbose_parsing) $this->dumpContent("->Starting xref table at line $Counter:[$CurLine]"); 1976 | $lines['$_XREF_$']=array(); 1977 | $lines['$_XREF_$']["entries"]=array(); 1978 | $lines['$_XREF_$']["infos"]=array(); 1979 | $lines['$_XREF_$']["infos"]["line"]=$Counter; 1980 | $lines['$_XREF_$']["infos"]["start"]=array(); 1981 | $start_pointer=$this->pointer+strpos($CurLine,"xref"); //HACK for PDFcreator 1.0.0 1982 | $lines['$_XREF_$']["infos"]["start"]["pointer"]=$start_pointer; 1983 | } 1984 | 1985 | } 1986 | $obj_header=false; 1987 | } else { 1988 | //We are inside the xref table 1989 | //$this->dumpContent($CurLine,""); 1990 | $xref_table=$xref_table+1; 1991 | switch($xref_table) { 1992 | case 2: 1993 | if(preg_match("/^(\d+) (\d+)/",$CurLine,$match)) { 1994 | $refs_count=intval($match[2]);//xref_table length+1 (includes this line) 1995 | $lines['$_XREF_$']["infos"]["count"]=$refs_count-1; 1996 | if($verbose_parsing) $this->dumpContent("Xref table length is $refs_count"); 1997 | }else 1998 | if($verbose_parsing) $this->dumpContent("WARNING: Xref table length ignored!"); 1999 | break; 2000 | case 3: 2001 | //Should be 0000000000 65535 f 2002 | if($verbose_parsing) $this->dumpContent("this is Xref table header, should be 0000000000 65535 f "); 2003 | break; 2004 | default: 2005 | //xref entries 2006 | if($refs_count>0) { 2007 | $xref=$xref_table-3; 2008 | 2009 | if($refs_count == 1) {//Last one , due to the shift, is the trailer 2010 | if(!preg_match("/^trailer/",$CurLine)) //if not, Houston we have a problem 2011 | $this->Error("xref_table length corrupted?: Trailer not found at expected!"); 2012 | else 2013 | $trailer_table=1; 2014 | }else { 2015 | $lines['$_XREF_$']["entries"][$xref]=$CurLine; 2016 | if($verbose_parsing) $this->dumpContent("Xref table entry for object $xref found."); 2017 | } 2018 | $refs_count--; 2019 | } else { //We are inside the trailer 2020 | 
2021 | if($trailer_table==1) { //should be << 2022 | 2023 | if(trim($CurLine) != '') { //HACK: PDFCreator Version 1.0.0 has an extra CR after trailer 2024 | if(!preg_match("/<Error("trailer_table corrupted?; missing start delimiter << "); 2026 | $trailer_table++; 2027 | } 2028 | 2029 | 2030 | }else if(($trailer_table>0)&&((!is_null($id_def))||preg_match("/^\/(Size|Root|Info|ID|DocChecksum)/",$CurLine,$match))) { 2031 | 2032 | //Value can be extracted using (\d+|\[[^\]]+\]) 2033 | if(preg_match("/\/Size (\d+)/",$CurLine,$match)) { 2034 | //Seems to match with xref entries count.. 2035 | $size_read=$match[1]; 2036 | $this->info["size"]=$size_read; 2037 | if($verbose_parsing) $this->dumpContent("Size read ($size_read) for pdf found."); 2038 | } 2039 | 2040 | if(preg_match("/^\/ID\s*\[\s*<([\da-fA-F]+)/",$CurLine,$match)) { 2041 | $oid=$match[1]; 2042 | $id_def=true; 2043 | if($verbose_parsing) $this->dumpContent("ID chunk one ($oid) for pdf found."); 2044 | 2045 | //Determines if the ID definition is one line... 
2046 | if(preg_match("/\>\s?\.*$/",$CurLine,$match)) { 2055 | $tid=$match[1]; 2056 | $this->info["ID"]=array($oid,$tid); 2057 | if($verbose_parsing) $this->dumpContent("ID chunk two ($tid) for pdf found."); 2058 | $id_def=false; 2059 | }else 2060 | $this->Error("trailer_table corrupted?; ID chunk two can not be decoded "); 2061 | } else 2062 | $id_multi_line_def=true; 2063 | } 2064 | 2065 | if(preg_match("/^\/DocChecksum \/([\da-fA-F]+)/",$CurLine,$match)) { 2066 | $checksum=$match[1]; 2067 | $this->info["checksum"]=$checksum; 2068 | if($verbose_parsing) $this->dumpContent("Checksum read ($checksum) for pdf found."); 2069 | } 2070 | 2071 | if(preg_match("/>>/",$CurLine,$match)) 2072 | $trailer_table=-1;//negative value: expects startxref to follow 2073 | 2074 | 2075 | } else { 2076 | 2077 | switch($trailer_table) { 2078 | case -1://startxref 2079 | if(!preg_match("/^startxref/",$CurLine,$match)) 2080 | $this->Error("startxref tag expected, read $CurLine"); 2081 | break; 2082 | case -2://startxref's value 2083 | if(preg_match("/^(\d+)/",$CurLine,$match)) { 2084 | $lines['$_XREF_$']["infos"]["start"]["value"]=intval($match[1]); 2085 | $lines['$_XREF_$']["infos"]["start"]["line"]=$Counter; 2086 | }else 2087 | $this->Error("startxref value expected, read $CurLine"); 2088 | break; 2089 | default://%%EOF 2090 | } 2091 | $trailer_table--; 2092 | 2093 | } 2094 | 2095 | } 2096 | } 2097 | 2098 | } 2099 | 2100 | $this->pointer=$this->pointer+strlen($CurLine)+1; //+1 due to \n 2101 | $Counter++; 2102 | } 2103 | 2104 | if($this->verbose) { 2105 | 2106 | $refs=(array_key_exists('$_XREF_$',$lines)) ? 
$lines['$_XREF_$']["infos"]["count"] : 0; 2107 | if($refs) { 2108 | $this->dumpContent("PDF parse retrieved $refs refs"); 2109 | }else { 2110 | $this->dumpContent("PDF parse retrieved no refs, seems the xref table is broken or inacessible, this is bad!"); 2111 | } 2112 | } 2113 | 2114 | return count($lines); 2115 | }

		/**
		 * Protect ( ) that may be in value or names
		 *
		 * Escaped parentheses are swapped for placeholders so that the field regexp
		 * of parseFDFContent() can rely on unescaped ( ) as delimiters.
		 *
		 * @access protected
		 * @param string $content the FDF content to protect values
		 * @return string the content protected
		 */
		function _protectContentValues($content) {
			//-------------------------------------------------
			$content=str_replace("\\(","$@#",$content);
			$content=str_replace("\\)","#@$",$content);
			return $content;
		}

		/**
		 * Unprotect ( ) that may be in value or names
		 *
		 * Puts the escaped parentheses back, then removes the escaping with stripcslashes().
		 *
		 * @access protected
		 * @param string $content the FDF content with protected values
		 * @return string the content unprotected
		 */
		function _unprotectContentValues($content) {
			//--------------------------------------------------
			$content=str_replace("$@#","\\(",$content);
			$content=str_replace("#@$","\\)",$content);
			$content=stripcslashes($content);
			return $content;
		}

		/**
		 * Parses the content of a FDF file ($this->fdf_content) and extracts the field data
		 *
		 *@access public
		 *@return array $fields the values of the fields parsed, indexed by field name; empty when no field is found
		 */
		function parseFDFContent(){
			//-------------------------

			$content=$this->fdf_content;
			$content=$this->_protectContentValues($content);//protect ( ) that may be in value or names...

			if($this->verbose) $this->dumpEntries($content,"FDF parse");

			//Defined up front: it is dumped and returned even when nothing matches
			$fields=array();

			//..so that this regexp can do its job without annoyances
			if(preg_match_all("/(T|V)\s*\(([^\)]+)\)\s*\/(T|V)\s*\(([^\)]+)\)/", $content,$matches, PREG_PATTERN_ORDER)) {

				$fMax=count($matches[0]);
				for($f=0;$f<$fMax;$f++) {
					$value='';
					$name='';
					//A declaration is either /V (..) /T (..) or /T (..) /V (..)
					if($matches[1][$f]=="V") {
						$value=$matches[2][$f];
						if($matches[3][$f]=="T")
							$name=$matches[4][$f];
						else
							$this->Error("Field $f ignored , incomplete field declaration, name is expected");
					} else {
						if($matches[1][$f]=="T") {
							$name=$matches[2][$f];
							if($matches[3][$f]=="V")
								$value=$matches[4][$f];
							else
								$this->Error("Field $f ignored , incomplete field declaration, value is expected");
						} else
							$this->Error("Field $f ignored , Invalid field keys ({$matches[0][$f]})");
					}
					if($name!='') {
						if(array_key_exists($name,$fields))
							$this->Error("Field $f ignored , already defined");
						else {
							$name=$this->_unprotectContentValues($name);
							$value=$this->_unprotectContentValues($value);
							if($this->verbose)
								$this->dumpContent("FDF field [$name] has its value set to \"$value\"");
							$fields[$name]=$value;
						}
					} else
						$this->Error("Field $f ignored , no name");

				}
			} else
				if($this->verbose) $this->dumpContent($fields,"FDF has no fields",false);

			if($this->verbose) $this->dumpContent($fields,"FDF parsed",false);

			return $fields;
		}


		/**
		 * Close the opened file
		 *
		 * Does nothing when $this->f is not set or is not a resource.
		 */
		function closeFile() {
			//--------------------
			if (isset($this->f) && is_resource($this->f)) {
				fclose($this->f);
				unset($this->f);
			}
		}
 2217 | 2218 | /** 2219 | * Print Error and die 2220 | * 2221 | * @param string $msg
Error-Message 2222 | */ 2223 | function Error($msg) { 2224 | //-------------------- 2225 | die('FPDF-Merge Error: '.$msg); //die() ends the script: nothing after a call to Error() runs 2226 | } 2227 | 2228 | 2229 | } 2230 | 2231 | } 2232 | 2233 | unset($__tmp); 2234 | -------------------------------------------------------------------------------- /src/lib/url.php: -------------------------------------------------------------------------------- 1 | 0) ? func_get_arg(0) : $_SERVER["SERVER_PORT"]; 22 | $schemes=array( 23 | 'http'=> 80,// default for http 24 | 'https'=> 443, // default for https 25 | 'ftp' => 21, // default for ftp 26 | 'ftps'=> 990 // default for ftps 27 | ); 28 | $ports=array_flip($schemes); 29 | return (array_key_exists($port,$ports)) ? $ports[$port] : 0; 30 | } 31 | 32 | function getHost() { 33 | //Returns the host of the current request as found in $_SERVER["HTTP_HOST"] 34 | return $_SERVER["HTTP_HOST"];// [SERVER_NAME] 35 | } 36 | 37 | 38 | if (!function_exists('fix_path')) { 39 | // fixes windows paths... 40 | // (windows accepts forward slashes and backwards slashes, so why does PHP use backwards?
41 | function fix_path($path) { 42 | //------------------------- 43 | return str_replace('\\','/',$path); 44 | } 45 | }

	/**
	 * Turns a local directory into its path relative to the document root
	 *
	 * @param string $local_dir the local directory
	 * @return string $local_dir with $_SERVER["DOCUMENT_ROOT"] removed
	 */
	function getWebDir($local_dir) {
		//----------------------------
		$local_root=$_SERVER["DOCUMENT_ROOT"];
		$server_dir=str_replace($local_root,'',$local_dir);
		return $server_dir;
	}

	/**
	 * Builds the url of a local directory
	 *
	 * Local dir may be:
	 *  the main script dir: dirname($_SERVER['PHP_SELF'])
	 *  the current script dir fix_path(dirname(__FILE__))
	 *
	 * @param string $local_dir the local directory
	 * @return string the url, made of getScheme(), getHost() and the web dir
	 */
	function getUrlfromDir($local_dir) {
		//-------------------------------
		//the separator is written below, a leading one would give "host//dir"
		$server_dir=ltrim(getWebDir($local_dir),'/');
		$server_scheme=getScheme();
		$server_host=getHost();
		return "{$server_scheme}://{$server_host}/$server_dir";
	}

	/**
	 * Compiles url out of array of it's pieces
	 * 'query' is ignored if 'query_params' is present
	 *
	 * Pieces are the ones parse_url() returns: scheme, user, pass, host, port,
	 * path, query, fragment. Every piece is optional; a missing piece is left
	 * out of the result together with its delimiter.
	 *
	 * @param Array $aUrl Array of url pieces
	 * @return string the url, or "" when $aUrl is not an array
	 */
	function build_url($aUrl) {
		//-------------------------
		//[scheme]://[user]:[pass]@[host]:[port]/[path]?[query]#[fragment]

		if (!is_array($aUrl)) {
			return "";
		}

		// Compile query
		$sQuery = '';
		if (isset($aUrl['query_params']) && is_array($aUrl['query_params'])) {
			$aPairs = array();
			foreach ($aUrl['query_params'] as $sKey=>$sValue) {
				$aPairs[] = $sKey.'='.urlencode($sValue);
			}
			$sQuery = implode('&', $aPairs);
		} elseif (isset($aUrl['query'])) {
			$sQuery = $aUrl['query'];
		}

		$bHost = isset($aUrl['host']) && $aUrl['host'] != '';

		// Compile url
		$sUrl = '';
		if (isset($aUrl['scheme']) && $aUrl['scheme'] != '') {
			$sUrl .= $aUrl['scheme'] . '://';
		} elseif ($bHost) {
			$sUrl .= '//'; // scheme-relative url
		}
		if (isset($aUrl['user']) && $aUrl['user'] != '') {
			$sUrl .= $aUrl['user'];
			if (isset($aUrl['pass'])) {
				$sUrl .= ':' . $aUrl['pass'];
			}
			$sUrl .= '@';
		}
		if ($bHost) {
			$sUrl .= $aUrl['host'];
		}
		if (isset($aUrl['port']) && $aUrl['port'] != '') {
			$sUrl .= ':' . $aUrl['port'];
		}
		if (isset($aUrl['path']) && $aUrl['path'] != '') {
			$sUrl .= $aUrl['path'];
		}
		if ($sQuery != '') {
			$sUrl .= '?' . $sQuery;
		}
		if (isset($aUrl['fragment']) && $aUrl['fragment'] != '') {
			$sUrl .= '#' . $aUrl['fragment'];
		}
		return $sUrl;
	}

	/**
	 * Removes the . and .. segments from the path of an url
	 *
	 * @param string $relative_url the url to resolve
	 * @return string the url rebuilt by build_url(); the input untouched when parse_url() cannot parse it
	 */
	function resolve_url($relative_url) {
		//-----------------------------
		$url=parse_url($relative_url);
		if (!is_array($url)) {
			return $relative_url; //seriously malformed url, nothing to resolve
		}
		if (isset($url["path"])) { //the path piece is absent when the url has none
			$url["path"]=resolve_path($url["path"]);
		}
		return build_url($url);
	}


	/**
	 * Get realpath without checking existence of file like php function does..
	 *
	 * Empty and '.' segments are dropped, a '..' segment removes the segment
	 * kept before it. A leading / is preserved.
	 *
	 * @param string $path the path to resolve
	 * @return string the resolved path
	 */
	function resolve_path($path) {
		//----------------------------------
		$path=(string)$path;
		$out=array();
		foreach(explode('/', $path) as $i=>$fold){
			if ($fold=='' || $fold=='.') continue;
			if ($fold=='..' && $i>0 && end($out)!='..') array_pop($out);
			else $out[]= $fold;
		}
		//$path[0] instead of $path{0}: the curly form no longer parses in PHP 8,
		//and an empty path has no first character to look at
		$is_absolute=($path!=='' && $path[0]=='/');
		return ($is_absolute?'/':'').join('/', $out);
	}


	/**
	 * Splits an url into its parts with a single regular expression
	 *
	 * This part is from http://fr2.php.net/manual/en/function.parse-url.php
	 *
	 * @param string $url the url to split
	 * @return array the keys scheme, userinfo, authority, host, port, path, query and fragment; '' for a part that is absent
	 */
	function j_parseUrl($url) {
		//--------------------------
		$r = "(?:([a-z0-9+-._]+)://)?";
		$r .= "(?:";
		$r .= "(?:((?:[a-z0-9-._~!$&'()*+,;=:]|%[0-9a-f]{2})*)@)?";
		$r .= "(?:\[((?:[a-z0-9:])*)\])?";
		$r .= "((?:[a-z0-9-._~!$&'()*+,;=]|%[0-9a-f]{2})*)";
		$r .= "(?::(\d*))?";
		$r .= "(/(?:[a-z0-9-._~!$&'()*+,;=:@/]|%[0-9a-f]{2})*)?";
		$r .= "|";
		$r .= "(/?";
		$r .= "(?:[a-z0-9-._~!$&'()*+,;=:@]|%[0-9a-f]{2})+";
		$r .= "(?:[a-z0-9-._~!$&'()*+,;=:@\/]|%[0-9a-f]{2})*";
		$r .= ")?";
		$r .= ")";
		$r .= "(?:\?((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9a-f]{2})*))?";
		$r .= "(?:#((?:[a-z0-9-._~!$&'()*+,;=:\/?@]|%[0-9a-f]{2})*))?";
		preg_match("`$r`i", $url, $match);
		$parts = array(
			"scheme"=>'',
			"userinfo"=>'',
			"authority"=>'',
			"host"=> '',
			"port"=>'',
			"path"=>'',
			"query"=>'',
			"fragment"=>'');
		//No break on purpose: the number of captures tells the last part present,
		//every case then falls through to the parts before it
		switch (count ($match)) {
			case 10: $parts['fragment'] = $match[9];
			case 9: $parts['query'] = $match[8];
			case 8: $parts['path'] = $match[7];
			case 7: $parts['path'] = $match[6] . $parts['path'];
			case 6: $parts['port'] = $match[5];
			case 5: $parts['host'] = $match[3]?"[".$match[3]."]":$match[4];
			case 4: $parts['userinfo'] = $match[2];
			case 3: $parts['scheme'] = $match[1];
		}
		$parts['authority'] = ($parts['userinfo']?$parts['userinfo']."@":"").
			$parts['host'].
			($parts['port']?":".$parts['port']:"");
		return $parts;
	}
 180 | 181 | define('URL_TOOLBOX',1); 182 | 183 | }//End of URL_TOOLBOX 184 | ?> -------------------------------------------------------------------------------- /src/template.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/codeshell/fpdm/08aabe1706ebda5e2ef14357ea7869f67bb4ecb7/src/template.pdf --------------------------------------------------------------------------------