├── LICENSE
├── README.md
└── tcpdi_parser.php


/LICENSE:
--------------------------------------------------------------------------------
  1 | GNU LESSER GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 | 
  9 |   This version of the GNU Lesser General Public License incorporates
 10 | the terms and conditions of version 3 of the GNU General Public
 11 | License, supplemented by the additional permissions listed below.
 12 | 
 13 |   0. Additional Definitions.
 14 | 
 15 |   As used herein, "this License" refers to version 3 of the GNU Lesser
 16 | General Public License, and the "GNU GPL" refers to version 3 of the GNU
 17 | General Public License.
 18 | 
 19 |   "The Library" refers to a covered work governed by this License,
 20 | other than an Application or a Combined Work as defined below.
 21 | 
 22 |   An "Application" is any work that makes use of an interface provided
 23 | by the Library, but which is not otherwise based on the Library.
 24 | Defining a subclass of a class defined by the Library is deemed a mode
 25 | of using an interface provided by the Library.
 26 | 
 27 |   A "Combined Work" is a work produced by combining or linking an
 28 | Application with the Library.  The particular version of the Library
 29 | with which the Combined Work was made is also called the "Linked
 30 | Version".
 31 | 
 32 |   The "Minimal Corresponding Source" for a Combined Work means the
 33 | Corresponding Source for the Combined Work, excluding any source code
 34 | for portions of the Combined Work that, considered in isolation, are
 35 | based on the Application, and not on the Linked Version.
 36 | 
 37 |   The "Corresponding Application Code" for a Combined Work means the
 38 | object code and/or source code for the Application, including any data
 39 | and utility programs needed for reproducing the Combined Work from the
 40 | Application, but excluding the System Libraries of the Combined Work.
 41 | 
 42 |   1. Exception to Section 3 of the GNU GPL.
 43 | 
 44 |   You may convey a covered work under sections 3 and 4 of this License
 45 | without being bound by section 3 of the GNU GPL.
 46 | 
 47 |   2. Conveying Modified Versions.
 48 | 
 49 |   If you modify a copy of the Library, and, in your modifications, a
 50 | facility refers to a function or data to be supplied by an Application
 51 | that uses the facility (other than as an argument passed when the
 52 | facility is invoked), then you may convey a copy of the modified
 53 | version:
 54 | 
 55 |    a) under this License, provided that you make a good faith effort to
 56 |    ensure that, in the event an Application does not supply the
 57 |    function or data, the facility still operates, and performs
 58 |    whatever part of its purpose remains meaningful, or
 59 | 
 60 |    b) under the GNU GPL, with none of the additional permissions of
 61 |    this License applicable to that copy.
 62 | 
 63 |   3. Object Code Incorporating Material from Library Header Files.
 64 | 
 65 |   The object code form of an Application may incorporate material from
 66 | a header file that is part of the Library.  You may convey such object
 67 | code under terms of your choice, provided that, if the incorporated
 68 | material is not limited to numerical parameters, data structure
 69 | layouts and accessors, or small macros, inline functions and templates
 70 | (ten or fewer lines in length), you do both of the following:
 71 | 
 72 |    a) Give prominent notice with each copy of the object code that the
 73 |    Library is used in it and that the Library and its use are
 74 |    covered by this License.
 75 | 
 76 |    b) Accompany the object code with a copy of the GNU GPL and this license
 77 |    document.
 78 | 
 79 |   4. Combined Works.
 80 | 
 81 |   You may convey a Combined Work under terms of your choice that,
 82 | taken together, effectively do not restrict modification of the
 83 | portions of the Library contained in the Combined Work and reverse
 84 | engineering for debugging such modifications, if you also do each of
 85 | the following:
 86 | 
 87 |    a) Give prominent notice with each copy of the Combined Work that
 88 |    the Library is used in it and that the Library and its use are
 89 |    covered by this License.
 90 | 
 91 |    b) Accompany the Combined Work with a copy of the GNU GPL and this license
 92 |    document.
 93 | 
 94 |    c) For a Combined Work that displays copyright notices during
 95 |    execution, include the copyright notice for the Library among
 96 |    these notices, as well as a reference directing the user to the
 97 |    copies of the GNU GPL and this license document.
 98 | 
 99 |    d) Do one of the following:
100 | 
101 |        0) Convey the Minimal Corresponding Source under the terms of this
102 |        License, and the Corresponding Application Code in a form
103 |        suitable for, and under terms that permit, the user to
104 |        recombine or relink the Application with a modified version of
105 |        the Linked Version to produce a modified Combined Work, in the
106 |        manner specified by section 6 of the GNU GPL for conveying
107 |        Corresponding Source.
108 | 
109 |        1) Use a suitable shared library mechanism for linking with the
110 |        Library.  A suitable mechanism is one that (a) uses at run time
111 |        a copy of the Library already present on the user's computer
112 |        system, and (b) will operate properly with a modified version
113 |        of the Library that is interface-compatible with the Linked
114 |        Version.
115 | 
116 |    e) Provide Installation Information, but only if you would otherwise
117 |    be required to provide such information under section 6 of the
118 |    GNU GPL, and only to the extent that such information is
119 |    necessary to install and execute a modified version of the
120 |    Combined Work produced by recombining or relinking the
121 |    Application with a modified version of the Linked Version. (If
122 |    you use option 4d0, the Installation Information must accompany
123 |    the Minimal Corresponding Source and Corresponding Application
124 |    Code. If you use option 4d1, you must provide the Installation
125 |    Information in the manner specified by section 6 of the GNU GPL
126 |    for conveying Corresponding Source.)
127 | 
128 |   5. Combined Libraries.
129 | 
130 |   You may place library facilities that are a work based on the
131 | Library side by side in a single library together with other library
132 | facilities that are not Applications and are not covered by this
133 | License, and convey such a combined library under terms of your
134 | choice, if you do both of the following:
135 | 
136 |    a) Accompany the combined library with a copy of the same work based
137 |    on the Library, uncombined with any other library facilities,
138 |    conveyed under the terms of this License.
139 | 
140 |    b) Give prominent notice with the combined library that part of it
141 |    is a work based on the Library, and explaining where to find the
142 |    accompanying uncombined form of the same work.
143 | 
144 |   6. Revised Versions of the GNU Lesser General Public License.
145 | 
146 |   The Free Software Foundation may publish revised and/or new versions
147 | of the GNU Lesser General Public License from time to time. Such new
148 | versions will be similar in spirit to the present version, but may
149 | differ in detail to address new problems or concerns.
150 | 
151 |   Each version is given a distinguishing version number. If the
152 | Library as you received it specifies that a certain numbered version
153 | of the GNU Lesser General Public License "or any later version"
154 | applies to it, you have the option of following the terms and
155 | conditions either of that published version or of any later version
156 | published by the Free Software Foundation. If the Library as you
157 | received it does not specify a version number of the GNU Lesser
158 | General Public License, you may choose any version of the GNU Lesser
159 | General Public License ever published by the Free Software Foundation.
160 | 
161 |   If the Library as you received it specifies that a proxy can decide
162 | whether future versions of the GNU Lesser General Public License shall
163 | apply, that proxy's public statement of acceptance of any version is
164 | permanent authorization for you to choose that version for the
165 | Library.
166 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | tcpdi_parser
2 | ============
3 | 
4 | Parser for use with TCPDI, based on TCPDF_PARSER.  Supports PDFs up to v1.7.
5 | 
6 | See [pauln/tcpdi](https://github.com/pauln/tcpdi) for installation and usage instructions.
7 | 


--------------------------------------------------------------------------------
/tcpdi_parser.php:
--------------------------------------------------------------------------------
   1 | <?php
   2 | //============================================================+
   3 | // File name   : tcpdi_parser.php
   4 | // Version     : 1.1
   5 | // Begin       : 2013-09-25
   6 | // Last Update : 2016-05-03
   7 | // Author      : Paul Nicholls - https://github.com/pauln
   8 | // License     : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
   9 | //
  10 | // Based on    : tcpdf_parser.php
  11 | // Version     : 1.0.003
  12 | // Begin       : 2011-05-23
  13 | // Last Update : 2013-03-17
  14 | // Author      : Nicola Asuni - Tecnick.com LTD - www.tecnick.com - info@tecnick.com
  15 | // License     : GNU-LGPL v3 (http://www.gnu.org/copyleft/lesser.html)
  16 | // -------------------------------------------------------------------
  17 | // Copyright (C) 2011-2013 Nicola Asuni - Tecnick.com LTD
  18 | //
  19 | // This file is for use with the TCPDF software library.
  20 | //
  21 | // tcpdi_parser is free software: you can redistribute it and/or modify it
  22 | // under the terms of the GNU Lesser General Public License as
  23 | // published by the Free Software Foundation, either version 3 of the
  24 | // License, or (at your option) any later version.
  25 | //
  26 | // tcpdi_parser is distributed in the hope that it will be useful, but
  27 | // WITHOUT ANY WARRANTY; without even the implied warranty of
  28 | // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
  29 | // See the GNU Lesser General Public License for more details.
  30 | //
  31 | // You should have received a copy of the License
  32 | // along with tcpdi_parser. If not, see
  33 | // <http://www.tecnick.com/pagefiles/tcpdf/LICENSE.TXT>.
  34 | //
  35 | // See LICENSE file for more information.
  36 | // -------------------------------------------------------------------
  37 | //
  38 | // Description : This is a PHP class for parsing PDF documents.
  39 | //
  40 | //============================================================+
  41 | 
  42 | /**
  43 |  * @file
  44 |  * This is a PHP class for parsing PDF documents.<br>
  45 |  * @author Paul Nicholls
  46 |  * @author Nicola Asuni
  47 |  * @version 1.1
  48 |  */
  49 | 
  50 | // include class for decoding filters
  51 | require_once(dirname(__FILE__).'/include/tcpdf_filters.php');
  52 | 
  53 | if (!defined ('PDF_TYPE_NULL'))
  54 |     define ('PDF_TYPE_NULL', 0);
  55 | if (!defined ('PDF_TYPE_NUMERIC'))
  56 |     define ('PDF_TYPE_NUMERIC', 1);
  57 | if (!defined ('PDF_TYPE_TOKEN'))
  58 |     define ('PDF_TYPE_TOKEN', 2);
  59 | if (!defined ('PDF_TYPE_HEX'))
  60 |     define ('PDF_TYPE_HEX', 3);
  61 | if (!defined ('PDF_TYPE_STRING'))
  62 |     define ('PDF_TYPE_STRING', 4);
  63 | if (!defined ('PDF_TYPE_DICTIONARY'))
  64 |     define ('PDF_TYPE_DICTIONARY', 5);
  65 | if (!defined ('PDF_TYPE_ARRAY'))
  66 |     define ('PDF_TYPE_ARRAY', 6);
  67 | if (!defined ('PDF_TYPE_OBJDEC'))
  68 |     define ('PDF_TYPE_OBJDEC', 7);
  69 | if (!defined ('PDF_TYPE_OBJREF'))
  70 |     define ('PDF_TYPE_OBJREF', 8);
  71 | if (!defined ('PDF_TYPE_OBJECT'))
  72 |     define ('PDF_TYPE_OBJECT', 9);
  73 | if (!defined ('PDF_TYPE_STREAM'))
  74 |     define ('PDF_TYPE_STREAM', 10);
  75 | if (!defined ('PDF_TYPE_BOOLEAN'))
  76 |     define ('PDF_TYPE_BOOLEAN', 11);
  77 | if (!defined ('PDF_TYPE_REAL'))
  78 |     define ('PDF_TYPE_REAL', 12);
  79 | 
  80 | /**
  81 |  * @class tcpdi_parser
  82 |  * This is a PHP class for parsing PDF documents.<br>
  83 |  * Based on TCPDF_PARSER, part of the TCPDF project by Nicola Asuni.
 * @brief This is a PHP class for parsing PDF documents.
  85 |  * @version 1.1
  86 |  * @author Paul Nicholls - github.com/pauln
  87 |  * @author Nicola Asuni - info@tecnick.com
  88 |  */
  89 | class tcpdi_parser {
    /**
     * Unique parser ID, as passed to the constructor.
     * @public
     */
    public $uniqueid = '';

    /**
     * Raw content of the PDF document (the $data string given to the
     * constructor). Reset to '' by cleanUp().
     * @private
     */
    private $pdfdata = '';

    /**
     * XREF data, filled by getXrefData() in the constructor.
     * Keys written by the decoders include 'xref' (object number =>
     * generation number => byte offset), 'trailer', 'xref_location'
     * and 'max_object'.
     * @protected
     */
    protected $xref = array();

    /**
     * Object streams.
     * @protected
     */
    protected $objstreams = array();

    /**
     * Objects in objstreams.
     * @protected
     */
    protected $objstreamobjs = array();

    /**
     * List of seen XREF data locations. Consulted before following a /Prev
     * entry so that the same xref section is not decoded twice.
     * @protected
     */
    protected $xref_seen_offsets = array();

    /**
     * Array of PDF objects. Starts empty after construction and is returned
     * as the second element of getParsedData().
     * @protected
     */
    protected $objects = array();

    /**
     * Array of object offsets.
     * NOTE(review): presumably filled by findObjectOffsets() - confirm.
     * @private
     */
    private $objoffsets = array();

    /**
     * Class object for decoding filters (a TCPDF_FILTERS instance created in
     * the constructor).
     * @private
     */
    private $FilterDecoders;

    /**
     * Pages: the leaf page objects collected by readPages()/readPage(), in
     * /Kids traversal order.
     *
     * @private array
     */
    private $pages;

    /**
     * Page count: number of entries in $pages, set by readPages().
     * @private integer
     */
    private $page_count;

    /**
     * Actual page number.
     * NOTE(review): presumably the page currently being processed - confirm
     * against the code that uses it.
     * @private integer
     */
    private $pageno;

    /**
     * PDF version of the loaded document (a "digit.digit" string such as
     * "1.7"), set by getPDFVersion().
     * @private string
     */
    private $pdfVersion;

    /**
     * Available BoxTypes: the page dictionary keys naming the page boxes.
     *
     * @public array
     */
    public $availableBoxes = array('/MediaBox', '/CropBox', '/BleedBox', '/TrimBox', '/ArtBox');
 175 | 
 176 | // -----------------------------------------------------------------------------
 177 | 
 178 |     /**
 179 |      * Parse a PDF document an return an array of objects.
 180 |      * @param $data (string) PDF data to parse.
 181 |      * @public
 182 |      * @since 1.0.000 (2011-05-24)
 183 |      */
 184 |     public function __construct($data, $uniqueid) {
 185 |         if (empty($data)) {
 186 |             $this->Error('Empty PDF data.');
 187 |         }
 188 |         $this->uniqueid = $uniqueid;
 189 |         $this->pdfdata = $data;
 190 |         // get length
 191 |         $pdflen = strlen($this->pdfdata);
 192 |         // initialize class for decoding filters
 193 |         $this->FilterDecoders = new TCPDF_FILTERS();
 194 |         // get xref and trailer data
 195 |         $this->xref = $this->getXrefData();
 196 |         $this->findObjectOffsets();
 197 |         // parse all document objects
 198 |         $this->objects = array();
 199 |         /*foreach ($this->xref['xref'] as $obj => $offset) {
 200 |             if (!isset($this->objects[$obj]) AND ($offset > 0)) {
 201 |                 // decode only objects with positive offset
 202 |                 //$this->objects[$obj] = $this->getIndirectObject($obj, $offset, true);
 203 |             }
 204 |         }*/
 205 |         $this->getPDFVersion();
 206 |         $this->readPages();
 207 |     }
 208 | 
 209 |     /**
 210 |      * Clean up when done, to free memory etc
 211 |      */
 212 |     public function cleanUp() {
 213 |         unset($this->pdfdata);
 214 |         $this->pdfdata = '';
 215 |         unset($this->objstreams);
 216 |         $this->objstreams = array();
 217 |         unset($this->objects);
 218 |         $this->objects = array();
 219 |         unset($this->objstreamobjs);
 220 |         $this->objstreamobjs = array();
 221 |         unset($this->xref);
 222 |         $this->xref = array();
 223 |         unset($this->objoffsets);
 224 |         $this->objoffsets = array();
 225 |         unset($this->pages);
 226 |         $this->pages = array();
 227 |     }
 228 | 
 229 |     /**
 230 |      * Return an array of parsed PDF document objects.
 231 |      * @return (array) Array of parsed PDF document objects.
 232 |      * @public
 233 |      * @since 1.0.000 (2011-06-26)
 234 |      */
 235 |     public function getParsedData() {
 236 |         return array($this->xref, $this->objects, $this->pages);
 237 |     }
 238 | 
 239 |     /**
 240 |      * Get PDF-Version
 241 |      *
 242 |      * And reset the PDF Version used in FPDI if needed
 243 |      * @public
 244 |      */
 245 |     public function getPDFVersion() {
 246 |         preg_match('/\d\.\d/', substr($this->pdfdata, 0, 16), $m);
 247 |         if (isset($m[0]))
 248 |             $this->pdfVersion = $m[0];
 249 |         return $this->pdfVersion;
 250 |     }
 251 | 
 252 |     /**
 253 |      * Read all /Page(es)
 254 |      *
 255 |      */
 256 |     function readPages() {
 257 |         $params = $this->getObjectVal($this->xref['trailer'][1]['/Root']);
 258 |         $objref = null;
 259 |         foreach ($params[1][1] as $k=>$v) {
 260 |             if ($k == '/Pages') {
 261 |                 $objref = $v;
 262 |                 break;
 263 |             }
 264 |         }
 265 |         if ($objref == null || $objref[0] !== PDF_TYPE_OBJREF) {
 266 |             // Offset not found.
 267 |             return;
 268 |         }
 269 | 
 270 |         $dict = $this->getObjectVal($objref);
 271 |         if ($dict[0] == PDF_TYPE_OBJECT && $dict[1][0] == PDF_TYPE_DICTIONARY) {
 272 |             // Dict wrapped in an object
 273 |             $dict = $dict[1];
 274 |         }
 275 | 
 276 |         if ($dict[0] !== PDF_TYPE_DICTIONARY) {
 277 |             return;
 278 |         }
 279 | 
 280 |         $this->pages = array();
 281 |         if (isset($dict[1]['/Kids'])) {
 282 |             $v = $dict[1]['/Kids'];
 283 |             if ($v[0] == PDF_TYPE_ARRAY) {
 284 |                 foreach ($v[1] as $ref) {
 285 |                     $page = $this->getObjectVal($ref);
 286 |                     $this->readPage($page);
 287 |                 }
 288 |             }
 289 |         }
 290 | 
 291 |         $this->page_count = count($this->pages);
 292 |     }
 293 | 
 294 |     /**
 295 |      * Read a single /Page element, recursing through /Kids if necessary
 296 |      *
 297 |      */
 298 |     private function readPage($page) {
 299 |         if (isset($page[1][1]['/Kids'])) {
 300 |             // Nested pages!
 301 |             foreach ($page[1][1]['/Kids'][1] as $subref) {
 302 |                 $subpage = $this->getObjectVal($subref);
 303 |                 $this->readPage($subpage);
 304 |             }
 305 |         } else {
 306 |             $this->pages[] = $page;
 307 |         }
 308 |     }
 309 | 
 310 |     /**
 311 |      * Get pagecount from sourcefile
 312 |      *
 313 |      * @return int
 314 |      */
 315 |     function getPageCount() {
 316 |         return $this->page_count;
 317 |     }
 318 | 
 319 |     /**
 320 |      * Get Cross-Reference (xref) table and trailer data from PDF document data.
 321 |      * @param $offset (int) xref offset (if know).
 322 |      * @param $xref (array) previous xref array (if any).
 323 |      * @return Array containing xref and trailer data.
 324 |      * @protected
 325 |      * @since 1.0.000 (2011-05-24)
 326 |      */
 327 |     protected function getXrefData($offset=0, $xref=array()) {
 328 |         if ($offset == 0) {
 329 |             // find last startxref
 330 |             if (preg_match('/.*[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/is', $this->pdfdata, $matches) == 0) {
 331 |                 $this->Error('Unable to find startxref');
 332 |             }
 333 |             $startxref = $matches[1];
 334 |         } else {
 335 |             if (preg_match('/([0-9]+[\s][0-9]+[\s]obj)/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 336 |                 // Cross-Reference Stream object
 337 |                 $startxref = $offset;
 338 |             } elseif (preg_match('/[\r\n]startxref[\s\r\n]+([0-9]+)[\s\r\n]+%%EOF/i', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset)) {
 339 |                 // startxref found
 340 |                 $startxref = $matches[1][0];
 341 |             } else {
 342 |                 $this->Error('Unable to find startxref');
 343 |             }
 344 |         }
 345 |         unset($matches);
 346 | 
 347 |         // DOMPDF gets the startxref wrong, giving us the linebreak before the xref starts.
 348 |         $startxref += strspn($this->pdfdata, "\r\n", $startxref);
 349 | 
 350 |         // check xref position
 351 |         if (strpos($this->pdfdata, 'xref', $startxref) == $startxref) {
 352 |             // Cross-Reference
 353 |             $xref = $this->decodeXref($startxref, $xref);
 354 |         } else {
 355 |             // Cross-Reference Stream
 356 |             $xref = $this->decodeXrefStream($startxref, $xref);
 357 |         }
 358 |         if (empty($xref)) {
 359 |             $this->Error('Unable to find xref');
 360 |         }
 361 | 
 362 |         return $xref;
 363 |     }
 364 | 
 365 |     /**
 366 |      * Decode the Cross-Reference section
 367 |      * @param $startxref (int) Offset at which the xref section starts.
 368 |      * @param $xref (array) Previous xref array (if any).
 369 |      * @return Array containing xref and trailer data.
 370 |      * @protected
 371 |      * @since 1.0.000 (2011-06-20)
 372 |      */
 373 |     protected function decodeXref($startxref, $xref=array()) {
 374 |         $this->xref_seen_offsets[] = $startxref;
 375 |         if (!isset($xref['xref_location'])) {
 376 |             $xref['xref_location'] = $startxref;
 377 |             $xref['max_object'] = 0;
 378 |         }
 379 |         // extract xref data (object indexes and offsets)
 380 |         $xoffset = $startxref + 5;
 381 |         // initialize object number
 382 |         $obj_num = 0;
 383 |         $offset = $xoffset;
 384 |         while (preg_match('/^([0-9]+)[\s]([0-9]+)[\s]?([nf]?)/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $offset) > 0) {
 385 |             $offset = (strlen($matches[0][0]) + $matches[0][1]);
 386 |             if ($matches[3][0] == 'n') {
 387 |                 // create unique object index: [object number]_[generation number]
 388 |                 $gen_num = intval($matches[2][0]);
 389 |                 $index = $obj_num.'_'.$gen_num;
 390 |                 // check if object already exist
 391 |                 if (!isset($xref['xref'][$obj_num][$gen_num])) {
 392 |                     // store object offset position
 393 |                     $xref['xref'][$obj_num][$gen_num] = intval($matches[1][0]);
 394 |                 }
 395 |                 ++$obj_num;
 396 |                 $offset += 2;
 397 |             } elseif ($matches[3][0] == 'f') {
 398 |                 ++$obj_num;
 399 |                 $offset += 2;
 400 |             } else {
 401 |                 // object number (index)
 402 |                 $obj_num = intval($matches[1][0]);
 403 |             }
 404 |         }
 405 |         unset($matches);
 406 |         $xref['max_object'] = max($xref['max_object'], $obj_num);
 407 |         // get trailer data
 408 |         if (preg_match('/trailer[\s]*<<(.*)>>[\s\r\n]+(?:[%].*[\r\n]+)*startxref[\s\r\n]+/isU', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE, $xoffset) > 0) {
 409 |             $trailer_data = $matches[1][0];
 410 |             if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
 411 |                 // get only the last updated version
 412 |                 $xref['trailer'] = array();
 413 |                 $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
 414 |                 $xref['trailer'][1] = array();
 415 |                 // parse trailer_data
 416 |                 if (preg_match('/Size[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
 417 |                     $xref['trailer'][1]['/Size'] = array(PDF_TYPE_NUMERIC, intval($matches[1]));
 418 |                 }
 419 |                 if (preg_match('/Root[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 420 |                     $xref['trailer'][1]['/Root'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
 421 |                 }
 422 |                 if (preg_match('/Encrypt[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 423 |                     $xref['trailer'][1]['/Encrypt'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
 424 |                 }
 425 |                 if (preg_match('/Info[\s]+([0-9]+)[\s]+([0-9]+)[\s]+R/i', $trailer_data, $matches) > 0) {
 426 |                     $xref['trailer'][1]['/Info'] = array(PDF_TYPE_OBJREF, intval($matches[1]), intval($matches[2]));
 427 |                 }
 428 |                 if (preg_match('/ID[\s]*[\[][\s]*[<]([^>]*)[>][\s]*[<]([^>]*)[>]/i', $trailer_data, $matches) > 0) {
 429 |                     $xref['trailer'][1]['/ID'] = array(PDF_TYPE_ARRAY, array());
 430 |                     $xref['trailer'][1]['/ID'][1][0] = array(PDF_TYPE_HEX, $matches[1]);
 431 |                     $xref['trailer'][1]['/ID'][1][1] = array(PDF_TYPE_HEX, $matches[2]);
 432 |                 }
 433 |             }
 434 |             if (preg_match('/Prev[\s]+([0-9]+)/i', $trailer_data, $matches) > 0) {
 435 |                 // get previous xref
 436 |                 $prevoffset = intval($matches[1]);
 437 |                 if (!in_array($prevoffset, $this->xref_seen_offsets)) {
 438 |                     $this->xref_seen_offsets[] = $prevoffset;
 439 |                     $xref = $this->getXrefData($prevoffset, $xref);
 440 |                 }
 441 |             }
 442 |             unset($matches);
 443 |         } else {
 444 |             $this->Error('Unable to find trailer');
 445 |         }
 446 |         return $xref;
 447 |     }
 448 | 
 449 |     /**
 450 |      * Decode the Cross-Reference Stream section
 451 |      * @param $startxref (int) Offset at which the xref section starts.
 452 |      * @param $xref (array) Previous xref array (if any).
 453 |      * @return Array containing xref and trailer data.
 454 |      * @protected
 455 |      * @since 1.0.003 (2013-03-16)
 456 |      */
 457 |     protected function decodeXrefStream($startxref, $xref=array()) {
 458 |         // try to read Cross-Reference Stream
 459 |         list($xrefobj, $unused) = $this->getRawObject($startxref);
 460 |         $xrefcrs = $this->getIndirectObject($xrefobj[1], $startxref, true);
 461 |         if (!isset($xref['xref_location'])) {
 462 |             $xref['xref_location'] = $startxref;
 463 |             $xref['max_object'] = 0;
 464 |         }
 465 |         if (!isset($xref['xref'])) {
 466 |             $xref['xref'] = array();
 467 |         }
 468 |         if (!isset($xref['trailer']) OR empty($xref['trailer'])) {
 469 |             // get only the last updated version
 470 |             $xref['trailer'] = array();
 471 |             $xref['trailer'][0] = PDF_TYPE_DICTIONARY;
 472 |             $xref['trailer'][1] = array();
 473 |             $filltrailer = true;
 474 |         } else {
 475 |             $filltrailer = false;
 476 |         }
 477 |         $valid_crs = false;
 478 |         $sarr = $xrefcrs[0][1];
 479 |         $keys = array_keys($sarr);
 480 |         $columns = 1; // Default as per PDF 32000-1:2008.
 481 |         $predictor = 1; // Default as per PDF 32000-1:2008.
 482 |         foreach ($keys as $k=>$key) {
 483 |             $v = $sarr[$key];
 484 |             if (($key == '/Type') AND ($v[0] == PDF_TYPE_TOKEN AND ($v[1] == 'XRef'))) {
 485 |                 $valid_crs = true;
 486 |             } elseif (($key == '/Index') AND ($v[0] == PDF_TYPE_ARRAY AND count($v[1] >= 2))) {
 487 |                 // first object number in the subsection
 488 |                 $index_first = intval($v[1][0][1]);
 489 |                 // number of entries in the subsection
 490 |                 $index_entries = intval($v[1][1][1]);
 491 |             } elseif (($key == '/Prev') AND ($v[0] == PDF_TYPE_NUMERIC)) {
 492 |                 // get previous xref offset
 493 |                 $prevxref = intval($v[1]);
 494 |             } elseif (($key == '/W') AND ($v[0] == PDF_TYPE_ARRAY)) {
 495 |                 // number of bytes (in the decoded stream) of the corresponding field
 496 |                 $wb = array();
 497 |                 $wb[0] = intval($v[1][0][1]);
 498 |                 $wb[1] = intval($v[1][1][1]);
 499 |                 $wb[2] = intval($v[1][2][1]);
 500 |             } elseif (($key == '/DecodeParms') AND ($v[0] == PDF_TYPE_DICTIONARY)) {
 501 |                 $decpar = $v[1];
 502 |                 foreach ($decpar as $kdc => $vdc) {
 503 |                     if (($kdc == '/Columns') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
 504 |                         $columns = intval($vdc[1]);
 505 |                     } elseif (($kdc == '/Predictor') AND ($vdc[0] == PDF_TYPE_NUMERIC)) {
 506 |                         $predictor = intval($vdc[1]);
 507 |                     }
 508 |                 }
 509 |             } elseif ($filltrailer) {
 510 |                 switch($key) {
 511 |                     case '/Size':
 512 |                     case '/Root':
 513 |                     case '/Info':
 514 |                     case '/ID':
 515 |                         $xref['trailer'][1][$key] = $v;
 516 |                         break;
 517 |                     default:
 518 |                         break;
 519 |                 }
 520 |             }
 521 |         }
 522 |         // decode data
 523 |         $obj_num = 0;
 524 |         if ($valid_crs AND isset($xrefcrs[1][3][0])) {
 525 |             // number of bytes in a row
 526 |             $rowlen = ($columns + 1);
 527 |             // convert the stream into an array of integers
 528 |             $sdata = unpack('C*', $xrefcrs[1][3][0]);
 529 |             // split the rows
 530 |             $sdata = array_chunk($sdata, $rowlen);
 531 |             // initialize decoded array
 532 |             $ddata = array();
 533 |             // initialize first row with zeros
 534 |             $prev_row = array_fill (0, $rowlen, 0);
 535 |             // for each row apply PNG unpredictor
 536 |             foreach ($sdata as $k => $row) {
 537 |                 // initialize new row
 538 |                 $ddata[$k] = array();
 539 |                 // get PNG predictor value
 540 |                 if (empty($predictor)) {
 541 |                     $predictor = (10 + $row[0]);
 542 |                 }
 543 |                 // for each byte on the row
 544 |                 for ($i=1; $i<=$columns; ++$i) {
 545 |                     if (!isset($row[$i])) {
 546 |                         // No more data in this row - we're done here.
 547 |                         break;
 548 |                     }
 549 |                     // new index
 550 |                     $j = ($i - 1);
 551 |                     $row_up = $prev_row[$j];
 552 |                     if ($i == 1) {
 553 |                         $row_left = 0;
 554 |                         $row_upleft = 0;
 555 |                     } else {
 556 |                         $row_left = $row[($i - 1)];
 557 |                         $row_upleft = $prev_row[($j - 1)];
 558 |                     }
 559 |                     switch ($predictor) {
 560 |                         case 1: // No prediction (equivalent to PNG None)
 561 |                         case 10: { // PNG prediction (on encoding, PNG None on all rows)
 562 |                             $ddata[$k][$j] = $row[$i];
 563 |                             break;
 564 |                         }
 565 |                         case 11: { // PNG prediction (on encoding, PNG Sub on all rows)
 566 |                             $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
 567 |                             break;
 568 |                         }
 569 |                         case 12: { // PNG prediction (on encoding, PNG Up on all rows)
 570 |                             $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
 571 |                             break;
 572 |                         }
 573 |                         case 13: { // PNG prediction (on encoding, PNG Average on all rows)
 574 |                             $ddata[$k][$j] = (($row[$i] + (($row_left + $row_up) / 2)) & 0xff);
 575 |                             break;
 576 |                         }
 577 |                         case 14: { // PNG prediction (on encoding, PNG Paeth on all rows)
 578 |                             // initial estimate
 579 |                             $p = ($row_left + $row_up - $row_upleft);
 580 |                             // distances
 581 |                             $pa = abs($p - $row_left);
 582 |                             $pb = abs($p - $row_up);
 583 |                             $pc = abs($p - $row_upleft);
 584 |                             $pmin = min($pa, $pb, $pc);
 585 |                             // return minumum distance
 586 |                             switch ($pmin) {
 587 |                                 case $pa: {
 588 |                                     $ddata[$k][$j] = (($row[$i] + $row_left) & 0xff);
 589 |                                     break;
 590 |                                 }
 591 |                                 case $pb: {
 592 |                                     $ddata[$k][$j] = (($row[$i] + $row_up) & 0xff);
 593 |                                     break;
 594 |                                 }
 595 |                                 case $pc: {
 596 |                                     $ddata[$k][$j] = (($row[$i] + $row_upleft) & 0xff);
 597 |                                     break;
 598 |                                 }
 599 |                             }
 600 |                             break;
 601 |                         }
 602 |                         default: { // PNG prediction (on encoding, PNG optimum)
 603 |                             $this->Error("Unknown PNG predictor $predictor");
 604 |                             break;
 605 |                         }
 606 |                     }
 607 |                 }
 608 |                 $prev_row = $ddata[$k];
 609 |             } // end for each row
 610 |             // complete decoding
 611 |             unset($sdata);
 612 |             $sdata = array();
 613 |             // for every row
 614 |             foreach ($ddata as $k => $row) {
 615 |                 // initialize new row
 616 |                 $sdata[$k] = array(0, 0, 0);
 617 |                 if ($wb[0] == 0) {
 618 |                     // default type field
 619 |                     $sdata[$k][0] = 1;
 620 |                 }
 621 |                 $i = 0; // count bytes on the row
 622 |                 // for every column
 623 |                 for ($c = 0; $c < 3; ++$c) {
 624 |                     // for every byte on the column
 625 |                     for ($b = 0; $b < $wb[$c]; ++$b) {
 626 |                         if (isset($row[$i])) {
 627 |                             $sdata[$k][$c] += ($row[$i] << (($wb[$c] - 1 - $b) * 8));
 628 |                         }
 629 |                         ++$i;
 630 |                     }
 631 |                 }
 632 |             }
 633 |             unset($ddata);
 634 |             // fill xref
 635 |             if (isset($index_first)) {
 636 |                 $obj_num = $index_first;
 637 |             } else {
 638 |                 $obj_num = 0;
 639 |             }
 640 |             foreach ($sdata as $k => $row) {
 641 |                 switch ($row[0]) {
 642 |                     case 0: { // (f) linked list of free objects
 643 |                         ++$obj_num;
 644 |                         break;
 645 |                     }
 646 |                     case 1: { // (n) objects that are in use but are not compressed
 647 |                         // create unique object index: [object number]_[generation number]
 648 |                         $index = $obj_num.'_'.$row[2];
 649 |                         // check if object already exist
 650 |                         if (!isset($xref['xref'][$obj_num][$row[2]])) {
 651 |                             // store object offset position
 652 |                             $xref['xref'][$obj_num][$row[2]] = $row[1];
 653 |                         }
 654 |                         ++$obj_num;
 655 |                         break;
 656 |                     }
 657 |                     case 2: { // compressed objects
 658 |                         // $row[1] = object number of the object stream in which this object is stored
 659 |                         // $row[2] = index of this object within the object stream
 660 |                         /*$index = $row[1].'_0_'.$row[2];
 661 |                         $xref['xref'][$row[1]][0][$row[2]] = -1;*/
 662 |                         break;
 663 |                     }
 664 |                     default: { // null objects
 665 |                         break;
 666 |                     }
 667 |                 }
 668 |             }
 669 |         } // end decoding data
 670 |         $xref['max_object'] = max($xref['max_object'], $obj_num);
 671 |         if (isset($prevxref)) {
 672 |             // get previous xref
 673 |             $xref = $this->getXrefData($prevxref, $xref);
 674 |         }
 675 |         return $xref;
 676 |     }
 677 | 
 678 |     /**
 679 |      * Get raw stream data
 680 |      * @param $offset (int) Stream offset.
 681 |      * @param $length (int) Stream length.
 682 |      * @return string Steam content
 683 |      * @protected
 684 |      */
 685 |     protected function getRawStream($offset, $length) {
 686 |         $offset += strspn($this->pdfdata, "\x00\x09\x0a\x0c\x0d\x20", $offset);
 687 |         $offset += 6; // "stream"
 688 |         $offset += strspn($this->pdfdata, "\x20", $offset);
 689 |         $offset += strspn($this->pdfdata, "\r\n", $offset);
 690 | 
 691 |         $obj = array();
 692 |         $obj[] = PDF_TYPE_STREAM;
 693 |         $obj[] = substr($this->pdfdata, $offset, $length);
 694 | 
 695 |         return array($obj, $offset+$length);
 696 |     }
 697 | 
 698 |     /**
 699 |      * Get object type, raw value and offset to next object
 700 |      * @param $offset (int) Object offset.
 701 |      * @return array containing object type, raw value and offset to next object
 702 |      * @protected
 703 |      * @since 1.0.000 (2011-06-20)
 704 |      */
 705 |     protected function getRawObject($offset=0, $data=null) {
 706 |         if ($data == null) {
 707 |             $data =& $this->pdfdata;
 708 |         }
 709 |         $objtype = ''; // object type to be returned
 710 |         $objval = ''; // object value to be returned
 711 |         // skip initial white space chars: \x00 null (NUL), \x09 horizontal tab (HT), \x0A line feed (LF), \x0C form feed (FF), \x0D carriage return (CR), \x20 space (SP)
 712 |         while (strspn($data{$offset}, "\x00\x09\x0a\x0c\x0d\x20") == 1) {
 713 |             $offset++;
 714 |         }
 715 |         // get first char
 716 |         $char = $data{$offset};
 717 |         // get object type
 718 |         switch ($char) {
 719 |             case '%': { // \x25 PERCENT SIGN
 720 |                 // skip comment and search for next token
 721 |                 $next = strcspn($data, "\r\n", $offset);
 722 |                 if ($next > 0) {
 723 |                     $offset += $next;
 724 |                     list($obj, $unused) = $this->getRawObject($offset, $data);
 725 |                     return $obj;
 726 |                 }
 727 |                 break;
 728 |             }
 729 |             case '/': { // \x2F SOLIDUS
 730 |                 // name object
 731 |                 $objtype = PDF_TYPE_TOKEN;
 732 |                 ++$offset;
 733 |                 $length = strcspn($data, "\x00\x09\x0a\x0c\x0d\x20\x28\x29\x3c\x3e\x5b\x5d\x7b\x7d\x2f\x25", $offset);
 734 |                 $objval = substr($data, $offset, $length);
 735 |                 $offset += $length;
 736 |                 break;
 737 |             }
 738 |             case '(':   // \x28 LEFT PARENTHESIS
 739 |             case ')': { // \x29 RIGHT PARENTHESIS
 740 |                 // literal string object
 741 |                 $objtype = PDF_TYPE_STRING;
 742 |                 ++$offset;
 743 |                 $strpos = $offset;
 744 |                 if ($char == '(') {
 745 |                     $open_bracket = 1;
 746 |                     while ($open_bracket > 0) {
 747 |                         if (!isset($data{$strpos})) {
 748 |                             break;
 749 |                         }
 750 |                         $ch = $data{$strpos};
 751 |                         switch ($ch) {
 752 |                             case '\\': { // REVERSE SOLIDUS (5Ch) (Backslash)
 753 |                                 // skip next character
 754 |                                 ++$strpos;
 755 |                                 break;
 756 |                             }
 757 |                             case '(': { // LEFT PARENHESIS (28h)
 758 |                                 ++$open_bracket;
 759 |                                 break;
 760 |                             }
 761 |                             case ')': { // RIGHT PARENTHESIS (29h)
 762 |                                 --$open_bracket;
 763 |                                 break;
 764 |                             }
 765 |                         }
 766 |                         ++$strpos;
 767 |                     }
 768 |                     $objval = substr($data, $offset, ($strpos - $offset - 1));
 769 |                     $offset = $strpos;
 770 |                 }
 771 |                 break;
 772 |             }
 773 |             case '[':   // \x5B LEFT SQUARE BRACKET
 774 |             case ']': { // \x5D RIGHT SQUARE BRACKET
 775 |                 // array object
 776 |                 $objtype = PDF_TYPE_ARRAY;
 777 |                 ++$offset;
 778 |                 if ($char == '[') {
 779 |                     // get array content
 780 |                     $objval = array();
 781 |                     do {
 782 |                         // get element
 783 |                         list($element, $offset) = $this->getRawObject($offset, $data);
 784 |                         $objval[] = $element;
 785 |                     } while ($element[0] !== ']');
 786 |                     // remove closing delimiter
 787 |                     array_pop($objval);
 788 |                 } else {
 789 |                     $objtype = ']';
 790 |                 }
 791 |                 break;
 792 |             }
 793 |             case '<':   // \x3C LESS-THAN SIGN
 794 |             case '>': { // \x3E GREATER-THAN SIGN
 795 |                 if (isset($data{($offset + 1)}) AND ($data{($offset + 1)} == $char)) {
 796 |                     // dictionary object
 797 |                     $objtype = PDF_TYPE_DICTIONARY;
 798 |                     if ($char == '<') {
 799 |                         list ($objval, $offset) = $this->getDictValue($offset, $data);
 800 |                     } else {
 801 |                         $objtype = '>>';
 802 |                         $offset += 2;
 803 |                     }
 804 |                 } else {
 805 |                     // hexadecimal string object
 806 |                     $objtype = PDF_TYPE_HEX;
 807 |                     ++$offset;
 808 |                     // The "Panose" entry in the FontDescriptor Style dict seems to have hex bytes separated by spaces.
 809 |                     if (($char == '<') AND (preg_match('/^([0-9A-Fa-f ]+)[>]/iU', substr($data, $offset), $matches) == 1)) {
 810 |                         $objval = $matches[1];
 811 |                         $offset += strlen($matches[0]);
 812 |                         unset($matches);
 813 |                     }
 814 |                 }
 815 |                 break;
 816 |             }
 817 |             default: {
 818 |                 $frag = $data{$offset} . @$data{$offset+1} . @$data{$offset+2} . @$data{$offset+3};
 819 |                 switch ($frag) {
 820 |                     case 'endo':
 821 |                         // indirect object
 822 |                         $objtype = 'endobj';
 823 |                         $offset += 6;
 824 |                         break;
 825 |                     case 'stre':
 826 |                         // Streams should always be indirect objects, and thus processed by getRawStream().
 827 |                         // If we get here, treat it as a null object as something has gone wrong.
 828 |                     case 'null':
 829 |                         // null object
 830 |                         $objtype = PDF_TYPE_NULL;
 831 |                         $offset += 4;
 832 |                         $objval = 'null';
 833 |                         break;
 834 |                     case 'true':
 835 |                         // boolean true object
 836 |                         $objtype = PDF_TYPE_BOOLEAN;
 837 |                         $offset += 4;
 838 |                         $objval = true;
 839 |                         break;
 840 |                     case 'fals':
 841 |                         // boolean false object
 842 |                         $objtype = PDF_TYPE_BOOLEAN;
 843 |                         $offset += 5;
 844 |                         $objval = false;
 845 |                         break;
 846 |                     case 'ends':
 847 |                         // end stream object
 848 |                         $objtype = 'endstream';
 849 |                         $offset += 9;
 850 |                         break;
 851 |                     default:
 852 |                         if (preg_match('/^([0-9]+)[\s]+([0-9]+)[\s]+([Robj]{1,3})/i', substr($data, $offset, 33), $matches) == 1) {
 853 |                             if ($matches[3] == 'R') {
 854 |                                 // indirect object reference
 855 |                                 $objtype = PDF_TYPE_OBJREF;
 856 |                                 $offset += strlen($matches[0]);
 857 |                                 $objval = array(intval($matches[1]), intval($matches[2]));
 858 |                             } elseif ($matches[3] == 'obj') {
 859 |                                 // object start
 860 |                                 $objtype = PDF_TYPE_OBJECT;
 861 |                                 $objval = intval($matches[1]).'_'.intval($matches[2]);
 862 |                                 $offset += strlen ($matches[0]);
 863 |                             }
 864 |                         } elseif (($numlen = strspn($data, '+-.0123456789', $offset)) > 0) {
 865 |                             // numeric object
 866 |                             $objval = substr($data, $offset, $numlen);
 867 |                             $objtype = (intval($objval) != $objval) ? PDF_TYPE_REAL : PDF_TYPE_NUMERIC;
 868 |                             $offset += $numlen;
 869 |                         }
 870 |                         unset($matches);
 871 |                         break;
 872 |                 }
 873 |                 break;
 874 |             }
 875 |         }
 876 |         $obj = array();
 877 |         $obj[] = $objtype;
 878 |         if ($objtype == PDF_TYPE_OBJREF && is_array($objval)) {
 879 |             foreach ($objval as $val) {
 880 |                 $obj[] = $val;
 881 |             }
 882 |         } else {
 883 |             $obj[] = $objval;
 884 |         }
 885 |         return array($obj, $offset);
 886 |     }
 887 |     private function getDictValue($offset, &$data) {
 888 |         $objval = array();
 889 | 
 890 |         // Extract dict from data.
 891 |         $i=1;
 892 |         $dict = '';
 893 |         $offset += 2;
 894 |         do {
 895 |             if ($data{$offset} == '>' && $data{$offset+1} == '>') {
 896 |                 $i--;
 897 |                 $dict .= '>>';
 898 |                 $offset += 2;
 899 |             } else if ($data{$offset} == '<' && $data{$offset+1} == '<') {
 900 |                 $i++;
 901 |                 $dict .= '<<';
 902 |                 $offset += 2;
 903 |             } else {
 904 |                 $dict .= $data{$offset};
 905 |                 $offset++;
 906 |             }
 907 |         } while ($i>0);
 908 | 
 909 |         // Now that we have just the dict, parse it.
 910 |         $dictoffset = 0;
 911 |         do {
 912 |             // Get dict element.
 913 |             list($key, $eloffset) = $this->getRawObject($dictoffset, $dict);
 914 |             if ($key[0] == '>>') {
 915 |                 break;
 916 |             }
 917 |             list($element, $dictoffset) = $this->getRawObject($eloffset, $dict);
 918 |             $objval['/'.$key[1]] = $element;
 919 |             unset($key);
 920 |             unset($element);
 921 |         } while (true);
 922 | 
 923 |         return array($objval, $offset);
 924 |     }
 925 | 
 926 |     /**
 927 |      * Get content of indirect object.
 928 |      * @param $obj_ref (string) Object number and generation number separated by underscore character.
 929 |      * @param $offset (int) Object offset.
 930 |      * @param $decoding (boolean) If true decode streams.
 931 |      * @return array containing object data.
 932 |      * @protected
 933 |      * @since 1.0.000 (2011-05-24)
 934 |      */
 935 |     protected function getIndirectObject($obj_ref, $offset=0, $decoding=true) {
 936 |         $obj = explode('_', $obj_ref);
 937 |         if (($obj === false) OR (count($obj) != 2)) {
 938 |             $this->Error('Invalid object reference: '.$obj);
 939 |             return;
 940 |         }
 941 |         $objref = $obj[0].' '.$obj[1].' obj';
 942 | 
 943 |         if (strpos($this->pdfdata, $objref, $offset) != $offset) {
 944 |             // an indirect reference to an undefined object shall be considered a reference to the null object
 945 |             return array('null', 'null', $offset);
 946 |         }
 947 |         // starting position of object content
 948 |         $offset += strlen($objref);
 949 |         // get array of object content
 950 |         $objdata = array();
 951 |         $i = 0; // object main index
 952 |         do {
 953 |             if (($i > 0) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY) AND array_key_exists('/Length', $objdata[($i - 1)][1])) {
 954 |                 // Stream - get using /Length in stream's dict
 955 |                 $lengthobj = $objdata[($i-1)][1]['/Length'];
 956 |                 if ($lengthobj[0] === PDF_TYPE_OBJREF) {
 957 |                     $lengthobj = $this->getObjectVal($lengthobj);
 958 |                     if ($lengthobj[0] === PDF_TYPE_OBJECT) {
 959 |                         $lengthobj = $lengthobj[1];
 960 |                     }
 961 |                 }
 962 |                 $streamlength = $lengthobj[1];
 963 |                 list($element, $offset) = $this->getRawStream($offset, $streamlength);
 964 |             } else {
 965 |                 // get element
 966 |                 list($element, $offset) = $this->getRawObject($offset);
 967 |             }
 968 |             // decode stream using stream's dictionary information
 969 |             if ($decoding AND ($element[0] == PDF_TYPE_STREAM) AND (isset($objdata[($i - 1)][0])) AND ($objdata[($i - 1)][0] == PDF_TYPE_DICTIONARY)) {
 970 |                 $element[3] = $this->decodeStream($objdata[($i - 1)][1], $element[1]);
 971 |             }
 972 |             $objdata[$i] = $element;
 973 |             ++$i;
 974 |         } while ($element[0] != 'endobj');
 975 |         // remove closing delimiter
 976 |         array_pop($objdata);
 977 |         // return raw object content
 978 |         return $objdata;
 979 |     }
 980 | 
 981 |     /**
 982 |      * Get the content of object, resolving indect object reference if necessary.
 983 |      * @param $obj (string) Object value.
 984 |      * @return array containing object data.
 985 |      * @public
 986 |      * @since 1.0.000 (2011-06-26)
 987 |      */
 988 |     public function getObjectVal($obj) {
 989 |         if ($obj[0] == PDF_TYPE_OBJREF) {
 990 |             if (strpos($obj[1], '_') !== false) {
 991 |                 $key = explode('_', $obj[1]);
 992 |             } else {
 993 |                 $key = array($obj[1], $obj[2]);
 994 |             }
 995 | 
 996 |             $ret = array(0=>PDF_TYPE_OBJECT, 'obj'=>$key[0], 'gen'=>$key[1]);
 997 | 
 998 |             // reference to indirect object
 999 |             $object = null;
1000 |             if (isset($this->objects[$key[0]][$key[1]])) {
1001 |                 // this object has been already parsed
1002 |                 $object = $this->objects[$key[0]][$key[1]];
1003 |             } elseif (($offset = $this->findObjectOffset($key)) !== false) {
1004 |                 // parse new object
1005 |                 $this->objects[$key[0]][$key[1]] = $this->getIndirectObject($key[0].'_'.$key[1], $offset, false);
1006 |                 $object = $this->objects[$key[0]][$key[1]];
1007 |             } elseif (($key[1] == 0) && isset($this->objstreamobjs[$key[0]])) {
1008 |                 // Object is in an object stream
1009 |                 $streaminfo = $this->objstreamobjs[$key[0]];
1010 |                 $objs = $streaminfo[0];
1011 |                 if (!isset($this->objstreams[$objs[0]][$objs[1]])) {
1012 |                     // Fetch and decode object stream
1013 |                     $offset = $this->findObjectOffset($objs);;
1014 |                     $objstream = $this->getObjectVal(array(PDF_TYPE_OBJREF, $objs[0], $objs[1]));
1015 |                     $decoded = $this->decodeStream($objstream[1][1], $objstream[2][1]);
1016 |                     $this->objstreams[$objs[0]][$objs[1]] = $decoded[0]; // Store just the data, in case we need more from this objstream
1017 |                     // Free memory
1018 |                     unset($objstream);
1019 |                     unset($decoded);
1020 |                 }
1021 |                 $this->objects[$key[0]][$key[1]] = $this->getRawObject($streaminfo[1], $this->objstreams[$objs[0]][$objs[1]]);
1022 |                 $object = $this->objects[$key[0]][$key[1]];
1023 |             }
1024 |             if (!is_null($object)) {
1025 |                 $ret[1] = $object[0];
1026 |                 if (isset($object[1][0]) && $object[1][0] == PDF_TYPE_STREAM) {
1027 |                     $ret[0] = PDF_TYPE_STREAM;
1028 |                     $ret[2] = $object[1];
1029 |                 }
1030 |                 return $ret;
1031 |             }
1032 |         }
1033 |         return $obj;
1034 |     }
1035 | 
1036 |     /**
1037 |      * Extract object stream to find out what it contains.
1038 |      *
1039 |      */
1040 |     function extractObjectStream($key) {
1041 |         $objref = array(PDF_TYPE_OBJREF, $key[0], $key[1]);
1042 |         $obj = $this->getObjectVal($objref);
1043 |         if ($obj[0] !== PDF_TYPE_STREAM || !isset($obj[1][1]['/First'][1])) {
1044 |             // Not a valid object stream dictionary - skip it.
1045 |             return;
1046 |         }
1047 |         $stream = $this->decodeStream($obj[1][1], $obj[2][1]);// Decode object stream, as we need the first bit
1048 |         $first = intval($obj[1][1]['/First'][1]);
1049 |         $ints = preg_split('/\s/', substr($stream[0], 0, $first)); // Get list of object / offset pairs
1050 |         for ($j=1; $j<count($ints); $j++) {
1051 |             if (($j % 2) == 1) {
1052 |                 $this->objstreamobjs[$ints[$j-1]] = array($key, $ints[$j]+$first);
1053 |             }
1054 |         }
1055 | 
1056 |         // Free memory - we may not need this at all.
1057 |         unset($obj);
1058 |         unset($stream);
1059 |     }
1060 | 
1061 |     /**
1062 |      * Find all object offsets.  Saves having to scour the file multiple times.
1063 |      * @private
1064 |      */
1065 |     private function findObjectOffsets() {
1066 |         $this->objoffsets = array();
1067 |         if (preg_match_all('/(*ANYCRLF)^[\s]*([0-9]+)[\s]+([0-9]+)[\s]+obj/im', $this->pdfdata, $matches, PREG_OFFSET_CAPTURE) >= 1) {
1068 |             $i = 0;
1069 |             $laststreamend = 0;
1070 |             foreach($matches[0] as $match) {
1071 |                 $offset = $match[1] + strspn($match[0], "\x00\x09\x0a\x0c\x0d\x20");
1072 |                 if ($offset < $laststreamend) {
1073 |                     // Contained within another stream, skip it.
1074 |                     continue;
1075 |                 }
1076 |                 $this->objoffsets[trim($match[0])] = $offset;
1077 |                 $dictoffset = $match[1] + strlen($match[0]);
1078 |                 $dictfrag = substr($this->pdfdata, $dictoffset, 256);
1079 |                 if (preg_match('|^\s+<<[^>]+/Length\s+(\d+)|', $dictfrag, $lengthmatch, PREG_OFFSET_CAPTURE) == 1) {
1080 |                     $laststreamend += intval($lengthmatch[1][0]);
1081 |                 }
1082 |                 if (preg_match('|^\s+<<[^>]+/ObjStm|', $dictfrag, $objstm) == 1) {
1083 |                     $this->extractObjectStream(array($matches[1][$i][0], $matches[2][$i][0]));
1084 |                 }
1085 |                 $i++;
1086 |             }
1087 |         }
1088 |         unset($lengthmatch);
1089 |         unset($dictfrag);
1090 |         unset($matches);
1091 |     }
1092 | 
1093 |     /**
1094 |      * Get offset of an object.  Checks xref first, then offsets found by scouring the file.
1095 |      * @param $key (array) Object key to find (obj, gen).
1096 |      * @return int Offset of the object in $this->pdfdata.
1097 |      * @private
1098 |      */
1099 |     private function findObjectOffset($key) {
1100 |         $objref = $key[0].' '.$key[1].' obj';
1101 |         if (isset($this->xref['xref'][$key[0]][$key[1]])) {
1102 |             $offset = $this->xref['xref'][$key[0]][$key[1]];
1103 |             if (strpos($this->pdfdata, $objref, $offset) === $offset) {
1104 |                 // Offset is in xref table and matches actual position in file
1105 |                 //echo "Offset in XREF is correct, returning<br>";
1106 |                 return $this->xref['xref'][$key[0]][$key[1]];
1107 |             }
1108 |         }
1109 |         if (array_key_exists($objref, $this->objoffsets)) {
1110 |             //echo "Offset found in internal reftable<br>";
1111 |             return $this->objoffsets[$objref];
1112 |         }
1113 |         return false;
1114 |     }
1115 | 
1116 |     /**
1117 |      * Decode the specified stream.
1118 |      * @param $sdic (array) Stream's dictionary array.
1119 |      * @param $stream (string) Stream to decode.
1120 |      * @return array containing decoded stream data and remaining filters.
1121 |      * @protected
1122 |      * @since 1.0.000 (2011-06-22)
1123 |      */
1124 |     protected function decodeStream($sdic, $stream) {
1125 |         // get stream lenght and filters
1126 |         $slength = strlen($stream);
1127 |         if ($slength <= 0) {
1128 |             return array('', array());
1129 |         }
1130 |         $filters = array();
1131 |         foreach ($sdic as $k => $v) {
1132 |             if ($v[0] == PDF_TYPE_TOKEN) {
1133 |                 if (($k == '/Length') AND ($v[0] == PDF_TYPE_NUMERIC)) {
1134 |                     // get declared stream lenght
1135 |                     $declength = intval($v[1]);
1136 |                     if ($declength < $slength) {
1137 |                         $stream = substr($stream, 0, $declength);
1138 |                         $slength = $declength;
1139 |                     }
1140 |                 } elseif ($k == '/Filter') {
1141 |                     if ($v[0] == PDF_TYPE_TOKEN) {
1142 |                         // single filter
1143 |                         $filters[] = $v[1];
1144 |                     } elseif ($v[0] == PDF_TYPE_ARRAY) {
1145 |                         // array of filters
1146 |                         foreach ($v[1] as $flt) {
1147 |                             if ($flt[0] == PDF_TYPE_TOKEN) {
1148 |                                 $filters[] = $flt[1];
1149 |                             }
1150 |                         }
1151 |                     }
1152 |                 }
1153 |             }
1154 |         }
1155 |         // decode the stream
1156 |         $remaining_filters = array();
1157 |         foreach ($filters as $filter) {
1158 |             if (in_array($filter, $this->FilterDecoders->getAvailableFilters())) {
1159 |                 $stream = $this->FilterDecoders->decodeFilter($filter, $stream);
1160 |             } else {
1161 |                 // add missing filter to array
1162 |                 $remaining_filters[] = $filter;
1163 |             }
1164 |         }
1165 |         return array($stream, $remaining_filters);
1166 |     }
1167 | 
1168 | 
1169 |     /**
1170 |      * Set pageno
1171 |      *
1172 |      * @param int $pageno Pagenumber to use
1173 |      */
1174 |     public function setPageno($pageno) {
1175 |         $pageno = ((int) $pageno) - 1;
1176 | 
1177 |         if ($pageno < 0 || $pageno >= $this->getPageCount()) {
1178 |             $this->error("Pagenumber is wrong! (Requested $pageno, max ".$this->getPageCount().")");
1179 |         }
1180 | 
1181 |         $this->pageno = $pageno;
1182 |     }
1183 | 
1184 |     /**
1185 |      * Get page-resources from current page
1186 |      *
1187 |      * @return array
1188 |      */
1189 |     public function getPageResources() {
1190 |         return $this->_getPageResources($this->pages[$this->pageno]);
1191 |     }
1192 | 
1193 |     /**
1194 |      * Get page-resources from /Page
1195 |      *
1196 |      * @param array $obj Array of pdf-data
1197 |      */
1198 |     private function _getPageResources ($obj) { // $obj = /Page
1199 |         $obj = $this->getObjectVal($obj);
1200 | 
1201 |         // If the current object has a resources
1202 |         // dictionary associated with it, we use
1203 |         // it. Otherwise, we move back to its
1204 |         // parent object.
1205 |         if (isset ($obj[1][1]['/Resources'])) {
1206 |             $res = $obj[1][1]['/Resources'];
1207 |             if ($res[0] == PDF_TYPE_OBJECT)
1208 |                 return $res[1];
1209 |             return $res;
1210 |         } else {
1211 |             if (!isset ($obj[1][1]['/Parent'])) {
1212 |                 return false;
1213 |             } else {
1214 |                 $res = $this->_getPageResources($obj[1][1]['/Parent']);
1215 |                 if ($res[0] == PDF_TYPE_OBJECT)
1216 |                     return $res[1];
1217 |                 return $res;
1218 |             }
1219 |         }
1220 |     }
1221 | 
1222 |     /**
1223 |      * Get annotations from current page
1224 |      *
1225 |      * @return array
1226 |      */
1227 |     public function getPageAnnotations() {
1228 |         return $this->_getPageAnnotations($this->pages[$this->pageno]);
1229 |     }
1230 | 
1231 |     /**
1232 |      * Get annotations from /Page
1233 |      *
1234 |      * @param array $obj Array of pdf-data
1235 |      */
1236 |     private function _getPageAnnotations ($obj) { // $obj = /Page
1237 |         $obj = $this->getObjectVal($obj);
1238 | 
1239 |         // If the current object has an annotations
1240 |         // dictionary associated with it, we use
1241 |         // it. Otherwise, we move back to its
1242 |         // parent object.
1243 |         if (isset ($obj[1][1]['/Annots'])) {
1244 |             $annots = $obj[1][1]['/Annots'];
1245 |         } else {
1246 |             if (!isset ($obj[1][1]['/Parent'])) {
1247 |                 return false;
1248 |             } else {
1249 |                 $annots = $this->_getPageAnnotations($obj[1][1]['/Parent']);
1250 |             }
1251 |         }
1252 | 
1253 |         if ($annots[0] == PDF_TYPE_OBJREF)
1254 |             return $this->getObjectVal($annots);
1255 |         return $annots;
1256 |     }
1257 | 
1258 | 
1259 |     /**
1260 |      * Get content of current page
1261 |      *
1262 |      * If more /Contents is an array, the streams are concated
1263 |      *
1264 |      * @return string
1265 |      */
1266 |     public function getContent() {
1267 |         $buffer = '';
1268 | 
1269 |         if (isset($this->pages[$this->pageno][1][1]['/Contents'])) {
1270 |             $contents = $this->_getPageContent($this->pages[$this->pageno][1][1]['/Contents']);
1271 |             foreach($contents AS $tmp_content) {
1272 |                 $buffer .= $this->_rebuildContentStream($tmp_content) . ' ';
1273 |             }
1274 |         }
1275 | 
1276 |         return $buffer;
1277 |     }
1278 | 
1279 | 
1280 |     /**
1281 |      * Resolve all content-objects
1282 |      *
1283 |      * @param array $content_ref
1284 |      * @return array
1285 |      */
1286 |     private function _getPageContent($content_ref) {
1287 |         $contents = array();
1288 | 
1289 |         if ($content_ref[0] == PDF_TYPE_OBJREF) {
1290 |             $content = $this->getObjectVal($content_ref);
1291 |             if ($content[1][0] == PDF_TYPE_ARRAY) {
1292 |                 $contents = $this->_getPageContent($content[1]);
1293 |             } else {
1294 |                 $contents[] = $content;
1295 |             }
1296 |         } elseif ($content_ref[0] == PDF_TYPE_ARRAY) {
1297 |             foreach ($content_ref[1] AS $tmp_content_ref) {
1298 |                 $contents = array_merge($contents,$this->_getPageContent($tmp_content_ref));
1299 |             }
1300 |         }
1301 | 
1302 |         return $contents;
1303 |     }
1304 | 
1305 | 
1306 |     /**
1307 |      * Rebuild content-streams
1308 |      *
1309 |      * @param array $obj
1310 |      * @return string
1311 |      */
1312 |     private function _rebuildContentStream($obj) {
1313 |         $filters = array();
1314 | 
1315 |         if (isset($obj[1][1]['/Filter'])) {
1316 |             $_filter = $obj[1][1]['/Filter'];
1317 | 
1318 |             if ($_filter[0] == PDF_TYPE_OBJREF) {
1319 |                 $tmpFilter = $this->getObjectVal($_filter);
1320 |                 $_filter = $tmpFilter[1];
1321 |             }
1322 | 
1323 |             if ($_filter[0] == PDF_TYPE_TOKEN) {
1324 |                 $filters[] = $_filter;
1325 |             } elseif ($_filter[0] == PDF_TYPE_ARRAY) {
1326 |                 $filters = $_filter[1];
1327 |             }
1328 |         }
1329 | 
1330 |         $stream = $obj[2][1];
1331 | 
1332 |         foreach ($filters AS $_filter) {
1333 |             $stream = $this->FilterDecoders->decodeFilter($_filter[1], $stream);
1334 |         }
1335 | 
1336 |         return $stream;
1337 |     }
1338 | 
1339 | 
1340 |     /**
1341 |      * Get a Box from a page
1342 |      * Arrayformat is same as used by fpdf_tpl
1343 |      *
1344 |      * @param array $page a /Page
1345 |      * @param string $box_index Type of Box @see $availableBoxes
1346 |      * @param float Scale factor from user space units to points
1347 |      * @return array
1348 |      */
1349 |     public function getPageBox($page, $box_index, $k) {
1350 |         $page = $this->getObjectVal($page);
1351 |         $box = null;
1352 |         if (isset($page[1][1][$box_index]))
1353 |             $box =& $page[1][1][$box_index];
1354 | 
1355 |         if (!is_null($box) && $box[0] == PDF_TYPE_OBJREF) {
1356 |             $tmp_box = $this->getObjectVal($box);
1357 |             $box = $tmp_box[1];
1358 |         }
1359 | 
1360 |         if (!is_null($box) && $box[0] == PDF_TYPE_ARRAY) {
1361 |             $b =& $box[1];
1362 |             return array('x' => $b[0][1] / $k,
1363 |                          'y' => $b[1][1] / $k,
1364 |                          'w' => abs($b[0][1] - $b[2][1]) / $k,
1365 |                          'h' => abs($b[1][1] - $b[3][1]) / $k,
1366 |                          'llx' => min($b[0][1], $b[2][1]) / $k,
1367 |                          'lly' => min($b[1][1], $b[3][1]) / $k,
1368 |                          'urx' => max($b[0][1], $b[2][1]) / $k,
1369 |                          'ury' => max($b[1][1], $b[3][1]) / $k,
1370 |                          );
1371 |         } elseif (!isset ($page[1][1]['/Parent'])) {
1372 |             return false;
1373 |         } else {
1374 |             return $this->getPageBox($this->getObjectVal($page[1][1]['/Parent']), $box_index, $k);
1375 |         }
1376 |     }
1377 | 
1378 |     /**
1379 |      * Get all page boxes by page no
1380 |      *
1381 |      * @param int The page number
1382 |      * @param float Scale factor from user space units to points
1383 |      * @return array
1384 |      */
1385 |     public function getPageBoxes($pageno, $k) {
1386 |         return $this->_getPageBoxes($this->pages[$pageno - 1], $k);
1387 |     }
1388 | 
1389 |     /**
1390 |      * Get all boxes from /Page
1391 |      *
1392 |      * @param array a /Page
1393 |      * @return array
1394 |      */
1395 |     private function _getPageBoxes($page, $k) {
1396 |         $boxes = array();
1397 | 
1398 |         foreach($this->availableBoxes AS $box) {
1399 |             if ($_box = $this->getPageBox($page, $box, $k)) {
1400 |                 $boxes[$box] = $_box;
1401 |             }
1402 |         }
1403 | 
1404 |         return $boxes;
1405 |     }
1406 | 
1407 |     /**
1408 |      * Get the page rotation by pageno
1409 |      *
1410 |      * @param integer $pageno
1411 |      * @return array
1412 |      */
1413 |     public function getPageRotation($pageno) {
1414 |         return $this->_getPageRotation($this->pages[$pageno - 1]);
1415 |     }
1416 | 
1417 |     private function _getPageRotation($obj) { // $obj = /Page
1418 |         $obj = $this->getObjectVal($obj);
1419 |         if (isset ($obj[1][1]['/Rotate'])) {
1420 |             $res = $this->getObjectVal($obj[1][1]['/Rotate']);
1421 |             if ($res[0] == PDF_TYPE_OBJECT)
1422 |                 return $res[1];
1423 |             return $res;
1424 |         } else {
1425 |             if (!isset ($obj[1][1]['/Parent'])) {
1426 |                 return false;
1427 |             } else {
1428 |                 $res = $this->_getPageRotation($obj[1][1]['/Parent']);
1429 |                 if ($res[0] == PDF_TYPE_OBJECT)
1430 |                     return $res[1];
1431 |                 return $res;
1432 |             }
1433 |         }
1434 |     }
1435 | 
1436 |     /**
1437 |      * This method is automatically called in case of fatal error; it simply outputs the message and halts the execution.
1438 |      * @param $msg (string) The error message
1439 |      * @public
1440 |      * @since 1.0.000 (2011-05-23)
1441 |      */
1442 |     public function Error($msg) {
1443 |         // exit program and print error
1444 |         die("<strong>TCPDI_PARSER ERROR [{$this->uniqueid}]: </strong>".$msg);
1445 |     }
1446 | 
1447 | } // END OF TCPDF_PARSER CLASS
1448 | 
1449 | //============================================================+
1450 | // END OF FILE
1451 | //============================================================+
1452 | 


--------------------------------------------------------------------------------