├── README.markdown ├── filters.php ├── generic.php ├── pdf.php ├── utils.php └── xmp.php /README.markdown: -------------------------------------------------------------------------------- 1 | This is a mostly complete port of [pyPdf](http://pybrary.net/pyPdf/). The API is basically the same. 2 | 3 | page_count; 9 | 10 | ?> 11 | -------------------------------------------------------------------------------- /filters.php: -------------------------------------------------------------------------------- 1 | = 10) && ($predictor <= 15)) { 28 | $output = ''; 29 | 30 | $rowlength = $columns + 1; 31 | assert((strlen($data) % $rowlength) == 0); 32 | $prev_rowdata = array_fill(0, $rowlength, 0); 33 | for ($row=0; $rowdata['/Filter']; 75 | if (!$filters) { 76 | $filters = array(); 77 | } 78 | 79 | if (count($filters) && !is_a($filters[0], 'NameObject')) { 80 | $filters = array($filters); 81 | } 82 | 83 | $data = $stream->stream; 84 | foreach ($filters as $filter_type) { 85 | if ($filter_type == '/FlateDecode') { 86 | $data = FlateDecode::decode($data, $stream->data['/DecodeParms']); 87 | } 88 | } 89 | return $data; 90 | } 91 | ?> -------------------------------------------------------------------------------- /generic.php: -------------------------------------------------------------------------------- 1 | ", "[", "]", "{", "}", "/", "%"); 49 | 50 | class NameObject extends Object { 51 | function NameObject($data) { 52 | $this->data = $data; 53 | } 54 | 55 | function read_from_stream($stream) { 56 | global $name_delimiters; 57 | 58 | $name = fread($stream, 1); 59 | if ($name != '/') { 60 | die("Error reading PDF: name read error."); 61 | } 62 | while (true) { 63 | $tok = fread($stream, 1); 64 | if ((trim($tok) == '') || (in_array($tok, $name_delimiters))) { 65 | fseek($stream, -1, 1); 66 | break; 67 | } 68 | $name .= $tok; 69 | } 70 | return $name; 71 | } 72 | } 73 | 74 | class DictionaryObject extends Object { 75 | function DictionaryObject($data=array()) { 76 | 
$this->data = $data; 77 | } 78 | /* Parses a '<<' ... '>>' dictionary from $stream into an array of key => value pairs read via read_object(); when the 'stream' keyword follows, reads /Length bytes of payload and returns StreamObject::init_from_dict($data) instead of the plain array. */ 79 | function read_from_stream($stream, $pdf) { 80 | $tmp = fread($stream, 2); 81 | if ($tmp != '<<') { 82 | die("Error reading PDF: dictionary read error."); 83 | } 84 | $data = array(); 85 | while (true) { 86 | $tok = read_non_whitespace($stream); 87 | if ($tok == '>') { 88 | fread($stream, 1); 89 | break; 90 | } 91 | fseek($stream, -1, 1); 92 | $key = read_object($stream, $pdf); 93 | $tok = read_non_whitespace($stream); 94 | fseek($stream, -1, 1); 95 | $value = read_object($stream, $pdf); 96 | if (in_array($key, array_keys($data))) { 97 | die("Error reading PDF: multiple definitions in dictionary."); 98 | } 99 | $data[$key] = $value; 100 | } 101 | 102 | $pos = ftell($stream); 103 | $s = read_non_whitespace($stream); 104 | if (($s == 's') && (fread($stream, 5) == 'tream')) { 105 | $eol = fread($stream, 1); 106 | while ($eol == ' ') { 107 | $eol = fread($stream, 1); 108 | } 109 | assert(($eol == "\n") || ($eol == "\r")); 110 | if ($eol == "\r") { 111 | fread($stream, 1); 112 | } 113 | $length = $data['/Length']; 114 | if (is_a($length, 'IndirectObject')) { 115 | $t = ftell($stream); 116 | $length = $pdf->get_object($length); 117 | fseek($stream, $t, 0); 118 | } 119 | $data['__streamdata__'] = fread($stream, $length); 120 | $e = read_non_whitespace($stream); 121 | $ndstream = fread($stream, 8); 122 | if (($e . $ndstream) != "endstream") { /* '.' joins the 1+8 bytes just read; '+' is numeric addition in PHP and can never produce this string */ 123 | $pos = ftell($stream); 124 | fseek($stream, -10, 1); 125 | $end = fread($stream, 9); 126 | if ($end == "endstream") { 127 | $data['__streamdata__'] = substr($data['__streamdata__'], 0, -1); 128 | } else { 129 | fseek($stream, $pos, 0); 130 | die("Error reading PDF: Unable to find 'endstream' marker after stream."); 131 | } 132 | } 133 | } else { 134 | fseek($stream, $pos, 0); 135 | } 136 | if (in_array('__streamdata__', array_keys($data))) { 137 | return StreamObject::init_from_dict($data); 138 | } else { 139 | return $data; 140 | } 141 | } 142 | 143 | } 144 | 145 | class NullObject extends 
Object { 146 | function read_from_stream($stream) { 147 | $nulltxt = fread($stream, 4); 148 | if ($nulltxt != "null") { 149 | die("Error reading PDF: error reading null object."); 150 | } 151 | return new NullObject(); 152 | } 153 | } 154 | 155 | class BooleanObject extends Object { 156 | function BooleanObject($value) { 157 | $this->value = $value; 158 | } 159 | 160 | function read_from_stream($stream) { 161 | $word = fread($stream, 4); 162 | if ($word == "true") { 163 | return new BooleanObject(true); 164 | } else if ($word == "fals") { 165 | fread($stream, 1); 166 | return new BooleanObject(false); 167 | } 168 | assert(false); 169 | } 170 | } 171 | 172 | class ListObject extends Object { 173 | function read_from_stream($stream, $pdf) { 174 | $arr = array(); 175 | $tmp = fread($stream, 1); 176 | if ($tmp != '[') { 177 | die("Error reading PDF: error reading array."); 178 | } 179 | while (true) { 180 | $tok = fread($stream, 1); 181 | while (trim($tok) == '') { 182 | $tok = fread($stream, 1); 183 | } 184 | fseek($stream, -1, 1); 185 | $peekahead = fread($stream, 1); 186 | if ($peekahead == ']') { 187 | break; 188 | } 189 | fseek($stream, -1, 1); 190 | $arr[] = read_object($stream, $pdf); 191 | } 192 | return $arr; 193 | } 194 | } 195 | 196 | class IndirectObject extends Object { 197 | function IndirectObject($idnum, $generation, $pdf) { 198 | $this->idnum = $idnum; 199 | $this->generation = $generation; 200 | $this->pdf = $pdf; 201 | } 202 | 203 | function get_object() { 204 | return $this->pdf->get_object($this); 205 | } 206 | 207 | function read_from_stream($stream, $pdf) { 208 | $idnum = ''; 209 | while (true) { 210 | $tok = fread($stream, 1); 211 | if (trim($tok) == '') { 212 | break; 213 | } 214 | $idnum .= $tok; 215 | } 216 | $generation = ''; 217 | while (true) { 218 | $tok = fread($stream, 1); 219 | if (trim($tok) == '') { 220 | break; 221 | } 222 | $generation .= $tok; 223 | } 224 | $r = fread($stream, 1); 225 | if ($r != "R") { 226 | die("Error reading 
PDF: error reading indirect object reference."); 227 | } 228 | return new IndirectObject((int) $idnum, (int) $generation, $pdf); 229 | } 230 | 231 | function __toString() { 232 | return "IndirectObject({$this->idnum}, {$this->generation})"; 233 | } 234 | } 235 | 236 | class NumberObject extends Object { 237 | function NumberObject($value) { 238 | $this->value = $value; 239 | } 240 | 241 | function read_from_stream($stream) { 242 | $name = ''; 243 | while (true) { 244 | $tok = fread($stream, 1); 245 | if (($tok != '+') && ($tok != '-') && ($tok != '.') && (!ctype_digit($tok))) { 246 | fseek($stream, -1, 1); 247 | break; 248 | } 249 | $name .= $tok; 250 | } 251 | if (strpos($name, '.') !== false) { 252 | return (float) $name; 253 | } else { 254 | return (int) $name; 255 | } 256 | } 257 | } 258 | 259 | class StreamObject extends Object { 260 | function StreamObject() { 261 | $this->stream = null; 262 | $this->data = array(); 263 | } 264 | /* Builds an EncodedStreamObject (when '/Filter' is present) or a DecodedStreamObject from a parsed dictionary array, moving '__streamdata__' into ->stream and dropping '/Length'. Declared static because it is invoked as StreamObject::init_from_dict(). */ 265 | static function init_from_dict($dict) { 266 | if (in_array('/Filter', array_keys($dict))) { 267 | $retval = new EncodedStreamObject(); 268 | } else { 269 | $retval = new DecodedStreamObject(); 270 | } 271 | $retval->stream = $dict['__streamdata__']; 272 | unset($dict['__streamdata__']); 273 | unset($dict['/Length']); 274 | foreach ($dict as $key=>$val) { 275 | $retval->data[$key] = $val; 276 | } 277 | return $retval; 278 | } 279 | /* Returns a new EncodedStreamObject whose ->stream is FlateDecode::encode() of this stream and whose '/Filter' entry has /FlateDecode placed ahead of any filter already set. */ 280 | function flate_encode() { 281 | if (in_array('/Filter', array_keys($this->data))) { 282 | $f = $this->data['/Filter']; 283 | if (is_array($f)) { 284 | array_unshift($f, new NameObject('/FlateDecode')); 285 | } else { 286 | $newf = array(); 287 | $newf[] = new NameObject('/FlateDecode'); 288 | $newf[] = $f; 289 | $f = $newf; 290 | } 291 | } else { 292 | $f = new NameObject('/FlateDecode'); 293 | } 294 | 295 | $retval = new EncodedStreamObject(); 296 | /* dictionary entries live in ->data under string keys such as '/Filter'; the object itself is not array-accessible and an object cannot be an array key */ 297 | $retval->data['/Filter'] = $f; 298 | $retval->stream = FlateDecode::encode($this->stream); 299 | return $retval; 300 | 
} 301 | } 302 | 303 | class DecodedStreamObject extends StreamObject { 304 | function get_data() { 305 | return $this->stream; 306 | } 307 | 308 | function set_data($data) { 309 | $this->stream = $data; 310 | } 311 | } 312 | 313 | class EncodedStreamObject extends StreamObject { 314 | function EncodedStreamObject() { 315 | $this->decoded_self = null; 316 | } 317 | 318 | function get_data() { 319 | if ($this->decoded_self) { 320 | return $this->decoded_self->get_data(); 321 | } 322 | 323 | $decoded = new DecodedStreamObject(); 324 | $decoded->stream = decode_stream_data($this); 325 | foreach ($this->data as $key=>$value) { 326 | if (!in_array($key, array("/Length", "/Filter", "/DecodeParms"))) { 327 | $decoded->data[$key] = $value; 328 | } 329 | } 330 | $this->decoded_self = $decoded; 331 | return $decoded->stream; 332 | } 333 | 334 | function set_data($data) { 335 | $this->stream = $data; 336 | } 337 | } 338 | 339 | function create_string_object($string) { 340 | // UTF16_BIG_ENDIAN_BOM 341 | if (substr($string, 0, 2) == chr(0xFE) . 
chr(0xFF)) { 342 | return utf16_decode($string); 343 | } 344 | 345 | return $string; 346 | } 347 | 348 | function read_hex_string_from_stream($stream) { 349 | fread($stream, 1); 350 | $txt = ''; 351 | $x = ''; 352 | while (true) { 353 | $tok = read_non_whitespace($stream); 354 | if ($tok == '>') { 355 | break; 356 | } 357 | $x .= $tok; 358 | if (strlen($x) == 2) { 359 | $txt .= chr(base_convert($x, 16, 10)); 360 | $x = ''; 361 | } 362 | } 363 | if (strlen($x) == 1) { 364 | $x .= '0'; 365 | } 366 | if (strlen($x) == 2) { 367 | $txt .= chr(base_convert($x, 16, 10)); 368 | } 369 | 370 | return create_string_object($txt); 371 | } 372 | /* Reads a literal string '( ... )' from $stream, tracking nested parentheses and decoding backslash escapes (named escapes, 1-3 digit octal codes, line continuations), and returns create_string_object() of the decoded bytes. */ 373 | function read_string_from_stream($stream) { 374 | $tok = fread($stream, 1); 375 | $parens = 1; 376 | $txt = ''; 377 | while (true) { 378 | $tok = fread($stream, 1); 379 | if ($tok == '(') { 380 | $parens += 1; 381 | } else if ($tok == ')') { 382 | $parens -= 1; 383 | if ($parens == 0) { 384 | break; 385 | } 386 | } else if ($tok == '\\') { 387 | $tok = fread($stream, 1); 388 | if ($tok == 'n') { 389 | $tok = "\n"; 390 | } else if ($tok == 'r') { 391 | $tok = "\r"; 392 | } else if ($tok == 't') { 393 | $tok = "\t"; 394 | } else if ($tok == 'b') { 395 | $tok = chr(8); /* PHP has no "\b" escape; "\b" would be a backslash followed by 'b' */ 396 | } else if ($tok == 'f') { 397 | $tok = "\f"; 398 | } else if ($tok == '(') { 399 | $tok = '('; 400 | } else if ($tok == ')') { 401 | $tok = ')'; 402 | } else if ($tok == '\\') { 403 | $tok = "\\"; 404 | } else if (ctype_digit($tok)) { 405 | for ($i=0; $i<2; $i++) { 406 | $ntok = fread($stream, 1); 407 | if (ctype_digit($ntok)) { 408 | $tok .= $ntok; /* append the digit; '+=' would add the digits numerically */ 409 | } else { 410 | if ($ntok != '') { fseek($stream, -1, 1); } /* the byte that ends a short octal escape belongs to the string, so push it back */ break; 411 | } 412 | } 413 | $tok = chr(base_convert($tok, 8, 10)); 414 | } else if (($tok == "\n") || ($tok == "\r") || ($tok == "\n\r")) { 415 | $tok = fread($stream, 1); 416 | if (!(($tok == "\n") || ($tok == "\r") || ($tok == "\n\r"))) { 417 | fseek($stream, -1, 1); 418 | } 419 | $tok = ''; 420 | } else { 421 | die("Error reading PDF: unexpected escaped string."); 422 | } 423 | } 424 | $txt .= 
$tok; 425 | } 426 | return create_string_object($txt); 427 | } 428 | 429 | class Object { 430 | function get_object() { 431 | return $this; 432 | } 433 | } 434 | 435 | ?> -------------------------------------------------------------------------------- /pdf.php: -------------------------------------------------------------------------------- 1 | stream = $stream; 16 | $this->resolved_objects = array(); 17 | $this->read($stream); 18 | $this->page_count = $this->get_page_count(); 19 | } 20 | 21 | function get_document_info() { 22 | if (!in_array('/Info', array_keys($this->trailer))) { 23 | return null; 24 | } 25 | $info = $this->trailer['/Info']; 26 | $info = $info->get_object(); 27 | return new DocumentInformation($info); 28 | } 29 | 30 | function get_page_count($pages=null) { 31 | if (!$pages) { 32 | $catalog = $this->trailer['/Root']->get_object(); 33 | $pages = $catalog['/Pages']->get_object(); 34 | } 35 | $t = $pages['/Type']; 36 | if ($t == '/Pages') { 37 | if (in_array('/Kids', array_keys($pages['/Kids'][0]->get_object()))) { 38 | $sum = 0; 39 | foreach ($pages['/Kids'] as $page) { 40 | $page = $page->get_object(); 41 | $sum += $this->get_page_count($page); 42 | } 43 | return $sum; 44 | } else { 45 | return count($pages['/Kids']); 46 | } 47 | } 48 | } 49 | 50 | function get_object($indirect_ref) { 51 | $retval = $this->resolved_objects[$indirect_ref->generation]; 52 | if ($retval) { 53 | $retval = $retval[$indirect_ref->idnum]; 54 | if ($retval) { 55 | return $retval; 56 | } 57 | } 58 | 59 | if (($indirect_ref->generation === 0) 60 | and in_array($indirect_ref->idnum, array_keys($this->xref_obj_stm))) { 61 | $stm_num = $this->xref_obj_stm[$indirect_ref->idnum][0]; 62 | $idx = $this->xref_obj_stm[$indirect_ref->idnum][1]; 63 | 64 | $o = new IndirectObject($stm_num, 0, $this); 65 | $obj_stm = $o->get_object(); 66 | $stream_data = tmpfile(); 67 | fwrite($stream_data, $obj_stm->get_data()); 68 | fseek($stream_data, 0); # !!! 
69 | fseek($stream_data, 0); 70 | for ($i=0; $i<$obj_stm->data['/N']; $i++) { 71 | $obj_num = NumberObject::read_from_stream($stream_data); 72 | read_non_whitespace($stream_data); 73 | fseek($stream_data, -1, 1); 74 | $offset = NumberObject::read_from_stream($stream_data); 75 | read_non_whitespace($stream_data); 76 | fseek($stream_data, -1, 1); 77 | $t = ftell($stream_data); 78 | fseek($stream_data, $obj_stm->data['/First'] + $offset, 0); 79 | $obj = read_object($stream_data, $this); 80 | $this->resolved_objects[0][$obj_num] = $obj; 81 | fseek($stream_data, $t, 0); 82 | } 83 | 84 | fclose($stream_data); 85 | return $this->resolved_objects[0][$indirect_ref->idnum]; 86 | } 87 | 88 | $start = $this->xref[$indirect_ref->generation][$indirect_ref->idnum]; 89 | fseek($this->stream, $start, 0); 90 | $header = $this->read_object_header($this->stream); 91 | $idnum = $header[0]; 92 | $generation = $header[1]; 93 | $retval = read_object($this->stream, $this); 94 | 95 | $this->cache_indirect_object($generation, $idnum, $retval); 96 | return $retval; 97 | } 98 | 99 | function read_object_header($stream) { 100 | read_non_whitespace($stream); 101 | fseek($stream, -1, 1); 102 | $idnum = read_until_whitespace($stream); 103 | $generation = read_until_whitespace($stream); 104 | 105 | $obj = fread($stream, 3); 106 | read_non_whitespace($stream); 107 | fseek($stream, -1, 1); 108 | return array((int) $idnum, (int) $generation); 109 | } 110 | 111 | function cache_indirect_object($generation, $idnum, $obj) { 112 | if (!in_array($generation, array_keys($this->resolved_objects))) { 113 | $this->resolved_objects[$generation] = array(); 114 | } 115 | $this->resolved_objects[$generation][$idnum] = $obj; 116 | } 117 | 118 | function read($stream) { 119 | fseek($stream, -1, 2); 120 | $line = ''; 121 | while (!$line) { 122 | $line = $this->read_next_end_line($stream); 123 | } 124 | if (substr($line, 0, 5) != '%%EOF') { 125 | die("Error reading PDF: EOF marker not found."); 126 | } 127 | 128 | 
$line = $this->read_next_end_line($stream); 129 | $startxref = (int) $line; 130 | $line = $this->read_next_end_line($stream); 131 | if (substr($line, 0, 9) != 'startxref') { 132 | die("Error reading PDF: startxref not found."); 133 | } 134 | 135 | $this->xref = array(); 136 | $this->xref_obj_stm = array(); 137 | $this->trailer = array(); 138 | while (1) { 139 | fseek($stream, $startxref, 0); 140 | $x = fread($stream, 1); 141 | if ($x == "x") { 142 | $ref = fread($stream, 4); 143 | if (substr($ref, 0, 3) != 'ref') { 144 | die("Error reading PDF: xref table read error."); 145 | } 146 | read_non_whitespace($stream); 147 | fseek($stream, -1, 1); 148 | while (1) { 149 | $num = read_object($stream, $this); 150 | read_non_whitespace($stream); 151 | fseek($stream, -1, 1); 152 | $size = read_object($stream, $this); 153 | read_non_whitespace($stream); 154 | fseek($stream, -1, 1); 155 | $cnt = 0; 156 | while ($cnt < $size) { 157 | $line = fread($stream, 20); 158 | 159 | if (in_array(substr($line, -1, 1), array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 't'))) { 160 | fseek($stream, -1, 1); 161 | } 162 | $tmp = explode(' ', substr($line, 0, 16)); 163 | $offset = (int) $tmp[0]; 164 | $generation = (int) $tmp[1]; 165 | if (!in_array($generation, array_keys($this->xref))) { 166 | $this->xref[$generation] = array(); 167 | } 168 | if (!$this->xref[$generation][$num]) { 169 | $this->xref[$generation][$num] = $offset; 170 | } 171 | $cnt += 1; 172 | $num += 1; 173 | } 174 | read_non_whitespace($stream); 175 | fseek($stream, -1, 1); 176 | $trailertag = fread($stream, 7); 177 | if ($trailertag != 'trailer') { 178 | fseek($stream, -7, 1); 179 | } else { 180 | break; 181 | } 182 | } 183 | read_non_whitespace($stream); 184 | fseek($stream, -1, 1); 185 | $new_trailer = read_object($stream, $this); 186 | foreach ($new_trailer as $key=>$value) { 187 | if (!in_array($key, array_keys($this->trailer))) { 188 | $this->trailer[$key] = $value; 189 | } 190 | } 191 | if (in_array('/Prev', 
array_keys($new_trailer))) { 192 | $startxref = $new_trailer['/Prev']; 193 | } else { 194 | break; 195 | } 196 | } else if (ctype_digit($x)) { 197 | fseek($stream, -1, 1); 198 | $hdr = $this->read_object_header($stream); 199 | $idnum = $hdr[0]; 200 | $generation = $hdr[1]; 201 | $xrefstream = read_object($stream, $this); 202 | assert($xrefstream->data['/Type'] == '/XRef'); 203 | $this->cache_indirect_object($generation, $idnum, $xrefstream); 204 | $stream_data = $xrefstream->get_data(); 205 | $cursor = 0; 206 | $idx_pairs = $xrefstream->data['/Index']; 207 | if (!$idx_pairs) { 208 | $idx_pairs = array(0, $xrefstream->data['/Size']); 209 | } 210 | $entry_sizes = $xrefstream->data['/W']; 211 | foreach ($this->_pairs($idx_pairs) as $pair) { 212 | $num = $pair[0]; 213 | $size = $pair[1]; 214 | $cnt = 0; 215 | while ($cnt < $size) { 216 | for ($i=0; $ixref))) { 243 | $this->xref[$generation] = array(); 244 | } 245 | if (!in_array($num, array_keys($this->xref[$generation]))) { 246 | $this->xref[$generation][$num] = $byte_offset; 247 | } 248 | } else if ($xref_type == 2) { 249 | if (!in_array($num, array_keys($this->xref_obj_stm))) { 250 | $this->xref_obj_stm[$num] = array($objstr_num, $objstr_idx); 251 | } 252 | } 253 | $cnt += 1; 254 | $num += 1; 255 | } 256 | } 257 | 258 | $trailer_keys = array('/Root', '/Info', '/ID'); 259 | foreach ($trailer_keys as $key) { 260 | if ((in_array($key, array_keys($xrefstream->data))) 261 | and (!in_array($key, array_keys($this->trailer)))) { 262 | $this->trailer[$key] = $xrefstream->data[$key]; 263 | } 264 | } 265 | 266 | if (in_array('/Prev', array_keys($xrefstream->data))) { 267 | $startxref = $xrefstream->data['/Prev']; 268 | } else { 269 | break; 270 | } 271 | } else { 272 | fseek($stream, -11, 1); 273 | $tmp = fread($stream, 20); 274 | $xref_loc = strpos($tmp, 'xref'); 275 | if ($xref_loc !== false) { /* strpos() reports "not found" as false, never -1; testing against -1 made this branch unconditional */ 276 | $startxref -= (10 - $xref_loc); 277 | continue; 278 | } else { 279 | assert(false); 280 | break; 281 | } 282 | } 283 | } 284 | 
285 | // var_dump($this->get_xmp_metadata()); 286 | } 287 | 288 | function get_xmp_metadata() { 289 | $root = $this->trailer['/Root']->get_object(); 290 | $metadata = $root['/Metadata']; 291 | if (!$metadata) { 292 | return null; 293 | } 294 | 295 | $metadata = $metadata->get_object(); 296 | // var_dump($metadata); 297 | return $metadata; 298 | } 299 | 300 | function _pairs($array) { 301 | $i = 0; 302 | $retval = array(); 303 | while (true) { 304 | $retval[] = array($array[$i], $array[$i+1]); 305 | $i += 2; 306 | if ($i+1 >= count($array)) { 307 | break; 308 | } 309 | } 310 | return $retval; 311 | } 312 | 313 | function read_next_end_line($stream) { 314 | $line = ''; 315 | while (true) { 316 | $x = fread($stream, 1); 317 | fseek($stream, -2, SEEK_CUR); 318 | if (($x == "\n") || ($x == "\r")) { 319 | while (($x == "\n") || ($x == "\r")) { 320 | $x = fread($stream, 1); 321 | fseek($stream, -2, SEEK_CUR); 322 | } 323 | fseek($stream, 1, SEEK_CUR); 324 | break; 325 | } else { 326 | $line = $x . 
$line; 327 | } 328 | } 329 | return $line; 330 | } 331 | 332 | } 333 | 334 | $doc_info_keys = array( 335 | 'Title' => 'title', 336 | 'Author' => 'author', 337 | 'Keywords' => 'keywords', 338 | 'Pages' => 'pages', 339 | 'Subject' => 'subject', 340 | 'Creator' => 'creator', 341 | 'Producer' => 'producer', 342 | 'CreationDate' => 'creation_date', 343 | 'ModDate' => 'mod_date'); 344 | 345 | class DocumentInformation { 346 | 347 | function __construct($info) { 348 | global $doc_info_keys; 349 | 350 | $data = array(); 351 | foreach ($info as $key=>$value) { 352 | $key = str_replace('/', '', $key); 353 | $data[$doc_info_keys[$key]] = $value; 354 | } 355 | $this->data = $data; 356 | } 357 | } 358 | 359 | function convert_to_int($d, $size) { 360 | $out = bin2hex($d); 361 | if ($out) { 362 | $out = base_convert($out, 16, 10); 363 | } 364 | return (int) $out; 365 | } 366 | ?> -------------------------------------------------------------------------------- /utils.php: -------------------------------------------------------------------------------- 1 | $v) { 65 | $out .= _hi($k) . " => " . _hi($v) . 
", "; 66 | } 67 | $out .= "}"; 68 | return $out; 69 | } else { 70 | return $o; 71 | } 72 | } 73 | ?> -------------------------------------------------------------------------------- /xmp.php: -------------------------------------------------------------------------------- 1 | [0-9]{4})(-(?P[0-9]{2})(-(?P[0-9]+)(T(?P[0-9]{2}):(?P[0-9]{2})(:(?P[0-9]{2}(.[0-9]+)?))?(?PZ|[-+][0-9]{2}:[0-9]{2}))?)?)?"; 16 | 17 | class XmpInformation extends Object { 18 | 19 | function XmpInformation($stream) { 20 | $this->stream = $stream; 21 | 22 | $doc_root = new DOMDocument(); 23 | $doc_root->loadXML($this->stream->get_data()); 24 | $rdf_els = $doc_root->getElementsByTagNameNS($RDF_NAMESPACE, 'RDF'); 25 | $this->rdf_root = $rdf_els[0]; 26 | 27 | $this->cache = array(); 28 | } 29 | 30 | function get_element($about_uri, $ns, $name) { 31 | $retval = array(); 32 | 33 | $descs = $this->rdf_root->getElementsByTagNameNS($RDF_NAMESPACE, 'Description'); 34 | foreach ($descs as $desc) { 35 | if ($desc->getAttributeNS($RDF_NAMESPACE, 'about') == $about_uri) { 36 | $attr = $desc->getAttributeNodeNS($ns, $name); 37 | if ($attr) { 38 | $retval[] = $attr; 39 | } 40 | foreach ($desc->getElementsByTagNameNS($ns, $name) as $el) { 41 | $retval[] = $el; 42 | } 43 | } 44 | } 45 | 46 | return $retval; 47 | } 48 | 49 | function get_nodes_in_ns($about_uri, $ns) { 50 | $retval = array(); 51 | 52 | $descs = $this->rdf_root->getElementsByTagNameNS($RDF_NAMESPACE, 'Description'); 53 | foreach ($descs as $desc) { 54 | if ($desc->getAttributeNS($RDF_NAMESPACE, 'about') == $about_uri) { 55 | for ($i=0; $i<$desc->attributes->length; $i++) { 56 | $attr = $desc->attributes->item($i); 57 | if ($attr->namespaceURI == $ns) { 58 | $retval[] = $attr; 59 | } 60 | } 61 | 62 | foreach ($desc->childNodes as $child) { 63 | if ($child->namespaceURI == $ns) { 64 | $retval[] = $child; 65 | } 66 | } 67 | } 68 | } 69 | 70 | return $retval; 71 | } 72 | 73 | function _get_text($element) { 74 | $text = ''; 75 | foreach 
($element->childNodes as $child) { 76 | if ($child->nodeType == XML_TEXT_NODE) { 77 | $text .= $child->data; 78 | } 79 | } 80 | 81 | return $text; 82 | } 83 | 84 | function _converter_string($value) { 85 | return $value; 86 | } 87 | /* Parses $value as a timestamp of the form YYYY-MM-DDTHH:MM:SSZ and returns the integer produced by mktime() for those fields. */ 88 | function _converter_date($value) { 89 | // $m = array(); 90 | // preg_match($iso8601, $value, &$m); 91 | // $year = (int) $m['year']; 92 | // if (!$m['month']) { 93 | // $m['month'] = 1; 94 | // } 95 | // $month = (int) $m['month']; 96 | // if (!$m['day']) { 97 | // $m['day'] = 1; 98 | // } 99 | // $day = (int) $m['day']; 100 | // if (!$m['hour']) { 101 | // $m['hour'] = 0; 102 | // } 103 | // $hour = (int) $m['hour']; 104 | // if (!$m['minute']) { 105 | // $m['minute'] = 0; 106 | // } 107 | // $minute = (int) $m['minute']; 108 | // if (!$m['second']) { 109 | // $m['second'] = 0; 110 | // } 111 | // $second = (float) $m['second']; 112 | // $seconds = floor($second); 113 | // $milliseconds = ($second - $seconds) * 1000000; 114 | // if (!$m['tzd']) { 115 | // $m['tzd'] = 'Z'; 116 | // } 117 | // $tzd = $m['tzd']; 118 | /* scan the $value parameter; the previously referenced $tstamp is never defined in this method */ 119 | sscanf($value, "%u-%u-%uT%u:%u:%uZ", $year, $month, $day, $hour, $min, $sec); 120 | return mktime($hour, $min, $sec, $month, $day, $year); 121 | } 122 | 123 | } --------------------------------------------------------------------------------