├── H. G. Wells - The War of the Worlds.epub ├── epubs ├── k │ └── Kris Ottman Neville - Earth Alert.epub ├── c │ └── Cory Doctorow - With a Little Help.epub ├── h │ ├── H. G. Wells - The War of the Worlds.epub │ └── Harold MacGrath - Arms and the Woman.epub └── j │ └── Joseph Smith Fletcher - The Borough Treasurer.epub ├── test.php ├── rename.php ├── OreillyZipListing.php ├── Readme.txt ├── BookGluttonEpubFileFinder.php ├── BookGluttonZipEpub.php └── BookGluttonEpub.php /H. G. Wells - The War of the Worlds.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/H. G. Wells - The War of the Worlds.epub -------------------------------------------------------------------------------- /epubs/k/Kris Ottman Neville - Earth Alert.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/epubs/k/Kris Ottman Neville - Earth Alert.epub -------------------------------------------------------------------------------- /epubs/c/Cory Doctorow - With a Little Help.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/epubs/c/Cory Doctorow - With a Little Help.epub -------------------------------------------------------------------------------- /epubs/h/H. G. Wells - The War of the Worlds.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/epubs/h/H. G. 
Wells - The War of the Worlds.epub -------------------------------------------------------------------------------- /epubs/h/Harold MacGrath - Arms and the Woman.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/epubs/h/Harold MacGrath - Arms and the Woman.epub -------------------------------------------------------------------------------- /epubs/j/Joseph Smith Fletcher - The Borough Treasurer.epub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Vaporbook/BookGluttonEpub/HEAD/epubs/j/Joseph Smith Fletcher - The Borough Treasurer.epub -------------------------------------------------------------------------------- /test.php: -------------------------------------------------------------------------------- 1 | setLogVerbose(true); 13 | $epub->setLogLevel(2); 14 | $epub->open($file); 15 | print_r($epub->getMetaPairs()); 16 | 17 | 18 | echo "Now opening $file as virtual zip (no filesystem on disk):\n"; 19 | 20 | $epub = new BookGluttonZipEpub(); 21 | $epub->enableLogging(); 22 | $epub->loadZip($file); 23 | print_r($epub->getMetaPairs()); 24 | 25 | echo "There are ".$epub->getFlatNav()->length." 
navPoints here.\n"; 26 | echo "NCX:\n"; 27 | foreach($epub->getFlatNav() as $np) { 28 | echo $np->nodeValue."\n"; 29 | } 30 | 31 | 32 | ?> -------------------------------------------------------------------------------- /rename.php: -------------------------------------------------------------------------------- 1 | $cur) { 23 | // anything that is not an .epub is skipped; the rest is counted, parsed and filed under $dir/<author>/ 24 | if(!preg_match('/\.epub$/i',$file)) continue; 25 | 26 | $filesize=$cur->getSize(); 27 | $bytestotal+=$filesize; 28 | $nbfiles++; 29 | echo "$file => $filesize\n"; 30 | 31 | try { 32 | 33 | $epub = new BookGluttonZipEpub(); 34 | $epub->enableLogging(); 35 | $epub->loadZip($file); 36 | $title = $epub->getTitle(); 37 | $author = $epub->getAuthor(); 38 | //$epub->close(); 39 | 40 | // how you do the actual rename is up to you -- our example 41 | // just echoes what the operation will do: 42 | $newtitle = preg_replace('/[\$\'\\\!\`\~\/\>\<\}\{\@\^\*]/',"","$author - $title".".epub"); 43 | echo "rename to ".$newtitle."\n"; 44 | 45 | if(!is_dir("$dir/$author")) { 46 | mkdir("$dir/$author"); 47 | } 48 | 49 | rename($file,"$dir/$author/$newtitle"); 50 | 51 | } catch (Exception $e) { 52 | 53 | // BAD FILES go to bad file GHETTO 54 | 55 | echo "Exception caught:".$e->getMessage()."\n----------\n"; 56 | // $newtitle is only assigned after the throwing calls above, so here it is unset or left over from the previous file; quarantine under the file's own name instead 57 | rename($file,$dir.'/_GHETTO.'.basename($file)); 58 | 59 | echo "Moved to ghetto.\n------------\n=============\n"; 60 | 61 | 62 | } 63 | 64 | } 65 | 66 | $bytestotal=number_format($bytestotal); 67 | echo "Total: $nbfiles files, $bytestotal bytes\n"; 68 | 69 | exit(); 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | ?> -------------------------------------------------------------------------------- /OreillyZipListing.php: -------------------------------------------------------------------------------- 1 | z = $z; 15 | $this->files = array(); 16 | $this->handles = array(); 17 | $this->log("reading zip file ".$z); 18 | $this->_open(); 19 | if(!is_resource($this->zh)) { 20 | $this->log('error '.$this->zh); 21 | exit; 22 | } else { 23 | while($e = 
zip_read($this->zh)) { 24 | $this->files[] = array( 25 | 'name'=>zip_entry_name($e), 26 | 'size'=>zip_entry_filesize($e) 27 | ); 28 | $this->handles[zip_entry_name($e)] = $e; 29 | } 30 | } 31 | $this->_close(); 32 | } 33 | 34 | public function getFiles() 35 | { 36 | return $this->files; 37 | } 38 | 39 | public function findFile($name, $linkonly=false) 40 | { 41 | $this->log('seeking '.$name); 42 | $data = null; 43 | foreach($this->files as $file) { 44 | $basename = pathinfo($file['name'], PATHINFO_BASENAME); 45 | $this->log($basename); 46 | if($basename==$name) { 47 | if($linkonly) { 48 | $data = 'zip://' . $this->z . '#'.$file['name']; 49 | } else { 50 | $data = $this->getFile($file['name']); 51 | } 52 | break; 53 | } else { 54 | //echo "$basename==$name\n"; 55 | } 56 | } 57 | return $data; 58 | } 59 | 60 | 61 | public function getFile($name) 62 | { 63 | $this->_open(); 64 | $contents = ''; 65 | 66 | while($e = zip_read($this->zh)) { 67 | if(zip_entry_name($e)==$name) { 68 | if(zip_entry_open($this->zh, $e)) { 69 | $size = zip_entry_filesize($e); 70 | while($data = zip_entry_read($e,$size)) { 71 | $contents .= $data; 72 | } 73 | } 74 | } 75 | } 76 | 77 | 78 | /* 79 | $e = $this->handles[$name]; 80 | 81 | if(zip_entry_open($this->zh, $e)) { 82 | $size = zip_entry_filesize($e); 83 | while($data = zip_entry_read($e,$size)) { 84 | $contents .= $data; 85 | } 86 | } else { 87 | $this->log('could not open'); 88 | } 89 | */ 90 | $this->_close(); 91 | return $contents; 92 | } 93 | 94 | public function log($msg, $level=0) 95 | { 96 | 97 | //echo $msg . 
"\n"; 98 | 99 | } 100 | private function _close() 101 | { 102 | zip_close($this->zh); 103 | } 104 | private function _open() 105 | { 106 | $this->zh = zip_open($this->z); 107 | } 108 | 109 | 110 | 111 | 112 | 113 | } -------------------------------------------------------------------------------- /Readme.txt: -------------------------------------------------------------------------------- 1 | BookGluttonEpub 2 | 3 | Copyright (c) 2011, Aaron Miller 4 | 5 | Licensed under the MIT license. 6 | 7 | Core class for the BookGlutton publishing and social reading platform. Includes OPS virtualization, zip container manipulation, and more, in a single convenient class library. Zip container and file finder helper classes included. 8 | 9 | DEPS 10 | 11 | Requires epubcheck, zip, tidy, java, PHP mods for zip, dom_document, simple_xml, xpath, possibly others... 12 | 13 | API documentation 14 | 15 | Coming soon - There are many helpful convenience methods, and helpful comments, in the code. Please browse the main class file for more info on how to use this library. 16 | 17 | TODO 18 | 19 | 1.Clean up the code! Production-tested but messy. 20 | 2.Add some test scripts and test epub/OPS content 21 | 22 | Usage Examples: 23 | 24 | Please see the test.php script for the simplest possible example of usage. More involved test scripts will be added when I get time. Do look through the main class file at some of the methods available. There is much useful there. 25 | 26 | 27 | 1. Open an epub from a file: 28 | 29 | 30 | $epub = new BookGluttonEpub(); 31 | 32 | $epub->open($epub_filename); 33 | 34 | 35 | 2. Load an OPS structure into a virtualized Epub: 36 | 37 | 38 | $epub = new BookGluttonEpub(); 39 | 40 | $epub->loadOPS($path_to_ops); 41 | 42 | 43 | 3. Open a remote epub by URL and echo its ISBN: 44 | 45 | 46 | $epub = new BookGluttonEpub(); 47 | 48 | $epub->openRemote($href); 49 | 50 | $epub->setPretty(true); 51 | 52 | echo $epub->getIsbn(); 53 | 54 | 55 | 4. 
Open an epub as a virtual zip epub and unzip its contents into an OPS structure: 56 | 57 | 58 | $epub = new BookGluttonZipEpub(); 59 | 60 | $epub->ingestZipData($zipdata, $book->getPackagePath()); 61 | 62 | print_r($epub->getMetaPairs()); 63 | 64 | 65 | 66 | 5. Load remote, modify and save local to OPS: 67 | 68 | 69 | $epub = new BookGluttonEpub(); 70 | 71 | $epub->openRemote($href); 72 | 73 | $epub->setTitle($book->getTitle()); 74 | 75 | $epub->setAuthor($book->getAuthor()); 76 | 77 | $epub->setDescription($book->getDescription()); 78 | 79 | $epub->setRights($book->getRights()); 80 | 81 | $epub->writeOPS(); 82 | 83 | 84 | 6. Create a new virtual OPS, then load an HTML conversion source, then save locally as OPS: 85 | 86 | 87 | $epub = new BookGluttonEpub(); 88 | 89 | $epub->create(array( 90 | 'title'=>$book->getTitle(), 91 | 'author'=>$book->getAuthor(), 92 | 'language'=>$book->getLanguage(), 93 | 'desc'=>$book->getDescription(), 94 | 'rights'=>$book->getRights() 95 | 96 | )); 97 | 98 | $epub->loadSource($zipped_html_or_html); 99 | 100 | $epub->moveOps($ops_repo_root, $unique_package_directory_id); 101 | 102 | 103 | 104 | 105 | -------------------------------------------------------------------------------- /BookGluttonEpubFileFinder.php: -------------------------------------------------------------------------------- 1 | repo = $repo; 19 | $this->epubfiles = array(); 20 | $this->zipfiles = array(); 21 | $this->basefindcmd = $findcmd; 22 | $this->findcmd = $this->basefindcmd; 23 | 24 | if(file_exists($this->basefindcmd)) { 25 | 26 | // NOTE(review): $this->repo and $exp are interpolated into shell commands unescaped -- confirm callers only pass trusted values 27 | 28 | echo "Creating index of epub files\n"; 29 | $epubfindcmd = $this->basefindcmd." ".$this->repo." -name '".$exp."' -printf \"%T@ %p\n\""; 30 | error_log($epubfindcmd); 31 | 32 | exec($epubfindcmd, $this->epubfiles); 33 | rsort($this->epubfiles); // sort by newest first 34 | echo "indexed ".count($this->epubfiles)." epub files.\n"; 35 | 36 | 37 | 38 | echo "Creating index of zipfiles\n"; 39 | $zipfindcmd = $this->basefindcmd." 
".$this->repo." -iname '*.zip' -printf \"%T@ %p\n\""; 40 | error_log($zipfindcmd); 41 | $zips = array(); 42 | exec($zipfindcmd, $zips); 43 | rsort($zips); // sort by most recent first 44 | foreach($zips as $zip) { 45 | $parts = explode(' ', $zip, 2); // each line is "<mtime> <path>": split on the first space only so paths containing spaces stay whole 46 | $zipfile = $parts[1]; 47 | $zl = new OreillyZipListing($zipfile); 48 | $this->zipfiles[$zipfile] = $zl->getFiles(); 49 | } 50 | echo "indexed ".count($this->zipfiles)." zip archive files.\n"; 51 | 52 | } else { 53 | 54 | throw new Exception("No find command found, cannot index files"); 55 | 56 | } 57 | 58 | 59 | } 60 | /** Returns the path of the first indexed epub whose basename equals $fileext, or null when none matches. */ 61 | public function findFileMatch($fileext) 62 | { 63 | // will find most recent epub file in repo 64 | $epubfiles = $this->epubfiles; 65 | $match = null; 66 | foreach($epubfiles as $file) { 67 | $parts = explode(' ', $file, 2); // limit 2: only the first space separates the timestamp from the path 68 | //echo pathinfo($parts[1],PATHINFO_BASENAME)."==".$fileext."\n"; 69 | if(pathinfo($parts[1],PATHINFO_BASENAME)==$fileext) { 70 | $match = $parts[1]; 71 | break; 72 | } 73 | } 74 | return $match; 75 | 76 | } 77 | 78 | 79 | public function findFileMatchInZips($fileext) 80 | { 81 | $zipfiles = $this->zipfiles; 82 | $match = null; 83 | // get a matching list with each line prefixed with unix timestamp 84 | foreach($zipfiles as $zipfile=>$files) 85 | { 86 | foreach($files as $file) { 87 | //echo pathinfo($file['name'],PATHINFO_BASENAME)."==".$fileext."\n"; 88 | if(pathinfo($file['name'],PATHINFO_BASENAME)==$fileext) { 89 | $match = 'zip://' . $zipfile . 
'#'.$file['name']; 90 | break 2; // leave both loops: archives are indexed newest first, and a plain break let every older archive overwrite $match 91 | } 92 | } 93 | } 94 | return $match; 95 | } 96 | /** Debug helper: print_r()s the zip index followed by the epub index. */ 97 | public function dumpFileListings() 98 | { 99 | print_r($this->zipfiles); 100 | print_r($this->epubfiles); 101 | } 102 | 103 | 104 | 105 | 106 | } 107 | 108 | ?> -------------------------------------------------------------------------------- /BookGluttonZipEpub.php: -------------------------------------------------------------------------------- 1 | apache_user = 'apache'; 11 | $this->logecho = false; 12 | parent::__construct(); 13 | } 14 | /** Lists the archive's entries, then reads META-INF/container.xml, the OPF it names and the NCX from the zip; throws Exception when the archive or container cannot be read. */ 15 | public function loadZip($file) 16 | { 17 | 18 | $this->zipfile = $file; 19 | $this->_za = null; 20 | $this->filelist = array(); 21 | $this->log("reading zip file ".$file); 22 | $this->_openZip(); 23 | if(!is_resource($this->ziphandle)) { 24 | $this->log('error '.$this->ziphandle); 25 | throw new Exception('could not open zip data'); 26 | } else { 27 | while($e = zip_read($this->ziphandle)) { 28 | $this->filelist[] = array( 29 | 'name'=>zip_entry_name($e), 30 | 'size'=>zip_entry_filesize($e) 31 | ); 32 | if(zip_entry_name($e)=='META-INF/container.xml') { 33 | // stub TODO 34 | } 35 | } 36 | } 37 | $this->_closeZip(); 38 | 39 | $xml = $this->getFile('META-INF/container.xml'); 40 | $this->container = simplexml_load_string($xml); 41 | if(!$this->container) { 42 | throw new Exception('cannot find or parse container doc'); 43 | } else { 44 | if(!is_object($this->container->rootfiles->rootfile)) { 45 | throw new Exception('could not get rootfile element from container doc'); 46 | } 47 | $atts = $this->container->rootfiles->rootfile->attributes(); 48 | $this->opfpath = $atts['full-path']; 49 | $this->opfroot = pathinfo($this->opfpath, PATHINFO_DIRNAME); 50 | error_log('opfroot is '.$this->opfroot); 51 | $this->opfXML = $this->getFile($this->opfpath); 52 | $this->opf = parent::makeOpfDoc($this->opfXML); 53 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 54 | $type = $item->getAttribute("media-type"); 55 | 
if($type=="application/x-dtbncx+xml") { 56 | $this->ncxpath = $this->getNcxPath($item->getAttribute("href")); 57 | $this->ncxXML = $this->getFile($this->opfroot . '/' . $this->ncxpath); 58 | if($this->ncxXML) { 59 | $ncxfound = true; 60 | break; 61 | } else { 62 | $this->log('ncx '.$this->opfroot . '/' . $this->ncxpath.' not located'); // report the path that was actually looked up; $path was never defined in this method 63 | } 64 | 65 | } 66 | } 67 | 68 | $this->ncx = parent::makeNcxDoc($this->ncxXML); 69 | 70 | return true; 71 | } 72 | } 73 | /** Returns the raw OPF XML string that loadZip() read from the archive. */ 74 | public function getOpfXML() 75 | { 76 | return $this->opfXML; 77 | } 78 | 79 | public function ingestZipData($data, $opspath=null) 80 | { 81 | 82 | // used when pulling an epub from a zipped archive 83 | // first writes it to disk, then ingests it whole 84 | if($opspath) { 85 | if(!is_dir($opspath)) { 86 | throw new Exception('ops path passed is not a directory!'); 87 | } 88 | } 89 | 90 | $tmpfile = Util::getTempDir().'/epubimport'.time().'.epub'; 91 | 92 | if(file_put_contents($tmpfile, $data)) { 93 | if($this->loadZip($tmpfile)) { 94 | if($opspath) { 95 | 96 | // TODO eliminate need for ZipArchive memory bloat. 
For now, it's a shortcut 97 | // idea here is to dump to a path if needing OPS, if not, 98 | // we should one day offer the option to store structure and/or zipdata 99 | // in memory or on disk 100 | 101 | 102 | if($puts = $this->writeFiles($opspath)) { 103 | 104 | parent::loadOPS($opspath); 105 | 106 | } else { 107 | 108 | $this->log("no files were written."); 109 | 110 | } 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | /* 120 | $this->_za = new ZipArchive(); 121 | 122 | // TODO we dont have to store OPS in disk, could use memory 123 | 124 | if ($this->_za->open($tmpfile) === TRUE) { 125 | echo "opened\n"; 126 | if($this->_za->extractTo($opspath)) { 127 | echo "extracted\n"; 128 | $this->_za->close(); 129 | echo "closed\n"; 130 | if(!file_exists($opspath.'/META-INF/container.xml')) { 131 | echo "throwing up\n"; 132 | throw new Exception('no container found in package!'); 133 | } else { 134 | echo file_get_contents($opspath.'/META-INF/container.xml'); 135 | } 136 | echo "calling parent\n"; 137 | parent::loadOPS($opspath); 138 | echo "loaded OPS\n"; 139 | } else { 140 | echo "could not extract this bitch\n"; 141 | } 142 | } else { 143 | echo "throwing up again\n"; 144 | throw new Exception('could not open zip archive!'); 145 | } 146 | */ 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | } 171 | 172 | // TODO we dont have to get rid of the zip data 173 | $data = null; 174 | 175 | // TODO we dont have to get rid of the stored zip data either 176 | unlink($tmpfile); 177 | } 178 | } else { 179 | throw new Exception('EPUB file could not be stored locally for import'); 180 | } 181 | 182 | } 183 | 184 | public function writeFiles($path) 185 | { 186 | $this->_openZip(); 187 | $puts = 0; 188 | while($e = zip_read($this->ziphandle)) { 189 | if(zip_entry_open($this->ziphandle, $e)) { 190 | //$this->log('opened entry'); 191 | $size = zip_entry_filesize($e); 192 | while($data = 
zip_entry_read($e,$size)) { 193 | 194 | $fullpath = $path .'/'. zip_entry_name($e); 195 | 196 | $dirpath = $path .'/'. pathinfo(zip_entry_name($e), PATHINFO_DIRNAME); 197 | // create the target directory only when it is missing; mkdir() warns on an existing one and this runs for every entry 198 | if(!is_dir($dirpath)) { mkdir($dirpath .'/', 03775, true); } 199 | 200 | //chmod($dirpath . '/', 03775); 201 | //chown($dirpath, $this->apache_user); 202 | //chgrp($dirpath, get_current_user()); 203 | 204 | if(file_put_contents($fullpath, $data)) { 205 | //echo $path .'/'. zip_entry_name($e)." written\n"; 206 | $puts++; 207 | } else { 208 | $this->log($path .'/'. zip_entry_name($e) . "failed to write as user ".getmyuid().':'.get_current_user()); 209 | } 210 | } 211 | } 212 | } 213 | return $puts; 214 | } 215 | /** Reads the archive entry named $name and returns its data ('' when the archive cannot be opened or no such entry is read). */ 216 | public function getFile($name) 217 | { 218 | $this->_openZip(); 219 | $contents = ''; 220 | // a failed zip_open() leaves a non-resource in ziphandle; skip the scan rather than hand that to zip_read() 221 | while(is_resource($this->ziphandle) && ($e = zip_read($this->ziphandle))) { 222 | if(zip_entry_name($e)==$name) { 223 | if(zip_entry_open($this->ziphandle, $e)) { 224 | $size = zip_entry_filesize($e); 225 | while($data = zip_entry_read($e,$size)) { 226 | $contents .= $data; 227 | } 228 | } 229 | } 230 | } 231 | $this->_closeZip(); // every call opens a fresh handle, so release it here instead of leaking it 232 | return $contents; 233 | 234 | } 235 | 236 | public function log($msg, $level=0) 237 | { 238 | 239 | 240 | if($this->logecho) { 241 | echo $msg."\n"; 242 | } else { 243 | error_log($msg); 244 | } 245 | 246 | } 247 | /** Closes the current zip handle when one is open; otherwise does nothing. */ 248 | private function _closeZip() 249 | { 250 | if(isset($this->ziphandle) && is_resource($this->ziphandle)) { zip_close($this->ziphandle); } // safe when never opened, failed to open, or already closed (e.g. from __destruct) 251 | } 252 | /** Opens $this->zipfile and stores the resulting handle (or failure value) in $this->ziphandle. */ 253 | private function _openZip() 254 | { 255 | $this->ziphandle = zip_open($this->zipfile); 256 | if(!is_resource($this->ziphandle)) { // zip_open() reports failure as false or an integer error code, and the latter is truthy 257 | $this->log('could not open zipfile'); 258 | } 259 | } 260 | 261 | public function enableLogging() 262 | { 263 | $this->logecho = true; 264 | $this->log('logging enabled'); 265 | } 266 | 267 | public function __destruct() 268 | { 269 | $this->_closeZip(); 270 | } 271 | 272 | 273 | } 274 | 275 | 276 | ?> -------------------------------------------------------------------------------- /BookGluttonEpub.php: -------------------------------------------------------------------------------- 1 | logverbose = 
true; 15 | $this->loglevel = 0; 16 | $this->m = null; 17 | $this->dcdata = array(); 18 | $this->ncxXP = null; 19 | $this->opfXP = null; 20 | $this->prettyPrint = true; 21 | $this->readonly = false; 22 | $this->maxblocks = 3000; 23 | $this->tidyloc = TIDY_LOC; 24 | $this->java = JAVA_LOC; 25 | $this->epubcheck = $this->java . ' -jar '.EPUBCHECK; 26 | $this->epubcheck_ckstring = 'Epubcheck Version 1.0.3 No errors or warnings'; 27 | $this->opf = null; 28 | $this->opfNS = "http://www.idpf.org/2007/opf"; // NO trailing slash 29 | $this->dcNS = "http://purl.org/dc/elements/1.1/"; // NEEDS trailing slash to validate 30 | $this->doctypeNISO = "-//NISO//DTD ncx 2005-1//EN"; 31 | $this->ncxNS = "http://www.daisy.org/z3986/2005/ncx/"; 32 | $this->opsmime = "application/oebps-package+xml"; 33 | $this->daisydtd = "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd"; 34 | $this->ncxmime = "application/x-dtbncx+xml"; 35 | $this->packageVersion = "2.0"; 36 | $this->xmllang = "en-US"; 37 | $this->title = ''; 38 | $this->author = ''; 39 | $this->zipQ = array(); 40 | $this->_za = null; 41 | $this->ocf_filename = null; // temporary epub filename 42 | $this->useNavDivs = false; 43 | $this->useNavDocs = true; 44 | $this->includecover = false; 45 | $this->workpath = DiskUtil::getTempDir(); // this will be the path to write the epub to 46 | $this->opsname = uniqid(); // unique name for ops container directory 47 | $this->packagepath = $this->workpath . '/' .$this->opsname; // this will be the working package dir (ops) 48 | $this->ncxpath = "index.ncx"; 49 | $this->opfpath = "index.opf"; 50 | $this->mimetypepath = $this->packagepath . '/mimetype'; // filename of mimetype file 51 | $this->metapath = $this->packagepath . '/META-INF'; 52 | $this->opspath = $this->packagepath; 53 | $this->navmaplabel = 'Table of Contents'; 54 | $this->uniqIDscheme = "PrimaryID"; 55 | $this->uniqIDval = 'not set'; 56 | $this->opsrel = ''; // this is the rel path within the package structure where content is found. 
57 | 58 | $this->ziphandle_limit = 100; // system-dependent 59 | $this->suppress_purify = true; 60 | $this->conversionIndexParsed = false; 61 | $this->conversionMetasParsed = false; 62 | $this->hasPrimaryIdSet = false; 63 | $this->ncxCurPlayOrder = 0; 64 | $this->ncxGeneratedDepth = 0; 65 | $this->ncxGeneratedLength = 0; 66 | $this->ncxGeneratedNavMapCurPoint = null; 67 | $this->ncxLastDepth = 0; 68 | $this->preflight = array(); 69 | $this->tmpdump = null; 70 | $this->ncxSpineadd = array(); 71 | $this->zipstem = ''; 72 | $this->unsavedchanges = false; 73 | $this->deferredSpine = false; 74 | $this->xhtml11doctype = ''; 75 | $this->htmltag = ''; 76 | // A default XHTML 1.1 template for importing new content 77 | $this->doctmpl =<< 79 | 80 | 81 | 82 | END 83 | ; 84 | 85 | $this->logerr('BookGluttonEpub instantiated', 1); 86 | 87 | } 88 | public function setReadonly($bool) 89 | { 90 | $this->readonly = $bool; 91 | } 92 | 93 | public function create($meta=array('title'=>null, 94 | 'author'=>null, 95 | 'language'=>null, 96 | 'desc'=>null, 97 | 'rights'=>null)) 98 | { 99 | 100 | } 101 | 102 | 103 | public function createFromAssets($root=null, $files) 104 | { 105 | /** 106 | Expects a path to an empty directory in which to create the OPS, 107 | and an array of assets, each of which is a hash of path, content 108 | keyvalues. content should be base64 encoded and will be decoded and 109 | written to path relative to root. 
110 | */ 111 | 112 | if(!$root) { 113 | $root = $this->packagepath; // defaults to uniqid in temp dir 114 | } 115 | if(!file_exists($root)) { 116 | DiskUtil::makeDir($root); 117 | } 118 | 119 | $this->ocf_filename = $this->_makeEpubTargetFromAssets($root, $files); 120 | 121 | /* 122 | foreach($files as $file) { 123 | $file['content'] = base64_decode($file['content']); 124 | $this->writeFile($file); 125 | } 126 | */ 127 | 128 | //$this->loadOPS($root); 129 | $this->open($this->ocf_filename); 130 | $this->_saveMeta(); 131 | } 132 | 133 | 134 | private function _makeEpubTargetFromAssets($packagedir, $files) 135 | { 136 | 137 | // expects base64 encoded content!!!! 138 | 139 | // see the other function using ZipArchive for 140 | // notes on why we do it this way 141 | 142 | $this->mimetypepath = $packagedir . '/mimetype'; 143 | 144 | $this->_writeFile($this->mimetypepath, $this->getMimetypeString()); 145 | 146 | if(file_exists($this->mimetypepath)) { 147 | $this->logerr('success!'); 148 | } else { 149 | throw new Exception('could not create a mimetype file for this ops structure'); 150 | } 151 | 152 | $arcname = $packagedir.'.epub'; 153 | //error_log('created archive file:'.$arcname); 154 | $zipcmd = ZIP_LOC; // path to zip command 155 | $zipflags = '-0 -j -X'; 156 | $zipcmdfull = "$zipcmd $zipflags $arcname $this->mimetypepath"; 157 | 158 | exec(escapeshellcmd($zipcmdfull), $output); 159 | $this->logerr('output was:'.print_r($output, true)); 160 | 161 | $zip = new ZipArchive(); 162 | 163 | if($zip->open($arcname)!==TRUE) { 164 | $this->logerr("cannot open <$arcname>"); 165 | } 166 | $zipq = array(); 167 | // asm: the following line causes problems reading these on stanza, so leave commented 168 | $zip->addEmptyDir('META-INF'); 169 | 170 | $dirnames = array(); 171 | foreach($files as $file) { 172 | $pi = pathinfo($file['path']); 173 | if($pi['dirname']!="." 
&& $pi['dirname'] != "..") { 174 | $dirnames[$pi['dirname']] = $pi['dirname']; 175 | } 176 | } 177 | 178 | $fullpath = ""; 179 | // make sure dirs exist 180 | foreach($dirnames as $dirpath=>$bool) { 181 | $dirs = explode('/', $dirpath); 182 | $fullpath = ""; 183 | foreach($dirs as $step) { 184 | if($fullpath=="") { 185 | $fullpath = $step; 186 | } else { 187 | $fullpath = $fullpath . '/'.$step; 188 | } 189 | $zip->addEmptyDir($fullpath); 190 | } 191 | } 192 | $filenum = 1; 193 | foreach($files as $file) { 194 | if($file['path']=='mimetype') continue; 195 | $filenum++; 196 | // SEE NOTE ABOUT FILE HANDLE LIMITS 197 | if($filenum > $this->ziphandle_limit) { 198 | $zip->close(); 199 | $zip->open($arcname); 200 | $filenum = 1; 201 | } 202 | $zip->addFromString($file['path'], base64_decode($file['content'])); 203 | } 204 | $zip->close(); 205 | $arctmp = UPLOAD_DIR.'/'.uniqid().'.epub'; 206 | copy($arcname, $arctmp); 207 | $zipflags = "-F $arctmp"; 208 | $zipcmdfull = "$zipcmd $zipflags"; 209 | if(!exec(escapeshellcmd($zipcmdfull), $output)) { 210 | throw new Exception('could not fix zip file'); 211 | } 212 | //error_log('zip -F output:'.print_r($output, true)); 213 | unlink($arcname); 214 | DiskUtil::xRename($arctmp, $arcname); 215 | return $arcname; 216 | 217 | } 218 | 219 | 220 | public function open($epub) 221 | { 222 | 223 | $this->_za = new ZipArchive(); 224 | if ($this->_za->open($epub) === TRUE) { 225 | $this->numFiles = $this->_za->numFiles; 226 | 227 | //$this->logerr('files found in archive are:'); 228 | $this->packagepath = $this->workpath . '/' .$this->opsname; // this will be the working package dir (ops) 229 | $this->_za->extractTo($this->packagepath); 230 | $this->mimetypepath = $this->packagepath . '/mimetype'; // filename of mimetype file 231 | $this->metapath = $this->packagepath . 
'/META-INF'; 232 | $this->opspath = $this->packagepath; 233 | 234 | $this->loadOPS($this->packagepath); 235 | 236 | } else { 237 | throw (new Exception('cannot open zipfile:'.$epub)); 238 | } 239 | } 240 | //_makeEpubTmp() 241 | 242 | public function openRemote($href) 243 | { 244 | $tmpfile = DiskUtil::getTempDir().'/epubimport'.time().'.epub'; 245 | if(file_put_contents($tmpfile, file_get_contents($href))) { 246 | $this->open($tmpfile); 247 | return $tmpfile; 248 | } else { 249 | throw new Exception('EPUB file could not be stored locally for import'); 250 | } 251 | } 252 | 253 | public function ingestRaw($data) 254 | { 255 | $tmpfile = DiskUtil::getTempDir().'/epubimport'.time().'.epub'; 256 | 257 | if(file_put_contents($tmpfile, $data)) { 258 | $this->open($tmpfile); 259 | // return tmpfile location so it can be cleaned up 260 | return $tmpfile; 261 | } else { 262 | throw new Exception('EPUB file could not be stored locally for import'); 263 | } 264 | } 265 | 266 | public function isWritable() 267 | { 268 | 269 | $path = $this->packagepath; 270 | $mimetype = $path . '/mimetype'; 271 | $opf = $path . '/'. $this->opfpath; 272 | $ncx = $path . '/'. $this->ncxpath; 273 | $metafile = $this->metapath.'/container.xml'; 274 | @$res = is_writable($path) && 275 | is_writable($mimetype) && 276 | is_writable($opf) && 277 | is_writable($ncx) && 278 | is_writable($metafile); 279 | return $res; 280 | } 281 | 282 | public function opfWritable() 283 | { 284 | return is_writable($this->packagepath . '/'. $this->opfpath); 285 | } 286 | /** True when the NCX file inside the working package directory is writable. */ 287 | public function ncxWritable() 288 | { 289 | return is_writable($this->packagepath . '/'. $this->ncxpath); // was testing opfpath, a copy/paste of opfWritable(); isWritable() builds the NCX path the same way 290 | } 291 | 292 | public function mimeWritable() 293 | { 294 | return is_writable($this->packagepath . 
'/mimetype'); 295 | } 296 | 297 | public function pathWritable() 298 | { 299 | return is_writable($this->packagepath); 300 | } 301 | 302 | public function metapathWritable() 303 | { 304 | return is_writable($this->metapath); 305 | } 306 | 307 | public function enableWrite() 308 | { 309 | $path = $this->packagepath; 310 | $mimetype = $path . '/mimetype'; 311 | $opf = $path . '/'. $this->opfpath; 312 | $ncx = $path . '/'. $this->ncxpath; 313 | $metafile = $this->metapath.'/container.xml'; 314 | $changes = array($path, $mimetype, $opf, $ncx, $metafile); 315 | $success = 0; 316 | foreach($changes as $f) { 317 | 318 | if(chmod($f, 0777)) { 319 | $success++; 320 | } 321 | 322 | } 323 | return ($success===count($changes)); 324 | } 325 | 326 | public function disableWrite($mode = 0644) 327 | { 328 | $path = $this->packagepath; 329 | $mimetype = $path . '/mimetype'; 330 | $opf = $path . '/'. $this->opfpath; 331 | $ncx = $path . '/'. $this->ncxpath; 332 | $metafile = $this->metapath.'/container.xml'; 333 | $changes = array($path, $mimetype, $opf, $ncx, $metafile); 334 | $success = 0; 335 | foreach($changes as $f) { 336 | if(is_dir($f)) { 337 | if(chmod($f, 0755)) { 338 | $success++; 339 | } 340 | } else { 341 | if(chmod($f, $mode)) { 342 | $success++; 343 | } 344 | } 345 | } 346 | return ($success===count($changes)); 347 | } 348 | 349 | public function upgradeMetadata() 350 | { 351 | $pack = $this->getPackageEl(); 352 | if($pack->getElementsByTagName('dc-metadata')->length > 0) { 353 | $dcmetadata = $pack->getElementsByTagName('dc-metadata')->item(0); 354 | if(!($metadata = $pack->getElementsByTagName('metadata')->item(0))) { 355 | $metadata = $pack->appendChild($this->getOpfDoc()->createElement('metadata')); 356 | } 357 | $children = $dcmetadata->childNodes; 358 | for($i = 0; $i < $children->length; $i++) { 359 | $meta = $children->item($i); 360 | $metadata->appendChild($meta->cloneNode(true)); 361 | } 362 | $metadata->removeChild($dcmetadata); 363 | } 364 | } 365 | 366 | 
public function regeneratePrimaryId() 367 | { 368 | 369 | $uuid = uuidGen::generateUuid(); 370 | $content = 'urn:uuid:'.$uuid; 371 | $this->replacePrimaryIdValue($content); 372 | return $content; 373 | 374 | } 375 | 376 | 377 | public function getPackageEl() 378 | { 379 | return $this->getOpfDoc()->getElementsByTagName('package')->item(0); 380 | } 381 | 382 | 383 | public function loadOPS($path) 384 | { 385 | // loads a local OPS file structure into xml containers 386 | 387 | if(!$this->_za && class_exists('ZipArchive')) { 388 | $this->_za = new ZipArchive(); 389 | } else { 390 | $this->_za = null; 391 | } 392 | $this->packagepath = $path; // this will be the working package dir (ops) 393 | $this->opspath = $this->packagepath; // alias 394 | $this->mimetypepath = $this->packagepath . '/mimetype'; // filename of mimetype file 395 | $this->metapath = $this->packagepath . '/META-INF'; 396 | 397 | // CONTENT.XML 398 | 399 | $this->contdoc = $this->_makeContDoc($this->getContainerXMLRaw()); 400 | $this->rootfile = $this->_getRootFileName(); 401 | // get the relative path to it, trimming dots and slashes 402 | $this->opsrel = trim(pathinfo($this->rootfile, PATHINFO_DIRNAME),'/.'); 403 | 404 | 405 | // opffile, oppath and rootfile are all the same!!! 406 | 407 | $this->opfpath = $this->rootfile; // the relative path to the opf file 408 | $this->opffile = $this->opfpath; 409 | 410 | 411 | // .OPF 412 | if(!$this->opfpath || !file_exists($path . '/'. $this->opfpath)) { 413 | throw new Exception('not a valid path for opf file'); 414 | } 415 | 416 | $opf = file_get_contents($path . '/'. 
$this->opfpath); 417 | if(!$opf) { 418 | throw new Exception("something is wrong there is no opf file at $path/".$this->opfpath); 419 | } 420 | $this->opf = $this->_makeOpfDoc($opf); 421 | 422 | 423 | $this->ncx = $this->_makeNcxDoc($this->getNcxXMLRaw()); 424 | 425 | // add manifest files to zipQ 426 | 427 | //error_log('adding these to a zip listing'); 428 | 429 | 430 | $this->_addToZipQ(); 431 | 432 | // .NCX 433 | 434 | 435 | // do a lil check on things 436 | 437 | $this->opf_metadataNode->setAttribute('xmlns:dc', "http://purl.org/dc/elements/1.1/"); 438 | $this->opf_metadataNode->setAttribute('xmlns:opf',"http://www.idpf.org/2007/opf"); 439 | // check for lang 440 | 441 | if(!$this->hasLang()) { 442 | $this->addMeta('language', $this->xmllang); 443 | } 444 | 445 | 446 | // numerical ids not allowed 447 | 448 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 449 | $id = $item->getAttribute('id'); 450 | if(preg_match('/^\d/',$id)) { 451 | $item->setAttribute('id', 'id'.$id); 452 | } 453 | } 454 | 455 | foreach($this->opf_spineNode->getElementsByTagName('itemref') as $itemref) { 456 | $id = $itemref->getAttribute('idref'); 457 | if(preg_match('/^\d/',$id)) { 458 | $itemref->setAttribute('idref', 'id'.$id); 459 | } 460 | } 461 | 462 | 463 | } 464 | 465 | public function fileExists($href) 466 | { 467 | return file_exists($this->getAbs($href)); 468 | } 469 | 470 | public function getAbs($href) 471 | { 472 | /** 473 | 474 | Get full disk path to href of opf item 475 | 476 | */ 477 | $rel = (strlen($this->opsrel)>0) ? $this->opsrel . '/' : ''; 478 | return $this->opspath . '/' . $rel . $href; 479 | } 480 | 481 | private function _addToZipQ() 482 | { 483 | 484 | /** 485 | 486 | Add all manifest files to zip archive object. 487 | Called during LoadOPS as part of the init 488 | for a package. If a file is not found, 489 | throws Exception. 
490 | 491 | */ 492 | 493 | if($this->opf_manifestNode) { 494 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 495 | $href = $item->getAttribute('href'); 496 | 497 | if(!$this->fileExists($href)) { 498 | $dump = $this->opf->saveXML(); 499 | //error_log('epub:file at '.$href.' does not exist'."\n$dump"); 500 | } 501 | 502 | $abs = $this->getAbs($href); 503 | $rel = $this->getRel($href); 504 | if($href && file_exists($abs)) { // only add if it's a valid key and the file exists 505 | 506 | // array_key_exists is failing in this, dont know why 507 | // not even sure why we needed it here 508 | 509 | // if(array_key_exists($rel, $this->zipQ)===FALSE) { // preserve existing (primacy rule) 510 | 511 | $this->zipQ[$rel]=$abs; 512 | 513 | //} else { // this is a duplicate id and should be removed 514 | 515 | // if($this->opf_manifestNode->removeChild($item)) { 516 | // error_log('epub:removed item with duplicate id:'.$rel); 517 | // } 518 | 519 | } 520 | 521 | } 522 | } else { 523 | throw new Exception('manifest is not defined yet'); 524 | } 525 | } 526 | 527 | private function _getRootFileName() 528 | { 529 | return $this->contdoc->getElementsByTagName('rootfiles')->item(0)->getElementsByTagName('rootfile')->item(0)->getAttribute('full-path'); 530 | } 531 | // Returns the raw contents of container.xml under metapath; throws Exception when it cannot be read. 532 | public function getContainerXMLRaw() 533 | { 534 | if(@!($container = file_get_contents($this->metapath.'/container.xml'))) { 535 | $msg = ""; 536 | if(!is_dir($this->metapath)) { $msg = ' and it is not even an existent directory!!'; } 537 | //$result = DiskUtil::findFile($path, '*.opf'); // like find $path -name $regex 538 | $result = false; 539 | //TODO 540 | if($result) { 541 | $this->opffile = $result; 542 | $this->opfpath = $this->opffile; 543 | $container = null; 544 | } else { 545 | throw new Exception('container file not found in '.$this->metapath.$msg.' 
plus could not create'); 546 | } 547 | } else { 548 | return $container; 549 | } 550 | 551 | } 552 | 553 | public function getNcxXMLRaw() 554 | { 555 | if(!$this->opf_spineNode||!$this->opfXP) throw new Exception('must define an xpath parser and a spine node representation before calling this'); 556 | $tocatt = $this->opf_spineNode->getAttribute('toc'); 557 | $path = $this->packagepath; 558 | // error_log('//*[@id="'.$tocatt.'"]'); 559 | $tocitem = $this->opfXP->evaluate('//*[@id="'.$tocatt.'"]'); 560 | if($tocitem->length > 0) { 561 | //error_log('found an item in manifest matching id '.$tocatt.' specified by spines toc attribute'); 562 | if($tocitem->item(0)->getAttribute('media-type')=='text/xml') { 563 | // fix ncx for stanza - eliminate incorrect media-type value 564 | $tocitem->item(0)->setAttribute('media-type', 'application/x-dtbncx+xml'); 565 | } 566 | if($tocitem->item(0)->getAttribute('href')) { 567 | $this->ncxpath = $this->getNcxPath($tocitem->item(0)->getAttribute('href')); 568 | } else { 569 | // error_log('found an item for the ncx but the href is not set. going to try to find the right file to attribute to this...'); 570 | $ncxfound = $this->_seekNcxFile($tocitem->item(0)); 571 | } 572 | } else { 573 | 574 | // an item with the specified id for the ncx item is not found: 575 | 576 | // this should be fatal, but some software like Calibre does 577 | // not put a toc attribute on the spine, even though there's 578 | // an ncx in the manifest. no point in failing here without 579 | // double-checking the manifest, even though against the spec 580 | 581 | $ncxfound = false; 582 | error_log('no ncx designation in spine attribute, seeking...'); 583 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 584 | $type = $item->getAttribute("media-type"); 585 | if($type=="application/x-dtbncx+xml") { 586 | $this->ncxpath = $this->getNcxPath($item->getAttribute("href")); 587 | if(!file_exists($path . '/' . 
$this->ncxpath)) { 588 | error_log('ncx file not found at '.$path . '/' . $this->ncxpath); 589 | } 590 | $ncxfound = true; 591 | break; 592 | } 593 | } 594 | if(!$ncxfound) { // if still not found, seek 595 | $ncxfound = $this->_seekNcxFile(); 596 | if(!$ncxfound) { // still no? fucked 597 | throw new Exception('ncx not found, even after searching recursively'); 598 | } 599 | 600 | } 601 | } 602 | if(!$this->ncxpath) { 603 | if($this->_seekNcxFile()) { 604 | //error_log('found ncx by searching filesystem'); 605 | } else { 606 | //error_log('cannot locate ncx'); 607 | } 608 | } 609 | 610 | //error_log('checking for file at '.$path . '/'. $this->ncxpath); 611 | if(!file_exists($path . '/'. $this->ncxpath) || !$this->ncxpath) { // make sure file there 612 | //error_log('throwing Exception: ncx file not found'); 613 | throw new Exception('ncx file not found at '.$this->ncxpath); 614 | } 615 | //error_log('ncxpath set to '.$this->ncxpath); 616 | return file_get_contents($path .'/'. $this->ncxpath); 617 | } 618 | 619 | private function _seekNcxFile($tocitem=null) 620 | { 621 | $tocatt = $this->opf_spineNode->getAttribute('toc'); 622 | if(!$tocatt) $tocatt = 'toc'; 623 | $ncxfound = false; 624 | // be smart, just search the ops for files matching /\.ncx$/i 625 | $files = array(); // the path is shell-quoted below so spaces or metacharacters cannot break or alter the find command 626 | exec("find ".escapeshellarg($this->packagepath)." 
-type f -name '*'", $files); 627 | 628 | //error_log(print_r($files, true)); 629 | foreach($files as $candidate) { 630 | if(preg_match('/\.ncx$/i', trim($candidate))) { 631 | 632 | // add the item to the manifest, with id=$tocatt 633 | 634 | if(!$tocitem) { // if the item is not already defined 635 | // error_log('creating a new manifest item for the ncx'); 636 | $item = $this->opf_manifestNode->appendChild($this->opf->createElement('item')); 637 | $item->setAttribute('id', $tocatt); 638 | } else { 639 | $item = $tocitem; 640 | } 641 | $newhref = trim(str_replace($this->packagepath, '', $candidate), './ '); 642 | // error_log('**find command found ncx href: '.$newhref); 643 | $item->setAttribute('href', $newhref); 644 | $item->setAttribute('media-type', 'application/x-dtbncx+xml'); 645 | $this->ncxpath = $this->getNcxPath($newhref); 646 | //$this->_saveMeta(); 647 | 648 | $ncxfound = true; 649 | } 650 | } 651 | return $ncxfound; 652 | 653 | } 654 | 655 | 656 | public function getNcxPath($href) 657 | { 658 | // conditionally appends a relative path to the ncx manifest href 659 | // passed in, based on whether opsrel is rootlever or not 660 | 661 | //error_log('opsrel is '.$this->opsrel); 662 | 663 | return (strlen($this->opsrel)>0) ? $this->opsrel . '/' . 
$href : $href; 664 | } 665 | 666 | 667 | /* Conversion methods */ 668 | 669 | 670 | 671 | public function includeCover($cover) 672 | { 673 | //$this->logerr('setting cover HTML:'.$cover); 674 | $this->includecover = $cover; 675 | } 676 | 677 | public function setPretty($bool) 678 | { 679 | //$this->logerr('setPretty called'); 680 | $this->prettyPrint = $bool; 681 | } 682 | 683 | public function loadSource($filename) 684 | { 685 | 686 | 687 | // detect zip or epub and forwards to either 688 | // open method or loadSourceFromZip 689 | 690 | if(!$filename) throw new Exception ('loadSource requires a non-null and non-empty filename'); 691 | $this->logerr('loadSource:'.$filename, 4); 692 | 693 | //asm: this was causing the title to be 'Untitled' 694 | /* 695 | if(!@$this->opf) { // if create has not been called yet 696 | //error_log('create has not been called, calling it now'); 697 | $this->create(array('title'=>$this->getTitle(), 'author'=>$this->getAuthor())); 698 | } 699 | */ 700 | 701 | 702 | if(!@$this->opf) { 703 | $this->_makeDirs(); 704 | $this->opf = $this->_makeOpfDoc(); 705 | $this->ncx = $this->_makeNcxDoc(); 706 | $this->contdoc = $this->_makeContDoc(); 707 | } 708 | 709 | $base_href = ''; 710 | $docroot = ''; 711 | if(preg_match('/^(http:\/\/.+?)([^\/]*?)$/', $filename, $urlmatches)) { // pre-fetch remote files 712 | // store url for later processing - add trailing slash if needed 713 | $hostpath = $urlmatches[1]; 714 | $urlinfo = parse_url($filename); 715 | 716 | $docroot = ($urlinfo['host']) ? 'http://' . $urlinfo['host'] : ''; 717 | $base_href = (preg_match('/\/$/', $hostpath)) ? $hostpath : $hostpath . '/'; 718 | 719 | //$this->logerr('trying to cache url:'.$filename); 720 | $tmpwork = DiskUtil::getTempDir() . '/' . 
uniqid(); 721 | if(!($remote = file_get_contents($filename))) { 722 | throw new Exception('cannot cache remote url'); 723 | } 724 | if(!file_put_contents($tmpwork, $remote)) { 725 | throw new Exception('cannot cache remote url'); 726 | } else { 727 | $filename = $tmpwork; 728 | } 729 | } 730 | $snip = file_get_contents($filename,null,null,0,2); 731 | if(!$snip) { throw new Exception('file '.$filename.' does not exist'); } 732 | if ($snip=='PK') { // .zip or epub 733 | 734 | $this->loadSourceFromZip($filename); 735 | $this->_saveMeta(); 736 | $this->loadOPS($this->packagepath); 737 | 738 | } else { 739 | throw new Exception("This method only accepts zipped HTML (web) archives conforming to the UBO spec"); 740 | /* 741 | 742 | $pi = pathinfo($filename); 743 | $basename = $pi['basename']; 744 | $id = $this->_validID('source'); 745 | $doc = $this->_domFromDoc($filename); 746 | $xp = new DOMXPath($doc); 747 | $xp->registerNamespace("ht", "http://www.w3.org/1999/xhtml"); 748 | 749 | 750 | 751 | 752 | // build package elements 753 | $this->_setIdentifier(); 754 | $this->setPublisher('BookGlutton API (www.BookGlutton.com)'); 755 | $this->_guessMetas($doc, $xp); 756 | 757 | // add a cover 758 | 759 | if($this->includecover!=false) { 760 | //$this->logerr('adding cover item:'.print_r($this->includecover, true)); 761 | $this->_addCoverItem(); 762 | } 763 | 764 | //$oochaps = $xp->query('//ht:p[@class="ChapterTitle"]', $bodynode); 765 | $heads = $xp->query('//ht:h1|//ht:h2|//ht:h3', $doc); 766 | $images = $doc->getElementsByTagname('img'); 767 | $this->logerr('found '.$heads->length.' 
headings here'); 768 | // save the document back out and add the items from the headings 769 | if($this->useNavDivs==true) { 770 | $doc = $this->_headsToNavDivs($doc, $heads, $basename); 771 | } else { 772 | if($this->useNavDocs==true) { 773 | $this->logerr('calling headsToNavDocs'); 774 | try { 775 | $this->_headsToNavDocs($doc, $heads, $basename); 776 | } catch (Exception $e) { 777 | error_log('ignoring caught Exception in _headsToNavDocs:'.$e->getMessage()); 778 | } 779 | return; // skip adding the original source 780 | } else { 781 | $this->_headsToNavItems($doc, $heads, $basename); 782 | } 783 | } 784 | 785 | foreach($images as $image) { // images must not be in spine! 786 | $src = $image->getAttribute('src'); 787 | if(!preg_match('/^http:\/\//i', $src)) { // if img is not remote 788 | $mime = $this->_getMimeFromExt($src); 789 | $this->addItem($this->_validID('image'), $src, $mime, null, null); 790 | } 791 | } 792 | 793 | $this->logerr('loadSource:adding source item now', 2); 794 | $this->addItem($id, $basename, 'application/xhtml+xml', $doc->saveXML(), 'yes'); 795 | */ 796 | } 797 | } 798 | 799 | public function getPackagePath() 800 | { 801 | return $this->packagepath; 802 | } 803 | 804 | public function setPackagePath($p) 805 | { 806 | $this->packagepath = $p; 807 | $this->metapath = $this->packagepath . 
'/META-INF'; 808 | $this->opspath = $this->packagepath; 809 | } 810 | 811 | public function loadSourceFromZip($zipfile) 812 | { 813 | //error_log('loading:'.$zipfile); 814 | $za = new ZipArchive(); 815 | if ($za->open($zipfile) === TRUE) { // valid zip 816 | 817 | // check if its epub 818 | $checkit = $za->statIndex(0); 819 | if($checkit['name']=='mimetype') { 820 | if($fp = $za->getStream('mimetype')) { // is a valid file 821 | $contents = ''; 822 | while (!feof($fp)) { // suck contents in 823 | $contents .= fread($fp, 2); 824 | } 825 | fclose($fp); 826 | $this->logerr($contents, 3); 827 | if(preg_match('/application\/epub\+zip/',$contents)) { 828 | $this->logerr('this is an epub file!', 1); 829 | $za->close(); 830 | $this->open($zipfile); // open as epub 831 | return; 832 | } 833 | } 834 | } else { 835 | $this->logerr('No mimetype file found in archive, probably not epub', 2); 836 | } 837 | $numFiles = $za->numFiles; 838 | // error_log($numFiles . " found in archive..."); 839 | $acceptlist = array(); 840 | $firstdoc = null; 841 | $order=0; 842 | for ($i=0; $i<$numFiles;$i++) { 843 | $stats = ($za->statIndex($i)); 844 | // do some filtering by file extension first, only acceptable types 845 | if(preg_match('/(xml|x?html?|gif|jpe?g|svg|png|swf|css)$/i',$stats['name'])) { 846 | if(preg_match('/^[^\._]/', $stats['name'])) { // won't process hidden or system files 847 | $mime = $this->_getMimeFromExt($stats['name']); // all html and xml types return '...+xml' for this 848 | $isimg = preg_match('/(jpe?g|gif|png|swf|css|svg)$/i', $mime); 849 | $prefix = ($isimg) ? 'image' : 'item'; 850 | $itemid = $this->_validID($prefix); 851 | $addtospine = ($isimg) ? 
null : 'yes'; 852 | $fp = $za->getStream($stats['name']); 853 | if($fp) { // is a valid file 854 | $contents = ''; 855 | while (!feof($fp)) { $contents .= fread($fp, 2);} 856 | fclose($fp); 857 | // XML and HTML files - content docs 858 | if (preg_match('/xml$/i', $mime)) { 859 | 860 | $this->preflightReport('INFO: '.$mime.' "'.$stats['name'].'" ('.strlen($contents).' bytes)'); 861 | $this->_loadXMLContent($itemid, $stats['name'], $mime, $contents); 862 | 863 | } else { // not xml or html, add to manifest 864 | $this->logerr('Not a content document type, adding to manifest only.',2); 865 | $this->preflightReport('INFO: '.$mime.' "'.$stats['name'].'" ('.strlen($contents).' bytes)'); 866 | $this->addItem($itemid, $stats['name'], $mime, $contents, false); 867 | $contents = null; // free mem 868 | } 869 | } else { 870 | 871 | //error_log("INFO: invalid file pointer from zip/EPUB"); 872 | } // fail silently if not a valid file 873 | } else { 874 | 875 | //error_log("Epub: not an allowed file"); 876 | $this->preflightReport("WARN: ".$stats['name']." 
skipped, not an allowed file"); 877 | 878 | } 879 | } else { 880 | // error_log("Epub: not an allowed file extension"); 881 | } 882 | } 883 | 884 | if($this->includecover!=false) { 885 | $this->logerr('Adding a cover',2); 886 | $this->_addCoverItem(); 887 | } 888 | if(!$this->hasPrimaryIdSet) { 889 | 890 | $this->_setIdentifier(); 891 | } 892 | //$this->setPublisher('BookGlutton API (www.BookGlutton.com)'); 893 | if($this->deferredSpine===true) { 894 | //error_log(print_r($this->ncxSpineadd),true); 895 | 896 | foreach($this->ncxSpineadd as $key=>$path) { 897 | 898 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 899 | //error_log($path.'=='.$item->getAttribute('href')); 900 | 901 | if($item->getAttribute('href')==$path) { 902 | //error_log('adding to spine'); 903 | $this->addSpineRef($item->getAttribute('id'), 'yes'); 904 | $this->preflightReport("INFO: added $path to spine"); 905 | break; 906 | } 907 | } 908 | 909 | 910 | } 911 | 912 | } else { 913 | throw new Exception('no UBO index file found to build EPUB structure'); 914 | } 915 | 916 | } else { // complain if this isnt even a valid zip 917 | throw (new Exception('cannot open zipfile:'.$zipfile)); 918 | } 919 | 920 | } 921 | 922 | private function _loadXMLContent ($itemid, $name, $mime, $contents) 923 | { 924 | $this->logerr('This is to be an xml based content doc, judging from the extension:'.$name,2); 925 | if(preg_match('/index\.html?$/i', $name)) { 926 | $isindex = true; 927 | $this->preflightReport("INFO: Found UBO index file"); 928 | } else { 929 | $isindex = false; 930 | } 931 | $tmp = $this->opspath . '/' . $this->opsrel . '/' . $name; // path to new file (return from addItem?) 
932 | DiskUtil::assertPath(pathinfo($tmp, PATHINFO_DIRNAME)); 933 | $doc = new DomDocument(); 934 | // NOTE(review): the three patterns below had lost their leading markup (they began with a bare "]"); restored to target the doctype declaration and the opening html tag - confirm against the upstream file 935 | if(preg_match('/<!DOCTYPE[^>]+?xhtml 1.1[^>]+?>/im', $contents)) { 936 | $contents = preg_replace('/<!DOCTYPE[^>]+?>/m', $this->xhtml11doctype, $contents); 937 | } 938 | 939 | $contents = preg_replace('/<html[^>]+?>/m', $this->htmltag, $contents); 940 | 941 | if(@!$doc->loadXML($contents)) { 942 | 943 | $this->preflightReport("ERROR: Could not parse the file ".$name.". It may interfere with validation.",1); 944 | 945 | } 946 | 947 | $contents = null; // free mem 948 | if($doc->getElementsByTagName('title')->length > 0) { 949 | $title = $doc->getElementsByTagName('title')->item(0)->textContent; 950 | } 951 | if($isindex) { 952 | // set metas from index file 953 | $this->_parseMetasFromDoc($doc); 954 | // build navigation doc 955 | $this->ncxCurPlayOrder = 0; 956 | $this->ncxGeneratedDepth = 0; 957 | $this->ncxGeneratedLength = 0; 958 | $this->ncxGeneratedNavMapCurPoint = null; 959 | $this->ncxLastDepth = 0; 960 | $this->ncxSpineadd = array(); 961 | $this->zipstem = pathinfo($name, PATHINFO_DIRNAME); 962 | $this->_recurseList($doc->getElementsByTagName('ol')->item(0)); 963 | // defer building spine until after all docs accounted for 964 | $this->deferredSpine = true; 965 | } 966 | $this->addItem($itemid, $name, $mime, $doc->saveXML()); 967 | $this->preflightReport("INFO: file ".$name." added"); 968 | 969 | $doc = null; // free mem 970 | //$this->addNavItem($itemid, $title, $name, 'document'); 971 | } 972 | 973 | 974 | private function _recurseList($olel, $depth=0) 975 | { 976 | if(!$olel) { 977 | $this->preflightReport("ERROR: Could not find the toc list element. Be sure to use an ORDERED LIST element in your index.html file, with class attribute set to 'toc.' 
This will cause invalid results.",1); 978 | return; 979 | } 980 | 981 | if($children = $olel->childNodes) { 982 | foreach($children as $ol) { 983 | if($child = $ol->firstChild) { 984 | $this->_processHtml($child, $depth); 985 | } else if($child = $ol->nextSibling) { 986 | $this->_processHtml($child, $depth); 987 | } 988 | } 989 | } 990 | } 991 | 992 | 993 | private function _processHtml($child,$depth) 994 | { 995 | if($child->nodeType==1) { 996 | if($child->nodeName=='li') { 997 | //error_log('li found'); 998 | $this->preflightReport("INFO: toc list item found - ".$child->nodeValue); 999 | foreach($child->childNodes as $ch) { 1000 | if($ch->nodeName=='a') { 1001 | $title = $ch->textContent; 1002 | $prefix = ($this->zipstem !='.') ? $this->zipstem .'/' :''; 1003 | $name = $prefix . $ch->getAttribute('href'); 1004 | if($depth>0) { // not at top level 1005 | if($depth>$this->ncxLastDepth) { // we're deeper in now 1006 | $nps = $this->ncx_navMapNode->getElementsByTagName('navPoint'); 1007 | $np = $nps->item($nps->length-1); // find last element instead of navmap 1008 | $this->ncxGeneratedNavMapCurPoint = $np; 1009 | } 1010 | } else if($depth==0) { // at navmap level 1011 | $this->ncxGeneratedNavMapCurPoint = null; 1012 | } 1013 | $this->addNavItem('navPoint'.$this->ncxGeneratedLength, $title, $name, $child->getAttribute('class'),$this->ncxGeneratedNavMapCurPoint); 1014 | 1015 | $this->preflightReport("NCX: ".$title." added with target of ".$name); 1016 | 1017 | $pre = (strlen($this->opsrel)>0) ? $this->opsrel . 
'/' : ''; 1018 | $find = $pre.$name; 1019 | $prts = explode('#',$find); 1020 | $find = $prts[0]; 1021 | 1022 | /* fix for path bug */ 1023 | $fparts = preg_replace('/^\.\//','',$find); // strip leading path 1024 | 1025 | $this->ncxSpineadd[$fparts]=$fparts; 1026 | 1027 | /* end fix */ 1028 | 1029 | $this->ncxGeneratedLength++; 1030 | break; 1031 | } 1032 | } 1033 | $this->ncxLastDepth = $depth; 1034 | $depth++; 1035 | } 1036 | } 1037 | $this->_recurseList($child, $depth); 1038 | } 1039 | 1040 | private function _parseMetasFromDoc($doc) 1041 | { 1042 | if($this->conversionMetasParsed || $this->conversionIndexParsed) return; 1043 | $metas = $doc->getElementsByTagName('meta'); 1044 | foreach($metas as $meta) { 1045 | $mn=strtolower($meta->getAttribute('name')); 1046 | $c = $meta->getAttribute('content'); 1047 | $role=null;$type=null; 1048 | if($mn=='ubo.primaryid') { 1049 | $scheme = $meta->getAttribute('scheme'); 1050 | $this->_setIdentifier($c, $scheme, true); 1051 | $this->preflightReport("Primary Id set from UBO metadata: ".$c); 1052 | $this->hasPrimaryIdSet=true; 1053 | } elseif($mn=='ubo.cover') { 1054 | $this->addCoverMeta($c); 1055 | } elseif($mn=='dc.identifier') { 1056 | $scheme = $meta->getAttribute('scheme'); 1057 | $this->_setIdentifier($c, $scheme, false); 1058 | $this->preflightReport("Added identifier from DC metadata: ".$c); 1059 | } elseif ($mn=='dc.title') { 1060 | $this->addMeta('title', $c); 1061 | $this->preflightReport("Added title from DC metadata: ".$c); 1062 | } elseif (preg_match('/^dc.creator(.*?)$/', $mn, $matches)) { 1063 | if(count($matches)>1) { 1064 | if(preg_match('/^\./', $matches[1])) { 1065 | $role = substr($matches[1],1); 1066 | } 1067 | } 1068 | if($role!='aut') { 1069 | $role=null; 1070 | } 1071 | $this->addMeta('creator', $c, $role); 1072 | $this->preflightReport("Added creator from DC metadata: ".$c); 1073 | } elseif ($mn=='dc.language') { 1074 | $this->addMeta('language', $c); 1075 | $this->preflightReport("Set language from 
DC metadata: ".$c); 1076 | } elseif ($mn=='dc.publisher') { 1077 | $this->addMeta('publisher', $c); 1078 | $this->preflightReport("Set publisher from DC metadata: ".$c); 1079 | } elseif (preg_match('/^dc.date(.*?)$/', $mn, $matches)) { 1080 | if(count($matches)>1) { 1081 | $type = substr($matches[1],1); 1082 | } 1083 | $this->addMeta('date', $c, $type); 1084 | $this->preflightReport("Set date from DC metadata: ".$c); 1085 | } elseif ($mn=='dc.description') { 1086 | $this->addMeta('description', $c); 1087 | $this->preflightReport("Set description from DC metadata: ".$c); 1088 | } elseif (preg_match('/^dc.contributor(.*?)$/', $mn, $matches)) { 1089 | if(count($matches)>1) { 1090 | $role = substr($matches[1],1); 1091 | } 1092 | $this->addMeta('contributor', $c, $role); 1093 | $this->preflightReport("Set contributor from DC metadata: ".$c); 1094 | } 1095 | } 1096 | $this->conversionMetasParsed = true; 1097 | $this->conversionIndexParsed = true; 1098 | } 1099 | 1100 | public function fix() 1101 | { 1102 | 1103 | /* magic method that attempts to repair common problems */ 1104 | 1105 | // missing ncx 1106 | 1107 | 1108 | 1109 | 1110 | 1111 | 1112 | } 1113 | 1114 | 1115 | 1116 | 1117 | /* post conversion test in lieu of epubcheck */ 1118 | 1119 | public function testOPS($path=null) 1120 | { 1121 | $this->_saveMeta(); 1122 | if($path==null) $path = $this->packagepath; 1123 | $mimetypepath = $path . '/'. $this->getMimetypeFilename(); // filename of mimetype file 1124 | $metapath = $path . 
'/META-INF'; 1125 | $result = array(); 1126 | $result[0] = 'fail'; 1127 | $result[1] = 'error message'; 1128 | if(is_dir($metapath)) { // dir exists 1129 | if($container = file_get_contents($metapath.'/container.xml')) { // found container file 1130 | $contdoc = new DomDocument(); 1131 | if($cd = $contdoc->loadXML($container)) { // loaded container xml 1132 | $fp = $contdoc->getElementsByTagName('rootfiles')->item(0)->getElementsByTagName('rootfile')->item(0)->getAttribute('full-path'); 1133 | if($fullp = $this->_getRelPathWithOpf($fp)) { // found full path to opf file 1134 | $p = (strlen($fullp[0])>0) ? "$path/$fullp[0]":"$path"; 1135 | $op = "$p/$fullp[1]"; 1136 | $this->logerr($op, 2); 1137 | if(file_exists($op)) { // found opf file 1138 | $opf = new DomDocument('1.0','utf-8'); 1139 | $opf->preserveWhiteSpace = FALSE; 1140 | $opf->loadXML(file_get_contents($op)); 1141 | $package = $opf->getElementsByTagName('package')->item(0); 1142 | $meta = $package->getElementsByTagName('metadata')->item(0); 1143 | $mani = $package->getElementsByTagName('manifest')->item(0); 1144 | $spine = $package->getElementsByTagName('spine')->item(0); 1145 | if($meta && $mani && $spine) { 1146 | $tocatt = $spine->getAttribute('toc'); 1147 | $opfXP = new DomXpath($opf); 1148 | $opfXP->registerNamespace("opfns", $this->opfNS); 1149 | $opfXP->registerNamespace("dc", $this->dcNS); 1150 | $tocitem = $opfXP->evaluate('//*[@id="'.$tocatt.'"]'); 1151 | if($tocitem->length > 0) { 1152 | if(file_exists($p . '/'. $tocitem->item(0)->getAttribute('href'))) { 1153 | $result[0] = 'pass'; 1154 | $result[1] = array('opf'=>$fp); 1155 | //foreach($mani->getElementsByTagName('item') as $item) { 1156 | 1157 | // $href = $item->getAttribute('href'); 1158 | //} 1159 | } else { $result[1] = "ncx file not found:".$p . '/'. 
$tocitem->item(0)->getAttribute('href')."\n"; } 1160 | } else { $result[1] = "no item with ncx id found\n"; } 1161 | } else { $result[1] = "one of the three required opf nodes is missing\n"; } 1162 | } else { $result[1] = "opf not found\n"; } 1163 | } else { $result[1] = "full path to opf not found\n"; } 1164 | } else { $result[1] = "container file not loaded\n"; } 1165 | } else { $result[1] = "container file not found\n"; } 1166 | } else { $result[1] = "meta-inf dir no exist\n"; } 1167 | return $result; 1168 | } 1169 | 1170 | 1171 | 1172 | 1173 | 1174 | /* XML convenience methods */ 1175 | 1176 | 1177 | 1178 | // Returns the nearest ancestor of $node whose node name is div (case-insensitive), or null when there is none. 1179 | private function _getParentDiv($node) 1180 | { 1181 | // advance one ancestor per pass; the previous while loop never moved $parent and could spin forever 1182 | for($parent = $node->parentNode; $parent; $parent = $parent->parentNode) { 1183 | if(strtolower($parent->nodeName) == 'div') { 1184 | return $parent; 1185 | } 1186 | } 1187 | return null; 1188 | } 1189 | 1190 | 1191 | /* 1192 | 1193 | XML OPS MODEL functions 1194 | 1195 | 1196 | Readers - return elements (usually unattached) 1197 | Factories - create elements (also unattached) 1198 | Builders - attach elements to dom docs 1199 | Getters and Setters 1200 | 1201 | */ 1202 | 1203 | 1204 | 1205 | /* Public getters and setters for OPS metadata */ 1206 | 1207 | 1208 | 1209 | /* XML getters */ 1210 | 1211 | 1212 | private function _getSpineEl() 1213 | { 1214 | return $this->opf_spineNode; 1215 | } 1216 | 1217 | private function _getManifestEl() 1218 | { 1219 | return $this->opf_manifestNode; 1220 | } 1221 | 1222 | private function _getMetadataEl() 1223 | { 1224 | return $this->opf_metadataNode; 1225 | } 1226 | 1227 | private function _getNavMapEl() 1228 | { 1229 | return $this->ncx_navMapNode; 1230 | } 1231 | 1232 | // TODO all should return lists 1233 | 1234 | public function getTitle() 1235 | { 1236 | return $this->getDcTitle(); 1237 | } 1238 | 1239 | public function getPrimaryId() 1240 | { 1241 | return $this->uniqIDval; 1242 | } 1243 | 1244 | public function removeInvalidLangs() 1245 | { 1246 | // scan metadata for invalid 
language specification 1247 | // and remove offending nodes 1248 | 1249 | $langels = $this->opf_manifestNode->getElementsByTagName('dc:language'); 1250 | if($langels) { 1251 | foreach($langels as $lang) { 1252 | if(!preg_match('/^[a-z][a-z]\-[A-Z][A-Z]$/', $lang->nodeValue)) { 1253 | $this->opf_manifestNode->removeChild($lang); 1254 | //error_log('epub:removed invalid language specifier'); 1255 | } 1256 | } 1257 | } 1258 | $langels = $this->opf_manifestNode->getElementsByTagName('language'); 1259 | if($langels) { 1260 | foreach($langels as $lang) { 1261 | if(!preg_match('/^[a-z][a-z]\-[A-Z][A-Z]$/', $lang->nodeValue)) { 1262 | $this->opf_manifestNode->removeChild($lang); 1263 | // error_log('epub:removed invalid language specifier'); 1264 | } 1265 | } 1266 | } 1267 | $langels = $this->opf_manifestNode->getElementsByTagName('opf:language'); 1268 | if($langels) { 1269 | foreach($langels as $lang) { 1270 | if(!preg_match('/^[a-z][a-z]\-[A-Z][A-Z]$/', $lang->nodeValue)) { 1271 | $this->opf_manifestNode->removeChild($lang); 1272 | // error_log('epub:removed invalid language specifier'); 1273 | } 1274 | } 1275 | } 1276 | } 1277 | 1278 | public function hasLanguage() 1279 | { 1280 | // is there a node in metadata specifying language? 
1281 | // language elements live under the metadata node, so that is the node searched (the manifest only holds item entries) 1282 | $haslang = false; 1283 | $langels = $this->opf_metadataNode->getElementsByTagName('dc:language'); 1284 | if($langels) { 1285 | if($lang = $langels->item(0)) { 1286 | $haslang = true; 1287 | } else { 1288 | $langels = $this->opf_metadataNode->getElementsByTagName('language'); 1289 | if($langels) { 1290 | if($lang = $langels->item(0)) { 1291 | $haslang = true; 1292 | } 1293 | } 1294 | } 1295 | } 1296 | return $haslang; 1297 | } 1298 | 1299 | public function getLanguage() 1300 | { 1301 | // returns first language matched from metadata, or the xmllang default when none is declared 1302 | $langels = $this->opf_metadataNode->getElementsByTagName('dc:language'); 1303 | if($langels && $langels->length > 0) { 1304 | return $langels->item(0)->nodeValue; 1305 | } else { 1306 | $langels = $this->opf_metadataNode->getElementsByTagName('language'); 1307 | if($langels && $langels->length > 0) { 1308 | return $langels->item(0)->nodeValue; 1309 | } 1310 | } 1311 | return $this->xmllang; 1312 | } 1313 | 1314 | public function getAuthor() 1315 | { 1316 | return $this->getDcCreator(); 1317 | } 1318 | 1319 | public function getMetaPairs() 1320 | { 1321 | // returns raw key val pairs from the meta node 1322 | $pairs = array(); 1323 | foreach($this->opf_metadataNode->childNodes as $child) { 1324 | if($child->nodeType==1) { 1325 | $pairs[$child->nodeName] = $child->nodeValue; 1326 | } 1327 | } 1328 | return $pairs; 1329 | 1330 | } 1331 | 1332 | 1333 | public function getDcTitle() 1334 | { 1335 | $q = $this->opfXP->query('//dc:title', $this->opf_metadataNode); 1336 | //$tnode = $this->opf->getElementsByTagName('dc:title')->item(0); 1337 | if($q->length>0) { 1338 | return $q->item(0)->nodeValue; 1339 | } else { 1340 | return 'Unknown Title'; 1341 | } 1342 | 1343 | } 1344 | // Returns the creators whose role attribute is aut joined by ', '; otherwise the first creator, or 'Unknown Creator' when there are none. 1345 | public function getDcCreator() 1346 | { 1347 | if($q = $this->getDcCreators()) { 1348 | $default = $q->item(0)->nodeValue; 1349 | $list = array(); 1350 | foreach($q as $cr) { 1351 | if($cr->getAttribute('role')=='aut') { 1352 | $list[] = $cr->nodeValue; // was $q->nodeValue: the node list has no nodeValue, so only nulls were collected 1353 | } 1354 | } 1355 | 
if(count($list)>0) { 1356 | return implode(', ',$list); 1357 | } else { 1358 | return $default; 1359 | } 1360 | } else { 1361 | return 'Unknown Creator'; 1362 | } 1363 | } 1364 | 1365 | public function getDcCreators() 1366 | { 1367 | $q = $this->opfXP->query('//dc:creator', $this->opf_metadataNode); 1368 | if($q->length > 0) { 1369 | return $q; 1370 | } else { 1371 | return null; 1372 | } 1373 | } 1374 | 1375 | public function getNcxTitle() 1376 | { 1377 | $q = $this->ncxXP->query('//nc:docTitle/nc:text'); 1378 | if($q->length > 0) { 1379 | return $q->item(0)->nodeValue; 1380 | } else { 1381 | return 'Unknown Title'; 1382 | } 1383 | } 1384 | 1385 | public function getCoverMeta() 1386 | { 1387 | // not really part of the standard, 1388 | // but becoming standard practice way 1389 | // to include covers. 1390 | //error_log('looking for cover meta...'); 1391 | //returns meta value for cover image id, if there is one 1392 | foreach($this->opf_metadataNode->getElementsByTagName('meta') as $meta) { 1393 | if($meta->getAttribute('name')=='cover') { 1394 | return $meta->getAttribute('content'); 1395 | } 1396 | } 1397 | return false; 1398 | } 1399 | 1400 | public function getOpfDoc() 1401 | { 1402 | // return the domdocument for the opf file 1403 | 1404 | return $this->opf; 1405 | } 1406 | 1407 | public function getNcxDoc() 1408 | { 1409 | // return the domdocument for the ncx file 1410 | 1411 | return $this->ncx; 1412 | } 1413 | 1414 | public function getDescription() 1415 | { 1416 | 1417 | if($ds = $this->opf_metadataNode->getElementsByTagName('description')) { 1418 | if($ds->item(0)) { 1419 | return $ds->item(0)->nodeValue; 1420 | } 1421 | } 1422 | return null; 1423 | 1424 | /* 1425 | $q = $this->opfXP->query('//dc:description', $this->opf_metadataNode); 1426 | if($q->length > 0) { 1427 | return $q->item(0)->nodeValue; 1428 | } else { 1429 | return null; 1430 | } 1431 | */ 1432 | } 1433 | 1434 | public function getRights() 1435 | { 1436 | $q = 
$this->opfXP->query('//dc:rights', $this->opf_metadataNode); 1437 | if($q->length > 0) { 1438 | return $q->item(0)->nodeValue; 1439 | } else { 1440 | return null; 1441 | } 1442 | } 1443 | 1444 | /** 1445 | 1446 | Getters for OPS files and metadata reference hashes 1447 | 1448 | */ 1449 | 1450 | 1451 | /** Getters for OPS files -- full XML content */ 1452 | /** Useful for rolling your own functions */ 1453 | 1454 | 1455 | public function dumpOcfFile() 1456 | { 1457 | return file_get_contents($this->ocf_filename); 1458 | } 1459 | 1460 | public function getMimetypeFile() 1461 | { 1462 | $st = @stat($this->mimetypepath); 1463 | return array('relpath'=>'mimetype', 'content'=>file_get_contents($this->mimetypepath), 'stat'=>$st); 1464 | } 1465 | public function getContainerXML() 1466 | { 1467 | //$this->logerr('getContainer'); 1468 | $st = @stat($this->metapath.'/container.xml'); 1469 | return array('relpath'=>'META-INF/container.xml', 'content'=>file_get_contents($this->metapath.'/container.xml'), 'stat'=>$st); 1470 | } 1471 | public function getOpfRaw() 1472 | { 1473 | $opf = $this->getOpfXML(); 1474 | return $opf['content']; 1475 | } 1476 | public function getOpfXML() 1477 | { 1478 | //$this->logerr('getOpf'); 1479 | $st = @stat($this->packagepath . '/'. $this->opfpath); 1480 | return array('relpath'=>$this->opfpath, 'content'=>file_get_contents($this->packagepath . '/'. $this->opfpath), 'stat'=>$st); 1481 | } 1482 | 1483 | public function getSpineXML() 1484 | { 1485 | if(@!$this->opf) return null; 1486 | return $this->opf_spineNode; 1487 | } 1488 | 1489 | public function getItemXML($filename) 1490 | { 1491 | $itemarray = $this->getItemByPath($filename); 1492 | if(count($itemarray)>0) { 1493 | return $itemarray['content']; 1494 | } else { 1495 | // error_log('epub:item has no keys'); 1496 | return ''; 1497 | } 1498 | } 1499 | 1500 | public function getItemFilepath($item) 1501 | { 1502 | return $this->packagepath . '/'. dirname($this->opfpath) . '/'. 
$item['href']; 1503 | } 1504 | 1505 | public function getNcxXML() 1506 | { 1507 | $this->logerr('getNcx'); 1508 | $st = stat($this->packagepath . '/'. $this->ncxpath); 1509 | //error_log($this->ncxpath); 1510 | //error_log(print_r($st,true)); 1511 | if(!$con = file_get_contents($this->packagepath . '/'. $this->ncxpath)) { 1512 | // error_log('could not get file contents from '.$this->packagepath . '/'. $this->ncxpath); 1513 | } 1514 | return array('relpath'=>$this->ncxpath, 'content'=>$con, 'stat'=>$st); 1515 | } 1516 | 1517 | public function getOpfFilename() 1518 | { 1519 | $res = $this->testOPS(); 1520 | return ($res[0]=='pass') ? $res[1]['opf'] : null; 1521 | } 1522 | 1523 | public function getContainerFilename() 1524 | { 1525 | return 'META-INF/container.xml'; 1526 | } 1527 | 1528 | public function getMimetypeString() 1529 | { 1530 | return 'application/epub+zip'; 1531 | } 1532 | 1533 | public function getMimetypeFilename() 1534 | { 1535 | return 'mimetype'; 1536 | } 1537 | 1538 | 1539 | /** 1540 | Getters that return arrays about content documents and other OPS 1541 | content structure, or return the contents of those files 1542 | */ 1543 | 1544 | public function getItemRefs() 1545 | { 1546 | $encode=true; 1547 | $r = array(); 1548 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 1549 | $r[]=$this->_itemElToArray($item, $encode); 1550 | } 1551 | return $r; 1552 | } 1553 | public function getItemFiles() 1554 | { //WARNING: mem intensive!! 
1555 | $encode=true; 1556 | $r = array(); 1557 | foreach($this->opf_manifestNode->getElementsByTagName('item') as $item) { 1558 | $r[]=$this->_itemElToFullArray($item, $encode); 1559 | } 1560 | return $r; 1561 | } 1562 | 1563 | public function getSpineItems() 1564 | { 1565 | // dereferences items in spine and returns array of hashes 1566 | // corresponding to the items from the manifest 1567 | 1568 | 1569 | $items = array(); 1570 | 1571 | // ASM: for some stupid reason, the Xpath expressions 1572 | // '//itemref' and '//opfns:itemref' both fail to return 1573 | // anything but an empty nodeset. However, '//*' returns 1574 | // all the nodes in the doc, which then lets us filter 1575 | // out the itemref elements by iterating. Asinine, but 1576 | // only other way would be getElementsByTagName and 1577 | // getElementById, which would be slower, I think. 1578 | 1579 | 1580 | $els = $this->opf_spineNode->getElementsByTagName('itemref'); 1581 | 1582 | //error_log($els->length." itemrefs in spine"); 1583 | 1584 | foreach($els as $ref) { 1585 | 1586 | $item = $this->opfXP->evaluate('//item[@id="'.$ref->getAttribute('idref').'"]', $this->opf_manifestNode)->item(0); 1587 | 1588 | //$item = $this->opf->getElementById($ref->getAttribute('idref')); 1589 | 1590 | if(!$item) { 1591 | set_time_limit(30); 1592 | $its = $this->opf_manifestNode->getElementsByTagName('item'); 1593 | foreach($its as $it) { 1594 | if($it->getAttribute('id')==$ref->getAttribute('idref')) { 1595 | $item = $it; 1596 | break; 1597 | } 1598 | } 1599 | } 1600 | if(!$item) { 1601 | throw new Exception('Could not correctly index the items in this package (ID from IDREF not found)'); 1602 | } 1603 | $itemarray = $this->_itemElToArray($item); 1604 | $itemarray['linear'] = $ref->getAttribute('linear'); 1605 | $items[] = $itemarray; 1606 | } 1607 | 1608 | 1609 | /* 1610 | 1611 | 1612 | 1613 | 1614 | $els = $this->opfXP->evaluate('//*', $this->opf_spineNode); 1615 | foreach($els as $ref) { 1616 | 
if($ref->nodeType==1 && $ref->nodeName=='itemref') { 1617 | $item = $this->opfXP->evaluate('//*[@id="'.$ref->getAttribute('idref').'"]', $this->opf_manifestNode)->item(0); 1618 | if($item) { 1619 | $itemarray = $this->_itemElToArray($item); 1620 | $itemarray['linear'] = $ref->getAttribute('linear'); 1621 | $items[] = $itemarray; 1622 | } 1623 | } 1624 | } 1625 | */ 1626 | return $items; 1627 | } 1628 | 1629 | public function getNavPoints($appendguide=false) 1630 | { 1631 | // returns an array of hashes representing 1632 | // each navPoint element in the NCX 1633 | return $this->_getNavPoints($this->ncx_navMapNode, $appendguide); 1634 | } 1635 | 1636 | 1637 | public function getAsSimpleHash() 1638 | { 1639 | /** 1640 | Returns a simple (flat) hashed array of the whole contained structure 1641 | Useful for making your own zips or hand-rolling edit functions 1642 | */ 1643 | $S = ''; 1644 | $opf = $this->getOpfXML(); 1645 | $container = $this->getContainerXML(); 1646 | $S[$this->getMimetypeFilename()]=$this->getMimetypeString(); 1647 | $S[$container['relpath']]=$container['content']; 1648 | $S[$opf['relpath']]=$opf['content']; 1649 | foreach($this->getItemFiles() as $item) { 1650 | $S[$item['href']]=$item['content']; 1651 | } 1652 | return $S; 1653 | } 1654 | 1655 | 1656 | public function getCoverMetaWithImage() 1657 | { 1658 | if($id = $this->getCoverMeta()) { 1659 | //error_log('cover meta is specified as id '.$id); 1660 | $item = $this->getItemHashById($id); 1661 | //error_log('got item from manifest:'.print_r($item,true)); 1662 | return $item; 1663 | } else { 1664 | return false; 1665 | } 1666 | } 1667 | 1668 | 1669 | 1670 | private function _getNavPoints($navPoint, $appendguide=false) 1671 | { 1672 | // this may be called recursively from _navElToArray() 1673 | // note: dont use appendguide on recursive calls 1674 | 1675 | // $appendguide: whether to append the guide 1676 | // as a final nav element with a nested 1677 | // list of guide items to content docs 1678 
| 1679 | $navs = array(); 1680 | if(!is_object($navPoint)) return $navs; 1681 | $children = $navPoint->childNodes; 1682 | foreach($navPoint->childNodes as $c) { 1683 | if($c->nodeType==1) { /* match exactly, or account for namespace */ 1684 | if($c->nodeName=='navPoint' || preg_match('/\:navPoint$/',$c->nodeName)) { 1685 | $navs[] = $this->_navElToArray($c); 1686 | } 1687 | } 1688 | } 1689 | if($appendguide) { 1690 | if($guide = $this->opf->getElementsByTagName('guide')) { 1691 | if($g = $guide->item(0)) { 1692 | if($refs = $g->getElementsByTagName('reference')) { 1693 | if($refs->length>0) { 1694 | $order = count($navs)+1; 1695 | $guidenav = array('id'=>'autoguide'.time(), 1696 | 'playOrder'=>$order, 1697 | 'class'=>'guide', 1698 | 'label'=>'Guide to Contents', 1699 | 'src'=>'', 1700 | 'navPoints'=>array() 1701 | ); 1702 | $guidenps = array(); 1703 | $firstitem = true; 1704 | for($i = 0; $i < $refs->length; $i++) { 1705 | $order++; 1706 | $ref = $refs->item($i); 1707 | $refnav = 1708 | array( 1709 | 'id'=>'guideref'.uniqid(), 1710 | 'playOrder'=>$order, 1711 | 'class'=>'guideref', 1712 | 'label'=>$ref->getAttribute('title'), 1713 | 'src'=>$ref->getAttribute('href'), 1714 | 'navPoints'=>array() 1715 | ); 1716 | if($firstitem) { 1717 | $guidenav['src'] = $ref->getAttribute('href'); 1718 | $firstitem = false; 1719 | } 1720 | $guidenps[] = $refnav; 1721 | 1722 | } 1723 | $guidenav['navPoints'] = $guidenps; 1724 | $navs[] = $guidenav; 1725 | } 1726 | } 1727 | } 1728 | } 1729 | 1730 | } 1731 | return $navs; 1732 | /* 1733 | if(!is_object($navPoint)) return $navs; 1734 | foreach($navPoint->getElementsByTagName('navPoint') as $np) { 1735 | $navs[] = $this->_navElToArray($np); 1736 | } 1737 | return $navs; 1738 | */ 1739 | } 1740 | private function _navElToArray($nav) 1741 | { // converts a navPoint element 1742 | // to an array of hashes, including 1743 | // nested navPoints 1744 | if(!is_object($nav)) { 1745 | throw new Exception("that is not an object"); 1746 | } 1747 | 
// return array(); 1748 | $src = (is_object($this->ncxXP->evaluate('nc:content',$nav)->item(0))) ? $this->ncxXP->evaluate('nc:content',$nav)->item(0)->getAttribute('src') : null; 1749 | return array('id'=>$nav->getAttribute('id'), 1750 | 'playOrder'=>intval($nav->getAttribute('playOrder')), 1751 | 'class'=>$nav->getAttribute('class'), 1752 | 'label'=>str_replace('\\', '\\\\', $this->ncxXP->evaluate('nc:navLabel', $nav)->item(0)->textContent), 1753 | 'src'=>$src, 1754 | 'navPoints'=>$this->_getNavPoints($nav) 1755 | ); 1756 | } 1757 | 1758 | public function navElToArray($nav) 1759 | { 1760 | 1761 | return $this->_navElToArray($nav); 1762 | 1763 | } 1764 | 1765 | private function _getItemFullpath($id) 1766 | { 1767 | //$this->_speedTestGetItemEl($id); 1768 | //$this->logerr('seeking id '.$id); 1769 | if(!$this->opfXP) return null; 1770 | if(!($el = $this->_getItemElById($id))) { 1771 | //$this->logerr('cannot find id, maybe xpath parser is broken'); 1772 | if($el = $this->opf->getElementById($id)) { 1773 | //$this->logerr('okay, found it the old fashioned way'); 1774 | } else { 1775 | //$this->logerr('still could not find it, assuming an error'); 1776 | return false; 1777 | } 1778 | } 1779 | return $this->opspath . '/'. $this->getOpsStem() . '/' . $el->getAttribute('href'); 1780 | } 1781 | 1782 | public function getOpsStem() 1783 | { 1784 | return (strlen($this->opsrel)) ? 
$this->opsrel : ''; 1785 | 1786 | } 1787 | 1788 | private function _itemElToArray($item) 1789 | { 1790 | if(!is_object($item)) { 1791 | //error_log('BookGluttonEpub: passed data is not an object'); 1792 | return null; 1793 | } 1794 | return array('id'=>$item->getAttribute('id'), 1795 | 'href'=>$item->getAttribute('href'), 1796 | 'media-type'=>$item->getAttribute('media-type'), 1797 | 'fallback'=>$item->getAttribute('fallback')); 1798 | } 1799 | 1800 | private function _itemElToFullArray($item, $encode=false, $encode_non_binary=false, $recurse=false) 1801 | { 1802 | //$this->logerr('itemElToFullArray'); 1803 | // intensive!! don't call this on iterations unless you have to 1804 | return $this->_itemElAppendContent($this->_itemElToArray($item), $encode, $encode_non_binary, $recurse); 1805 | } 1806 | 1807 | private function _itemElAppendContent($item, $encode=true, $encode_non_binary=false, $recurse=false) 1808 | { 1809 | // Takes an item ARRAY, not ELEMENT! Convert to array first with _itemElToArray 1810 | 1811 | $item['content'] = ''; 1812 | $cunt = @file_get_contents($this->packagepath . '/'. $this->opsrel . '/'. $item['href']); 1813 | 1814 | if(!$cunt) { 1815 | //error_log('error getting file '.$this->packagepath . '/'. $this->opsrel . '/'. @$item['href']); 1816 | } 1817 | 1818 | if(($this->_isBinaryType($item['media-type'])||$encode_non_binary) && $encode==true && $cunt) { 1819 | $item['content'] = chunk_split(base64_encode($cunt), 76, "\n"); 1820 | } else if($cunt) { 1821 | $item['content'] = $cunt; 1822 | } 1823 | if($this->_isBinaryType($item['media-type'])) { 1824 | $item['imginfo'] = getimagesize($this->packagepath . '/'. $this->opsrel . '/'. 
$item['href']); 1825 | } 1826 | 1827 | // filter out processing instructions 1828 | $item['content'] = $item['content']; 1829 | if($recurse) { // will be slow, use judiciously 1830 | $item['content'] = $this->_encodeItemAssets($item['content']); 1831 | } 1832 | 1833 | $item['length'] = strlen($item['content']); 1834 | $stat = @stat($this->packagepath . '/'. $this->opsrel . '/'.$item['href']); 1835 | $item['updated'] = $stat['mtime']; 1836 | return $item; 1837 | } 1838 | 1839 | private function _encodeItemAssets($itemcontent) 1840 | { 1841 | // takes a FULL item ARRAY!! Convert to array first with _itemElToArray 1842 | // then makes sure to append content, with _itemElAppendContent 1843 | // this will give you the full item array 1844 | // pull in images encoded as data uris 1845 | 1846 | // modifes the 'content' value in array 1847 | 1848 | 1849 | return preg_replace_callback('/]*?)>/m', array($this, '_encodematch'), $itemcontent); 1850 | } 1851 | 1852 | private function _encodematch($matches) { 1853 | //error_log('matched image tag:'.$matches[0]); 1854 | 1855 | $itemel = $this->_getItemElByPath($matches[2]); 1856 | if(!$itemel) { 1857 | //error_log('not found'); 1858 | return $matches[0]; 1859 | } 1860 | $cob = $this->_itemElAppendContent($this->_itemElToArray($itemel), true, true, true); 1861 | $url = 'data:'.$cob['media-type'].';base64,'.$cob['content']; 1862 | $dims = ""; 1863 | if(!preg_match('/height/i', $matches[0])) { 1864 | $dims = 'height="'.$cob['imginfo'][1].'" '; 1865 | //error_log($dims); 1866 | } 1867 | return ''; 1868 | } 1869 | 1870 | private function _getItemDataUrl($item, $recurse=false) 1871 | { 1872 | // Takes an item ARRAY, not ELEMENT! 
Convert to array first with _itemElToArray 1873 | 1874 | $cob = $this->_itemElAppendContent($item, true, true, true); 1875 | $ret = 'data:'.$cob['media-type'].';base64,'.$cob['content']; 1876 | return $ret; 1877 | } 1878 | 1879 | 1880 | private function _queryOPF($expr) 1881 | { 1882 | 1883 | } 1884 | 1885 | 1886 | 1887 | 1888 | 1889 | 1890 | 1891 | /** 1892 | 1893 | Get things by referencing the OPF id for the item 1894 | 1895 | */ 1896 | 1897 | public function getItemHref($id) 1898 | { 1899 | if(!$this->opfXP) return null; 1900 | return $this->opfXP->evaluate('//*[@id="'.$id.'"]')->item(0)->getAttribute('href'); 1901 | } 1902 | 1903 | public function getItemType($id) 1904 | { 1905 | if(!$this->opfXP) return null; 1906 | 1907 | return $this->opfXP->evaluate('//*[@id="'.$id.'"]')->item(0)->getAttribute('media-type'); 1908 | } 1909 | 1910 | public function getItemHashById($id) 1911 | { 1912 | return $this->_itemElToFullArray($this->_getItemElById($id), false); // return binaries unencoded 1913 | } 1914 | 1915 | public function getItemById($id) 1916 | { 1917 | /* different way to pull a manifest item */ 1918 | // this one just returns the contents as a 1919 | // string, unlike the meta-enabled funcs 1920 | 1921 | if($path = $this->_getItemFullpath($id)) { 1922 | //$this->logerr('full path to this is:'.$path); 1923 | return @file_get_contents($path); 1924 | } else { 1925 | return ''; 1926 | } 1927 | } 1928 | 1929 | private function _getItemElById($id) 1930 | { 1931 | if(!$this->opfXP) return null; 1932 | return $this->opfXP->evaluate('//*[@id="'.$id.'"]')->item(0); 1933 | } 1934 | 1935 | 1936 | /** 1937 | Get or check things by passing the path of the item 1938 | */ 1939 | 1940 | public function hasItemByPath($path) 1941 | { //alias 1942 | return $this->hasItemByHref($path); 1943 | } 1944 | 1945 | public function hasItemByHref($path) 1946 | { 1947 | $item = $this->_getItemElByPath($path); 1948 | return ($item) ? 
true : false; 1949 | } 1950 | 1951 | public function getItemByPath($path) 1952 | { 1953 | // returns first item matching the rel url 1954 | // appends contents of file to hashed array 1955 | 1956 | 1957 | $item = ($itemel = $this->_getItemElByPath($path)) ? $this->_itemElToFullArray($itemel, false, false, false) : array(); 1958 | //error_log('epub class logging item array'); 1959 | //error_log(print_r($item,true)); 1960 | return $item; 1961 | } 1962 | 1963 | public function getItemByPathWithAssets($path) 1964 | { 1965 | // returns first item matching the rel url 1966 | // appends contents of file to hashed array 1967 | // base64 encodes all external assets 1968 | return ($itemel = $this->_getItemElByPath($path)) ? $this->_itemElToFullArray($itemel, false, false, true) : array(); 1969 | } 1970 | 1971 | private function _getItemElByPath($path) 1972 | { 1973 | // error_log('epub class getting '.$path); 1974 | 1975 | if(!$this->opfXP) { 1976 | //error_log('no xpath parser defined'); 1977 | return null; 1978 | } else { 1979 | return $this->opfXP->evaluate('//*[@href="'.$path.'"]',$this->opf_manifestNode)->item(0); 1980 | } 1981 | } 1982 | 1983 | public function getFlatNav() 1984 | { 1985 | return $this->ncx->getElementsByTagName('navPoint'); 1986 | } 1987 | 1988 | public function getNavLabelByHref($href) 1989 | { 1990 | $found = null; 1991 | $navpoints = $this->getFlatNav(); 1992 | foreach($navpoints as $np) { 1993 | $arr = $this->_navElToArray($np); 1994 | if($arr['src']==$href) { 1995 | $found = $arr['label']; 1996 | break; 1997 | } 1998 | } 1999 | if(!$found && strpos($href,'#')) { 2000 | $hrefparts = explode('#', $href); 2001 | return $this->getNavLabelByHref($hrefparts[0]); 2002 | } 2003 | 2004 | return $found; 2005 | } 2006 | 2007 | /** 2008 | 2009 | Factories - create doms and dom elements 2010 | 2011 | */ 2012 | 2013 | 2014 | 2015 | 2016 | private function _makeContDoc($xml = null) 2017 | { 2018 | $contdoc = new DomDocument('1.0', 'utf-8'); 2019 | 
$contdoc->preserveWhiteSpace = FALSE; 2020 | 2021 | if($xml == null) { 2022 | $contstr = ''; 2023 | $contdoc->loadXML($contstr); 2024 | $rootfiles = $contdoc->appendChild($contdoc->createElement('rootfiles')); 2025 | $rootfile = $contdoc->createElement('rootfile'); 2026 | $rootfile->setAttribute('full-path', $this->opfpath); 2027 | $rootfile->setAttribute('media-type', $this->opsmime); 2028 | $rootfiles->appendChild($rootfile); 2029 | $contdoc->getElementsByTagName('container')->item(0)->appendChild($rootfiles); 2030 | } else { 2031 | //error_log($xml); 2032 | @$contdoc->loadXML($xml); 2033 | if(!$contdoc) { 2034 | throw new Exception('content xml is not properly formed'); 2035 | } 2036 | } 2037 | return $contdoc; 2038 | } 2039 | 2040 | private function _buildNcxDoc($ncx = null) 2041 | { 2042 | if(!$ncx) { 2043 | $ncx = new DomDocument('1.0', 'utf-8'); 2044 | $ncx->preserveWhiteSpace = FALSE; 2045 | } 2046 | $ncx->loadXML('doctypeNISO.'" "'.$this->daisydtd.'">'); 2047 | $n = $ncx->getElementsByTagName('ncx')->item(0); 2048 | $this->ncx_headNode = $n->appendChild($ncx->createElement('head')); 2049 | 2050 | $this->ncx_docTitleNode = $n->appendChild($ncx->createElement('docTitle')); 2051 | $this->ncx_docTitleNode->appendChild($ncx->createElement('text'))->appendChild($ncx->createTextNode(' ')); 2052 | $this->ncx_docAuthorNode = $n->appendChild($ncx->createElement('docAuthor')); 2053 | $this->ncx_docAuthorNode->appendChild($ncx->createElement('text'))->appendChild($ncx->createTextNode(' ')); 2054 | 2055 | $this->ncx_navMapNode = $n->appendChild($ncx->createElement('navMap')); 2056 | // add required legacies (except uid which we add later) 2057 | $this->ncx_headNode->appendChild($this->_ncxMeta('dtb:depth', '1', $ncx)); 2058 | $this->ncx_headNode->appendChild($this->_ncxMeta('dtb:totalPageCount', '0', $ncx)); 2059 | $this->ncx_headNode->appendChild($this->_ncxMeta('dtb:maxPageNumber', '0', $ncx)); 2060 | // TODO remove the following--should not get called here! 
2061 | $this->_createNcx(); // add to manifest 2062 | $this->ncxXP = new DomXpath($ncx); 2063 | $this->ncxXP->registerNamespace("nc", $this->ncxNS); 2064 | return $ncx; 2065 | } 2066 | 2067 | public function makeNcxDoc($xml) 2068 | { 2069 | return $this->_makeNcxDoc($xml); 2070 | } 2071 | 2072 | private function _makeNcxDoc($xml = null) 2073 | { 2074 | $ncx = new DomDocument('1.0', 'utf-8'); 2075 | $ncx->preserveWhiteSpace = FALSE; 2076 | 2077 | if($xml==null) { 2078 | 2079 | $ncx = $this->_buildNcxDoc($ncx); 2080 | 2081 | } else { 2082 | 2083 | @$ncx->loadXML($xml); 2084 | 2085 | 2086 | $n = $ncx->getElementsByTagName('ncx')->item(0); 2087 | 2088 | if(!is_object($n)) { 2089 | //error_log('epub:ncx not found, could not parse dom structure from it'); 2090 | $ncx = $this->_buildNcxDoc($ncx); 2091 | } 2092 | 2093 | $this->ncx_headNode = $n->getElementsByTagName('head')->item(0); 2094 | $this->ncx_docTitleNode = $n->getElementsByTagName('docTitle')->item(0); 2095 | $this->ncx_docAuthorNode = $n->getElementsByTagName('docAuthor')->item(0); 2096 | $this->ncx_navMapNode = $n->getElementsByTagName('navMap')->item(0); 2097 | /* 2098 | //if(!$this->ncx_docAuthorNode) { 2099 | // $this->ncx_docAuthorNode = $n->insertBefore($ncx->createElement('docAuthor'), $n->firstChild); 2100 | //} 2101 | */ 2102 | if(!$this->ncx_docTitleNode) { 2103 | $this->ncx_docTitleNode = $n->insertBefore($ncx->createElement('docTitle'), $n->firstChild); 2104 | } 2105 | 2106 | 2107 | if(!$this->ncx_headNode) { 2108 | // in theory, this should be a critical failure, per the spec 2109 | // the reality is that many books may not have this set 2110 | // to comply, we create it 2111 | $this->ncx_headNode = $n->insertBefore($ncx->createElement('head'), $n->firstChild); 2112 | } 2113 | if(!$this->ncx_navMapNode) { 2114 | throw new Exception('the ncx structure is incomplete:'.$ncx->saveXML()); 2115 | } 2116 | 2117 | $this->ncxXP = new DomXpath($ncx); 2118 | $this->ncxXP->registerNamespace("nc", $this->ncxNS); 
2119 | 2120 | 2121 | //$ncw = $this->_parseNcx($xml); 2122 | //$ncx = $this->_rebuildNcx($ncw); 2123 | 2124 | } 2125 | // set up xpath parser 2126 | 2127 | return $ncx; 2128 | } 2129 | 2130 | public function makeOpfDoc($xml) 2131 | { 2132 | return $this->_makeOpfDoc($xml); 2133 | } 2134 | 2135 | private function _makeOpfDoc($xml = null) 2136 | { 2137 | $opf = new DomDocument('1.0','utf-8'); 2138 | $opf->preserveWhiteSpace = FALSE; 2139 | $opf->validateOnParse = true; 2140 | 2141 | 2142 | if ($xml==null) { 2143 | $xml = ''; 2144 | $opf->loadXML($xml); 2145 | $package = $opf->getElementsByTagName('package')->item(0); 2146 | // multiple namespaces on an element require us to use document fragments from strings 2147 | $frag = $opf->createDocumentFragment(); 2148 | if(!$frag->appendXML('')) { 2149 | throw new Exception('could not create metadata fragment'."\n"); 2150 | } 2151 | $this->opf_metadataNode = $package->appendChild($frag); 2152 | $this->opf_manifestNode =$package->appendChild($opf->createElement('manifest')); 2153 | $this->opf_spineNode = $package->appendChild($opf->createElement('spine')); 2154 | } else { 2155 | 2156 | // fix a typo bug that infected many BG packages (we 2157 | // do it this way because php wont let us modify the 2158 | // xmlns attribute after parsing: 2159 | 2160 | $xml = str_replace('http://www.idpf.og/2007/opf', 'http://www.idpf.org/2007/opf', $xml); 2161 | 2162 | 2163 | @$opf->loadXML($xml); 2164 | $package = $opf->getElementsByTagName('package')->item(0); 2165 | if(!is_object($package)) { 2166 | //error_log('no package element found'); 2167 | throw new Exception('package element missing or malformed'); 2168 | 2169 | } 2170 | if($package->hasAttribute('xmlns')) { 2171 | if($package->getAttribute('xmlns')!=$this->opfNS) { // correct this 2172 | 2173 | // this won't work due to a bug in PHP, but whatever 2174 | // maybe someday some asshole will fix it. 
2175 | 2176 | $package->setAttribute('xmlns', $this->opfNS); 2177 | 2178 | // should maybe save but holding off on that 2179 | 2180 | //$opf->formatOutput = TRUE; 2181 | //$opf->save($this->packagepath . '/'.$this->opfpath); 2182 | } else { 2183 | //error_log($package->getAttribute('xmlns')); 2184 | } 2185 | } else { 2186 | //error_log('package has no xmlns attribute!!'); 2187 | } 2188 | 2189 | if($package->getElementsByTagName('dc-metadata')->length > 0) { 2190 | // old-style meta 2191 | $this->opf_metadataNode = $package->getElementsByTagName('dc-metadata')->item(0); 2192 | } else { 2193 | $this->opf_metadataNode = $package->getElementsByTagName('metadata')->item(0); 2194 | } 2195 | $this->opf_manifestNode = $package->getElementsByTagName('manifest')->item(0); 2196 | 2197 | 2198 | 2199 | 2200 | $this->uniqIDscheme = $package->getAttribute('unique-identifier'); 2201 | 2202 | if(!$this->uniqIDscheme) { 2203 | $this->uniqIDscheme = 'PrimaryID'; 2204 | $package->setAttribute('unique-identifier', $this->uniqIDscheme); 2205 | } 2206 | 2207 | if(!$opf->getElementsByTagName('identifier')->item(0)) { // no identifiers at all!! 
2208 | $newval = 'urn:uuid:'.uuidGen::generateUuid(); 2209 | $id = $this->opf_metadataNode->appendChild($opf->createElement('identifier')); 2210 | $id->appendChild($opf->createTextNode($newval)); 2211 | $id->setAttribute('id', $this->uniqIDscheme); 2212 | $this->uniqIDval = $newval; 2213 | } 2214 | 2215 | foreach($opf->getElementsByTagName('identifier') as $id) { 2216 | if($id->getAttribute('id')==$this->uniqIDscheme) { 2217 | $this->uniqIDval = trim($id->nodeValue); 2218 | } 2219 | } 2220 | 2221 | if(!strlen($this->uniqIDval)>1) { 2222 | throw new Exception("EPUB file must have a globally unique identifier such as a GUID."); 2223 | } 2224 | 2225 | $this->opf_spineNode = $package->getElementsByTagName('spine')->item(0); 2226 | if(!$this->opf_metadataNode || !$this->opf_manifestNode || !$this->opf_spineNode) { 2227 | throw new Exception('this opf structure is incomplete'); 2228 | } 2229 | } 2230 | 2231 | 2232 | 2233 | $this->opfXP = new DomXpath($opf); 2234 | $this->opfXP->registerNamespace("opfns", $this->opfNS); 2235 | $this->opfXP->registerNamespace("dc", $this->dcNS); 2236 | return $opf; 2237 | } 2238 | 2239 | 2240 | private function _setNavLabel($node, $labeltext) 2241 | { 2242 | // creates and returns a navLabel with the given text 2243 | 2244 | $label = $this->ncx->createElement('navLabel'); 2245 | $text = $this->ncx->createElement('text'); 2246 | $domtext = new DOMText($labeltext); 2247 | $text->appendChild($domtext); 2248 | $label->appendChild($text); 2249 | $node->appendChild($label); 2250 | return $node; 2251 | } 2252 | 2253 | private function _createNcx() 2254 | { 2255 | // creates the ncx, adding a ref to it in the spine el 2256 | // also gives its nav map a label 2257 | 2258 | $this->addItem('ncx', $this->ncxpath, $this->ncxmime, null); 2259 | $this->_getSpineEl()->setAttribute('toc', 'ncx'); 2260 | // the following causes validation errors: 2261 | //$this->_setNavLabel($this->_getNavMapEl(), $this->navmaplabel); 2262 | } 2263 | 2264 | private function 
_ncxMeta($aname, $aval, $ncx = null) 2265 | { 2266 | // creates a meta el for the ncx 2267 | if($ncx==null) $ncx = $this->ncx; 2268 | $meta = $ncx->createElement('meta'); 2269 | $meta->setAttribute('name', $aname); 2270 | $meta->setAttribute('content', $aval); 2271 | return $meta; 2272 | } 2273 | 2274 | private function _createNavPoint($id, $heading=null, $src=null, $class='section') 2275 | { 2276 | // creates a nav point for the navMap and returns it 2277 | 2278 | if($this->ncx->getElementById($id)) { 2279 | throw new Exception('item with id '.$id.' already exists in the ncx'); 2280 | } 2281 | $playorder = $this->_getNavMapEl()->getElementsByTagName('navPoint')->length + 1; 2282 | //$this->logerr('playorder will be '+$playorder); 2283 | $navpoint = $this->ncx->createElement('navPoint'); 2284 | $navpoint->setAttribute('id', $id); 2285 | $navpoint->setIdAttribute('id', true); 2286 | $navpoint->setAttribute('playOrder', $playorder); 2287 | $navpoint->setAttribute('class', $class); 2288 | if($heading && $src) { 2289 | $navpoint = $this->_setNavLabel($navpoint, $heading); 2290 | $content = $this->ncx->createElement('content'); 2291 | $content->setAttribute('src', $src); 2292 | $navpoint->appendChild($content); 2293 | } 2294 | return $navpoint; 2295 | } 2296 | 2297 | private function _createItem($id, $href, $mediatype) 2298 | { 2299 | // creates an item for the manifest el and returns it 2300 | 2301 | if($this->opf->getElementById($id)) { 2302 | throw new Exception('item with id '.$id.' 
already exists in the manifest'); 2303 | } 2304 | $item = $this->opf->createElement('item'); 2305 | $item->setAttribute('id', $id); 2306 | $item->setIdAttribute('id', true); 2307 | $item->setAttribute('href', $href); 2308 | $item->setAttribute('media-type', $mediatype); 2309 | return $item; 2310 | } 2311 | 2312 | private function _createItemRef($idref, $linear) 2313 | { 2314 | // creates an itemref for the spine el and returns it 2315 | // returns an unattached itemref element 2316 | 2317 | if($this->opf->getElementById($idref)) { // requires item to exist already 2318 | $itemref = $this->opf->createElement('itemref'); 2319 | $itemref->setAttribute('idref', $idref); 2320 | $itemref->setAttribute('linear', $linear); 2321 | return $itemref; 2322 | } else { 2323 | throw new Exception('itemrefs require items with that id to exist, and '.$idref.' no existy'); 2324 | } 2325 | } 2326 | 2327 | 2328 | 2329 | /** 2330 | Builders - 2331 | some look like simple setters but 2332 | they are actually additive 2333 | */ 2334 | 2335 | 2336 | 2337 | public function addTitle($title) 2338 | { 2339 | // stub: will add another dctitle el 2340 | $this->setMeta('title',$title); 2341 | } 2342 | 2343 | public function setTitle($title) 2344 | { 2345 | if(!$this->opfXP) return null; 2346 | $q = $this->opfXP->query('//dc:title', $this->opf_metadataNode); 2347 | if($q->length>0) { 2348 | foreach($q as $node) { 2349 | $node->parentNode->removeChild($node); 2350 | } 2351 | } 2352 | $this->addTitle($title); 2353 | $this->title = $title; 2354 | } 2355 | 2356 | public function addAuthor($author) 2357 | { 2358 | $this->setMeta('creator', $author, 'aut'); 2359 | } 2360 | 2361 | public function setAuthor($author) 2362 | { 2363 | // purge any existing with role 'aut' 2364 | if($q = $this->getDcCreators()) { 2365 | foreach($q as $cr) { 2366 | if($cr->getAttribute('role')=='aut') { 2367 | if($cr->parentNode->removeChild($cr)) { 2368 | 2369 | } 2370 | } 2371 | } 2372 | } 2373 | $this->addAuthor($author); 
2374 | } 2375 | 2376 | public function setIsbn($isbn) 2377 | { 2378 | $this->_setIdentifier($isbn, 'isbn', false); 2379 | } 2380 | 2381 | public function setPublisher($pub) 2382 | { 2383 | $this->addMeta('publisher', $pub); 2384 | } 2385 | public function setDescription($des) 2386 | { 2387 | $this->addMeta('description', $des); 2388 | } 2389 | public function setOriginalPubdate($date) 2390 | { 2391 | $this->addMeta('date', $date, 'original-publication'); 2392 | } 2393 | public function setOpsPubdate($date) 2394 | { 2395 | $this->addMeta('date', $date, 'ops-publication'); 2396 | } 2397 | public function setLanguage($lang) 2398 | { 2399 | $this->addMeta('language', $lang); 2400 | } 2401 | public function addSubject($sub) 2402 | { 2403 | $this->addMeta('subject', $sub); 2404 | } 2405 | public function setRights($rights) 2406 | { 2407 | $this->addMeta('rights', $rights); 2408 | } 2409 | 2410 | public function getIsbn() 2411 | { 2412 | foreach($this->opf->getElementsByTagName('identifier') as $id) { 2413 | if(strtolower($id->getAttribute('scheme'))=='isbn' || strtolower($id->getAttribute('scheme'))=='isbn13') { 2414 | return $id->nodeValue; 2415 | } 2416 | } 2417 | return null; 2418 | } 2419 | 2420 | public function replacePrimaryIdValue($newval) 2421 | { 2422 | if(!$this->opf->getElementsByTagName('identifier')->item(0)) { 2423 | $id = $this->opf_metadataNode->appendChild($this->opf->createElement('identifier')); 2424 | $id->appendChild($this->opf->createTextNode($newval)); 2425 | $id->setAttribute('id', $this->uniqIDscheme); 2426 | $this->uniqIDval = $newval; 2427 | } else { 2428 | 2429 | foreach($this->opf->getElementsByTagName('identifier') as $id) { 2430 | if($id->getAttribute('id')==$this->uniqIDscheme) { 2431 | foreach($id->childNodes as $child) { 2432 | $id->removeChild($child); 2433 | } 2434 | $id->appendChild($this->opf->createTextNode($newval)); 2435 | $this->uniqIDval = $newval; 2436 | } 2437 | } 2438 | 2439 | } 2440 | } 2441 | 2442 | 2443 | 2444 | private 
/**
 * Appends a Dublin Core metadata element to the OPF metadata node.
 *
 * The (name, content, variant) triple is also recorded in $this->dcdata.
 * This always adds a new element; existing elements with the same name
 * are left in place.
 *
 * @param string      $name    DC element name without prefix, e.g. 'title'
 * @param string      $content text content of the element
 * @param string|null $variant written as opf:role for 'creator' and
 *                             'contributor', as opf:event for 'date';
 *                             ignored for every other name
 * @return void
 */
public function addMeta($name, $content, $variant=null)
{
    array_push($this->dcdata, array($name, $content, $variant));

    // createElement() does not escape its value argument, so content
    // such as "Arms & the Woman" would be mangled; a text node escapes it
    $dcel = $this->opf->createElement('dc:'.$name);
    if ($content !== null && (string)$content !== '') {
        $dcel->appendChild($this->opf->createTextNode($content));
    }

    if ($variant) {
        if ($name == 'creator' || $name == 'contributor') {
            $dcel->setAttributeNS($this->opfNS, 'opf:role', $variant);
        } else if ($name == 'date') {
            $dcel->setAttributeNS($this->opfNS, 'opf:event', $variant);
        }
    }

    $this->opf_metadataNode->appendChild($dcel);
}
$linear=null, $fallback=null) 2507 | { 2508 | // adds an item to the manifest 2509 | // 2510 | // if a content string is passed, 2511 | // also creates a file from that content 2512 | // and adds it to the zip queue 2513 | // 2514 | // if linear is set to either 'yes' 2515 | // or 'no', a spine ref will be created 2516 | // with a linear attribute set to that value 2517 | // 2518 | // note: spine refs to non content docs not allowed 2519 | 2520 | 2521 | $this->logerr('BookGluttonEpub->addItem:'.$href, 4); 2522 | 2523 | if($id=='ncx') $linear = null; 2524 | // we don't want ncx in the main content root, keep it level with opf 2525 | $href = (strlen($this->opsrel)>0 && $id!='ncx') ? "$this->opsrel/".$href : $href; 2526 | $this->_getManifestEl()->appendChild($this->_createItem($id, $href, $mediatype)); 2527 | if($content) { 2528 | 2529 | //$this->logerr('this has content payload'); 2530 | //$this->logerr('href is '.$href); 2531 | $path = pathinfo($href); 2532 | //$this->logerr('relative path to this href is '.$path['dirname']); 2533 | $relpath = $path['dirname']; 2534 | 2535 | 2536 | //$this->logerr('checking for dir:'.$this->opspath . '/' . $relpath); 2537 | 2538 | if(!file_exists($this->opspath . '/' . $relpath)) { 2539 | //$this->logerr('creating directory:'.$this->opspath . '/' . $relpath); 2540 | if(!mkdir($this->opspath . '/' . $relpath, 0755, true)) { 2541 | $this->logerr('could not make directory:'.$this->opspath . '/' . $relpath, 0); 2542 | } 2543 | } 2544 | if(!file_put_contents($this->opspath . '/' . $href , $content)) { 2545 | $this->logerr('Could not put file:'.$this->opspath . '/'. $href, 0); 2546 | } else { 2547 | $this->logerr('adding '.$this->opspath . '/' . $href.' to zip'); 2548 | $this->zipQ[$href]=$this->opspath . '/' . $href; 2549 | } 2550 | } 2551 | if($linear) { // linear must be null for non-content items! 
/**
 * Adds an HTML page and its assets to the package.
 *
 * The page is added to the manifest as "<id>.html" with a linear spine
 * reference, linked from the ncx nav map with class 'cover', and each
 * asset is added as a manifest item without a spine reference. The
 * metadata files are saved afterwards.
 *
 * $page is a keyed array:
 *
 *   id     => manifest id, also used as the file name stem
 *   label  => heading used for the nav point
 *   html   => page content
 *   assets => optional array of assets, each keyed with
 *             id, relpath, mimetype and content
 *
 * @param array $page page description as above
 * @return void
 */
public function addHTMLItem($page) {

    $this->logerr('BookGLuttonEpub->addHTMLItem', 4);

    $this->addItem(
        $page['id'],
        $page['id'] .'.html',
        'application/xhtml+xml',
        $page['html'],
        'yes');

    $this->addNavItem($page['id'], $page['label'], $page['id'] .'.html', 'cover');

    // assets may be absent; count() on a missing key or non-array is an
    // error on current PHP versions, so check before iterating
    if (isset($page['assets']) && is_array($page['assets'])) {
        foreach ($page['assets'] as $asset) {
            $this->addItem(
                $asset['id'],
                $asset['relpath'],
                $asset['mimetype'],
                $asset['content']
            );
        }
    }

    $this->_saveMeta();
}
/**
 * Inserts an HTML page at the front of the package.
 *
 * The page is prepended to the manifest as "<id>.html" with a linear
 * spine reference, and prepended to the ncx nav map with class 'cover'.
 * Its assets are added with addItem(), i.e. appended to the manifest,
 * not prepended. The metadata files are saved afterwards.
 *
 * $page is a keyed array with id, label, html and an optional assets
 * array whose entries are keyed with id, relpath, mimetype and content.
 *
 * @param array $page page description as above
 * @return void
 */
public function prependHTMLItem($page) {

    $this->prependItem(
        $page['id'],
        $page['id'] .'.html',
        'application/xhtml+xml',
        $page['html'],
        'yes');

    $this->prependNavItem($page['id'], $page['label'], $page['id'] .'.html', 'cover');

    // assets may be absent; count() on a missing key or non-array is an
    // error on current PHP versions, so check before iterating
    if (isset($page['assets']) && is_array($page['assets'])) {
        foreach ($page['assets'] as $asset) {
            $this->addItem(
                $asset['id'],
                $asset['relpath'],
                $asset['mimetype'],
                $asset['content']
            );
        }
    }

    $this->_saveMeta();
}
"$this->opsrel/".$src : $src; 2725 | $newnav = $this->_createNavPoint($id, $heading, $src, $class); 2726 | //$this->logerr('new nav has label '.$newnav->nodeValue); 2727 | 2728 | // DO NOT try to use the xpath parser to do this! 2729 | 2730 | $navs = $nmap->getElementsByTagName('navPoint'); 2731 | 2732 | $inserted = $nmap->insertBefore($newnav, $navs->item(0)); 2733 | 2734 | //$this->logerr('rebuilding'); 2735 | $this->_rebuildNcx(); 2736 | 2737 | // TODO : regenerate ncx from scratch here, because document order differs from play order 2738 | 2739 | } 2740 | 2741 | public function replaceItemById($id, $replacement, $savechanges=true) 2742 | { 2743 | // keeps filename but replaces content of item chosen by id 2744 | 2745 | $item = $this->_getItemElById($id); 2746 | if(!$item) { 2747 | $this->logerr("item with id $id not found. dumping item refs:".print_r($this->getItemRefs(), true),2); 2748 | return; 2749 | } 2750 | $item->setAttribute('media-type', $replacement['media-type']); 2751 | //$item->setAttribute('fallback', $replacement['fallback']); 2752 | if($savechanges) { 2753 | //$this->logerr('saving changes to filesystem'); 2754 | // replace in filesystem 2755 | if(strlen($replacement['content'])>0) { 2756 | //$this->logerr('we do have content, so we write it'); 2757 | $itempath = $this->opspath .'/'.$item->getAttribute('href'); 2758 | if(!file_put_contents($itempath, $replacement['content'])) { 2759 | $this->logerr('could not write replacement content to:'.$itempath,0); 2760 | } else { 2761 | $this->logerr('wrote replacement content to:'.$itempath, 1); 2762 | } 2763 | } 2764 | 2765 | } 2766 | return $item; 2767 | } 2768 | 2769 | 2770 | 2771 | 2772 | 2773 | /** Helpers - for file and archive operations */ 2774 | 2775 | 2776 | 2777 | private function _getMimeFromExt($src) 2778 | { 2779 | $pi = pathinfo($src); 2780 | @$ext = $pi['extension']; 2781 | switch(strtolower($ext)) 2782 | { 2783 | case 'svg': 2784 | return 'image/svg+xml'; 2785 | case 'png': 2786 | return 
/**
 * Returns the directory of a content file relative to the OPF directory.
 *
 * The directory part of $cfile is taken from _getRelPathToContent().
 * When $this->relpath is unset or empty that directory is returned
 * unchanged. Otherwise the leading path steps it shares with
 * $this->relpath are removed and the remainder is returned.
 *
 * @param string $cfile path to a content file, '/'-separated
 * @return string remaining directory path, '' when nothing is left
 */
private function _getRelPathToContentRelToOpf($cfile)
{
    $rel = $this->_getRelPathToContent($cfile);

    if (!isset($this->relpath) || $this->relpath === '') {
        return $rel[0];
    }

    $contentSteps = explode('/', $rel[0]);

    // ignore empty steps produced by leading, trailing or doubled slashes
    $opfSteps = array();
    foreach (explode('/', $this->relpath) as $step) {
        if ($step !== '') {
            $opfSteps[] = $step;
        }
    }

    // Only the shared leading steps belong to the OPF directory. The
    // previous array_diff() removed a matching step at any depth, so
    // "OEBPS/text/OEBPS" relative to "OEBPS" wrongly became "text".
    $shared = 0;
    $limit = min(count($contentSteps), count($opfSteps));
    while ($shared < $limit && $contentSteps[$shared] === $opfSteps[$shared]) {
        $shared++;
    }

    return implode('/', array_slice($contentSteps, $shared));
}
string 2856 | if(count($pi)==1) { // exploding on separator only returned one thing 2857 | $ret[1] = $pi[0]; // the filename 2858 | return $ret; 2859 | } else if(count($pi)>1) { // more than one piece 2860 | $this->logerr('more than one item in path explosion', 2); 2861 | $ret[1] = array_pop($pi); // last one should always be filename 2862 | $this->logerr('filename is '.$ret[1], 2); 2863 | $parts = array(); 2864 | foreach($pi as $part) { // iterate on what's left of path steps 2865 | if($part != null && $part != "") { // check to make sure it's valid 2866 | $parts[] = $part; //filter to store only valid steps in path 2867 | } 2868 | } 2869 | if(count($parts)>1) { // if we have more than one, join with sep 2870 | $ret[0] = implode("/", $parts); 2871 | } else { 2872 | if(count($parts)==1) { // only one, just assign it 2873 | $this->logerr('only one part left:'.$parts[0], 2); 2874 | $ret[0] = $parts[0]; 2875 | } 2876 | } 2877 | $this->logerr('returning '.print_r($ret, true), 2); 2878 | return $ret; 2879 | } else { 2880 | return false; 2881 | } 2882 | 2883 | } 2884 | 2885 | private function _validID($idstr=null) 2886 | { 2887 | 2888 | if($idstr==null) { 2889 | $idstr = uniqid(); 2890 | } 2891 | 2892 | if(!preg_match('/^[A-Za-z]/', $idstr)) { 2893 | $idstr = 'ID'.$idstr; 2894 | } 2895 | 2896 | $idstr = preg_replace('/[^A-Za-z0-9:_.-]/', '', $idstr); 2897 | 2898 | $idstr = $idstr . 
/**
 * Writes $contents to $loc under an exclusive lock.
 *
 * @param string $loc      target file path
 * @param string $contents data to write
 * @return void
 * @throws Exception when the write fails; the message names the user the
 *                   script runs as and the owner and permissions of the
 *                   target
 */
private function _writeFile($loc, $contents)
{
    // file_put_contents() returns the byte count, which is 0 for an
    // empty payload; only a strict false means the write failed
    if (@file_put_contents($loc, $contents, LOCK_EX) !== false) {
        return;
    }

    // if the file was never created, its directory is what blocked us
    $target = file_exists($loc) ? $loc : dirname($loc);

    $mename = 'unknown';
    $ownername = 'unknown';
    if (function_exists('posix_getpwuid')) {
        $me = posix_getpwuid(posix_getuid());
        if (is_array($me)) {
            $mename = $me['name'];
        }
        $ownerid = @fileowner($target);
        if ($ownerid !== false) {
            $user = posix_getpwuid($ownerid);
            if (is_array($user)) {
                $ownername = $user['name'];
            }
        }
    }

    $perms = @fileperms($target);
    $permstr = ($perms === false) ? 'unknown' : substr(sprintf('%o', $perms), -4);

    throw new Exception("Do not have write permission for this action. Script is running as $mename but owner is ".$ownername." and perms are ".$permstr);
}
/**
 * Saves the package as an epub archive at $filename.
 *
 * Writes the metadata files, builds the archive next to the package
 * directory ("<packagepath>.epub") and then moves it to $filename.
 * Does nothing when the object is read-only.
 *
 * @param string|null $filename destination path of the epub file
 * @return bool|null true when the archive was moved into place, false
 *                   when the move failed, null when read-only
 * @throws Exception when no filename is given
 */
public function save($filename=null)
{
    if ($this->readonly) return;
    if (!$filename) throw new Exception('Save requires a filename ');

    $this->starttime = time();
    $this->_saveMeta();
    $saved = $this->_makeEpubTarget($this->packagepath.'.epub');

    // the move result used to be discarded, so a failed save was silent
    if (!DiskUtil::xRename($saved, $filename)) {
        $this->logerr('could not move epub archive '.$saved.' to '.$filename, 0);
        return false;
    }

    return true;
}
/**
 * Moves the whole package directory to "$workpath/$opsname" and points
 * the object's path properties at the new location.
 *
 * If the first rename fails, whatever sits at the target is renamed to
 * "<target>.BACKUP<timestamp>" and the move is tried once more.
 * Does nothing when the object is read-only.
 *
 * @param string      $workpath  directory that will contain the package
 * @param string|null $opsname   new package directory name; a uniqid()
 *                               is used when omitted
 * @param bool        $savefirst save metadata before moving; a failure
 *                               to save is ignored
 * @return void
 * @throws Exception when the target cannot be backed up, or the move
 *                   still fails after the backup
 */
public function _moveOps($workpath, $opsname=null, $savefirst=true)
{
    if ($this->readonly) return;

    if ($savefirst == true) {
        try {
            $this->_saveMeta();
        } catch (Exception $e) {
            // best effort: the move proceeds even if metadata could not be saved
        }
    }

    if ($opsname == null) $opsname = uniqid(); // new uniqid for path

    $target = $workpath . '/' . $opsname;

    if (!DiskUtil::xRename($this->packagepath, $target)) { // fail, try to backup existing first
        $processUser = posix_getpwuid(posix_geteuid());
        error_log("failed to rename working package path ".$this->packagepath." to $workpath/$opsname. Does the web server have write permissions there? Script user is ".get_current_user()." and process owner is ".$processUser['name']);

        $bkp = $target.".BACKUP".time();
        if (!DiskUtil::xRename($target, $bkp)) {
            throw new Exception('could not move ops path from '.$this->packagepath.' to '.$bkp.'!!!');
        }
        if (!@DiskUtil::xRename($this->packagepath, $target)) {
            throw new Exception('could not backup and move ops path from '.$this->packagepath.' to '.$opsname.'!!!');
        }
    }

    error_log('renamed OPS structure');

    // Reached after either successful move. Previously these were only
    // updated when the first rename succeeded, so after a successful
    // retry the object kept pointing at the old, now missing, directory.
    $this->opsname = $opsname;
    $this->workpath = $workpath;
    $this->packagepath = $this->workpath . '/' . $this->opsname; // this will be the working package dir (ops)
    $this->mimetypepath = $this->packagepath . '/mimetype'; // filename of mimetype file
    $this->metapath = $this->packagepath . '/META-INF';
    $this->opspath = $this->packagepath;
}
/**
 * Starts the epub archive by running the external zip command to add
 * the mimetype file to $arcname.
 *
 * If the mimetype file is missing it is written first. Does nothing
 * when the object is read-only.
 *
 * @param string $arcname path of the archive to create
 * @return void
 * @throws Exception when the mimetype file is missing and cannot be created
 */
private function _makeZipContainer($arcname)
{
    if ($this->readonly) return;

    if (!file_exists($this->mimetypepath)) {
        $this->logerr('***mimetype file does not exist! attempting to fix this...');
        $this->_writeFile($this->mimetypepath, $this->getMimetypeString());

        if (file_exists($this->mimetypepath)) {
            $this->logerr('success!');
        } else {
            throw new Exception('could not create a mimetype file for this ops structure');
        }
    }

    // ZIP_LOC is the path to the zip command. Each path is quoted on its
    // own: escapeshellcmd() on the whole string does not protect spaces,
    // so a path containing one was split into several arguments.
    $zipcmd = escapeshellcmd(ZIP_LOC)
        . ' -0 -j -X '
        . escapeshellarg($arcname)
        . ' '
        . escapeshellarg($this->mimetypepath);

    $output = array();
    $status = 0;
    exec($zipcmd, $output, $status);

    if ($status !== 0) {
        $this->logerr('zip command exited with status '.$status.': '.implode("\n", $output), 0);
    }
}
STEP TWO: 3172 | 3173 | // zip file according to the epub spec now has a single 3174 | // mimetype file, uncompressed, at the start of the archive 3175 | // 3176 | // file is now closed and can be opened again by zip handler 3177 | // the PHP zip handler does not allow you to specify storing 3178 | // uncompressed files 3179 | 3180 | $zip = $this->_getZipArchive($arcname); 3181 | 3182 | // asm: the following line causes problems reading these on stanza, so leave commented 3183 | $zip->addEmptyDir('META-INF'); 3184 | $dirnames = array(); 3185 | foreach($this->zipQ as $file=>$pathfile) { 3186 | $pi = pathinfo($file); 3187 | if($pi['dirname']!="." && $pi['dirname'] != "..") { 3188 | $dirnames[$pi['dirname']] = $pi['dirname']; 3189 | } 3190 | } 3191 | 3192 | $fullpath = ""; 3193 | // make sure dirs exist 3194 | foreach($dirnames as $dirpath=>$bool) { 3195 | $dirs = explode('/', $dirpath); 3196 | $fullpath = ""; 3197 | foreach($dirs as $step) { 3198 | if($fullpath=="") { 3199 | $fullpath = $step; 3200 | } else { 3201 | $fullpath = $fullpath . '/'.$step; 3202 | } 3203 | if(!$zip->statName($fullpath)) { 3204 | $zip->addEmptyDir($fullpath); 3205 | } 3206 | } 3207 | } 3208 | $zip->addFile("$packagedir/META-INF/container.xml", "META-INF/container.xml"); 3209 | $zip->addFile($packagedir . '/' . $this->opfpath, trim($this->opfpath, '/')); 3210 | 3211 | //$zip->addFile($packagedir . '/' . 
/**
 * Sets the log level, clamped to the range 0..4.
 *
 * logerr() only emits messages whose level is at or below this value.
 *
 * @param int $lev requested level; values above 4 become 4, values
 *                 below 0 become 0
 * @return void
 */
public function setLogLevel($lev)
{
    if ($lev > 4) {
        $lev = 4;
    } elseif ($lev < 0) {
        $lev = 0;
    }

    $this->loglevel = $lev;
}
/**
 * Runs the configured epubcheck command against a temporary dump of
 * this package and returns its output.
 *
 * A temporary epub is created with storeAsTmpdump() when none exists
 * yet. Does nothing when the object is read-only.
 *
 * @return array|null array(path of the temporary epub, combined command
 *                    output joined with newlines, exit status), or null
 *                    when read-only
 */
public function getValidationReport()
{
    if ($this->readonly) return;

    if (!$this->tmpdump) {
        $this->storeAsTmpdump();
    }

    // quote the archive path so a temp dir containing spaces or shell
    // metacharacters cannot break or alter the command
    exec($this->epubcheck . ' ' . escapeshellarg($this->tmpdump) . ' > /dev/stdout 2>&1', $output, $result);

    return array($this->tmpdump, implode("\n", $output), $result);
}
/**
 * Returns the data URL of the first item reference whose href contains
 * "cover" or "title", as produced by _getItemDataUrl($item, false).
 *
 * @return mixed result of _getItemDataUrl() for the first match, or false
 *               when no item reference matches
 */
public function getBGCoverData()
{
    foreach ($this->getItemRefs() as $candidate) {
        $isCoverLike = preg_match('/cover/', $candidate['href'])
            || preg_match('/title/', $candidate['href']);

        if ($isCoverLike) {
            // only the first matching item is ever used
            return $this->_getItemDataUrl($candidate, false);
        }
    }

    return false;
}
/**
 * Rebuilds the NCX from its current contents.
 *
 * The existing navPoints (including nested ones, which
 * getElementsByTagName() returns in document order) are captured into a
 * flat list, then head, docTitle, docAuthor and the navMap children are
 * regenerated from that snapshot. playOrder is renumbered from 1.
 * The file is written back to disk unless the package is read-only.
 *
 * Changes from the previous implementation:
 *  - text is inserted with createTextNode(), so titles and labels that
 *    contain "&" are escaped properly (createElement() does not escape
 *    ampersands in its value argument);
 *  - head/docTitle/docAuthor are swapped in with replaceChild(), which
 *    keeps them where they were instead of moving them behind navMap
 *    (the NCX DTD expects head, docTitle, docAuthor, then navMap);
 *  - when the un-prefixed XPath lookups find nothing, the text of the
 *    nodes about to be replaced is used, so title and author are not
 *    silently blanked.
 */
private function _rebuildNcx()
{
    $N = $this->ncxXP;

    // The navMap lookup elsewhere uses the "nc:" prefix, which suggests the
    // NCX elements are namespaced; "//docTitle" would then match nothing.
    // Fall back to the nodes this object already tracks.
    $titleNode = $N->evaluate('//docTitle')->item(0);
    if (!$titleNode) {
        $titleNode = $this->ncx_docTitleNode;
    }
    $authorNode = $N->evaluate('//docAuthor')->item(0);
    if (!$authorNode) {
        $authorNode = $this->ncx_docAuthorNode;
    }

    $ncw = array(
        'docTitle'  => $titleNode ? trim($titleNode->textContent) : '',
        'docAuthor' => $authorNode ? trim($authorNode->textContent) : '',
        'navMap'    => array(),
    );

    // snapshot the current navPoints before the navMap is emptied
    $ind = 0;
    foreach ($this->ncx_navMapNode->getElementsByTagName('navPoint') as $np) {
        $ind++;
        $uid = ($np->getAttribute('id')) ? $np->getAttribute('id') : $this->_validID('nav');
        $class = ($np->getAttribute('class')) ? $np->getAttribute('class') : 'section';
        $contentNode = $np->getElementsByTagName('content')->item(0);
        $ncw['navMap'][] = array(
            'id'        => $uid,
            'playOrder' => $ind,
            'class'     => $class,
            'label'     => $np->getElementsByTagName('navLabel')->item(0)->nodeValue,
            'src'       => $contentNode->getAttribute('src'),
            'content'   => $contentNode->nodeValue,
        );
    }

    $ncx = $this->ncx;
    $n = $ncx->getElementsByTagName('ncx')->item(0);

    // head: regenerated with the three fixed dtb metas
    $head = $ncx->createElement('head');
    $head->appendChild($this->_ncxMeta('dtb:depth', '1', $ncx));
    $head->appendChild($this->_ncxMeta('dtb:totalPageCount', '0', $ncx));
    $head->appendChild($this->_ncxMeta('dtb:maxPageNumber', '0', $ncx));
    $n->replaceChild($head, $this->ncx_headNode);
    $this->ncx_headNode = $head;

    // docTitle
    $dt = $ncx->createElement('docTitle');
    $dtText = $dt->appendChild($ncx->createElement('text'));
    $dtText->appendChild($ncx->createTextNode((string) $ncw['docTitle']));
    $n->replaceChild($dt, $this->ncx_docTitleNode);
    $this->ncx_docTitleNode = $dt;

    // docAuthor
    $da = $ncx->createElement('docAuthor');
    $daText = $da->appendChild($ncx->createElement('text'));
    $daText->appendChild($ncx->createTextNode((string) $ncw['docAuthor']));
    $n->replaceChild($da, $this->ncx_docAuthorNode);
    $this->ncx_docAuthorNode = $da;

    // empty the navMap; removing via firstChild avoids iterating a live
    // node list while it is being modified
    $nmap = $this->ncx_navMapNode;
    while ($nmap->firstChild) {
        $nmap->removeChild($nmap->firstChild);
    }

    // write the snapshot back as a flat list of navPoints
    foreach ($ncw['navMap'] as $ncwitem) {
        $np = $ncx->createElement('navPoint');
        $np->setAttribute('id', $ncwitem['id']);
        $np->setAttribute('class', $ncwitem['class']);
        $np->setAttribute('playOrder', (string) $ncwitem['playOrder']);
        $npel = $nmap->appendChild($np);
        $nl = $npel->appendChild($ncx->createElement('navLabel'));

        // fix labels with misencoded entities (a literal "&#39;" in the text)
        $label = (string) $ncwitem['label'];
        if (strpos($label, '&#39;') !== false) {
            $label = str_replace('&#39;', "'", $label);
            $this->logerr('**** fixed a bad label:'.$label, 2);
        }

        $text = $nl->appendChild($ncx->createElement('text'));
        $text->appendChild($ncx->createTextNode($label));

        $c = $npel->appendChild($ncx->createElement('content'));
        $c->setAttribute('src', $ncwitem['src']);
    }
    $this->ncx_navMapNode = $nmap;

    // the in-memory DOM is updated either way; only the write is skipped
    if ($this->readonly) {
        return;
    }
    $this->ncx->save($this->packagepath . '/' . $this->ncxpath);
}
heading 3576 | $order = 0; $thishead = 0; $headcount = $heads->length; 3577 | foreach($heads as $head) { 3578 | //$this->logerr('processing head element navPoint:'.$head->textContent); 3579 | $order++; 3580 | $myid = $this->_validID("navPoint".$order); 3581 | // insert an anchor point before the heading 3582 | $a = $doc->createElement('a'); 3583 | $a->setAttribute('name', $myid); 3584 | $a->setAttribute('id', $myid); 3585 | // $a->setAttribute('class', 'chapter'); 3586 | if($inserted = $head->parentNode->insertBefore($a, $head)) { 3587 | $mylabel = preg_replace('/\n/', ' ', $head->textContent); 3588 | $this->addNavItem($this->_validID('nav'), $mylabel, $basename.'#'.$myid, 'section'); 3589 | } 3590 | $thishead++; 3591 | } 3592 | } 3593 | */ 3594 | /* 3595 | private function _headsToNavDivs($doc, $heads, $basename) 3596 | { 3597 | // wraps sections in divs 3598 | //$this->logerr('creating nav divs from head info -- basename is '.$basename); 3599 | $order = 0; $thishead = 0; $headcount = $heads->length; 3600 | $marker = $doc->createComment('bgdelimiter#beginContent'); 3601 | // insert before first child of body element 3602 | $body = $doc->getElementsByTagName('body')->item(0); 3603 | $begin = $body->insertBefore($marker, $body->firstChild); 3604 | $marker = $doc->createComment('bgdelimiter#endContent'); 3605 | // append to end of body element 3606 | $end = $body->appendChild($marker); 3607 | if($heads->length > 0) { 3608 | foreach($heads as $head) { 3609 | //$this->logerr('processing head element navPoint:'.$head->textContent); 3610 | $order++; 3611 | $myid = $this->_validID("navPointDiv".$order); 3612 | $marker = $doc->createComment('bgdelimiter#'.$myid); 3613 | 3614 | if($inserted = $head->parentNode->insertBefore($marker, $head)) { 3615 | $mylabel = preg_replace('/\n/', ' ', $head->textContent); 3616 | if(preg_match('/^\s$/m', $mylabel)) { 3617 | //$this->logerr('label is empty, setting label to default'); 3618 | $mylabel = '§'; 3619 | } 3620 | 
$this->addNavItem($this->_validID('nav'), $mylabel, $basename.'#'.$myid, 'section'); 3621 | } 3622 | $thishead++; 3623 | } 3624 | } else { 3625 | $this->addNavItem($this->_validID('nav'), '§', $basename.'#beginContent', 'document'); 3626 | } 3627 | // now we should have markers for where each section is going to begin, 3628 | // each marker tagged with the id referenced by the navpoint link 3629 | // bring in the wizard of tricks and his mighty staff of deception! 3630 | $docstr = $doc->saveXML(); 3631 | 3632 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#beginContent\s?\-\->/m', '
'."\n\n", $docstr); 3633 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#(.+?)\s?\-\->/m', '
'."\n\n".'
'."\n\n", $docstr); 3634 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#endContent\s?\-\->/m', "\n\n".'
', $docstr); 3635 | 3636 | //tidy it 3637 | $tmp = tempnam($this->workpath, 'epubwork_'); 3638 | //$this->logerr('working tempfile for marker replacement is '.$tmp); 3639 | file_put_contents($tmp, $docstr); 3640 | $tidied = $this->_tidySource($tmp); 3641 | unlink($tmp); 3642 | // reload 3643 | $doc->loadXML($tidied); 3644 | return $doc; 3645 | } 3646 | */ 3647 | /* 3648 | private function _headsToNavDocs($doc, $headsold, $basename) 3649 | { 3650 | // creates new content documents based on headings 3651 | $order = 0; $thishead = 0; 3652 | $navpointids = array(); 3653 | $headcount = 0; 3654 | $html = $doc->getElementsByTagName("html")->item(0); 3655 | 3656 | $body = $html->getElementsByTagName("body")->item(0); 3657 | //} catch (Exception $e) { 3658 | // $body = $html->appendChild($doc->createElement("body")); 3659 | //} 3660 | $heads = $body->getElementsByTagName("h1"); 3661 | if($heads->length<1) { 3662 | $heads = $doc->getElementsByTagName("h2"); 3663 | if($heads->length<1) { 3664 | $heads = $doc->getElementsByTagName("h3"); 3665 | if($heads->length<1) { 3666 | $body->insertBefore($doc->createElement("h1","***"), $body->firstChild); 3667 | } 3668 | } 3669 | } 3670 | // calculate whether our heads ratio is good for this: 3671 | $sectionsize = (strlen($doc->saveXML())/($headcount+1)); 3672 | $marker = $doc->createComment('bgdelimiter#beginContent'); 3673 | $begin = $body->insertBefore($marker, $body->firstChild); 3674 | $marker = $doc->createComment('bgdelimiter#endContent'); 3675 | $end = $body->appendChild($marker); 3676 | foreach($heads as $head) { 3677 | $this->logerr('processing head element navPoint:'.$head->textContent); 3678 | $order++; // basis of 1 for id strings 3679 | $thishead++; 3680 | $uni = uniqid(); 3681 | $myid = $this->_validID("navPointDiv$order"); 3682 | $docid = $this->_validID("content$basename"."_section$order"); 3683 | $docidfile = $basename.'-'.$order.'.html'; 3684 | $mylabel = preg_replace('/(\n|\r)/', ' ', $head->textContent); 3685 | 
if(preg_match('/^\s$/m', $mylabel)) { 3686 | $this->logerr('label is empty, setting label to default'); 3687 | $mylabel = '§'; 3688 | } 3689 | $marker = $doc->createComment('bgdelimiter#'.$myid); 3690 | $this->logerr("created comment"); 3691 | $ref = $head; 3692 | while($ref->parentNode->nodeName != "body") { 3693 | if($ref->parentNode->nodeName == "html") { 3694 | break; 3695 | } else { 3696 | $ref = $ref->parentNode; 3697 | } 3698 | } 3699 | $inserted = $ref->parentNode->insertBefore($marker, $ref); 3700 | $navpointids[] = array('myid'=>$myid, 'docid'=>$docid, 'docidfile'=>$docidfile, 'mylabel'=>$mylabel); 3701 | } 3702 | $docstr = $doc->saveXML(); // dump for regex processing 3703 | $html->replaceChild($doc->createElement("body"),$body); 3704 | $domtmpl = $doc; // copy remaining stuff as template 3705 | unset($body); unset($heads); unset($doc); 3706 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#beginContent\s?\-\->/m', '
'."\n\n", $docstr); 3707 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#endContent\s?\-\->/m', "\n\n".'
', $docstr); 3708 | $docstr = preg_replace('/<\!\-\-\s?bgdelimiter#(.+?)\s?\-\->/m', ''."\n\n".'
/**
 * Static filesystem helpers: zip detection, directory creation and copying,
 * owner/group lookup, temp-dir discovery and shell-backed find/move.
 */
class DiskUtil {

    /**
     * Tells whether a file starts with the two-byte "PK" zip signature.
     *
     * The file is looked up with use_include_path enabled, as before.
     *
     * @param string $file path of the file to inspect
     * @return bool true when the first two bytes are "PK"; false otherwise,
     *              including when the file cannot be read
     */
    public static function fileIsZip($file)
    {
        $magic = file_get_contents($file, true, null, 0, 2);
        return $magic === 'PK';
    }

    /**
     * Makes sure every directory along $dir exists, creating missing ones
     * with mode 0775. If $dir names a regular file, its parent directory
     * is used instead.
     *
     * @param string $dir full path to a directory (not a file)
     */
    public static function assertPath($dir)
    {
        if (is_file($dir)) {
            error_log("this is a regular file!");
            $dir = pathinfo($dir, PATHINFO_DIRNAME);
        }
        $stem = "";
        foreach (explode("/", $dir) as $part) {
            // for an absolute path the first part is empty, so $stem starts as "/"
            $stem .= "$part/";
            if (!file_exists($stem)) {
                error_log("making non-existent directory:".$stem);
                mkdir($stem, 0775);
            }
        }
    }

    /**
     * @param string $file path to an existing file
     * @return string|null name of the file's group, or null when the lookup fails
     */
    public static function getGroupName($file)
    {
        $oinfo = self::getGroupArray($file);
        return is_array($oinfo) ? $oinfo['name'] : null;
    }

    /**
     * @param string $file path to an existing file
     * @return string|null name of the file's owner, or null when the lookup fails
     */
    public static function getOwnerName($file)
    {
        $oinfo = self::getOwnerArray($file);
        return is_array($oinfo) ? $oinfo['name'] : null;
    }

    /**
     * @param string $file path to an existing file
     * @return array|false result of posix_getgrgid() for the file's group id
     */
    public static function getGroupArray($file)
    {
        return posix_getgrgid(filegroup($file));
    }

    /**
     * @param string $file path to an existing file
     * @return array|false result of posix_getpwuid() for the file's owner id
     */
    public static function getOwnerArray($file)
    {
        return posix_getpwuid(fileowner($file));
    }

    /**
     * Creates $dir (mode 0777) unless something already exists at that path,
     * in which case a message is logged and nothing is changed.
     *
     * When the parent directory is not writable, an attempt is made to
     * chmod it to 0777 first.
     *
     * @param string $dir directory to create
     * @throws Exception when the parent cannot be made writable or mkdir fails
     */
    public static function makeDir($dir)
    {
        if (file_exists($dir)) {
            error_log("Package directory ".$dir." already exists--not going to overwrite it");
            return;
        }

        $parent = dirname($dir);
        if (!is_writable($parent) && !chmod($parent, 0777)) {
            throw new Exception("Couldn't chmod dir ".$dir);
        }
        if (!mkdir($dir)) {
            throw new Exception("Could not create package directory:".$dir);
        }
        chmod($dir, 0777);
    }

    /**
     * Runs `find $path -name $regex` and returns the first line of output.
     *
     * Both arguments are shell-quoted individually, so paths containing
     * spaces work and the pattern reaches find unexpanded.
     *
     * @param string $path  directory to search
     * @param string $regex pattern handed to find's -name test
     * @return string|false first match, or false when find fails or
     *                      nothing matches
     */
    public static function findFile($path, $regex)
    {
        $output = array();
        $retval = 1;
        exec('find ' . escapeshellarg($path) . ' -name ' . escapeshellarg($regex), $output, $retval);

        // find exits 0 even when it matched nothing, so check the output too
        if ($retval == 0 && isset($output[0])) {
            return $output[0];
        }
        return false;
    }

    /**
     * Recursively copies files that are missing from, or newer than their
     * counterpart in, the destination directory. Copied files get the
     * source's modification time and mode 0777.
     *
     * Original by SkyEye, remake by AngelKiha, Linux compatibility by marajax.
     *
     * Fixes over the previous version: $verbose is now passed down to
     * sub-directories in the right argument slot (it used to land in
     * $offset); counters are kept as integers, so the default $offset of ''
     * no longer triggers arithmetic on a non-numeric string; a directory
     * entry named "0" no longer ends the scan early.
     *
     * @param string     $srcdir  source directory
     * @param string     $dstdir  destination directory (created if missing)
     * @param int|string $offset  optional correction subtracted once from the
     *                            failure count; '' is treated as 0
     * @param bool       $verbose echo progress for each copied file
     * @return string "copied,failed,bytes,failedfiles" where failedfiles is a
     *                list of source paths, each followed by "|"
     */
    public static function dir_copy($srcdir, $dstdir, $offset = '', $verbose = false)
    {
        $offset = (int) $offset;
        $num = 0;
        $fail = 0;
        $sizetotal = 0;
        $fifail = '';

        if (!is_dir($dstdir)) mkdir($dstdir);

        if ($curdir = opendir($srcdir)) {
            while (($file = readdir($curdir)) !== false) {
                if ($file == '.' || $file == '..') {
                    continue;
                }
                $srcfile = $srcdir . '/' . $file;
                $dstfile = $dstdir . '/' . $file;

                if (is_file($srcfile)) {
                    // copy only when the destination is missing or older
                    $ow = is_file($dstfile) ? filemtime($srcfile) - filemtime($dstfile) : 1;
                    if ($ow <= 0) {
                        continue;
                    }
                    if ($verbose) echo "Copying '$srcfile' to '$dstfile'...\n";
                    if (copy($srcfile, $dstfile)) {
                        touch($dstfile, filemtime($srcfile));
                        chmod($dstfile, 0777);
                        $num++;
                        $sizetotal += filesize($dstfile);
                        if ($verbose) echo "OK\n";
                    } else {
                        echo "Error: File '$srcfile' could not be copied to '$dstfile'!\n\n";
                        $fail++;
                        $fifail .= $srcfile . '|';
                    }
                } else if (is_dir($srcfile)) {
                    // the offset is a top-level correction, so children get 0;
                    // limit 4 keeps commas inside failed file names intact
                    $sub = explode(',', self::dir_copy($srcfile, $dstfile, 0, $verbose), 4);
                    $num += (int) $sub[0];
                    $fail += (int) $sub[1];
                    $sizetotal += (int) $sub[2];
                    $fifail .= $sub[3];
                }
            }
            closedir($curdir);
        }

        return $num . ',' . ($fail - $offset) . ',' . $sizetotal . ',' . $fifail;
    }

    /**
     * Returns the directory to use for temporary files: the first non-empty
     * of $_ENV['TMP'], $_ENV['TMPDIR'], $_ENV['TEMP'], otherwise PHP's
     * system temp directory.
     *
     * The fallback previously called tempnam() only to read its directory,
     * which left a stray temp file behind on every call.
     *
     * @return string temp directory path (an error is logged if it is empty)
     */
    public static function getTempDir()
    {
        $tempdir = '';
        foreach (array('TMP', 'TMPDIR', 'TEMP') as $key) {
            if (!empty($_ENV[$key])) {
                $tempdir = $_ENV[$key];
                break;
            }
        }
        if (empty($tempdir)) {
            $tempdir = sys_get_temp_dir();
        }

        if (empty($tempdir)) { error_log ('No temporary directory'); }

        return $tempdir;
    }

    /**
     * Moves a file by shelling out to mv, which can move across partitions.
     *
     * Both paths are shell-quoted; "--" stops a path that begins with "-"
     * from being read as an option.
     *
     * @param string $src    existing path
     * @param string $target new path
     * @return bool true when mv exits with status 0 (previously always true)
     */
    public static function xRename($src, $target)
    {
        $out = array();
        $status = 1;
        exec('mv -- ' . escapeshellarg($src) . ' ' . escapeshellarg($target) . ' 2>&1', $out, $status);
        return $status === 0;
    }

}