├── ExcelMerge.php ├── LICENSE ├── README.md ├── Tasks ├── App.php ├── ContentTypes.php ├── MergeTask.php ├── SharedStrings.php ├── Styles.php ├── Vba.php ├── Workbook.php ├── WorkbookRels.php └── Worksheet.php └── composer.json /ExcelMerge.php: -------------------------------------------------------------------------------- 1 | working_dir = 31 | sys_get_temp_dir() . 32 | DIRECTORY_SEPARATOR . 33 | 'ExcelMerge-' . 34 | date('Ymd-His') . 35 | '-' . 36 | uniqid() . 37 | DIRECTORY_SEPARATOR; 38 | 39 | if (!is_dir($this->working_dir)) { 40 | mkdir($this->working_dir, 0755, true); 41 | break; 42 | } 43 | } 44 | 45 | if (!is_dir($this->working_dir)) { 46 | trigger_error("Could not create temporary working directory {$this->working_dir}", E_USER_ERROR); 47 | } 48 | 49 | 50 | $this->tmp_dir = $this->working_dir . "tmp" . DIRECTORY_SEPARATOR; 51 | mkdir($this->tmp_dir, 0755, true); 52 | 53 | $this->result_dir = $this->working_dir . "result" . DIRECTORY_SEPARATOR; 54 | mkdir($this->result_dir, 0755, true); 55 | 56 | $this->registerMergeTasks(); 57 | 58 | foreach ($files as $f) { 59 | $this->addFile($f); 60 | } 61 | } 62 | 63 | public function __destruct() { 64 | if (!$this->debug) { 65 | $this->removeTree(realpath($this->working_dir)); 66 | } 67 | } 68 | 69 | public function addFile($filename) { 70 | if ($this->isSupportedFile($filename)) { 71 | if ($this->resultsDirEmpty()) { 72 | $this->addFirstFile($filename); 73 | } else { 74 | $this->mergeWorksheets($filename); 75 | } 76 | $this->files[] = $filename; 77 | } 78 | } 79 | 80 | 81 | /** 82 | * Saves the merged file. 83 | * 84 | * @param null $where 85 | * @return string The path and filename to the saved file. The file extension can be 86 | * different from the one you provided (!) 
87 | */ 88 | public function save($where = null) { 89 | $zipfile = $this->zipContents(); 90 | if ($where === NULL) { 91 | $where = $zipfile; 92 | } 93 | 94 | // ignore whatever extension the user might have given us and use the one 95 | // we obtained in 'zipContents' (i.e. either XLSX or XLSM) 96 | $where = 97 | pathinfo($where, PATHINFO_DIRNAME) . 98 | DIRECTORY_SEPARATOR . 99 | pathinfo($where, PATHINFO_FILENAME) . "." . 100 | pathinfo($zipfile, PATHINFO_EXTENSION); 101 | 102 | // move the zipped file to the provided destination 103 | rename($zipfile, $where); 104 | 105 | // returns the name of the file 106 | return $where; 107 | } 108 | 109 | /** 110 | * Downloads the merged file 111 | * 112 | * @param null $download_filename 113 | */ 114 | public function download($download_filename = null) { 115 | $zipfile = $this->zipContents(); 116 | if ($download_filename === NULL) { 117 | $download_filename = $zipfile; 118 | } 119 | 120 | // ignore whatever extension the user might have given us and use the one 121 | // we obtained in 'zipContents' (i.e. either XLSX or XLSM) 122 | $download_filename = 123 | pathinfo($download_filename, PATHINFO_FILENAME) . "." . 124 | pathinfo($zipfile, PATHINFO_EXTENSION); 125 | 126 | header('Content-Type: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'); 127 | header('Content-Disposition: attachment;filename="' . $download_filename . 
'"'); 128 | header('Cache-Control: max-age=0'); 129 | echo file_get_contents($zipfile); 130 | unlink($zipfile); 131 | die; 132 | } 133 | 134 | 135 | protected function addFirstFile($filename) { 136 | if ($this->resultsDirEmpty()) { 137 | if ($this->isSupportedFile($filename)) { 138 | $this->unzip($filename, $this->result_dir); 139 | } 140 | } else { 141 | $this->mergeWorksheets($filename); 142 | } 143 | } 144 | 145 | 146 | protected function mergeWorksheets($filename) { 147 | if ($this->resultsDirEmpty()) { 148 | $this->addFirstFile($filename); 149 | } else { 150 | if ($this->isSupportedFile($filename)) { 151 | $zip_dir = $this->tmp_dir . DIRECTORY_SEPARATOR . basename($filename); 152 | $this->unzip($filename, $zip_dir); 153 | 154 | $shared_strings = $this->tasks->sharedStrings->merge($zip_dir); 155 | list($styles, $conditional_styles) = $this->tasks->styles->merge($zip_dir); 156 | $this->tasks->vba->merge($zip_dir); 157 | 158 | $worksheets = glob("{$zip_dir}/xl/worksheets/sheet*.xml"); 159 | foreach ($worksheets as $s) { 160 | list($sheet_number, $sheet_name) = $this->tasks->worksheet->merge($s, $shared_strings, $styles, $conditional_styles); 161 | 162 | if ($sheet_number!==false) { 163 | $this->tasks->workbookRels->set($sheet_number, $sheet_name)->merge(); 164 | $this->tasks->contentTypes->set($sheet_number, $sheet_name)->merge(); 165 | $this->tasks->app->set($sheet_number, $sheet_name)->merge(); 166 | $this->tasks->workbook->set($sheet_number, $sheet_name)->merge(); 167 | } 168 | } 169 | } 170 | } 171 | } 172 | 173 | protected function registerMergeTasks() { 174 | $this->tasks = new \stdClass(); 175 | 176 | // global tasks 177 | $this->tasks->sharedStrings = new Tasks\SharedStrings($this); 178 | $this->tasks->styles = new Tasks\Styles($this); 179 | $this->tasks->vba = new Tasks\Vba($this); 180 | 181 | // worksheet tasks 182 | $this->tasks->worksheet = new Tasks\Worksheet($this); 183 | $this->tasks->workbookRels = new Tasks\WorkbookRels($this); 184 | 
$this->tasks->contentTypes = new Tasks\ContentTypes($this); 185 | $this->tasks->app = new Tasks\App($this); 186 | $this->tasks->workbook = new Tasks\Workbook($this); 187 | } 188 | 189 | 190 | protected function isSupportedFile($filename, $throw_error = true) { 191 | $ext = pathinfo($filename, PATHINFO_EXTENSION); 192 | $is_supported = in_array(strtolower($ext), array('xlsx', 'xlsm')); 193 | if (!$is_supported && $throw_error) { 194 | user_error("Can only merge Excel files in .XLSX or .XLSM format. Skipping " . $filename, E_USER_WARNING); 195 | } 196 | 197 | return $is_supported; 198 | } 199 | 200 | protected function resultsDirEmpty() { 201 | return count(array_diff(scandir($this->result_dir), array('.', '..'))) == 0; 202 | } 203 | 204 | 205 | protected function unzip($filename, $directory) { 206 | $zip = new \ZipArchive(); 207 | $zip->open($filename); 208 | $zip->extractTo($directory); 209 | $zip->close(); 210 | } 211 | 212 | protected function removeTree($dir) { 213 | $result = false; 214 | 215 | $dir = realpath($dir); 216 | if (strpos($dir, realpath(sys_get_temp_dir())) === 0) { 217 | $result = true; 218 | $files = array_diff(scandir($dir), array('.', '..')); 219 | foreach ($files as $file) { 220 | if (is_dir("$dir/$file")) { 221 | $result &= $this->removeTree("$dir/$file"); 222 | } else { 223 | $result &= unlink("$dir/$file"); 224 | } 225 | } 226 | $result &= rmdir($dir); 227 | } 228 | 229 | return $result; 230 | } 231 | 232 | protected function zipContents() { 233 | $zip_directory = realpath($this->result_dir); 234 | $target_zip = $this->working_dir . DIRECTORY_SEPARATOR . 
"merged-excel-file"; 235 | $ext = "xlsx"; 236 | 237 | $delete = array(); 238 | 239 | $zip = new \ZipArchive(); 240 | $zip->open($target_zip, \ZipArchive::CREATE | \ZipArchive::OVERWRITE); 241 | 242 | // Create recursive directory iterator 243 | /** @var \SplFileInfo[] $files */ 244 | $files = new \RecursiveIteratorIterator( 245 | new \RecursiveDirectoryIterator($zip_directory), 246 | \RecursiveIteratorIterator::LEAVES_ONLY 247 | ); 248 | 249 | foreach ($files as $name => $file) { 250 | // Skip directories (they would be added automatically) 251 | if (!$file->isDir()) { 252 | // Get real and relative path for current file 253 | $filePath = $file->getRealPath(); 254 | if (basename($filePath) != $target_zip) { 255 | $relativePath = substr($filePath, strlen($zip_directory) + 1); 256 | 257 | // Add current file to archive 258 | $zip->addFile($filePath, $relativePath); 259 | 260 | $delete[] = $filePath; 261 | 262 | if (basename($filePath) == "vbaProject.bin") { 263 | // we found VBA code; we change the extension to 'XLSM' to enable macros 264 | $ext = "xlsm"; 265 | } 266 | } 267 | } 268 | } 269 | 270 | // Zip archive will be created only after closing object 271 | $zip->close(); 272 | 273 | // by default, we delete the files that we put in the zip file 274 | if (!$this->debug) { 275 | foreach ($delete as $d) { 276 | unlink($d); 277 | } 278 | } 279 | 280 | // give the zipfile its final name 281 | rename($target_zip, "$target_zip.$ext"); 282 | 283 | return "$target_zip.$ext"; 284 | } 285 | 286 | public function __get($name) { 287 | switch ($name) { 288 | case "result_dir": 289 | return $this->result_dir; 290 | case "working_dir": 291 | return $this->working_dir; 292 | } 293 | return null; 294 | } 295 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Edward Akerboom 4 | 5 | Permission is hereby 
granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Excel Merge 2 | =========== 3 | 4 | Merges two or more Excel files into one file, while keeping formatting, formulas, VBA code and 5 | conditional styling intact. This software works with Excel 2007 (.xlsx and .xlsm) files and can 6 | only generate Excel 2007 files as output. The older .xls format is unfortunately not supported, 7 | but you can work around that if necessary. 8 | 9 | This is a software library that is designed to be used as part of a larger piece of software. It 10 | cannot be used as standalone software by itself. 
11 | 12 | Installation 13 | ------------ 14 | 15 | **With composer** 16 | 17 | php composer.phar require infostreams/excel-merge 18 | 19 | Use 20 | --- 21 | 22 | The most basic use of this software looks something like this: 23 | 24 | $merged->download("my-filename.xlsm"); 31 | 32 | // or 33 | 34 | $filename = $merged->save("my-directory/my-filename.xlsm"); 35 | ?> 36 | 37 | 38 | Raison d'être and use case 39 | -------------------------- 40 | This library exists for one reason only: to work around the humongous memory requirements of the 41 | otherwise excellent [PHPExcel](https://github.com/PHPOffice/PHPExcel) library. I had to export 42 | the contents of a database as an Excel file with about 10 worksheets, some of them relatively 43 | large, and PHPExcel very quickly ran out of memory after producing about 2 or 3 of the required 44 | worksheets, even after increasing the PHP memory limit to 256 and then 512 MB. I was not doing 45 | anything spectacular and am certainly 46 | [not the only one](http://stackoverflow.com/questions/4817651/phpexcel-runs-out-of-256-512-and-also-1024mb-of-ram) 47 | to have run into this issue. 48 | 49 | At this point I could have chosen a different Excel library to generate the export, and 50 | [I did](https://github.com/MAXakaWIZARD/xls-writer), but these would not allow me to use VBA code 51 | in my exported file, and would not recognize some of the Excel formulas I needed. PHPExcel would 52 | allow me to do these things, but ran out of memory because it insists on keeping a complete mental 53 | model of all the sheets in memory before it can produce an output file. That makes sense for 54 | PHPExcel but doesn't work for my use case. 55 | 56 | Therefore, I decided to circumvent PHPExcel's memory limitations by using it to generate and then 57 | write all sheets as **individual Excel files**, and then write some code to merge these Excel 58 | files into one. 
59 | 60 | How it works 61 | ------------ 62 | Instead of trying to keep a mental model of the whole Excel file in memory, this library simply 63 | operates directly on the XML files that are inside Excel2007 files. The library doesn't 64 | really understand these XML files; it just knows which files it needs to copy where and how to 65 | modify the XML in order to add one sheet of one Excel file to the other. 66 | 67 | This means that the most memory it will ever use is directly related to how large your largest 68 | worksheet is. 69 | 70 | Results 71 | ------- 72 | I had to generate an Excel file with 11 relatively sizable worksheets (two or three sheets with 73 | about 2000 rows). PHPExcel took over 30 minutes and over 512 MB of memory to generate this, after 74 | which I aborted the process. With this library, I can generate the same export in 28.2 seconds with 75 | a peak memory use of 67 MB. 76 | 77 | Support for 'native' Excel files 78 | -------------------------------- 79 | I've tried merging files produced by Excel itself, but somehow it fails. I worked around it by 80 | loading the file with PHPExcel and writing it as a new Excel2007 file, and then merging that 81 | instead. If you figure out why it fails: pull requests welcome. 82 | 83 | Support for .xls files and Libre/OpenOffice Calc and Gnumeric 84 | ------------------------------------------------------------- 85 | You can merge .xls files, or any of the import formats supported by PHPExcel, by reading the 86 | file with PHPExcel and writing it as a temporary Excel2007 file. You then merge the temporary 87 | Excel2007 file instead of the original file. 88 | 89 | Requirements 90 | ------------ 91 | This library uses DOMDocument and DOMXPath extensively. These are installed and available in PHP 5 by 92 | default. If they aren't, check [here](http://php.net/manual/en/dom.setup.php). The library also uses ZipArchive to unpack and repack the Excel files, which requires PHP's zip extension. 93 | 94 | Minimum PHP version is most likely v5.3. 
-------------------------------------------------------------------------------- /Tasks/App.php: -------------------------------------------------------------------------------- 1 | result_dir}/docProps/app.xml"; 12 | 13 | $dom = new \DOMDocument(); 14 | $dom->load($filename); 15 | 16 | /* 17 | * => in HeadingPairs/vt:vector/vt:variant[2] set to {N} 18 | => in TitlesOfParts/vt:vector set attribute 'size' to {N} 19 | => add 20 | {New sheet} 21 | 22 | */ 23 | 24 | $xpath = new \DOMXPath($dom); 25 | $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/officeDocument/2006/extended-properties"); 26 | $xpath->registerNamespace("mvt", "http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes"); 27 | 28 | $elems = $xpath->query("//m:HeadingPairs/mvt:vector/mvt:variant[2]/mvt:i4"); 29 | foreach ($elems as $e) { 30 | $e->nodeValue = $this->sheet_number; 31 | } 32 | 33 | $elems = $xpath->query("//m:TitlesOfParts/mvt:vector"); 34 | foreach ($elems as $e) { 35 | 36 | // Caroline Clep: Rename if already exists 37 | $nodes = $e->childNodes; 38 | foreach ($nodes as $node) 39 | { 40 | if ($node->nodeValue === $this->sheet_name) 41 | { 42 | $node->nodeValue = 'Previous_' . 
$node->nodeValue; 43 | break; 44 | } 45 | } 46 | 47 | // Caroline Clep: sheets numbers incorrectly 48 | // $e->setAttribute('size', $this->sheet_number); 49 | $e->setAttribute('size', $e->getAttribute('size') + 1); 50 | 51 | $e->setAttribute('size', $this->sheet_number); 52 | 53 | $tag = $dom->createElement('vt:lpstr'); 54 | $tag->nodeValue = $this->sheet_name; 55 | 56 | $e->appendChild($tag); 57 | } 58 | $dom->save($filename); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /Tasks/ContentTypes.php: -------------------------------------------------------------------------------- 1 | result_dir}/[Content_Types].xml"; 12 | 13 | $dom = new \DOMDocument(); 14 | $dom->load($filename); 15 | 16 | $tag = $dom->createElement("Override"); 17 | $tag->setAttribute('PartName', "/xl/worksheets/sheet{$this->sheet_number}.xml"); 18 | $tag->setAttribute('ContentType', "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"); 19 | 20 | $dom->documentElement->appendChild($tag); 21 | 22 | $dom->save($filename); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /Tasks/MergeTask.php: -------------------------------------------------------------------------------- 1 | parent = $parent; 20 | } 21 | 22 | abstract public function merge(); 23 | 24 | public function __get($name) { 25 | switch ($name) { 26 | case "result_dir": 27 | return $this->parent->result_dir; 28 | case "working_dir": 29 | return $this->parent->working_dir; 30 | } 31 | return null; 32 | } 33 | 34 | public function __set($name, $value) { 35 | switch ($name) { 36 | // working_dir and result_dir should be read only, so try to set them on the 37 | // parent object to throw an error to show that you can't do that. 
38 | case "result_dir": 39 | $this->parent->result_dir = $value; 40 | break; 41 | case "working_dir": 42 | $this->parent->working_dir = $value; 43 | break; 44 | } 45 | } 46 | 47 | public function set($sheet_number, $sheet_name) { 48 | $this->sheet_number = $sheet_number; 49 | $this->sheet_name = $sheet_name; 50 | 51 | return $this; // so we can chain methods, as in $this->set()->merge() 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /Tasks/SharedStrings.php: -------------------------------------------------------------------------------- 1 | result_dir . $xml_filename; 14 | $source_filename = $zip_dir . $xml_filename; 15 | 16 | $shared_strings = array(); 17 | $target = new \DOMDocument(); 18 | $target->load($target_filename); 19 | foreach ($target->documentElement->childNodes as $i=>$ss) { 20 | // read in current list of shared strings 21 | $shared_strings[$i] = $ss->nodeValue; 22 | } 23 | 24 | // add new shared strings, and provide a mapping between old id and new id 25 | $source = new \DOMDocument(); 26 | $source->load($source_filename); 27 | $mapping = array(); 28 | foreach ($source->documentElement->childNodes as $i=>$ss) { 29 | $string = $ss->textContent; 30 | 31 | if (in_array($string, $shared_strings)) { 32 | $mapping[$i] = array_search($string, $shared_strings); 33 | } else { 34 | // we didn't have this string yet 35 | $shared_strings[] = $string; 36 | 37 | // also add it to $target 38 | $node = $target->createElement('si'); 39 | $sub = $target->createElement('t'); 40 | $text = $target->createTextNode($string); 41 | $sub->appendChild($text); 42 | 43 | $node->appendChild($sub); 44 | $target->documentElement->appendChild($node); 45 | 46 | // record the new mapping 47 | $mapping[$i] = count($shared_strings) - 1; 48 | } 49 | } 50 | $target->documentElement->setAttribute('uniqueCount',count($shared_strings)); // <-- update uniqueCount 51 | $target->save($target_filename); 52 | 53 | return $mapping; 54 | } 55 | 
} 56 | -------------------------------------------------------------------------------- /Tasks/Styles.php: -------------------------------------------------------------------------------- 1 | result_dir . $xml_filename; 19 | $source_filename = $zip_dir . $xml_filename; 20 | 21 | // get hash signature for each entry in 'numfmt', 'fonts', 'fills' and 'borders' 22 | // see if there are any new ones 23 | // - if so, add them and store the id. Make sure to update the 'count' attribute in the parent tag 24 | // - if it already existed, get the id 25 | $existing_dom = new \DOMDocument(); 26 | $existing_dom->load($existing_filename); 27 | 28 | $existing_xpath = new \DOMXPath($existing_dom); 29 | $existing_xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"); 30 | 31 | $styles = $this->getStyles($existing_xpath); 32 | 33 | $source_dom = new \DOMDocument(); 34 | $source_dom->load($source_filename); 35 | 36 | // re-assign xpath to work on source doc 37 | $source_xpath = new \DOMXPath($source_dom); 38 | $source_xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"); 39 | 40 | // iterate all the style tags in document that we want to merge in 41 | list($mapping, $styles) = $this->addNewStyles($source_xpath, $styles); 42 | 43 | // replace styles from existing styles.xml document with the merged styles 44 | $this->replaceStyleTags($styles, $existing_xpath); 45 | 46 | // now go through the 'cellXfs' tags. Update the references to 'fontId', 'numFmtId', 47 | // 'fillId', and 'borderId'. Generate a tag for each style that we're importing. 48 | // 49 | // If it already existed, note the id. If it didn't exist, add it and store the id. 
50 | // Return this mapping of ids 51 | list($defined_styles, $styles_mapping) = $this->rewriteCells($existing_xpath, $source_xpath, $mapping); 52 | 53 | // write the new styles list 54 | $this->replaceStylesList($defined_styles, $existing_xpath); 55 | 56 | // save the merged style file 57 | $existing_dom->save($existing_filename); 58 | 59 | // return a mapping of how style ids in this workbook relate to style ids in the merged workbook 60 | return array($styles_mapping, $mapping['dxfs']); 61 | } 62 | 63 | /** 64 | * @param $existing_xpath 65 | * @return array 66 | */ 67 | protected function getStyles($existing_xpath) { 68 | $existing_styles = array(); 69 | foreach ($this->style_tags as $tag) { 70 | $elems = $existing_xpath->query("//m:{$tag}"); 71 | $existing_styles[$tag] = array(); 72 | if ($elems->length > 0) { 73 | if ($elems->item(0)->hasChildNodes()) { 74 | foreach ($elems->item(0)->childNodes as $id => $style) { 75 | $existing_styles[$tag][$id] = array( 76 | "node" => $style, 77 | "string" => $style->C14N(true, false), 78 | "id" => $id 79 | ); 80 | } 81 | } 82 | } 83 | } 84 | return $existing_styles; 85 | } 86 | 87 | /** 88 | * @param \DOMXPath $source_xpath The document to add styles from 89 | * @param $existing_styles 90 | * @return array 91 | */ 92 | protected function addNewStyles($source_xpath, $existing_styles) { 93 | $mapping = array(); 94 | foreach ($this->style_tags as $tag) { 95 | $elems = $source_xpath->query("//m:{$tag}"); 96 | 97 | $mapping[$tag] = array(); 98 | if ($elems && $elems->item(0) && $elems->item(0)->hasChildNodes()) { 99 | foreach ($elems->item(0)->childNodes as $id => $style) { 100 | $string = $style->C14N(true, false); 101 | 102 | foreach ($existing_styles[$tag] as $e) { 103 | if ($e['string'] === $string) { 104 | // this is an existing style 105 | $mapping[$tag][$id] = $e['id']; 106 | continue 2; // continue to next style 107 | } 108 | } 109 | 110 | 111 | // this is a new style 112 | $new_id = count($existing_styles[$tag]); 113 | 
114 | $existing_styles[$tag][] = array( 115 | "node" => $style, 116 | "string" => $style->C14N(true, false), 117 | "id" => $new_id, 118 | ); 119 | $mapping[$tag][$id] = $new_id; 120 | } 121 | } 122 | } 123 | return array($mapping, $existing_styles); 124 | } 125 | 126 | /** 127 | * @param $existing_styles 128 | * @param \DOMXPath $xpath 129 | */ 130 | protected function replaceStyleTags($existing_styles, $xpath) { 131 | foreach ($existing_styles as $tag => $styles) { 132 | $elems = $xpath->query("//m:{$tag}"); 133 | 134 | if ($elems->length > 0) { 135 | $elem = $elems->item(0); 136 | while ($elem->hasChildNodes()) { 137 | $elem->removeChild($elem->firstChild); 138 | } 139 | foreach ($styles as $s) { 140 | $elem->appendChild($xpath->document->importNode($s['node'], true)); 141 | } 142 | $elem->setAttribute("count", count($styles)); 143 | } 144 | } 145 | } 146 | 147 | /** 148 | * @param \DOMXPath $existing_xpath 149 | * @param \DOMXPath $source_xpath 150 | * @param $mapping 151 | * @return array 152 | */ 153 | protected function rewriteCells($existing_xpath, $source_xpath, $mapping) { 154 | $elems = $existing_xpath->query("//m:cellXfs"); 155 | $defined_styles = array(); 156 | if ($elems->length > 0) { 157 | if ($elems->item(0)->hasChildNodes()) { 158 | foreach ($elems->item(0)->childNodes as $id => $style) { 159 | $defined_styles[$id] = array( 160 | "node" => $style, 161 | "string" => $style->C14N(true, false), 162 | "id" => $id 163 | ); 164 | } 165 | } 166 | } 167 | 168 | $styles_mapping = array(); 169 | $elems = $source_xpath->query("//m:cellXfs"); 170 | if ($elems->length > 0) { 171 | if ($elems->item(0)->hasChildNodes()) { 172 | foreach ($elems->item(0)->childNodes as $id => $style) { 173 | 174 | $fontId = intval($style->getAttribute('fontId')); 175 | if (array_key_exists($fontId, $mapping['fonts'])) { 176 | $style->setAttribute('fontId', 0 + $mapping['fonts'][$fontId]); 177 | } 178 | 179 | $numFmtId = intval($style->getAttribute('numFmtId')); 180 | if 
(array_key_exists($numFmtId, $mapping['numFmts'])) { 181 | $style->setAttribute('numFmtId', 0 + $mapping['numFmts'][$numFmtId]); 182 | } 183 | 184 | $fillId = intval($style->getAttribute('fillId')); 185 | if (array_key_exists($fillId, $mapping['fills'])) { 186 | $style->setAttribute('fillId', 0 + $mapping['fills'][$fillId]); 187 | } 188 | 189 | $borderId = intval($style->getAttribute('borderId')); 190 | if (array_key_exists($borderId, $mapping['borders'])) { 191 | $style->setAttribute('borderId', 0 + $mapping['borders'][$borderId]); 192 | } 193 | 194 | $string = $style->C14N(true, false); 195 | 196 | foreach ($defined_styles as $d) { 197 | if ($d['string'] == $string) { 198 | // we found an existing style 199 | $styles_mapping[$id] = $d['id']; 200 | continue 2; 201 | } 202 | } 203 | 204 | // this is a new style! 205 | $new_id = count($defined_styles); 206 | $defined_styles[$new_id] = array( 207 | "node" => $style, 208 | "string" => $style->C14N(true, false), 209 | "id" => $new_id 210 | ); 211 | 212 | $styles_mapping[$id] = $new_id; 213 | } 214 | 215 | } 216 | } 217 | return array($defined_styles, $styles_mapping); 218 | } 219 | 220 | /** 221 | * @param $defined_styles 222 | * @param \DOMXPath $existing_xpath 223 | */ 224 | protected function replaceStylesList($defined_styles, $existing_xpath) { 225 | $elems = $existing_xpath->query("//m:cellXfs"); 226 | if ($elems->length > 0) { 227 | $elem = $elems->item(0); 228 | while ($elem->hasChildNodes()) { 229 | $elem->removeChild($elem->firstChild); 230 | } 231 | foreach ($defined_styles as $s) { 232 | $elem->appendChild($existing_xpath->document->importNode($s['node'], true)); 233 | } 234 | $elem->setAttribute("count", count($defined_styles)); 235 | } 236 | } 237 | } 238 | -------------------------------------------------------------------------------- /Tasks/Vba.php: -------------------------------------------------------------------------------- 1 | insertFile($zip_dir)) { 17 | // successfully copied VBA code into 
merged file 18 | $this->addWorkbookRelation(); 19 | $this->registerContentType(); 20 | } 21 | } 22 | 23 | protected function insertFile($zip_dir) { 24 | $filename = "/xl/vbaProject.bin"; 25 | $target_filename = $this->result_dir . $filename; 26 | $source_filename = $zip_dir . $filename; 27 | 28 | if (file_exists($source_filename)) { 29 | if (file_exists($target_filename)) { 30 | // if the target file already exists, try to delete it first 31 | @unlink($target_filename); 32 | } 33 | if (!file_exists($target_filename)) { 34 | // we only try to copy the file to the target location 35 | // if there's no identically named file there already 36 | if (copy($source_filename, $target_filename)) { 37 | return true; 38 | } 39 | } 40 | } 41 | 42 | return false; 43 | } 44 | 45 | protected function addWorkbookRelation() { 46 | // Add (if necessary) the following to _rels/workbook.xml.rels: 47 | // 48 | $rels_file = $this->result_dir . "xl/_rels/workbook.xml.rels"; 49 | 50 | $doc = new \DOMDocument(); 51 | $doc->load($rels_file); 52 | 53 | $xpath = new \DOMXPath($doc); 54 | $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/package/2006/relationships"); 55 | 56 | $elems = $xpath->query("//m:Relationship[@Target='vbaProject.bin']"); 57 | 58 | if ($elems->length == 0) { 59 | $ids = $xpath->query("//m:Relationship"); 60 | 61 | $node = $doc->createElement("Relationship"); 62 | $node->setAttribute("Id", "rId" . ($ids->length + 1)); 63 | $node->setAttribute("Type", "http://schemas.microsoft.com/office/2006/relationships/vbaProject"); 64 | $node->setAttribute("Target", "vbaProject.bin"); 65 | $doc->documentElement->appendChild($node); 66 | 67 | $doc->save($rels_file); 68 | } 69 | } 70 | 71 | protected function registerContentType() { 72 | // and add (if necessary) the following to [Content_Types].xml: 73 | // 74 | $content_types_file = $this->result_dir . 
"[Content_Types].xml"; 75 | 76 | $doc = new \DOMDocument(); 77 | $doc->load($content_types_file); 78 | 79 | $xpath = new \DOMXPath($doc); 80 | $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/package/2006/content-types"); 81 | 82 | $elems = $xpath->query("//m:Default[@Extension='bin']"); 83 | if ($elems->length == 0) { 84 | $node = $doc->createElement("Default"); 85 | $node->setAttribute("Extension", "bin"); 86 | $node->setAttribute("ContentType", "application/vnd.ms-office.vbaProject"); 87 | 88 | $doc->documentElement->appendChild($node); 89 | 90 | $doc->save($content_types_file); 91 | } 92 | } 93 | } -------------------------------------------------------------------------------- /Tasks/Workbook.php: -------------------------------------------------------------------------------- 1 | add 14 | * 15 | */ 16 | $filename = "{$this->result_dir}/xl/workbook.xml"; 17 | $dom = new \DOMDocument(); 18 | $dom->load($filename); 19 | 20 | $xpath = new \DOMXPath($dom); 21 | $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main"); 22 | $elems = $xpath->query("//m:sheets"); 23 | foreach ($elems as $e) { 24 | $tag = $dom->createElement('sheet'); 25 | $tag->setAttribute('name', $this->sheet_name); 26 | $tag->setAttribute('sheetId', $this->sheet_number); 27 | $tag->setAttribute('r:id', "rId" . $this->sheet_number); 28 | 29 | $e->appendChild($tag); 30 | break; 31 | } 32 | 33 | // make sure all worksheets have the correct rId - we might have assigned them new ids 34 | // in the Tasks\WorkbookRels::merge() method 35 | 36 | // Caroline Clep: this is breaking the result file - need to make sure we don't touch the sheets ids and only update the external links 37 | //$elems = $xpath->query("//m:sheets/m:sheet"); 38 | //foreach ($elems as $e) { 39 | // $e->setAttribute("r:id", "rId" . 
($e->getAttribute("sheetId"))); 40 | //} 41 | 42 | $relfilename = "{$this->result_dir}/xl/_rels/workbook.xml.rels"; 43 | $reldom = new \DOMDocument(); 44 | $reldom->load($relfilename); 45 | 46 | $relxpath = new \DOMXPath($reldom); 47 | $relxpath->registerNamespace("m", "http://schemas.openxmlformats.org/package/2006/relationships"); 48 | $relelems = $relxpath->query("//m:Relationship"); 49 | 50 | 51 | $elems = $xpath->query("//m:externalReference"); 52 | $refId = 1; 53 | foreach ($elems as $e) 54 | { 55 | foreach ($relelems as $rele) 56 | { 57 | if ($rele->getAttribute("Target") === "externalLinks/externalLink" . $refId . ".xml") 58 | { 59 | $e->setAttribute("r:id", $rele->getAttribute("Id")); 60 | break; 61 | } 62 | } 63 | $refId++; 64 | } 65 | // Caroline Clep: End of fix 66 | 67 | $dom->save($filename); 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /Tasks/WorkbookRels.php: -------------------------------------------------------------------------------- 1 | in 'Relationships' 14 | * => add 15 | * 16 | * 17 | * => Renumber all rId{X} values to rId{X+1} where X >= N 18 | * 19 | * -> Re-order and re-number so that we first list all the sheets, and then the rest 20 | */ 21 | 22 | $filename = "{$this->result_dir}/xl/_rels/workbook.xml.rels"; 23 | $dom = new \DOMDocument(); 24 | $dom->load($filename); 25 | 26 | $xpath = new \DOMXPath($dom); 27 | $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/package/2006/relationships"); 28 | $elems = $xpath->query("//m:Relationship"); 29 | 30 | $rest_id = $this->sheet_number + 1; 31 | foreach ($elems as $e) { 32 | $type = $e->getAttribute("Type"); 33 | $is_worksheet = (strpos($type, "worksheet")!==false); 34 | 35 | if ($is_worksheet) { 36 | sscanf($e->getAttribute("Target"), "worksheets/sheet%d.xml", $sheet_nr); 37 | $e->setAttribute("Id", "rId" . ($sheet_nr)); 38 | } else { 39 | $e->setAttribute("Id", "rId" . 
/**
 * Returns the highest N for which "{$dir}/xl/worksheets/sheetN.xml" exists.
 *
 * @param string $dir Directory that contains the "xl/worksheets" folder.
 * @return int Highest sheet number found, or 0 when no numbered sheet exists.
 */
protected function getSheetCount($dir) {
    $existing_sheets = glob("{$dir}/xl/worksheets/sheet*.xml");

    // glob() returns false on error; treat that exactly like "no sheets"
    // instead of handing a non-array to count().
    if (empty($existing_sheets)) {
        return 0;
    }

    // Inspect every match rather than only the naturally-last one, so a
    // "sheet*.xml" file without a number cannot mask the numbered sheets.
    $highest = 0;
    foreach ($existing_sheets as $path) {
        if (sscanf(basename($path), "sheet%d.xml", $number) === 1 && $number > $highest) {
            $highest = $number;
        }
    }

    return $highest;
}

/**
 * Rewrites the shared-string indexes stored in a worksheet.
 *
 * Every <v> element below a <c t="s"> cell is inspected; when its numeric
 * value is a key of $mapping, the value is replaced by the mapped entry.
 *
 * @param \DOMDocument $sheet   Worksheet document, modified in place.
 * @param array        $mapping Replacement values keyed by the old index.
 */
protected function remapSharedStrings($sheet, $mapping) {
    $xpath = new \DOMXPath($sheet);
    $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
    $shared = $xpath->query("//m:c[@t='s']/m:v");

    // DOMXPath::query() reports failure with false, never with null.
    if ($shared === false) {
        return;
    }

    foreach ($shared as $tag) {
        $old_id = $tag->nodeValue;
        if (!is_numeric($old_id)) {
            continue;
        }

        $old_id = intval($old_id);
        if (array_key_exists($old_id, $mapping)) {
            $tag->nodeValue = $mapping[$old_id];
        }
    }
}

/**
 * Remaps the "s" attribute of every cell (<c>) that carries one.
 *
 * @param \DOMDocument $sheet   Worksheet document, modified in place.
 * @param array        $mapping Replacement values keyed by the old id.
 */
protected function remapStyles($sheet, $mapping) {
    $this->doRemapping($sheet, "//m:c[@s]", "s", $mapping);
}

/**
 * Remaps the "dxfId" attribute of every conditional-formatting rule.
 *
 * @param \DOMDocument $sheet   Worksheet document, modified in place.
 * @param array        $mapping Replacement values keyed by the old id.
 */
protected function remapConditionalStyles($sheet, $mapping) {
    $this->doRemapping($sheet, "//m:conditionalFormatting/m:cfRule[@dxfId]", "dxfId", $mapping);
}

/**
 * Remaps the "style" attribute of every column definition (<col>).
 *
 * @param \DOMDocument $sheet   Worksheet document, modified in place.
 * @param array        $mapping Replacement values keyed by the old id.
 */
protected function remapColsStyles($sheet, $mapping) {
    // XPath attribute names are case-sensitive. The predicate must use the
    // same lowercase "style" that is read and written below; "@Style"
    // never matched, so column styles were silently left unmapped.
    $this->doRemapping($sheet, "//m:col[@style]", "style", $mapping);
}

/**
 * Replaces a numeric attribute on all nodes selected by an XPath query.
 *
 * For each selected node the attribute is read; if it is numeric and its
 * integer value is a key of $mapping, the attribute is set to the mapped
 * entry. Nodes with non-numeric or unmapped values are left untouched.
 *
 * @param \DOMDocument $sheet       Worksheet document, modified in place.
 * @param string       $xpath_query Query using the "m" prefix for the
 *                                  SpreadsheetML main namespace.
 * @param string       $attribute   Name of the attribute to rewrite.
 * @param array        $mapping     Replacement values keyed by the old id.
 */
protected function doRemapping($sheet, $xpath_query, $attribute, $mapping) {
    $xpath = new \DOMXPath($sheet);
    $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");
    $nodes = $xpath->query($xpath_query);

    // DOMXPath::query() reports failure with false, never with null.
    if ($nodes === false) {
        return;
    }

    foreach ($nodes as $tag) {
        $old_id = $tag->getAttribute($attribute);
        if (!is_numeric($old_id)) {
            continue;
        }

        $old_id = intval($old_id);
        if (array_key_exists($old_id, $mapping)) {
            $tag->setAttribute($attribute, $mapping[$old_id]);
        }
    }
}

/**
 * Determines the display name of the worksheet stored in $filename.
 *
 * The name is read from "../workbook.xml" relative to the worksheet file.
 * The sheet entry is located through the workbook relationships first
 * (relationship Target -> Id -> <sheet r:id>). If that yields nothing, the
 * entry whose sheetId equals the N of "sheetN.xml" is used, and finally the
 * generic name "Worksheet N".
 *
 * @param string $filename Path to a worksheet part named "sheetN.xml".
 * @return string The worksheet name.
 */
protected function extractWorksheetName($filename) {
    $xl_dir = dirname($filename) . "/..";
    $sheet_file = basename($filename);
    sscanf($sheet_file, "sheet%d.xml", $number);

    $workbook = new \DOMDocument();
    $workbook->load($xl_dir . "/workbook.xml");

    $xpath = new \DOMXPath($workbook);
    $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/spreadsheetml/2006/main");

    // Preferred lookup: sheetId is not required to equal the number in the
    // part name, whereas the relationship Id ties a <sheet> to its file.
    $rel_id = $this->findWorksheetRelationshipId($xl_dir . "/_rels/workbook.xml.rels", $sheet_file);
    if ($rel_id !== null) {
        $sheets = $xpath->query("//m:sheets/m:sheet");
        if ($sheets !== false) {
            foreach ($sheets as $e) {
                $candidate = $e->getAttributeNS(
                    "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
                    "id"
                );
                if ($candidate === $rel_id) {
                    return $e->getAttribute('name');
                }
            }
        }
    }

    // Fallback: match on sheetId, defaulting to a generic name.
    $sheet_name = "Worksheet $number";
    $elems = $xpath->query("//m:sheets/m:sheet[@sheetId='" . $number . "']");
    foreach ($elems as $e) {
        // should be one only
        $sheet_name = $e->getAttribute('name');
        break;
    }

    return $sheet_name;
}

/**
 * Finds the Id of the relationship that targets a given worksheet file.
 *
 * @param string $rels_file  Path to "workbook.xml.rels".
 * @param string $sheet_file Base name of the worksheet part, e.g. "sheet3.xml".
 * @return string|null The relationship Id, or null when the file is missing,
 *                     unreadable, or has no matching relationship.
 */
private function findWorksheetRelationshipId($rels_file, $sheet_file) {
    if (!is_file($rels_file)) {
        return null;
    }

    $rels = new \DOMDocument();
    if (!$rels->load($rels_file)) {
        return null;
    }

    $xpath = new \DOMXPath($rels);
    $xpath->registerNamespace("m", "http://schemas.openxmlformats.org/package/2006/relationships");
    $relationships = $xpath->query("//m:Relationship");
    if ($relationships === false) {
        return null;
    }

    foreach ($relationships as $rel) {
        $target = $rel->getAttribute("Target");

        // Require the "worksheets" folder so that an equally named part in
        // another folder is not mistaken for the worksheet.
        if (basename($target) === $sheet_file && basename(dirname($target)) === "worksheets") {
            $id = $rel->getAttribute("Id");
            return ($id === "") ? null : $id;
        }
    }

    return null;
}