├── CHANGELOG.md ├── .gitattributes ├── README.md ├── LICENSE ├── converter.php ├── .gitignore └── LingoesConverter.php /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ChangeLog for Lingoes Converter 2 | =============================== 3 | 4 | Version 0.1 (08-Mar-2013) 5 | ------------------------- 6 | * Initial release 7 | * TODO: 8 | - More documentation 9 | - Automatically detect the dictionary encoding -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Lingoes Converter** 2 | ================= 3 | 4 | Introduction 5 | ------------ 6 | Lingoes Converter is a script written in PHP that can convert *.LD2/*.LDX dictionaries of [Lingoes](http://lingoes.net "Lingoes") into human-readable text files. The script is based on Xiaoyun Zhu's analysis ([lingoes-extractor](http://code.google.com/p/lingoes-extractor/)) of the LD2/LDX dictionary format.
7 | 8 | Requirements 9 | ------------ 10 | * PHP5 or higher 11 | * Multibyte String extension enabled 12 | 13 | Usage 14 | ----- 15 | 16 | You can just download a binary distribution for Windows here and run it: 17 | 18 | http://tiny.cc/lingoes-converter 19 | 20 | Or, if you have a running web server, upload the source and point your browser to: 21 | 22 | `http://yourwebsite/converter.php?input=path/to/somefile.ld2&encodingWord=UTF-8&encodingDef=UTF-16LE` 23 | 24 | Or, if you already have PHP downloaded / installed on your computer, issue this command and follow the on-screen instructions: 25 | 26 | `php converter.php` 27 | 28 | Currently the class cannot detect the dictionary's encoding by itself, so try a few encoding names to see which one works (usually *UTF-8*, *UTF-16LE* or *UTF-16BE*). 29 | 30 | About and License 31 | ----------------- 32 | Copyright (c) 2013, WindyLea. All rights reserved. Website: www.windylea.com 33 | 34 | This project is released under the BSD license. See the LICENSE file for more information. -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, WindyLea 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 3. All advertising materials mentioning features or use of this software 12 | must display the following acknowledgement: 13 | This product includes software developed by WindyLea. 14 | 4.
Neither the name of WindyLea nor the 15 | names of its contributors may be used to endorse or promote products 16 | derived from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY 19 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 25 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /converter.php: -------------------------------------------------------------------------------- 1 | by WindyLea" . PHP_EOL; 6 | echo "---" . PHP_EOL; 7 | 8 | $input = isset($_SERVER["argv"][1]) ? trim($_SERVER["argv"][1]) : ""; 9 | $output = isset($_SERVER["argv"][2]) ? 
trim($_SERVER["argv"][2]) : ""; 10 | if (empty($input)) 11 | { 12 | $line = false; 13 | while(!$line) 14 | { 15 | echo "+ Input file: "; 16 | $cmdHandle = fopen("php://stdin", "r"); 17 | $line = trim(fgets($cmdHandle)); 18 | } 19 | $input = trim($line, '"'); 20 | } 21 | 22 | echo "+ Output file (Optional): "; 23 | $line = trim(fgets($cmdHandle)); 24 | $output = trim($line, '"'); 25 | 26 | echo "+ Entry word encoding (Optional / Default is UTF-8): "; 27 | $line = trim(fgets($cmdHandle)); 28 | $encodingWord = trim($line, '"'); 29 | 30 | echo "+ Entry definition encoding (Optional / Default is UTF-16LE): "; 31 | $line = trim(fgets($cmdHandle)); 32 | $encodingDef = trim($line, '"'); 33 | 34 | } else 35 | { 36 | $input = isset($_GET["input"]) ? trim($_GET["input"]) : ""; 37 | $output = isset($_GET["output"]) ? trim($_GET["output"]) : ""; 38 | $encodingWord = isset($_GET["encodingWord"]) ? trim($_GET["encodingWord"]) : "UTF-8"; 39 | $encodingDef = isset($_GET["encodingDef"]) ? trim($_GET["encodingDef"]) : "UTF-16LE"; 40 | } 41 | 42 | set_time_limit(0); 43 | ini_set("memory_limit", "128M"); 44 | include("LingoesConverter.php"); 45 | 46 | echo PHP_EOL . "Converting..." . PHP_EOL; 47 | 48 | $timeStart = microtime(true); 49 | $plc = new LingoesConverter; 50 | $plc->input = $input; 51 | $plc->output = $output; 52 | $plc->encodingDef = $encodingDef; 53 | $plc->encodingWord = $encodingWord; 54 | $convert = $plc->convert(); 55 | if (!$convert) 56 | { 57 | $lastMessage = end($plc->logs); 58 | echo "* " . $lastMessage[1] . PHP_EOL; 59 | } 60 | 61 | $timeEnd = microtime(true); 62 | 63 | echo PHP_EOL . "# Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)"; 64 | echo PHP_EOL . "# Memory usage: " . (memory_get_usage(true) / 1024) . " KB"; 65 | echo PHP_EOL . "# Peak memory usage: " . (memory_get_peak_usage(true) / 1024) . 
" KB"; 66 | ?> -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | [Dd]ebug/ 46 | [Rr]elease/ 47 | *_i.c 48 | *_p.c 49 | *.ilk 50 | *.meta 51 | *.obj 52 | *.pch 53 | *.pdb 54 | *.pgc 55 | *.pgd 56 | *.rsp 57 | *.sbr 58 | *.tlb 59 | *.tli 60 | *.tlh 61 | *.tmp 62 | *.vspscc 63 | .builds 64 | *.dotCover 65 | 66 | ## TODO: If you have NuGet Package Restore enabled, uncomment this 67 | #packages/ 68 | 69 | # Visual C++ cache files 70 | ipch/ 71 | *.aps 72 | *.ncb 73 | *.opensdf 74 | *.sdf 75 | 76 | # Visual Studio profiler 77 | *.psess 78 | *.vsp 79 | 80 | # ReSharper is a .NET coding add-in 81 | _ReSharper* 82 | 83 | # Installshield output folder 84 | [Ee]xpress 85 | 86 | # DocProject is a documentation generator add-in 87 | DocProject/buildhelp/ 88 | DocProject/Help/*.HxT 89 | DocProject/Help/*.HxC 90 | DocProject/Help/*.hhc 91 | DocProject/Help/*.hhk 92 | DocProject/Help/*.hhp 93 | DocProject/Help/Html2 94 | DocProject/Help/html 95 | 96 | # Click-Once directory 97 | publish 98 | 99 | # Others 100 | [Bb]in 101 | [Oo]bj 102 | sql 103 | TestResults 104 | *.Cache 105 | ClientBin 106 | 
stylecop.* 107 | ~$* 108 | *.dbmdl 109 | Generated_Code #added for RIA/Silverlight projects 110 | 111 | # Backup & report files from converting an old project file to a newer 112 | # Visual Studio version. Backup files are not needed, because we have git ;-) 113 | _UpgradeReport_Files/ 114 | Backup*/ 115 | UpgradeLog*.XML 116 | 117 | 118 | 119 | ############ 120 | ## Windows 121 | ############ 122 | 123 | # Windows image file caches 124 | Thumbs.db 125 | 126 | # Folder config file 127 | Desktop.ini 128 | 129 | 130 | ############# 131 | ## Python 132 | ############# 133 | 134 | *.py[co] 135 | 136 | # Packages 137 | *.egg 138 | *.egg-info 139 | dist 140 | build 141 | eggs 142 | parts 143 | bin 144 | var 145 | sdist 146 | develop-eggs 147 | .installed.cfg 148 | 149 | # Installer logs 150 | pip-log.txt 151 | 152 | # Unit test / coverage reports 153 | .coverage 154 | .tox 155 | 156 | #Translations 157 | *.mo 158 | 159 | #Mr Developer 160 | .mr.developer.cfg 161 | 162 | # Mac crap 163 | .DS_Store 164 | -------------------------------------------------------------------------------- /LingoesConverter.php: -------------------------------------------------------------------------------- 1 | 6 | * @copyright Copyright (c) 2013, WindyLea. All right reserved 7 | * @version 0.1 8 | */ 9 | class LingoesConverter 10 | { 11 | /* 12 | * Path to a *.LD2/*.LDX dictionary file 13 | * 14 | * @access public 15 | * @var string 16 | */ 17 | public $input; 18 | 19 | /* 20 | * (Optional) Path to the output file to be written to. If not specified, 21 | * this value will be [Input file's name].txt 22 | * 23 | * @access public 24 | * @var string 25 | */ 26 | public $output; 27 | 28 | /* 29 | * Log messages 30 | * 31 | * @access public 32 | * @var array 33 | */ 34 | public $logs; 35 | 36 | /* 37 | * Encoding for the entry words in the dictionary. Currently the class 38 | * itself can't determine the encoding of the dictionary so this property 39 | * is needed. 
Default is "UTF-8" 40 | * 41 | * @access public 42 | * @var string 43 | */ 44 | public $encodingWord = "UTF-8"; 45 | 46 | /* 47 | * Encoding for the entry definitions in the dictionary. Default is "UTF-16LE" 48 | * 49 | * @access public 50 | * @var string 51 | */ 52 | public $encodingDef = "UTF-16LE"; 53 | 54 | /* 55 | * Input file's properties 56 | * 57 | * @access public 58 | * @var array 59 | */ 60 | public $prop = array(); 61 | 62 | /* 63 | * File handle for the input file 64 | * 65 | * @access protected 66 | * @var resources 67 | */ 68 | protected $inputHandle; 69 | 70 | /* 71 | * File handle for the uncompressed data file 72 | * 73 | * @access protected 74 | * @var resources 75 | */ 76 | protected $inflatedHandle; 77 | 78 | /* 79 | * Class destructor 80 | * 81 | * @access public 82 | */ 83 | public function __destruct() 84 | { 85 | @fclose($this->inputHandle); 86 | @fclose($this->inflatedHandle); 87 | } 88 | 89 | /* 90 | * Checks if the selected encoding is valid or is supported 91 | * 92 | * @access public 93 | * @author windylea 94 | * @param string $input The encoding name to be checked 95 | * @param string $defaultValue If the input encoding is not found, it will 96 | be replaced by this value 97 | * @return string Returns the correct encoding name 98 | */ 99 | public function validateEncoding($input, $defaultValue) 100 | { 101 | if (!empty($input)) 102 | { 103 | $encodingList = mb_list_encodings(); 104 | $input = trim(strtolower($input)); 105 | foreach ($encodingList as $encoding) 106 | { 107 | $test = strtolower($encoding); 108 | if ($test == $input) 109 | { 110 | return $encoding; 111 | } 112 | } 113 | } 114 | 115 | return $defaultValue; 116 | } 117 | 118 | /* 119 | * Writes a message to the log 120 | * 121 | * @access public 122 | * @author windylea 123 | * @param string $message The log message 124 | * @return null 125 | */ 126 | public function log($message) 127 | { 128 | $this->logs[] = array(time(), $message); 129 | return null; 130 | } 131 | 132 
| /* 133 | * Parses file properties 134 | * 135 | * @access public 136 | * @author windylea 137 | * @return bool Returns TRUE on success, otherwise FALSE if an error occured 138 | */ 139 | public function prop() 140 | { 141 | /* 142 | * Prepare the input file and get its information 143 | */ 144 | $this->input = realpath($this->input); 145 | if (!file_exists($this->input) || !is_readable($this->input)) 146 | { 147 | $this->log("Error: File does not exist or not readable!"); 148 | return false; 149 | } 150 | 151 | $this->inputHandle = fopen($this->input, "r"); 152 | 153 | /* 154 | * Gets version infomation by reading 2 bytes at offset 0x18 and 2 bytes 155 | * at offset 0x1A as unsigned shorts 156 | */ 157 | fseek($this->inputHandle, 0x18); 158 | 159 | $major = current(unpack("S", fread($this->inputHandle, 2))); 160 | $minor = current(unpack("S", fread($this->inputHandle, 2))); 161 | 162 | $this->prop["dictVersion"] = $major . "." . $minor; 163 | 164 | /* 165 | * Gets dictionary ID by reading 16 bytes at offset 0x1C and convert 166 | * them to hex string 167 | */ 168 | fseek($this->inputHandle, 0x1C); 169 | $data = fread($this->inputHandle, 16); 170 | 171 | $this->prop["dictId"] = ""; 172 | $chars = str_split($data); 173 | foreach($chars as $char) 174 | { 175 | $this->prop["dictId"] .= dechex(ord($char)); 176 | } 177 | 178 | /* 179 | * Gets beginning offset for other offset information by reading 4 180 | * bytes at offset 0x5C as an integer and add 0x60 to this value 181 | */ 182 | fseek($this->inputHandle, 0x5C); 183 | $data = current(unpack("S", fread($this->inputHandle, 4))); 184 | $this->prop["offsetStart"] = $data + 0x60; 185 | 186 | /* 187 | * Gets dictionary type by reading 4 bytes at the beginning offset as an 188 | * integer 189 | */ 190 | fseek($this->inputHandle, $this->prop["offsetStart"]); 191 | $this->prop["dictType"] = current(unpack("S", fread($this->inputHandle, 4))); 192 | 193 | /* 194 | * Gets the end offset of the compressed data 195 | * 196 | * 
Gets information offset(?). On some dictionaries the beginning offset 197 | * equals to this information offset 198 | */ 199 | fseek($this->inputHandle, $this->prop["offsetStart"] + 4); 200 | $data = current(unpack("I", fread($this->inputHandle, 4))); 201 | 202 | $this->prop["offsetInfo"] = $data + $this->prop["offsetStart"] + 0x0C; 203 | if($this->prop["dictType"] == 3) 204 | { 205 | /* 206 | * Just ignore it 207 | */ 208 | } elseif(filesize($this->input) > ($this->prop["offsetInfo"] - 0x1C)) 209 | { 210 | $this->prop["offsetStart"] = $this->prop["offsetInfo"]; 211 | } else 212 | { 213 | $this->log("Error: Unsupported dictionary format"); 214 | return false; 215 | } 216 | 217 | fseek($this->inputHandle, $this->prop["offsetStart"] + 4); 218 | $data = current(unpack("I", fread($this->inputHandle, 4))); 219 | $this->prop["offsetCompressedDataEnd"] = $data + $this->prop["offsetStart"] + 0x08; 220 | 221 | /* 222 | * Gets offset for the header of the compressed data 223 | */ 224 | 225 | fseek($this->inputHandle, $this->prop["offsetStart"] + 8); 226 | $data = current(unpack("I", fread($this->inputHandle, 4))); 227 | $this->prop["offsetCompressedDataHeader"] = $data + $this->prop["offsetStart"] + 0x1C; 228 | 229 | fseek($this->inputHandle, $this->prop["offsetCompressedDataHeader"] + 0x08); 230 | $this->prop["offsetCompressedDataBegin"] = current(unpack("I", fread($this->inputHandle, 4))); 231 | 232 | /* 233 | * Gets offset of the dictionary words in the inflated file 234 | */ 235 | fseek($this->inputHandle, $this->prop["offsetStart"] + 12); 236 | $this->prop["offsetWord"] = current(unpack("I", fread($this->inputHandle, 4))); 237 | 238 | /* 239 | * Gets total length of the words and offset of the dictionary XML 240 | * strings in the inflated file 241 | */ 242 | fseek($this->inputHandle, $this->prop["offsetStart"] + 16); 243 | $this->prop["lengthWord"] = current(unpack("I", fread($this->inputHandle, 4))); 244 | $this->prop["offsetXml"] = $this->prop["offsetWord"] + 
$this->prop["lengthWord"]; 245 | 246 | /* 247 | * Gets total length of the XML definitions 248 | */ 249 | fseek($this->inputHandle, $this->prop["offsetStart"] + 20); 250 | $this->prop["lengthXml"] = current(unpack("I", fread($this->inputHandle, 4))); 251 | 252 | ksort($this->prop); 253 | return true; 254 | } 255 | 256 | /* 257 | * Decompress gz-compressed data to file 258 | * 259 | * @access public 260 | * @author windylea 261 | * @return bool Returns TRUE on success, otherwise FALSE if an error occured 262 | */ 263 | function unpack() 264 | { 265 | if (empty($this->prop)) 266 | { 267 | $return = $this->prop(); 268 | if (!$return) 269 | { 270 | return false; 271 | } 272 | } 273 | 274 | fseek($this->inputHandle, $this->prop["offsetCompressedDataHeader"] + 0x0C); 275 | $offsetList = array(); 276 | 277 | $timeStart = microtime(true); 278 | $this->log("Message: Decompression started on " . @date(DATE_RFC1123, $timeStart)); 279 | 280 | while($this->prop["offsetCompressedDataBegin"] + ftell($this->inputHandle) 281 | <= $this->prop["offsetCompressedDataEnd"]) 282 | { 283 | $data = fread($this->inputHandle, 4); 284 | if (strlen($data) == 4) 285 | { 286 | $offset = current(unpack("I", $data)); 287 | if ($offset > 0) 288 | { 289 | $offsetList[] = $offset; 290 | $startOffset = ftell($this->inputHandle); 291 | } else 292 | { 293 | break; 294 | } 295 | } else 296 | { 297 | break; 298 | } 299 | } 300 | 301 | $lastOffset = 0; 302 | $this->inflatedHandle = fopen($this->input . ".inflated", "w+"); 303 | 304 | foreach ($offsetList as $offset) 305 | { 306 | fseek($this->inputHandle, $startOffset + $lastOffset); 307 | $data = fread($this->inputHandle, ($offset - $lastOffset)); 308 | $uncompressed = @gzuncompress($data); 309 | 310 | if(!$uncompressed) 311 | { 312 | $this->log("Error: Decompression failed at offset 0x" . 313 | sprintf("%04x", ($startOffset + $lastOffset)) . " (tried to" . 314 | " uncompress " . ($offset - $lastOffset) . 
" bytes of data)"); 315 | return false; 316 | } else 317 | { 318 | fwrite($this->inflatedHandle, $uncompressed); 319 | } 320 | 321 | $lastOffset = $offset; 322 | } 323 | 324 | $timeEnd = microtime(true); 325 | $this->log("Message: Decompression finished on " . @date(DATE_RFC1123, $timeStart) . 326 | " - Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)"); 327 | 328 | return true; 329 | } 330 | 331 | /* 332 | * Convert the uncompressed data stream to human-readable format 333 | * 334 | * @access public 335 | * @author windylea 336 | * @return bool Returns TRUE on success, otherwise FALSE if an error occured 337 | */ 338 | function convert() 339 | { 340 | if (!$this->inflatedHandle) 341 | { 342 | $return = $this->unpack(); 343 | if (!$return) 344 | { 345 | return false; 346 | } 347 | } 348 | 349 | if (empty($this->output)) 350 | { 351 | $slashes = (strtoupper(substr(PHP_OS, 0, 3)) === "WIN") ? "\\" : "/"; 352 | $pathInfo = pathinfo($this->input); 353 | $this->output = $pathInfo["dirname"] . $slashes . $pathInfo["filename"] . ".txt"; 354 | } 355 | 356 | $this->encodingWord = self::validateEncoding($this->encodingWord, "UTF-8"); 357 | $this->encodingDef = self::validateEncoding($this->encodingDef, "UTF-16LE"); 358 | 359 | $timeStart = microtime(true); 360 | $this->log("Message: Conversion started on " . 
@date(DATE_RFC1123, $timeStart)); 361 | $outputHandle = fopen($this->output, "w+"); 362 | 363 | $dataLength = 10; 364 | $offsetWord = $this->prop["offsetWord"]; 365 | $offsetXml = $this->prop["offsetXml"]; 366 | $totalEntries = ($offsetWord / $dataLength) - 1; 367 | 368 | for ($i = 0; $i < $totalEntries; $i++) 369 | { 370 | fseek($this->inflatedHandle, $dataLength * $i); 371 | $lastWordOffset = fread($this->inflatedHandle, 4); 372 | 373 | if (strlen($lastWordOffset) == 4) 374 | { 375 | $lastWordOffset = current(unpack("I", $lastWordOffset)); 376 | $lastXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4))); 377 | $flags = ord(fread($this->inflatedHandle, 1)) & 0xff; 378 | $crossRefs = ord(fread($this->inflatedHandle, 1)) & 0xff; 379 | $currentWordOffset = current(unpack("I", fread($this->inflatedHandle, 4))); 380 | $currentXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4))); 381 | 382 | if ($currentXmlOffset - $lastXmlOffset > 0) 383 | { 384 | fseek($this->inflatedHandle, $offsetXml + $lastXmlOffset); 385 | $xml = fread($this->inflatedHandle, ($currentXmlOffset - $lastXmlOffset)); 386 | } else 387 | { 388 | $xml = ""; 389 | } 390 | 391 | for($j = $crossRefs; $j > 0; $j--) 392 | { 393 | fseek($this->inflatedHandle, $offsetWord + $lastWordOffset); 394 | $currentRef = current(unpack("I", fread($this->inflatedHandle, 4))); 395 | 396 | fseek($this->inflatedHandle, $dataLength * $currentRef); 397 | fseek($this->inflatedHandle, 4, SEEK_CUR); 398 | $lastXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4))); 399 | 400 | fseek($this->inflatedHandle, 6, SEEK_CUR); 401 | $currentXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4))); 402 | 403 | fseek($this->inflatedHandle,$offsetXml + $lastXmlOffset); 404 | $xml .= fread($this->inflatedHandle, ($currentXmlOffset - $lastXmlOffset)); 405 | 406 | $lastWordOffset += 4; 407 | } 408 | 409 | $xml = @mb_convert_encoding($xml, "UTF-8", $this->encodingDef); 410 | if($currentWordOffset - 
$lastWordOffset <= 0) 411 | { 412 | continue; 413 | } 414 | 415 | $leftPosition = strpos($xml, ""); 417 | 418 | if (strpos($xml, " $leftPosition) 423 | ? $leftPosition : $rightPosition; 424 | $xml = substr($xml, $position, $length); 425 | 426 | # Remove image tags 427 | $xml = preg_replace("//i", "", $xml); 428 | 429 | # Dictionary cross-reference 430 | $xml = str_replace('dict://key.[$DictID]/', "", $xml); 431 | } else 432 | { 433 | /* 434 | * Replace some of Lingoes's custom markup tags 435 | */ 436 | 437 | # Remove self-closing tags except line break 438 | $xml = preg_replace('/<[^>n]+?\/>/', '', $xml); 439 | 440 | # Text color 441 | //$xml = str_replace('', '', $xml); 443 | 444 | # Dictionary cross-reference 445 | //$xml = str_replace('', '', $xml); 447 | 448 | # Font size 449 | $xml = str_replace('<Ã>', '', $xml); 450 | $xml = str_replace('', '', $xml); 451 | 452 | # Font size 453 | $xml = str_replace('<Å>', '', $xml); 454 | $xml = str_replace('', '', $xml); 455 | 456 | # Bold text 457 | $xml = str_replace('', '', $xml); 458 | $xml = str_replace('', '', $xml); 459 | 460 | # Styling elements 461 | //$xml = str_replace('<Í P="', '', $xml); 466 | $xml = str_replace('', '', $xml); 467 | 468 | # Special text color 469 | $xml = str_replace('', '', $xml); 470 | $xml = str_replace('', '', $xml); 471 | 472 | # Unordered list elements 473 | $xml = preg_replace('/<ï>/', '', $xml, 1); 475 | $xml = str_replace('<ï>', '
  • ', $xml); 476 | $xml = str_replace('', '
  • ', $xml); 477 | 478 | # Italic text 479 | $xml = str_replace('', '', $xml); 480 | $xml = str_replace('', '', $xml); 481 | 482 | # Line break 483 | $xml = str_replace('', '
    ', $xml); 484 | } 485 | 486 | # Escape slashes 487 | $xml = str_replace("\\", "\\\\", $xml); 488 | 489 | fseek($this->inflatedHandle, $offsetWord + $lastWordOffset); 490 | $word = fread($this->inflatedHandle, ($currentWordOffset - $lastWordOffset)); 491 | $word = @mb_convert_encoding($word, "UTF-8", $this->encodingWord); 492 | 493 | fwrite($outputHandle, $word . "\t" . $xml . "\r\n"); 494 | } else 495 | { 496 | break; 497 | } 498 | } 499 | 500 | fclose($this->inflatedHandle); 501 | fclose($this->inputHandle); 502 | @unlink($this->input . ".inflated"); 503 | 504 | $timeEnd = microtime(true); 505 | $this->log("Message: Conversion finished on " . @date(DATE_RFC1123, $timeStart) . 506 | " - Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)"); 507 | return true; 508 | } 509 | } 510 | ?> --------------------------------------------------------------------------------