├── CHANGELOG.md
├── .gitattributes
├── README.md
├── LICENSE
├── converter.php
├── .gitignore
└── LingoesConverter.php
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ChangeLog for Lingoes Converter
2 | ===============================
3 |
4 | Version 0.1 (08-Mar-2013)
5 | -------------------------
6 | * Initial release
7 | * TODO:
8 | - More documentation
9 | - Automatically detect the dictionary encoding
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
4 | # Custom for Visual Studio
5 | *.cs diff=csharp
6 | *.sln merge=union
7 | *.csproj merge=union
8 | *.vbproj merge=union
9 | *.fsproj merge=union
10 | *.dbproj merge=union
11 |
12 | # Standard to msysgit
13 | *.doc diff=astextplain
14 | *.DOC diff=astextplain
15 | *.docx diff=astextplain
16 | *.DOCX diff=astextplain
17 | *.dot diff=astextplain
18 | *.DOT diff=astextplain
19 | *.pdf diff=astextplain
20 | *.PDF diff=astextplain
21 | *.rtf diff=astextplain
22 | *.RTF diff=astextplain
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | **Lingoes Converter**
2 | =================
3 |
4 | Introduction
5 | ------------
6 | Lingoes Converter is a PHP script that converts `*.LD2`/`*.LDX` dictionaries from [Lingoes](http://lingoes.net "Lingoes") into human-readable text files. The script is based on Xiaoyun Zhu's analysis of the LD2/LDX dictionary format ([lingoes-extractor](http://code.google.com/p/lingoes-extractor/)).
7 |
8 | Requirements
9 | ------------
10 | * PHP5 or higher
11 | * Multibyte String extension enabled
12 |
13 | Usage
14 | -----
15 |
16 | You can just download a binary distribution for Windows here and run it:
17 |
18 | http://tiny.cc/lingoes-converter
19 |
20 | Or, if you have a running web server, upload the source and point your browser to:
21 |
22 | `http://yourwebsite/converter.php?input=path/to/somefile.ld2&encodingWord=UTF-8&encodingDef=UTF-16LE`
23 |
24 | Or, if you already have PHP installed on your computer, run this command and follow the on-screen instructions:
25 |
26 | `php converter.php`
27 |
28 | Currently the class cannot detect the dictionary's encoding by itself, so try a few encoding names to see which one works (usually *UTF-8*, *UTF-16LE* or *UTF-16BE*).
29 |
30 | About and License
31 | -----------------
32 | Copyright (c) 2013, WindyLea. All rights reserved. Website: www.windylea.com
33 |
34 | This project is released under the BSD license. See the LICENSE file for more information.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013, WindyLea
2 | All rights reserved.
3 |
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | 1. Redistributions of source code must retain the above copyright
7 | notice, this list of conditions and the following disclaimer.
8 | 2. Redistributions in binary form must reproduce the above copyright
9 | notice, this list of conditions and the following disclaimer in the
10 | documentation and/or other materials provided with the distribution.
11 | 3. All advertising materials mentioning features or use of this software
12 | must display the following acknowledgement:
13 | This product includes software developed by WindyLea.
14 | 4. Neither the name of WindyLea nor the
15 | names of its contributors may be used to endorse or promote products
16 | derived from this software without specific prior written permission.
17 |
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
19 | EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
22 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/converter.php:
--------------------------------------------------------------------------------
1 | by WindyLea" . PHP_EOL;
6 | echo "---" . PHP_EOL;
7 |
8 | $input = isset($_SERVER["argv"][1]) ? trim($_SERVER["argv"][1]) : "";
9 | $output = isset($_SERVER["argv"][2]) ? trim($_SERVER["argv"][2]) : "";
10 | if (empty($input))
11 | {
12 | $line = false;
13 | while(!$line)
14 | {
15 | echo "+ Input file: ";
16 | $cmdHandle = fopen("php://stdin", "r");
17 | $line = trim(fgets($cmdHandle));
18 | }
19 | $input = trim($line, '"');
20 | }
21 |
22 | echo "+ Output file (Optional): ";
23 | $line = trim(fgets($cmdHandle));
24 | $output = trim($line, '"');
25 |
26 | echo "+ Entry word encoding (Optional / Default is UTF-8): ";
27 | $line = trim(fgets($cmdHandle));
28 | $encodingWord = trim($line, '"');
29 |
30 | echo "+ Entry definition encoding (Optional / Default is UTF-16LE): ";
31 | $line = trim(fgets($cmdHandle));
32 | $encodingDef = trim($line, '"');
33 |
34 | } else
35 | {
36 | $input = isset($_GET["input"]) ? trim($_GET["input"]) : "";
37 | $output = isset($_GET["output"]) ? trim($_GET["output"]) : "";
38 | $encodingWord = isset($_GET["encodingWord"]) ? trim($_GET["encodingWord"]) : "UTF-8";
39 | $encodingDef = isset($_GET["encodingDef"]) ? trim($_GET["encodingDef"]) : "UTF-16LE";
40 | }
41 |
42 | set_time_limit(0);
43 | ini_set("memory_limit", "128M");
44 | include("LingoesConverter.php");
45 |
46 | echo PHP_EOL . "Converting..." . PHP_EOL;
47 |
48 | $timeStart = microtime(true);
49 | $plc = new LingoesConverter;
50 | $plc->input = $input;
51 | $plc->output = $output;
52 | $plc->encodingDef = $encodingDef;
53 | $plc->encodingWord = $encodingWord;
54 | $convert = $plc->convert();
55 | if (!$convert)
56 | {
57 | $lastMessage = end($plc->logs);
58 | echo "* " . $lastMessage[1] . PHP_EOL;
59 | }
60 |
61 | $timeEnd = microtime(true);
62 |
63 | echo PHP_EOL . "# Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)";
64 | echo PHP_EOL . "# Memory usage: " . (memory_get_usage(true) / 1024) . " KB";
65 | echo PHP_EOL . "# Peak memory usage: " . (memory_get_peak_usage(true) / 1024) . " KB";
66 | ?>
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #################
2 | ## Eclipse
3 | #################
4 |
5 | *.pydevproject
6 | .project
7 | .metadata
8 | bin/
9 | tmp/
10 | *.tmp
11 | *.bak
12 | *.swp
13 | *~.nib
14 | local.properties
15 | .classpath
16 | .settings/
17 | .loadpath
18 |
19 | # External tool builders
20 | .externalToolBuilders/
21 |
22 | # Locally stored "Eclipse launch configurations"
23 | *.launch
24 |
25 | # CDT-specific
26 | .cproject
27 |
28 | # PDT-specific
29 | .buildpath
30 |
31 |
32 | #################
33 | ## Visual Studio
34 | #################
35 |
36 | ## Ignore Visual Studio temporary files, build results, and
37 | ## files generated by popular Visual Studio add-ons.
38 |
39 | # User-specific files
40 | *.suo
41 | *.user
42 | *.sln.docstates
43 |
44 | # Build results
45 | [Dd]ebug/
46 | [Rr]elease/
47 | *_i.c
48 | *_p.c
49 | *.ilk
50 | *.meta
51 | *.obj
52 | *.pch
53 | *.pdb
54 | *.pgc
55 | *.pgd
56 | *.rsp
57 | *.sbr
58 | *.tlb
59 | *.tli
60 | *.tlh
61 | *.tmp
62 | *.vspscc
63 | .builds
64 | *.dotCover
65 |
66 | ## TODO: If you have NuGet Package Restore enabled, uncomment this
67 | #packages/
68 |
69 | # Visual C++ cache files
70 | ipch/
71 | *.aps
72 | *.ncb
73 | *.opensdf
74 | *.sdf
75 |
76 | # Visual Studio profiler
77 | *.psess
78 | *.vsp
79 |
80 | # ReSharper is a .NET coding add-in
81 | _ReSharper*
82 |
83 | # Installshield output folder
84 | [Ee]xpress
85 |
86 | # DocProject is a documentation generator add-in
87 | DocProject/buildhelp/
88 | DocProject/Help/*.HxT
89 | DocProject/Help/*.HxC
90 | DocProject/Help/*.hhc
91 | DocProject/Help/*.hhk
92 | DocProject/Help/*.hhp
93 | DocProject/Help/Html2
94 | DocProject/Help/html
95 |
96 | # Click-Once directory
97 | publish
98 |
99 | # Others
100 | [Bb]in
101 | [Oo]bj
102 | sql
103 | TestResults
104 | *.Cache
105 | ClientBin
106 | stylecop.*
107 | ~$*
108 | *.dbmdl
109 | Generated_Code #added for RIA/Silverlight projects
110 |
111 | # Backup & report files from converting an old project file to a newer
112 | # Visual Studio version. Backup files are not needed, because we have git ;-)
113 | _UpgradeReport_Files/
114 | Backup*/
115 | UpgradeLog*.XML
116 |
117 |
118 |
119 | ############
120 | ## Windows
121 | ############
122 |
123 | # Windows image file caches
124 | Thumbs.db
125 |
126 | # Folder config file
127 | Desktop.ini
128 |
129 |
130 | #############
131 | ## Python
132 | #############
133 |
134 | *.py[co]
135 |
136 | # Packages
137 | *.egg
138 | *.egg-info
139 | dist
140 | build
141 | eggs
142 | parts
143 | bin
144 | var
145 | sdist
146 | develop-eggs
147 | .installed.cfg
148 |
149 | # Installer logs
150 | pip-log.txt
151 |
152 | # Unit test / coverage reports
153 | .coverage
154 | .tox
155 |
156 | #Translations
157 | *.mo
158 |
159 | #Mr Developer
160 | .mr.developer.cfg
161 |
162 | # Mac crap
163 | .DS_Store
164 |
--------------------------------------------------------------------------------
/LingoesConverter.php:
--------------------------------------------------------------------------------
1 |
6 | * @copyright Copyright (c) 2013, WindyLea. All rights reserved
7 | * @version 0.1
8 | */
9 | class LingoesConverter
10 | {
11 | /*
12 | * Path to a *.LD2/*.LDX dictionary file
13 | *
14 | * @access public
15 | * @var string
16 | */
17 | public $input;
18 |
19 | /*
20 | * (Optional) Path to the output file to be written to. If not specified,
21 | * this value will be [Input file's name].txt
22 | *
23 | * @access public
24 | * @var string
25 | */
26 | public $output;
27 |
28 | /*
29 | * Log messages
30 | *
31 | * @access public
32 | * @var array
33 | */
34 | public $logs;
35 |
36 | /*
37 | * Encoding for the entry words in the dictionary. Currently the class
38 | * itself can't determine the encoding of the dictionary so this property
39 | * is needed. Default is "UTF-8"
40 | *
41 | * @access public
42 | * @var string
43 | */
44 | public $encodingWord = "UTF-8";
45 |
46 | /*
47 | * Encoding for the entry definitions in the dictionary. Default is "UTF-16LE"
48 | *
49 | * @access public
50 | * @var string
51 | */
52 | public $encodingDef = "UTF-16LE";
53 |
54 | /*
55 | * Input file's properties
56 | *
57 | * @access public
58 | * @var array
59 | */
60 | public $prop = array();
61 |
62 | /*
63 | * File handle for the input file
64 | *
65 | * @access protected
66 | * @var resources
67 | */
68 | protected $inputHandle;
69 |
70 | /*
71 | * File handle for the uncompressed data file
72 | *
73 | * @access protected
74 | * @var resources
75 | */
76 | protected $inflatedHandle;
77 |
/*
 * Class destructor
 *
 * Closes the input and inflated-data file handles if they are still open.
 *
 * convert() closes both handles itself, and a failed run may never have
 * opened them, so each handle is checked first: on PHP 8 calling fclose()
 * with null or with an already-closed resource throws a TypeError, which the
 * "@" operator cannot suppress.
 *
 * @access public
 */
public function __destruct()
{
    foreach (array($this->inputHandle, $this->inflatedHandle) as $handle)
    {
        // is_resource() is FALSE for null and for closed handles
        if (is_resource($handle))
        {
            fclose($handle);
        }
    }
}
88 |
/*
 * Resolves an encoding name to the spelling reported by mbstring
 *
 * The name is lower-cased, trimmed and compared against the lower-cased
 * entries of mb_list_encodings(); the first entry that matches is returned
 * in its original spelling.
 *
 * @access public
 * @author windylea
 * @param string $input The encoding name to be checked
 * @param string $defaultValue Returned when $input is empty or does not
 *                             match any listed encoding
 * @return string Returns the matching encoding name or $defaultValue
 */
public function validateEncoding($input, $defaultValue)
{
    // Nothing supplied: fall back straight away
    if (empty($input))
    {
        return $defaultValue;
    }

    $wanted = trim(strtolower($input));
    $known = mb_list_encodings();

    // array_map() keeps the keys, so the hit indexes back into $known
    $position = array_search($wanted, array_map("strtolower", $known));

    return ($position === false) ? $defaultValue : $known[$position];
}
117 |
/*
 * Appends a timestamped entry to the log
 *
 * Each entry is a two-element array: the current Unix time followed by the
 * message.
 *
 * @access public
 * @author windylea
 * @param string $message The log message
 * @return null
 */
public function log($message)
{
    $entry = array(time(), $message);
    $this->logs[] = $entry;

    return null;
}
131 |
/*
 * Parses file properties
 *
 * Opens the input file, reads the header fields described below and stores
 * them in $this->prop, sorted by key. Multi-byte fields are decoded as
 * little-endian unsigned integers.
 *
 * On failure a message is appended to the log, $this->prop is left empty (so
 * a later call parses the file again instead of using partial data) and
 * FALSE is returned.
 *
 * @access public
 * @author windylea
 * @return bool Returns TRUE on success, otherwise FALSE if an error occurred
 */
public function prop()
{
    $this->prop = array();

    /*
     * Prepare the input file. An empty path is rejected explicitly because
     * realpath("") resolves to the current working directory.
     */
    $path = ((string) $this->input === "") ? false : realpath($this->input);
    if ($path === false || !is_file($path) || !is_readable($path))
    {
        $this->log("Error: File does not exist or not readable!");
        return false;
    }
    $this->input = $path;

    // Release a handle left over from an earlier call before reopening
    if (is_resource($this->inputHandle))
    {
        fclose($this->inputHandle);
    }

    // Warnings are suppressed because the failure is reported through the log
    $this->inputHandle = @fopen($path, "rb");
    if (!is_resource($this->inputHandle))
    {
        $this->inputHandle = null;
        $this->log("Error: File does not exist or not readable!");
        return false;
    }

    try
    {
        $prop = array();

        /*
         * Version information: two unsigned shorts at offsets 0x18 and 0x1A
         */
        $major = $this->readHeaderUInt(0x18, 2);
        $minor = $this->readHeaderUInt(0x1A, 2);
        $prop["dictVersion"] = $major . "." . $minor;

        /*
         * Dictionary ID: 16 bytes at offset 0x1C as a hex string. bin2hex()
         * emits two digits per byte, so the result is always 32 characters.
         */
        $prop["dictId"] = bin2hex($this->readHeaderBytes(0x1C, 16));

        /*
         * Beginning offset for the other offset information: the 4-byte
         * integer at offset 0x5C plus 0x60
         */
        $prop["offsetStart"] = $this->readHeaderUInt(0x5C, 4) + 0x60;

        /*
         * Dictionary type: the 4-byte integer at the beginning offset
         */
        $prop["dictType"] = $this->readHeaderUInt($prop["offsetStart"], 4);

        /*
         * Information offset(?). On some dictionaries the beginning offset
         * equals to this information offset
         */
        $prop["offsetInfo"] = $this->readHeaderUInt($prop["offsetStart"] + 4, 4)
            + $prop["offsetStart"] + 0x0C;

        if ($prop["dictType"] === 3)
        {
            /*
             * Just ignore it: the beginning offset is used as it is
             */
        } elseif (filesize($path) > ($prop["offsetInfo"] - 0x1C))
        {
            $prop["offsetStart"] = $prop["offsetInfo"];
        } else
        {
            $this->log("Error: Unsupported dictionary format");
            return false;
        }

        $start = $prop["offsetStart"];

        /*
         * End offset of the compressed data
         */
        $prop["offsetCompressedDataEnd"] = $this->readHeaderUInt($start + 4, 4) + $start + 0x08;

        /*
         * Offset of the header of the compressed data, and the value stored
         * 8 bytes into that header
         */
        $prop["offsetCompressedDataHeader"] = $this->readHeaderUInt($start + 8, 4) + $start + 0x1C;
        $prop["offsetCompressedDataBegin"] = $this->readHeaderUInt(
            $prop["offsetCompressedDataHeader"] + 0x08,
            4
        );

        /*
         * Offset of the dictionary words in the inflated file
         */
        $prop["offsetWord"] = $this->readHeaderUInt($start + 12, 4);

        /*
         * Total length of the words; the XML strings follow the words in the
         * inflated file
         */
        $prop["lengthWord"] = $this->readHeaderUInt($start + 16, 4);
        $prop["offsetXml"] = $prop["offsetWord"] + $prop["lengthWord"];

        /*
         * Total length of the XML definitions
         */
        $prop["lengthXml"] = $this->readHeaderUInt($start + 20, 4);
    } catch (RuntimeException $e)
    {
        $this->log("Error: " . $e->getMessage());
        return false;
    }

    ksort($prop);
    $this->prop = $prop;
    return true;
}

/*
 * Reads exactly $length bytes from the input file at $offset
 *
 * @access private
 * @param int $offset Absolute position in the input file
 * @param int $length Number of bytes to read
 * @return string The bytes read
 * @throws RuntimeException If the position cannot be reached or fewer than
 *                          $length bytes are available
 */
private function readHeaderBytes($offset, $length)
{
    if (fseek($this->inputHandle, $offset) !== 0)
    {
        throw new RuntimeException("Unable to seek to offset " . $offset . " in the dictionary file");
    }

    $bytes = fread($this->inputHandle, $length);
    if ($bytes === false || strlen($bytes) !== $length)
    {
        throw new RuntimeException("Unexpected end of file while reading the dictionary header");
    }

    return $bytes;
}

/*
 * Reads a little-endian unsigned integer from the input file at $offset
 *
 * @access private
 * @param int $offset Absolute position in the input file
 * @param int $size Field width in bytes: 2 for a short, anything else is
 *                  read as a 4-byte integer
 * @return int The decoded value
 * @throws RuntimeException If the field cannot be read completely
 */
private function readHeaderUInt($offset, $size)
{
    $width = ($size === 2) ? 2 : 4;
    $format = ($width === 2) ? "v" : "V";

    return current(unpack($format, $this->readHeaderBytes($offset, $width)));
}
255 |
/*
 * Decompress gz-compressed data to file
 *
 * Reads the table of chunk end offsets that starts 0x0C bytes into the
 * compressed-data header, then inflates every chunk with gzuncompress() and
 * appends the result to "<input>.inflated". The handle of that file is kept
 * in $this->inflatedHandle.
 *
 * If anything fails, a message is logged, the partial ".inflated" file is
 * removed and $this->inflatedHandle is reset, so that convert() does not
 * mistake a half-written file for a finished one.
 *
 * @access public
 * @author windylea
 * @return bool Returns TRUE on success, otherwise FALSE if an error occurred
 */
public function unpack()
{
    // Parse the header first if that has not happened or the file was closed
    if (empty($this->prop) || !is_resource($this->inputHandle))
    {
        if (!$this->prop())
        {
            return false;
        }
    }

    fseek($this->inputHandle, $this->prop["offsetCompressedDataHeader"] + 0x0C);

    // "@" is kept on date(): old PHP versions warn when no timezone is set
    $timeStart = microtime(true);
    $this->log("Message: Decompression started on " . @date(DATE_RFC1123, (int) $timeStart));

    /*
     * Collect the chunk end offsets. The compressed data begins right after
     * the last offset that was read.
     */
    $offsetList = array();
    $startOffset = ftell($this->inputHandle);
    while ($this->prop["offsetCompressedDataBegin"] + ftell($this->inputHandle)
        <= $this->prop["offsetCompressedDataEnd"])
    {
        $data = fread($this->inputHandle, 4);
        if ($data === false || strlen($data) !== 4)
        {
            break;
        }

        $offset = current(unpack("V", $data));
        if ($offset <= 0)
        {
            break;
        }

        $offsetList[] = $offset;
        $startOffset = ftell($this->inputHandle);
    }

    $inflatedPath = $this->input . ".inflated";
    $this->inflatedHandle = @fopen($inflatedPath, "w+b");
    if (!is_resource($this->inflatedHandle))
    {
        $this->inflatedHandle = null;
        $this->log("Error: Unable to create temporary file " . $inflatedPath);
        return false;
    }

    $lastOffset = 0;
    foreach ($offsetList as $offset)
    {
        $chunkStart = $startOffset + $lastOffset;
        $chunkLength = $offset - $lastOffset;

        // A non-positive length means the offset table is corrupt
        $uncompressed = false;
        if ($chunkLength > 0)
        {
            fseek($this->inputHandle, $chunkStart);
            $uncompressed = @gzuncompress(fread($this->inputHandle, $chunkLength));
        }

        // Only FALSE is a failure; a chunk may legitimately inflate to ""
        if ($uncompressed === false)
        {
            $this->log("Error: Decompression failed at offset 0x" .
                sprintf("%04x", $chunkStart) . " (tried to" .
                " uncompress " . $chunkLength . " bytes of data)");

            fclose($this->inflatedHandle);
            $this->inflatedHandle = null;
            @unlink($inflatedPath);
            return false;
        }

        fwrite($this->inflatedHandle, $uncompressed);
        $lastOffset = $offset;
    }

    $timeEnd = microtime(true);
    $this->log("Message: Decompression finished on " . @date(DATE_RFC1123, (int) $timeEnd) .
        " - Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)");

    return true;
}
330 |
331 | /*
332 | * Convert the uncompressed data stream to human-readable format
333 | *
334 | * @access public
335 | * @author windylea
336 | * @return bool Returns TRUE on success, otherwise FALSE if an error occured
337 | */
338 | function convert()
339 | {
340 | if (!$this->inflatedHandle)
341 | {
342 | $return = $this->unpack();
343 | if (!$return)
344 | {
345 | return false;
346 | }
347 | }
348 |
349 | if (empty($this->output))
350 | {
351 | $slashes = (strtoupper(substr(PHP_OS, 0, 3)) === "WIN") ? "\\" : "/";
352 | $pathInfo = pathinfo($this->input);
353 | $this->output = $pathInfo["dirname"] . $slashes . $pathInfo["filename"] . ".txt";
354 | }
355 |
356 | $this->encodingWord = self::validateEncoding($this->encodingWord, "UTF-8");
357 | $this->encodingDef = self::validateEncoding($this->encodingDef, "UTF-16LE");
358 |
359 | $timeStart = microtime(true);
360 | $this->log("Message: Conversion started on " . @date(DATE_RFC1123, $timeStart));
361 | $outputHandle = fopen($this->output, "w+");
362 |
363 | $dataLength = 10;
364 | $offsetWord = $this->prop["offsetWord"];
365 | $offsetXml = $this->prop["offsetXml"];
366 | $totalEntries = ($offsetWord / $dataLength) - 1;
367 |
368 | for ($i = 0; $i < $totalEntries; $i++)
369 | {
370 | fseek($this->inflatedHandle, $dataLength * $i);
371 | $lastWordOffset = fread($this->inflatedHandle, 4);
372 |
373 | if (strlen($lastWordOffset) == 4)
374 | {
375 | $lastWordOffset = current(unpack("I", $lastWordOffset));
376 | $lastXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4)));
377 | $flags = ord(fread($this->inflatedHandle, 1)) & 0xff;
378 | $crossRefs = ord(fread($this->inflatedHandle, 1)) & 0xff;
379 | $currentWordOffset = current(unpack("I", fread($this->inflatedHandle, 4)));
380 | $currentXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4)));
381 |
382 | if ($currentXmlOffset - $lastXmlOffset > 0)
383 | {
384 | fseek($this->inflatedHandle, $offsetXml + $lastXmlOffset);
385 | $xml = fread($this->inflatedHandle, ($currentXmlOffset - $lastXmlOffset));
386 | } else
387 | {
388 | $xml = "";
389 | }
390 |
391 | for($j = $crossRefs; $j > 0; $j--)
392 | {
393 | fseek($this->inflatedHandle, $offsetWord + $lastWordOffset);
394 | $currentRef = current(unpack("I", fread($this->inflatedHandle, 4)));
395 |
396 | fseek($this->inflatedHandle, $dataLength * $currentRef);
397 | fseek($this->inflatedHandle, 4, SEEK_CUR);
398 | $lastXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4)));
399 |
400 | fseek($this->inflatedHandle, 6, SEEK_CUR);
401 | $currentXmlOffset = current(unpack("I", fread($this->inflatedHandle, 4)));
402 |
403 | fseek($this->inflatedHandle,$offsetXml + $lastXmlOffset);
404 | $xml .= fread($this->inflatedHandle, ($currentXmlOffset - $lastXmlOffset));
405 |
406 | $lastWordOffset += 4;
407 | }
408 |
409 | $xml = @mb_convert_encoding($xml, "UTF-8", $this->encodingDef);
410 | if($currentWordOffset - $lastWordOffset <= 0)
411 | {
412 | continue;
413 | }
414 |
415 | $leftPosition = strpos($xml, "");
417 |
418 | if (strpos($xml, " $leftPosition)
423 | ? $leftPosition : $rightPosition;
424 | $xml = substr($xml, $position, $length);
425 |
426 | # Remove image tags
427 | $xml = preg_replace("/
/i", "", $xml);
428 |
429 | # Dictionary cross-reference
430 | $xml = str_replace('dict://key.[$DictID]/', "", $xml);
431 | } else
432 | {
433 | /*
434 | * Replace some of Lingoes's custom markup tags
435 | */
436 |
437 | # Remove self-closing tags except line break
438 | $xml = preg_replace('/<[^>n]+?\/>/', '', $xml);
439 |
440 | # Text color
441 | //$xml = str_replace('', '', $xml);
443 |
444 | # Dictionary cross-reference
445 | //$xml = str_replace('', '', $xml);
447 |
448 | # Font size
449 | $xml = str_replace('<Ã>', '', $xml);
450 | $xml = str_replace('Ã>', '', $xml);
451 |
452 | # Font size
453 | $xml = str_replace('<Å>', '', $xml);
454 | $xml = str_replace('Å>', '', $xml);
455 |
456 | # Bold text
457 | $xml = str_replace('', '', $xml);
458 | $xml = str_replace('', '', $xml);
459 |
460 | # Styling elements
461 | //$xml = str_replace('<Í P="', '', $xml);
466 | $xml = str_replace('', '', $xml);
467 |
468 | # Special text color
469 | $xml = str_replace('', '', $xml);
470 | $xml = str_replace('', '', $xml);
471 |
472 | # Unordered list elements
473 | $xml = preg_replace('/<ï>/', '- ', $xml, 1);
474 | $xml = preg_replace('/<\/ï>(?!.*<\/ï>)/', '
', $xml, 1);
475 | $xml = str_replace('<ï>', '', $xml);
476 | $xml = str_replace('ï>', '', $xml);
477 |
478 | # Italic text
479 | $xml = str_replace('', '', $xml);
480 | $xml = str_replace('', '', $xml);
481 |
482 | # Line break
483 | $xml = str_replace('', '
', $xml);
484 | }
485 |
486 | # Escape slashes
487 | $xml = str_replace("\\", "\\\\", $xml);
488 |
489 | fseek($this->inflatedHandle, $offsetWord + $lastWordOffset);
490 | $word = fread($this->inflatedHandle, ($currentWordOffset - $lastWordOffset));
491 | $word = @mb_convert_encoding($word, "UTF-8", $this->encodingWord);
492 |
493 | fwrite($outputHandle, $word . "\t" . $xml . "\r\n");
494 | } else
495 | {
496 | break;
497 | }
498 | }
499 |
500 | fclose($this->inflatedHandle);
501 | fclose($this->inputHandle);
502 | @unlink($this->input . ".inflated");
503 |
504 | $timeEnd = microtime(true);
505 | $this->log("Message: Conversion finished on " . @date(DATE_RFC1123, $timeStart) .
506 | " - Execution time: " . round(($timeEnd - $timeStart), 2) . " (s)");
507 | return true;
508 | }
509 | }
510 | ?>
--------------------------------------------------------------------------------