├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── analysis_options.yaml ├── bin └── main.dart ├── lib ├── mdict_reader.dart └── src │ ├── input_stream.dart │ └── mdict_reader_base.dart └── pubspec.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | # Files and directories created by pub 2 | .dart_tool/ 3 | .packages 4 | # Remove the following pattern if you wish to check in your lock file 5 | pubspec.lock 6 | 7 | # Conventional directory for build outputs 8 | build/ 9 | 10 | # Directory created by dartdoc 11 | doc/api/ 12 | 13 | .idea 14 | *.iml 15 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## 0.0.1 2 | 3 | - Initial version. 4 | 5 | ## 1.0.0 6 | 7 | - Dart 2.12 requirement and null safety. 8 | 9 | ## 1.1.0 10 | 11 | - Support mdict files created with engine 1.2. 12 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Qingshan 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Mdict Reader 2 | ============ 3 | 4 | A Dart library for reading mdict files. Supports the MDX and MDD file formats. 5 | 6 | [![Pub Package](https://img.shields.io/pub/v/mdict_reader.svg)](https://pub.dev/packages/mdict_reader) 7 | 8 | Tutorial 9 | -------- 10 | 11 | ### Using the API 12 | 13 | Import the package: 14 | 15 | ```dart 16 | import 'package:mdict_reader/mdict_reader.dart'; 17 | ``` 18 | 19 | And call some code: 20 | 21 | ```dart 22 | var mdict = MdictReader('example.mdx'); 23 | var record = mdict.query('hello'); 24 | stdout.write(record); 25 | ``` 26 | 27 | ### Using the command-line 28 | 29 | Read a definition from an MDX file: 30 | 31 | ```shell 32 | dart bin/main.dart definition [mdx_file] [query_word] 33 | ``` 34 | 35 | Parse sound URLs from an MDX file: 36 | 37 | ```shell 38 | dart bin/main.dart sounds [mdx_file] [query_word] 39 | ``` 40 | 41 | Read data from an MDD file (binary is written directly to stdout): 42 | 43 | ```shell 44 | dart bin/main.dart read [mdd_file] [sound_url] 45 | ``` 46 | 47 | Misc 48 | ---- 49 | 50 | ### Acknowledgements 51 | 52 | This project was initially converted from [mdict analysis](https://bitbucket.org/xwang/mdict-analysis). 53 | 54 | ### License 55 | 56 | The MIT License, see [LICENSE](https://github.com/qingshan/mdict_reader/raw/main/LICENSE). 
57 | -------------------------------------------------------------------------------- /analysis_options.yaml: -------------------------------------------------------------------------------- 1 | # This file configures the static analysis results for your project (errors, 2 | # warnings, and lints). 3 | # 4 | # This enables the 'recommended' set of lints from `package:lints`. 5 | # This set helps identify many issues that may lead to problems when running 6 | # or consuming Dart code, and enforces writing Dart using a single, idiomatic 7 | # style and format. 8 | # 9 | # If you want a smaller set of lints you can change this to specify 10 | # 'package:lints/core.yaml'. These are just the most critical lints 11 | # (the recommended set includes the core lints). 12 | # The core lints are also what is used by pub.dev for scoring packages. 13 | 14 | include: package:lints/recommended.yaml 15 | 16 | # Uncomment the following section to specify additional rules. 17 | 18 | # linter: 19 | # rules: 20 | # - camel_case_types 21 | 22 | # analyzer: 23 | # exclude: 24 | # - path/to/excluded/files/** 25 | 26 | # For more information about the core and recommended set of lints, see 27 | # https://dart.dev/go/core-lints 28 | 29 | # For additional information about configuring this file, see 30 | # https://dart.dev/guides/language/analysis-options 31 | -------------------------------------------------------------------------------- /bin/main.dart: -------------------------------------------------------------------------------- 1 | import 'dart:io'; 2 | import 'package:args/args.dart'; 3 | import 'package:mdict_reader/mdict_reader.dart'; 4 | 5 | void main(List args) { 6 | var parser = ArgParser(); 7 | var results = parser.parse(args); 8 | var command = results.rest[0]; 9 | var path = results.rest[1]; 10 | var words = results.rest.sublist(2); 11 | var mdict = MdictReader(path); 12 | if (words.isEmpty) { 13 | words = mdict.keys(); 14 | } else { 15 | words = words.expand((word) { 16 | 
if (word.endsWith('.txt')) { 17 | return File(word).readAsLinesSync(); 18 | } 19 | return [word]; 20 | }).toList(); 21 | } 22 | if ('words' == command) { 23 | print(words.join("\n")); 24 | } else if ('sounds' == command) { 25 | words.where((word) => word.isNotEmpty).forEach((word) { 26 | var record = mdict.query(word); 27 | var sounds = parseSounds(record); 28 | sounds.forEach((sound) { 29 | print("$word\t$sound"); 30 | }); 31 | }); 32 | } else if ('export' == command) { 33 | words.where((word) => word.isNotEmpty).forEach((word) { 34 | var file; 35 | if (word.startsWith('/')) { 36 | file = File(word.substring(1)); 37 | } else if (word.contains('\t')) { 38 | var parts = word.split('\t'); 39 | file = File(parts[0]); 40 | word = parts[1]; 41 | } else { 42 | file = File(word + '.html'); 43 | } 44 | word = word.replaceAll('/', '\\'); 45 | if (file.existsSync()) { 46 | return; 47 | } 48 | file.createSync(recursive: true); 49 | var out = file.openWrite(); 50 | var record = mdict.query(word); 51 | if (record is String) { 52 | out.write(record); 53 | } else if (record != null) { 54 | out.add(record); 55 | } 56 | return out.close(); 57 | }); 58 | } else { 59 | words.where((word) => word.isNotEmpty).forEach((word) { 60 | var record = mdict.query(word); 61 | if (record is String) { 62 | stdout.write(record); 63 | } else if (record != null) { // query() returns null when an MDD key is missing 64 | stdout.add(record); 65 | } 66 | }); 67 | } 68 | } 69 | /// Extracts every `sound:/` link target from [html], with '/' replaced by '\'. 70 | List<String> parseSounds(String html) { 71 | var re = RegExp(' href="sound:/(\\S+)"'); 72 | var sounds = re 73 | .allMatches(html) 74 | .map((match) => match.group(1)) 75 | .map((sound) => sound?.replaceAll('/', '\\')) 76 | .whereType<String>() 77 | .toList(); 78 | return sounds; 79 | } 80 | -------------------------------------------------------------------------------- /lib/mdict_reader.dart: -------------------------------------------------------------------------------- 1 | /// Support for reading mdict file. 
2 | /// 3 | /// Reader for mdict file 4 | library mdict_reader; 5 | 6 | export 'src/mdict_reader_base.dart'; 7 | -------------------------------------------------------------------------------- /lib/src/input_stream.dart: -------------------------------------------------------------------------------- 1 | import 'dart:convert'; 2 | import 'dart:io'; 3 | import 'dart:typed_data'; 4 | 5 | enum ByteOrder { 6 | littleEndian, 7 | bigEndian, 8 | } 9 | 10 | abstract class InputStream { 11 | /// The current read position relative to the start of the buffer. 12 | int get position; 13 | 14 | /// How many bytes are left in the stream. 15 | int get length; 16 | 17 | /// Is the current position at the end of the stream? 18 | bool get isEOS; 19 | 20 | /// Reset to the beginning of the stream. 21 | void reset(); 22 | 23 | /// Rewind the read head of the stream by the given number of bytes. 24 | void rewind([int length = 1]); 25 | 26 | /// Move the read position by [length] bytes. 27 | void skip(int length); 28 | 29 | /// Read a single byte. 30 | int readByte(); 31 | 32 | /// Read [length] bytes from the stream. 33 | Uint8List readBytes(int length); 34 | 35 | /// Read a null-terminated string, or if [len] is provided, that number of 36 | /// bytes returned as a string. 37 | String readString({int length, bool utf8}); 38 | 39 | /// Read a 16-bit word from the stream. 40 | int readUint16(); 41 | 42 | /// Read a 32-bit word from the stream. 43 | int readUint32(); 44 | 45 | /// Read a 64-bit word form the stream. 46 | int readUint64(); 47 | 48 | Uint8List toUint8List(); 49 | } 50 | 51 | /// A buffer that can be read as a stream of bytes 52 | class BytesInputStream extends InputStream { 53 | Uint8List _buffer; 54 | int _offset; 55 | int _start; 56 | ByteOrder byteOrder; 57 | late int _length; 58 | 59 | /// Create a InputStream for reading from a List 60 | BytesInputStream(Uint8List buffer, 61 | {this.byteOrder = ByteOrder.bigEndian, int start = 0, int? 
length}) 62 | : _buffer = buffer, 63 | _start = start, 64 | _offset = start, 65 | _length = length ?? (buffer.length - start); // default: everything from [start] to the end of the buffer 66 | 67 | /// The current read position relative to the start of the buffer. 68 | @override 69 | int get position => _offset - _start; 70 | 71 | /// How many bytes are left in the stream. 72 | @override 73 | int get length => _length - (_offset - _start); 74 | 75 | /// Is the current position at the end of the stream? 76 | @override 77 | bool get isEOS => _offset >= (_start + _length); 78 | 79 | /// Reset to the beginning of the stream. 80 | @override 81 | void reset() { 82 | _offset = _start; 83 | } 84 | 85 | /// Rewind the read head by [length] bytes, never moving before the start of this stream. 86 | @override 87 | void rewind([int length = 1]) { 88 | _offset -= length; 89 | if (_offset < _start) { 90 | _offset = _start; 91 | } 92 | } 93 | 94 | /// Access the buffer relative from the current position. 95 | int operator [](int index) => _buffer[_offset + index]; 96 | 97 | /// Return a InputStream to read a subset of this stream. It does not 98 | /// move the read position of this stream. [position] is specified relative 99 | /// to the start of the buffer. If [position] is not specified, the current 100 | /// read position is used. If [length] is not specified, the remainder of this 101 | /// stream is used. 102 | InputStream subset([int? position, int? length]) { 103 | if (position == null) { 104 | position = _offset; 105 | } else { 106 | position += _start; 107 | } 108 | 109 | if (length == null || length < 0) { 110 | length = _length - (position - _start); 111 | } 112 | 113 | return BytesInputStream(_buffer, 114 | byteOrder: byteOrder, start: position, length: length); 115 | } 116 | 117 | /// Returns the position of the given [value] within the buffer, starting 118 | /// from the current read position with the given [offset]. The position 119 | /// returned is relative to the start of the buffer, or -1 if the [value] 120 | /// was not found. 
121 | int indexOf(int value, [int offset = 0]) { 122 | for (var i = _offset + offset, end = _offset + length; 123 | i < end; 124 | ++i) { 125 | if (_buffer[i] == value) { 126 | return i - _start; 127 | } 128 | } 129 | return -1; 130 | } 131 | 132 | /// Move the read position by [length] bytes. 133 | @override 134 | void skip(int length) { 135 | _offset += length; 136 | } 137 | 138 | /// Read a single byte. 139 | @override 140 | int readByte() { 141 | return _buffer[_offset++]; 142 | } 143 | 144 | /// Read [length] bytes from the stream. 145 | @override 146 | Uint8List readBytes(int length) { 147 | final bytes = subset(_offset - _start, length); 148 | _offset += bytes.length; 149 | return bytes.toUint8List(); 150 | } 151 | 152 | /// Read a null-terminated string, or if [len] is provided, that number of 153 | /// bytes returned as a string. 154 | @override 155 | String readString({int length = -1, bool utf8 = true}) { 156 | final codes = []; 157 | if (length == -1) { 158 | while (!isEOS) { 159 | var c = readByte(); 160 | if (!utf8) { 161 | var c2 = readByte(); 162 | c = (c2 << 8) | c; 163 | } 164 | if (c == 0) { 165 | break; 166 | } 167 | codes.add(c); 168 | } 169 | } else { 170 | while (length > 0) { 171 | var c = readByte(); 172 | if (!utf8) { 173 | var c2 = readByte(); 174 | c = (c2 << 8) | c; 175 | } 176 | length--; 177 | if (c == 0) { 178 | break; 179 | } 180 | codes.add(c); 181 | } 182 | } 183 | 184 | return utf8 ? Utf8Decoder().convert(codes) : String.fromCharCodes(codes); 185 | } 186 | 187 | /// Read a 16-bit word from the stream. 188 | @override 189 | int readUint16() { 190 | final b1 = _buffer[_offset++] & 0xff; 191 | final b2 = _buffer[_offset++] & 0xff; 192 | if (byteOrder == ByteOrder.bigEndian) { 193 | return (b1 << 8) | b2; 194 | } 195 | return (b2 << 8) | b1; 196 | } 197 | 198 | /// Read a 32-bit word from the stream. 
199 | @override 200 | int readUint32() { 201 | final b1 = _buffer[_offset++] & 0xff; 202 | final b2 = _buffer[_offset++] & 0xff; 203 | final b3 = _buffer[_offset++] & 0xff; 204 | final b4 = _buffer[_offset++] & 0xff; 205 | if (byteOrder == ByteOrder.bigEndian) { 206 | return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4; 207 | } 208 | return (b4 << 24) | (b3 << 16) | (b2 << 8) | b1; 209 | } 210 | 211 | /// Read a 64-bit word form the stream. 212 | @override 213 | int readUint64() { 214 | final b1 = _buffer[_offset++] & 0xff; 215 | final b2 = _buffer[_offset++] & 0xff; 216 | final b3 = _buffer[_offset++] & 0xff; 217 | final b4 = _buffer[_offset++] & 0xff; 218 | final b5 = _buffer[_offset++] & 0xff; 219 | final b6 = _buffer[_offset++] & 0xff; 220 | final b7 = _buffer[_offset++] & 0xff; 221 | final b8 = _buffer[_offset++] & 0xff; 222 | if (byteOrder == ByteOrder.bigEndian) { 223 | return (b1 << 56) | 224 | (b2 << 48) | 225 | (b3 << 40) | 226 | (b4 << 32) | 227 | (b5 << 24) | 228 | (b6 << 16) | 229 | (b7 << 8) | 230 | b8; 231 | } 232 | return (b8 << 56) | 233 | (b7 << 48) | 234 | (b6 << 40) | 235 | (b5 << 32) | 236 | (b4 << 24) | 237 | (b3 << 16) | 238 | (b2 << 8) | 239 | b1; 240 | } 241 | 242 | @override 243 | Uint8List toUint8List() { 244 | var len = length; 245 | if ((_offset + len) > _buffer.length) { 246 | len = _buffer.length - _offset; 247 | } 248 | final bytes = 249 | Uint8List.view(_buffer.buffer, _buffer.offsetInBytes + _offset, len); 250 | return bytes; 251 | } 252 | 253 | } 254 | 255 | class FileInputStream extends InputStream { 256 | static const int _kDefaultBufferSize = 4096; 257 | final String path; 258 | final ByteOrder byteOrder; 259 | late final RandomAccessFile _file; 260 | late final int _fileSize; 261 | int _filePosition = 0; 262 | final Uint8List _buffer; 263 | int _bufferSize = 0; 264 | int _bufferPosition = 0; 265 | 266 | FileInputStream(this.path, 267 | {this.byteOrder = ByteOrder.bigEndian, 268 | int bufferSize = _kDefaultBufferSize}): 269 | 
_buffer = Uint8List(bufferSize) { 270 | _file = File(path).openSync(); 271 | _fileSize = _file.lengthSync(); 272 | _readBuffer(); 273 | } 274 | 275 | void close() { 276 | _file.closeSync(); 277 | } 278 | 279 | @override 280 | int get length => _fileSize; 281 | 282 | @override 283 | int get position => _filePosition - bufferRemaining; 284 | 285 | @override 286 | bool get isEOS => 287 | (_filePosition >= _fileSize) && (_bufferPosition >= _bufferSize); 288 | 289 | int get bufferSize => _bufferSize; 290 | 291 | int get bufferPosition => _bufferPosition; 292 | 293 | int get bufferRemaining => _bufferSize - _bufferPosition; 294 | 295 | int get fileRemaining => _fileSize - _filePosition; 296 | 297 | @override 298 | void reset() { 299 | _filePosition = 0; 300 | _file.setPositionSync(0); 301 | _readBuffer(); 302 | } 303 | 304 | @override 305 | void skip(int length) { 306 | if ((_bufferPosition + length) < _bufferSize) { 307 | _bufferPosition += length; 308 | } else { 309 | var remaining = length - (_bufferSize - _bufferPosition); 310 | while (!isEOS) { 311 | _readBuffer(); 312 | if (remaining < _bufferSize) { 313 | _bufferPosition += remaining; 314 | break; 315 | } 316 | remaining -= _bufferSize; 317 | } 318 | } 319 | } 320 | 321 | @override 322 | void rewind([int length = 1]) { 323 | if (_bufferPosition - length < 0) { 324 | var remaining = (_bufferPosition - length).abs(); 325 | _filePosition = _filePosition - _bufferSize - remaining; 326 | if (_filePosition < 0) { 327 | _filePosition = 0; 328 | } 329 | _file.setPositionSync(_filePosition); 330 | _readBuffer(); 331 | return; 332 | } 333 | _bufferPosition -= length; 334 | } 335 | 336 | @override 337 | int readByte() { 338 | if (isEOS) { 339 | return 0; 340 | } 341 | if (_bufferPosition >= _bufferSize) { 342 | _readBuffer(); 343 | } 344 | if (_bufferPosition >= _bufferSize) { 345 | return 0; 346 | } 347 | return _buffer[_bufferPosition++] & 0xff; 348 | } 349 | 350 | /// Read a 16-bit word from the stream. 
351 | @override 352 | int readUint16() { 353 | var b1 = 0; 354 | var b2 = 0; 355 | if ((_bufferPosition + 2) < _bufferSize) { 356 | b1 = _buffer[_bufferPosition++] & 0xff; 357 | b2 = _buffer[_bufferPosition++] & 0xff; 358 | } else { 359 | b1 = readByte(); 360 | b2 = readByte(); 361 | } 362 | if (byteOrder == ByteOrder.bigEndian) { 363 | return (b1 << 8) | b2; 364 | } 365 | return (b2 << 8) | b1; 366 | } 367 | 368 | /// Read a 32-bit word from the stream. 369 | @override 370 | int readUint32() { 371 | var b1 = 0; 372 | var b2 = 0; 373 | var b3 = 0; 374 | var b4 = 0; 375 | if ((_bufferPosition + 4) < _bufferSize) { 376 | b1 = _buffer[_bufferPosition++] & 0xff; 377 | b2 = _buffer[_bufferPosition++] & 0xff; 378 | b3 = _buffer[_bufferPosition++] & 0xff; 379 | b4 = _buffer[_bufferPosition++] & 0xff; 380 | } else { 381 | b1 = readByte(); 382 | b2 = readByte(); 383 | b3 = readByte(); 384 | b4 = readByte(); 385 | } 386 | 387 | if (byteOrder == ByteOrder.bigEndian) { 388 | return (b1 << 24) | (b2 << 16) | (b3 << 8) | b4; 389 | } 390 | return (b4 << 24) | (b3 << 16) | (b2 << 8) | b1; 391 | } 392 | 393 | /// Read a 64-bit word form the stream. 
394 | @override 395 | int readUint64() { 396 | var b1 = 0; 397 | var b2 = 0; 398 | var b3 = 0; 399 | var b4 = 0; 400 | var b5 = 0; 401 | var b6 = 0; 402 | var b7 = 0; 403 | var b8 = 0; 404 | if ((_bufferPosition + 8) < _bufferSize) { 405 | b1 = _buffer[_bufferPosition++] & 0xff; 406 | b2 = _buffer[_bufferPosition++] & 0xff; 407 | b3 = _buffer[_bufferPosition++] & 0xff; 408 | b4 = _buffer[_bufferPosition++] & 0xff; 409 | b5 = _buffer[_bufferPosition++] & 0xff; 410 | b6 = _buffer[_bufferPosition++] & 0xff; 411 | b7 = _buffer[_bufferPosition++] & 0xff; 412 | b8 = _buffer[_bufferPosition++] & 0xff; 413 | } else { 414 | b1 = readByte(); 415 | b2 = readByte(); 416 | b3 = readByte(); 417 | b4 = readByte(); 418 | b5 = readByte(); 419 | b6 = readByte(); 420 | b7 = readByte(); 421 | b8 = readByte(); 422 | } 423 | 424 | if (byteOrder == ByteOrder.bigEndian) { 425 | return (b1 << 56) | 426 | (b2 << 48) | 427 | (b3 << 40) | 428 | (b4 << 32) | 429 | (b5 << 24) | 430 | (b6 << 16) | 431 | (b7 << 8) | 432 | b8; 433 | } 434 | return (b8 << 56) | 435 | (b7 << 48) | 436 | (b6 << 40) | 437 | (b5 << 32) | 438 | (b4 << 24) | 439 | (b3 << 16) | 440 | (b2 << 8) | 441 | b1; 442 | } 443 | 444 | @override 445 | Uint8List readBytes(int length) { 446 | if (isEOS) { 447 | return Uint8List.fromList([]); 448 | } 449 | 450 | if (_bufferPosition == _bufferSize) { 451 | _readBuffer(); 452 | } 453 | 454 | if (_remainingBufferSize >= length) { 455 | final bytes = _buffer.sublist(_bufferPosition, _bufferPosition + length); 456 | _bufferPosition += length; 457 | return bytes; 458 | } 459 | 460 | var totalRemaining = fileRemaining + _remainingBufferSize; 461 | if (length > totalRemaining) { 462 | length = totalRemaining; 463 | } 464 | 465 | final bytes = Uint8List(length); 466 | 467 | var offset = 0; 468 | while (length > 0) { 469 | var remaining = _bufferSize - _bufferPosition; 470 | var end = (length > remaining) ? 
_bufferSize : (_bufferPosition + length); 471 | final l = _buffer.sublist(_bufferPosition, end); 472 | // TODO probably better to use bytes.setRange here. 473 | for (var i = 0; i < l.length; ++i) { 474 | bytes[offset + i] = l[i]; 475 | } 476 | offset += l.length; 477 | length -= l.length; 478 | _bufferPosition = end; 479 | if (length > 0 && _bufferPosition == _bufferSize) { 480 | _readBuffer(); 481 | if (_bufferSize == 0) { 482 | break; 483 | } 484 | } 485 | } 486 | 487 | return bytes; 488 | } 489 | 490 | @override 491 | Uint8List toUint8List() { 492 | return readBytes(_fileSize); 493 | } 494 | 495 | /// Read a null-terminated string, or if [length] is provided, that number of 496 | /// bytes returned as a string. 497 | @override 498 | String readString({int length = -1, bool utf8 = true}) { 499 | final codes = []; 500 | if (length == -1) { 501 | while (!isEOS) { 502 | var c = readByte(); 503 | if (!utf8) { 504 | var c2 = readByte(); 505 | c = (c2 << 8) | c; 506 | } 507 | if (c == 0) { 508 | break; 509 | } 510 | codes.add(c); 511 | } 512 | } else { 513 | while (length > 0) { 514 | var c = readByte(); 515 | if (!utf8) { 516 | var c2 = readByte(); 517 | c = (c2 << 8) | c; 518 | } 519 | length--; 520 | if (c == 0) { 521 | break; 522 | } 523 | codes.add(c); 524 | } 525 | } 526 | 527 | return utf8 ? 
Utf8Decoder().convert(codes) : String.fromCharCodes(codes); 528 | } 529 | 530 | int get _remainingBufferSize => _bufferSize - _bufferPosition; 531 | 532 | void _readBuffer() { 533 | _bufferPosition = 0; 534 | _bufferSize = _file.readIntoSync(_buffer); 535 | if (_bufferSize == 0) { 536 | return; 537 | } 538 | _filePosition += _bufferSize; 539 | } 540 | } 541 | -------------------------------------------------------------------------------- /lib/src/mdict_reader_base.dart: -------------------------------------------------------------------------------- 1 | import 'dart:io'; 2 | import 'dart:typed_data'; 3 | import "package:pointycastle/pointycastle.dart"; 4 | import 'package:xml/xml.dart'; 5 | import 'input_stream.dart'; 6 | 7 | class Key { 8 | String key; 9 | int offset; 10 | int length; 11 | Key(this.key, this.offset, [this.length = -1]); 12 | } 13 | 14 | class Record { 15 | int compSize; 16 | int decompSize; 17 | Record(this.compSize, this.decompSize); 18 | } 19 | 20 | class MdictReader { 21 | String path; 22 | late final Map _header; 23 | late final double _version; 24 | late final int _numberWidth; 25 | late final List _keyList; 26 | late final List _recordList; 27 | late final int _recordBlockOffset; 28 | 29 | MdictReader(this.path) { 30 | var fin = FileInputStream(path, bufferSize: 64 * 1024); 31 | _header = _readHeader(fin); 32 | _version = double.parse(_header['GeneratedByEngineVersion']!); 33 | _numberWidth = _version >= 2.0 ? 
8 : 4; 34 | _keyList = _read_keys(fin); 35 | _recordList = _readRecords(fin); 36 | _recordBlockOffset = fin.position; 37 | fin.close(); 38 | } 39 | 40 | List keys() { 41 | return _keyList.map((key) => key.key).toList(); 42 | } 43 | 44 | dynamic query(String word) { 45 | var mdd = path.endsWith('.mdd'); 46 | var keys = _keyList.where((key) => key.key == word).toList(); 47 | var records = keys 48 | .map((key) => _readRecord(key.key, key.offset, key.length, mdd)) 49 | .toList(); 50 | if (mdd) { 51 | if (records.length == 0) { 52 | return null; 53 | } 54 | return records[0]; 55 | } 56 | return records.join('\n---\n'); 57 | } 58 | 59 | Map _readHeader(FileInputStream fin) { 60 | var headerLength = fin.readUint32(); 61 | var header = fin.readString(length: headerLength, utf8: false); 62 | fin.skip(4); 63 | return _parseHeader(header); 64 | } 65 | 66 | Map _parseHeader(String header) { 67 | var attributes = {}; 68 | var doc = XmlDocument.parse(header); 69 | for (var a in doc.rootElement.attributes) { 70 | attributes[a.name.local] = a.value; 71 | } 72 | return attributes; 73 | } 74 | /// Reads the key index and every key block from [fin]; returns all keys with their record offsets. 75 | List<Key> _read_keys(FileInputStream fin) { 76 | var encrypted = _header['Encrypted'] == '2'; 77 | var textTerm = _version >= 2.0 ? 1 : 0; // engine >= 2.0 stores a terminator after each index word 78 | var utf8 = _header['Encoding'] == 'UTF-8'; 79 | var keyNumBlocks = _readNumber(fin); 80 | var keyNumEntries = _readNumber(fin); 81 | if (_version >= 2.0) { 82 | _readNumber(fin); 83 | } 84 | var keyIndexCompLen = _readNumber(fin); 85 | var keyBlocksLen = _readNumber(fin); 86 | if (_version >= 2.0) { 87 | fin.skip(4); 88 | } 89 | var compSize = List.filled(keyNumBlocks, 0); 90 | var decompSize = List.filled(keyNumBlocks, 0); 91 | var numEntries = List.filled(keyNumBlocks, 0); 92 | var indexCompBlock = fin.readBytes(keyIndexCompLen); 93 | if (encrypted) { 94 | var key = _computeKey(indexCompBlock); 95 | _decryptBlock(key, indexCompBlock, 8); 96 | } 97 | var indexDs = _version >= 2.0 ? 
_decompressBlock(indexCompBlock) 98 | : BytesInputStream(indexCompBlock); 99 | for (var i = 0; i < keyNumBlocks; i++) { 100 | numEntries[i] = _readNumber(indexDs); 101 | var firstWordSize = _readShort(indexDs); 102 | indexDs.readString(length: firstWordSize + textTerm, utf8: utf8); 103 | var lastWordSize = _readShort(indexDs); 104 | indexDs.readString(length: lastWordSize + textTerm, utf8: utf8); 105 | // The first/last words are not needed; they are read only to advance the index stream. 106 | compSize[i] = _readNumber(indexDs); 107 | decompSize[i] = _readNumber(indexDs); 108 | } 109 | var keyList = <Key>[]; 110 | for (var i = 0; i < keyNumBlocks; i++) { 111 | var keyCompBlock = fin.readBytes(compSize[i]); 112 | var blockIn = _decompressBlock(keyCompBlock); 113 | for (var j = 0; j < numEntries[i]; j++) { 114 | var offset = _readNumber(blockIn); 115 | var word = blockIn.readString(utf8: utf8); 116 | if (keyList.isNotEmpty) { 117 | // The previous entry's record ends where this one starts. 118 | keyList.last.length = offset - keyList.last.offset; 119 | } 120 | keyList.add(Key(word, offset)); 121 | } 122 | // No early exit: every key block must be consumed so [fin] ends at the record section. 123 | } 124 | return keyList; 125 | } 126 | 127 | List _readRecords(FileInputStream fin) { 128 | var recordNumBlocks = _readNumber(fin); 129 | var recordNumEntries = _readNumber(fin); 130 | var recordIndexLen = _readNumber(fin); 131 | var recordBlocksLen = _readNumber(fin); 132 | var recordList = []; 133 | for (var i = 0; i < recordNumBlocks; i++) { 134 | var recordBlockCompSize = _readNumber(fin); 135 | var recordBlockDecompSize = _readNumber(fin); 136 | recordList.add(Record(recordBlockCompSize, recordBlockDecompSize)); 137 | } 138 | return recordList; 139 | } 140 | 141 | dynamic _readRecord(String word, int offset, int length, bool mdd) { 142 | var compressedOffset = 0; 143 | var decompressedOffset = 0; 144 | var compressedSize = 0; 145 | var decompressedSize = 0; 146 | for (var record in _recordList) { 147 | compressedSize = record.compSize; 148 | decompressedSize = 
record.decompSize; 149 | if ((decompressedOffset + decompressedSize) > offset) { 150 | break; 151 | } 152 | decompressedOffset += decompressedSize; 153 | compressedOffset += compressedSize; 154 | } 155 | var fin = File(path).openSync(); 156 | fin.setPositionSync(_recordBlockOffset + compressedOffset); 157 | var block = fin.readSync(compressedSize); 158 | fin.closeSync(); 159 | var blockIn = _decompressBlock(block); 160 | blockIn.skip(offset - decompressedOffset); 161 | if (mdd) { 162 | var recordBlock = blockIn.toUint8List(); 163 | if (length > 0) { 164 | return recordBlock.sublist(0, length); 165 | } else { 166 | return recordBlock; 167 | } 168 | } else { 169 | var utf8 = _header['Encoding'] == 'UTF-8'; 170 | return blockIn.readString(length: length, utf8: utf8); 171 | } 172 | } 173 | 174 | InputStream _decompressBlock(Uint8List compBlock) { 175 | var flag = compBlock[0]; 176 | var data = compBlock.sublist(8); 177 | if (flag == 1) { 178 | throw FormatException("LZO compression is not supported"); 179 | } else if (flag == 2) { 180 | return BytesInputStream(Uint8List.fromList(zlib.decoder.convert(data))); 181 | } else { 182 | return BytesInputStream(data); 183 | } 184 | } 185 | 186 | void _decryptBlock(Uint8List key, Uint8List data, int offset) { 187 | var previous = 0x36; 188 | for (var i = 0; i < data.length - offset; i++) { 189 | var t = (data[i + offset] >> 4 | data[i + offset] << 4) & 0xff; 190 | t = t ^ previous ^ (i & 0xff) ^ key[i % key.length]; 191 | previous = data[i + offset]; 192 | data[i + offset] = t; 193 | } 194 | } 195 | 196 | Uint8List _computeKey(Uint8List data) { 197 | var ripemd128 = Digest('RIPEMD-128'); 198 | ripemd128.update(data, 4, 4); 199 | ripemd128.update( 200 | Uint8List.fromList(const [0x95, 0x36, 0x00, 0x00]), 0, 4); 201 | var key = Uint8List(16); 202 | ripemd128.doFinal(key, 0); 203 | return key; 204 | } 205 | 206 | int _readNumber(InputStream ins) { 207 | return _numberWidth == 8 ? 
ins.readUint64() : ins.readUint32(); 208 | } 209 | 210 | int _readShort(InputStream ins) { 211 | return _numberWidth == 8 ? ins.readUint16() : ins.readByte(); 212 | } 213 | } 214 | -------------------------------------------------------------------------------- /pubspec.yaml: -------------------------------------------------------------------------------- 1 | name: mdict_reader 2 | description: A dart library for reading mdict files. support MDX/MDD file formats. 3 | version: 1.1.0 4 | homepage: https://github.com/qingshan/mdict_reader 5 | 6 | environment: 7 | sdk: '>=2.12.1 <3.0.0' 8 | 9 | dependencies: 10 | args: ^2.3.1 11 | xml: ^6.0.1 12 | pointycastle: ^3.6.0 13 | 14 | dev_dependencies: 15 | lints: ^2.0.0 16 | test: ^1.16.0 17 | --------------------------------------------------------------------------------