├── .gitignore ├── README.md └── src └── LingoesLd2Reader.java /.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | out/ 3 | .idea/ 4 | Dict.iml 5 | indexFile 6 | *.iml 7 | *.indexFile 8 | \#*\# 9 | *.log 10 | *.log.* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LingoesStarDictConverter 2 | 3 | 这个项目会把灵格斯Lingoes的LD2文件转换成星际译王StarDict的格式。只是改动了Xiaoyun Zhu的源码,英文、德文字典测试可以使用,俄文好像有编码问题,未来会改进。 4 | 5 | # 使用 6 | 7 | 把源码中main函数里的ld2File改成你的ld2文件的路径,运行后会输出4个文件: 8 | 9 | ## .inflated 10 | 11 | 解压文件 12 | 13 | ## .ifo 14 | 星际译王词典信息文件 15 | 16 | ## .idx 17 | 星际译王词典索引文件 18 | 19 | ## .dict 20 | 星际译王词典的释义文件 21 | 22 | ## ifo, idx, dict就是星际译王需要的3个文件。 23 | -------------------------------------------------------------------------------- /src/LingoesLd2Reader.java: -------------------------------------------------------------------------------- 1 | import java.io.*; 2 | import java.nio.ByteBuffer; 3 | import java.nio.ByteOrder; 4 | import java.nio.CharBuffer; 5 | import java.nio.channels.FileChannel; 6 | import java.nio.charset.CharacterCodingException; 7 | import java.nio.charset.Charset; 8 | import java.nio.charset.CharsetDecoder; 9 | import java.nio.charset.CoderResult; 10 | import java.nio.charset.CodingErrorAction; 11 | import java.nio.file.Path; 12 | import java.nio.file.Paths; 13 | import java.util.ArrayList; 14 | import java.util.Arrays; 15 | import java.util.List; 16 | import java.util.zip.Inflater; 17 | import java.util.zip.InflaterInputStream; 18 | 19 | /** 20 | * Lingoes LD2/LDF File Reader 21 | * 22 | *
23 | * Lingoes Format overview: 24 | * 25 | * General Information: 26 | * - Dictionary data are stored in deflate streams. 27 | * - Index group information is stored in an index array in the LD2 file itself. 28 | * - Numbers are using little endian byte order. 29 | * - Definitions and xml data have UTF-8 or UTF-16LE encodings. 30 | * 31 | * LD2 file schema: 32 | * - File Header 33 | * - File Description 34 | * - Additional Information (optional) 35 | * - Index Group (corresponds to definitions in dictionary) 36 | * - Deflated Dictionary Streams 37 | * -- Index Data 38 | * --- Offsets of definitions 39 | * --- Offsets of translations 40 | * --- Flags 41 | * --- References to other translations 42 | * -- Definitions 43 | * -- Translations (xml) 44 | * 45 | * TODO: find encoding / language fields to replace auto-detect of encodings 46 | * 47 | *48 | * 49 | * @author keke 50 | * 51 | */ 52 | public class LingoesLd2Reader { 53 | private static final SensitiveStringDecoder[] AVAIL_ENCODINGS = { 54 | new SensitiveStringDecoder(Charset.forName("UTF-8")), 55 | new SensitiveStringDecoder(Charset.forName("UTF-16LE")), 56 | new SensitiveStringDecoder(Charset.forName("UTF-16BE")), 57 | new SensitiveStringDecoder(Charset.forName("EUC-JP")) }; 58 | 59 | public static void main(final String[] args) throws IOException { 60 | final String ld2File = "E:/FTP/LingoesDict/Vicon Russian-English Dictionary.ld2"; 61 | // read lingoes ld2 into byte array 62 | final ByteBuffer dataRawBytes; 63 | RandomAccessFile file = new RandomAccessFile(ld2File, "r"); 64 | FileChannel fChannel = file.getChannel(); 65 | dataRawBytes = ByteBuffer.allocate((int) fChannel.size()); 66 | fChannel.read(dataRawBytes); 67 | 68 | dataRawBytes.order(ByteOrder.LITTLE_ENDIAN); 69 | dataRawBytes.rewind(); 70 | 71 | System.out.println("文件:" + ld2File); 72 | System.out.println("类型:" 73 | + new String(dataRawBytes.array(), 0, 4, "ASCII")); 74 | System.out.println("版本:" + dataRawBytes.getShort(0x18) + "." 
75 | + dataRawBytes.getShort(0x1A)); 76 | System.out.println("ID: 0x" 77 | + Long.toHexString(dataRawBytes.getLong(0x1C))); 78 | 79 | final int offsetData = dataRawBytes.getInt(0x5C) + 0x60; 80 | if (dataRawBytes.limit() > offsetData) { 81 | System.out.println("简介地址:0x" + Integer.toHexString(offsetData)); 82 | final int type = dataRawBytes.getInt(offsetData); 83 | System.out.println("简介类型:0x" + Integer.toHexString(type)); 84 | final int offsetWithInfo = dataRawBytes.getInt(offsetData + 4) 85 | + offsetData + 12; 86 | if (type == 3) { 87 | // without additional information 88 | LingoesLd2Reader.readDictionary(ld2File, dataRawBytes, 89 | offsetData); 90 | } else if (dataRawBytes.limit() > (offsetWithInfo - 0x1C)) { 91 | LingoesLd2Reader.readDictionary(ld2File, dataRawBytes, 92 | offsetWithInfo); 93 | } else { 94 | System.err.println("文件不包含字典数据。网上字典?"); 95 | } 96 | } else { 97 | System.err.println("文件不包含字典数据。网上字典?"); 98 | } 99 | } 100 | 101 | private static long decompress(final String inflatedFile, 102 | final ByteBuffer data, final int offset, final int length, 103 | final boolean append) throws IOException { 104 | final Inflater inflator = new Inflater(); 105 | final InflaterInputStream in = new InflaterInputStream( 106 | new ByteArrayInputStream(data.array(), offset, length), 107 | inflator, 1024 * 8); 108 | final FileOutputStream out = new FileOutputStream(inflatedFile, append); 109 | 110 | LingoesLd2Reader.writeInputStream(in, out); 111 | final long bytesRead = inflator.getBytesRead(); 112 | inflator.end(); 113 | return bytesRead; 114 | } 115 | 116 | private static SensitiveStringDecoder[] detectEncodings( 117 | final ByteBuffer inflatedBytes, final int offsetWords, 118 | final int offsetXml, final int defTotal, final int dataLen, 119 | final int[] idxData, final String[] defData) { 120 | final int test = Math.min(defTotal, 10); 121 | for (int j = 0; j < LingoesLd2Reader.AVAIL_ENCODINGS.length; j++) { 122 | for (int k = 0; k < 
LingoesLd2Reader.AVAIL_ENCODINGS.length; k++) { 123 | try { 124 | for (int i = 0; i < test; i++) { 125 | LingoesLd2Reader.readDefinitionData(inflatedBytes, 126 | offsetWords, offsetXml, dataLen, 127 | LingoesLd2Reader.AVAIL_ENCODINGS[j], 128 | LingoesLd2Reader.AVAIL_ENCODINGS[k], idxData, 129 | defData, i); 130 | } 131 | System.out.println("词组编码:" 132 | + LingoesLd2Reader.AVAIL_ENCODINGS[j].name); 133 | System.out.println("XML编码:" 134 | + LingoesLd2Reader.AVAIL_ENCODINGS[k].name); 135 | return new SensitiveStringDecoder[] { 136 | LingoesLd2Reader.AVAIL_ENCODINGS[j], 137 | LingoesLd2Reader.AVAIL_ENCODINGS[k] }; 138 | } catch (final Throwable e) { 139 | // ignore 140 | } 141 | } 142 | } 143 | System.err.println("自动识别编码失败!选择UTF-16LE继续。"); 144 | return new SensitiveStringDecoder[] { 145 | LingoesLd2Reader.AVAIL_ENCODINGS[1], 146 | LingoesLd2Reader.AVAIL_ENCODINGS[1] }; 147 | } 148 | 149 | private static void extract(final String inflatedFile, 150 | final String indexFile, 151 | final String extractedOutputFile, 152 | final String informationFile, 153 | final int offsetDefs, 154 | final int offsetXml) 155 | 156 | throws IOException, FileNotFoundException, 157 | UnsupportedEncodingException { 158 | System.out.println("写入'" + extractedOutputFile + "'。。。"); 159 | int counter = 0; 160 | //解压后的文件 161 | RandomAccessFile file = new RandomAccessFile(inflatedFile, "r"); 162 | //索引文件 163 | // final FileWriter indexWriter = new FileWriter(indexFile); 164 | //final Writer indexWriter = new BufferedWriter(new OutputStreamWriter( new FileOutputStream("indexFile"),"UTF-8")); 165 | final FileOutputStream indexWriter = new FileOutputStream(indexFile); 166 | 167 | //释意文件 168 | final FileOutputStream outputWriter = new FileOutputStream(extractedOutputFile); 169 | 170 | // 读解压后的文件 171 | final FileChannel fChannel = file.getChannel(); 172 | final ByteBuffer dataRawBytes = ByteBuffer.allocate((int) fChannel.size()); 173 | fChannel.read(dataRawBytes); 174 | fChannel.close(); 175 | 
dataRawBytes.order(ByteOrder.LITTLE_ENDIAN); 176 | dataRawBytes.rewind(); 177 | 178 | final int dataLen = 10; 179 | 180 | // 单词总数 181 | final int defTotal = (offsetDefs / dataLen) - 1; 182 | 183 | 184 | final int[] idxData = new int[6]; 185 | final String[] defData = new String[2]; 186 | 187 | final SensitiveStringDecoder[] encodings = LingoesLd2Reader 188 | .detectEncodings(dataRawBytes, offsetDefs, offsetXml, defTotal, dataLen, idxData, defData); 189 | 190 | dataRawBytes.position(8); 191 | int currDefPosition = 0; 192 | for (int i = 0; i < defTotal; i++) { 193 | LingoesLd2Reader.readDefinitionData(dataRawBytes, offsetDefs, 194 | offsetXml, dataLen, encodings[0], encodings[1], idxData, 195 | defData, i); 196 | 197 | //释意写入索引文件 198 | 199 | indexWriter.write(defData[0].getBytes("UTF-8")); 200 | //写入\0分隔 201 | indexWriter.write(0); 202 | //写入位置 203 | byte[] positionIntegerByte = ByteBuffer.allocate(4).putInt(currDefPosition).array(); 204 | indexWriter.write(positionIntegerByte); 205 | 206 | int defintionNumOfBytes = defData[1].getBytes("UTF-8").length; 207 | 208 | byte[] definitionLengthIntegerByte = ByteBuffer.allocate(4).putInt(defintionNumOfBytes).array(); 209 | 210 | indexWriter.write(definitionLengthIntegerByte); 211 | 212 | currDefPosition += defintionNumOfBytes; 213 | outputWriter.write(defData[1].getBytes("UTF-8")); 214 | 215 | //System.out.println(defData[0] + " = " + defData[1]); 216 | counter++; 217 | } 218 | 219 | 220 | // 给出最后的information文件 221 | File idxFile = new File(indexFile); 222 | long idxFileSize = idxFile.length(); 223 | FileWriter infomationFileWriter = new FileWriter(informationFile); 224 | String fileName = idxFile.getName(); 225 | String dictName = fileName.substring(0,fileName.lastIndexOf(".")); 226 | infomationFileWriter.write( 227 | "StartDict's dict ifo file\n" + 228 | "version=2.4.2\n" + 229 | "wordcount=" + counter + "\n" + 230 | "idxfilesize="+idxFileSize + "\n" + 231 | "bookname="+dictName.substring(0,dictName.lastIndexOf("."))); 232 
| infomationFileWriter.flush(); 233 | indexWriter.flush(); 234 | outputWriter.flush(); 235 | System.out.println("成功读出" + counter + "组数据。"); 236 | } 237 | 238 | private static void getIdxData(final ByteBuffer dataRawBytes, 239 | final int position, final int[] wordIdxData) { 240 | dataRawBytes.position(position); 241 | wordIdxData[0] = dataRawBytes.getInt(); 242 | wordIdxData[1] = dataRawBytes.getInt(); 243 | wordIdxData[2] = dataRawBytes.get() & 0xff; 244 | wordIdxData[3] = dataRawBytes.get() & 0xff; 245 | wordIdxData[4] = dataRawBytes.getInt(); 246 | wordIdxData[5] = dataRawBytes.getInt(); 247 | } 248 | 249 | private static void inflate(final ByteBuffer dataRawBytes, 250 | final List