├── lib ├── dtd.jar ├── json.jar ├── mapdb.jar ├── tmengine.jar ├── openxliff.jar ├── jsoup-1.11.3.jar └── mariadb-java-client-2.4.3.jar ├── .gitattributes ├── docs ├── TMEngine.pdf ├── images │ └── tmengine.png ├── concepts │ ├── untitled1.dita │ ├── JavaApps.dita │ ├── databases.dita │ ├── serverStop.dita │ ├── ITmEngine.dita │ ├── dependencies.dita │ ├── rest │ │ ├── StopServer.dita │ │ ├── ListMemories.dita │ │ ├── CloseMemory.dita │ │ ├── DeleteMemory.dita │ │ ├── OpenMemory.dita │ │ ├── RenameMemory.dita │ │ ├── GetLanguages.dita │ │ ├── ProcessStatus.dita │ │ ├── ExportTMX.dita │ │ ├── ImportTMX.dita │ │ ├── ConcordanceSearch.dita │ │ ├── CreateMemory.dita │ │ └── SearchTranslations.dita │ ├── Server.dita │ ├── ServerAPI.dita │ ├── TMEngine.dita │ └── methods.dita └── TMEngine.ditamap ├── .gitignore ├── tmserver.bat ├── tmserver.sh ├── org.eclipse.jdt.core.prefs ├── .project ├── src ├── module-info.java └── com │ └── maxprograms │ ├── tmengine │ ├── Constants.java │ ├── FuzzyIndex.java │ ├── NGrams.java │ ├── MatchQuality.java │ ├── ITmEngine.java │ ├── TuDatabase.java │ ├── Match.java │ ├── TuvDatabase.java │ ├── MapDbEngine.java │ └── SQLEngine.java │ ├── tmx │ ├── TMXReader.java │ ├── TMXResolver.java │ ├── TMXContentHandler.java │ ├── tmx11.dtd │ ├── tmx12.dtd │ ├── tmx13.dtd │ └── tmx14.dtd │ ├── tmserver │ └── TmServer.java │ └── tmutils │ └── TMUtils.java ├── .classpath ├── README.md └── LICENSE /lib/dtd.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/dtd.jar -------------------------------------------------------------------------------- /lib/json.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/json.jar -------------------------------------------------------------------------------- /lib/mapdb.jar: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/mapdb.jar -------------------------------------------------------------------------------- /lib/tmengine.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/tmengine.jar -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /docs/TMEngine.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/docs/TMEngine.pdf -------------------------------------------------------------------------------- /lib/openxliff.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/openxliff.jar -------------------------------------------------------------------------------- /lib/jsoup-1.11.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/jsoup-1.11.3.jar -------------------------------------------------------------------------------- /docs/images/tmengine.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/docs/images/tmengine.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /bin/ 3 | /dist/ 4 | /sonar* 5 | .sonar_lock 6 | report-task.txt 7 | /docs/out 8 | 9 | 
-------------------------------------------------------------------------------- /lib/mariadb-java-client-2.4.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/mariadb-java-client-2.4.3.jar -------------------------------------------------------------------------------- /tmserver.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | pushd "%~dp0" 3 | 4 | set CP="lib\mariadb-java-client-2.4.3.jar" 5 | 6 | .\bin\java -cp %CP% --module-path lib com.maxprograms.tmserver.TmServer %* -------------------------------------------------------------------------------- /tmserver.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | cd "$(dirname "$0")/" 4 | 5 | export CP="lib/mariadb-java-client-2.4.3.jar" 6 | 7 | bin/java -cp "$CP" --module-path lib com.maxprograms.tmserver.TmServer "$@" -------------------------------------------------------------------------------- /docs/concepts/untitled1.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | <shortdesc/> 6 | <conbody> 7 | <p/> 8 | </conbody> 9 | </concept> 10 | -------------------------------------------------------------------------------- /docs/concepts/JavaApps.dita: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> 3 | <concept id="JavaApps"> 4 | <title>Java Applications 5 | 6 | 7 | 

TMEngine can be embedded in a Java application that needs to deal with translation 8 | memories.

9 |
10 |
11 | -------------------------------------------------------------------------------- /docs/concepts/databases.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Supported Databases 5 | 6 |

Two options are currently available for storing TM data:

7 | 11 |
12 |
13 | -------------------------------------------------------------------------------- /org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=11 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=10 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=11 12 | -------------------------------------------------------------------------------- /docs/concepts/serverStop.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Stopping the Server 5 | 6 |

A running TMEngine server can be stopped using the Stop 7 | Server method from its REST API.

8 |

Simply visit "stop" using a browser or open a connection to that URL when the 10 | server is embedded in a Java application. Adjust the port number if necessary.

11 |
12 |
13 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | TMEngine 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | 19 | 1671707855370 20 | 21 | 30 22 | 23 | org.eclipse.core.resources.regexFilterMatcher 24 | node_modules|\.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__ 25 | 26 | 27 | 28 | 29 | -------------------------------------------------------------------------------- /src/module-info.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | 13 | module tmengine { 14 | 15 | exports com.maxprograms.tmengine; 16 | 17 | opens com.maxprograms.tmengine to mapdb; 18 | 19 | requires mapdb; 20 | requires java.xml; 21 | requires java.base; 22 | requires java.sql; 23 | requires jdk.httpserver; 24 | requires transitive json; 25 | requires transitive openxliff; 26 | } -------------------------------------------------------------------------------- /docs/concepts/ITmEngine.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | ITmEngine Interface 5 | 6 | 7 |

The interface com.maxprograms.tmengine.ITmEngine provides the set of 8 | methods that applications can use to interact with TM data.

9 |

Applications that use TMEngine can work with the classes that implement 10 | ITmEngine or implement new versions that work with other database 11 | systems.

12 |

The classes that implement ITmEngine interface are:

13 |
    14 |
  • com.maxprograms.tmengine.MapDbEngine
  • 15 |
  • com.maxprograms.tmengine.SQLEngine
  • 16 |
17 |
18 |
19 | -------------------------------------------------------------------------------- /docs/concepts/dependencies.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Dependencies 5 | 6 | 7 |

TMEngine is built on top of OpenXLIFF Filters, an open source project that 9 | provides support for managing the XML side of TMX documents.

10 |

OpenXLIFF includes the last Java release of MapDB, modified to work with modules in Java 12 | 11 or newer.

13 |

MariaDB JDBC driver is also included as an optional dependency. MariaDB code does not 14 | support modularization at this moment.

15 |

Java 11 is used to compile and link TMEngine binaries for distribution. Newer versions of 16 | Java can also be used.

17 |
18 |
19 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/Constants.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | public class Constants { 15 | 16 | private Constants() { 17 | // private for security 18 | } 19 | 20 | public static final String CREATIONTOOL = "Maxprograms TM Engine"; 21 | public static final String VERSION = "5.0.3"; 22 | public static final String BUILD = "20211003_0942"; 23 | 24 | public static final String PENDING = "Pending"; 25 | public static final String COMPLETED = "Completed"; 26 | public static final String FAILED = "Failed"; 27 | } 28 | -------------------------------------------------------------------------------- /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /docs/concepts/rest/StopServer.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Stop Server 5 | 6 |

End Point: [TMEngine URL]/stop

7 |

Default: stop

9 | 10 |

Send a 'GET' request to the method end point.

11 |

The server responds with a JSON object. On success, field 'status' is 12 | set to 'OK'.

13 |

Example:

14 | { 15 | "status": "OK" 16 | } 17 |

On error, field 'status' is set to 'failed' and field 18 | 'reason' contains the error cause.

19 | 20 |

Example:

21 | { 22 | "status": "failed", 23 | "reason": "Error connecting to database" 24 | } 25 | 26 |
27 | -------------------------------------------------------------------------------- /docs/concepts/Server.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Starting the Server 5 | 6 |

Running .\tmserver.bat or ./tmserver.sh without 7 | parameters displays help for starting TMEngine as a standalone server.

8 | Usage: 9 | 10 | tmserver.sh [-help] [-version] [-port portNumber] 11 | 12 | Where: 13 | 14 | -help: (optional) Display this help information and exit 15 | -version: (optional) Display version & build information and exit 16 | -port: (optional) Port for running HTTP server. Default is 8000 17 | 18 | You can verify that the server is running by visiting its default web page: http://localhost:8000/TMServer/ (adjust port number if you change 21 | it). 22 | 23 |
24 |
25 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/TMXReader.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmx; 13 | 14 | import java.io.IOException; 15 | import java.net.URL; 16 | 17 | import javax.xml.parsers.ParserConfigurationException; 18 | 19 | import org.xml.sax.SAXException; 20 | 21 | import com.maxprograms.tmengine.ITmEngine; 22 | import com.maxprograms.xml.SAXBuilder; 23 | 24 | public class TMXReader { 25 | 26 | private SAXBuilder builder; 27 | private TMXContentHandler handler; 28 | 29 | public TMXReader(ITmEngine database) { 30 | handler = new TMXContentHandler(database); 31 | builder = new SAXBuilder(); 32 | builder.setEntityResolver(new TMXResolver()); 33 | builder.setContentHandler(handler); 34 | } 35 | 36 | public void parse(URL url) throws IOException, SAXException, ParserConfigurationException { 37 | builder.build(url); 38 | } 39 | 40 | public int getCount() { 41 | return handler.getCount(); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /docs/concepts/rest/ListMemories.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | List Memories 5 | 6 |

End Point: [TMEngine URL]/list

7 |

Default: list

8 | 9 |

Send a 'GET' request to the method end point.

10 |

The server responds with a JSON object containing two fields. On success, field 11 | 'status' is set to 'OK' and field 12 | 'memories' contains an array with memory details.

13 | { 14 | "memories": [ 15 | { 16 | "owner": "manager", 17 | "isOpen": false, 18 | "name": "Fluenta Localization", 19 | "id": "fluenta", 20 | "type": "MapDbEngine", 21 | "creationDate": "2019-09-10 21:54:13 UYT" 22 | }, 23 | { 24 | "owner": "manager", 25 | "isOpen": false, 26 | "name": "First Memory", 27 | "id": "1568163112478", 28 | "type": "MapDbEngine", 29 | "creationDate": "2019-09-10 21:51:52 UYT" 30 | } 31 | ], 32 | "status": "OK" 33 | } 34 | 35 |

On error, field 'status' is set to 'failed' and field 36 | 'reason' contains the error cause.

37 | 38 |

Example:

39 | { 40 | "status": "failed", 41 | "reason": "Error reading memories" 42 | } 43 | 44 |
45 | -------------------------------------------------------------------------------- /docs/concepts/rest/CloseMemory.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Close Memory 5 | 6 |

End Point: [TMEngine URL]/close

7 |

Default: close

9 |

Send a 'POST' request to the method end point with this parameter in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | ID of the memory to close 21 | 22 | 23 |

Example:

24 | { 25 | "id": "1568163112478" 26 | } 27 |

The server responds with a JSON object. On success, field 'status' is 28 | set to 'OK'.

29 | { 30 | "status": "OK" 31 | } 32 |

On error, field 'status' is set to 'failed' and field 33 | 'reason' contains the error cause.

34 | 35 |

Example:

36 | { 37 | "status": "failed", 38 | "reason": "Unknown memory" 39 | } 40 | 41 |
42 | -------------------------------------------------------------------------------- /docs/concepts/rest/DeleteMemory.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Delete Memory 5 | 6 |

End Point: [TMEngine URL]/delete

7 |

Default: delete

9 |

Send a 'POST' request to the method end point with this parameter in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | ID of the memory to delete 21 | 22 | 23 |

Example:

24 | { 25 | "id": "1568163112478" 26 | } 27 |

The server responds with a JSON object. On success, field 'status' is 28 | set to 'OK'.

29 | { 30 | "status": "OK" 31 | } 32 |

On error, field 'status' is set to 'failed' and field 33 | 'reason' contains the error cause.

34 |

Example:

35 | { 36 | "status": "failed", 37 | "reason": "Unknown memory" 38 | } 39 | 40 |
41 | -------------------------------------------------------------------------------- /docs/concepts/rest/OpenMemory.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Open Memory 5 | 6 |

End Point: [TMEngine URL]/open

7 |

Default: open

9 |

Send a 'POST' request to the method end point with this parameter in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | ID of the memory to open 21 | 22 | 23 |

Example:

24 | { 25 | "id": "1568163112478" 26 | } 27 |

The server responds with a JSON object. On success, field 'status' is 28 | set to 'OK'.

29 | { 30 | "status": "OK" 31 | } 32 |

On error, field 'status' is set to 'failed' and field 33 | 'reason' contains the error cause.

34 | 35 |

Example:

36 | { 37 | "status": "failed", 38 | "reason": "Unknown memory type" 39 | } 40 | 41 |
42 | -------------------------------------------------------------------------------- /docs/concepts/ServerAPI.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | REST API 5 | 6 | 7 |

The REST methods that TMEngine's server supports are:

8 |
    9 |
  • 10 |
  • 11 |
  • 12 |
  • 13 |
  • 14 |
  • 15 |
  • 16 |
  • 17 |
  • 18 |
  • 19 |
  • 20 |
  • 21 |
22 |

Default TMEngine URL is 'http://localhost:8000/TMServer/'.

24 | It is possible to select a custom port for the server, passing the 25 | '-port' parameter to the script used for launching it. 26 |

All methods return a JSON object with a 'status' field. Applications 27 | must watch this field and verify that it is set to 'OK'.

28 |

In case of error, the JSON response includes a field named 'reason' that 29 | contains the error cause.

30 |
31 |
32 | -------------------------------------------------------------------------------- /docs/concepts/TMEngine.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | TMEngine 5 | TMEngine is an open source Translation Memory (TM) manager written in Java. 8 | 9 |

TMEngine is based on the translation memory library used by Swordfish III, Fluenta and RemoteTM.

16 |

TMEngine can be used in two ways:

17 | 22 |

The standalone server runs on these platforms:

23 | 28 |

A TMEngine server allows sharing Translation Memory data in a local network or over the 29 | Internet.

30 | 31 | The .jar files included in TMEngine distributions are compiled with Java 11. 32 |
33 |
34 | -------------------------------------------------------------------------------- /docs/TMEngine.ditamap: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | TMEngine 7 | An Open Source Translation Memory Manager Copyright (c) 2003-2021 Maxprograms 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /docs/concepts/rest/RenameMemory.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Rename Memory 5 | 6 |

End Point: [TMEngine URL]/rename

7 |

Default: rename

9 |

Send a 'POST' request to the method end point with these parameters in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | 21 |

ID of the memory to rename

22 |
23 |
24 | 25 | name 26 | Yes 27 | New name for the memory 28 | 29 |
30 | Only memories of type 'MapDbEngine' can be renamed. 31 |

Example:

32 | { 33 | "id": "1568163112478", 34 | "name": "Updated Memory Name" 35 | } 36 |

The server responds with a JSON object containing two fields.

37 |

On success, field 'status' is set to 'OK'.

38 | 39 |

Example:

40 | { 41 | "status": "OK" 42 | } 43 | 44 |

On error, field 'status' is set to 'failed' and field 45 | 'reason' contains the error cause.

46 | 47 |

Example:

48 | { 49 | "status": "failed", 50 | "reason": "Wrong memory type" 51 | } 52 | 53 |
54 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/FuzzyIndex.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.util.Iterator; 17 | import java.util.Map; 18 | import java.util.NavigableSet; 19 | import java.util.Set; 20 | import java.util.concurrent.ConcurrentHashMap; 21 | 22 | import org.mapdb.DB; 23 | import org.mapdb.DBMaker; 24 | import org.mapdb.Fun; 25 | 26 | public class FuzzyIndex { 27 | 28 | private Map>> maps; 29 | private Map databases; 30 | private File folder; 31 | 32 | public FuzzyIndex(File folder) { 33 | this.folder = folder; 34 | databases = new ConcurrentHashMap<>(); 35 | maps = new ConcurrentHashMap<>(); 36 | } 37 | 38 | NavigableSet> getIndex(String lang) throws IOException { 39 | if (!maps.containsKey(lang)) { 40 | DB mapdb = null; 41 | try { 42 | mapdb = DBMaker.newFileDB(new File(folder, "index_" + lang)).closeOnJvmShutdown().make(); 43 | } catch (Error ioe) { 44 | throw new IOException(ioe.getMessage()); 45 | } 46 | NavigableSet> multiMap = mapdb.getTreeSet(lang); 47 | databases.put(lang, mapdb); 48 | maps.put(lang, multiMap); 49 | } 50 | return maps.get(lang); 51 | } 52 | 53 | public void commit() { 54 | Set set = databases.keySet(); 55 | Iterator keys = set.iterator(); 56 | while 
(keys.hasNext()) { 57 | String key = keys.next(); 58 | databases.get(key).commit(); 59 | } 60 | } 61 | 62 | public void close() { 63 | Set keys = databases.keySet(); 64 | Iterator it = keys.iterator(); 65 | while (it.hasNext()) { 66 | DB db = databases.get(it.next()); 67 | db.commit(); 68 | db.close(); 69 | } 70 | } 71 | 72 | } 73 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/TMXResolver.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmx; 13 | 14 | import java.io.IOException; 15 | import java.net.URL; 16 | 17 | import org.xml.sax.EntityResolver; 18 | import org.xml.sax.InputSource; 19 | import org.xml.sax.SAXException; 20 | 21 | public class TMXResolver implements EntityResolver { 22 | 23 | @Override 24 | public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { 25 | 26 | if (publicId != null) { 27 | if (publicId.equals("-//LISA OSCAR:1998//DTD for Translation Memory eXchange//EN")) { 28 | URL url = TMXResolver.class.getResource("tmx14.dtd"); 29 | return new InputSource(url.openStream()); 30 | } 31 | if (publicId.equals("http://www.lisa.org/tmx14")) { 32 | URL url = TMXResolver.class.getResource("tmx14.dtd"); 33 | return new InputSource(url.openStream()); 34 | } 35 | if (publicId.equals("http://www.lisa.org/tmx")) { 36 | URL url = 
TMXResolver.class.getResource("tmx13.dtd"); 37 | return new InputSource(url.openStream()); 38 | } 39 | } 40 | if (systemId != null) { 41 | if (systemId.toLowerCase().endsWith("tmx14.dtd")) { 42 | URL url = TMXResolver.class.getResource("tmx14.dtd"); 43 | return new InputSource(url.openStream()); 44 | } 45 | if (systemId.toLowerCase().endsWith("tmx13.dtd")) { 46 | URL url = TMXResolver.class.getResource("tmx13.dtd"); 47 | return new InputSource(url.openStream()); 48 | } 49 | if (systemId.toLowerCase().endsWith("tmx12.dtd")) { 50 | URL url = TMXResolver.class.getResource("tmx12.dtd"); 51 | return new InputSource(url.openStream()); 52 | } 53 | if (systemId.toLowerCase().endsWith("tmx11.dtd")) { 54 | URL url = TMXResolver.class.getResource("tmx11.dtd"); 55 | return new InputSource(url.openStream()); 56 | } 57 | } 58 | return null; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /docs/concepts/rest/GetLanguages.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Get Languages 5 | 6 |

End Point: [TMEngine URL]/languages

7 |

Default: languages

9 |

Send a 'POST' request to the method end point with this parameter in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | ID of the memory to query 21 | 22 | 23 |

Example:

24 | { 25 | "id": "1568163112456" 26 | } 27 |

The server responds with a JSON object containing two fields.

28 |

On success, field 'status' is set to 'OK' and field 29 | 'process' contains the ID of the background query process that was 30 | initiated.

31 | 32 | { 33 | "process": "1568222345683", 34 | "status": "OK" 35 | } 36 | 37 |

On error, field 'status' is set to 'failed' and field 38 | 'reason' contains the error cause.

39 | { 40 | "status": "failed", 41 | "reason": "Unknown memory type" 42 | } 43 |

After starting the query process, monitor its status using the Get Process Status method. On successful completion, the 45 | data field will contain a list of languages present in the 46 | memory.

47 |

Example:

48 | { 49 | "result": "Completed", 50 | "data": { 51 | "languages": [ "es", "en" ] 52 | }, 53 | "status": "OK" 54 | } 55 |
56 |
57 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/NGrams.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.util.Collections; 15 | import java.util.Iterator; 16 | import java.util.List; 17 | import java.util.Set; 18 | import java.util.StringTokenizer; 19 | import java.util.TreeSet; 20 | import java.util.Vector; 21 | 22 | public class NGrams { 23 | 24 | private static final int NGRAMSIZE = 3; 25 | public static final String SEPARATORS = " \r\n\f\t\u2028\u2029,.;\":<>¿?¡!()[]{}=+-/*\u00AB\u00BB\u201C\u201D\u201E\uFF00"; 26 | // allow hyphen in terms 27 | public static final String TERM_SEPARATORS = " \u00A0\r\n\f\t\u2028\u2029,.;\":<>¿?¡!()[]{}=+/*\u00AB\u00BB\u201C\u201D\u201E\uFF00"; 28 | 29 | private NGrams() { 30 | // private for security 31 | } 32 | 33 | public static int[] getNGrams(String string) { 34 | String src = string.toLowerCase(); 35 | List words = buildWordList(src); 36 | Set set = Collections.synchronizedSortedSet(new TreeSet<>()); 37 | 38 | Iterator it = words.iterator(); 39 | while (it.hasNext()) { 40 | String word = it.next(); 41 | char[] array = word.toCharArray(); 42 | int length = word.length(); 43 | int ngrams = length / NGRAMSIZE; 44 | if (ngrams * NGRAMSIZE < length) { 45 | ngrams++; 46 | } 47 | for (int i = 0; i < ngrams; i++) { 48 | StringBuilder 
gram = new StringBuilder(); 49 | for (int j = 0; j < NGRAMSIZE; j++) { 50 | if (i * NGRAMSIZE + j < length) { 51 | gram.append(array[i * NGRAMSIZE + j]); 52 | } 53 | } 54 | set.add("" + gram.toString().hashCode()); 55 | } 56 | } 57 | 58 | int[] result = new int[set.size()]; 59 | int idx = 0; 60 | it = set.iterator(); 61 | while (it.hasNext()) { 62 | result[idx++] = Integer.parseInt(it.next()); 63 | } 64 | return result; 65 | } 66 | 67 | private static List buildWordList(String src) { 68 | List result = new Vector<>(); 69 | StringTokenizer tokenizer = new StringTokenizer(src, SEPARATORS); 70 | while (tokenizer.hasMoreElements()) { 71 | result.add(tokenizer.nextToken()); 72 | } 73 | return result; 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/MatchQuality.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 
3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | public class MatchQuality { 15 | 16 | private static final int PENALTY = 2; 17 | 18 | private MatchQuality() { 19 | // private for security 20 | } 21 | 22 | static String lcs(String x, String y) { 23 | StringBuilder result = new StringBuilder(); 24 | int m = x.length(); 25 | int n = y.length(); 26 | int max = 0; 27 | int mx = 0; 28 | 29 | // opt[i][j] = length of the longest common substring ending at x[i-1] and y[j-1] 30 | int[][] opt = new int[m + 1][n + 1]; 31 | 32 | // fill the matrix 33 | for (int i = 1; i <= m; i++) { 34 | for (int j = 1; j <= n; j++) { 35 | if (x.charAt(i - 1) == y.charAt(j - 1)) { 36 | opt[i][j] = opt[i - 1][j - 1] + 1; 37 | if (opt[i][j] > max) { 38 | // remember where the maximum length is 39 | max = opt[i][j]; 40 | mx = i; 41 | } 42 | } else { 43 | opt[i][j] = 0; 44 | } 45 | } 46 | } 47 | 48 | // recover the longest common substring by walking back from its end position in x 49 | while (max > 0) { 50 | result.insert(0, x.charAt(mx - 1)); 51 | max--; 52 | mx--; 53 | } 54 | 55 | return result.toString(); 56 | } 57 | 58 | public static int similarity(String one, String two) { 59 | int result = 0; 60 | String x = one.trim(); 61 | String y = two.trim(); 62 | int longest = Math.max(x.length(), y.length()); 63 | if (longest == 0) { 64 | return 0; 65 | } 66 | String a; 67 | String b; 68 | if (x.length() == longest) { 69 | a = x; 70 | b = y; 71 | } else { 72 | a = y; 73 | b = x; 74 | } 75 | // a is the longest string 76 | int count = -1; 77 | int idx; 78 | String lcs = lcs(a, b); 79 | while (!lcs.trim().isEmpty() && lcs.length() > longest * PENALTY / 100) { 80 | count++; 81 | idx = 
a.indexOf(lcs); 82 | a = a.substring(0, idx) + a.substring(idx + lcs.length()); 83 | idx = b.indexOf(lcs); 84 | b = b.substring(0, idx) + b.substring(idx + lcs.length()); 85 | lcs = lcs(a, b); 86 | } 87 | result = 100 * (longest - a.length()) / longest - count * PENALTY; 88 | if (result < 0) { 89 | result = 0; 90 | } 91 | return result; 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # This project is not maintained. Please use [RemoteTM](https://github.com/rmraya/RemoteTM) instead. 2 | 3 | 4 | # TMEngine 5 | 6 | TMEngine logo 7 | 8 | An open source [Translation Memory](https://en.wikipedia.org/wiki/Translation_memory) Engine written in Java. 9 | 10 | TMEngine is based on the translation memory library used by [Swordfish III](https://www.maxprograms.com/products/swordfish.html), [Fluenta](https://www.maxprograms.com/products/fluenta.html) and [RemoteTM](https://www.maxprograms.com/products/remotetm.html). 11 | 12 | TMEngine can be used either as an embedded library that manages translation memories in a Java application or as a standalone TM server via its REST API. 13 | 14 | ## Requirements 15 | 16 | - JDK 11 or newer is required for compiling and building. 17 | - Apache Ant 1.10.6 or newer 18 | 19 | ## Building 20 | 21 | - Checkout this repository. 22 | - Point your JAVA_HOME variable to JDK 11 23 | - Run `ant compile`. 24 | 25 | ## Downloads 26 | 27 | Ready to use distributions are available at [https://www.maxprograms.com/products/tmengine.html](https://www.maxprograms.com/products/tmengine.html). 
28 | 29 | ## Related Links 30 | 31 | - [TMEngine Manual (PDF)](https://www.maxprograms.com/support/tmengine.pdf) 32 | - [TMEngine Manual (Web Help)](https://www.maxprograms.com/support/tmengine.html) 33 | 34 | ## Standalone Server 35 | 36 | Running `.\tmserver.bat` or `./tmserver.sh` without parameters displays help for starting TMEngine as a standalone server. 37 | ``` 38 | Usage: 39 | 40 | tmserver.sh [-help] [-version] [-port portNumber] 41 | 42 | Where: 43 | 44 | -help: (optional) Display this help information and exit 45 | -version: (optional) Display version & build information and exit 46 | -port: (optional) Port for running HTTP server. Default is 8000 47 | ``` 48 | 49 | Visit http://localhost:8000/TMServer/stop to stop the server. Adjust the port number if required. 50 | 51 | ## Java Library 52 | 53 | TMEngine can be embedded in Java applications that need to deal with Translation Memory data. 54 | 55 | Add all .jar files from the `/lib` folder to the classpath of your application and use instances of the `ITmEngine` interface. 56 | 57 | Two classes implement interface `ITmEngine`: 58 | 59 | - `MapDbEngine`: a translation memory engine built using [MapDB](http://mapdb.org) 60 | - `SQLEngine`: an implementation designed to be used with [MariaDB](https://mariadb.org/) or [MySQL](https://www.mysql.com/) 61 | 62 | See more details on the available Java methods in the [documentation](https://www.maxprograms.com/support/tmengine/TMEngine.html). 63 | -------------------------------------------------------------------------------- /docs/concepts/rest/ProcessStatus.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Get Process Status 5 | 6 |

End Point: [TMEngine URL]/status

7 |

Default: status

9 | 10 |

Send a POST request to the method end point with this parameter in a 11 | JSON body:

12 | 13 | 14 | Field 15 | Required 16 | Content 17 | 18 | 19 | process 20 | Yes 21 | ID of the background process to check 22 | 23 | 24 |

Example:

25 | { 26 | "process": "1568223016762" 27 | } 28 |

The server responds with a JSON object.

29 |

On successful status check, field 'status' is set to 30 | 'OK' and field 'result' contains current 31 | status.

32 |

Example:

33 | 34 | 35 |

Field 'result' may have these values:

36 | 37 |
    38 |
  • Pending: processing is still going on. 39 | { 40 | "result": "Pending", 41 | "status": "OK" 42 | }
  • 43 |
  • Completed: processing has finished. If the process produces any data, it is 44 | placed in the 'data' field. 45 | { 46 | "result": "Completed", 47 | "data": { 48 | "imported": "57678" 49 | }, 50 | "status": "OK" 51 | }
  • 52 |
  • Failed: processing failed. Failure reason is provided in 53 | 'reason' 54 | field.{ 55 | "result": "Failed", 56 | "reason": "/Volumes/Data/something.tmx (No such file or directory)", 57 | "status": "failed" 58 | } 59 |
  • 60 |
61 |

If process status cannot be checked, the server omits the 'result' field 62 | and provides a failure reason.

63 | { 64 | "reason": "Missing 'process' parameter", 65 | "status": "failed" 66 | } 67 | 68 |
69 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/ITmEngine.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.IOException; 15 | import java.sql.SQLException; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.Set; 19 | 20 | import javax.xml.parsers.ParserConfigurationException; 21 | 22 | import org.xml.sax.SAXException; 23 | 24 | import com.maxprograms.xml.Element; 25 | 26 | public interface ITmEngine { 27 | 28 | public abstract String getType(); 29 | 30 | public abstract void close() throws IOException, SQLException; 31 | 32 | public abstract String getName(); 33 | 34 | public abstract int storeTMX(String tmxFile, String project, String customer, String subject) 35 | throws SAXException, IOException, ParserConfigurationException, SQLException; 36 | 37 | public abstract void exportMemory(String tmxfile, Set langs, String srcLang, Map properties) 38 | throws IOException, SAXException, ParserConfigurationException, SQLException; 39 | 40 | public abstract void flag(String tuid) throws SQLException; 41 | 42 | public abstract Set getAllClients() throws SQLException; 43 | 44 | public abstract Set getAllLanguages() throws SQLException; 45 | 46 | public abstract Set getAllProjects() throws SQLException; 47 | 48 | public abstract Set getAllSubjects() 
throws SQLException; 49 | 50 | public abstract List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity, 51 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException; 52 | 53 | public abstract List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp, 54 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException; 55 | 56 | public abstract void storeTu(Element tu) throws IOException, SQLException; 57 | 58 | public abstract void commit() throws SQLException; 59 | 60 | public abstract Element getTu(String tuid) 61 | throws IOException, SAXException, ParserConfigurationException, SQLException; 62 | 63 | public abstract void removeTu(String tuid) 64 | throws IOException, SAXException, ParserConfigurationException, SQLException; 65 | 66 | public abstract void deleteDatabase() throws IOException, SQLException; 67 | } 68 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/TuDatabase.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 
3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.util.Map; 17 | import java.util.Set; 18 | 19 | import org.mapdb.DB; 20 | import org.mapdb.DBMaker; 21 | import org.mapdb.HTreeMap; 22 | 23 | public class TuDatabase { 24 | 25 | private DB mapdb; 26 | private HTreeMap> tumap; 27 | private Set projects; 28 | private Set subjects; 29 | private Set customers; 30 | private Set languages; 31 | 32 | public TuDatabase(File folder) throws IOException { 33 | try { 34 | mapdb = DBMaker.newFileDB(new File(folder, "tudata")).closeOnJvmShutdown().make(); 35 | tumap = mapdb.getHashMap("tuvmap"); 36 | projects = mapdb.getHashSet("projects"); 37 | subjects = mapdb.getHashSet("subjects"); 38 | customers = mapdb.getHashSet("customers"); 39 | languages = mapdb.getHashSet("languages"); 40 | } catch (Error ioe) { 41 | throw new IOException(ioe.getMessage()); 42 | } 43 | } 44 | 45 | public void commit() { 46 | mapdb.commit(); 47 | } 48 | 49 | public void close() { 50 | mapdb.commit(); 51 | mapdb.close(); 52 | } 53 | 54 | public void store(String tuid, Map tu) { 55 | if (tumap.containsKey(tuid.hashCode())) { 56 | tumap.replace(tuid.hashCode(), tu); 57 | } else { 58 | tumap.put(tuid.hashCode(), tu); 59 | } 60 | } 61 | 62 | public void storeSubject(String sub) { 63 | subjects.add(sub); 64 | } 65 | 66 | public void storeCustomer(String cust) { 67 | customers.add(cust); 68 | } 69 | 70 | public void storeProject(String proj) { 71 | projects.add(proj); 72 | } 73 | 74 | public void storeLanguage(String lang) { 75 | 
languages.add(lang); 76 | } 77 | 78 | public Set getCustomers() { 79 | return customers; 80 | } 81 | 82 | public Set getProjects() { 83 | return projects; 84 | } 85 | 86 | public Set getSubjects() { 87 | return subjects; 88 | } 89 | 90 | public Set getKeys() { 91 | return tumap.keySet(); 92 | } 93 | 94 | public Set getLanguages() { 95 | return languages; 96 | } 97 | 98 | public Map getTu(Integer hashCode) { 99 | return tumap.get(hashCode); 100 | } 101 | 102 | public void remove(String tuid) { 103 | if (tumap.containsKey(tuid.hashCode())) { 104 | tumap.remove(tuid.hashCode()); 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /docs/concepts/rest/ExportTMX.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Export TMX File 5 | 6 |

End Point: [TMEngine URL]/export

7 |

Default: export

9 | 10 |

Send a 'POST' request to the method end point with these parameters in a 11 | JSON body:

12 | 13 | 14 | Field 15 | Required 16 | Content 17 | 18 | 19 | id 20 | Yes 21 | ID of the memory to export 22 | 23 | 24 | file 25 | Yes 26 | Path to the TMX file being created 27 | 28 | 29 | langs 30 | No 31 | JSON array containing the list of languages to export 32 | 33 | 34 | srcLang 35 | No 36 | Language to set as source language. The wildcard '*all*' is 37 | used by default 38 | 39 | 40 | properties 41 | No 42 | JSON object with string properties to set in the exported file 43 | 44 | 45 | When exporting a TMX file on a remote server, make sure the TMEngine server has access 46 | to the specified location. 47 |

Example:

48 | { 49 | "id": "1568163112478", 50 | "file": "/Volumes/Data/segments.tmx", 51 | "langs": [ 52 | "en-US", 53 | "ja", 54 | "fr-FR", 55 | "it" 56 | ], 57 | "srcLang": "en-US", 58 | "properties": { 59 | "project": "Milky Way", 60 | "subject": "Astronomy Device" 61 | } 62 | } 63 | 64 |

The server responds with a JSON object containing two fields.

65 |

On success, field 'status' is set to 'OK' and field 66 | 'process' contains the ID of the background export process that was 67 | initiated.

68 | 69 | { 70 | "process": "1568222345643", 71 | "status": "OK" 72 | } 73 | 74 |

On error, field 'status' is set to 'failed' and field 75 | 'reason' contains the error cause.

76 | { 77 | "status": "failed", 78 | "reason": "Unknown memory type" 79 | } 80 |

After starting the export process, monitor its status using the Get Process Status method.

82 | 83 |
84 | -------------------------------------------------------------------------------- /docs/concepts/rest/ImportTMX.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Import TMX File 5 | 6 |

End Point: [TMEngine URL]/import

7 |

Default: import

9 |

Send a 'POST' request to the method end point with these parameters in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | ID of the memory to populate with TMX data 21 | 22 | 23 | file 24 | Yes 25 | Path to the TMX file being imported 26 | 27 | 28 | subject 29 | No 30 | Name or identifier of the subject associated with the TMX file 31 | 32 | 33 | client 34 | No 35 | Name or identifier of the client associated with the TMX file 36 | 37 | 38 | project 39 | No 40 | Name or identifier of the project associated with the TMX file 41 | 42 | 43 | 44 | The TMEngine server must have access to the TMX file being imported. When importing a 45 | TMX file into a remote server, copy or upload the file to the server first and supply 46 | the right path in the JSON body. 47 |

Example:

48 | { 49 | "id": "1568163112478", 50 | "file": "/Volumes/Data/segments.tmx", 51 | "project": "Main TM" 52 | } 53 | 54 |

The server responds with a JSON object containing two fields.

55 |

On success, field 'status' is set to 'OK' and field 56 | 'process' contains the ID of the background import process that was 57 | initiated.

58 | 59 | { 60 | "process": "1568222345643", 61 | "status": "OK" 62 | } 63 | 64 |

On error, field 'status' is set to 'failed' and field 65 | 'reason' contains the error cause.

66 | { 67 | "status": "failed", 68 | "reason": "Unknown memory type" 69 | } 70 |

After starting the import process, monitor its status using the Get Process Status method. On successful completion, the result will 72 | contain the number of segments imported.

73 |

Example:

74 | 75 | { 76 | "result": "Completed", 77 | "data": { 78 | "imported": "57678" 79 | }, 80 | "status": "OK" 81 | } 82 | 83 |
84 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/Match.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.Serializable; 15 | import java.util.Iterator; 16 | import java.util.Map; 17 | 18 | import com.maxprograms.tmutils.TMUtils; 19 | import com.maxprograms.xml.Element; 20 | 21 | import org.json.JSONObject; 22 | 23 | public class Match implements Serializable, Comparable { 24 | 25 | private static final long serialVersionUID = -944405164833933436L; 26 | 27 | private Element source; 28 | private Element target; 29 | private int similarity; 30 | private String origin; 31 | private Map properties; 32 | 33 | public Match(Element source, Element target, int similarity, String origin, Map properties) { 34 | this.source = source; 35 | this.target = target; 36 | this.similarity = similarity; 37 | this.origin = origin; 38 | this.properties = properties; 39 | } 40 | 41 | public JSONObject toJSON() { 42 | JSONObject result = new JSONObject(); 43 | result.put("source", source.toString()); 44 | result.put("target", target.toString()); 45 | result.put("similarity", similarity); 46 | result.put("origin", origin); 47 | if (properties != null && !properties.isEmpty()) { 48 | JSONObject props = new JSONObject(); 49 | Iterator keys = properties.keySet().iterator(); 50 | while 
(keys.hasNext()) { 51 | String key = keys.next(); 52 | props.put(key, properties.get(key)); 53 | } 54 | result.put("properties", props); 55 | } 56 | return result; 57 | } 58 | 59 | public Element getSource() { 60 | return source; 61 | } 62 | 63 | public void setSource(Element source) { 64 | this.source = source; 65 | } 66 | 67 | public Element getTarget() { 68 | return target; 69 | } 70 | 71 | public void setTarget(Element target) { 72 | this.target = target; 73 | } 74 | 75 | public int getSimilarity() { 76 | return similarity; 77 | } 78 | 79 | public void setSimilarity(int similarity) { 80 | this.similarity = similarity; 81 | } 82 | 83 | public String getOrigin() { 84 | return origin; 85 | } 86 | 87 | public void setOrigin(String origin) { 88 | this.origin = origin; 89 | } 90 | 91 | public Map getProperties() { 92 | return properties; 93 | } 94 | 95 | @Override 96 | public int compareTo(Match o) { 97 | if (similarity < o.getSimilarity()) { 98 | return 1; 99 | } 100 | if (similarity > o.getSimilarity()) { 101 | return -1; 102 | } 103 | if (getCreationDate() < o.getCreationDate()) { 104 | return 1; 105 | } 106 | if (getCreationDate() > o.getCreationDate()) { 107 | return -1; 108 | } 109 | return origin.compareTo(o.getOrigin()); 110 | } 111 | 112 | private long getCreationDate() { 113 | String created = properties.get("creationdate"); 114 | if (created != null) { 115 | return TMUtils.getGMTtime(created); 116 | } 117 | return -1l; 118 | } 119 | 120 | @Override 121 | public boolean equals(Object obj) { 122 | if (!(obj instanceof Match)) { 123 | return false; 124 | } 125 | Match m = (Match) obj; 126 | return source.equals(m.getSource()) && target.equals(m.getTarget()) && similarity == m.getSimilarity() 127 | && origin.equals(m.getOrigin()) && properties.equals(m.getProperties()); 128 | } 129 | 130 | @Override 131 | public int hashCode() { 132 | return source.hashCode() * target.hashCode() * similarity * origin.hashCode() * properties.hashCode(); 133 | } 134 | 135 | } 
136 | -------------------------------------------------------------------------------- /docs/concepts/rest/ConcordanceSearch.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Concordance Search 5 | 6 |

End Point: [TMEngine URL]/concordance

7 |

Default: concordance

9 |

Send a 'POST' request to the method end point with these parameters in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | 21 |

ID of the memory where the search should be performed

22 |
23 |
24 | 25 | text 26 | Yes 27 | Text string to search 28 | 29 | 30 | srcLang 31 | Yes 32 | Source language code 33 | 34 | 35 | limit 36 | Yes 37 | Integer value indicating the maximum number of matches to include 38 | 39 | 40 | isRegexp 41 | Yes 42 | Boolean value indicating whether the search text should be treated as a 43 | regular expression 44 | 45 | 46 | caseSensitive 47 | Yes 48 | Boolean value indicating whether the search should be case sensitive or 49 | not 50 | 51 |
52 |

Example:

53 | { 54 | "id": "fluenta", 55 | "text": "segment", 56 | "srcLang": "en", 57 | "limit": 5, 58 | "isRegexp": false, 59 | "caseSensitive": true 60 | } 61 |

On success, field 'status' is set to 'OK' and field 62 | 'process' contains the ID of the background search process that was 63 | initiated.

64 | 65 | { 66 | "process": "1572531573026", 67 | "status": "OK" 68 | } 69 | 70 |

On error, field 'status' is set to 'failed' and field 71 | 'reason' contains the error cause.

72 | { 73 | "status": "failed", 74 | "reason": "Unknown memory type" 75 | } 76 |

After starting the search process, monitor its status using the Get Process Status method.

78 |

On successful completion, the result will contain an array of <tu> elements that 79 | contain the searched text in the data field.

80 |

Example:

81 | { 82 | "result": "Completed", 83 | "data": { 84 | "entries": [ 85 | "<tu creationid="rmraya" creationdate="20161225T150949Z" creationtool="Swordfish" 86 | creationtoolversion="3.3-8" tuid="-1247472893-0-1586928971"> 87 | <prop type="project">Fluenta</prop> 88 | <tuv xml:lang="es"><seg>Hay segmentos con errores de etiquetas.</seg></tuv> 89 | <tuv xml:lang="en"><seg>There are segments with tag errors.</seg></tuv></tu>" 90 | ], 91 | }, 92 | "status": "OK" 93 | } 94 | 95 |
96 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmserver/TmServer.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmserver; 13 | 14 | import java.io.File; 15 | import java.io.IOException; 16 | import java.lang.System.Logger; 17 | import java.lang.System.Logger.Level; 18 | import java.net.InetSocketAddress; 19 | import java.nio.file.Files; 20 | 21 | import com.maxprograms.converters.Utils; 22 | import com.maxprograms.server.IServer; 23 | import com.maxprograms.tmengine.Constants; 24 | import com.sun.net.httpserver.HttpServer; 25 | 26 | public class TmServer implements IServer { 27 | 28 | private static final Logger LOGGER = System.getLogger(TmServer.class.getName()); 29 | 30 | private HttpServer server; 31 | private File workDir; 32 | 33 | public static void main(String[] args) { 34 | String[] arguments = Utils.fixPath(args); 35 | String port = "8000"; 36 | for (int i = 0; i < arguments.length; i++) { 37 | String param = arguments[i]; 38 | if (param.equals("-help")) { 39 | help(); 40 | return; 41 | } 42 | if (param.equals("-version")) { 43 | LOGGER.log(Level.INFO, () -> "Version: " + Constants.VERSION + " Build: " + Constants.BUILD); 44 | return; 45 | } 46 | if (param.equals("-port") && (i + 1) < arguments.length) { 47 | port = arguments[i + 1]; 48 | } 49 | } 50 | try { 51 | TmServer instance = new 
TmServer(Integer.valueOf(port)); 52 | instance.run(); 53 | } catch (IOException e) { 54 | LOGGER.log(Level.ERROR, "Server error", e); 55 | } 56 | } 57 | 58 | private static void help() { 59 | String launcher = " tmserver.sh "; 60 | if (System.getProperty("file.separator").equals("\\")) { 61 | launcher = " tmserver.bat "; 62 | } 63 | String help = "Usage:\n\n" + launcher + "[-help] [-version] [-port portNumber]\n\n" + " Where:\n\n" 64 | + " -help: (optional) Display this help information and exit\n" 65 | + " -version: (optional) Display version & build information and exit\n" 66 | + " -port: (optional) Port for running HTTP server. Default is 8000\n"; 67 | System.out.println(help); 68 | } 69 | 70 | public TmServer(int port) throws IOException { 71 | server = HttpServer.create(new InetSocketAddress(port), 0); 72 | server.createContext("/TMServer", new TmHandler(this, "/TMServer")); 73 | server.setExecutor(null); // creates a default executor 74 | } 75 | 76 | public void run() { 77 | server.start(); 78 | LOGGER.log(Level.INFO, "TMEngine server started"); 79 | } 80 | 81 | @Override 82 | public void stop() { 83 | server.removeContext("/TMServer"); 84 | LOGGER.log(Level.INFO, "TMEngine server closed"); 85 | System.exit(0); 86 | } 87 | 88 | @Override 89 | public File getWorkFolder() throws IOException { 90 | if (workDir == null) { 91 | String os = System.getProperty("os.name").toLowerCase(); 92 | if (os.startsWith("mac")) { 93 | workDir = new File(System.getProperty("user.home") + "/Library/Application Support/TMEngine/"); 94 | } else if (os.startsWith("windows")) { 95 | workDir = new File(System.getenv("AppData") + "\\TMEngine\\"); 96 | } else { 97 | workDir = new File(System.getProperty("user.home") + "/.tmengine/"); 98 | } 99 | if (!workDir.exists()) { 100 | Files.createDirectories(workDir.toPath()); 101 | } 102 | } 103 | return workDir; 104 | } 105 | } -------------------------------------------------------------------------------- 
/src/com/maxprograms/tmx/TMXContentHandler.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmx; 13 | 14 | import java.io.IOException; 15 | import java.lang.System.Logger; 16 | import java.lang.System.Logger.Level; 17 | import java.sql.SQLException; 18 | import java.util.Deque; 19 | import java.util.concurrent.ConcurrentLinkedDeque; 20 | 21 | import org.xml.sax.Attributes; 22 | import org.xml.sax.Locator; 23 | import org.xml.sax.SAXException; 24 | 25 | import com.maxprograms.tmengine.ITmEngine; 26 | import com.maxprograms.xml.Catalog; 27 | import com.maxprograms.xml.Document; 28 | import com.maxprograms.xml.Element; 29 | import com.maxprograms.xml.IContentHandler; 30 | 31 | class TMXContentHandler implements IContentHandler { 32 | 33 | protected static final Logger LOGGER = System.getLogger(TMXContentHandler.class.getName()); 34 | 35 | private Element current; 36 | private Deque stack; 37 | private boolean inCDATA = false; 38 | private int count; 39 | private ITmEngine db; 40 | 41 | public TMXContentHandler(ITmEngine tmEngine) { 42 | db = tmEngine; 43 | stack = new ConcurrentLinkedDeque<>(); 44 | } 45 | 46 | @Override 47 | public void characters(char[] ch, int start, int length) throws SAXException { 48 | if (!inCDATA && current != null) { 49 | current.addContent(new String(ch, start, length)); 50 | } 51 | } 52 | 53 | @Override 54 | public void endDocument() throws 
SAXException { 55 | stack = null; 56 | } 57 | 58 | @Override 59 | public void endElement(String uri, String localName, String qName) throws SAXException { 60 | if (localName.equals("tu")) { 61 | try { 62 | db.storeTu(current); 63 | if (count % 500 == 0) { 64 | db.commit(); 65 | } 66 | } catch (IOException | SQLException e) { 67 | // ignore 68 | LOGGER.log(Level.WARNING, "Error storing " + current); 69 | } 70 | count++; 71 | current = null; 72 | stack.clear(); 73 | } else { 74 | if (!stack.isEmpty()) { 75 | current = stack.removeFirst(); 76 | } 77 | } 78 | } 79 | 80 | @Override 81 | public void endPrefixMapping(String prefix) throws SAXException { 82 | // do nothing 83 | } 84 | 85 | @Override 86 | public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { 87 | // do nothing 88 | } 89 | 90 | @Override 91 | public void processingInstruction(String target, String data) throws SAXException { 92 | // do nothing 93 | } 94 | 95 | @Override 96 | public void setDocumentLocator(Locator locator) { 97 | // do nothing 98 | } 99 | 100 | @Override 101 | public void skippedEntity(String name) throws SAXException { 102 | // do nothing, the entity resolver must support this 103 | } 104 | 105 | @Override 106 | public void startDocument() throws SAXException { 107 | // do nothing 108 | } 109 | 110 | @Override 111 | public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { 112 | if (current == null) { 113 | current = new Element(qName); 114 | stack.addFirst(current); 115 | } else { 116 | Element child = new Element(qName); 117 | if (!qName.equals("ut")) { 118 | current.addContent(child); 119 | } 120 | stack.addFirst(current); 121 | current = child; 122 | } 123 | for (int i = 0; i < atts.getLength(); i++) { 124 | current.setAttribute(atts.getQName(i), atts.getValue(i)); 125 | } 126 | } 127 | 128 | @Override 129 | public void startPrefixMapping(String prefix, String uri) throws SAXException { 130 | // do 
nothing 131 | } 132 | 133 | @Override 134 | public void comment(char[] ch, int start, int length) throws SAXException { 135 | // do nothing 136 | } 137 | 138 | @Override 139 | public void endCDATA() throws SAXException { 140 | inCDATA = false; 141 | } 142 | 143 | @Override 144 | public void endDTD() throws SAXException { 145 | // do nothing 146 | } 147 | 148 | @Override 149 | public void endEntity(String arg0) throws SAXException { 150 | // do nothing, let the EntityResolver handle this 151 | } 152 | 153 | @Override 154 | public void startCDATA() throws SAXException { 155 | inCDATA = true; 156 | } 157 | 158 | @Override 159 | public void startDTD(String name, String publicId1, String systemId1) throws SAXException { 160 | // do nothing 161 | } 162 | 163 | @Override 164 | public void startEntity(String arg0) throws SAXException { 165 | // do nothing, let the EntityResolver handle this 166 | } 167 | 168 | public int getCount() { 169 | return count; 170 | } 171 | 172 | @Override 173 | public Document getDocument() { 174 | // do nothing 175 | return null; 176 | } 177 | 178 | @Override 179 | public void setCatalog(Catalog arg0) { 180 | // do nothing 181 | } 182 | } 183 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/TuvDatabase.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 
3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/
package com.maxprograms.tmengine;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.NavigableSet;
import java.util.Set;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

import org.mapdb.BTreeMap;
import org.mapdb.DB;
import org.mapdb.DBMaker;

/**
 * Per-language storage of translation unit variants.
 *
 * <p>One MapDB file database named {@code tuv_<lang>} is opened inside the
 * configured folder the first time a language is used. Each database holds
 * three tree maps:</p>
 * <ul>
 * <li>{@code "tuvs"}: {@code tuid.hashCode()} to the pure text of the segment</li>
 * <li>{@code "segs"}: {@code tuid.hashCode()} to the stored segment string</li>
 * <li>{@code "hashes"}: hash code of the lower-cased pure text to the set of
 * tuids that have that text</li>
 * </ul>
 *
 * <p>NOTE: text and segment entries are keyed by {@code tuid.hashCode()}, so
 * two tuids with the same hash code share one slot.</p>
 */
public class TuvDatabase {

    private Map<String, BTreeMap<Integer, String>> textMaps;
    private Map<String, BTreeMap<Integer, Set<String>>> hashesMaps;
    private Map<String, BTreeMap<Integer, String>> segsMaps;
    private Map<String, DB> databases;
    private File folder;

    /**
     * Creates a store rooted at {@code folder}. No database is opened until
     * a language is first accessed.
     *
     * @param folder directory in which the per-language database files live
     */
    public TuvDatabase(File folder) {
        this.folder = folder;
        databases = new ConcurrentHashMap<>();
        textMaps = new ConcurrentHashMap<>();
        hashesMaps = new ConcurrentHashMap<>();
        segsMaps = new ConcurrentHashMap<>();
    }

    /**
     * Opens the database of {@code lang} and registers its three maps.
     *
     * @param lang language code; used verbatim in the database file name
     * @throws IOException if opening the database raises an {@link Error};
     *                     the original error is kept as the cause
     */
    private void buildIndex(String lang) throws IOException {
        try {
            DB mapdb = DBMaker.newFileDB(new File(folder, "tuv_" + lang)).closeOnJvmShutdown().make();
            databases.put(lang, mapdb);
            textMaps.put(lang, mapdb.getTreeMap("tuvs"));
            hashesMaps.put(lang, mapdb.getTreeMap("hashes"));
            segsMaps.put(lang, mapdb.getTreeMap("segs"));
        } catch (Error ioe) {
            // NOTE(review): catching Error is kept from the original; presumably
            // MapDB signals I/O failures with java.io.IOError - confirm.
            throw new IOException(ioe.getMessage(), ioe);
        }
    }

    /**
     * Commits every language database opened so far.
     */
    public void commit() {
        for (DB database : databases.values()) {
            database.commit();
        }
    }

    /**
     * Commits and then closes every language database opened so far.
     */
    public void close() {
        commit();
        for (DB database : databases.values()) {
            database.close();
        }
    }

    /**
     * Stores or overwrites the text and segment of a translation unit variant
     * and indexes the tuid under the hash of its lower-cased text.
     *
     * <p>When the tuid slot already holds a different text, the tuid is first
     * removed from the bucket of the previous text so that
     * {@link #getPerfectMatches(String, String)} does not keep returning it
     * for text it no longer has.</p>
     *
     * @param lang     language code of the variant
     * @param tuid     identifier of the translation unit
     * @param puretext text of the segment without inline markup
     * @param seg      segment string to store
     * @throws IOException if the language database cannot be opened
     */
    public void store(String lang, String tuid, String puretext, String seg) throws IOException {
        if (!textMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        int idHash = tuid.hashCode();
        int textHash = puretext.toLowerCase().hashCode();
        BTreeMap<Integer, String> textmap = textMaps.get(lang);
        BTreeMap<Integer, Set<String>> hashmap = hashesMaps.get(lang);

        String previous = textmap.get(idHash);
        if (previous != null) {
            int previousHash = previous.toLowerCase().hashCode();
            if (previousHash != textHash) {
                unindex(hashmap, previousHash, tuid);
            }
        }

        textmap.put(idHash, puretext);
        segsMaps.get(lang).put(idHash, seg);

        Set<String> bucket = hashmap.get(textHash);
        if (bucket == null) {
            bucket = new TreeSet<>();
        }
        bucket.add(tuid);
        // the modified set is written back explicitly, as the original code did
        hashmap.put(textHash, bucket);
    }

    /**
     * Removes {@code tuid} from the bucket stored under {@code textHash}, if
     * such a bucket exists, and writes the bucket back.
     */
    private static void unindex(BTreeMap<Integer, Set<String>> hashmap, int textHash, String tuid) {
        Set<String> bucket = hashmap.get(textHash);
        if (bucket != null) {
            bucket.remove(tuid);
            hashmap.put(textHash, bucket);
        }
    }

    /**
     * Returns the segment string stored for {@code tuid} in {@code lang}.
     *
     * @return the stored segment, or {@code null} when there is none
     * @throws IOException if the language database cannot be opened
     */
    public String getSegText(String lang, String tuid) throws IOException {
        if (!segsMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        return segsMaps.get(lang).get(tuid.hashCode());
    }

    /**
     * Deletes the text and segment stored for {@code tuid} in {@code lang}
     * and removes the tuid from the text-hash index. Does nothing when no
     * text is stored for the tuid.
     *
     * @throws IOException if the language database cannot be opened
     */
    public void remove(String lang, String tuid) throws IOException {
        if (!textMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        int idHash = tuid.hashCode();
        String oldText = getPureText(lang, idHash);
        if (oldText == null) {
            return;
        }
        textMaps.get(lang).remove(idHash);
        segsMaps.get(lang).remove(idHash);
        // tolerates a missing bucket instead of failing with a NullPointerException
        unindex(hashesMaps.get(lang), oldText.toLowerCase().hashCode(), tuid);
    }

    /**
     * Returns the tuids indexed under the hash of the lower-cased
     * {@code searchStr}. Matching is by hash code only, so callers that need
     * an exact match must compare the texts themselves.
     *
     * @return the indexed set, or a new empty set when nothing is indexed
     * @throws IOException if the language database cannot be opened
     */
    public Set<String> getPerfectMatches(String lang, String searchStr) throws IOException {
        if (!hashesMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        Set<String> matches = hashesMaps.get(lang).get(searchStr.toLowerCase().hashCode());
        if (matches != null) {
            return matches;
        }
        return new TreeSet<>();
    }

    /**
     * Returns the key set of the text map of {@code lang}, that is, the hash
     * codes of all stored tuids.
     *
     * @throws IOException if the language database cannot be opened
     */
    public NavigableSet<Integer> getKeySet(String lang) throws IOException {
        if (!textMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        return textMaps.get(lang).keySet();
    }

    /**
     * Returns the pure text stored under the tuid hash {@code id}.
     *
     * @param lang language code of the variant
     * @param id   value of {@code tuid.hashCode()} for the wanted unit
     * @return the stored text, or {@code null} when there is none
     * @throws IOException if the language database cannot be opened
     */
    public String getPureText(String lang, Integer id) throws IOException {
        if (!textMaps.containsKey(lang)) {
            buildIndex(lang);
        }
        return textMaps.get(lang).get(id);
    }
}
153 | -------------------------------------------------------------------------------- /docs/concepts/rest/CreateMemory.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Create Memory 5 | 6 |

End Point: [TMEngine URL]/create

7 |

Default: create

9 | 10 |

Send a 'POST' request to the method end point with these parameters in a 11 | JSON body:

12 | 13 | 14 | Field 15 | Required 16 | Content 17 | 18 | 19 | id 20 | No 21 | 22 |

ID of the memory to create. The value of 'id' must be 23 | unique.

24 |

Default value is current server time represented as the number of 25 | milliseconds since January 1, 1970, 00:00:00 GMT

26 |
27 |
28 | 29 | name 30 | Yes 31 | A meaningful name to identify the memory 32 | 33 | 34 | owner 35 | No 36 | 37 |

Text string used to identify the owner of the memory.

38 |

Default value is the login name of the user running the server.

39 |
40 |
41 | 42 | type 43 | No 44 | 45 |

Type of engine to use. Possible values are:

    46 |
  • 'MapDbEngine' (default)
  • 47 |
  • 'SQLEngine'
  • 48 |
49 |

50 |
51 |
52 | 53 | serverName 54 | No 55 | 56 |

Name or IP of the server running MySQL or MariaDB.

57 |

Required for SQLEngine. Default value: 58 | 'localhost'

59 |
60 |
61 | 62 | port 63 | No 64 | 65 |

Port in which MySQL or MariaDB listens for requests.

66 |

Required for SQLEngine. Default value: 67 | 3306

68 |
69 |
70 | 71 | userName 72 | No 73 | 74 |

ID of the MySQL or MariaDB user creating the database.

75 |

Required for SQLEngine.

76 |
77 |
78 | 79 | password 80 | No 81 | 82 |

Password of the MySQL or MariaDB user creating the database.

83 |

Required for SQLEngine.

84 |
85 |
86 |
87 | 88 |

Example:

89 | { 90 | "name": "First Memory", 91 | "type": "MapDbEngine" 92 | } 93 | 94 | { 95 | "name": "MariaMemory", 96 | "type": "SQLEngine", 97 | "serverName": "localhost", 98 | "port": 3306, 99 | "userName": "root", 100 | "password": "secret123!" 101 | } 102 |

The server responds with a JSON object containing two fields.

103 |

On success, field 'status' is set to 'OK' and field 104 | 'id' contains the ID assigned to the new memory.

105 | 106 |

Example:

107 | { 108 | "status": "OK", 109 | "id": "1234567890987" 110 | } 111 | 112 |

On error, field 'status' is set to 'failed' and field 113 | 'reason' contains the error cause.

114 | 115 |

Example:

116 | { 117 | "status": "failed", 118 | "reason": "Duplicated id" 119 | } 120 | 121 |
122 | -------------------------------------------------------------------------------- /docs/concepts/rest/SearchTranslations.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Search Translations 5 | 6 |

End Point: [TMEngine URL]/search

7 |

Default: search

9 |

Send a 'POST' request to the method end point with these parameters in a 10 | JSON body:

11 | 12 | 13 | Field 14 | Required 15 | Content 16 | 17 | 18 | id 19 | Yes 20 | 21 |

ID of the memory where the search should be performed

22 |
23 |
24 | 25 | text 26 | Yes 27 | Text string to search 28 | 29 | 30 | srcLang 31 | Yes 32 | Source language code 33 | 34 | 35 | tgtLang 36 | Yes 37 | Target language code 38 | 39 | 40 | similarity 41 | Yes 42 | Integer value indicating the lowest similarity percentage to include in 43 | results 44 | 45 | 46 | caseSensitive 47 | Yes 48 | Boolean value indicating whether the search should be case sensitive or 49 | not 50 | 51 |
52 |

Example:

53 | { 54 | "id": "1572538708492", 55 | "text": "tax compliance", 56 | "srcLang": "en-GB", 57 | "tgtLang": "fr-FR", 58 | "similarity": 70, 59 | "caseSensitive": false 60 | } 61 |

The server responds with a JSON object containing two fields.

62 |

On success, field 'status' is set to 'OK' and field 63 | 'process' contains the ID of the background search process that was 64 | initiated.

65 | 66 | { 67 | "process": "1572531573026", 68 | "status": "OK" 69 | } 70 | 71 |

On error, field 'status' is set to 'failed' and field 72 | 'reason' contains the error cause.

73 | { 74 | "status": "failed", 75 | "reason": "Unknown memory type" 76 | } 77 |

After starting the search process, monitor its status using the Process Status method.

79 |

On successful completion, the result will contain an array of similar segments in the 80 | data field.

81 |

Example:

82 | 83 | { 84 | "result": "Completed", 85 | "data": { 86 | "matches": [ 87 | { 88 | "similarity": 71, 89 | "origin": "1572538708492", 90 | "source": "<tuv xml:lang="en-GB"><seg>Non-compliance</seg></tuv>", 91 | "target": "<tuv xml:lang="fr-FR"><seg>Violation</seg></tuv>", 92 | "properties": { 93 | "creationdate": "20070126T082848Z", 94 | "subject": "Taxes", 95 | "x-Origin": "TM", 96 | "project": "Main TM", 97 | "changedate": "20070126T082848Z", 98 | "tuid": "1546700322331", 99 | "creationid": "MC", 100 | "changeid": "MC", 101 | "lastusagedate": "20070126T082848Z", 102 | "customer": "ACME Auditors" 103 | } 104 | }, { 105 | "similarity": 73, 106 | "origin": "1572538708492", 107 | "source": "<tuv xml:lang="en-GB"><seg>Legal Compliance</seg></tuv>", 108 | "target": "<tuv xml:lang="fr-FR"><seg>Conformité légale</seg></tuv>", 109 | "properties": { 110 | "creationdate": "20160725T141611Z", 111 | "x-ConfirmationLevel": "ApprovedTranslation", 112 | "subject": "Taxes", 113 | "x-Origin": "TM", 114 | "project": "Main TM", 115 | "changedate": "20160727T093143Z", 116 | "tuid": "1546700366038", 117 | "creationid": "Aqcis9\Aqcis", 118 | "changeid": "FG", 119 | "lastusagedate": "20160727T093143Z", 120 | "customer": "ACME Auditors" 121 | } 122 | }, { 123 | "similarity": 100, 124 | "origin": "fluenta", 125 | "source": "<tuv xml:lang="en-GB"><seg>tax compliance</seg></tuv>", 126 | "target": "<tuv xml:lang="fr-FR"><seg>Conformité fiscale</seg></tuv>", 127 | "properties": { 128 | "creationdate": "20171004T111450Z", 129 | "subject": "Taxes", 130 | "project": "Main TM", 131 | "changedate": "20171004T111450Z", 132 | "tuid": "1546700370945", 133 | "changeid": "translator2", 134 | "usagecount": "1", 135 | "x-ConfirmationLevel": "Translated", 136 | "x-Origin": "TM", 137 | "creationid": "translator2", 138 | "lastusagedate": "20171006T103930Z", 139 | "customer": "ACME Auditors" 140 | } 141 | } 142 | ], 143 | }, 144 | "status": "OK" 145 | } 146 | 147 |
148 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmutils/TMUtils.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/
package com.maxprograms.tmutils;

import com.maxprograms.languages.RegistryParser;
import com.maxprograms.xml.Element;
import com.maxprograms.xml.SAXBuilder;
import com.maxprograms.xml.TextNode;
import com.maxprograms.xml.XMLNode;

import org.xml.sax.SAXException;

import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.lang.System.Logger;
import java.lang.System.Logger.Level;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Calendar;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.TimeZone;

import javax.xml.parsers.ParserConfigurationException;

/**
 * Static helpers shared by the translation memory engines: text extraction
 * from segments, language code normalization, id generation, TMX date
 * handling and recursive folder deletion.
 */
public class TMUtils {

    private static Logger logger = System.getLogger(TMUtils.class.getName());
    private static RegistryParser registry;

    private TMUtils() {
        // private for security
    }

    /**
     * Returns the text of {@code seg} without inline markup.
     *
     * <p>Text nodes are appended as they are. Child elements are discarded
     * together with their content, except {@code <sub>} and {@code <hi>},
     * whose text is extracted recursively.</p>
     *
     * @param seg element to read
     * @return concatenated text, possibly empty
     */
    public static String pureText(Element seg) {
        StringBuilder text = new StringBuilder();
        List<XMLNode> content = seg.getContent();
        for (XMLNode node : content) {
            if (node.getNodeType() == XMLNode.TEXT_NODE) {
                text.append(((TextNode) node).getText());
            } else if (node.getNodeType() == XMLNode.ELEMENT_NODE) {
                Element child = (Element) node;
                String type = child.getName();
                // discard all inline elements
                // except <sub> and <hi>
                if (type.equals("sub") || type.equals("hi")) {
                    text.append(pureText(child));
                }
            }
        }
        return text.toString();
    }

    /**
     * Normalizes a language code against the language registry.
     *
     * <p>A code the registry already describes is returned unchanged.
     * Otherwise underscores are replaced with hyphens and, for two-part
     * codes, the language part is lower-cased and the second part is cased
     * as a country (two characters, upper case), kept as a numeric region
     * (three digits) or cased as a script (first letter upper case, rest
     * lower case). The language part is taken from the split result, so
     * language subtags of any length are handled.</p>
     *
     * @param langCode code to normalize, may be {@code null}
     * @return the normalized code, or {@code null} when {@code langCode} is
     *         {@code null} or the registry has no description for it
     * @throws IOException if the registry cannot be created
     */
    public static String normalizeLang(String langCode) throws IOException {
        if (registry == null) {
            registry = new RegistryParser();
        }
        if (langCode == null) {
            return null;
        }
        if (!registry.getTagDescription(langCode).isEmpty()) {
            return langCode;
        }
        String lang = langCode.replace('_', '-');
        String[] parts = lang.split("-");

        if (parts.length == 2) {
            // Locale.ROOT keeps the casing independent of the default locale
            String language = parts[0].toLowerCase(Locale.ROOT);
            String subtag = parts[1];
            String code;
            if (subtag.length() == 2) {
                // has country code
                code = language + "-" + subtag.toUpperCase(Locale.ROOT);
            } else if (isRegion(subtag)) {
                // lowercase the language code while keeping the region number
                code = language + "-" + subtag;
            } else {
                // may have a script
                code = language + "-" + subtag.substring(0, 1).toUpperCase(Locale.ROOT)
                        + subtag.substring(1).toLowerCase(Locale.ROOT);
            }
            return registry.getTagDescription(code).isEmpty() ? null : code;
        }
        // check if it is a valid tag with a number of parts other than 2
        return registry.getTagDescription(lang).isEmpty() ? null : lang;
    }

    /**
     * Tells whether {@code string} consists of exactly three ASCII digits.
     */
    private static boolean isRegion(String string) {
        if (string.length() != 3) {
            return false;
        }
        return isNumber(string.charAt(0)) && isNumber(string.charAt(1)) && isNumber(string.charAt(2));
    }

    /**
     * Tells whether {@code c} is an ASCII digit.
     */
    private static boolean isNumber(char c) {
        return c >= '0' && c <= '9';
    }

    /**
     * Creates an id from the current time in milliseconds.
     *
     * <p>The method sleeps for one millisecond before returning so that a
     * later call from the same thread gets a different value.</p>
     *
     * @return the current time in milliseconds as a decimal string
     * @throws InterruptedException if the thread is interrupted while sleeping
     */
    public static String createId() throws InterruptedException {
        long lng = new Date().getTime();
        // wait until we are in the next millisecond
        // before leaving to ensure uniqueness
        Thread.sleep(1);
        return String.valueOf(lng);
    }

    /**
     * Returns the current GMT time formatted as {@code YYYYMMDDThhmmssZ}.
     */
    public static String tmxDate() {
        return formatTmxDate(Calendar.getInstance(TimeZone.getTimeZone("GMT")));
    }

    /**
     * Formats the fields of {@code calendar} as {@code YYYYMMDDThhmmssZ},
     * padding every field except the year to two digits.
     */
    private static String formatTmxDate(Calendar calendar) {
        return "" + calendar.get(Calendar.YEAR)
                + twoDigits(calendar.get(Calendar.MONTH) + 1) // Calendar months start at 0
                + twoDigits(calendar.get(Calendar.DATE))
                + "T"
                + twoDigits(calendar.get(Calendar.HOUR_OF_DAY))
                + twoDigits(calendar.get(Calendar.MINUTE))
                + twoDigits(calendar.get(Calendar.SECOND))
                + "Z";
    }

    /**
     * Returns {@code value} as text, prefixed with "0" when below 10.
     */
    private static String twoDigits(int value) {
        return (value < 10 ? "0" : "") + value;
    }

    /**
     * Converts a date in {@code YYYYMMDDThhmmssZ} form to milliseconds since
     * the epoch, reading the fields as GMT.
     *
     * <p>The calendar is cleared before the fields are set, so the result
     * has no millisecond part and is the same on every call.</p>
     *
     * @param tmxDate date string to parse
     * @return the time in milliseconds, or {@code 0} after logging a warning
     *         when the string is {@code null}, too short or not numeric
     */
    public static long getGMTtime(String tmxDate) {
        if (tmxDate == null) {
            logger.log(Level.WARNING, "Unsupported TMX date: " + tmxDate);
            return 0L;
        }
        Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
        try {
            int year = Integer.parseInt(tmxDate.substring(0, 4));
            int month = Integer.parseInt(tmxDate.substring(4, 6)) - 1;
            int date = Integer.parseInt(tmxDate.substring(6, 8));
            int hour = Integer.parseInt(tmxDate.substring(9, 11));
            int minute = Integer.parseInt(tmxDate.substring(11, 13));
            int second = Integer.parseInt(tmxDate.substring(13, 15));
            // drop the milliseconds inherited from the current time
            calendar.clear();
            calendar.set(year, month, date, hour, minute, second);
            return calendar.getTimeInMillis();
        } catch (NumberFormatException | IndexOutOfBoundsException e) {
            logger.log(Level.WARNING, "Unsupported TMX date: " + tmxDate);
            return 0L;
        }
    }

    /**
     * Builds a {@code <tuv>} element for {@code lang} whose only child is the
     * element parsed from {@code seg}.
     *
     * @param lang value for the {@code xml:lang} attribute
     * @param seg  serialized content of the segment
     * @return the new {@code <tuv>} element
     * @throws SAXException                 if the segment cannot be parsed
     * @throws IOException                  if the segment cannot be read
     * @throws ParserConfigurationException if the parser cannot be configured
     */
    public static Element buildTuv(String lang, String seg)
            throws SAXException, IOException, ParserConfigurationException {
        Element tuv = new Element("tuv");
        tuv.setAttribute("xml:lang", lang);
        SAXBuilder builder = new SAXBuilder();
        // NOTE(review): the <seg> wrapper is restored from markup-stripped text;
        // it gives the parser a single root element - confirm against callers.
        byte[] xml = ("<seg>" + seg + "</seg>").getBytes(StandardCharsets.UTF_8);
        Element e = builder.build(new ByteArrayInputStream(xml)).getRootElement();
        tuv.addContent(e);
        return tuv;
    }

    /**
     * Returns the current time formatted as {@code YYYYMMDDThhmmssZ}.
     *
     * <p>The value is taken in GMT so that it agrees with the trailing
     * {@code Z}; it is the same value {@link #tmxDate()} produces.</p>
     */
    public static String creationDate() {
        return tmxDate();
    }

    /**
     * Returns the text of {@code seg} without inline markup. Same result as
     * {@link #pureText(Element)}.
     *
     * @param seg element to read
     * @return concatenated text, possibly empty
     */
    public static String extractText(Element seg) {
        return pureText(seg);
    }

    /**
     * Deletes {@code folder} and, when it is a directory, everything in it.
     *
     * @param folder path of the file or directory to delete
     * @throws IOException if the directory cannot be listed or an entry
     *                     cannot be deleted
     */
    public static void deleteFolder(String folder) throws IOException {
        File f = new File(folder);
        if (f.isDirectory()) {
            String[] list = f.list();
            if (list == null) {
                // File.list() returns null when the directory cannot be read
                throw new IOException("Unable to list contents of " + f.getAbsolutePath());
            }
            for (String name : list) {
                deleteFolder(new File(f, name).getAbsolutePath());
            }
        }
        Files.delete(f.toPath());
    }

}
219 | -------------------------------------------------------------------------------- /docs/concepts/methods.dita: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Interface Methods 5 | 6 | 7 |

The methods exposed by ITmEngine interface are:

8 |
    9 |
  • public abstract String getName(); 10 |

    Returns the name of the engine instance.

  • 11 | 12 |
  • public abstract String getType(); 13 |

    Returns the ITmEngine type used for storing data. Possible 14 | values are:

    15 |
      16 |
    • MapDbEngine
    • 17 |
    • SQLEngine
    • 18 |
    19 |
  • 20 | 21 |
  • public abstract void close() throws IOException, SQLException; 22 |

    Closes the engine instance.

    23 |
  • 24 | 25 |
  • public abstract int storeTMX(String tmxFile, String project, String customer, 26 | String subject) throws SAXException, IOException, 27 | ParserConfigurationException, SQLException; 28 |

    Imports a TMX document located at "tmxFile" and associates its 29 | data with "project", "customer" and 30 | "subject" properties

    31 |
  • 32 | 33 |
  • public abstract void exportMemory(String tmxfile, Set<String> langs, String 34 | srcLang, Map<String, String> properties) throws IOException, 35 | SAXException, ParserConfigurationException, SQLException; 36 |

    Exports engine data to a TMX document located at "tmxfile".

    37 |

    The "langs" argument may contain a set of language codes. If 38 | "langs" is not null, only the entries with the given 39 | language codes are exported.

    40 |

    The "srcLang" argument indicates the source language assigned to 41 | the TMX file. It can be one of the languages from the data set (see "getAllLanguages()") or the string 43 | "*all*".

    44 |

    The "properties" argument may contain a set of property-value 45 | pairs to be set in the exported TMX file.

    46 |
  • 47 | 48 |
  • public abstract void flag(String tuid) throws SQLException; 49 |

    Adds the property "x-flag", with its value set to 50 | "SW-Flag", to the translation unit identified by the 51 | "tuid" argument.

    52 |
  • 53 | 54 |
  • public abstract Set<String> getAllLanguages() throws SQLException; 55 |

    Returns a collection containing all language codes used in the engine's data.

    56 |
  • 57 | 58 |
  • public abstract Set<String> getAllClients() throws SQLException; 59 |

    Returns a collection containing all values assigned to the 60 | "client" property.

    61 |
  • 62 | 63 |
  • public abstract Set<String> getAllProjects() throws SQLException; 64 |

    Returns a collection containing all values assigned to the 65 | "project" property.

    66 |
  • 67 | 68 |
  • public abstract Set<String> getAllSubjects() throws SQLException; 69 |

    Returns a collection containing all values assigned to the 70 | "subject" property.

    71 |
  • 72 | 73 |
  • public abstract List<Match> searchTranslation(String searchStr, String 74 | srcLang, String tgtLang, int similarity, boolean 75 | caseSensitive) throws IOException, SAXException, 76 | ParserConfigurationException, SQLException; 77 |

    Returns a list of possible translations of the "searchStr" 78 | argument.

    79 |

    The search result is restricted to entries with the source language indicated by 80 | "srcLang" and target language indicated by 81 | "tgtLang" whose similarity to the given text is greater than or 82 | equal to the value of the "similarity" argument.

    83 |

    The "caseSensitive" argument indicates whether the search engine 84 | should consider letter case differences or not.

    85 |
  • 86 | 87 |
  • public abstract List<Element> concordanceSearch(String searchStr, 88 | String srcLang, int limit, boolean isRegexp, boolean 89 | caseSensitive) throws IOException, SAXException, 90 | ParserConfigurationException, SQLException; 91 |

    Returns a list of all translation units (<tu> elements) that contain the text 92 | indicated in "searchStr" argument.

    93 |

    Searches are performed against the translation unit variant (<tuv> element) 94 | with language set to "srcLang".

    95 |

    Search result contains at most "limit" entries. Returned data 96 | is in no particular order.

    97 |

    Argument "isRegexp" indicates whether the 98 | "searchStr" parameter should be considered a regular 99 | expression that matches the whole segment.

    100 |

    The "caseSensitive" argument indicates whether the search engine 101 | should consider letter case differences or not.

    102 |
  • 103 | 104 |
  • public abstract void storeTu(Element tu) throws IOException, SQLException; 105 |

    Stores translation unit "tu" into the database, overwriting any 106 | existing <tu> element with the same "id" attribute.

    107 |
  • 108 | 109 |
  • public abstract void commit() throws SQLException; 110 |

    Flushes to disk any data held in memory and not written yet.

    111 |
  • 112 | 113 |
  • public abstract Element getTu(String tuid) throws IOException, 114 | SAXException, ParserConfigurationException, SQLException; 115 |

    Returns the translation unit (<tu> element) that has the "id" attribute set to 116 | the "tuid" argument.

    117 |
  • 118 | 119 |
  • public abstract void removeTu(String tuid) throws IOException, SAXException, 120 | ParserConfigurationException, SQLException; 121 |

    Removes from the database the <tu> element that has the "id" attribute set to 122 | the "tuid" argument.

    123 |
  • 124 | 125 |
  • public abstract void deleteDatabase() throws IOException, SQLException; 126 |
      127 |
    • When used with "MapDbEngine" instances, closes the engine 128 | and removes all associated files from disk;
    • 129 |
    • When used with "SQLEngine" instances, closes the engine and 130 | drops the associated database from the SQL server.
    • 131 |
    132 |
  • 133 |
134 |
135 |
136 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/tmx11.dtd: -------------------------------------------------------------------------------- 1 | 13 | 14 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 90 | 91 | 92 | 93 | 94 | 96 | 97 | 98 | 99 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 122 | 123 | 124 | 125 | 126 | 129 | 131 | 132 | 133 | 134 | 139 | 140 | 141 | 142 | 146 | 147 | 148 | 149 | 164 | 165 | 166 | 167 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 192 | 193 | 194 | 196 | 197 | 198 | 199 | 200 | 204 | 205 | 206 | 210 | 211 | 212 | 214 | 215 | 216 | 217 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/tmx12.dtd: -------------------------------------------------------------------------------- 1 | 13 | 14 | 33 | 34 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 111 | 112 | 113 | 114 | 115 | 117 | 118 | 119 | 120 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 143 | 144 | 145 | 146 | 147 | 150 | 152 | 153 | 154 | 155 | 160 | 161 | 162 | 163 | 167 | 168 | 169 | 170 | 185 | 186 | 187 | 188 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 213 | 214 | 215 | 217 | 218 | 219 | 222 | 223 | 224 | 228 | 229 | 230 | 234 | 235 | 236 | 238 | 239 | 240 | 243 | 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/tmx13.dtd: -------------------------------------------------------------------------------- 1 | 13 | 14 | 27 | 28 | 47 | 48 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 125 | 126 | 127 | 128 | 129 | 131 | 132 | 133 | 134 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 158 | 159 | 160 | 161 | 162 | 165 | 167 | 168 | 169 | 170 | 175 | 176 | 177 | 178 | 182 | 183 | 184 | 185 | 200 | 201 | 202 | 203 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 230 | 231 | 232 | 234 | 
235 | 236 | 239 | 240 | 241 | 245 | 246 | 247 | 251 | 252 | 253 | 255 | 256 | 257 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmx/tmx14.dtd: -------------------------------------------------------------------------------- 1 | 13 | 14 | 25 | 26 | 39 | 40 | 59 | 60 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 137 | 138 | 139 | 140 | 141 | 143 | 144 | 145 | 146 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 170 | 171 | 172 | 173 | 174 | 177 | 179 | 180 | 181 | 182 | 187 | 188 | 189 | 190 | 195 | 196 | 197 | 198 | 199 | 200 | 215 | 216 | 217 | 218 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 245 | 246 | 247 | 249 | 250 | 251 | 254 | 255 | 256 | 260 | 261 | 262 | 266 | 267 | 268 | 271 | 272 | 273 | 274 | 275 | 276 | 278 | 279 | 280 | 281 | 282 | 283 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 1.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF 5 | THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial code and 12 | documentation distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | 16 | i) changes to the Program, and 17 | 18 | ii) additions to the Program; 19 | 20 | where such changes and/or additions to the Program originate from 21 | and are distributed by that particular Contributor. A Contribution 22 | 'originates' from a Contributor if it was added to the Program by 23 | such Contributor itself or anyone acting on such Contributor's 24 | behalf. 
Contributions do not include additions to the Program 25 | which: (i) are separate modules of software distributed in 26 | conjunction with the Program under their own license agreement, 27 | and (ii) are not derivative works of the Program. 28 | 29 | "Contributor" means any person or entity that distributes the Program. 30 | 31 | "Licensed Patents" mean patent claims licensable by a Contributor 32 | which are necessarily infringed by the use or sale of its Contribution 33 | alone or when combined with the Program. 34 | 35 | "Program" means the Contributions distributed in accordance with this 36 | Agreement. 37 | 38 | "Recipient" means anyone who receives the Program under this 39 | Agreement, including all Contributors. 40 | 41 | 2. GRANT OF RIGHTS 42 | 43 | a) Subject to the terms of this Agreement, each Contributor hereby 44 | grants Recipient a non-exclusive, worldwide, royalty-free 45 | copyright license to reproduce, prepare derivative works of, 46 | publicly display, publicly perform, distribute and sublicense the 47 | Contribution of such Contributor, if any, and such derivative 48 | works, in source code and object code form. 49 | 50 | b) Subject to the terms of this Agreement, each Contributor hereby 51 | grants Recipient a non-exclusive, worldwide, royalty-free patent 52 | license under Licensed Patents to make, use, sell, offer to sell, 53 | import and otherwise transfer the Contribution of such 54 | Contributor, if any, in source code and object code form. This 55 | patent license shall apply to the combination of the Contribution 56 | and the Program if, at the time the Contribution is added by the 57 | Contributor, such addition of the Contribution causes such 58 | combination to be covered by the Licensed Patents. The patent 59 | license shall not apply to any other combinations which include 60 | the Contribution. No hardware per se is licensed hereunder. 
61 | 62 | c) Recipient understands that although each Contributor grants the 63 | licenses to its Contributions set forth herein, no assurances are 64 | provided by any Contributor that the Program does not infringe the 65 | patent or other intellectual property rights of any other 66 | entity. Each Contributor disclaims any liability to Recipient for 67 | claims brought by any other entity based on infringement of 68 | intellectual property rights or otherwise. As a condition to 69 | exercising the rights and licenses granted hereunder, each 70 | Recipient hereby assumes sole responsibility to secure any other 71 | intellectual property rights needed, if any. For example, if a 72 | third party patent license is required to allow Recipient to 73 | distribute the Program, it is Recipient's responsibility to 74 | acquire that license before distributing the Program. 75 | 76 | d) Each Contributor represents that to its knowledge it has 77 | sufficient copyright rights in its Contribution, if any, to grant 78 | the copyright license set forth in this Agreement. 79 | 80 | 3. 
REQUIREMENTS 81 | 82 | A Contributor may choose to distribute the Program in object code form 83 | under its own license agreement, provided that: 84 | 85 | a) it complies with the terms and conditions of this Agreement; 86 | and 87 | 88 | b) its license agreement: 89 | 90 | i) effectively disclaims on behalf of all Contributors all 91 | warranties and conditions, express and implied, including 92 | warranties or conditions of title and non-infringement, and 93 | implied warranties or conditions of merchantability and fitness 94 | for a particular purpose; 95 | 96 | ii) effectively excludes on behalf of all Contributors all 97 | liability for damages, including direct, indirect, special, 98 | incidental and consequential damages, such as lost profits; 99 | 100 | iii) states that any provisions which differ from this Agreement 101 | are offered by that Contributor alone and not by any other party; 102 | and 103 | 104 | iv) states that source code for the Program is available from such 105 | Contributor, and informs licensees how to obtain it in a 106 | reasonable manner on or through a medium customarily used for 107 | software exchange. 108 | 109 | When the Program is made available in source code form: 110 | 111 | a) it must be made available under this Agreement; and 112 | 113 | b) a copy of this Agreement must be included with each copy of the 114 | Program. 115 | 116 | Contributors may not remove or alter any copyright notices contained 117 | within the Program. 118 | 119 | Each Contributor must identify itself as the originator of its 120 | Contribution, if any, in a manner that reasonably allows subsequent 121 | Recipients to identify the originator of the Contribution. 122 | 123 | 4. COMMERCIAL DISTRIBUTION 124 | 125 | Commercial distributors of software may accept certain 126 | responsibilities with respect to end users, business partners and the 127 | like. 
While this license is intended to facilitate the commercial use 128 | of the Program, the Contributor who includes the Program in a 129 | commercial product offering should do so in a manner which does not 130 | create potential liability for other Contributors. Therefore, if a 131 | Contributor includes the Program in a commercial product offering, 132 | such Contributor ("Commercial Contributor") hereby agrees to defend 133 | and indemnify every other Contributor ("Indemnified Contributor") 134 | against any losses, damages and costs (collectively "Losses") arising 135 | from claims, lawsuits and other legal actions brought by a third party 136 | against the Indemnified Contributor to the extent caused by the acts 137 | or omissions of such Commercial Contributor in connection with its 138 | distribution of the Program in a commercial product offering. The 139 | obligations in this section do not apply to any claims or Losses 140 | relating to any actual or alleged intellectual property 141 | infringement. In order to qualify, an Indemnified Contributor must: a) 142 | promptly notify the Commercial Contributor in writing of such claim, 143 | and b) allow the Commercial Contributor to control, and cooperate with 144 | the Commercial Contributor in, the defense and any related settlement 145 | negotiations. The Indemnified Contributor may participate in any such 146 | claim at its own expense. 147 | 148 | For example, a Contributor might include the Program in a commercial 149 | product offering, Product X. That Contributor is then a Commercial 150 | Contributor. If that Commercial Contributor then makes performance 151 | claims, or offers warranties related to Product X, those performance 152 | claims and warranties are such Commercial Contributor's responsibility 153 | alone. 
Under this section, the Commercial Contributor would have to 154 | defend claims against the other Contributors related to those 155 | performance claims and warranties, and if a court requires any other 156 | Contributor to pay any damages as a result, the Commercial Contributor 157 | must pay those damages. 158 | 159 | 5. NO WARRANTY 160 | 161 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS 162 | PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 163 | KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY 164 | WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY 165 | OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely 166 | responsible for determining the appropriateness of using and 167 | distributing the Program and assumes all risks associated with its 168 | exercise of rights under this Agreement , including but not limited to 169 | the risks and costs of program errors, compliance with applicable 170 | laws, damage to or loss of data, programs or equipment, and 171 | unavailability or interruption of operations. 172 | 173 | 6. DISCLAIMER OF LIABILITY 174 | 175 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR 176 | ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, 177 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING 178 | WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF 179 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 180 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR 181 | DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED 182 | HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 183 | 184 | 7. 
GENERAL 185 | 186 | If any provision of this Agreement is invalid or unenforceable under 187 | applicable law, it shall not affect the validity or enforceability of 188 | the remainder of the terms of this Agreement, and without further 189 | action by the parties hereto, such provision shall be reformed to the 190 | minimum extent necessary to make such provision valid and enforceable. 191 | 192 | If Recipient institutes patent litigation against any entity 193 | (including a cross-claim or counterclaim in a lawsuit) alleging that 194 | the Program itself (excluding combinations of the Program with other 195 | software or hardware) infringes such Recipient's patent(s), then such 196 | Recipient's rights granted under Section 2(b) shall terminate as of 197 | the date such litigation is filed. 198 | 199 | All Recipient's rights under this Agreement shall terminate if it 200 | fails to comply with any of the material terms or conditions of this 201 | Agreement and does not cure such failure in a reasonable period of 202 | time after becoming aware of such noncompliance. If all Recipient's 203 | rights under this Agreement terminate, Recipient agrees to cease use 204 | and distribution of the Program as soon as reasonably 205 | practicable. However, Recipient's obligations under this Agreement and 206 | any licenses granted by Recipient relating to the Program shall 207 | continue and survive. 208 | 209 | Everyone is permitted to copy and distribute copies of this Agreement, 210 | but in order to avoid inconsistency the Agreement is copyrighted and 211 | may only be modified in the following manner. The Agreement Steward 212 | reserves the right to publish new versions (including revisions) of 213 | this Agreement from time to time. No one other than the Agreement 214 | Steward has the right to modify this Agreement. The Eclipse Foundation 215 | is the initial Agreement Steward. 
The Eclipse Foundation may assign 216 | the responsibility to serve as the Agreement Steward to a suitable 217 | separate entity. Each new version of the Agreement will be given a 218 | distinguishing version number. The Program (including Contributions) 219 | may always be distributed subject to the version of the Agreement 220 | under which it was received. In addition, after a new version of the 221 | Agreement is published, Contributor may elect to distribute the 222 | Program (including its Contributions) under the new version. Except as 223 | expressly stated in Sections 2(a) and 2(b) above, Recipient receives 224 | no rights or licenses to the intellectual property of any Contributor 225 | under this Agreement, whether expressly, by implication, estoppel or 226 | otherwise. All rights in the Program not expressly granted under this 227 | Agreement are reserved. 228 | 229 | This Agreement is governed by the laws of the State of New York and 230 | the intellectual property laws of the United States of America. No 231 | party to this Agreement will bring a legal action under this Agreement 232 | more than one year after the cause of action arose. Each party waives 233 | its rights to a jury trial in any resulting litigation. -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/MapDbEngine.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003-2021 Maxprograms. 
3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.File; 15 | import java.io.FileOutputStream; 16 | import java.io.IOException; 17 | import java.lang.System.Logger; 18 | import java.lang.System.Logger.Level; 19 | import java.nio.charset.StandardCharsets; 20 | import java.text.MessageFormat; 21 | import java.util.Calendar; 22 | import java.util.Collections; 23 | import java.util.Hashtable; 24 | import java.util.Iterator; 25 | import java.util.List; 26 | import java.util.Map; 27 | import java.util.NavigableSet; 28 | import java.util.Set; 29 | import java.util.TreeSet; 30 | import java.util.Vector; 31 | import java.util.regex.Pattern; 32 | import java.util.regex.PatternSyntaxException; 33 | 34 | import javax.xml.parsers.ParserConfigurationException; 35 | 36 | import org.mapdb.Fun; 37 | import org.mapdb.Fun.Tuple2; 38 | import org.xml.sax.SAXException; 39 | 40 | import com.maxprograms.tmx.TMXReader; 41 | import com.maxprograms.tmutils.TMUtils; 42 | import com.maxprograms.xml.Attribute; 43 | import com.maxprograms.xml.Element; 44 | import com.maxprograms.xml.Indenter; 45 | /** ITmEngine implementation that keeps TU properties in a TuDatabase, segments in a TuvDatabase and n-grams in a FuzzyIndex, all three opened on the same per-memory folder. */ 46 | public class MapDbEngine implements ITmEngine, AutoCloseable { 47 | 48 | private static final Logger LOGGER = System.getLogger(MapDbEngine.class.getName()); 49 | 50 | private String dbname; 51 | private File database; 52 | private TuDatabase tuDb; 53 | private TuvDatabase tuvDb; 54 | private FuzzyIndex fuzzyIndex; 55 | /* Import context: assigned by storeTMX() and read by storeTu() for every TU it stores. */ 56 | private String currProject; 57 | private String currSubject; 58 | private String currCustomer; 59 | private String creationDate; 60 | 61 | private Set
tuAttributes; 62 | /* Counter behind nextId(); the value 0 means it has not been seeded yet. */ 63 | private long next; 64 | /** Opens the TU store, the TUV store and the fuzzy index in the folder workFolder/dbname, creating the folder when it does not exist. A failure opening any of them is logged and rethrown as an IOException that names the damaged part. NOTE(review): the original exception is not attached as cause, and stores opened before the failing one are left open. */ 65 | public MapDbEngine(String dbname, String workFolder) throws IOException { 66 | this.dbname = dbname; 67 | tuAttributes = Collections.synchronizedSortedSet(new TreeSet<>()); 68 | String[] array = new String[] { "tuid", "o-encoding", "datatype", "usagecount", "lastusagedate", "creationtool", 69 | "creationtoolversion", "creationdate", "creationid", "changedate", "segtype", "changeid", "o-tmf", 70 | "srclang" }; 71 | for (int i = 0; i < array.length; i++) { 72 | tuAttributes.add(array[i]); 73 | } 74 | File wfolder = new File(workFolder); 75 | database = new File(wfolder, dbname); 76 | if (!database.exists()) { 77 | database.mkdirs(); 78 | } 79 | try { 80 | tuDb = new TuDatabase(database); 81 | } catch (Exception e) { 82 | LOGGER.log(Level.ERROR, e.getMessage(), e); 83 | MessageFormat mf = new MessageFormat("TU storage of database {0} is damaged."); 84 | throw new IOException(mf.format(new String[] { dbname })); 85 | } 86 | try { 87 | tuvDb = new TuvDatabase(database); 88 | } catch (Exception e) { 89 | LOGGER.log(Level.ERROR, e.getMessage(), e); 90 | MessageFormat mf = new MessageFormat("TUV storage of database {0} is damaged."); 91 | throw new IOException(mf.format(new String[] { dbname })); 92 | } 93 | try { 94 | fuzzyIndex = new FuzzyIndex(database); 95 | } catch (Exception e) { 96 | LOGGER.log(Level.ERROR, e.getMessage(), e); 97 | MessageFormat mf = new MessageFormat("Fuzzy index of database {0} is damaged."); 98 | throw new IOException(mf.format(new String[] { dbname })); 99 | } 100 | } 101 | /** Closes the engine and then deletes the database folder through TMUtils.deleteFolder. */ 102 | @Override 103 | public void deleteDatabase() throws IOException { 104 | close(); 105 | TMUtils.deleteFolder(database.getAbsolutePath()); 106 | } 107 | /** Returns the fully qualified name of this class. */ 108 | @Override 109 | public String getType() { 110 | return MapDbEngine.class.getName(); 111 | } 112 | /** Closes the fuzzy index, the TU store and the TUV store, in that order. NOTE(review): there is no try/finally, so an exception from an earlier close() leaves the later stores open. */ 113 | @Override 114 | public synchronized void close() throws IOException { 115 | fuzzyIndex.close(); 116 | tuDb.close(); 117 | tuvDb.close(); 118 | } 119 | 120 |
@Override 121 | public String getName() { 122 | return dbname; 123 | } 124 | /** Imports a TMX file. Resets the id counter, records project, subject and customer (null becomes the empty string) and a creation date for storeTu(), parses the file with a TMXReader bound to this engine, commits, and returns reader.getCount(). */ 125 | @Override 126 | public int storeTMX(String tmxFile, String project, String customer, String subject) 127 | throws SAXException, IOException, ParserConfigurationException { 128 | next = 0l; 129 | currProject = project != null ? project : ""; 130 | currSubject = subject != null ? subject : ""; 131 | currCustomer = customer != null ? customer : ""; 132 | creationDate = TMUtils.creationDate(); 133 | 134 | TMXReader reader = new TMXReader(this); 135 | reader.parse(new File(tmxFile).toURI().toURL()); 136 | commit(); 137 | 138 | return reader.getCount(); 139 | } 140 | /** Writes every TU known to the TU store into tmxfile. A null props is treated as an empty map. When langs is not null, tuv children whose xml:lang value is not contained in langs are removed from each tu before it is indented and written. */ 141 | @Override 142 | public void exportMemory(String tmxfile, Set langs, String srcLang, Map props) 143 | throws IOException, SAXException, ParserConfigurationException { 144 | Map properties = props != null ? props : new Hashtable<>(); 145 | try (FileOutputStream output = new FileOutputStream(new File(tmxfile))) { 146 | writeHeader(output, srcLang, properties); 147 | writeString(output, " \n"); 148 | /* one pass over all keys of the TU store; each key is used to load the TU property map */ 149 | Set set = tuDb.getKeys(); 150 | Iterator it = set.iterator(); 151 | while (it.hasNext()) { 152 | int hash = it.next(); 153 | Map tuProps = tuDb.getTu(hash); 154 | Element tu = buildElement(tuProps); 155 | if (langs != null) { 156 | List tuvs = tu.getChildren("tuv"); 157 | Iterator et = tuvs.iterator(); 158 | while (et.hasNext()) { 159 | Element tuv = et.next(); 160 | if (!langs.contains(tuv.getAttributeValue("xml:lang"))) { 161 | tu.removeChild(tuv); 162 | } 163 | } 164 | } 165 | Indenter.indent(tu, 3, 2); 166 | writeString(output, " " + tu.toString() + "\n"); 167 | } 168 | writeString(output, " \n"); 169 | writeString(output, ""); 170 | } 171 | } 172 | /** Adds the property x-flag with value SW-Flag to the TU and stores it again; does nothing when the TU store has no entry for tuid. */ 173 | @Override 174 | public void flag(String tuid) { 175 | Map properties = tuDb.getTu(tuid.hashCode()); 176 | if (properties != null) { 177 | properties.put("x-flag", "SW-Flag"); 178 | tuDb.store(tuid, properties); 179 | } 180 | } 181 | 182 | @Override 183 | public Set
getAllClients() { 184 | return tuDb.getCustomers(); 185 | } 186 | /** Returns the languages recorded in the TU store. */ 187 | @Override 188 | public Set getAllLanguages() { 189 | return tuDb.getLanguages(); 190 | } 191 | /** Returns the projects recorded in the TU store. */ 192 | @Override 193 | public Set getAllProjects() { 194 | return tuDb.getProjects(); 195 | } 196 | /** Returns the subjects recorded in the TU store. */ 197 | @Override 198 | public Set getAllSubjects() { 199 | return tuDb.getSubjects(); 200 | } 201 | /** Searches srcLang segments for searchStr and returns a Match for every hit that also has a tgtLang segment. With similarity equal to 100 only tuvDb.getPerfectMatches() is consulted, and each hit is additionally compared with equals() when caseSensitive is set. With similarity below 100 candidates come from the n-gram index and are kept when their MatchQuality score reaches similarity. A similarity above 100 enters neither branch and yields an empty list. */ 202 | @Override 203 | public List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity, 204 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException { 205 | 206 | List result = new Vector<>(); 207 | 208 | if (similarity == 100) { 209 | // check for perfect matches 210 | Set perfect = tuvDb.getPerfectMatches(srcLang, searchStr); 211 | if (!perfect.isEmpty()) { 212 | Iterator it = perfect.iterator(); 213 | while (it.hasNext()) { 214 | String tuid = it.next(); 215 | String puretext = tuvDb.getPureText(srcLang, tuid.hashCode()); 216 | boolean isMatch = true; 217 | if (caseSensitive) { 218 | isMatch = searchStr.equals(puretext); 219 | } 220 | if (isMatch) { 221 | String targetSeg = tuvDb.getSegText(tgtLang, tuid); 222 | if (targetSeg != null) { 223 | String sourceSeg = tuvDb.getSegText(srcLang, tuid); 224 | Element source = TMUtils.buildTuv(srcLang, sourceSeg); 225 | Element target = TMUtils.buildTuv(tgtLang, targetSeg); 226 | Map properties = tuDb.getTu(tuid.hashCode()); 227 | Match match = new Match(source, target, 100, dbname, properties); 228 | result.add(match); 229 | } 230 | } 231 | } 232 | } 233 | } 234 | if (similarity < 100) { 235 | // Check for fuzzy matches 236 | int[] ngrams = NGrams.getNGrams(searchStr); 237 | int size = ngrams.length; 238 | if (size == 0) { 239 | return result; 240 | } 241 | int min = size * similarity / 100; 242 | int max = size * (200 - similarity) / 100; 243 | /* candidates maps a tuid to the number of query n-grams found for it in the index */ 244 | Map candidates = new Hashtable<>(); 245 | String lowerSearch = searchStr.toLowerCase(); 246 | 247 | NavigableSet> index = fuzzyIndex.getIndex(srcLang); 248
| for (int i = 0; i < ngrams.length; i++) { 249 | Iterable keys = Fun.filter(index, ngrams[i]); 250 | Iterator it = keys.iterator(); 251 | while (it.hasNext()) { 252 | String tuid = it.next(); 253 | if (candidates.containsKey(tuid)) { 254 | int count = candidates.get(tuid); 255 | candidates.put(tuid, count + 1); 256 | } else { 257 | candidates.put(tuid, 1); 258 | } 259 | } 260 | } 261 | /* Only candidates whose shared n-gram count lies between min and max (inclusive) are scored with MatchQuality; the comparison is done on lower-cased text unless caseSensitive is set. */ 262 | Set tuids = candidates.keySet(); 263 | Iterator it = tuids.iterator(); 264 | while (it.hasNext()) { 265 | String tuid = it.next(); 266 | int count = candidates.get(tuid); 267 | if (count >= min && count <= max) { 268 | int distance; 269 | String puretext = tuvDb.getPureText(srcLang, tuid.hashCode()); 270 | if (caseSensitive) { 271 | distance = MatchQuality.similarity(searchStr, puretext); 272 | } else { 273 | distance = MatchQuality.similarity(lowerSearch, puretext.toLowerCase()); 274 | } 275 | if (distance >= similarity) { 276 | String targetSeg = tuvDb.getSegText(tgtLang, tuid); 277 | if (targetSeg != null) { 278 | String sourceSeg = tuvDb.getSegText(srcLang, tuid); 279 | Element source = TMUtils.buildTuv(srcLang, sourceSeg); 280 | Element target = TMUtils.buildTuv(tgtLang, targetSeg); 281 | Map properties = tuDb.getTu(tuid.hashCode()); 282 | Match match = new Match(source, target, distance, dbname, properties); 283 | result.add(match); 284 | } 285 | } 286 | } 287 | } 288 | } 289 | return result; 290 | } 291 | /** Builds a tu element from a stored property map. Keys listed in tuAttributes become attributes of the tu; every other key becomes a prop child whose type attribute is the key. One tuv child is then added for each language of the TU store that has a segment for the tuid attribute. NOTE(review): properties is dereferenced without a null check, so a map that is missing from the TU store ends in a NullPointerException here. */ 292 | private Element buildElement(Map properties) 293 | throws IOException, SAXException, ParserConfigurationException { 294 | Element tu = new Element("tu"); 295 | Set keys = properties.keySet(); 296 | Iterator it = keys.iterator(); 297 | while (it.hasNext()) { 298 | String key = it.next(); 299 | if (tuAttributes.contains(key)) { 300 | tu.setAttribute(key, properties.get(key)); 301 | } else { 302 | Element prop = new Element("prop"); 303 | prop.setAttribute("type", key); 304 | prop.setText(properties.get(key)); 305 | tu.addContent(prop); 306 | } 307 | } 308 | String
tuid = tu.getAttributeValue("tuid"); 309 | Set langs = tuDb.getLanguages(); 310 | it = langs.iterator(); 311 | while (it.hasNext()) { 312 | String lang = it.next(); 313 | String seg = tuvDb.getSegText(lang, tuid); 314 | if (seg != null) { 315 | Element tuv = TMUtils.buildTuv(lang, seg); 316 | tu.addContent(tuv); 317 | } 318 | } 319 | return tu; 320 | } 321 | /** Scans every stored srcLang segment and returns the tu elements whose pure text contains searchStr, stopping once limit results were collected. With isRegexp the text is tested with Matcher.matches() instead. NOTE(review): matches() requires the pattern to cover the whole text, and caseSensitive is not consulted in regex mode; confirm both are intended. NOTE(review): a PatternSyntaxException is rethrown as an IOException carrying only its message, not the cause. */ 322 | @Override 323 | public List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp, 324 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException { 325 | List result = new Vector<>(); 326 | Pattern pattern = null; 327 | if (isRegexp) { 328 | try { 329 | pattern = Pattern.compile(searchStr); 330 | } catch (PatternSyntaxException pse) { 331 | throw new IOException(pse.getMessage()); 332 | } 333 | } 334 | String lowerStr = searchStr.toLowerCase(); 335 | NavigableSet keySet = tuvDb.getKeySet(srcLang); 336 | Iterator it = keySet.iterator(); 337 | while (it.hasNext()) { 338 | int hash = it.next(); 339 | String pureText = tuvDb.getPureText(srcLang, hash); 340 | if (isRegexp) { 341 | if (pattern != null && pattern.matcher(pureText).matches()) { 342 | result.add(buildElement(tuDb.getTu(hash))); 343 | if (result.size() == limit) { 344 | return result; 345 | } 346 | } 347 | } else { 348 | if (caseSensitive) { 349 | if (pureText.indexOf(searchStr) != -1) { 350 | result.add(buildElement(tuDb.getTu(hash))); 351 | if (result.size() == limit) { 352 | return result; 353 | } 354 | } 355 | } else { 356 | if (pureText.toLowerCase().indexOf(lowerStr) != -1) { 357 | result.add(buildElement(tuDb.getTu(hash))); 358 | if (result.size() == limit) { 359 | return result; 360 | } 361 | } 362 | } 363 | } 364 | 365 | } 366 | return result; 367 | } 368 | /** Stores one tu element. Missing tuid, creationdate and creationid attributes are filled in first; attributes and prop children are then flattened into one property map, the import context is added, each tuv is stored with its n-grams, and finally the property map is stored under the tuid. */ 369 | @Override 370 | public void storeTu(Element tu) throws IOException { 371 | String tuid = tu.getAttributeValue("tuid"); 372 | if (tuid.isEmpty()) { 373 | tuid = nextId(); 374 | tu.setAttribute("tuid", tuid); 375 | } 376 | if
(tu.getAttributeValue("creationdate").isEmpty()) { /* NOTE(review): the creationDate field is only assigned in storeTMX(); confirm storeTu() is never reached while it is still null */ 377 | tu.setAttribute("creationdate", creationDate); 378 | } 379 | if (tu.getAttributeValue("creationid").isEmpty()) { 380 | tu.setAttribute("creationid", System.getProperty("user.name")); 381 | } 382 | Map tuProperties = new Hashtable<>(); 383 | /* attributes first, then prop children keyed by their type attribute, then the subject, customer and project of the running import when they are not empty */ 384 | List atts = tu.getAttributes(); 385 | Iterator at = atts.iterator(); 386 | while (at.hasNext()) { 387 | Attribute a = at.next(); 388 | tuProperties.put(a.getName(), a.getValue()); 389 | } 390 | List properties = tu.getChildren("prop"); 391 | Iterator kt = properties.iterator(); 392 | while (kt.hasNext()) { 393 | Element prop = kt.next(); 394 | tuProperties.put(prop.getAttributeValue("type"), prop.getText()); 395 | } 396 | if (currSubject != null && !currSubject.isEmpty()) { 397 | tuProperties.put("subject", currSubject); 398 | } 399 | if (currCustomer != null && !currCustomer.isEmpty()) { 400 | tuProperties.put("customer", currCustomer); 401 | } 402 | if (currProject != null && !currProject.isEmpty()) { 403 | tuProperties.put("project", currProject); 404 | } 405 | List tuvs = tu.getChildren("tuv"); 406 | Set tuLangs = Collections.synchronizedSortedSet(new TreeSet<>()); 407 | /* tuLangs remembers the languages already stored for this TU so that only the first non-blank tuv per language is kept. NOTE(review): tuvDb.remove() runs before the tuLangs check, so a second tuv in an already stored language removes the segment that was just written and is then skipped. NOTE(review): n-gram entries of a previously stored version of the TU are not removed from the fuzzy index here. */ 408 | Iterator it = tuvs.iterator(); 409 | while (it.hasNext()) { 410 | Element tuv = it.next(); 411 | String lang = TMUtils.normalizeLang(tuv.getAttributeValue("xml:lang")); 412 | if (lang == null) { 413 | // Invalid language code, ignore this tuv 414 | continue; 415 | } 416 | tuDb.storeLanguage(lang); 417 | tuvDb.remove(lang, tuid); 418 | if (!tuLangs.contains(lang)) { 419 | Element seg = tuv.getChild("seg"); 420 | String puretext = TMUtils.extractText(seg); 421 | if (!puretext.isBlank()) { 422 | String segText = seg.toString(); 423 | segText = segText.substring("".length()); 424 | segText = segText.substring(0, segText.length() - "".length()); 425 | tuvDb.store(lang, tuid, puretext, segText); 426 | 427 | int[] ngrams = NGrams.getNGrams(puretext); 428 | NavigableSet> index =
fuzzyIndex.getIndex(lang); 429 | for (int i = 0; i < ngrams.length; i++) { 430 | Tuple2 entry = Fun.t2(ngrams[i], tuid); 431 | if (!index.contains(entry)) { 432 | index.add(entry); 433 | } 434 | } 435 | tuLangs.add(lang); 436 | } 437 | } 438 | } 439 | tuDb.store(tuid, tuProperties); 440 | } 441 | /** Commits the fuzzy index, the TU store and the TUV store, in that order. */ 442 | @Override 443 | public synchronized void commit() { 444 | fuzzyIndex.commit(); 445 | tuDb.commit(); 446 | tuvDb.commit(); 447 | } 448 | /** Loads the property map stored under the hash code of tuid and rebuilds the tu element from it. NOTE(review): flag() treats a null result of tuDb.getTu() as an unknown TU, but here the map is passed to buildElement() unchecked, which would throw a NullPointerException for an unknown tuid. */ 449 | @Override 450 | public Element getTu(String tuid) throws IOException, SAXException, ParserConfigurationException { 451 | Map properties = tuDb.getTu(tuid.hashCode()); 452 | return buildElement(properties); 453 | } 454 | /** Writes the opening part of the exported document through writeString(), including the header and the entries of properties when that map is not empty. */ 455 | private static void writeHeader(FileOutputStream output, String srcLang, Map properties) 456 | throws IOException { 457 | writeString(output, "\n"); 458 | writeString(output, 459 | "\n"); 460 | writeString(output, "\n"); 461 | writeString(output, 462 | "
\n"); 468 | } else { 469 | writeString(output, ">\n"); 470 | Set keys = properties.keySet(); 471 | Iterator it = keys.iterator(); 472 | while (it.hasNext()) { 473 | String key = it.next(); 474 | writeString(output, " " + properties.get(key) + "\n"); 475 | } 476 | writeString(output, "
\n"); 477 | } 478 | } 479 | 480 | private static void writeString(FileOutputStream output, String string) throws IOException { 481 | output.write(string.getBytes(StandardCharsets.UTF_8)); 482 | } 483 | 484 | private String nextId() { 485 | if (next == 0l) { 486 | next = Calendar.getInstance().getTimeInMillis(); 487 | } 488 | return "" + next++; 489 | } 490 | 491 | @Override 492 | public void removeTu(String tuid) throws IOException, SAXException, ParserConfigurationException { 493 | Element tu = getTu(tuid); 494 | tuDb.remove(tuid); 495 | 496 | List tuvs = tu.getChildren("tuv"); 497 | Iterator it = tuvs.iterator(); 498 | while (it.hasNext()) { 499 | Element tuv = it.next(); 500 | String lang = tuv.getAttributeValue("xml:lang"); 501 | tuvDb.remove(lang, tuid); 502 | 503 | Element seg = tuv.getChild("seg"); 504 | String puretext = TMUtils.extractText(seg); 505 | 506 | int[] ngrams = NGrams.getNGrams(puretext); 507 | NavigableSet> index = fuzzyIndex.getIndex(lang); 508 | for (int i = 0; i < ngrams.length; i++) { 509 | Tuple2 entry = Fun.t2(ngrams[i], tuid); 510 | if (index.contains(entry)) { 511 | index.remove(entry); 512 | } 513 | } 514 | } 515 | } 516 | 517 | } 518 | -------------------------------------------------------------------------------- /src/com/maxprograms/tmengine/SQLEngine.java: -------------------------------------------------------------------------------- 1 | /******************************************************************************* 2 | * Copyright (c) 2003, 2018 Maxprograms. 
3 | * 4 | * This program and the accompanying materials 5 | * are made available under the terms of the Eclipse Public License 1.0 6 | * which accompanies this distribution, and is available at 7 | * https://www.eclipse.org/org/documents/epl-v10.html 8 | * 9 | * Contributors: 10 | * Maxprograms - initial API and implementation 11 | *******************************************************************************/ 12 | package com.maxprograms.tmengine; 13 | 14 | import java.io.File; 15 | import java.io.FileOutputStream; 16 | import java.io.IOException; 17 | import java.lang.System.Logger; 18 | import java.lang.System.Logger.Level; 19 | import java.nio.charset.StandardCharsets; 20 | import java.sql.Connection; 21 | import java.sql.DriverManager; 22 | import java.sql.PreparedStatement; 23 | import java.sql.ResultSet; 24 | import java.sql.SQLException; 25 | import java.sql.Statement; 26 | import java.util.Calendar; 27 | import java.util.Collections; 28 | import java.util.Hashtable; 29 | import java.util.Iterator; 30 | import java.util.List; 31 | import java.util.Map; 32 | import java.util.Properties; 33 | import java.util.Set; 34 | import java.util.TreeSet; 35 | import java.util.Vector; 36 | 37 | import javax.xml.parsers.ParserConfigurationException; 38 | 39 | import com.maxprograms.tmutils.TMUtils; 40 | import com.maxprograms.tmx.TMXReader; 41 | import com.maxprograms.xml.Attribute; 42 | import com.maxprograms.xml.Element; 43 | import com.maxprograms.xml.Indenter; 44 | 45 | import org.xml.sax.SAXException; 46 | /** ITmEngine implementation that stores a memory in a MariaDB database reached through JDBC, using one connection with auto-commit switched off. */ 47 | public class SQLEngine implements ITmEngine { 48 | 49 | private static final Logger LOGGER = System.getLogger(SQLEngine.class.getName()); 50 | 51 | private Connection conn; 52 | private String dbName; 53 | private String serverName; 54 | private int port; 55 | private String userName; 56 | private String password; 57 | /* Import context: assigned by storeTMX() for the TUs stored during that import. */ 58 | private String currProject; 59 | private String currSubject; 60 | private String currCustomer; 61 | private String creationDate; 62 | 63
| private long next; 64 | /* Cache filled by getAllLanguages() on its first call. */ 65 | private Set languages; 66 | /* Statements kept in fields so they can be reused; close() closes every one that is not null. */ 67 | private PreparedStatement insertProperties; 68 | private PreparedStatement removeProperties; 69 | private PreparedStatement insertTuv; 70 | private PreparedStatement removeTuv; 71 | private PreparedStatement checkTu; 72 | private PreparedStatement selectProperties; 73 | private PreparedStatement selectSeg; 74 | private PreparedStatement selectPureText; 75 | 76 | private Hashtable insertNgram; 77 | private Hashtable removeNgram; 78 | private Hashtable selectNgram; 79 | 80 | private Set tuAttributes; 81 | /** Connects to database dbName on serverName:port and switches auto-commit off. When the first connection attempt throws an SQLException, createDatabase() is called and the connection is attempted again. NOTE(review): user name and password are appended to the JDBC URL unescaped, so characters such as an ampersand in either value would change the URL. NOTE(review): every SQLException, not only a missing database, leads to createDatabase(). */ 82 | public SQLEngine(String dbName, String serverName, int port, String userName, String password) throws SQLException { 83 | this.dbName = dbName; 84 | this.serverName = serverName; 85 | this.port = port; 86 | this.userName = userName; 87 | this.password = password; 88 | StringBuilder connBuilder = new StringBuilder(); 89 | connBuilder.append("jdbc:mariadb://"); 90 | connBuilder.append(serverName); 91 | connBuilder.append(':'); 92 | connBuilder.append(port); 93 | connBuilder.append('/'); 94 | connBuilder.append(dbName); 95 | connBuilder.append("?user="); 96 | connBuilder.append(userName); 97 | connBuilder.append("&password="); 98 | connBuilder.append(password); 99 | try { 100 | conn = DriverManager.getConnection(connBuilder.toString()); // "jdbc:mariadb://localhost:3306/DB?user=root&password=myPassword" 101 | conn.setAutoCommit(false); 102 | } catch (SQLException e) { 103 | createDatabase(); 104 | conn = DriverManager.getConnection(connBuilder.toString()); 105 | conn.setAutoCommit(false); 106 | LOGGER.log(Level.INFO, "Database " + dbName + " created."); 107 | } 108 | insertNgram = new Hashtable<>(); 109 | removeNgram = new Hashtable<>(); 110 | selectNgram = new Hashtable<>(); 111 | } 112 | 113 | @Override 114 | public String getType() { 115 | return SQLEngine.class.getName(); 116 | } 117 | /** Creates the database and its tuv, tuprop and langs tables over a connection opened on the server without a database name. */ 118 | private void createDatabase() throws SQLException { 119 | StringBuilder serverBuilder = new
StringBuilder(); 120 | serverBuilder.append("jdbc:mariadb://"); 121 | serverBuilder.append(serverName); 122 | serverBuilder.append(':'); 123 | serverBuilder.append(port); 124 | serverBuilder.append('/'); 125 | Properties prop = new Properties(); 126 | prop.setProperty("user", userName); 127 | prop.setProperty("password", password); 128 | prop.setProperty("useUnicode", "true"); 129 | prop.setProperty("characterEncoding", StandardCharsets.UTF_8.name()); 130 | try (Connection connection = DriverManager.getConnection(serverBuilder.toString(), prop)) { 131 | try (Statement stmt = connection.createStatement()) { /* tuv is keyed by (tuid, lang) and tuprop by (tuid, propType); langs has a single lang column */ 132 | stmt.execute("CREATE DATABASE `" + dbName + "` CHARACTER SET utf8"); 133 | stmt.execute("CREATE TABLE `" + dbName + "`.tuv ( tuid VARCHAR(30) NOT NULL," 134 | + " lang VARCHAR(15) NOT NULL, seg TEXT NOT NULL, pureText TEXT," 135 | + " PRIMARY KEY (tuid,lang));"); 136 | stmt.execute("CREATE TABLE `" + dbName + "`.tuprop ( tuid VARCHAR(30) NOT NULL," 137 | + " propType VARCHAR(30) NOT NULL, content TEXT, PRIMARY KEY (tuid, propType)" + ");"); 138 | stmt.execute("CREATE TABLE `" + dbName + "`.langs ( lang VARCHAR(15) NOT NULL);"); 139 | } 140 | } 141 | } 142 | /** Closes this engine and then drops the database over a separate connection opened on the server without a database name. NOTE(review): dbName is concatenated into the statement inside backticks; presumably callers validate it, verify before accepting external names. */ 143 | @Override 144 | public void deleteDatabase() throws SQLException { 145 | close(); 146 | StringBuilder serverBuilder = new StringBuilder(); 147 | serverBuilder.append("jdbc:mariadb://"); 148 | serverBuilder.append(serverName); 149 | serverBuilder.append(':'); 150 | serverBuilder.append(port); 151 | serverBuilder.append('/'); 152 | Properties prop = new Properties(); 153 | prop.setProperty("user", userName); 154 | prop.setProperty("password", password); 155 | prop.setProperty("useUnicode", "true"); 156 | prop.setProperty("characterEncoding", StandardCharsets.UTF_8.name()); 157 | try (Connection connection = DriverManager.getConnection(serverBuilder.toString(), prop)) { 158 | try (Statement stmt = connection.createStatement()) { 159 | stmt.execute("DROP DATABASE `" + dbName + "`"); 160 | } 161 | } 162
| 163 | /** Commits the connection, closes every prepared statement held in the fields and in the three n-gram tables, then closes the connection. NOTE(review): nothing is wrapped in try/finally, so an SQLException from commit() or from any statement close() skips the remaining closes, including conn.close(). */ 164 | @Override 165 | public void close() throws SQLException { 166 | conn.commit(); 167 | if (insertProperties != null) { 168 | insertProperties.close(); 169 | } 170 | if (removeProperties != null) { 171 | removeProperties.close(); 172 | } 173 | if (selectProperties != null) { 174 | selectProperties.close(); 175 | } 176 | if (insertTuv != null) { 177 | insertTuv.close(); 178 | } 179 | if (removeTuv != null) { 180 | removeTuv.close(); 181 | } 182 | if (checkTu != null) { 183 | checkTu.close(); 184 | } 185 | if (selectSeg != null) { 186 | selectSeg.close(); 187 | } 188 | if (selectPureText != null) { 189 | selectPureText.close(); 190 | } 191 | Set keys = insertNgram.keySet(); 192 | Iterator it = keys.iterator(); 193 | while (it.hasNext()) { 194 | insertNgram.get(it.next()).close(); 195 | } 196 | keys = removeNgram.keySet(); 197 | it = keys.iterator(); 198 | while (it.hasNext()) { 199 | removeNgram.get(it.next()).close(); 200 | } 201 | keys = selectNgram.keySet(); 202 | it = keys.iterator(); 203 | while (it.hasNext()) { 204 | selectNgram.get(it.next()).close(); 205 | } 206 | conn.close(); 207 | } 208 | /** Returns the database name given to the constructor. */ 209 | @Override 210 | public String getName() { 211 | return dbName; 212 | } 213 | /** Imports a TMX file. Resets the id counter, replaces null customer, subject and project by the empty string and records them together with a creation date, parses the file with a TMXReader bound to this engine, commits, and returns reader.getCount(). */ 214 | @Override 215 | public int storeTMX(String tmxFile, String project, String customer, String subject) 216 | throws SAXException, IOException, ParserConfigurationException, SQLException { 217 | next = 0l; 218 | if (customer == null) { 219 | customer = ""; 220 | } 221 | if (subject == null) { 222 | subject = ""; 223 | } 224 | if (project == null) { 225 | project = ""; 226 | } 227 | 228 | currProject = project; 229 | currSubject = subject; 230 | currCustomer = customer; 231 | creationDate = TMUtils.creationDate(); 232 | 233 | TMXReader reader = new TMXReader(this); 234 | reader.parse(new File(tmxFile).toURI().toURL()); 235 | commit(); 236 | 237 | return reader.getCount(); 238 | } 239 | /** Writes one tu element to tmxfile for every distinct tuid found in the tuprop table; each element is obtained from getTu(tuid, langs). A null properties map is replaced by an empty one. */ 240 | @Override 241 | public void exportMemory(String tmxfile, Set langs, String srcLang,
Map properties) 242 | throws IOException, SAXException, ParserConfigurationException, SQLException { 243 | if (languages == null) { /* make sure the languages cache is loaded before any TU is rebuilt */ 244 | getAllLanguages(); 245 | } 246 | if (properties == null) { 247 | properties = new Hashtable<>(); 248 | } 249 | try (FileOutputStream output = new FileOutputStream(new File(tmxfile))) { 250 | writeHeader(output, srcLang, properties); 251 | writeString(output, " \n"); /* the query below yields each tuid once, however many property rows it has */ 252 | try (Statement stmt = conn.createStatement()) { 253 | try (ResultSet rs = stmt.executeQuery("SELECT DISTINCT tuid FROM `" + dbName + "`.tuprop")) { 254 | while (rs.next()) { 255 | String tuid = rs.getString(1); 256 | Element tu = getTu(tuid, langs); 257 | Indenter.indent(tu, 3, 2); 258 | writeString(output, " " + tu.toString() + "\n"); 259 | } 260 | } 261 | } 262 | writeString(output, " \n"); 263 | writeString(output, "
"); 264 | } 265 | } 266 | 267 | @Override 268 | public void flag(String tuid) throws SQLException { 269 | if (checkPreviousTu(tuid)) { 270 | Hashtable properties = getTuProperies(tuid); 271 | if (!properties.containsKey("x-flag")) { 272 | if (insertProperties == null) { 273 | insertProperties = conn.prepareStatement( 274 | "INSERT INTO `" + dbName + "`.tuprop (tuid, propType, content) VALUES (?,?,?)"); 275 | } 276 | insertProperties.setString(1, tuid); 277 | insertProperties.setString(2, "x-flag"); 278 | insertProperties.setString(3, "SW-Flag"); 279 | insertProperties.execute(); 280 | } 281 | } 282 | } 283 | 284 | @Override 285 | public Set getAllClients() throws SQLException { 286 | Set result = Collections.synchronizedSortedSet(new TreeSet<>()); 287 | try (Statement stmt = conn.createStatement()) { 288 | try (ResultSet rs = stmt 289 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='customer'")) { 290 | while (rs.next()) { 291 | result.add(rs.getNString(1)); 292 | } 293 | } 294 | } 295 | return result; 296 | } 297 | 298 | @Override 299 | public Set getAllLanguages() throws SQLException { 300 | if (languages == null) { 301 | languages = Collections.synchronizedSortedSet(new TreeSet<>()); 302 | try (Statement stmt = conn.createStatement()) { 303 | try (ResultSet rs = stmt.executeQuery("SELECT lang FROM `" + dbName + "`.langs")) { 304 | while (rs.next()) { 305 | languages.add(rs.getString(1)); 306 | } 307 | } 308 | } 309 | } 310 | return languages; 311 | } 312 | 313 | @Override 314 | public Set getAllProjects() throws SQLException { 315 | Set result = Collections.synchronizedSortedSet(new TreeSet<>()); 316 | try (Statement stmt = conn.createStatement()) { 317 | try (ResultSet rs = stmt 318 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='project'")) { 319 | while (rs.next()) { 320 | result.add(rs.getNString(1)); 321 | } 322 | } 323 | } 324 | return result; 325 | } 326 | 327 | @Override 328 | 
public Set getAllSubjects() throws SQLException { 329 | Set result = Collections.synchronizedSortedSet(new TreeSet<>()); 330 | try (Statement stmt = conn.createStatement()) { 331 | try (ResultSet rs = stmt 332 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='subject'")) { 333 | while (rs.next()) { 334 | result.add(rs.getNString(1)); 335 | } 336 | } 337 | } 338 | return result; 339 | } 340 | /** Finds stored segments similar to searchStr and pairs them with their tgtLang segment. Steps: compute the ngrams of searchStr (an empty ngram array returns an empty list); query the matrix table of srcLang for tuids with a matching ngram and a segSize inside a window derived from the length of searchStr and similarity (integer arithmetic); score the pure text of each candidate with MatchQuality.similarity, lower-casing both sides unless caseSensitive is true; keep candidates scoring at least similarity and wrap them in Match objects that also carry the unit properties. @param searchStr text to look for @param srcLang language of searchStr, also selects the matrix table @param tgtLang language of the translation to return @param similarity minimum score to accept, also used as a percentage for the length window @param caseSensitive compare without lower-casing when true @return list of matches, possibly empty */ 341 | @Override 342 | public List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity, 343 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException { 344 | List result = new Vector<>(); 345 | 346 | int[] ngrams = NGrams.getNGrams(searchStr); 347 | int size = ngrams.length; 348 | if (size == 0) { 349 | return result; 350 | } 351 | /* Length window: segments shorter than minLength or longer than maxLength are excluded by the query. */ 352 | int minLength = searchStr.length() * similarity / 100; 353 | int maxLength = searchStr.length() * (200 - similarity) / 100; 354 | 355 | StringBuilder set = new StringBuilder(); 356 | set.append("" + ngrams[0]); 357 | for (int i = 1; i < size; i++) { 358 | set.append("," + ngrams[i]); 359 | } 360 | 361 | Set candidates = Collections.synchronizedSortedSet(new TreeSet<>()); 362 | String lowerSearch = searchStr.toLowerCase(); 363 | /* NOTE(review): the statement has a single placeholder inside in (...) and the setString call below binds the whole comma-joined ngram list to it, so the database receives one string value; confirm it is really evaluated as a list of ngrams. */ 364 | PreparedStatement stmt = selectNgram.get(srcLang); 365 | if (stmt == null) { 366 | stmt = conn.prepareStatement("SELECT tuid from `" + dbName + "`.matrix_" 367 | + srcLang.replace('-', '_').toLowerCase() + " WHERE ngram in (?) and segSize>=? 
AND segSize<=?"); 368 | selectNgram.put(srcLang, stmt); 369 | } 370 | stmt.setString(1, set.toString()); 371 | stmt.setInt(2, minLength); 372 | stmt.setInt(3, maxLength); 373 | 374 | try (ResultSet rs = stmt.executeQuery()) { 375 | while (rs.next()) { 376 | String tuid = rs.getString(1); 377 | candidates.add(tuid); 378 | } 379 | } 380 | 381 | Iterator it = candidates.iterator(); 382 | while (it.hasNext()) { 383 | String tuid = it.next(); 384 | int distance; 385 | String puretext = getPureText(srcLang, tuid); 386 | if (caseSensitive) { 387 | distance = MatchQuality.similarity(searchStr, puretext); 388 | } else { 389 | distance = MatchQuality.similarity(lowerSearch, puretext.toLowerCase()); 390 | } 391 | if (distance >= similarity) { 392 | String targetSeg = getSegText(tgtLang, tuid); /* NOTE(review): getSegText starts from an empty string, so this null check presumably never rejects a unit that has no tgtLang row; confirm whether an isEmpty test was intended. */ 393 | if (targetSeg != null) { 394 | String sourceSeg = getSegText(srcLang, tuid); 395 | Element source = TMUtils.buildTuv(srcLang, sourceSeg); 396 | Element target = TMUtils.buildTuv(tgtLang, targetSeg); 397 | Hashtable properties = getTuProperies(tuid); 398 | Match match = new Match(source, target, distance, dbName, properties); 399 | result.add(match); 400 | } 401 | } 402 | } 403 | return result; 404 | } 405 | /** Reads tuv.pureText for the given tuid and lang through the lazily prepared selectPureText statement. @return the stored text, or an empty string when no row matches @throws SQLException if the query fails */ 406 | private String getPureText(String lang, String tuid) throws SQLException { 407 | if (selectPureText == null) { 408 | selectPureText = conn.prepareStatement("SELECT pureText FROM `" + dbName + "`.tuv WHERE tuid=? 
AND lang=?"); 409 | } 410 | String pureText = ""; 411 | selectPureText.setString(1, tuid); 412 | selectPureText.setString(2, lang); 413 | try (ResultSet rs = selectPureText.executeQuery()) { 414 | while (rs.next()) { 415 | pureText = rs.getNString(1); 416 | } 417 | } 418 | return pureText; 419 | } 420 | /** Looks up translation units whose srcLang pure text matches searchStr and returns them as tu elements built by getTu. Three query forms are used: REGEXP when isRegexp is true (caseSensitive is not consulted in that branch); LOWER(pureText) LIKE with a lower-cased searchStr when caseSensitive is false; plain LIKE otherwise. In the LIKE forms searchStr is wrapped in percent signs and is not escaped, so percent and underscore characters inside it act as SQL wildcards. @param searchStr text or regular expression to look for @param srcLang language of the segments to search @param limit value bound to the LIMIT clause @param isRegexp selects the REGEXP query @param caseSensitive selects the plain LIKE query when isRegexp is false @return list of tu elements in sorted tuid order, possibly empty */ 421 | @Override 422 | public List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp, 423 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException { 424 | Set candidates = Collections.synchronizedSortedSet(new TreeSet<>()); 425 | if (isRegexp) { 426 | try (PreparedStatement stmt = conn.prepareStatement( 427 | "SELECT tuid, pureText FROM `" + dbName + "`.tuv WHERE lang=? AND pureText REGEXP ? LIMIT ?")) { 428 | stmt.setString(1, srcLang); 429 | stmt.setString(2, searchStr); 430 | stmt.setInt(3, limit); 431 | try (ResultSet rs = stmt.executeQuery()) { 432 | while (rs.next()) { 433 | candidates.add(rs.getString(1)); 434 | } 435 | } 436 | } 437 | } else { 438 | if (!caseSensitive) { 439 | try (PreparedStatement stmt = conn.prepareStatement("SELECT tuid, pureText FROM `" + dbName 440 | + "`.tuv WHERE lang=? AND LOWER(pureText) LIKE ? LIMIT ?")) { 441 | stmt.setString(1, srcLang); 442 | stmt.setString(2, "%" + searchStr.toLowerCase() + "%"); 443 | stmt.setInt(3, limit); 444 | try (ResultSet rs = stmt.executeQuery()) { 445 | while (rs.next()) { 446 | candidates.add(rs.getString(1)); 447 | } 448 | } 449 | } 450 | } else { 451 | try (PreparedStatement stmt = conn.prepareStatement( 452 | "SELECT tuid, pureText FROM `" + dbName + "`.tuv WHERE lang=? AND pureText LIKE ? 
LIMIT ?")) { 453 | stmt.setString(1, srcLang); 454 | stmt.setString(2, "%" + searchStr + "%"); 455 | stmt.setInt(3, limit); 456 | try (ResultSet rs = stmt.executeQuery()) { 457 | while (rs.next()) { 458 | candidates.add(rs.getString(1)); 459 | } 460 | } 461 | } 462 | } 463 | } /* Only the tuid column is used; each candidate is rebuilt in full through getTu. */ 464 | List result = new Vector<>(); 465 | Iterator it = candidates.iterator(); 466 | while (it.hasNext()) { 467 | Element tu = getTu(it.next()); 468 | result.add(tu); 469 | } 470 | return result; 471 | } 472 | /** Stores or replaces a translation unit. An empty tuid attribute gets a generated id from nextId and the unit is treated as new. Empty creationdate and creationid attributes are filled from the creationDate field and the user.name system property. All tu attributes and prop children are gathered into one property table, then overridden by currSubject, currCustomer and currProject when those are non-empty. Each tuv with a valid language is stored together with its ngrams; tuvs with blank text are skipped and only the first tuv per language is kept. Finally the property rows are written. @param tu the tu element to store; its tuid, creationdate and creationid attributes may be set as a side effect @throws IOException declared by the signature @throws SQLException if a statement fails */ 473 | @Override 474 | public void storeTu(Element tu) throws IOException, SQLException { 475 | String tuid = tu.getAttributeValue("tuid"); 476 | boolean isNew = false; 477 | if (tuid.isEmpty()) { 478 | tuid = nextId(); 479 | tu.setAttribute("tuid", tuid); 480 | isNew = true; 481 | } /* NOTE(review): checkPreviousTu returns true when tuprop already holds rows for the tuid, so the assignment below marks an existing unit as new and an unknown one as not new; confirm whether a negation is missing. */ 482 | if (!isNew) { 483 | isNew = checkPreviousTu(tuid); 484 | } 485 | if (tu.getAttributeValue("creationdate").isEmpty()) { 486 | tu.setAttribute("creationdate", creationDate); 487 | } 488 | if (tu.getAttributeValue("creationid").isEmpty()) { 489 | tu.setAttribute("creationid", System.getProperty("user.name")); 490 | } 491 | Hashtable tuProperties = new Hashtable<>(); 492 | /* Attributes first, then prop children; a prop whose type equals an attribute name overwrites that entry. */ 493 | List atts = tu.getAttributes(); 494 | Iterator at = atts.iterator(); 495 | while (at.hasNext()) { 496 | Attribute a = at.next(); 497 | tuProperties.put(a.getName(), a.getValue()); 498 | } 499 | List properties = tu.getChildren("prop"); 500 | Iterator kt = properties.iterator(); 501 | while (kt.hasNext()) { 502 | Element prop = kt.next(); 503 | tuProperties.put(prop.getAttributeValue("type"), prop.getText()); 504 | } 505 | if (currSubject != null && !currSubject.isEmpty()) { 506 | tuProperties.put("subject", currSubject); 507 | } 508 | if (currCustomer != null && !currCustomer.isEmpty()) { 509 | tuProperties.put("customer", currCustomer); 510 | } 511 | if (currProject != null && !currProject.isEmpty()) { 512 | tuProperties.put("project", currProject); 513 | } 514 | List tuvs = tu.getChildren("tuv"); 515 | Set tuLangs = 
Collections.synchronizedSortedSet(new TreeSet<>()); 516 | /* Walk the tuv children; tuLangs remembers the languages already stored so that a repeated language is not stored twice. */ 517 | Iterator it = tuvs.iterator(); 518 | while (it.hasNext()) { 519 | Element tuv = it.next(); 520 | String lang = TMUtils.normalizeLang(tuv.getAttributeValue("xml:lang")); 521 | if (lang == null) { 522 | // Invalid language code, ignore this tuv 523 | continue; 524 | } 525 | storeLanguage(lang); /* NOTE(review): when isNew is false the removal below runs before the tuLangs check, so a second tuv with the same language deletes the row just stored and is then skipped; confirm whether the removal belongs inside the tuLangs branch. */ 526 | if (!isNew) { 527 | removeTuv(lang, tuid); 528 | } 529 | if (!tuLangs.contains(lang)) { 530 | Element seg = tuv.getChild("seg"); 531 | String puretext = TMUtils.extractText(seg); 532 | if (puretext.isBlank()) { 533 | continue; 534 | } 535 | /* NOTE(review): in this copy both substring calls use an empty literal, so as written they leave segText unchanged; presumably the opening and closing seg tags were meant to be stripped. Confirm against the upstream source. */ 536 | String segText = seg.toString(); 537 | segText = segText.substring("".length()); 538 | segText = segText.substring(0, segText.length() - "".length()); 539 | storeTuv(lang, tuid, puretext, segText); 540 | 541 | int[] ngrams = NGrams.getNGrams(puretext); 542 | storeNgrams(lang, tuid, ngrams, puretext.length()); 543 | 544 | tuLangs.add(lang); 545 | } 546 | } 547 | if (!isNew) { 548 | removeTuProperties(tuid); 549 | } 550 | storeTuProperties(tuid, tuProperties); 551 | } 552 | /** Tells whether tuprop holds at least one row for tuid, using the lazily prepared checkTu COUNT statement. @param tuid id to look up @return true when the count is not zero @throws SQLException if the query fails */ 553 | private boolean checkPreviousTu(String tuid) throws SQLException { 554 | if (checkTu == null) { 555 | checkTu = conn.prepareStatement("SELECT COUNT(*) FROM `" + dbName + "`.tuprop WHERE tuid=?"); 556 | } 557 | checkTu.setString(1, tuid); 558 | int count = 0; 559 | try (ResultSet rs = checkTu.executeQuery()) { 560 | while (rs.next()) { 561 | count = rs.getInt(1); 562 | } 563 | } 564 | return count != 0; 565 | } 566 | /** Inserts one row (tuid, ngram, segSize) per element of ngrams into the matrix table of lang. The table name is matrix_ followed by lang lower-cased with hyphens turned into underscores; the prepared statement is cached per language in insertNgram. @throws SQLException if an insert fails */ 567 | private void storeNgrams(String lang, String tuid, int[] ngrams, int segSize) throws SQLException { 568 | PreparedStatement stmt = insertNgram.get(lang); 569 | if (stmt == null) { 570 | stmt = conn.prepareStatement("INSERT INTO `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase() 571 | + " (tuid, ngram, segSize) VALUES (?,?,?)"); 572 | insertNgram.put(lang, stmt); 573 | } 574 | stmt.setString(1, tuid); 575 | stmt.setInt(3, segSize); 576 | for (int i = 0; i < 
ngrams.length; i++) { /* Parameters 1 and 3 were bound once before the loop; only the ngram changes per row. NOTE(review): storeLanguage creates the matrix tables with PRIMARY KEY (tuid,ngram), so this loop assumes ngrams holds no repeated value; confirm with NGrams.getNGrams. */ 577 | stmt.setInt(2, ngrams[i]); 578 | stmt.execute(); 579 | } 580 | } 581 | /** Inserts one tuv row (lang, tuid, seg, pureText) through the lazily prepared insertTuv statement. @param segText value stored in the seg column @param puretext value stored in the pureText column @throws SQLException if the insert fails */ 582 | private void storeTuv(String lang, String tuid, String puretext, String segText) throws SQLException { 583 | if (insertTuv == null) { 584 | insertTuv = conn 585 | .prepareStatement("INSERT INTO `" + dbName + "`.tuv (lang, tuid, seg, pureText ) VALUES (?,?,?,?)"); 586 | } 587 | insertTuv.setString(1, lang); 588 | insertTuv.setString(2, tuid); 589 | insertTuv.setNString(3, segText); 590 | insertTuv.setNString(4, puretext); 591 | insertTuv.execute(); 592 | } 593 | /** Deletes every tuprop row of tuid through the lazily prepared removeProperties statement. @throws SQLException if the delete fails */ 594 | private void removeTuProperties(String tuid) throws SQLException { 595 | if (removeProperties == null) { 596 | removeProperties = conn.prepareStatement("DELETE FROM `" + dbName + "`.tuprop WHERE tuid=?"); 597 | } 598 | removeProperties.setString(1, tuid); 599 | removeProperties.execute(); 600 | } 601 | /** Inserts one tuprop row (tuid, propType, content) per entry of properties. The insertProperties statement is prepared on first use and is the same field that flag uses. @param properties property names mapped to their values @throws SQLException if an insert fails */ 602 | private void storeTuProperties(String tuid, Hashtable properties) throws SQLException { 603 | if (insertProperties == null) { 604 | insertProperties = conn 605 | .prepareStatement("INSERT INTO `" + dbName + "`.tuprop (tuid, propType, content) VALUES (?,?,?)"); 606 | } 607 | insertProperties.setString(1, tuid); 608 | Set keys = properties.keySet(); 609 | Iterator it = keys.iterator(); 610 | while (it.hasNext()) { 611 | String prop = it.next(); 612 | insertProperties.setString(2, prop); 613 | insertProperties.setNString(3, properties.get(prop)); 614 | insertProperties.execute(); 615 | } 616 | } 617 | /** Deletes the tuv row of (tuid, lang) and then every row of tuid in the matrix table of lang. The matrix delete statement is cached per language in removeNgram. @throws SQLException if a delete fails */ 618 | private void removeTuv(String lang, String tuid) throws SQLException { 619 | if (removeTuv == null) { 620 | removeTuv = conn.prepareStatement("DELETE FROM `" + dbName + "`.tuv WHERE tuid=? 
AND lang=?"); 621 | } 622 | removeTuv.setString(1, tuid); 623 | removeTuv.setString(2, lang); 624 | removeTuv.execute(); 625 | PreparedStatement stmt = removeNgram.get(lang); 626 | if (stmt == null) { 627 | stmt = conn.prepareStatement( 628 | "DELETE FROM `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase() + " WHERE tuid=?"); 629 | removeNgram.put(lang, stmt); 630 | } 631 | stmt.setString(1, tuid); 632 | stmt.execute(); 633 | } 634 | /** Registers lang the first time it is seen: inserts it into the langs table, creates its matrix table (tuid, ngram, segSize, primary key on tuid plus ngram, index on ngram), commits the connection and adds lang to the cached languages set. Does nothing when the language is already in that set. NOTE(review): the commit applies to the whole connection, so statements executed earlier are presumably committed as well; confirm this is intended. @throws SQLException if a statement fails */ 635 | private void storeLanguage(String lang) throws SQLException { 636 | if (languages == null) { 637 | getAllLanguages(); 638 | } 639 | if (!languages.contains(lang)) { 640 | try (PreparedStatement stmt = conn 641 | .prepareStatement("INSERT INTO `" + dbName + "`.langs (lang) VALUES (?)")) { 642 | stmt.setString(1, lang); 643 | stmt.execute(); 644 | } 645 | try (Statement stmt = conn.createStatement()) { 646 | stmt.execute("CREATE TABLE `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase() + " (" 647 | + " tuid VARCHAR(30) NOT NULL, ngram INTEGER NOT NULL, segSize INTEGER," 648 | + " INDEX `ngrams` (`ngram` ASC) VISIBLE, PRIMARY KEY (tuid,ngram));"); 649 | } 650 | conn.commit(); 651 | languages.add(lang); 652 | } 653 | } 654 | /** Commits the current transaction of the underlying connection. @throws SQLException if the commit fails */ 655 | @Override 656 | public void commit() throws SQLException { 657 | conn.commit(); 658 | } 659 | /** Rebuilds a tu element from the database. Properties whose key is one of the attribute names cached in tuAttributes become attributes of the element; every other property becomes a prop child with a type attribute. A tuv child is added for each language in langs that has a non-empty stored segment; an empty langs set means all languages from getAllLanguages. For a tuid without rows this method adds no attributes and no children. @param tuid id of the unit @param langs languages to include, empty for all @return the assembled tu element */ 660 | private Element getTu(String tuid, Set langs) 661 | throws SQLException, SAXException, IOException, ParserConfigurationException { 662 | if (tuAttributes == null) { 663 | tuAttributes = Collections.synchronizedSortedSet(new TreeSet<>()); 664 | String[] array = new String[] { "tuid", "o-encoding", "datatype", "usagecount", "lastusagedate", 665 | "creationtool", "creationtoolversion", "creationdate", "creationid", "changedate", "segtype", 666 | "changeid", "o-tmf", "srclang" }; 667 | for (int i = 0; i < array.length; i++) { 668 | tuAttributes.add(array[i]); 669 | } 670 | } 671 | Hashtable properties = getTuProperies(tuid); 672 | Element tu = new Element("tu"); 673 | Set keys = 
properties.keySet(); 674 | Iterator it = keys.iterator(); 675 | while (it.hasNext()) { 676 | String key = it.next(); 677 | if (tuAttributes.contains(key)) { 678 | tu.setAttribute(key, properties.get(key)); 679 | } else { 680 | Element prop = new Element("prop"); 681 | prop.setAttribute("type", key); 682 | prop.setText(properties.get(key)); 683 | tu.addContent(prop); 684 | } 685 | } /* An empty language filter means every language known to the memory. */ 686 | if (langs.isEmpty()) { 687 | langs = getAllLanguages(); 688 | } 689 | it = langs.iterator(); 690 | while (it.hasNext()) { 691 | String lang = it.next(); 692 | String seg = getSegText(lang, tuid); 693 | if (!seg.isEmpty()) { 694 | Element tuv = TMUtils.buildTuv(lang, seg); 695 | tu.addContent(tuv); 696 | } 697 | } 698 | return tu; 699 | } 700 | /** Rebuilds the tu element of tuid with all languages, by delegating to the two-argument getTu with an empty language set. @param tuid id of the unit @return the assembled tu element */ 701 | @Override 702 | public Element getTu(String tuid) throws IOException, SAXException, ParserConfigurationException, SQLException { 703 | return getTu(tuid, Collections.synchronizedSortedSet(new TreeSet<>())); 704 | } 705 | /** Reads tuv.seg for the given tuid and lang through the lazily prepared selectSeg statement. @return the stored segment text, or an empty string when no row matches @throws SQLException if the query fails */ 706 | private String getSegText(String lang, String tuid) throws SQLException { 707 | if (selectSeg == null) { 708 | selectSeg = conn.prepareStatement("SELECT seg FROM `" + dbName + "`.tuv WHERE tuid=? 
AND lang=?"); 709 | } 710 | String seg = ""; 711 | selectSeg.setString(1, tuid); 712 | selectSeg.setString(2, lang); 713 | try (ResultSet rs = selectSeg.executeQuery()) { 714 | while (rs.next()) { 715 | seg = rs.getNString(1); 716 | } 717 | } 718 | return seg; 719 | } 720 | /** Loads every (propType, content) row of tuid from tuprop into a new Hashtable through the lazily prepared selectProperties statement. NOTE(review): Hashtable rejects null values, so a NULL content column would raise NullPointerException in the put call; confirm the column cannot be NULL. @return the property table, empty when tuid has no rows @throws SQLException if the query fails */ 721 | private Hashtable getTuProperies(String tuid) throws SQLException { 722 | if (selectProperties == null) { 723 | selectProperties = conn 724 | .prepareStatement("SELECT propType, content FROM `" + dbName + "`.tuprop WHERE tuid=?"); 725 | } 726 | selectProperties.setString(1, tuid); 727 | Hashtable properties = new Hashtable<>(); 728 | try (ResultSet rs = selectProperties.executeQuery()) { 729 | while (rs.next()) { 730 | properties.put(rs.getString(1), rs.getNString(2)); 731 | } 732 | } 733 | return properties; 734 | } 735 | /** Generates the next tuid as a decimal string. The next counter is seeded with the current time in milliseconds while it is still zero and is incremented after each call. The literal 0l is a long zero written with a lower-case suffix. */ 736 | private String nextId() { 737 | if (next == 0l) { 738 | next = Calendar.getInstance().getTimeInMillis(); 739 | } 740 | return "" + next++; 741 | } 742 | /** Writes the opening part of the export file to output through writeString; in the visible else branch one line is written per entry of properties. NOTE(review): this copy of the method is incomplete: source lines 750 to 755 are absent and the string literals look truncated, so the exact text written cannot be documented from here. Confirm against the upstream source before editing. */ 743 | private static void writeHeader(FileOutputStream output, String srcLang, Map properties) 744 | throws IOException { 745 | writeString(output, "\n"); 746 | writeString(output, 747 | "\n"); 748 | writeString(output, "\n"); 749 | writeString(output, 750 | "
\n"); 756 | } else { 757 | writeString(output, ">\n"); 758 | Set keys = properties.keySet(); 759 | Iterator it = keys.iterator(); 760 | while (it.hasNext()) { 761 | String key = it.next(); 762 | writeString(output, " " + properties.get(key) + "\n"); 763 | } 764 | writeString(output, "
\n"); 765 | } 766 | } 767 | /** Writes string to output encoded as UTF-8. @throws IOException if the write fails */ 768 | private static void writeString(FileOutputStream output, String string) throws IOException { 769 | output.write(string.getBytes(StandardCharsets.UTF_8)); 770 | } 771 | /** Removes a translation unit. The unit is first rebuilt with getTu to learn which languages it has; then its tuprop rows are deleted and removeTuv is called for the xml:lang value of every tuv child. No commit is issued here. @param tuid id of the unit to remove */ 772 | @Override 773 | public void removeTu(String tuid) throws IOException, SAXException, ParserConfigurationException, SQLException { 774 | Element tu = getTu(tuid); 775 | removeTuProperties(tuid); 776 | List tuvs = tu.getChildren("tuv"); 777 | 778 | Iterator it = tuvs.iterator(); 779 | while (it.hasNext()) { 780 | Element tuv = it.next(); 781 | String lang = tuv.getAttributeValue("xml:lang"); 782 | removeTuv(lang, tuid); 783 | } 784 | } 785 | } 786 | --------------------------------------------------------------------------------