├── lib
├── dtd.jar
├── json.jar
├── mapdb.jar
├── tmengine.jar
├── openxliff.jar
├── jsoup-1.11.3.jar
└── mariadb-java-client-2.4.3.jar
├── .gitattributes
├── docs
├── TMEngine.pdf
├── images
│ └── tmengine.png
├── concepts
│ ├── untitled1.dita
│ ├── JavaApps.dita
│ ├── databases.dita
│ ├── serverStop.dita
│ ├── ITmEngine.dita
│ ├── dependencies.dita
│ ├── rest
│ │ ├── StopServer.dita
│ │ ├── ListMemories.dita
│ │ ├── CloseMemory.dita
│ │ ├── DeleteMemory.dita
│ │ ├── OpenMemory.dita
│ │ ├── RenameMemory.dita
│ │ ├── GetLanguages.dita
│ │ ├── ProcessStatus.dita
│ │ ├── ExportTMX.dita
│ │ ├── ImportTMX.dita
│ │ ├── ConcordanceSearch.dita
│ │ ├── CreateMemory.dita
│ │ └── SearchTranslations.dita
│ ├── Server.dita
│ ├── ServerAPI.dita
│ ├── TMEngine.dita
│ └── methods.dita
└── TMEngine.ditamap
├── .gitignore
├── tmserver.bat
├── tmserver.sh
├── org.eclipse.jdt.core.prefs
├── .project
├── src
├── module-info.java
└── com
│ └── maxprograms
│ ├── tmengine
│ ├── Constants.java
│ ├── FuzzyIndex.java
│ ├── NGrams.java
│ ├── MatchQuality.java
│ ├── ITmEngine.java
│ ├── TuDatabase.java
│ ├── Match.java
│ ├── TuvDatabase.java
│ ├── MapDbEngine.java
│ └── SQLEngine.java
│ ├── tmx
│ ├── TMXReader.java
│ ├── TMXResolver.java
│ ├── TMXContentHandler.java
│ ├── tmx11.dtd
│ ├── tmx12.dtd
│ ├── tmx13.dtd
│ └── tmx14.dtd
│ ├── tmserver
│ └── TmServer.java
│ └── tmutils
│ └── TMUtils.java
├── .classpath
├── README.md
└── LICENSE
/lib/dtd.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/dtd.jar
--------------------------------------------------------------------------------
/lib/json.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/json.jar
--------------------------------------------------------------------------------
/lib/mapdb.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/mapdb.jar
--------------------------------------------------------------------------------
/lib/tmengine.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/tmengine.jar
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/docs/TMEngine.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/docs/TMEngine.pdf
--------------------------------------------------------------------------------
/lib/openxliff.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/openxliff.jar
--------------------------------------------------------------------------------
/lib/jsoup-1.11.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/jsoup-1.11.3.jar
--------------------------------------------------------------------------------
/docs/images/tmengine.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/docs/images/tmengine.png
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .DS_Store
2 | /bin/
3 | /dist/
4 | /sonar*
5 | .sonar_lock
6 | report-task.txt
7 | /docs/out
8 |
9 |
--------------------------------------------------------------------------------
/lib/mariadb-java-client-2.4.3.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/rmraya/TMEngine/HEAD/lib/mariadb-java-client-2.4.3.jar
--------------------------------------------------------------------------------
/tmserver.bat:
--------------------------------------------------------------------------------
@echo off
rem Launches the TMEngine server with the Java runtime found in .\bin.
rem Run from the folder that holds this script so relative paths resolve.
pushd "%~dp0"

rem The MariaDB JDBC driver is passed on the class path; other jars are loaded from lib as modules.
set CP="lib\mariadb-java-client-2.4.3.jar"

rem %* forwards every command line argument; "$@" is shell syntax and is passed literally by cmd.exe.
.\bin\java -cp %CP% --module-path lib com.maxprograms.tmserver.TmServer %*

rem Restore the caller's working directory.
popd
--------------------------------------------------------------------------------
/tmserver.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Launches the TMEngine server with the Java runtime found in ./bin.

# Run from the folder that holds this script so relative paths resolve;
# stop if the folder cannot be entered instead of starting Java elsewhere.
cd "$(dirname "$0")/" || exit 1

# The MariaDB JDBC driver is passed on the class path; other jars are loaded from lib as modules.
export CP="lib/mariadb-java-client-2.4.3.jar"

# Quote "$@" so arguments containing spaces reach the server unchanged.
bin/java -cp "$CP" --module-path lib com.maxprograms.tmserver.TmServer "$@"
--------------------------------------------------------------------------------
/docs/concepts/untitled1.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
--------------------------------------------------------------------------------
/docs/concepts/JavaApps.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Java Applications
5 |
6 |
7 | TMEngine can be embedded in a Java application that needs to deal with translation
8 | memories.
9 |
10 |
11 |
--------------------------------------------------------------------------------
/docs/concepts/databases.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Supported Databases
5 |
6 | Two options are currently available for storing TM data:
7 |
8 | - MapDB databases
9 | - MySQL or MariaDB databases.
10 |
11 |
12 |
13 |
--------------------------------------------------------------------------------
/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=11
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=10
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=11
12 |
--------------------------------------------------------------------------------
/docs/concepts/serverStop.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Stopping the Server
5 |
6 | A running TMEngine server can be stopped using the Stop
7 | Server method from its REST API.
8 | Simply visit http://localhost:8000/TMServer/stop using a browser or open a connection to that URL when the
10 | server is embedded in a Java application. Adjust the port number if necessary.
11 |
12 |
13 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | TMEngine
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
19 | 1671707855370
20 |
21 | 30
22 |
23 | org.eclipse.core.resources.regexFilterMatcher
24 | node_modules|\.git|__CREATED_BY_JAVA_LANGUAGE_SERVER__
25 |
26 |
27 |
28 |
29 |
--------------------------------------------------------------------------------
/src/module-info.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 |
13 | module tmengine {
14 |
15 | exports com.maxprograms.tmengine;
16 |
17 | opens com.maxprograms.tmengine to mapdb;
18 |
19 | requires mapdb;
20 | requires java.xml;
21 | requires java.base;
22 | requires java.sql;
23 | requires jdk.httpserver;
24 | requires transitive json;
25 | requires transitive openxliff;
26 | }
--------------------------------------------------------------------------------
/docs/concepts/ITmEngine.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | ITmEngine Interface
5 |
6 |
7 | The interface com.maxprograms.tmengine.ITmEngine provides the set of
8 | methods that applications can use to interact with TM data.
9 | Applications that use TMEngine can work with the classes that implement
10 | ITmEngine or implement new versions that work with other database
11 | systems.
12 | The classes that implement ITmEngine interface are:
13 |
14 | - com.maxprograms.tmengine.MapDbEngine
15 | - com.maxprograms.tmengine.SQLEngine
16 |
17 |
18 |
19 |
--------------------------------------------------------------------------------
/docs/concepts/dependencies.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Dependencies
5 |
6 |
7 | TMEngine is built on top of OpenXLIFF Filters, an open source project that
9 | provides support for managing the XML side of TMX documents.
10 | OpenXLIFF includes the last Java release of MapDB, modified to work with modules in Java
12 | 11 or newer.
13 | MariaDB JDBC driver is also included as an optional dependency. MariaDB code does not
14 | support modularization at this moment.
15 | Java 11 is used to compile and link TMEngine binaries for distribution. Newer versions of
16 | Java can also be used.
17 |
18 |
19 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/Constants.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
/**
 * Constant strings shared across TMEngine: tool identification values and
 * the status labels {@code Pending}, {@code Completed} and {@code Failed}.
 */
public class Constants {

    /** Name that identifies this tool. */
    public static final String CREATIONTOOL = "Maxprograms TM Engine";

    /** Version number of this release. */
    public static final String VERSION = "5.0.3";

    /** Build identifier of this release. */
    public static final String BUILD = "20211003_0942";

    /** Status label "Pending". */
    public static final String PENDING = "Pending";

    /** Status label "Completed". */
    public static final String COMPLETED = "Completed";

    /** Status label "Failed". */
    public static final String FAILED = "Failed";

    private Constants() {
        // holds static constants only; never instantiated
    }
}
28 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
--------------------------------------------------------------------------------
/docs/concepts/rest/StopServer.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Stop Server
5 |
6 | End Point: [TMEngine URL]/stop
7 | Default: stop
9 |
10 | Send a 'GET' request to the method end point.
11 | The server responds with a JSON object. On success, field 'status' is
12 | set to 'OK'.
13 | Example:
14 | {
15 | "status": "OK"
16 | }
17 | On error, field 'status' is set to 'failed' and field
18 | 'reason' contains the error cause.
19 |
20 | Example:
21 | {
22 | "status": "failed",
23 | "reason": "Error connecting to database"
24 | }
25 |
26 |
27 |
--------------------------------------------------------------------------------
/docs/concepts/Server.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Starting the Server
5 |
6 | Running .\tmserver.bat or ./tmserver.sh without
7 | parameters displays help for starting TMEngine as a standalone server.
8 | Usage:
9 |
10 | tmserver.sh [-help] [-version] [-port portNumber]
11 |
12 | Where:
13 |
14 | -help: (optional) Display this help information and exit
15 | -version: (optional) Display version & build information and exit
16 | -port: (optional) Port for running HTTP server. Default is 8000
17 |
18 | You can verify that the server is running by visiting its default web page: http://localhost:8000/TMServer/ (adjust port number if you change
21 | it).
22 |
23 |
24 |
25 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/TMXReader.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmx;
13 |
14 | import java.io.IOException;
15 | import java.net.URL;
16 |
17 | import javax.xml.parsers.ParserConfigurationException;
18 |
19 | import org.xml.sax.SAXException;
20 |
21 | import com.maxprograms.tmengine.ITmEngine;
22 | import com.maxprograms.xml.SAXBuilder;
23 |
24 | public class TMXReader {
25 |
26 | private SAXBuilder builder;
27 | private TMXContentHandler handler;
28 |
29 | public TMXReader(ITmEngine database) {
30 | handler = new TMXContentHandler(database);
31 | builder = new SAXBuilder();
32 | builder.setEntityResolver(new TMXResolver());
33 | builder.setContentHandler(handler);
34 | }
35 |
36 | public void parse(URL url) throws IOException, SAXException, ParserConfigurationException {
37 | builder.build(url);
38 | }
39 |
40 | public int getCount() {
41 | return handler.getCount();
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/docs/concepts/rest/ListMemories.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | List Memories
5 |
6 | End Point: [TMEngine URL]/list
7 | Default: list
8 |
9 | Send a 'GET' request to the method end point.
10 | The server responds with a JSON object containing two fields. On success, field
11 | 'status' is set to 'OK' and field
12 | 'memories' contains an array with memory details.
13 | {
14 | "memories": [
15 | {
16 | "owner": "manager",
17 | "isOpen": false,
18 | "name": "Fluenta Localization",
19 | "id": "fluenta",
20 | "type": "MapDbEngine",
21 | "creationDate": "2019-09-10 21:54:13 UYT"
22 | },
23 | {
24 | "owner": "manager",
25 | "isOpen": false,
26 | "name": "First Memory",
27 | "id": "1568163112478",
28 | "type": "MapDbEngine",
29 | "creationDate": "2019-09-10 21:51:52 UYT"
30 | }
31 | ],
32 | "status": "OK"
33 | }
34 |
35 | On error, field 'status' is set to 'failed' and field
36 | 'reason' contains the error cause.
37 |
38 | Example:
39 | {
40 | "status": "failed",
41 | "reason": "Error reading memories"
42 | }
43 |
44 |
45 |
--------------------------------------------------------------------------------
/docs/concepts/rest/CloseMemory.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Close Memory
5 |
6 | End Point: [TMEngine URL]/close
7 | Default: close
9 | Send a 'POST' request to the method end point with this parameter in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 | ID of the memory to close
21 |
22 |
23 | Example:
24 | {
25 | "id": "1568163112478"
26 | }
27 | The server responds with a JSON object. On success, field 'status' is
28 | set to 'OK'.
29 | {
30 | "status": "OK"
31 | }
32 | On error, field 'status' is set to 'failed' and field
33 | 'reason' contains the error cause.
34 |
35 | Example:
36 | {
37 | "status": "failed",
38 | "reason": "Unknown memory"
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/docs/concepts/rest/DeleteMemory.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Delete Memory
5 |
6 | End Point: [TMEngine URL]/delete
7 | Default: delete
9 | Send a 'POST' request to the method end point with this parameter in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 | ID of the memory to delete
21 |
22 |
23 | Example:
24 | {
25 | "id": "1568163112478"
26 | }
27 | The server responds with a JSON object. On success, field 'status' is
28 | set to 'OK'.
29 | {
30 | "status": "OK"
31 | }
32 | On error, field 'status' is set to 'failed' and field
33 | 'reason' contains the error cause.
34 | Example:
35 | {
36 | "status": "failed",
37 | "reason": "Unknown memory"
38 | }
39 |
40 |
41 |
--------------------------------------------------------------------------------
/docs/concepts/rest/OpenMemory.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Open Memory
5 |
6 | End Point: [TMEngine URL]/open
7 | Default: open
9 | Send a 'POST' request to the method end point with this parameter in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 | ID of the memory to open
21 |
22 |
23 | Example:
24 | {
25 | "id": "1568163112478"
26 | }
27 | The server responds with a JSON object. On success, field 'status' is
28 | set to 'OK'.
29 | {
30 | "status": "OK"
31 | }
32 | On error, field 'status' is set to 'failed' and field
33 | 'reason' contains the error cause.
34 |
35 | Example:
36 | {
37 | "status": "failed",
38 | "reason": "Unknown memory type"
39 | }
40 |
41 |
42 |
--------------------------------------------------------------------------------
/docs/concepts/ServerAPI.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | REST API
5 |
6 |
7 | The REST methods that TMEngine's server supports are:
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | Default TMEngine URL is 'http://localhost:8000/TMServer/'.
24 | It is possible to select a custom port for the server, passing the
25 | '-port' parameter to the script used for launching it.
26 | All methods return a JSON object with a 'status' field. Applications
27 | must watch this field and verify that it is set to 'OK'.
28 | In case of error, the JSON response includes a field named 'reason' that
29 | contains the error cause.
30 |
31 |
32 |
--------------------------------------------------------------------------------
/docs/concepts/TMEngine.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | TMEngine
5 | TMEngine is an open source Translation Memory (TM) manager written in Java.
8 |
9 | TMEngine is based on the translation memory library used by Swordfish III, Fluenta and RemoteTM.
16 | TMEngine can be used in two ways:
17 |
18 | - As a standalone TM server via its REST API.
19 | - As an embedded library that manages translation memories in a Java application.
21 |
22 | The standalone server runs on these platforms:
23 |
24 | - Microsoft Windows 8, 8.1 and 10
25 | - macOS 10.13, 10.14 and 10.15
26 | - Linux (any version capable of running Java 11)
27 |
28 | A TMEngine server allows sharing Translation Memory data in a local network or over the
29 | Internet.
30 |
31 | The .jar files included in TMEngine distributions are compiled with Java 11.
32 |
33 |
34 |
--------------------------------------------------------------------------------
/docs/TMEngine.ditamap:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 | TMEngine
7 | An Open Source Translation Memory Manager Copyright (c) 2003-2021 Maxprograms
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/docs/concepts/rest/RenameMemory.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Rename Memory
5 |
6 | End Point: [TMEngine URL]/rename
7 | Default: rename
9 | Send a 'POST' request to the method end point with these parameters in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 |
21 | ID of the memory to rename
22 |
23 |
24 |
25 | name
26 | Yes
27 | New name for the memory
28 |
29 |
30 | Only memories of type 'MapDbEngine' can be renamed.
31 | Example:
32 | {
33 | "id": "1568163112478",
34 | "name": "Updated Memory Name"
35 | }
36 | The server responds with a JSON object containing two fields.
37 | On success, field 'status' is set to 'OK'.
38 |
39 | Example:
40 | {
41 | "status": "OK"
42 | }
43 |
44 | On error, field 'status' is set to 'failed' and field
45 | 'reason' contains the error cause.
46 |
47 | Example:
48 | {
49 | "status": "failed",
50 | "reason": "Wrong memory type"
51 | }
52 |
53 |
54 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/FuzzyIndex.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.File;
15 | import java.io.IOException;
16 | import java.util.Iterator;
17 | import java.util.Map;
18 | import java.util.NavigableSet;
19 | import java.util.Set;
20 | import java.util.concurrent.ConcurrentHashMap;
21 |
22 | import org.mapdb.DB;
23 | import org.mapdb.DBMaker;
24 | import org.mapdb.Fun;
25 |
26 | public class FuzzyIndex {
27 |
28 | private Map>> maps;
29 | private Map databases;
30 | private File folder;
31 |
32 | public FuzzyIndex(File folder) {
33 | this.folder = folder;
34 | databases = new ConcurrentHashMap<>();
35 | maps = new ConcurrentHashMap<>();
36 | }
37 |
38 | NavigableSet> getIndex(String lang) throws IOException {
39 | if (!maps.containsKey(lang)) {
40 | DB mapdb = null;
41 | try {
42 | mapdb = DBMaker.newFileDB(new File(folder, "index_" + lang)).closeOnJvmShutdown().make();
43 | } catch (Error ioe) {
44 | throw new IOException(ioe.getMessage());
45 | }
46 | NavigableSet> multiMap = mapdb.getTreeSet(lang);
47 | databases.put(lang, mapdb);
48 | maps.put(lang, multiMap);
49 | }
50 | return maps.get(lang);
51 | }
52 |
53 | public void commit() {
54 | Set set = databases.keySet();
55 | Iterator keys = set.iterator();
56 | while (keys.hasNext()) {
57 | String key = keys.next();
58 | databases.get(key).commit();
59 | }
60 | }
61 |
62 | public void close() {
63 | Set keys = databases.keySet();
64 | Iterator it = keys.iterator();
65 | while (it.hasNext()) {
66 | DB db = databases.get(it.next());
67 | db.commit();
68 | db.close();
69 | }
70 | }
71 |
72 | }
73 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/TMXResolver.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmx;
13 |
14 | import java.io.IOException;
15 | import java.net.URL;
16 |
17 | import org.xml.sax.EntityResolver;
18 | import org.xml.sax.InputSource;
19 | import org.xml.sax.SAXException;
20 |
/**
 * Entity resolver that answers requests for the TMX DTDs with the copies
 * stored as resources next to this class.
 */
public class TMXResolver implements EntityResolver {

    /**
     * Resolves a TMX DTD by public identifier first and, failing that, by
     * the file name at the end of the system identifier (case-insensitive).
     *
     * @param publicId public identifier of the entity, may be {@code null}
     * @param systemId system identifier of the entity, may be {@code null}
     * @return an input source for the matching DTD resource, or
     *         {@code null} when neither identifier names a known TMX DTD
     * @throws IOException if the matching DTD resource is missing or cannot be opened
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException {
        String dtd = dtdForPublicId(publicId);
        if (dtd == null) {
            dtd = dtdForSystemId(systemId);
        }
        if (dtd == null) {
            return null;
        }
        return openDtd(dtd);
    }

    /** Maps a known public identifier to its DTD resource name, or {@code null}. */
    private static String dtdForPublicId(String publicId) {
        if (publicId == null) {
            return null;
        }
        if (publicId.equals("-//LISA OSCAR:1998//DTD for Translation Memory eXchange//EN")) {
            return "tmx14.dtd";
        }
        if (publicId.equals("http://www.lisa.org/tmx14")) {
            return "tmx14.dtd";
        }
        if (publicId.equals("http://www.lisa.org/tmx")) {
            return "tmx13.dtd";
        }
        return null;
    }

    /** Maps a system identifier ending in a TMX DTD file name to that resource name, or {@code null}. */
    private static String dtdForSystemId(String systemId) {
        if (systemId == null) {
            return null;
        }
        String[] known = { "tmx14.dtd", "tmx13.dtd", "tmx12.dtd", "tmx11.dtd" };
        for (String name : known) {
            // regionMatches(ignoreCase) compares without consulting the default locale;
            // a negative offset (identifier shorter than the name) simply yields false
            if (systemId.regionMatches(true, systemId.length() - name.length(), name, 0, name.length())) {
                return name;
            }
        }
        return null;
    }

    /** Opens a DTD resource, reporting a missing resource as an {@link IOException}. */
    private static InputSource openDtd(String name) throws IOException {
        URL url = TMXResolver.class.getResource(name);
        if (url == null) {
            // getResource returns null when the resource is absent; fail with a clear message
            throw new IOException("Missing DTD resource: " + name);
        }
        return new InputSource(url.openStream());
    }

}
62 |
--------------------------------------------------------------------------------
/docs/concepts/rest/GetLanguages.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Get Languages
5 |
6 | End Point: [TMEngine URL]/languages
7 | Default: languages
9 | Send a 'POST' request to the method end point with this parameter in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 | ID of the memory to query
21 |
22 |
23 | Example:
24 | {
25 | "id": "1568163112456"
26 | }
27 | The server responds with a JSON object containing two fields.
28 | On success, field 'status' is set to 'OK' and field
29 | 'process' contains the ID of the background query process that was
30 | initiated.
31 |
32 | {
33 | "process": "1568222345683",
34 | "status": "OK"
35 | }
36 |
37 | On error, field 'status' is set to 'failed' and field
38 | 'reason' contains the error cause.
39 | {
40 | "status": "failed",
41 | "reason": "Unknown memory type"
42 | }
43 | After starting the query process, monitor its status using the Process Status method. On successful completion, the
45 | data field will contain a list of languages present in the
46 | memory.
47 | Example:
48 | {
49 | "result": "Completed",
50 | "data": {
51 | "languages": [ "es", "en" ]
52 | },
53 | "status": "OK"
54 | }
55 |
56 |
57 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/NGrams.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.util.Collections;
15 | import java.util.Iterator;
16 | import java.util.List;
17 | import java.util.Set;
18 | import java.util.StringTokenizer;
19 | import java.util.TreeSet;
20 | import java.util.Vector;
21 |
/**
 * Splits text into words and reduces each word to the hash codes of its
 * consecutive, non-overlapping chunks of up to three characters.
 */
public class NGrams {

    private static final int NGRAMSIZE = 3;
    public static final String SEPARATORS = " \r\n\f\t\u2028\u2029,.;\":<>¿?¡!()[]{}=+-/*\u00AB\u00BB\u201C\u201D\u201E\uFF00";
    // allow hyphen in terms
    public static final String TERM_SEPARATORS = " \u00A0\r\n\f\t\u2028\u2029,.;\":<>¿?¡!()[]{}=+/*\u00AB\u00BB\u201C\u201D\u201E\uFF00";

    private NGrams() {
        // static utility class; never instantiated
    }

    /**
     * Computes the distinct chunk hashes of a text.
     *
     * <p>The text is lower-cased, split on {@link #SEPARATORS}, and every
     * word is cut into chunks of three characters (the last chunk of a word
     * may be shorter). Each chunk contributes its {@code String.hashCode()}.
     *
     * @param string text to analyse; must not be {@code null}
     * @return the distinct hashes, ordered by their decimal text
     *         representation (not numerically); empty when the text
     *         contains no words
     */
    public static int[] getNGrams(String string) {
        String src = string.toLowerCase();
        // hashes are kept as decimal strings so that duplicates collapse and
        // the result keeps the ordering this method has always produced
        Set<String> hashes = new TreeSet<>();
        for (String word : buildWordList(src)) {
            int length = word.length();
            for (int start = 0; start < length; start += NGRAMSIZE) {
                String gram = word.substring(start, Math.min(start + NGRAMSIZE, length));
                hashes.add(String.valueOf(gram.hashCode()));
            }
        }

        int[] result = new int[hashes.size()];
        int idx = 0;
        for (String hash : hashes) {
            result[idx++] = Integer.parseInt(hash);
        }
        return result;
    }

    /** Splits {@code src} into words using {@link #SEPARATORS} as delimiters. */
    private static List<String> buildWordList(String src) {
        List<String> result = new Vector<>();
        StringTokenizer tokenizer = new StringTokenizer(src, SEPARATORS);
        while (tokenizer.hasMoreTokens()) {
            result.add(tokenizer.nextToken());
        }
        return result;
    }

}
77 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/MatchQuality.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
/**
 * Scores how similar two strings are, on a scale from 0 to 100, by
 * repeatedly removing their longest common substring.
 */
public class MatchQuality {

    // Points subtracted for every common fragment after the first one; also
    // the percentage of the longer string a fragment must exceed to count.
    private static final int PENALTY = 2;

    private MatchQuality() {
        // static utility class; never instantiated
    }

    /**
     * Finds the longest run of consecutive characters present in both strings.
     *
     * @param x first string
     * @param y second string
     * @return the longest common substring, taken from {@code x}; when
     *         several share the maximum length, the one ending first in
     *         {@code x} wins; empty when nothing is shared
     */
    static String lcs(String x, String y) {
        int m = x.length();
        int n = y.length();
        int max = 0;
        int end = 0;

        // previous[j] / current[j] = length of the common run ending at
        // x[i-2] / x[i-1] and y[j-1]; only two rows are ever needed
        int[] previous = new int[n + 1];
        int[] current = new int[n + 1];

        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                if (x.charAt(i - 1) == y.charAt(j - 1)) {
                    current[j] = previous[j - 1] + 1;
                    if (current[j] > max) {
                        // remember where the longest run ends in x
                        max = current[j];
                        end = i;
                    }
                } else {
                    current[j] = 0;
                }
            }
            int[] swap = previous;
            previous = current;
            current = swap;
        }

        return x.substring(end - max, end);
    }

    /**
     * Computes a similarity percentage for two strings.
     *
     * <p>Both strings are trimmed. Common fragments longer than
     * {@code PENALTY} percent of the longer string are removed one at a
     * time; the score is the percentage of the longer string that was
     * removed, minus {@code PENALTY} for every fragment after the first.
     *
     * @param one first string
     * @param two second string
     * @return a value between 0 and 100; 0 when both strings are blank or
     *         when no common fragment is found
     */
    public static int similarity(String one, String two) {
        String x = one.trim();
        String y = two.trim();
        int longest = Math.max(x.length(), y.length());
        if (longest == 0) {
            return 0;
        }
        // a is the longest string
        String a;
        String b;
        if (x.length() == longest) {
            a = x;
            b = y;
        } else {
            a = y;
            b = x;
        }
        int fragments = 0;
        String lcs = lcs(a, b);
        while (!lcs.trim().isEmpty() && lcs.length() > longest * PENALTY / 100) {
            fragments++;
            int idx = a.indexOf(lcs);
            a = a.substring(0, idx) + a.substring(idx + lcs.length());
            idx = b.indexOf(lcs);
            b = b.substring(0, idx) + b.substring(idx + lcs.length());
            lcs = lcs(a, b);
        }
        // the first fragment is free; with no fragment at all there is
        // nothing to penalise (and nothing to reward)
        int penalised = Math.max(fragments - 1, 0);
        int result = 100 * (longest - a.length()) / longest - penalised * PENALTY;
        return Math.max(result, 0);
    }

}
95 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # This project is not maintained. Please use [RemoteTM](https://github.com/rmraya/RemoteTM) instead.
2 |
3 |
4 | # TMEngine
5 |
6 |
7 |
8 | An open source [Translation Memory](https://en.wikipedia.org/wiki/Translation_memory) Engine written in Java.
9 |
10 | TMEngine is based on the translation memory library used by [Swordfish III](https://www.maxprograms.com/products/swordfish.html), [Fluenta](https://www.maxprograms.com/products/fluenta.html) and [RemoteTM](https://www.maxprograms.com/products/remotetm.html).
11 |
12 | TMEngine can be used either as an embedded library that manages translation memories in a Java application or as a standalone TM server via its REST API.
13 |
14 | ## Requirements
15 |
16 | - JDK 11 or newer is required for compiling and building.
17 | - Apache Ant 1.10.6 or newer
18 |
19 | ## Building
20 |
21 | - Checkout this repository.
22 | - Point your JAVA_HOME variable to JDK 11
23 | - Run `ant compile`.
24 |
25 | ## Downloads
26 |
27 | Ready to use distributions are available at [https://www.maxprograms.com/products/tmengine.html](https://www.maxprograms.com/products/tmengine.html).
28 |
29 | ## Related Links
30 |
31 | - [TMEngine Manual (PDF)](https://www.maxprograms.com/support/tmengine.pdf)
32 | - [TMEngine Manual (Web Help)](https://www.maxprograms.com/support/tmengine.html)
33 |
34 | ## Standalone Server
35 |
36 | Running `.\tmserver.bat` or `./tmserver.sh` without parameters displays help for starting TMEngine as a standalone server.
37 | ```
38 | Usage:
39 |
40 | tmserver.sh [-help] [-version] [-port portNumber]
41 |
42 | Where:
43 |
44 | -help: (optional) Display this help information and exit
45 | -version: (optional) Display version & build information and exit
46 | -port: (optional) Port for running HTTP server. Default is 8000
47 | ```
48 |
49 | Visit http://localhost:8000/TMServer/stop to stop the server. Adjust the port number if required.
50 |
51 | ## Java Library
52 |
53 | TMEngine can be embedded in Java applications that need to deal with Translation Memory data.
54 |
55 | Add all .jar files from `/lib` folder to the classpath of your application and use instances of `ITmEngine` interface.
56 |
57 | Two classes implement interface `ITmEngine`:
58 |
59 | - `MapDbEngine`: a translation memory engine built using [MapDB](http://mapdb.org)
60 | - `SQLEngine`: an implementation designed to be used with [MariaDB](https://mariadb.org/) or [MySQL](https://www.mysql.com/)
61 |
62 | See more details on the available Java methods in the [documentation](https://www.maxprograms.com/support/tmengine/TMEngine.html).
63 |
--------------------------------------------------------------------------------
/docs/concepts/rest/ProcessStatus.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Get Process Status
5 |
6 | End Point: [TMEngine URL]/status
7 | Default: status
9 |
10 | Send a POST request to the method end point with this parameter in a
11 | JSON body:
12 |
13 |
14 | Field
15 | Required
16 | Content
17 |
18 |
19 | process
20 | Yes
21 | ID of the background process to check
22 |
23 |
24 | Example:
25 | {
26 | "process": "1568223016762"
27 | }
28 | The server responds with a JSON object.
29 | On successful status check, field 'status' is set to
30 | 'OK' and field 'result' contains current
31 | status.
32 | Example:
33 |
34 |
35 | Field 'result' may have these values:
36 |
37 |
38 | - Pending: processing is still going on.
39 | {
40 | "result": "Pending",
41 | "status": "OK"
42 | }
43 | - Completed: processing has finished. If the process produces any data, it is
44 | placed in the 'data' field.
45 | {
46 | "result": "Completed",
47 | "data": {
48 | "imported": "57678"
49 | },
50 | "status": "OK"
51 | }
52 | - Failed: processing failed. Failure reason is provided in
53 | 'reason'
54 | field.{
55 | "result": "Failed",
56 | "reason": "/Volumes/Data/something.tmx (No such file or directory)",
57 | "status": "failed"
58 | }
59 |
60 |
61 | If process status cannot be checked, the server omits the 'result' field
62 | and provides a failure reason.
63 | {
64 | "reason": "Missing 'process' parameter",
65 | "status": "failed"
66 | }
67 |
68 |
69 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/ITmEngine.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.IOException;
15 | import java.sql.SQLException;
16 | import java.util.List;
17 | import java.util.Map;
18 | import java.util.Set;
19 |
20 | import javax.xml.parsers.ParserConfigurationException;
21 |
22 | import org.xml.sax.SAXException;
23 |
24 | import com.maxprograms.xml.Element;
25 |
26 | public interface ITmEngine {
27 |
28 | public abstract String getType();
29 |
30 | public abstract void close() throws IOException, SQLException;
31 |
32 | public abstract String getName();
33 |
34 | public abstract int storeTMX(String tmxFile, String project, String customer, String subject)
35 | throws SAXException, IOException, ParserConfigurationException, SQLException;
36 |
37 | public abstract void exportMemory(String tmxfile, Set langs, String srcLang, Map properties)
38 | throws IOException, SAXException, ParserConfigurationException, SQLException;
39 |
40 | public abstract void flag(String tuid) throws SQLException;
41 |
42 | public abstract Set getAllClients() throws SQLException;
43 |
44 | public abstract Set getAllLanguages() throws SQLException;
45 |
46 | public abstract Set getAllProjects() throws SQLException;
47 |
48 | public abstract Set getAllSubjects() throws SQLException;
49 |
50 | public abstract List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity,
51 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException;
52 |
53 | public abstract List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp,
54 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException;
55 |
56 | public abstract void storeTu(Element tu) throws IOException, SQLException;
57 |
58 | public abstract void commit() throws SQLException;
59 |
60 | public abstract Element getTu(String tuid)
61 | throws IOException, SAXException, ParserConfigurationException, SQLException;
62 |
63 | public abstract void removeTu(String tuid)
64 | throws IOException, SAXException, ParserConfigurationException, SQLException;
65 |
66 | public abstract void deleteDatabase() throws IOException, SQLException;
67 | }
68 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/TuDatabase.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.File;
15 | import java.io.IOException;
16 | import java.util.Map;
17 | import java.util.Set;
18 |
19 | import org.mapdb.DB;
20 | import org.mapdb.DBMaker;
21 | import org.mapdb.HTreeMap;
22 |
23 | public class TuDatabase {
24 |
25 | private DB mapdb;
26 | private HTreeMap> tumap;
27 | private Set projects;
28 | private Set subjects;
29 | private Set customers;
30 | private Set languages;
31 |
32 | public TuDatabase(File folder) throws IOException {
33 | try {
34 | mapdb = DBMaker.newFileDB(new File(folder, "tudata")).closeOnJvmShutdown().make();
35 | tumap = mapdb.getHashMap("tuvmap");
36 | projects = mapdb.getHashSet("projects");
37 | subjects = mapdb.getHashSet("subjects");
38 | customers = mapdb.getHashSet("customers");
39 | languages = mapdb.getHashSet("languages");
40 | } catch (Error ioe) {
41 | throw new IOException(ioe.getMessage());
42 | }
43 | }
44 |
45 | public void commit() {
46 | mapdb.commit();
47 | }
48 |
49 | public void close() {
50 | mapdb.commit();
51 | mapdb.close();
52 | }
53 |
54 | public void store(String tuid, Map tu) {
55 | if (tumap.containsKey(tuid.hashCode())) {
56 | tumap.replace(tuid.hashCode(), tu);
57 | } else {
58 | tumap.put(tuid.hashCode(), tu);
59 | }
60 | }
61 |
62 | public void storeSubject(String sub) {
63 | subjects.add(sub);
64 | }
65 |
66 | public void storeCustomer(String cust) {
67 | customers.add(cust);
68 | }
69 |
70 | public void storeProject(String proj) {
71 | projects.add(proj);
72 | }
73 |
74 | public void storeLanguage(String lang) {
75 | languages.add(lang);
76 | }
77 |
78 | public Set getCustomers() {
79 | return customers;
80 | }
81 |
82 | public Set getProjects() {
83 | return projects;
84 | }
85 |
86 | public Set getSubjects() {
87 | return subjects;
88 | }
89 |
90 | public Set getKeys() {
91 | return tumap.keySet();
92 | }
93 |
94 | public Set getLanguages() {
95 | return languages;
96 | }
97 |
98 | public Map getTu(Integer hashCode) {
99 | return tumap.get(hashCode);
100 | }
101 |
102 | public void remove(String tuid) {
103 | if (tumap.containsKey(tuid.hashCode())) {
104 | tumap.remove(tuid.hashCode());
105 | }
106 | }
107 | }
108 |
--------------------------------------------------------------------------------
/docs/concepts/rest/ExportTMX.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Export TMX File
5 |
6 | End Point: [TMEngine URL]/export
7 | Default: export
9 |
10 | Send a 'POST' request to the method end point with these parameters in a
11 | JSON body:
12 |
13 |
14 | Field
15 | Required
16 | Content
17 |
18 |
19 | id
20 | Yes
21 | ID of the memory to export as TMX
22 |
23 |
24 | file
25 | Yes
26 | Path to the TMX file being created
27 |
28 |
29 | langs
30 | No
31 | JSON array containing the list of languages to export
32 |
33 |
34 | srcLang
35 | No
36 | Language to set as source language. The wildcard '*all*' is
37 | used by default
38 |
39 |
40 | properties
41 | No
42 | JSON object with string properties to set in the exported file
43 |
44 |
45 | When exporting a TMX file on a remote server, make sure the TMEngine server has access
46 | to the specified location.
47 | Example:
48 | {
49 | "id": "1568163112478",
50 | "file": "/Volumes/Data/segments.tmx",
51 | "langs": [
52 | "en-US",
53 | "ja",
54 | "fr-FR",
55 | "it"
56 | ],
57 | "srcLang": "en-US",
58 | "properties": {
59 | "project": "Milky Way",
60 | "subject": "Astronomy Device"
61 | }
62 | }
63 |
64 | The server responds with a JSON object containing two fields.
65 | On success, field 'status' is set to 'OK' and field
66 | 'process' contains the ID of the background export process that was
67 | initiated.
68 |
69 | {
70 | "process": "1568222345643",
71 | "status": "OK"
72 | }
73 |
74 | On error, field 'status' is set to 'failed' and field
75 | 'reason' contains the error cause.
76 | {
77 | "status": "failed",
78 | "reason": "Unknown memory type"
79 | }
80 | After starting the export process, monitor its status using the Get Process Status method.
82 |
83 |
84 |
--------------------------------------------------------------------------------
/docs/concepts/rest/ImportTMX.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Import TMX File
5 |
6 | End Point: [TMEngine URL]/import
7 | Default: import
9 | Send a 'POST' request to the method end point with these parameters in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 | ID of the memory to populate with TMX data
21 |
22 |
23 | file
24 | Yes
25 | Path to the TMX file being imported
26 |
27 |
28 | subject
29 | No
30 | Name or identifier of the subject associated with the TMX file
31 |
32 |
33 | client
34 | No
35 | Name or identifier of the client associated with the TMX file
36 |
37 |
38 | project
39 | No
40 | Name or identifier of the project associated with the TMX file
41 |
42 |
43 |
44 | The TMEngine server must have access to the TMX file being imported. When importing a
45 | TMX file into a remote server, copy or upload the file to the server first and supply
46 | the right path in the JSON body.
47 | Example:
48 | {
49 | "id": "1568163112478",
50 | "file": "/Volumes/Data/segments.tmx",
51 | "project": "Main TM"
52 | }
53 |
54 | The server responds with a JSON object containing two fields.
55 | On success, field 'status' is set to 'OK' and field
56 | 'process' contains the ID of the background import process that was
57 | initiated.
58 |
59 | {
60 | "process": "1568222345643",
61 | "status": "OK"
62 | }
63 |
64 | On error, field 'status' is set to 'failed' and field
65 | 'reason' contains the error cause.
66 | {
67 | "status": "failed",
68 | "reason": "Unknown memory type"
69 | }
70 | After starting the import process, monitor its status using the Get Process Status method. On successful completion, the result will
72 | contain the number of segments imported.
73 | Example:
74 |
75 | {
76 | "result": "Completed",
77 | "data": {
78 | "imported": "57678"
79 | },
80 | "status": "OK"
81 | }
82 |
83 |
84 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/Match.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.Serializable;
15 | import java.util.Iterator;
16 | import java.util.Map;
17 |
18 | import com.maxprograms.tmutils.TMUtils;
19 | import com.maxprograms.xml.Element;
20 |
21 | import org.json.JSONObject;
22 |
23 | public class Match implements Serializable, Comparable {
24 |
25 | private static final long serialVersionUID = -944405164833933436L;
26 |
27 | private Element source;
28 | private Element target;
29 | private int similarity;
30 | private String origin;
31 | private Map properties;
32 |
33 | public Match(Element source, Element target, int similarity, String origin, Map properties) {
34 | this.source = source;
35 | this.target = target;
36 | this.similarity = similarity;
37 | this.origin = origin;
38 | this.properties = properties;
39 | }
40 |
41 | public JSONObject toJSON() {
42 | JSONObject result = new JSONObject();
43 | result.put("source", source.toString());
44 | result.put("target", target.toString());
45 | result.put("similarity", similarity);
46 | result.put("origin", origin);
47 | if (properties != null && !properties.isEmpty()) {
48 | JSONObject props = new JSONObject();
49 | Iterator keys = properties.keySet().iterator();
50 | while (keys.hasNext()) {
51 | String key = keys.next();
52 | props.put(key, properties.get(key));
53 | }
54 | result.put("properties", props);
55 | }
56 | return result;
57 | }
58 |
59 | public Element getSource() {
60 | return source;
61 | }
62 |
63 | public void setSource(Element source) {
64 | this.source = source;
65 | }
66 |
67 | public Element getTarget() {
68 | return target;
69 | }
70 |
71 | public void setTarget(Element target) {
72 | this.target = target;
73 | }
74 |
75 | public int getSimilarity() {
76 | return similarity;
77 | }
78 |
79 | public void setSimilarity(int similarity) {
80 | this.similarity = similarity;
81 | }
82 |
83 | public String getOrigin() {
84 | return origin;
85 | }
86 |
87 | public void setOrigin(String origin) {
88 | this.origin = origin;
89 | }
90 |
91 | public Map getProperties() {
92 | return properties;
93 | }
94 |
95 | @Override
96 | public int compareTo(Match o) {
97 | if (similarity < o.getSimilarity()) {
98 | return 1;
99 | }
100 | if (similarity > o.getSimilarity()) {
101 | return -1;
102 | }
103 | if (getCreationDate() < o.getCreationDate()) {
104 | return 1;
105 | }
106 | if (getCreationDate() > o.getCreationDate()) {
107 | return -1;
108 | }
109 | return origin.compareTo(o.getOrigin());
110 | }
111 |
112 | private long getCreationDate() {
113 | String created = properties.get("creationdate");
114 | if (created != null) {
115 | return TMUtils.getGMTtime(created);
116 | }
117 | return -1l;
118 | }
119 |
120 | @Override
121 | public boolean equals(Object obj) {
122 | if (!(obj instanceof Match)) {
123 | return false;
124 | }
125 | Match m = (Match) obj;
126 | return source.equals(m.getSource()) && target.equals(m.getTarget()) && similarity == m.getSimilarity()
127 | && origin.equals(m.getOrigin()) && properties.equals(m.getProperties());
128 | }
129 |
130 | @Override
131 | public int hashCode() {
132 | return source.hashCode() * target.hashCode() * similarity * origin.hashCode() * properties.hashCode();
133 | }
134 |
135 | }
136 |
--------------------------------------------------------------------------------
/docs/concepts/rest/ConcordanceSearch.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Concordance Search
5 |
6 | End Point: [TMEngine URL]/concordance
7 | Default: concordance
9 | Send a 'POST' request to the method end point with these parameters in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
19 | Yes
20 |
21 | ID of the memory where the search should be performed
22 |
23 |
24 |
25 | text
26 | Yes
27 | Text string to search
28 |
29 |
30 | srcLang
31 | Yes
32 | Source language code
33 |
34 |
35 | limit
36 | Yes
37 | Integer value indicating the maximum number of matches to include
38 |
39 |
40 | isRegexp
41 | Yes
42 | Boolean value indicating whether the search text should be treated as a
43 | regular expression
44 |
45 |
46 | caseSensitive
47 | Yes
48 | Boolean value indicating whether the search should be case sensitive or
49 | not
50 |
51 |
52 | Example:
53 | {
54 | "id": "fluenta",
55 | "text": "segment",
56 | "srcLang": "en",
57 | "limit": 5,
58 | "isRegexp": false,
59 | "caseSensitive": true
60 | }
61 | On success, field 'status' is set to 'OK' and field
62 | 'process' contains the ID of the background search process that was
63 | initiated.
64 |
65 | {
66 | "process": "1572531573026",
67 | "status": "OK"
68 | }
69 |
70 | On error, field 'status' is set to 'failed' and field
71 | 'reason' contains the error cause.
72 | {
73 | "status": "failed",
74 | "reason": "Unknown memory type"
75 | }
76 | After starting the search process, monitor its status using the Get Process Status method.
78 | On successful completion, the result will contain an array of <tu> elements that
79 | contain the searched text in the data field.
80 | Example:
81 | {
82 | "result": "Completed",
83 | "data": {
84 | "entries": [
85 | "<tu creationid="rmraya" creationdate="20161225T150949Z" creationtool="Swordfish"
86 | creationtoolversion="3.3-8" tuid="-1247472893-0-1586928971">
87 | <prop type="project">Fluenta</prop>
88 | <tuv xml:lang="es"><seg>Hay segmentos con errores de etiquetas.</seg></tuv>
89 | <tuv xml:lang="en"><seg>There are segments with tag errors.</seg></tuv></tu>"
90 | ]
91 | },
92 | "status": "OK"
93 | }
94 |
95 |
96 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmserver/TmServer.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmserver;
13 |
14 | import java.io.File;
15 | import java.io.IOException;
16 | import java.lang.System.Logger;
17 | import java.lang.System.Logger.Level;
18 | import java.net.InetSocketAddress;
19 | import java.nio.file.Files;
20 |
21 | import com.maxprograms.converters.Utils;
22 | import com.maxprograms.server.IServer;
23 | import com.maxprograms.tmengine.Constants;
24 | import com.sun.net.httpserver.HttpServer;
25 |
26 | public class TmServer implements IServer {
27 |
28 | private static final Logger LOGGER = System.getLogger(TmServer.class.getName());
29 |
30 | private HttpServer server;
31 | private File workDir;
32 |
33 | public static void main(String[] args) {
34 | String[] arguments = Utils.fixPath(args);
35 | String port = "8000";
36 | for (int i = 0; i < arguments.length; i++) {
37 | String param = arguments[i];
38 | if (param.equals("-help")) {
39 | help();
40 | return;
41 | }
42 | if (param.equals("-version")) {
43 | LOGGER.log(Level.INFO, () -> "Version: " + Constants.VERSION + " Build: " + Constants.BUILD);
44 | return;
45 | }
46 | if (param.equals("-port") && (i + 1) < arguments.length) {
47 | port = arguments[i + 1];
48 | }
49 | }
50 | try {
51 | TmServer instance = new TmServer(Integer.valueOf(port));
52 | instance.run();
53 | } catch (IOException e) {
54 | LOGGER.log(Level.ERROR, "Server error", e);
55 | }
56 | }
57 |
58 | private static void help() {
59 | String launcher = " tmserver.sh ";
60 | if (System.getProperty("file.separator").equals("\\")) {
61 | launcher = " tmserver.bat ";
62 | }
63 | String help = "Usage:\n\n" + launcher + "[-help] [-version] [-port portNumber]\n\n" + " Where:\n\n"
64 | + " -help: (optional) Display this help information and exit\n"
65 | + " -version: (optional) Display version & build information and exit\n"
66 | + " -port: (optional) Port for running HTTP server. Default is 8000\n";
67 | System.out.println(help);
68 | }
69 |
70 | public TmServer(int port) throws IOException {
71 | server = HttpServer.create(new InetSocketAddress(port), 0);
72 | server.createContext("/TMServer", new TmHandler(this, "/TMServer"));
73 | server.setExecutor(null); // creates a default executor
74 | }
75 |
76 | public void run() {
77 | server.start();
78 | LOGGER.log(Level.INFO, "TMEngine server started");
79 | }
80 |
81 | @Override
82 | public void stop() {
83 | server.removeContext("/TMServer");
84 | LOGGER.log(Level.INFO, "TMEngine server closed");
85 | System.exit(0);
86 | }
87 |
88 | @Override
89 | public File getWorkFolder() throws IOException {
90 | if (workDir == null) {
91 | String os = System.getProperty("os.name").toLowerCase();
92 | if (os.startsWith("mac")) {
93 | workDir = new File(System.getProperty("user.home") + "/Library/Application Support/TMEngine/");
94 | } else if (os.startsWith("windows")) {
95 | workDir = new File(System.getenv("AppData") + "\\TMEngine\\");
96 | } else {
97 | workDir = new File(System.getProperty("user.home") + "/.tmengine/");
98 | }
99 | if (!workDir.exists()) {
100 | Files.createDirectories(workDir.toPath());
101 | }
102 | }
103 | return workDir;
104 | }
105 | }
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/TMXContentHandler.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmx;
13 |
14 | import java.io.IOException;
15 | import java.lang.System.Logger;
16 | import java.lang.System.Logger.Level;
17 | import java.sql.SQLException;
18 | import java.util.Deque;
19 | import java.util.concurrent.ConcurrentLinkedDeque;
20 |
21 | import org.xml.sax.Attributes;
22 | import org.xml.sax.Locator;
23 | import org.xml.sax.SAXException;
24 |
25 | import com.maxprograms.tmengine.ITmEngine;
26 | import com.maxprograms.xml.Catalog;
27 | import com.maxprograms.xml.Document;
28 | import com.maxprograms.xml.Element;
29 | import com.maxprograms.xml.IContentHandler;
30 |
31 | class TMXContentHandler implements IContentHandler {
32 |
33 | protected static final Logger LOGGER = System.getLogger(TMXContentHandler.class.getName());
34 |
35 | private Element current;
36 | private Deque stack;
37 | private boolean inCDATA = false;
38 | private int count;
39 | private ITmEngine db;
40 |
41 | public TMXContentHandler(ITmEngine tmEngine) {
42 | db = tmEngine;
43 | stack = new ConcurrentLinkedDeque<>();
44 | }
45 |
46 | @Override
47 | public void characters(char[] ch, int start, int length) throws SAXException {
48 | if (!inCDATA && current != null) {
49 | current.addContent(new String(ch, start, length));
50 | }
51 | }
52 |
53 | @Override
54 | public void endDocument() throws SAXException {
55 | stack = null;
56 | }
57 |
58 | @Override
59 | public void endElement(String uri, String localName, String qName) throws SAXException {
60 | if (localName.equals("tu")) {
61 | try {
62 | db.storeTu(current);
63 | if (count % 500 == 0) {
64 | db.commit();
65 | }
66 | } catch (IOException | SQLException e) {
67 | // ignore
68 | LOGGER.log(Level.WARNING, "Error storing " + current);
69 | }
70 | count++;
71 | current = null;
72 | stack.clear();
73 | } else {
74 | if (!stack.isEmpty()) {
75 | current = stack.removeFirst();
76 | }
77 | }
78 | }
79 |
80 | @Override
81 | public void endPrefixMapping(String prefix) throws SAXException {
82 | // do nothing
83 | }
84 |
85 | @Override
86 | public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
87 | // do nothing
88 | }
89 |
90 | @Override
91 | public void processingInstruction(String target, String data) throws SAXException {
92 | // do nothing
93 | }
94 |
95 | @Override
96 | public void setDocumentLocator(Locator locator) {
97 | // do nothing
98 | }
99 |
100 | @Override
101 | public void skippedEntity(String name) throws SAXException {
102 | // do nothing, the entity resolver must support this
103 | }
104 |
105 | @Override
106 | public void startDocument() throws SAXException {
107 | // do nothing
108 | }
109 |
110 | @Override
111 | public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
112 | if (current == null) {
113 | current = new Element(qName);
114 | stack.addFirst(current);
115 | } else {
116 | Element child = new Element(qName);
117 | if (!qName.equals("ut")) {
118 | current.addContent(child);
119 | }
120 | stack.addFirst(current);
121 | current = child;
122 | }
123 | for (int i = 0; i < atts.getLength(); i++) {
124 | current.setAttribute(atts.getQName(i), atts.getValue(i));
125 | }
126 | }
127 |
128 | @Override
129 | public void startPrefixMapping(String prefix, String uri) throws SAXException {
130 | // do nothing
131 | }
132 |
133 | @Override
134 | public void comment(char[] ch, int start, int length) throws SAXException {
135 | // do nothing
136 | }
137 |
138 | @Override
139 | public void endCDATA() throws SAXException {
140 | inCDATA = false;
141 | }
142 |
143 | @Override
144 | public void endDTD() throws SAXException {
145 | // do nothing
146 | }
147 |
148 | @Override
149 | public void endEntity(String arg0) throws SAXException {
150 | // do nothing, let the EntityResolver handle this
151 | }
152 |
153 | @Override
154 | public void startCDATA() throws SAXException {
155 | inCDATA = true;
156 | }
157 |
158 | @Override
159 | public void startDTD(String name, String publicId1, String systemId1) throws SAXException {
160 | // do nothing
161 | }
162 |
163 | @Override
164 | public void startEntity(String arg0) throws SAXException {
165 | // do nothing, let the EntityResolver handle this
166 | }
167 |
168 | public int getCount() {
169 | return count;
170 | }
171 |
172 | @Override
173 | public Document getDocument() {
174 | // do nothing
175 | return null;
176 | }
177 |
178 | @Override
179 | public void setCatalog(Catalog arg0) {
180 | // do nothing
181 | }
182 | }
183 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/TuvDatabase.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.File;
15 | import java.io.IOException;
16 | import java.util.Iterator;
17 | import java.util.Map;
18 | import java.util.NavigableSet;
19 | import java.util.Set;
20 | import java.util.TreeSet;
21 | import java.util.concurrent.ConcurrentHashMap;
22 |
23 | import org.mapdb.BTreeMap;
24 | import org.mapdb.DB;
25 | import org.mapdb.DBMaker;
26 |
27 | public class TuvDatabase {
28 |
29 | private Map> textMaps;
30 | private Map>> hashesMaps;
31 | private Map> segsMaps;
32 | private Map databases;
33 | private File folder;
34 |
35 | public TuvDatabase(File folder) {
36 | this.folder = folder;
37 | databases = new ConcurrentHashMap<>();
38 | textMaps = new ConcurrentHashMap<>();
39 | hashesMaps = new ConcurrentHashMap<>();
40 | segsMaps = new ConcurrentHashMap<>();
41 | }
42 |
43 | private void buildIndex(String lang) throws IOException {
44 | try {
45 | DB mapdb = DBMaker.newFileDB(new File(folder, "tuv_" + lang)).closeOnJvmShutdown().make();
46 | databases.put(lang, mapdb);
47 | textMaps.put(lang, databases.get(lang).getTreeMap("tuvs"));
48 | hashesMaps.put(lang, databases.get(lang).getTreeMap("hashes"));
49 | segsMaps.put(lang, databases.get(lang).getTreeMap("segs"));
50 | } catch (Error ioe) {
51 | throw new IOException(ioe.getMessage());
52 | }
53 | }
54 |
55 | public void commit() {
56 | Set langSet = databases.keySet();
57 | Iterator keys = langSet.iterator();
58 | while (keys.hasNext()) {
59 | String key = keys.next();
60 | databases.get(key).commit();
61 | }
62 | }
63 |
64 | public void close() {
65 | commit();
66 | Set langSet = databases.keySet();
67 | Iterator keys = langSet.iterator();
68 | while (keys.hasNext()) {
69 | String key = keys.next();
70 | databases.get(key).close();
71 | }
72 | }
73 |
74 | public void store(String lang, String tuid, String puretext, String seg) throws IOException {
75 | if (!textMaps.containsKey(lang)) {
76 | buildIndex(lang);
77 | }
78 | int idHash = tuid.hashCode();
79 | BTreeMap textmap = textMaps.get(lang);
80 | if (textmap.containsKey(idHash)) {
81 | textmap.replace(idHash, puretext);
82 | } else {
83 | textmap.put(idHash, puretext);
84 | }
85 | BTreeMap segmap = segsMaps.get(lang);
86 | if (segmap.containsKey(idHash)) {
87 | segmap.replace(idHash, seg);
88 | } else {
89 | segmap.put(idHash, seg);
90 | }
91 | int hash = puretext.toLowerCase().hashCode();
92 | BTreeMap> hashmap = hashesMaps.get(lang);
93 | if (hashmap.containsKey(hash)) {
94 | Set set = hashesMaps.get(lang).get(hash);
95 | set.add(tuid);
96 | hashmap.replace(hash, set);
97 | } else {
98 | Set set = new TreeSet<>();
99 | set.add(tuid);
100 | hashmap.put(hash, set);
101 | }
102 | }
103 |
104 | public String getSegText(String lang, String tuid) throws IOException {
105 | if (!segsMaps.containsKey(lang)) {
106 | buildIndex(lang);
107 | }
108 | return segsMaps.get(lang).get(tuid.hashCode());
109 | }
110 |
111 | public void remove(String lang, String tuid) throws IOException {
112 | if (!textMaps.containsKey(lang)) {
113 | buildIndex(lang);
114 | }
115 | int idHash = tuid.hashCode();
116 | String oldText = getPureText(lang, idHash);
117 | if (oldText != null) {
118 | textMaps.get(lang).remove(idHash);
119 | segsMaps.get(lang).remove(idHash);
120 | int textHash = oldText.toLowerCase().hashCode();
121 | Set set = hashesMaps.get(lang).get(textHash);
122 | set.remove(tuid);
123 | hashesMaps.get(lang).replace(textHash, set);
124 | }
125 | }
126 |
127 | public Set getPerfectMatches(String lang, String searchStr) throws IOException {
128 | if (!hashesMaps.containsKey(lang)) {
129 | buildIndex(lang);
130 | }
131 | BTreeMap> hashmap = hashesMaps.get(lang);
132 | int textHash = searchStr.toLowerCase().hashCode();
133 | if (hashmap.containsKey(textHash)) {
134 | return hashmap.get(textHash);
135 | }
136 | return new TreeSet<>();
137 | }
138 |
139 | public NavigableSet getKeySet(String lang) throws IOException {
140 | if (!hashesMaps.containsKey(lang)) {
141 | buildIndex(lang);
142 | }
143 | return textMaps.get(lang).keySet();
144 | }
145 |
146 | public String getPureText(String lang, Integer id) throws IOException {
147 | if (!textMaps.containsKey(lang)) {
148 | buildIndex(lang);
149 | }
150 | return textMaps.get(lang).get(id);
151 | }
152 | }
153 |
--------------------------------------------------------------------------------
/docs/concepts/rest/CreateMemory.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Create Memory
5 |
6 | End Point: [TMEngine URL]/create
7 | Default: create
9 |
10 | Send a 'POST' request to the method end point with these parameters in a
11 | JSON body:
12 |
13 |
14 | Field
15 | Required
16 | Content
17 |
18 |
19 | id
20 | No
21 |
22 | ID of the memory to create. The value of 'id' must be
23 | unique.
24 | Default value is current server time represented as the number of
25 | milliseconds since January 1, 1970, 00:00:00 GMT
26 |
27 |
28 |
29 | name
30 | Yes
31 | A meaningful name to identify the memory
32 |
33 |
34 | owner
35 | No
36 |
37 | Text string used to identify the owner of the memory.
38 | Default value is the login name of the user running the server.
39 |
40 |
41 |
42 | type
43 | No
44 |
45 | Type of engine to use. Possible values are:
46 | - 'MapDbEngine' (default)
47 | - 'SQLEngine'
48 |
49 |
50 |
51 |
52 |
53 | serverName
54 | No
55 |
56 | Name or IP of the server running MySQL or MariaDB.
 57 | Required for SQLEngine. Default value:
58 | 'localhost'
59 |
60 |
61 |
62 | port
63 | No
64 |
65 | Port in which MySQL or MariaDB listens for requests.
66 | Required for SQLEngine. Default value:
67 | 3306
68 |
69 |
70 |
71 | userName
72 | No
73 |
 74 | ID of the MySQL or MariaDB user creating the database.
75 | Required for SQLEngine.
76 |
77 |
78 |
79 | password
80 | No
81 |
82 | Password of the MySQL or MariaDB user creating the database.
83 | Required for SQLEngine.
84 |
85 |
86 |
87 |
88 | Example:
89 | {
90 | "name": "First Memory",
91 | "type": "MapDbEngine"
92 | }
93 |
94 | {
95 | "name": "MariaMemory",
96 | "type": "SQLEngine",
97 | "serverName": "localhost",
98 | "port": 3306,
99 | "userName": "root",
100 | "password": "secret123!"
101 | }
102 | The server responds with a JSON object containing two fields.
103 | On success, field 'status' is set to 'OK' and field
104 | 'id' contains the ID assigned to the new memory.
105 |
106 | Example:
107 | {
108 | "status": "OK",
109 | "id": "1234567890987"
110 | }
111 |
112 | On error, field 'status' is set to 'failed' and field
113 | 'reason' contains the error cause.
114 |
115 | Example:
116 | {
117 | "status": "failed",
118 | "reason": "Duplicated id"
119 | }
120 |
121 |
122 |
--------------------------------------------------------------------------------
/docs/concepts/rest/SearchTranslations.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Search Translations
5 |
 6 | End Point: [TMEngine URL]/search
7 | Default: search
9 | Send a 'POST' request to the method end point with these parameters in a
10 | JSON body:
11 |
12 |
13 | Field
14 | Required
15 | Content
16 |
17 |
18 | id
 19 | Yes
20 |
21 | ID of the memory where the search should be performed
22 |
23 |
24 |
25 | text
26 | Yes
27 | Text string to search
28 |
29 |
30 | srcLang
31 | Yes
32 | Source language code
33 |
34 |
35 | tgtLang
36 | Yes
37 | Target language code
38 |
39 |
40 | similarity
41 | Yes
42 | Integer value indicating the lowest similarity percentage to include in
43 | results
44 |
45 |
46 | caseSensitive
47 | Yes
48 | Boolean value indicating whether the search should be case sensitive or
49 | not
50 |
51 |
52 | Example:
53 | {
54 | "id": "1572538708492",
55 | "text": "tax compliance",
56 | "srcLang": "en-GB",
57 | "tgtLang": "fr-FR",
58 | "similarity": 70,
59 | "caseSensitive": false
60 | }
61 | The server responds with a JSON object containing two fields.
62 | On success, field 'status' is set to 'OK' and field
63 | 'process' contains the ID of the background search process that was
64 | initiated.
65 |
66 | {
67 | "process": "1572531573026",
68 | "status": "OK"
69 | }
70 |
71 | On error, field 'status' is set to 'failed' and field
72 | 'reason' contains the error cause.
73 | {
74 | "status": "failed",
75 | "reason": "Unknown memory type"
76 | }
 77 | After starting the search process, monitor its status using the Process Status method.
79 | On successful completion, the result will contain an array of similar segments in the
80 | data field.
81 | Example:
82 |
83 | {
84 | "result": "Completed",
85 | "data": {
86 | "matches": [
87 | {
88 | "similarity": 71,
89 | "origin": "1572538708492",
90 | "source": "<tuv xml:lang="en-GB"><seg>Non-compliance</seg></tuv>",
91 | "target": "<tuv xml:lang="fr-FR"><seg>Violation</seg></tuv>",
92 | "properties": {
93 | "creationdate": "20070126T082848Z",
94 | "subject": "Taxes",
95 | "x-Origin": "TM",
96 | "project": "Main TM",
97 | "changedate": "20070126T082848Z",
98 | "tuid": "1546700322331",
99 | "creationid": "MC",
100 | "changeid": "MC",
101 | "lastusagedate": "20070126T082848Z",
102 | "customer": "ACME Auditors"
103 | }
104 | }, {
105 | "similarity": 73,
106 | "origin": "1572538708492",
107 | "source": "<tuv xml:lang="en-GB"><seg>Legal Compliance</seg></tuv>",
108 | "target": "<tuv xml:lang="fr-FR"><seg>Conformité légale</seg></tuv>",
109 | "properties": {
110 | "creationdate": "20160725T141611Z",
111 | "x-ConfirmationLevel": "ApprovedTranslation",
112 | "subject": "Taxes",
113 | "x-Origin": "TM",
114 | "project": "Main TM",
115 | "changedate": "20160727T093143Z",
116 | "tuid": "1546700366038",
 117 | "creationid": "Aqcis9\\Aqcis",
118 | "changeid": "FG",
119 | "lastusagedate": "20160727T093143Z",
120 | "customer": "ACME Auditors"
121 | }
122 | }, {
123 | "similarity": 100,
124 | "origin": "fluenta",
125 | "source": "<tuv xml:lang="en-GB"><seg>tax compliance</seg></tuv>",
126 | "target": "<tuv xml:lang="fr-FR"><seg>Conformité fiscale</seg></tuv>",
127 | "properties": {
128 | "creationdate": "20171004T111450Z",
129 | "subject": "Taxes",
130 | "project": "Main TM",
131 | "changedate": "20171004T111450Z",
132 | "tuid": "1546700370945",
133 | "changeid": "translator2",
134 | "usagecount": "1",
135 | "x-ConfirmationLevel": "Translated",
136 | "x-Origin": "TM",
137 | "creationid": "translator2",
138 | "lastusagedate": "20171006T103930Z",
139 | "customer": "ACME Auditors"
140 | }
141 | }
 142 | ]
143 | },
144 | "status": "OK"
145 | }
146 |
147 |
148 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmutils/TMUtils.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmutils;
13 |
14 | import com.maxprograms.languages.RegistryParser;
15 | import com.maxprograms.xml.Element;
16 | import com.maxprograms.xml.SAXBuilder;
17 | import com.maxprograms.xml.TextNode;
18 | import com.maxprograms.xml.XMLNode;
19 |
20 | import org.xml.sax.SAXException;
21 |
22 | import java.io.ByteArrayInputStream;
23 | import java.io.File;
24 | import java.io.IOException;
25 | import java.lang.System.Logger;
26 | import java.lang.System.Logger.Level;
27 | import java.nio.charset.StandardCharsets;
28 | import java.nio.file.Files;
29 | import java.util.Calendar;
30 | import java.util.Date;
31 | import java.util.Iterator;
32 | import java.util.List;
33 | import java.util.Locale;
34 | import java.util.TimeZone;
35 |
36 | import javax.xml.parsers.ParserConfigurationException;
37 |
38 | public class TMUtils {
39 |
40 | private static Logger logger = System.getLogger(TMUtils.class.getName());
41 | private static RegistryParser registry;
42 |
43 | private TMUtils() {
44 | // private for security
45 | }
46 |
47 | public static String pureText(Element seg) {
48 | List l = seg.getContent();
49 | Iterator i = l.iterator();
50 | StringBuilder text = new StringBuilder();
51 | while (i.hasNext()) {
52 | XMLNode o = i.next();
53 | if (o.getNodeType() == XMLNode.TEXT_NODE) {
54 | text.append(((TextNode) o).getText());
55 | } else if (o.getNodeType() == XMLNode.ELEMENT_NODE) {
56 | String type = ((Element) o).getName();
57 | // discard all inline elements
58 | // except and
59 | if (type.equals("sub") || type.equals("hi")) {
60 | Element e = (Element) o;
61 | text.append(pureText(e));
62 | }
63 | }
64 | }
65 | return text.toString();
66 | }
67 |
68 | public static String normalizeLang(String langCode) throws IOException {
69 | if (registry == null) {
70 | registry = new RegistryParser();
71 | }
72 | if (langCode == null) {
73 | return null;
74 | }
75 | if (!registry.getTagDescription(langCode).isEmpty()) {
76 | return langCode;
77 | }
78 | String lang = langCode.replaceAll("_", "-");
79 | String[] parts = lang.split("-");
80 |
81 | if (parts.length == 2) {
82 | if (parts[1].length() == 2) {
83 | // has country code
84 | String code = lang.substring(0, 2).toLowerCase() + "-" + lang.substring(3).toUpperCase();
85 | if (!registry.getTagDescription(code).isEmpty()) {
86 | return code;
87 | }
88 | return null;
89 | }
90 | if (isRegion(parts[1])) {
91 | // try lowercasing language code while keeping region number
92 | String code = lang.substring(0, 2).toLowerCase() + "-" + parts[1];
93 | if (!registry.getTagDescription(code).isEmpty()) {
94 | return code;
95 | }
96 | return null;
97 | }
98 | // may have a script
99 | String code = lang.substring(0, 2).toLowerCase() + "-" + lang.substring(3, 4).toUpperCase()
100 | + lang.substring(4).toLowerCase();
101 | if (!registry.getTagDescription(code).isEmpty()) {
102 | return code;
103 | }
104 | return null;
105 | }
106 | // check if its a valid thing with more than 2 parts
107 | if (!registry.getTagDescription(lang).isEmpty()) {
108 | return lang;
109 | }
110 | return null;
111 | }
112 |
113 | private static boolean isRegion(String string) {
114 | if (string.length() != 3) {
115 | return false;
116 | }
117 | return isNumber(string.charAt(0)) && isNumber(string.charAt(1)) && isNumber(string.charAt(2));
118 | }
119 |
120 | private static boolean isNumber(char c) {
121 | return c >= '0' && c <= '9';
122 | }
123 |
124 | public static String createId() throws InterruptedException {
125 | Date now = new Date();
126 | long lng = now.getTime();
127 | // wait until we are in the next millisecond
128 | // before leaving to ensure uniqueness
129 | Thread.sleep(1);
130 | return "" + lng;
131 | }
132 |
133 | public static String tmxDate() {
134 | Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
135 | String sec = (calendar.get(Calendar.SECOND) < 10 ? "0" : "") + calendar.get(Calendar.SECOND);
136 | String min = (calendar.get(Calendar.MINUTE) < 10 ? "0" : "") + calendar.get(Calendar.MINUTE);
137 | String hour = (calendar.get(Calendar.HOUR_OF_DAY) < 10 ? "0" : "") + calendar.get(Calendar.HOUR_OF_DAY);
138 | String mday = (calendar.get(Calendar.DATE) < 10 ? "0" : "") + calendar.get(Calendar.DATE);
139 | String mon = (calendar.get(Calendar.MONTH) < 9 ? "0" : "") + (calendar.get(Calendar.MONTH) + 1);
140 | String longyear = "" + calendar.get(Calendar.YEAR);
141 |
142 | return longyear + mon + mday + "T" + hour + min + sec + "Z";
143 | }
144 |
145 | public static long getGMTtime(String tmxDate) {
146 | Calendar calendar = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
147 | try {
148 | int second = Integer.parseInt(tmxDate.substring(13, 15));
149 | int minute = Integer.parseInt(tmxDate.substring(11, 13));
150 | int hour = Integer.parseInt(tmxDate.substring(9, 11));
151 | int date = Integer.parseInt(tmxDate.substring(6, 8));
152 | int month = Integer.parseInt(tmxDate.substring(4, 6)) - 1;
153 | int year = Integer.parseInt(tmxDate.substring(0, 4));
154 | calendar.set(year, month, date, hour, minute, second);
155 | return calendar.getTimeInMillis();
156 | } catch (NumberFormatException e) {
157 | logger.log(Level.WARNING, "Unsupported TMX date: " + tmxDate);
158 | return 0l;
159 | }
160 | }
161 |
162 | public static Element buildTuv(String lang, String seg)
163 | throws SAXException, IOException, ParserConfigurationException {
164 | Element tuv = new Element("tuv");
165 | tuv.setAttribute("xml:lang", lang);
166 | SAXBuilder builder = new SAXBuilder();
167 | Element e = builder.build(new ByteArrayInputStream(("" + seg + "").getBytes(StandardCharsets.UTF_8)))
168 | .getRootElement();
169 | tuv.addContent(e);
170 | return tuv;
171 | }
172 |
173 | public static String creationDate() {
174 | Calendar calendar = Calendar.getInstance(Locale.US);
175 | String sec = (calendar.get(Calendar.SECOND) < 10 ? "0" : "") + calendar.get(Calendar.SECOND);
176 | String min = (calendar.get(Calendar.MINUTE) < 10 ? "0" : "") + calendar.get(Calendar.MINUTE);
177 | String hour = (calendar.get(Calendar.HOUR_OF_DAY) < 10 ? "0" : "") + calendar.get(Calendar.HOUR_OF_DAY);
178 | String mday = (calendar.get(Calendar.DATE) < 10 ? "0" : "") + calendar.get(Calendar.DATE);
179 | String mon = (calendar.get(Calendar.MONTH) < 9 ? "0" : "") + (calendar.get(Calendar.MONTH) + 1);
180 | String longyear = "" + calendar.get(Calendar.YEAR);
181 |
182 | return longyear + mon + mday + "T" + hour + min + sec + "Z";
183 | }
184 |
185 | public static String extractText(Element seg) {
186 | List l = seg.getContent();
187 | Iterator i = l.iterator();
188 | StringBuilder text = new StringBuilder();
189 | while (i.hasNext()) {
190 | XMLNode o = i.next();
191 | if (o.getNodeType() == XMLNode.TEXT_NODE) {
192 | text.append(((TextNode) o).getText());
193 | } else if (o.getNodeType() == XMLNode.ELEMENT_NODE) {
194 | Element e = (Element) o;
195 | String type = e.getName();
196 | // discard all inline elements
197 | // except and
198 | if (type.equals("sub") || type.equals("hi")) {
199 | text.append(extractText(e));
200 | }
201 | }
202 | }
203 | return text.toString();
204 | }
205 |
206 | public static void deleteFolder(String folder) throws IOException {
207 | File f = new File(folder);
208 | if (f.isDirectory()) {
209 | String[] list = f.list();
210 | for (int i = 0; i < list.length; i++) {
211 | deleteFolder(new File(f, list[i]).getAbsolutePath());
212 | }
213 | }
214 | Files.delete(f.toPath());
215 | }
216 |
217 |
218 | }
219 |
--------------------------------------------------------------------------------
/docs/concepts/methods.dita:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 | Interface Methods
5 |
6 |
7 | The methods exposed by ITmEngine interface are:
8 |
9 | - public abstract String getName();
10 |
Returns the name of the engine instance.
11 |
12 | - public abstract String getType();
13 |
Returns the ITmEngine type used for storing data. Possible
14 | values are:
15 |
16 | - MapDbEngine
17 | - SQLEngine
18 |
19 |
20 |
21 | - public abstract void close() throws IOException, SQLException;
22 |
Closes the engine instance.
23 |
24 |
25 | - public abstract int storeTMX(String tmxFile, String project, String customer,
26 | String subject) throws SAXException, IOException,
27 | ParserConfigurationException, SQLException;
28 |
Imports a TMX document located at "tmxFile" and associates its
29 | data with "project", "customer" and
30 | "subject" properties
31 |
32 |
33 | - public abstract void exportMemory(String tmxfile, Set<String> langs, String
34 | srcLang, Map<String, String> properties) throws IOException,
35 | SAXException, ParserConfigurationException, SQLException;
36 |
Exports engine data to a TMX document located at "tmxfile".
37 | The "langs" argument may contain a set of language codes. If
38 | "langs" is not null, only the entries with the given
39 | language codes are exported.
40 | The "srcLang" argument indicates the source language assigned to
41 | the TMX file. It can be one of the languages from the data set (see "getAllLanguages()") or the string
43 | "*all*".
44 | The "properties" argument may contain a set of property-value
45 | pairs to be set in the exported TMX file.
46 |
47 |
48 | - public abstract void flag(String tuid) throws SQLException;
49 |
Adds the property "x-flag", with its value set to
 50 | "SW-Flag", to the translation unit identified by the
 51 | "tuid" argument.
52 |
53 |
54 | - public abstract Set<String> getAllLanguages() throws SQLException;
55 |
Returns a collection containing all language codes used in the engine's data.
56 |
57 |
58 | - public abstract Set<String> getAllClients() throws SQLException;
59 |
Returns a collection containing all values assigned to the
60 | "client" property.
61 |
62 |
63 | - public abstract Set<String> getAllProjects() throws SQLException;
64 |
Returns a collection containing all values assigned to the
65 | "project" property.
66 |
67 |
68 | - public abstract Set<String> getAllSubjects() throws SQLException;
69 |
Returns a collection containing all values assigned to the
70 | "subject" property.
71 |
72 |
73 | - public abstract List<Match> searchTranslation(String searchStr, String
74 | srcLang, String tgtLang, int similarity, boolean
75 | caseSensitive) throws IOException, SAXException,
76 | ParserConfigurationException, SQLException;
77 |
Returns a list of possible translations of the "searchStr"
78 | argument.
79 | The search result is restricted to entries with the source language indicated by
80 | "srcLang" and target language indicated by
 81 | "tgtLang" whose similarity to the given text is greater than or
 82 | equal to the value of the "similarity" argument.
83 | The "caseSensitive" argument indicates whether the search engine
84 | should consider letter case differences or not.
85 |
86 |
87 | - public abstract List<Element> concordanceSearch(String searchStr,
88 | String srcLang, int limit, boolean isRegexp, boolean
89 | caseSensitive) throws IOException, SAXException,
90 | ParserConfigurationException, SQLException;
91 |
Returns a list of all translation units (<tu> elements) that contain the text
92 | indicated in "searchStr" argument.
93 | Searches are performed against the translation unit variant (<tuv> element)
94 | with language set to "srcLang".
95 | Search result contains at most "limit" entries. Returned data
96 | is in no particular order.
97 | Argument "isRegexp" indicates whether the
98 | "searchStr" parameter should be considered a regular
99 | expression that matches the whole segment.
100 | The "caseSensitive" argument indicates whether the search engine
101 | should consider letter case differences or not.
102 |
103 |
104 | - public abstract void storeTu(Element tu) throws IOException, SQLException;
105 |
Stores translation unit "tu" into the database, overwriting any
106 | existing <tu> element with the same "id" attribute.
107 |
108 |
109 | - public abstract void commit() throws SQLException;
110 |
Flushes to disk any data held in memory and not written yet.
111 |
112 |
113 | - public abstract Element getTu(String tuid) throws IOException,
114 | SAXException, ParserConfigurationException, SQLException;
115 |
Returns the translation unit (<tu> element) that has the "id" attribute set to
116 | the "tuid" argument.
117 |
118 |
119 | - public abstract void removeTu(String tuid) throws IOException, SAXException,
120 | ParserConfigurationException, SQLException;
121 |
Removes from the database the <tu> element that has the "id" attribute set to
122 | the "tuid" argument.
123 |
124 |
125 | - public abstract void deleteDatabase() throws IOException, SQLException;
126 |
127 | - When used with "MapDbEngine" instances, closes the engine
128 | and removes all associated files from disk;
129 | - When used with "SQLEngine" instances, closes the engine and
130 | drops the associated database from the SQL server.
131 |
132 |
133 |
134 |
135 |
136 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/tmx11.dtd:
--------------------------------------------------------------------------------
1 |
13 |
14 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
90 |
91 |
92 |
93 |
94 |
96 |
97 |
98 |
99 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
122 |
123 |
124 |
125 |
126 |
129 |
131 |
132 |
133 |
134 |
139 |
140 |
141 |
142 |
146 |
147 |
148 |
149 |
164 |
165 |
166 |
167 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
192 |
193 |
194 |
196 |
197 |
198 |
199 |
200 |
204 |
205 |
206 |
210 |
211 |
212 |
214 |
215 |
216 |
217 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/tmx12.dtd:
--------------------------------------------------------------------------------
1 |
13 |
14 |
33 |
34 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
111 |
112 |
113 |
114 |
115 |
117 |
118 |
119 |
120 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
143 |
144 |
145 |
146 |
147 |
150 |
152 |
153 |
154 |
155 |
160 |
161 |
162 |
163 |
167 |
168 |
169 |
170 |
185 |
186 |
187 |
188 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
213 |
214 |
215 |
217 |
218 |
219 |
222 |
223 |
224 |
228 |
229 |
230 |
234 |
235 |
236 |
238 |
239 |
240 |
243 |
244 |
245 |
246 |
247 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/tmx13.dtd:
--------------------------------------------------------------------------------
1 |
13 |
14 |
27 |
28 |
47 |
48 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
125 |
126 |
127 |
128 |
129 |
131 |
132 |
133 |
134 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
158 |
159 |
160 |
161 |
162 |
165 |
167 |
168 |
169 |
170 |
175 |
176 |
177 |
178 |
182 |
183 |
184 |
185 |
200 |
201 |
202 |
203 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
230 |
231 |
232 |
234 |
235 |
236 |
239 |
240 |
241 |
245 |
246 |
247 |
251 |
252 |
253 |
255 |
256 |
257 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmx/tmx14.dtd:
--------------------------------------------------------------------------------
1 |
13 |
14 |
25 |
26 |
39 |
40 |
59 |
60 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
137 |
138 |
139 |
140 |
141 |
143 |
144 |
145 |
146 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
170 |
171 |
172 |
173 |
174 |
177 |
179 |
180 |
181 |
182 |
187 |
188 |
189 |
190 |
195 |
196 |
197 |
198 |
199 |
200 |
215 |
216 |
217 |
218 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
245 |
246 |
247 |
249 |
250 |
251 |
254 |
255 |
256 |
260 |
261 |
262 |
266 |
267 |
268 |
271 |
272 |
273 |
274 |
275 |
276 |
278 |
279 |
280 |
281 |
282 |
283 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Eclipse Public License - v 1.0
2 |
3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE
4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF
5 | THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT.
6 |
7 | 1. DEFINITIONS
8 |
9 | "Contribution" means:
10 |
11 | a) in the case of the initial Contributor, the initial code and
12 | documentation distributed under this Agreement, and
13 |
14 | b) in the case of each subsequent Contributor:
15 |
16 | i) changes to the Program, and
17 |
18 | ii) additions to the Program;
19 |
20 | where such changes and/or additions to the Program originate from
21 | and are distributed by that particular Contributor. A Contribution
22 | 'originates' from a Contributor if it was added to the Program by
23 | such Contributor itself or anyone acting on such Contributor's
24 | behalf. Contributions do not include additions to the Program
25 | which: (i) are separate modules of software distributed in
26 | conjunction with the Program under their own license agreement,
27 | and (ii) are not derivative works of the Program.
28 |
29 | "Contributor" means any person or entity that distributes the Program.
30 |
31 | "Licensed Patents" mean patent claims licensable by a Contributor
32 | which are necessarily infringed by the use or sale of its Contribution
33 | alone or when combined with the Program.
34 |
35 | "Program" means the Contributions distributed in accordance with this
36 | Agreement.
37 |
38 | "Recipient" means anyone who receives the Program under this
39 | Agreement, including all Contributors.
40 |
41 | 2. GRANT OF RIGHTS
42 |
43 | a) Subject to the terms of this Agreement, each Contributor hereby
44 | grants Recipient a non-exclusive, worldwide, royalty-free
45 | copyright license to reproduce, prepare derivative works of,
46 | publicly display, publicly perform, distribute and sublicense the
47 | Contribution of such Contributor, if any, and such derivative
48 | works, in source code and object code form.
49 |
50 | b) Subject to the terms of this Agreement, each Contributor hereby
51 | grants Recipient a non-exclusive, worldwide, royalty-free patent
52 | license under Licensed Patents to make, use, sell, offer to sell,
53 | import and otherwise transfer the Contribution of such
54 | Contributor, if any, in source code and object code form. This
55 | patent license shall apply to the combination of the Contribution
56 | and the Program if, at the time the Contribution is added by the
57 | Contributor, such addition of the Contribution causes such
58 | combination to be covered by the Licensed Patents. The patent
59 | license shall not apply to any other combinations which include
60 | the Contribution. No hardware per se is licensed hereunder.
61 |
62 | c) Recipient understands that although each Contributor grants the
63 | licenses to its Contributions set forth herein, no assurances are
64 | provided by any Contributor that the Program does not infringe the
65 | patent or other intellectual property rights of any other
66 | entity. Each Contributor disclaims any liability to Recipient for
67 | claims brought by any other entity based on infringement of
68 | intellectual property rights or otherwise. As a condition to
69 | exercising the rights and licenses granted hereunder, each
70 | Recipient hereby assumes sole responsibility to secure any other
71 | intellectual property rights needed, if any. For example, if a
72 | third party patent license is required to allow Recipient to
73 | distribute the Program, it is Recipient's responsibility to
74 | acquire that license before distributing the Program.
75 |
76 | d) Each Contributor represents that to its knowledge it has
77 | sufficient copyright rights in its Contribution, if any, to grant
78 | the copyright license set forth in this Agreement.
79 |
80 | 3. REQUIREMENTS
81 |
82 | A Contributor may choose to distribute the Program in object code form
83 | under its own license agreement, provided that:
84 |
85 | a) it complies with the terms and conditions of this Agreement;
86 | and
87 |
88 | b) its license agreement:
89 |
90 | i) effectively disclaims on behalf of all Contributors all
91 | warranties and conditions, express and implied, including
92 | warranties or conditions of title and non-infringement, and
93 | implied warranties or conditions of merchantability and fitness
94 | for a particular purpose;
95 |
96 | ii) effectively excludes on behalf of all Contributors all
97 | liability for damages, including direct, indirect, special,
98 | incidental and consequential damages, such as lost profits;
99 |
100 | iii) states that any provisions which differ from this Agreement
101 | are offered by that Contributor alone and not by any other party;
102 | and
103 |
104 | iv) states that source code for the Program is available from such
105 | Contributor, and informs licensees how to obtain it in a
106 | reasonable manner on or through a medium customarily used for
107 | software exchange.
108 |
109 | When the Program is made available in source code form:
110 |
111 | a) it must be made available under this Agreement; and
112 |
113 | b) a copy of this Agreement must be included with each copy of the
114 | Program.
115 |
116 | Contributors may not remove or alter any copyright notices contained
117 | within the Program.
118 |
119 | Each Contributor must identify itself as the originator of its
120 | Contribution, if any, in a manner that reasonably allows subsequent
121 | Recipients to identify the originator of the Contribution.
122 |
123 | 4. COMMERCIAL DISTRIBUTION
124 |
125 | Commercial distributors of software may accept certain
126 | responsibilities with respect to end users, business partners and the
127 | like. While this license is intended to facilitate the commercial use
128 | of the Program, the Contributor who includes the Program in a
129 | commercial product offering should do so in a manner which does not
130 | create potential liability for other Contributors. Therefore, if a
131 | Contributor includes the Program in a commercial product offering,
132 | such Contributor ("Commercial Contributor") hereby agrees to defend
133 | and indemnify every other Contributor ("Indemnified Contributor")
134 | against any losses, damages and costs (collectively "Losses") arising
135 | from claims, lawsuits and other legal actions brought by a third party
136 | against the Indemnified Contributor to the extent caused by the acts
137 | or omissions of such Commercial Contributor in connection with its
138 | distribution of the Program in a commercial product offering. The
139 | obligations in this section do not apply to any claims or Losses
140 | relating to any actual or alleged intellectual property
141 | infringement. In order to qualify, an Indemnified Contributor must: a)
142 | promptly notify the Commercial Contributor in writing of such claim,
143 | and b) allow the Commercial Contributor to control, and cooperate with
144 | the Commercial Contributor in, the defense and any related settlement
145 | negotiations. The Indemnified Contributor may participate in any such
146 | claim at its own expense.
147 |
148 | For example, a Contributor might include the Program in a commercial
149 | product offering, Product X. That Contributor is then a Commercial
150 | Contributor. If that Commercial Contributor then makes performance
151 | claims, or offers warranties related to Product X, those performance
152 | claims and warranties are such Commercial Contributor's responsibility
153 | alone. Under this section, the Commercial Contributor would have to
154 | defend claims against the other Contributors related to those
155 | performance claims and warranties, and if a court requires any other
156 | Contributor to pay any damages as a result, the Commercial Contributor
157 | must pay those damages.
158 |
159 | 5. NO WARRANTY
160 |
161 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS
162 | PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
163 | KIND, EITHER EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY
164 | WARRANTIES OR CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY
165 | OR FITNESS FOR A PARTICULAR PURPOSE. Each Recipient is solely
166 | responsible for determining the appropriateness of using and
167 | distributing the Program and assumes all risks associated with its
168 | exercise of rights under this Agreement , including but not limited to
169 | the risks and costs of program errors, compliance with applicable
170 | laws, damage to or loss of data, programs or equipment, and
171 | unavailability or interruption of operations.
172 |
173 | 6. DISCLAIMER OF LIABILITY
174 |
175 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR
176 | ANY CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT,
177 | INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING
178 | WITHOUT LIMITATION LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF
179 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
180 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OR
181 | DISTRIBUTION OF THE PROGRAM OR THE EXERCISE OF ANY RIGHTS GRANTED
182 | HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.
183 |
184 | 7. GENERAL
185 |
186 | If any provision of this Agreement is invalid or unenforceable under
187 | applicable law, it shall not affect the validity or enforceability of
188 | the remainder of the terms of this Agreement, and without further
189 | action by the parties hereto, such provision shall be reformed to the
190 | minimum extent necessary to make such provision valid and enforceable.
191 |
192 | If Recipient institutes patent litigation against any entity
193 | (including a cross-claim or counterclaim in a lawsuit) alleging that
194 | the Program itself (excluding combinations of the Program with other
195 | software or hardware) infringes such Recipient's patent(s), then such
196 | Recipient's rights granted under Section 2(b) shall terminate as of
197 | the date such litigation is filed.
198 |
199 | All Recipient's rights under this Agreement shall terminate if it
200 | fails to comply with any of the material terms or conditions of this
201 | Agreement and does not cure such failure in a reasonable period of
202 | time after becoming aware of such noncompliance. If all Recipient's
203 | rights under this Agreement terminate, Recipient agrees to cease use
204 | and distribution of the Program as soon as reasonably
205 | practicable. However, Recipient's obligations under this Agreement and
206 | any licenses granted by Recipient relating to the Program shall
207 | continue and survive.
208 |
209 | Everyone is permitted to copy and distribute copies of this Agreement,
210 | but in order to avoid inconsistency the Agreement is copyrighted and
211 | may only be modified in the following manner. The Agreement Steward
212 | reserves the right to publish new versions (including revisions) of
213 | this Agreement from time to time. No one other than the Agreement
214 | Steward has the right to modify this Agreement. The Eclipse Foundation
215 | is the initial Agreement Steward. The Eclipse Foundation may assign
216 | the responsibility to serve as the Agreement Steward to a suitable
217 | separate entity. Each new version of the Agreement will be given a
218 | distinguishing version number. The Program (including Contributions)
219 | may always be distributed subject to the version of the Agreement
220 | under which it was received. In addition, after a new version of the
221 | Agreement is published, Contributor may elect to distribute the
222 | Program (including its Contributions) under the new version. Except as
223 | expressly stated in Sections 2(a) and 2(b) above, Recipient receives
224 | no rights or licenses to the intellectual property of any Contributor
225 | under this Agreement, whether expressly, by implication, estoppel or
226 | otherwise. All rights in the Program not expressly granted under this
227 | Agreement are reserved.
228 |
229 | This Agreement is governed by the laws of the State of New York and
230 | the intellectual property laws of the United States of America. No
231 | party to this Agreement will bring a legal action under this Agreement
232 | more than one year after the cause of action arose. Each party waives
233 | its rights to a jury trial in any resulting litigation.
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/MapDbEngine.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003-2021 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.File;
15 | import java.io.FileOutputStream;
16 | import java.io.IOException;
17 | import java.lang.System.Logger;
18 | import java.lang.System.Logger.Level;
19 | import java.nio.charset.StandardCharsets;
20 | import java.text.MessageFormat;
21 | import java.util.Calendar;
22 | import java.util.Collections;
23 | import java.util.Hashtable;
24 | import java.util.Iterator;
25 | import java.util.List;
26 | import java.util.Map;
27 | import java.util.NavigableSet;
28 | import java.util.Set;
29 | import java.util.TreeSet;
30 | import java.util.Vector;
31 | import java.util.regex.Pattern;
32 | import java.util.regex.PatternSyntaxException;
33 |
34 | import javax.xml.parsers.ParserConfigurationException;
35 |
36 | import org.mapdb.Fun;
37 | import org.mapdb.Fun.Tuple2;
38 | import org.xml.sax.SAXException;
39 |
40 | import com.maxprograms.tmx.TMXReader;
41 | import com.maxprograms.tmutils.TMUtils;
42 | import com.maxprograms.xml.Attribute;
43 | import com.maxprograms.xml.Element;
44 | import com.maxprograms.xml.Indenter;
45 |
46 | public class MapDbEngine implements ITmEngine, AutoCloseable {
47 |
48 | private static final Logger LOGGER = System.getLogger(MapDbEngine.class.getName());
49 |
50 | private String dbname;
51 | private File database;
52 | private TuDatabase tuDb;
53 | private TuvDatabase tuvDb;
54 | private FuzzyIndex fuzzyIndex;
55 |
56 | private String currProject;
57 | private String currSubject;
58 | private String currCustomer;
59 | private String creationDate;
60 |
61 | private Set tuAttributes;
62 |
63 | private long next;
64 |
65 | public MapDbEngine(String dbname, String workFolder) throws IOException {
66 | this.dbname = dbname;
67 | tuAttributes = Collections.synchronizedSortedSet(new TreeSet<>());
68 | String[] array = new String[] { "tuid", "o-encoding", "datatype", "usagecount", "lastusagedate", "creationtool",
69 | "creationtoolversion", "creationdate", "creationid", "changedate", "segtype", "changeid", "o-tmf",
70 | "srclang" };
71 | for (int i = 0; i < array.length; i++) {
72 | tuAttributes.add(array[i]);
73 | }
74 | File wfolder = new File(workFolder);
75 | database = new File(wfolder, dbname);
76 | if (!database.exists()) {
77 | database.mkdirs();
78 | }
79 | try {
80 | tuDb = new TuDatabase(database);
81 | } catch (Exception e) {
82 | LOGGER.log(Level.ERROR, e.getMessage(), e);
83 | MessageFormat mf = new MessageFormat("TU storage of database {0} is damaged.");
84 | throw new IOException(mf.format(new String[] { dbname }));
85 | }
86 | try {
87 | tuvDb = new TuvDatabase(database);
88 | } catch (Exception e) {
89 | LOGGER.log(Level.ERROR, e.getMessage(), e);
90 | MessageFormat mf = new MessageFormat("TUV storage of database {0} is damaged.");
91 | throw new IOException(mf.format(new String[] { dbname }));
92 | }
93 | try {
94 | fuzzyIndex = new FuzzyIndex(database);
95 | } catch (Exception e) {
96 | LOGGER.log(Level.ERROR, e.getMessage(), e);
97 | MessageFormat mf = new MessageFormat("Fuzzy index of database {0} is damaged.");
98 | throw new IOException(mf.format(new String[] { dbname }));
99 | }
100 | }
101 |
102 | @Override
103 | public void deleteDatabase() throws IOException {
104 | close();
105 | TMUtils.deleteFolder(database.getAbsolutePath());
106 | }
107 |
108 | @Override
109 | public String getType() {
110 | return MapDbEngine.class.getName();
111 | }
112 |
113 | @Override
114 | public synchronized void close() throws IOException {
115 | fuzzyIndex.close();
116 | tuDb.close();
117 | tuvDb.close();
118 | }
119 |
120 | @Override
121 | public String getName() {
122 | return dbname;
123 | }
124 |
125 | @Override
126 | public int storeTMX(String tmxFile, String project, String customer, String subject)
127 | throws SAXException, IOException, ParserConfigurationException {
128 | next = 0l;
129 | currProject = project != null ? project : "";
130 | currSubject = subject != null ? subject : "";
131 | currCustomer = customer != null ? customer : "";
132 | creationDate = TMUtils.creationDate();
133 |
134 | TMXReader reader = new TMXReader(this);
135 | reader.parse(new File(tmxFile).toURI().toURL());
136 | commit();
137 |
138 | return reader.getCount();
139 | }
140 |
/**
 * Writes the stored TUs to the file {@code tmxfile}.
 * The header is produced by writeHeader(); then every key returned by
 * tuDb.getKeys() is turned into a tu element with buildElement(), indented
 * and written out.
 *
 * @param tmxfile path of the file to write
 * @param langs   when not null, tuv children whose xml:lang value is not in
 *                this set are removed from each tu before it is written
 * @param srcLang passed to writeHeader()
 * @param props   passed to writeHeader(); an empty Hashtable is used when null
 */
// NOTE(review): several string literals passed to writeString() below are
// empty or whitespace-only; they look like markup lost in extraction —
// confirm against the repository before editing this method.
141 | @Override
142 | public void exportMemory(String tmxfile, Set langs, String srcLang, Map props)
143 | throws IOException, SAXException, ParserConfigurationException {
144 | Map properties = props != null ? props : new Hashtable<>();
145 | try (FileOutputStream output = new FileOutputStream(new File(tmxfile))) {
146 | writeHeader(output, srcLang, properties);
147 | writeString(output, " \n");
148 |
149 | Set set = tuDb.getKeys();
150 | Iterator it = set.iterator();
151 | while (it.hasNext()) {
152 | int hash = it.next();
153 | Map tuProps = tuDb.getTu(hash);
154 | Element tu = buildElement(tuProps);
155 | if (langs != null) {
// Drop the variants the caller did not ask for.
// NOTE(review): removes from tu while iterating the list returned by
// getChildren(); safe only if that list is a copy — confirm.
156 | List tuvs = tu.getChildren("tuv");
157 | Iterator et = tuvs.iterator();
158 | while (et.hasNext()) {
159 | Element tuv = et.next();
160 | if (!langs.contains(tuv.getAttributeValue("xml:lang"))) {
161 | tu.removeChild(tuv);
162 | }
163 | }
164 | }
165 | Indenter.indent(tu, 3, 2);
166 | writeString(output, " " + tu.toString() + "\n");
167 | }
168 | writeString(output, " \n");
169 | writeString(output, "");
170 | }
171 | }
172 |
173 | @Override
174 | public void flag(String tuid) {
175 | Map properties = tuDb.getTu(tuid.hashCode());
176 | if (properties != null) {
177 | properties.put("x-flag", "SW-Flag");
178 | tuDb.store(tuid, properties);
179 | }
180 | }
181 |
182 | @Override
183 | public Set getAllClients() {
184 | return tuDb.getCustomers();
185 | }
186 |
187 | @Override
188 | public Set getAllLanguages() {
189 | return tuDb.getLanguages();
190 | }
191 |
192 | @Override
193 | public Set getAllProjects() {
194 | return tuDb.getProjects();
195 | }
196 |
197 | @Override
198 | public Set getAllSubjects() {
199 | return tuDb.getSubjects();
200 | }
201 |
202 | @Override
203 | public List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity,
204 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException {
205 |
206 | List result = new Vector<>();
207 |
208 | if (similarity == 100) {
209 | // check for perfect matches
210 | Set perfect = tuvDb.getPerfectMatches(srcLang, searchStr);
211 | if (!perfect.isEmpty()) {
212 | Iterator it = perfect.iterator();
213 | while (it.hasNext()) {
214 | String tuid = it.next();
215 | String puretext = tuvDb.getPureText(srcLang, tuid.hashCode());
216 | boolean isMatch = true;
217 | if (caseSensitive) {
218 | isMatch = searchStr.equals(puretext);
219 | }
220 | if (isMatch) {
221 | String targetSeg = tuvDb.getSegText(tgtLang, tuid);
222 | if (targetSeg != null) {
223 | String sourceSeg = tuvDb.getSegText(srcLang, tuid);
224 | Element source = TMUtils.buildTuv(srcLang, sourceSeg);
225 | Element target = TMUtils.buildTuv(tgtLang, targetSeg);
226 | Map properties = tuDb.getTu(tuid.hashCode());
227 | Match match = new Match(source, target, 100, dbname, properties);
228 | result.add(match);
229 | }
230 | }
231 | }
232 | }
233 | }
234 | if (similarity < 100) {
235 | // Check for fuzzy matches
236 | int[] ngrams = NGrams.getNGrams(searchStr);
237 | int size = ngrams.length;
238 | if (size == 0) {
239 | return result;
240 | }
241 | int min = size * similarity / 100;
242 | int max = size * (200 - similarity) / 100;
243 |
244 | Map candidates = new Hashtable<>();
245 | String lowerSearch = searchStr.toLowerCase();
246 |
247 | NavigableSet> index = fuzzyIndex.getIndex(srcLang);
248 | for (int i = 0; i < ngrams.length; i++) {
249 | Iterable keys = Fun.filter(index, ngrams[i]);
250 | Iterator it = keys.iterator();
251 | while (it.hasNext()) {
252 | String tuid = it.next();
253 | if (candidates.containsKey(tuid)) {
254 | int count = candidates.get(tuid);
255 | candidates.put(tuid, count + 1);
256 | } else {
257 | candidates.put(tuid, 1);
258 | }
259 | }
260 | }
261 |
262 | Set tuids = candidates.keySet();
263 | Iterator it = tuids.iterator();
264 | while (it.hasNext()) {
265 | String tuid = it.next();
266 | int count = candidates.get(tuid);
267 | if (count >= min && count <= max) {
268 | int distance;
269 | String puretext = tuvDb.getPureText(srcLang, tuid.hashCode());
270 | if (caseSensitive) {
271 | distance = MatchQuality.similarity(searchStr, puretext);
272 | } else {
273 | distance = MatchQuality.similarity(lowerSearch, puretext.toLowerCase());
274 | }
275 | if (distance >= similarity) {
276 | String targetSeg = tuvDb.getSegText(tgtLang, tuid);
277 | if (targetSeg != null) {
278 | String sourceSeg = tuvDb.getSegText(srcLang, tuid);
279 | Element source = TMUtils.buildTuv(srcLang, sourceSeg);
280 | Element target = TMUtils.buildTuv(tgtLang, targetSeg);
281 | Map properties = tuDb.getTu(tuid.hashCode());
282 | Match match = new Match(source, target, distance, dbname, properties);
283 | result.add(match);
284 | }
285 | }
286 | }
287 | }
288 | }
289 | return result;
290 | }
291 |
292 | private Element buildElement(Map properties)
293 | throws IOException, SAXException, ParserConfigurationException {
294 | Element tu = new Element("tu");
295 | Set keys = properties.keySet();
296 | Iterator it = keys.iterator();
297 | while (it.hasNext()) {
298 | String key = it.next();
299 | if (tuAttributes.contains(key)) {
300 | tu.setAttribute(key, properties.get(key));
301 | } else {
302 | Element prop = new Element("prop");
303 | prop.setAttribute("type", key);
304 | prop.setText(properties.get(key));
305 | tu.addContent(prop);
306 | }
307 | }
308 | String tuid = tu.getAttributeValue("tuid");
309 | Set langs = tuDb.getLanguages();
310 | it = langs.iterator();
311 | while (it.hasNext()) {
312 | String lang = it.next();
313 | String seg = tuvDb.getSegText(lang, tuid);
314 | if (seg != null) {
315 | Element tuv = TMUtils.buildTuv(lang, seg);
316 | tu.addContent(tuv);
317 | }
318 | }
319 | return tu;
320 | }
321 |
322 | @Override
323 | public List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp,
324 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException {
325 | List result = new Vector<>();
326 | Pattern pattern = null;
327 | if (isRegexp) {
328 | try {
329 | pattern = Pattern.compile(searchStr);
330 | } catch (PatternSyntaxException pse) {
331 | throw new IOException(pse.getMessage());
332 | }
333 | }
334 | String lowerStr = searchStr.toLowerCase();
335 | NavigableSet keySet = tuvDb.getKeySet(srcLang);
336 | Iterator it = keySet.iterator();
337 | while (it.hasNext()) {
338 | int hash = it.next();
339 | String pureText = tuvDb.getPureText(srcLang, hash);
340 | if (isRegexp) {
341 | if (pattern != null && pattern.matcher(pureText).matches()) {
342 | result.add(buildElement(tuDb.getTu(hash)));
343 | if (result.size() == limit) {
344 | return result;
345 | }
346 | }
347 | } else {
348 | if (caseSensitive) {
349 | if (pureText.indexOf(searchStr) != -1) {
350 | result.add(buildElement(tuDb.getTu(hash)));
351 | if (result.size() == limit) {
352 | return result;
353 | }
354 | }
355 | } else {
356 | if (pureText.toLowerCase().indexOf(lowerStr) != -1) {
357 | result.add(buildElement(tuDb.getTu(hash)));
358 | if (result.size() == limit) {
359 | return result;
360 | }
361 | }
362 | }
363 | }
364 |
365 | }
366 | return result;
367 | }
368 |
369 | @Override
370 | public void storeTu(Element tu) throws IOException {
371 | String tuid = tu.getAttributeValue("tuid");
372 | if (tuid.isEmpty()) {
373 | tuid = nextId();
374 | tu.setAttribute("tuid", tuid);
375 | }
376 | if (tu.getAttributeValue("creationdate").isEmpty()) {
377 | tu.setAttribute("creationdate", creationDate);
378 | }
379 | if (tu.getAttributeValue("creationid").isEmpty()) {
380 | tu.setAttribute("creationid", System.getProperty("user.name"));
381 | }
382 | Map tuProperties = new Hashtable<>();
383 |
384 | List atts = tu.getAttributes();
385 | Iterator at = atts.iterator();
386 | while (at.hasNext()) {
387 | Attribute a = at.next();
388 | tuProperties.put(a.getName(), a.getValue());
389 | }
390 | List properties = tu.getChildren("prop");
391 | Iterator kt = properties.iterator();
392 | while (kt.hasNext()) {
393 | Element prop = kt.next();
394 | tuProperties.put(prop.getAttributeValue("type"), prop.getText());
395 | }
396 | if (currSubject != null && !currSubject.isEmpty()) {
397 | tuProperties.put("subject", currSubject);
398 | }
399 | if (currCustomer != null && !currCustomer.isEmpty()) {
400 | tuProperties.put("customer", currCustomer);
401 | }
402 | if (currProject != null && !currProject.isEmpty()) {
403 | tuProperties.put("project", currProject);
404 | }
405 | List tuvs = tu.getChildren("tuv");
406 | Set tuLangs = Collections.synchronizedSortedSet(new TreeSet<>());
407 |
408 | Iterator it = tuvs.iterator();
409 | while (it.hasNext()) {
410 | Element tuv = it.next();
411 | String lang = TMUtils.normalizeLang(tuv.getAttributeValue("xml:lang"));
412 | if (lang == null) {
413 | // Invalid language code, ignore this tuv
414 | continue;
415 | }
416 | tuDb.storeLanguage(lang);
417 | tuvDb.remove(lang, tuid);
418 | if (!tuLangs.contains(lang)) {
419 | Element seg = tuv.getChild("seg");
420 | String puretext = TMUtils.extractText(seg);
421 | if (!puretext.isBlank()) {
422 | String segText = seg.toString();
423 | segText = segText.substring("".length());
424 | segText = segText.substring(0, segText.length() - "".length());
425 | tuvDb.store(lang, tuid, puretext, segText);
426 |
427 | int[] ngrams = NGrams.getNGrams(puretext);
428 | NavigableSet> index = fuzzyIndex.getIndex(lang);
429 | for (int i = 0; i < ngrams.length; i++) {
430 | Tuple2 entry = Fun.t2(ngrams[i], tuid);
431 | if (!index.contains(entry)) {
432 | index.add(entry);
433 | }
434 | }
435 | tuLangs.add(lang);
436 | }
437 | }
438 | }
439 | tuDb.store(tuid, tuProperties);
440 | }
441 |
442 | @Override
443 | public synchronized void commit() {
444 | fuzzyIndex.commit();
445 | tuDb.commit();
446 | tuvDb.commit();
447 | }
448 |
449 | @Override
450 | public Element getTu(String tuid) throws IOException, SAXException, ParserConfigurationException {
451 | Map properties = tuDb.getTu(tuid.hashCode());
452 | return buildElement(properties);
453 | }
454 |
/**
 * Writes the opening part of the exported file through writeString().
 * In the visible else branch, the value of every entry in properties is
 * written on its own line.
 *
 * @param output     stream to write to
 * @param srcLang    source language supplied by the caller; its use is not
 *                   visible in the lines below
 * @param properties values written by the else branch
 */
// NOTE(review): this method is damaged in this copy: the string literals are
// empty or truncated and the inner numbering jumps from 462 to 468, so the
// statement that opens the "} else {" below is missing. Restore the method
// from the repository before changing it.
455 | private static void writeHeader(FileOutputStream output, String srcLang, Map properties)
456 | throws IOException {
457 | writeString(output, "\n");
458 | writeString(output,
459 | "\n");
460 | writeString(output, "\n");
461 | writeString(output,
462 | " \n");
468 | } else {
469 | writeString(output, ">\n");
470 | Set keys = properties.keySet();
471 | Iterator it = keys.iterator();
472 | while (it.hasNext()) {
473 | String key = it.next();
474 | writeString(output, " " + properties.get(key) + "\n");
475 | }
476 | writeString(output, " \n");
477 | }
478 | }
479 |
480 | private static void writeString(FileOutputStream output, String string) throws IOException {
481 | output.write(string.getBytes(StandardCharsets.UTF_8));
482 | }
483 |
484 | private String nextId() {
485 | if (next == 0l) {
486 | next = Calendar.getInstance().getTimeInMillis();
487 | }
488 | return "" + next++;
489 | }
490 |
491 | @Override
492 | public void removeTu(String tuid) throws IOException, SAXException, ParserConfigurationException {
493 | Element tu = getTu(tuid);
494 | tuDb.remove(tuid);
495 |
496 | List tuvs = tu.getChildren("tuv");
497 | Iterator it = tuvs.iterator();
498 | while (it.hasNext()) {
499 | Element tuv = it.next();
500 | String lang = tuv.getAttributeValue("xml:lang");
501 | tuvDb.remove(lang, tuid);
502 |
503 | Element seg = tuv.getChild("seg");
504 | String puretext = TMUtils.extractText(seg);
505 |
506 | int[] ngrams = NGrams.getNGrams(puretext);
507 | NavigableSet> index = fuzzyIndex.getIndex(lang);
508 | for (int i = 0; i < ngrams.length; i++) {
509 | Tuple2 entry = Fun.t2(ngrams[i], tuid);
510 | if (index.contains(entry)) {
511 | index.remove(entry);
512 | }
513 | }
514 | }
515 | }
516 |
517 | }
518 |
--------------------------------------------------------------------------------
/src/com/maxprograms/tmengine/SQLEngine.java:
--------------------------------------------------------------------------------
1 | /*******************************************************************************
2 | * Copyright (c) 2003, 2018 Maxprograms.
3 | *
4 | * This program and the accompanying materials
5 | * are made available under the terms of the Eclipse Public License 1.0
6 | * which accompanies this distribution, and is available at
7 | * https://www.eclipse.org/org/documents/epl-v10.html
8 | *
9 | * Contributors:
10 | * Maxprograms - initial API and implementation
11 | *******************************************************************************/
12 | package com.maxprograms.tmengine;
13 |
14 | import java.io.File;
15 | import java.io.FileOutputStream;
16 | import java.io.IOException;
17 | import java.lang.System.Logger;
18 | import java.lang.System.Logger.Level;
19 | import java.nio.charset.StandardCharsets;
20 | import java.sql.Connection;
21 | import java.sql.DriverManager;
22 | import java.sql.PreparedStatement;
23 | import java.sql.ResultSet;
24 | import java.sql.SQLException;
25 | import java.sql.Statement;
26 | import java.util.Calendar;
27 | import java.util.Collections;
28 | import java.util.Hashtable;
29 | import java.util.Iterator;
30 | import java.util.List;
31 | import java.util.Map;
32 | import java.util.Properties;
33 | import java.util.Set;
34 | import java.util.TreeSet;
35 | import java.util.Vector;
36 |
37 | import javax.xml.parsers.ParserConfigurationException;
38 |
39 | import com.maxprograms.tmutils.TMUtils;
40 | import com.maxprograms.tmx.TMXReader;
41 | import com.maxprograms.xml.Attribute;
42 | import com.maxprograms.xml.Element;
43 | import com.maxprograms.xml.Indenter;
44 |
45 | import org.xml.sax.SAXException;
46 |
47 | public class SQLEngine implements ITmEngine {
48 |
49 | private static final Logger LOGGER = System.getLogger(SQLEngine.class.getName());
50 |
51 | private Connection conn;
52 | private String dbName;
53 | private String serverName;
54 | private int port;
55 | private String userName;
56 | private String password;
57 |
58 | private String currProject;
59 | private String currSubject;
60 | private String currCustomer;
61 | private String creationDate;
62 |
63 | private long next;
64 |
65 | private Set languages;
66 |
67 | private PreparedStatement insertProperties;
68 | private PreparedStatement removeProperties;
69 | private PreparedStatement insertTuv;
70 | private PreparedStatement removeTuv;
71 | private PreparedStatement checkTu;
72 | private PreparedStatement selectProperties;
73 | private PreparedStatement selectSeg;
74 | private PreparedStatement selectPureText;
75 |
76 | private Hashtable insertNgram;
77 | private Hashtable removeNgram;
78 | private Hashtable selectNgram;
79 |
80 | private Set tuAttributes;
81 |
82 | public SQLEngine(String dbName, String serverName, int port, String userName, String password) throws SQLException {
83 | this.dbName = dbName;
84 | this.serverName = serverName;
85 | this.port = port;
86 | this.userName = userName;
87 | this.password = password;
88 | StringBuilder connBuilder = new StringBuilder();
89 | connBuilder.append("jdbc:mariadb://");
90 | connBuilder.append(serverName);
91 | connBuilder.append(':');
92 | connBuilder.append(port);
93 | connBuilder.append('/');
94 | connBuilder.append(dbName);
95 | connBuilder.append("?user=");
96 | connBuilder.append(userName);
97 | connBuilder.append("&password=");
98 | connBuilder.append(password);
99 | try {
100 | conn = DriverManager.getConnection(connBuilder.toString()); // "jdbc:mariadb://localhost:3306/DB?user=root&password=myPassword"
101 | conn.setAutoCommit(false);
102 | } catch (SQLException e) {
103 | createDatabase();
104 | conn = DriverManager.getConnection(connBuilder.toString());
105 | conn.setAutoCommit(false);
106 | LOGGER.log(Level.INFO, "Database " + dbName + " created.");
107 | }
108 | insertNgram = new Hashtable<>();
109 | removeNgram = new Hashtable<>();
110 | selectNgram = new Hashtable<>();
111 | }
112 |
113 | @Override
114 | public String getType() {
115 | return SQLEngine.class.getName();
116 | }
117 |
118 | private void createDatabase() throws SQLException {
119 | StringBuilder serverBuilder = new StringBuilder();
120 | serverBuilder.append("jdbc:mariadb://");
121 | serverBuilder.append(serverName);
122 | serverBuilder.append(':');
123 | serverBuilder.append(port);
124 | serverBuilder.append('/');
125 | Properties prop = new Properties();
126 | prop.setProperty("user", userName);
127 | prop.setProperty("password", password);
128 | prop.setProperty("useUnicode", "true");
129 | prop.setProperty("characterEncoding", StandardCharsets.UTF_8.name());
130 | try (Connection connection = DriverManager.getConnection(serverBuilder.toString(), prop)) {
131 | try (Statement stmt = connection.createStatement()) {
132 | stmt.execute("CREATE DATABASE `" + dbName + "` CHARACTER SET utf8");
133 | stmt.execute("CREATE TABLE `" + dbName + "`.tuv ( tuid VARCHAR(30) NOT NULL,"
134 | + " lang VARCHAR(15) NOT NULL, seg TEXT NOT NULL, pureText TEXT,"
135 | + " PRIMARY KEY (tuid,lang));");
136 | stmt.execute("CREATE TABLE `" + dbName + "`.tuprop ( tuid VARCHAR(30) NOT NULL,"
137 | + " propType VARCHAR(30) NOT NULL, content TEXT, PRIMARY KEY (tuid, propType)" + ");");
138 | stmt.execute("CREATE TABLE `" + dbName + "`.langs ( lang VARCHAR(15) NOT NULL);");
139 | }
140 | }
141 | }
142 |
143 | @Override
144 | public void deleteDatabase() throws SQLException {
145 | close();
146 | StringBuilder serverBuilder = new StringBuilder();
147 | serverBuilder.append("jdbc:mariadb://");
148 | serverBuilder.append(serverName);
149 | serverBuilder.append(':');
150 | serverBuilder.append(port);
151 | serverBuilder.append('/');
152 | Properties prop = new Properties();
153 | prop.setProperty("user", userName);
154 | prop.setProperty("password", password);
155 | prop.setProperty("useUnicode", "true");
156 | prop.setProperty("characterEncoding", StandardCharsets.UTF_8.name());
157 | try (Connection connection = DriverManager.getConnection(serverBuilder.toString(), prop)) {
158 | try (Statement stmt = connection.createStatement()) {
159 | stmt.execute("DROP DATABASE `" + dbName + "`");
160 | }
161 | }
162 | }
163 |
164 | @Override
165 | public void close() throws SQLException {
166 | conn.commit();
167 | if (insertProperties != null) {
168 | insertProperties.close();
169 | }
170 | if (removeProperties != null) {
171 | removeProperties.close();
172 | }
173 | if (selectProperties != null) {
174 | selectProperties.close();
175 | }
176 | if (insertTuv != null) {
177 | insertTuv.close();
178 | }
179 | if (removeTuv != null) {
180 | removeTuv.close();
181 | }
182 | if (checkTu != null) {
183 | checkTu.close();
184 | }
185 | if (selectSeg != null) {
186 | selectSeg.close();
187 | }
188 | if (selectPureText != null) {
189 | selectPureText.close();
190 | }
191 | Set keys = insertNgram.keySet();
192 | Iterator it = keys.iterator();
193 | while (it.hasNext()) {
194 | insertNgram.get(it.next()).close();
195 | }
196 | keys = removeNgram.keySet();
197 | it = keys.iterator();
198 | while (it.hasNext()) {
199 | removeNgram.get(it.next()).close();
200 | }
201 | keys = selectNgram.keySet();
202 | it = keys.iterator();
203 | while (it.hasNext()) {
204 | selectNgram.get(it.next()).close();
205 | }
206 | conn.close();
207 | }
208 |
209 | @Override
210 | public String getName() {
211 | return dbName;
212 | }
213 |
214 | @Override
215 | public int storeTMX(String tmxFile, String project, String customer, String subject)
216 | throws SAXException, IOException, ParserConfigurationException, SQLException {
217 | next = 0l;
218 | if (customer == null) {
219 | customer = "";
220 | }
221 | if (subject == null) {
222 | subject = "";
223 | }
224 | if (project == null) {
225 | project = "";
226 | }
227 |
228 | currProject = project;
229 | currSubject = subject;
230 | currCustomer = customer;
231 | creationDate = TMUtils.creationDate();
232 |
233 | TMXReader reader = new TMXReader(this);
234 | reader.parse(new File(tmxFile).toURI().toURL());
235 | commit();
236 |
237 | return reader.getCount();
238 | }
239 |
240 | @Override
241 | public void exportMemory(String tmxfile, Set langs, String srcLang, Map properties)
242 | throws IOException, SAXException, ParserConfigurationException, SQLException {
243 | if (languages == null) {
244 | getAllLanguages();
245 | }
246 | if (properties == null) {
247 | properties = new Hashtable<>();
248 | }
249 | try (FileOutputStream output = new FileOutputStream(new File(tmxfile))) {
250 | writeHeader(output, srcLang, properties);
251 | writeString(output, " \n");
252 | try (Statement stmt = conn.createStatement()) {
253 | try (ResultSet rs = stmt.executeQuery("SELECT DISTINCT tuid FROM `" + dbName + "`.tuprop")) {
254 | while (rs.next()) {
255 | String tuid = rs.getString(1);
256 | Element tu = getTu(tuid, langs);
257 | Indenter.indent(tu, 3, 2);
258 | writeString(output, " " + tu.toString() + "\n");
259 | }
260 | }
261 | }
262 | writeString(output, " \n");
263 | writeString(output, "");
264 | }
265 | }
266 |
267 | @Override
268 | public void flag(String tuid) throws SQLException {
269 | if (checkPreviousTu(tuid)) {
270 | Hashtable properties = getTuProperies(tuid);
271 | if (!properties.containsKey("x-flag")) {
272 | if (insertProperties == null) {
273 | insertProperties = conn.prepareStatement(
274 | "INSERT INTO `" + dbName + "`.tuprop (tuid, propType, content) VALUES (?,?,?)");
275 | }
276 | insertProperties.setString(1, tuid);
277 | insertProperties.setString(2, "x-flag");
278 | insertProperties.setString(3, "SW-Flag");
279 | insertProperties.execute();
280 | }
281 | }
282 | }
283 |
284 | @Override
285 | public Set getAllClients() throws SQLException {
286 | Set result = Collections.synchronizedSortedSet(new TreeSet<>());
287 | try (Statement stmt = conn.createStatement()) {
288 | try (ResultSet rs = stmt
289 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='customer'")) {
290 | while (rs.next()) {
291 | result.add(rs.getNString(1));
292 | }
293 | }
294 | }
295 | return result;
296 | }
297 |
298 | @Override
299 | public Set getAllLanguages() throws SQLException {
300 | if (languages == null) {
301 | languages = Collections.synchronizedSortedSet(new TreeSet<>());
302 | try (Statement stmt = conn.createStatement()) {
303 | try (ResultSet rs = stmt.executeQuery("SELECT lang FROM `" + dbName + "`.langs")) {
304 | while (rs.next()) {
305 | languages.add(rs.getString(1));
306 | }
307 | }
308 | }
309 | }
310 | return languages;
311 | }
312 |
313 | @Override
314 | public Set getAllProjects() throws SQLException {
315 | Set result = Collections.synchronizedSortedSet(new TreeSet<>());
316 | try (Statement stmt = conn.createStatement()) {
317 | try (ResultSet rs = stmt
318 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='project'")) {
319 | while (rs.next()) {
320 | result.add(rs.getNString(1));
321 | }
322 | }
323 | }
324 | return result;
325 | }
326 |
327 | @Override
328 | public Set getAllSubjects() throws SQLException {
329 | Set result = Collections.synchronizedSortedSet(new TreeSet<>());
330 | try (Statement stmt = conn.createStatement()) {
331 | try (ResultSet rs = stmt
332 | .executeQuery("SELECT DISTINCT content FROM `" + dbName + "`.tuprop WHERE propType='subject'")) {
333 | while (rs.next()) {
334 | result.add(rs.getNString(1));
335 | }
336 | }
337 | }
338 | return result;
339 | }
340 |
341 | @Override
342 | public List searchTranslation(String searchStr, String srcLang, String tgtLang, int similarity,
343 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException {
344 | List result = new Vector<>();
345 |
346 | int[] ngrams = NGrams.getNGrams(searchStr);
347 | int size = ngrams.length;
348 | if (size == 0) {
349 | return result;
350 | }
351 |
352 | int minLength = searchStr.length() * similarity / 100;
353 | int maxLength = searchStr.length() * (200 - similarity) / 100;
354 |
355 | StringBuilder set = new StringBuilder();
356 | set.append("" + ngrams[0]);
357 | for (int i = 1; i < size; i++) {
358 | set.append("," + ngrams[i]);
359 | }
360 |
361 | Set candidates = Collections.synchronizedSortedSet(new TreeSet<>());
362 | String lowerSearch = searchStr.toLowerCase();
363 |
364 | PreparedStatement stmt = selectNgram.get(srcLang);
365 | if (stmt == null) {
366 | stmt = conn.prepareStatement("SELECT tuid from `" + dbName + "`.matrix_"
367 | + srcLang.replace('-', '_').toLowerCase() + " WHERE ngram in (?) and segSize>=? AND segSize<=?");
368 | selectNgram.put(srcLang, stmt);
369 | }
370 | stmt.setString(1, set.toString());
371 | stmt.setInt(2, minLength);
372 | stmt.setInt(3, maxLength);
373 |
374 | try (ResultSet rs = stmt.executeQuery()) {
375 | while (rs.next()) {
376 | String tuid = rs.getString(1);
377 | candidates.add(tuid);
378 | }
379 | }
380 |
381 | Iterator it = candidates.iterator();
382 | while (it.hasNext()) {
383 | String tuid = it.next();
384 | int distance;
385 | String puretext = getPureText(srcLang, tuid);
386 | if (caseSensitive) {
387 | distance = MatchQuality.similarity(searchStr, puretext);
388 | } else {
389 | distance = MatchQuality.similarity(lowerSearch, puretext.toLowerCase());
390 | }
391 | if (distance >= similarity) {
392 | String targetSeg = getSegText(tgtLang, tuid);
393 | if (targetSeg != null) {
394 | String sourceSeg = getSegText(srcLang, tuid);
395 | Element source = TMUtils.buildTuv(srcLang, sourceSeg);
396 | Element target = TMUtils.buildTuv(tgtLang, targetSeg);
397 | Hashtable properties = getTuProperies(tuid);
398 | Match match = new Match(source, target, distance, dbName, properties);
399 | result.add(match);
400 | }
401 | }
402 | }
403 | return result;
404 | }
405 |
406 | private String getPureText(String lang, String tuid) throws SQLException {
407 | if (selectPureText == null) {
408 | selectPureText = conn.prepareStatement("SELECT pureText FROM `" + dbName + "`.tuv WHERE tuid=? AND lang=?");
409 | }
410 | String pureText = "";
411 | selectPureText.setString(1, tuid);
412 | selectPureText.setString(2, lang);
413 | try (ResultSet rs = selectPureText.executeQuery()) {
414 | while (rs.next()) {
415 | pureText = rs.getNString(1);
416 | }
417 | }
418 | return pureText;
419 | }
420 |
421 | @Override
422 | public List concordanceSearch(String searchStr, String srcLang, int limit, boolean isRegexp,
423 | boolean caseSensitive) throws IOException, SAXException, ParserConfigurationException, SQLException {
424 | Set candidates = Collections.synchronizedSortedSet(new TreeSet<>());
425 | if (isRegexp) {
426 | try (PreparedStatement stmt = conn.prepareStatement(
427 | "SELECT tuid, pureText FROM `" + dbName + "`.tuv WHERE lang=? AND pureText REGEXP ? LIMIT ?")) {
428 | stmt.setString(1, srcLang);
429 | stmt.setString(2, searchStr);
430 | stmt.setInt(3, limit);
431 | try (ResultSet rs = stmt.executeQuery()) {
432 | while (rs.next()) {
433 | candidates.add(rs.getString(1));
434 | }
435 | }
436 | }
437 | } else {
438 | if (!caseSensitive) {
439 | try (PreparedStatement stmt = conn.prepareStatement("SELECT tuid, pureText FROM `" + dbName
440 | + "`.tuv WHERE lang=? AND LOWER(pureText) LIKE ? LIMIT ?")) {
441 | stmt.setString(1, srcLang);
442 | stmt.setString(2, "%" + searchStr.toLowerCase() + "%");
443 | stmt.setInt(3, limit);
444 | try (ResultSet rs = stmt.executeQuery()) {
445 | while (rs.next()) {
446 | candidates.add(rs.getString(1));
447 | }
448 | }
449 | }
450 | } else {
451 | try (PreparedStatement stmt = conn.prepareStatement(
452 | "SELECT tuid, pureText FROM `" + dbName + "`.tuv WHERE lang=? AND pureText LIKE ? LIMIT ?")) {
453 | stmt.setString(1, srcLang);
454 | stmt.setString(2, "%" + searchStr + "%");
455 | stmt.setInt(3, limit);
456 | try (ResultSet rs = stmt.executeQuery()) {
457 | while (rs.next()) {
458 | candidates.add(rs.getString(1));
459 | }
460 | }
461 | }
462 | }
463 | }
464 | List result = new Vector<>();
465 | Iterator it = candidates.iterator();
466 | while (it.hasNext()) {
467 | Element tu = getTu(it.next());
468 | result.add(tu);
469 | }
470 | return result;
471 | }
472 |
473 | @Override
474 | public void storeTu(Element tu) throws IOException, SQLException {
475 | String tuid = tu.getAttributeValue("tuid");
476 | boolean isNew = false;
477 | if (tuid.isEmpty()) {
478 | tuid = nextId();
479 | tu.setAttribute("tuid", tuid);
480 | isNew = true;
481 | }
482 | if (!isNew) {
483 | isNew = checkPreviousTu(tuid);
484 | }
485 | if (tu.getAttributeValue("creationdate").isEmpty()) {
486 | tu.setAttribute("creationdate", creationDate);
487 | }
488 | if (tu.getAttributeValue("creationid").isEmpty()) {
489 | tu.setAttribute("creationid", System.getProperty("user.name"));
490 | }
491 | Hashtable tuProperties = new Hashtable<>();
492 |
493 | List atts = tu.getAttributes();
494 | Iterator at = atts.iterator();
495 | while (at.hasNext()) {
496 | Attribute a = at.next();
497 | tuProperties.put(a.getName(), a.getValue());
498 | }
499 | List properties = tu.getChildren("prop");
500 | Iterator kt = properties.iterator();
501 | while (kt.hasNext()) {
502 | Element prop = kt.next();
503 | tuProperties.put(prop.getAttributeValue("type"), prop.getText());
504 | }
505 | if (currSubject != null && !currSubject.isEmpty()) {
506 | tuProperties.put("subject", currSubject);
507 | }
508 | if (currCustomer != null && !currCustomer.isEmpty()) {
509 | tuProperties.put("customer", currCustomer);
510 | }
511 | if (currProject != null && !currProject.isEmpty()) {
512 | tuProperties.put("project", currProject);
513 | }
514 | List tuvs = tu.getChildren("tuv");
515 | Set tuLangs = Collections.synchronizedSortedSet(new TreeSet<>());
516 |
517 | Iterator it = tuvs.iterator();
518 | while (it.hasNext()) {
519 | Element tuv = it.next();
520 | String lang = TMUtils.normalizeLang(tuv.getAttributeValue("xml:lang"));
521 | if (lang == null) {
522 | // Invalid language code, ignore this tuv
523 | continue;
524 | }
525 | storeLanguage(lang);
526 | if (!isNew) {
527 | removeTuv(lang, tuid);
528 | }
529 | if (!tuLangs.contains(lang)) {
530 | Element seg = tuv.getChild("seg");
531 | String puretext = TMUtils.extractText(seg);
532 | if (puretext.isBlank()) {
533 | continue;
534 | }
535 |
536 | String segText = seg.toString();
537 | segText = segText.substring("".length());
538 | segText = segText.substring(0, segText.length() - "".length());
539 | storeTuv(lang, tuid, puretext, segText);
540 |
541 | int[] ngrams = NGrams.getNGrams(puretext);
542 | storeNgrams(lang, tuid, ngrams, puretext.length());
543 |
544 | tuLangs.add(lang);
545 | }
546 | }
547 | if (!isNew) {
548 | removeTuProperties(tuid);
549 | }
550 | storeTuProperties(tuid, tuProperties);
551 | }
552 |
553 | private boolean checkPreviousTu(String tuid) throws SQLException {
554 | if (checkTu == null) {
555 | checkTu = conn.prepareStatement("SELECT COUNT(*) FROM `" + dbName + "`.tuprop WHERE tuid=?");
556 | }
557 | checkTu.setString(1, tuid);
558 | int count = 0;
559 | try (ResultSet rs = checkTu.executeQuery()) {
560 | while (rs.next()) {
561 | count = rs.getInt(1);
562 | }
563 | }
564 | return count != 0;
565 | }
566 |
567 | private void storeNgrams(String lang, String tuid, int[] ngrams, int segSize) throws SQLException {
568 | PreparedStatement stmt = insertNgram.get(lang);
569 | if (stmt == null) {
570 | stmt = conn.prepareStatement("INSERT INTO `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase()
571 | + " (tuid, ngram, segSize) VALUES (?,?,?)");
572 | insertNgram.put(lang, stmt);
573 | }
574 | stmt.setString(1, tuid);
575 | stmt.setInt(3, segSize);
576 | for (int i = 0; i < ngrams.length; i++) {
577 | stmt.setInt(2, ngrams[i]);
578 | stmt.execute();
579 | }
580 | }
581 |
582 | private void storeTuv(String lang, String tuid, String puretext, String segText) throws SQLException {
583 | if (insertTuv == null) {
584 | insertTuv = conn
585 | .prepareStatement("INSERT INTO `" + dbName + "`.tuv (lang, tuid, seg, pureText ) VALUES (?,?,?,?)");
586 | }
587 | insertTuv.setString(1, lang);
588 | insertTuv.setString(2, tuid);
589 | insertTuv.setNString(3, segText);
590 | insertTuv.setNString(4, puretext);
591 | insertTuv.execute();
592 | }
593 |
594 | private void removeTuProperties(String tuid) throws SQLException {
595 | if (removeProperties == null) {
596 | removeProperties = conn.prepareStatement("DELETE FROM `" + dbName + "`.tuprop WHERE tuid=?");
597 | }
598 | removeProperties.setString(1, tuid);
599 | removeProperties.execute();
600 | }
601 |
602 | private void storeTuProperties(String tuid, Hashtable properties) throws SQLException {
603 | if (insertProperties == null) {
604 | insertProperties = conn
605 | .prepareStatement("INSERT INTO `" + dbName + "`.tuprop (tuid, propType, content) VALUES (?,?,?)");
606 | }
607 | insertProperties.setString(1, tuid);
608 | Set keys = properties.keySet();
609 | Iterator it = keys.iterator();
610 | while (it.hasNext()) {
611 | String prop = it.next();
612 | insertProperties.setString(2, prop);
613 | insertProperties.setNString(3, properties.get(prop));
614 | insertProperties.execute();
615 | }
616 | }
617 |
618 | private void removeTuv(String lang, String tuid) throws SQLException {
619 | if (removeTuv == null) {
620 | removeTuv = conn.prepareStatement("DELETE FROM `" + dbName + "`.tuv WHERE tuid=? AND lang=?");
621 | }
622 | removeTuv.setString(1, tuid);
623 | removeTuv.setString(2, lang);
624 | removeTuv.execute();
625 | PreparedStatement stmt = removeNgram.get(lang);
626 | if (stmt == null) {
627 | stmt = conn.prepareStatement(
628 | "DELETE FROM `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase() + " WHERE tuid=?");
629 | removeNgram.put(lang, stmt);
630 | }
631 | stmt.setString(1, tuid);
632 | stmt.execute();
633 | }
634 |
635 | private void storeLanguage(String lang) throws SQLException {
636 | if (languages == null) {
637 | getAllLanguages();
638 | }
639 | if (!languages.contains(lang)) {
640 | try (PreparedStatement stmt = conn
641 | .prepareStatement("INSERT INTO `" + dbName + "`.langs (lang) VALUES (?)")) {
642 | stmt.setString(1, lang);
643 | stmt.execute();
644 | }
645 | try (Statement stmt = conn.createStatement()) {
646 | stmt.execute("CREATE TABLE `" + dbName + "`.matrix_" + lang.replace('-', '_').toLowerCase() + " ("
647 | + " tuid VARCHAR(30) NOT NULL, ngram INTEGER NOT NULL, segSize INTEGER,"
648 | + " INDEX `ngrams` (`ngram` ASC) VISIBLE, PRIMARY KEY (tuid,ngram));");
649 | }
650 | conn.commit();
651 | languages.add(lang);
652 | }
653 | }
654 |
655 | @Override
656 | public void commit() throws SQLException {
657 | conn.commit();
658 | }
659 |
660 | private Element getTu(String tuid, Set langs)
661 | throws SQLException, SAXException, IOException, ParserConfigurationException {
662 | if (tuAttributes == null) {
663 | tuAttributes = Collections.synchronizedSortedSet(new TreeSet<>());
664 | String[] array = new String[] { "tuid", "o-encoding", "datatype", "usagecount", "lastusagedate",
665 | "creationtool", "creationtoolversion", "creationdate", "creationid", "changedate", "segtype",
666 | "changeid", "o-tmf", "srclang" };
667 | for (int i = 0; i < array.length; i++) {
668 | tuAttributes.add(array[i]);
669 | }
670 | }
671 | Hashtable properties = getTuProperies(tuid);
672 | Element tu = new Element("tu");
673 | Set keys = properties.keySet();
674 | Iterator it = keys.iterator();
675 | while (it.hasNext()) {
676 | String key = it.next();
677 | if (tuAttributes.contains(key)) {
678 | tu.setAttribute(key, properties.get(key));
679 | } else {
680 | Element prop = new Element("prop");
681 | prop.setAttribute("type", key);
682 | prop.setText(properties.get(key));
683 | tu.addContent(prop);
684 | }
685 | }
686 | if (langs.isEmpty()) {
687 | langs = getAllLanguages();
688 | }
689 | it = langs.iterator();
690 | while (it.hasNext()) {
691 | String lang = it.next();
692 | String seg = getSegText(lang, tuid);
693 | if (!seg.isEmpty()) {
694 | Element tuv = TMUtils.buildTuv(lang, seg);
695 | tu.addContent(tuv);
696 | }
697 | }
698 | return tu;
699 | }
700 |
701 | @Override
702 | public Element getTu(String tuid) throws IOException, SAXException, ParserConfigurationException, SQLException {
703 | return getTu(tuid, Collections.synchronizedSortedSet(new TreeSet<>()));
704 | }
705 |
706 | private String getSegText(String lang, String tuid) throws SQLException {
707 | if (selectSeg == null) {
708 | selectSeg = conn.prepareStatement("SELECT seg FROM `" + dbName + "`.tuv WHERE tuid=? AND lang=?");
709 | }
710 | String seg = "";
711 | selectSeg.setString(1, tuid);
712 | selectSeg.setString(2, lang);
713 | try (ResultSet rs = selectSeg.executeQuery()) {
714 | while (rs.next()) {
715 | seg = rs.getNString(1);
716 | }
717 | }
718 | return seg;
719 | }
720 |
721 | private Hashtable getTuProperies(String tuid) throws SQLException {
722 | if (selectProperties == null) {
723 | selectProperties = conn
724 | .prepareStatement("SELECT propType, content FROM `" + dbName + "`.tuprop WHERE tuid=?");
725 | }
726 | selectProperties.setString(1, tuid);
727 | Hashtable properties = new Hashtable<>();
728 | try (ResultSet rs = selectProperties.executeQuery()) {
729 | while (rs.next()) {
730 | properties.put(rs.getString(1), rs.getNString(2));
731 | }
732 | }
733 | return properties;
734 | }
735 |
736 | private String nextId() {
737 | if (next == 0l) {
738 | next = Calendar.getInstance().getTimeInMillis();
739 | }
740 | return "" + next++;
741 | }
742 |
743 | private static void writeHeader(FileOutputStream output, String srcLang, Map properties)
744 | throws IOException {
745 | writeString(output, "\n");
746 | writeString(output,
747 | "\n");
748 | writeString(output, "\n");
749 | writeString(output,
750 | " \n");
756 | } else {
757 | writeString(output, ">\n");
758 | Set keys = properties.keySet();
759 | Iterator it = keys.iterator();
760 | while (it.hasNext()) {
761 | String key = it.next();
762 | writeString(output, " " + properties.get(key) + "\n");
763 | }
764 | writeString(output, " \n");
765 | }
766 | }
767 |
768 | private static void writeString(FileOutputStream output, String string) throws IOException {
769 | output.write(string.getBytes(StandardCharsets.UTF_8));
770 | }
771 |
772 | @Override
773 | public void removeTu(String tuid) throws IOException, SAXException, ParserConfigurationException, SQLException {
774 | Element tu = getTu(tuid);
775 | removeTuProperties(tuid);
776 | List tuvs = tu.getChildren("tuv");
777 |
778 | Iterator it = tuvs.iterator();
779 | while (it.hasNext()) {
780 | Element tuv = it.next();
781 | String lang = tuv.getAttributeValue("xml:lang");
782 | removeTuv(lang, tuid);
783 | }
784 | }
785 | }
786 |
--------------------------------------------------------------------------------