├── .gitignore
├── Tests
├── bootstrap.php
├── testConfig.php
├── Classes
│ ├── XlfTranslatorTest.php
│ └── MicrosoftTranslatorTest.php
└── xlf
│ ├── messages.de.xlf
│ ├── messages.es.xlf
│ ├── messages.en_GB.xlf
│ ├── validators.de.xlf
│ ├── validators.en_GB.xlf
│ └── validators.es.xlf
├── composer.json
├── phpunit.xml.dist
├── LICENSE
├── src
└── Classes
│ ├── SimpleXmlExtended.php
│ ├── MachineTranslator.php
│ ├── MicrosoftTranslator.php
│ └── XlfTranslator.php
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | composer.lock
3 | vendor
4 | nbproject/
5 | testConfig.php
6 |
--------------------------------------------------------------------------------
/Tests/bootstrap.php:
--------------------------------------------------------------------------------
1 | =5.4.0",
14 | "guzzlehttp/guzzle": "~6.0"
15 | },
16 | "require-dev": {
17 | "composer/composer": "1.0.*@dev",
18 | "phpunit/phpunit": "4.8.35"
19 | },
20 | "autoload": {
21 | "psr-4": { "SMACP\\MachineTranslator\\": "src/" }
22 | },
23 | "extra": {
24 | "branch-alias": {
25 | "dev-master": "1.0.x-dev"
26 | }
27 | }
28 | }
29 |
--------------------------------------------------------------------------------
/phpunit.xml.dist:
--------------------------------------------------------------------------------
1 |
2 |
3 |
14 |
15 |
16 |
17 | ./Tests
18 |
19 |
20 |
21 |
22 |
23 | performance
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2016 Stuart MacPherson
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Tests/Classes/XlfTranslatorTest.php:
--------------------------------------------------------------------------------
1 | 'ar',
14 | 'ca_ES' => 'ca',
15 | 'cs_CZ' => 'cs',
16 | 'en_GB' => 'en',
17 | 'en_US' => 'en',
18 | 'es_ES' => 'es',
19 | 'he_HE' => 'he',
20 | 'zh_CN' => 'zh-CHS',
21 | 'zh_TW' => 'zh-CHT',
22 | ];
23 |
24 | public function testTranslate()
25 | {
26 | // Real Microsoft client configured with the test credentials and locale map.
27 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
28 | $microsoft->setLocaleMap($this->localeMap);
29 | $fixtureDir = __DIR__ . '/../xlf/';
30 | $xlfTranslator = new XlfTranslator();
31 | $xlfTranslator->setTranslator($microsoft)
32 | ->setSourceLocale('en_GB')
33 | ->setDir($fixtureDir)
34 | ->setMemory(false)
35 | ->setCommit(false)
36 | ->setOutput(true);
37 | $parsed = $xlfTranslator->translate();
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/Classes/SimpleXmlExtended.php:
--------------------------------------------------------------------------------
1 | ownerDocument;
44 | $node->appendChild($oNode->createCDATASection($str));
45 |
46 | return $this;
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/Classes/MachineTranslator.php:
--------------------------------------------------------------------------------
1 | 'ar',
13 | 'ca_ES' => 'ca',
14 | 'cs_CZ' => 'cs',
15 | 'en_GB' => 'en',
16 | 'en_US' => 'en',
17 | 'es_ES' => 'es',
18 | 'he_HE' => 'he',
19 | 'zh_CN' => 'zh-CHS',
20 | 'zh_TW' => 'zh-CHT',
21 | ];
22 |
23 | public function testSetLocaleMap()
24 | {
25 | // The map handed to the setter must come back unchanged from the getter.
26 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
27 | $microsoft->setLocaleMap($this->localeMap);
28 |
29 | $returnedMap = $microsoft->getLocaleMap();
30 | $this->assertEquals($this->localeMap, $returnedMap);
31 | }
32 |
33 | public function testTranslate()
34 | {
35 | // Translates a plain word from the 'en_GB' locale into the 'es_ES' locale.
36 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
37 | $microsoft->setLocaleMap($this->localeMap);
38 |
39 | $spanish = $microsoft->translate('Hello', 'en_GB', 'es_ES');
40 | $this->assertEquals('Hola', $spanish);
41 | }
42 |
43 | public function testTranslateRetainPlaceHolders()
44 | {
45 | // The %name% placeholder must come back untouched in the translated text.
46 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
47 | $microsoft->setLocaleMap($this->localeMap);
48 |
49 | $spanish = $microsoft->translate('Hello %name%', 'en_GB', 'es_ES');
50 | $this->assertEquals('Hola %name%', $spanish);
51 | }
52 |
53 | public function testDetectLanguage()
54 | {
55 | // Without normalisation the service's own language code is returned.
56 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
57 | $microsoft->setLocaleMap($this->localeMap);
58 |
59 | $detectedCode = $microsoft->detectLanguage('Hola');
60 | $this->assertEquals('es', $detectedCode);
61 | }
62 |
63 | public function testDetectLanguageAndReturnMyLanguageCode()
64 | {
65 | // With normalisation the detected code is mapped back to the locale map's key.
66 | $microsoft = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET);
67 | $microsoft->setLocaleMap($this->localeMap);
68 |
69 | $detectedLocale = $microsoft->detectLanguage('Hola', true);
70 | $this->assertEquals('es_ES', $detectedLocale);
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # MachineTranslator
2 | MachineTranslator is a PHP component that uses the Microsoft Translator service to translate strings from one language to another. It can also machine translate xliff (.xlf) files. It currently supports the Microsoft service, but other API providers may also be implemented in the future (e.g. Google Translate).
3 |
4 | PHP
5 | ----
6 | v5.4.0+
7 |
8 | Installation
9 | ----
10 | Add the following to composer.json to install via composer:
11 | ```composer
12 | "require": {
13 | "smacp/machine-translator": "dev-master"
14 | },
15 | "repositories": [
16 | {
17 | "type": "vcs",
18 | "url": "https://github.com/smacp/machine-translator.git"
19 | }
20 | ]
21 | ```
22 | A client key and secret are required to use Microsoft's Translation service API. Free or paid accounts can be created at [Microsoft Azure](https://azure.microsoft.com).
23 |
24 | MicrosoftTranslator
25 | ----
26 | The MicrosoftTranslator requires a client key and client secret to access Microsoft's service. Example use for translating a string from English to Spanish:
27 |
28 | ```php
29 | use SMACP\MachineTranslator\Classes\MicrosoftTranslator;
30 |
31 | $translator = new MicrosoftTranslator($myMsTranslationClientId, $myMsTranslationClientSecret);
32 | $translated = $translator->translate('Hello %name%', 'en', 'es');
33 | ```
34 |
35 | It is also possible to detect the language of a given string:
36 | ```php
37 | $detected = $translator->detectLanguage('Hola');
38 | ```
39 |
40 | XlfTranslator
41 | ----
42 | The XlfTranslator machine translates xliff files found in a given directory. It machine translates files based on a naming convention of catalogue.locale.xlf (e.g. 'messages.ca_ES.xlf'). Example use:
43 |
44 | ```php
45 | use SMACP\MachineTranslator\Classes\MicrosoftTranslator;
46 | use SMACP\MachineTranslator\Classes\XlfTranslator;
47 |
48 | $translator = new MicrosoftTranslator($myMsTranslationClientId, $myMsTranslationClientSecret);
49 | // map my xlf file language codes to Microsoft's :)
50 | $translator->setLocaleMap([
51 | 'ar_SY' => 'ar',
52 | 'ca_ES' => 'ca',
53 | 'cs_CZ' => 'cs',
54 | 'en_GB' => 'en',
55 | 'en_US' => 'en',
56 | 'es_ES' => 'es',
57 | 'no_NO' => 'no',
58 | 'he_HE' => 'he',
59 | 'zh_CN' => 'zh-CHS',
60 | 'zh_TW' => 'zh-CHT',
61 | ]);
62 |
63 | $xlfTranslator = new XlfTranslator();
64 | $xlfTranslator->setTranslator($translator)
65 | ->setSourceLocale('en_GB')
66 | ->setDir('/home/me/xlf/')
67 | ->setOutput(true)
68 | ->translate();
69 | ```
70 | Known issues
71 | ----
72 | Microsoft's free and paid plans for its Translation service are currently subject to word quotas and rate limits. When an account reaches these limits, the Microsoft service may not honour translation requests.
73 |
74 | License
75 | ----
76 |
77 | MIT
78 |
79 | Todo
80 | ----
81 | Finish tests
--------------------------------------------------------------------------------
/Tests/xlf/messages.de.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/Tests/xlf/messages.es.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/Tests/xlf/messages.en_GB.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/Tests/xlf/validators.de.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/Tests/xlf/validators.en_GB.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/Tests/xlf/validators.es.xlf:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimitted string such as "form.label.firstname", then the developer has not provided a default message.
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 | %s - password reset
15 | %s - password reset
16 |
17 |
18 | 1 hour
19 | 1 hour
20 |
21 |
22 | A paid session is required.
23 | A paid session is required.
24 |
25 |
26 | Welcome back %name%, good to see you again.
27 | Welcome %name%, good to see you again.
28 |
29 |
30 | Already paid?
31 | Already paid?
32 |
33 |
34 | Already registered?
35 | Already registered?
36 |
37 |
38 | Your password must be at least %num_chars% characters long.
39 | Your password must be at least %num_chars% characters long.
40 |
41 |
42 | Back to login
43 | Back to login
44 |
45 |
46 | Click here to choose your new password
47 | Click here to choose your new password
48 |
49 |
50 | Complete the details below
51 | Complete the details below
52 |
53 |
54 | Back
55 | Back
56 |
57 |
58 | Country
59 | Country
60 |
61 |
62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead.
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/src/Classes/MicrosoftTranslator.php:
--------------------------------------------------------------------------------
1 | 'Arabic',
65 | 'bs-Latn' => 'Bosnian (Latin)',
66 | 'bg' => 'Bulgarian',
67 | 'ca' => 'Catalan',
68 | 'zh-CHS' => 'Chinese Simplified',
69 | 'zh-CHT' => 'Chinese Traditional',
70 | 'hr' => 'Croatian',
71 | 'cs' => 'Czech',
72 | 'da' => 'Danish',
73 | 'nl' => 'Dutch',
74 | 'en' => 'English',
75 | 'et' => 'Estonian',
76 | 'fi' => 'Finnish',
77 | 'fr' => 'French',
78 | 'de' => 'German',
79 | 'el' => 'Greek',
80 | 'ht' => 'Haitian Creole',
81 | 'he' => 'Hebrew',
82 | 'hi' => 'Hindi',
83 | 'mww' => 'Hmong Daw',
84 | 'hu' => 'Hungarian',
85 | 'id' => 'Indonesian',
86 | 'it' => 'Italian',
87 | 'ja' => 'Japanese',
88 | 'sw' => 'Kiswahili',
89 | 'tlh' => 'Klingon',
90 | 'tlh-Qaak' => 'Klingon (pIqaD)',
91 | 'ko' => 'Korean',
92 | 'lv' => 'Latvian',
93 | 'lt' => 'Lithuanian',
94 | 'ms' => 'Malay',
95 | 'mt' => 'Maltese',
96 | 'no' => 'Norwegian',
97 | 'fa' => 'Persian',
98 | 'pl' => 'Polish',
99 | 'pt' => 'Portuguese',
100 | 'otq' => 'Querétaro Otomi',
101 | 'ro' => 'Romanian',
102 | 'ru' => 'Russian',
103 | 'sr-Cyrl' => 'Serbian (Cyrillic)',
104 | 'sr-Latn' => 'Serbian (Latin)',
105 | 'sk' => 'Slovak',
106 | 'sl' => 'Slovenian',
107 | 'es' => 'Spanish',
108 | 'sv' => 'Swedish',
109 | 'th' => 'Thai',
110 | 'tr' => 'Turkish',
111 | 'uk' => 'Ukrainian',
112 | 'ur' => 'Urdu',
113 | 'vi' => 'Vietnamese',
114 | 'cy' => 'Welsh',
115 | 'yua' => 'Yucatec Maya',
116 | ];
117 |
118 | /** @var array */
119 | protected $localeMap = [];
120 |
121 | /**
122 | * Stores the service credentials and the HTML-entity decoding preference.
123 | *
124 | * @param string $cid Client ID; used as the subscription key when requesting an access token
125 | * @param string $secret Client secret; kept on the instance
126 | * @param boolean $decodeHtmlEntities Whether translate() passes results through decodeEntities()
127 | * (defaults to true)
128 | */
129 | public function __construct($cid, $secret, $decodeHtmlEntities = true)
130 | {
131 | $this->clientID = $cid;
132 | $this->clientSecret = $secret;
133 | $this->decodeHtmlEntities = $decodeHtmlEntities;
134 | }
135 |
136 | /**
137 | * Returns the provider identifier held in the PROVIDER class constant.
138 | *
139 | * @return string
140 | */
141 | public function getProvider()
142 | {
143 | return self::PROVIDER;
144 | }
145 |
146 | /**
147 | * Returns the supported languages.
148 | *
149 | * @return array Microsoft language code => language name (e.g. 'es' => 'Spanish')
150 | */
151 | public function getLocales()
152 | {
153 | return $this->locales;
154 | }
155 |
156 | /**
157 | * Sets the map of caller locale codes to Microsoft language codes.
158 | *
159 | * @param array $localeMap Caller locale code => Microsoft code (e.g. 'en_GB' => 'en')
160 | * @return MicrosoftTranslator This instance, for method chaining
161 | */
162 | public function setLocaleMap(array $localeMap)
163 | {
164 | $this->localeMap = $localeMap;
165 |
166 | return $this;
167 | }
168 |
169 | /**
170 | * Returns the locale map set via setLocaleMap().
171 | *
172 | * @return array Caller locale code => Microsoft language code; an empty array
173 | * when no map has been set
174 | */
175 | public function getLocaleMap()
176 | {
177 | return $this->localeMap;
178 | }
179 |
180 | /**
181 | * Resolves a caller-supplied locale code to a Microsoft Translator language code.
182 | *
183 | * @param string $code Locale or language code, e.g. 'en', 'en_GB' or 'zh_CN'
184 | * @return string The Microsoft code, or '' when the code cannot be resolved
185 | */
186 | public function normaliseLanguageCode($code)
187 | {
188 | // Already one of Microsoft's own codes: nothing to do.
189 | if (isset($this->locales[$code])) {
190 | return $code;
191 | }
192 |
193 | // A configured locale map is authoritative: unmapped codes resolve to ''.
194 | if (count($this->localeMap) > 0) {
195 | if (!isset($this->localeMap[$code])) {
196 | return '';
197 | }
198 |
199 | return $this->localeMap[$code];
200 | }
201 |
202 | // No map: lower-case, swap '_' for '-', and rewrite the Chinese region suffixes.
203 | $candidate = str_replace('_', '-', strtolower($code));
204 | $candidate = str_replace(['-cn', '-tw'], ['-chs', '-cht'], $candidate);
205 |
206 | foreach (array_keys($this->getLocales()) as $microsoftCode) {
207 | if (strtolower($microsoftCode) === $candidate) {
208 | return $microsoftCode;
209 | }
210 | }
211 | return '';
212 | }
213 |
214 | /**
215 | * Returns a bearer token for the Microsoft Translator service, requesting one on first use.
216 | *
217 | * The token is cached on the instance; note that it is never refreshed here.
218 | *
219 | * @throws Exception When the token endpoint does not answer with HTTP 200
220 | * @return string The token (a JWT) as returned in the response body
221 | */
222 | public function getAccessToken()
223 | {
224 | if ($this->accessToken) {
225 | return $this->accessToken;
226 | }
227 |
228 | // Send the subscription key as a header rather than in the query string so
229 | // the secret does not leak into URL logs, proxies or exception messages.
230 | $client = new Client();
231 | $response = $client->post('https://api.cognitive.microsoft.com/sts/v1.0/issueToken', [
232 | 'headers' => ['Ocp-Apim-Subscription-Key' => $this->clientID],
233 | ]);
234 |
235 | if ($response->getStatusCode() !== self::HTTP_STATUS_CODE_OK) {
236 | throw new Exception('No access token could be obtained.');
237 | }
238 |
239 | // Cast the body stream to a string so the complete token is captured.
240 | $this->accessToken = (string) $response->getBody();
241 | return $this->accessToken;
242 | }
243 |
244 |
245 | /**
246 | * Translates a string, keeping %placeholder% tokens intact.
247 | *
248 | * @param string $word The source string to translate
249 | * @param string $from The locale code for the source string
250 | * @param string $to The locale to translate into
251 | * @return string The translation; $word itself when both locales resolve to the same
252 | * language; '' for empty input, an unresolvable locale or an unusable response
253 | */
254 | public function translate($word, $from, $to)
255 | {
256 | // Compare against '' rather than using !$word so that a literal "0" is still translated.
257 | if ((string) $word === '') {
258 | return '';
259 | }
260 |
261 | $from = $this->normaliseLanguageCode($from);
262 | $to = $this->normaliseLanguageCode($to);
263 |
264 | if (!$from || !$to) {
265 | return '';
266 | }
267 |
268 | if ($to === $from) {
269 | return $word;
270 | }
271 |
272 | // Swap placeholders for [[n]] tokens before sending; with no placeholders the map is empty and str_replace() is a no-op.
273 | $placeholders = $this->createPlaceholdersMap($this->getPlaceholders($word));
274 | $word = str_replace(array_keys($placeholders), array_values($placeholders), $word);
275 |
276 | // HTTPS keeps the bearer token and the text from travelling in clear text.
277 | $url = 'https://api.microsofttranslator.com/V2/Http.svc/Translate?text=' . urlencode($word)
278 | . '&from=' . urlencode($from) . '&to=' . urlencode($to);
279 | $accessToken = $this->getAccessToken();
280 |
281 | $ch = curl_init();
282 | curl_setopt($ch, CURLOPT_URL, $url);
283 | curl_setopt($ch, CURLOPT_HTTPHEADER, ['Authorization:bearer ' . $accessToken]);
284 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
285 | $this->response = curl_exec($ch);
286 | curl_close($ch);
287 |
288 | // curl_exec() yields false on transport failure; there is nothing to parse then.
289 | if (!is_string($this->response)) {
290 | return '';
291 | }
292 |
293 | // Group 1 is the attribute text of the <string> element, group 2 the translated text.
294 | if (!preg_match('/<string(.*?)>(.*?)<\/string>/s', $this->response, $matches)) {
295 | return '';
296 | }
297 |
298 | // Put the original placeholders back in place of the [[n]] tokens.
299 | $translated = str_replace(array_values($placeholders), array_keys($placeholders), $matches[2]);
300 | // fix any html entity conversion that may have been applied
301 | if ($this->decodeHtmlEntities === true) {
302 | $translated = $this->decodeEntities($translated);
303 | }
304 |
305 | return $translated;
306 | }
307 |
308 | /**
309 | * Detects the language of the given string.
310 | *
311 | * @param string $str Text to analyse
312 | * @param boolean $normaliseLocaleCode When true and a locale map is set, return the map's
313 | * own key for the detected code (e.g. 'es_ES', not 'es')
314 | * @return string Detected language code, or '' when none could be determined
315 | */
316 | public function detectLanguage($str, $normaliseLocaleCode = false)
317 | {
318 | $access_token = $this->getAccessToken();
319 |
320 | // HTTPS keeps the bearer token and the text from travelling in clear text.
321 | $url = 'https://api.microsofttranslator.com/V2/Http.svc/Detect?text=' . urlencode($str);
322 |
323 | $ch = curl_init();
324 | curl_setopt($ch, CURLOPT_URL, $url);
325 | curl_setopt($ch, CURLOPT_HTTPHEADER, ['Authorization:bearer ' . $access_token]);
326 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
327 | $this->response = curl_exec($ch);
328 | curl_close($ch);
329 |
330 | // Group 1 is the attribute text of <string>, group 2 the detected code; a failed
331 | // request (curl_exec() returned false) or an empty code yields ''.
332 | if (!is_string($this->response)
333 | || !preg_match('/<string(.*?)>(.*?)<\/string>/s', $this->response, $matches)
334 | || !$matches[2]
335 | ) {
336 | return '';
337 | }
338 |
339 | // array_flip(): when several locales share a code, the last one in the map wins.
340 | $map = $normaliseLocaleCode && $this->localeMap ? array_flip($this->localeMap) : [];
341 | return isset($map[$matches[2]]) ? $map[$matches[2]] : $matches[2];
342 | }
343 |
344 | /**
345 | * Returns the raw curl_exec() result of the latest translate()/detectLanguage() request.
346 | *
347 | * @return string|boolean Response body; curl_exec() gives false when the request failed
348 | */
349 | public function getResponse()
350 | {
351 | return $this->response;
352 | }
353 |
354 | /**
355 | * Sets whether translate() passes its result through decodeEntities().
356 | *
357 | * @param boolean $decodeHtmlEntities
358 | * @return MicrosoftTranslator This instance, for method chaining
359 | */
360 | public function setDecodeHtmlEntities($decodeHtmlEntities)
361 | {
362 | $this->decodeHtmlEntities = $decodeHtmlEntities;
363 |
364 | return $this;
365 | }
366 |
367 | /**
368 | * Finds every %name%-style placeholder: text between two '%' with no whitespace or '%' inside.
369 | *
370 | * @param string $str
371 | * @return array The full placeholder strings (including the '%' signs), in order of appearance
372 | */
373 | public function getPlaceholders($str)
374 | {
375 | preg_match_all('/%([^%\s]+)%/', $str, $found);
376 | if (!isset($found[0])) { return []; }
377 | return $found[0];
378 | }
379 |
380 | /**
381 | * Builds the placeholder => token map used to swap placeholders out before translating.
382 | *
383 | * @param array $array Placeholder strings, e.g. the result of getPlaceholders()
384 | * @return array Each placeholder mapped to '[[n]]', n being its array key plus one
385 | */
386 | public function createPlaceholdersMap($array)
387 | {
388 | $tokens = [];
389 |
390 | foreach ($array as $index => $placeholder) {
391 | $tokens[$placeholder] = '[[' . ($index + 1) . ']]';
392 | }
393 |
394 | return $tokens;
395 | }
396 |
397 | /**
398 | * Determines whether the string contains a CDATA section opener ('<![CDATA[').
399 | *
400 | * @param string $str
401 | * @return boolean
402 | */
403 | public function isCdata($str)
404 | {
405 | return strpos($str, '<![CDATA[') !== false;
406 | }
407 |
408 | /**
409 | * Reports whether strip_tags() would alter the string, i.e. whether it contains tags.
410 | *
411 | * @param string $str
412 | * @return boolean
413 | */
414 | public function containsHtml($str)
415 | {
416 | return strip_tags($str) !== $str;
417 | }
418 |
419 | /**
420 | * Fixes html encoding anomalies returned by the Microsoft translator service and decodes html entities
421 | *
422 | * @param string $str The translated text
423 | * @param boolean $applyCdataTags Wrap the result in a CDATA section
424 | * (a string that already contains one is left unwrapped)
425 | * @return string
426 | */
427 | public function decodeEntities($str, $applyCdataTags = false)
428 | {
429 | // fix any odd html entity conversion: restore entity-encoded angle brackets first
430 | $find = ['&lt;', '&gt;'];
431 | $replace = ['<', '>'];
432 | $str = str_replace($find, $replace, $str);
433 |
434 | $str = html_entity_decode($str, ENT_QUOTES, 'UTF-8');
435 |
436 | if ($applyCdataTags === true && !$this->isCdata($str)) {
437 | // wrap in CDATA tags
438 | return '<![CDATA[' . $str . ']]>';
439 | }
440 |
441 | // fix any trailing whitespace in CDATA tags
442 | return preg_replace('/<!\[CDATA\[(.*?)\s*\]\]>/s', '<![CDATA[$1]]>', $str);
443 | }
444 | }
445 |
--------------------------------------------------------------------------------
/src/Classes/XlfTranslator.php:
--------------------------------------------------------------------------------
1 | 'machinetranslated',
76 | 'mt_date' => 'datemachinetranslated'
77 | ];
78 |
79 | /** @var boolean */
80 | protected $memory = true;
81 |
82 | /** @var boolean */
83 | protected $output = true;
84 |
85 | /** @var boolean */
86 | protected $outputTranslated = false;
87 |
88 | /**
89 | * Returns the machine translator set via setTranslator().
90 | *
91 | * @return MachineTranslator
92 | */
93 | public function getTranslator()
94 | {
95 | return $this->translator;
96 | }
97 |
98 | /**
99 | * Sets the machine translator; translate() reads its provider name via getProvider().
100 | *
101 | * @param MachineTranslator $translator
102 | * @return XlfTranslator This instance, for method chaining
103 | */
104 | public function setTranslator(MachineTranslator $translator)
105 | {
106 | $this->translator = $translator;
107 |
108 | return $this;
109 | }
110 |
111 | /**
112 | * Returns the locales set via setLocales().
113 | *
114 | * @return array
115 | */
116 | public function getLocales()
117 | {
118 | return $this->locales;
119 | }
120 |
121 | /**
122 | * Sets the locales list (presumably the target locales to translate - TODO confirm).
123 | *
124 | * @param array $locales
125 | * @return XlfTranslator This instance, for method chaining
126 | */
127 | public function setLocales(array $locales)
128 | {
129 | $this->locales = $locales;
130 |
131 | return $this;
132 | }
133 |
134 | /**
135 | * Returns the excluded locales set via setExcludedLocales().
136 | *
137 | * @return array
138 | */
139 | public function getExcludedLocales()
140 | {
141 | return $this->excludedLocales;
142 | }
143 |
144 | /**
145 | * Sets the excluded locales (presumably locales to skip when translating - TODO confirm).
146 | *
147 | * @param array $locales
148 | * @return XlfTranslator This instance, for method chaining
149 | */
150 | public function setExcludedLocales(array $locales)
151 | {
152 | $this->excludedLocales = $locales;
153 |
154 | return $this;
155 | }
156 |
157 | /**
158 | * Returns the directory that translate() scans, as set via setDir().
159 | *
160 | * @return string
161 | */
162 | public function getDir()
163 | {
164 | return $this->dir;
165 | }
166 |
167 | /**
168 | * Sets the directory that translate() scans for files.
169 | *
170 | * @param string $dir Prepended as-is to each file name, so include the trailing slash
171 | * @return XlfTranslator This instance, for method chaining
172 | */
173 | public function setDir($dir)
174 | {
175 | $this->dir = $dir;
176 |
177 | return $this;
178 | }
179 |
180 | /**
181 | * Returns the source locale set via setSourceLocale().
182 | *
183 | * @return string
184 | */
185 | public function getSourceLocale()
186 | {
187 | return $this->sourceLocale;
188 | }
189 |
190 | /**
191 | * Sets the source locale (presumably the locale translations are made from - TODO confirm).
192 | *
193 | * @param string $locale Locale code, e.g. 'en_GB'
194 | * @return XlfTranslator This instance, for method chaining
195 | */
196 | public function setSourceLocale($locale)
197 | {
198 | $this->sourceLocale = $locale;
199 |
200 | return $this;
201 | }
202 |
203 | /**
204 | * Returns the catalogues set via setCatalogues().
205 | *
206 | * @return array
207 | */
208 | public function getCatalogues()
209 | {
210 | return $this->catalogues;
211 | }
212 |
213 | /**
214 | * Sets the catalogues (presumably catalogue names such as 'messages' - TODO confirm).
215 | *
216 | * @param array $catalogues
217 | * @return XlfTranslator This instance, for method chaining
218 | */
219 | public function setCatalogues(array $catalogues)
220 | {
221 | $this->catalogues = $catalogues;
222 |
223 | return $this;
224 | }
225 |
226 | /**
227 | * Returns the newOnly flag set via setNewOnly().
228 | *
229 | * @return boolean
230 | */
231 | public function getNewOnly()
232 | {
233 | return $this->newOnly;
234 | }
235 |
236 | /**
237 | * Sets the newOnly flag (presumably limits work to untranslated strings - TODO confirm).
238 | *
239 | * @param boolean $newOnly
240 | * @return XlfTranslator This instance, for method chaining
241 | */
242 | public function setNewOnly($newOnly)
243 | {
244 | $this->newOnly = $newOnly;
245 |
246 | return $this;
247 | }
248 |
249 | /**
250 | * Returns the commit flag set via setCommit().
251 | *
252 | * @return boolean
253 | */
254 | public function getCommit()
255 | {
256 | return $this->commit;
257 | }
258 |
259 | /**
260 | * Sets the commit flag (presumably whether translated files are written - TODO confirm).
261 | *
262 | * @param boolean $commit
263 | * @return XlfTranslator This instance, for method chaining
264 | */
265 | public function setCommit($commit)
266 | {
267 | $this->commit = $commit;
268 |
269 | return $this;
270 | }
271 |
272 | /**
273 | * Returns the output flag set via setOutput().
274 | *
275 | * @return boolean
276 | */
277 | public function getOutput()
278 | {
279 | return $this->output;
280 | }
281 |
282 | /**
283 | * Sets the output flag; when it is truthy, translate() echoes progress information.
284 | *
285 | * @param boolean $output
286 | * @return XlfTranslator This instance, for method chaining
287 | */
288 | public function setOutput($output)
289 | {
290 | $this->output = $output;
291 |
292 | return $this;
293 | }
294 |
/**
 * Tells whether every source string and its translation are echoed.
 *
 * Only has an effect while output is enabled as well.
 *
 * @return boolean
 */
public function getOutputTranslated()
{
    return $this->outputTranslated;
}
304 |
/**
 * Switches the echoing of every source string and its translation on or off.
 *
 * @param boolean $outputTranslated only has an effect while output is enabled as well
 * @return XlfTranslator the current instance, for chaining
 */
public function setOutputTranslated($outputTranslated)
{
    $this->outputTranslated = $outputTranslated;

    return $this;
}
317 |
/**
 * Tells whether entries already flagged as machine translated are left alone.
 *
 * @return boolean
 */
public function getMemory()
{
    return $this->memory;
}
327 |
/**
 * Chooses whether entries already flagged as machine translated are left alone.
 *
 * @param boolean $memory when truthy, entries with a source that carry the machine translation attribute are skipped
 * @return XlfTranslator the current instance, for chaining
 */
public function setMemory($memory)
{
    $this->memory = $memory;

    return $this;
}
340 |
/**
 * Machine translates the xlf files found in the configured directory.
 *
 * Every regular file in the directory has to be named catalogue.locale.xlf.
 * Files rejected by shouldParseCatalogue() or shouldParseLocale() are skipped.
 * Translated targets are flagged with the machine translation attributes and,
 * when commit is true, written back to the file they were read from. Progress
 * and a summary are echoed while output is enabled.
 *
 * @return XlfTranslator
 * @throws Exception when a file is not named catalogue.locale.xlf
 */
public function translate()
{
    $this->parsed = [];
    $this->mtFailCount = 0;

    $provider = $this->translator->getProvider();
    $cataloguesTranslated = [];
    $cataloguesSkipped = [];
    $localesTranslated = [];
    $localesSkipped = [];
    $strRequested = 0;
    $strTranslated = 0;
    $filesWritten = 0;

    if ($this->output) {
        echo PHP_EOL;
        echo '-----------------------------------------' . PHP_EOL;
        echo 'XlfTranslator' . PHP_EOL;
        echo '-----------------------------------------' . PHP_EOL;
        echo 'MT provider: ' . $provider . PHP_EOL;
        echo PHP_EOL;
    }

    $dh = opendir($this->dir);

    if ($dh) {
        if ($this->output) {
            echo 'Translating xlf in: ' . $this->dir . PHP_EOL;
            echo PHP_EOL;
        }

        // Accept a directory configured with or without a trailing separator.
        $basePath = $this->dir;
        if ($basePath !== '' && !in_array(substr($basePath, -1), ['/', '\\'], true)) {
            $basePath .= '/';
        }

        try {
            while (false !== ($filename = readdir($dh))) {
                $filePath = $basePath . $filename;

                if (!is_file($filePath)) {
                    continue;
                }

                $parts = explode('.', $filename);

                if (count($parts) !== 3) {
                    throw new Exception('Cannot parse file. Expected file in format catalogue.locale.xlf.');
                }

                // strpos() never returns a negative number, so the extension is compared directly.
                if (strtolower($parts[2]) !== 'xlf') {
                    throw new Exception('Not a valid xlf file: ' . $filename);
                }

                $catalogue = $parts[0];
                $locale = $parts[1];

                if (!$this->shouldParseCatalogue($catalogue)) {
                    if (!in_array($catalogue, $cataloguesSkipped, true)) {
                        $cataloguesSkipped[] = $catalogue;
                    }
                    continue;
                }

                if (!$this->shouldParseLocale($locale)) {
                    if (!in_array($locale, $localesSkipped, true)) {
                        $localesSkipped[] = $locale;
                    }
                    continue;
                }

                if ($this->output) {
                    echo 'File: ' . $filename . PHP_EOL;
                    echo 'Catalogue: ' . $catalogue . PHP_EOL;
                    echo 'Locale: ' . $locale . PHP_EOL;
                    echo 'MT locale: ' . $this->translator->normaliseLanguageCode($locale) . PHP_EOL;
                    echo PHP_EOL;
                    echo 'P: ';
                }

                $contents = file_get_contents($filePath);
                $xlfData = new SimpleXMLExtended($contents);
                $new = [];

                // The failure budget applies per file.
                $this->mtFailCount = 0;

                foreach ($xlfData->file->body as $bItem) {
                    $xlfStrTranslated = 0;

                    foreach ($bItem as $bValue) {
                        if ($this->mtFailCount >= $this->maxMtFailCount) {
                            // Too many failures, we may have hit the flood limit: give up on this body.
                            break;
                        }

                        $targetAttributes = $bValue->target->attributes();

                        if ($this->newOnly === true && (!isset($targetAttributes['state']) || (string) $targetAttributes['state'] !== 'new')) {
                            continue;
                        }

                        $source = (string) $bValue->source;
                        $target = (string) $bValue->target;
                        $attributes = $bValue->attributes();

                        if ($source === '') {
                            // Without a source only an ICU formatted target can be translated.
                            if (!$target || !$this->isICU($target)) {
                                continue;
                            }

                            list($translated, $requested) = $this->translateIcuTokens($target, $locale);
                            $strRequested += $requested;
                        } elseif (!$source) {
                            // A source of "0" is falsy and is treated like an empty one.
                            continue;
                        } else {
                            if ($this->memory && isset($attributes[$this->attributes['mt']])) {
                                continue;
                            }

                            $strRequested++;
                            $translated = $this->translator->translate($source, $this->sourceLocale, $locale);
                        }

                        if (!$translated) {
                            $this->mtFailCount++;
                            continue;
                        }

                        $new[] = ['source' => $source, 'target' => $translated];

                        if (!isset($attributes[$this->attributes['mt']])) {
                            $bValue->addAttribute($this->attributes['mt'], 1);
                            $bValue->addAttribute($this->attributes['mt_date'], date('Y-m-d H:i:s'));
                        }

                        $bValue->attributes()->{$this->attributes['mt']} = 1;

                        if ($this->translator->containsHtml($translated)) {
                            // Markup has to go into a CDATA section to survive serialisation unescaped.
                            $bValue->target = null;
                            $bValue->target->addCData($translated);
                        } else {
                            $bValue->target = $translated;
                        }

                        $xlfStrTranslated++;
                        $strTranslated++;

                        if ($this->output) {
                            echo '.';
                        }
                    }

                    if ($this->output) {
                        if ($xlfStrTranslated === 0) {
                            echo 'No strings translated';
                        }
                        echo PHP_EOL;
                        echo 'T: ' . $xlfStrTranslated . PHP_EOL;
                        echo PHP_EOL;
                    }
                }

                if (count($new) === 0) {
                    continue;
                }

                if ($this->output && $this->outputTranslated) {
                    foreach ($new as $key => $row) {
                        echo '[#' . ($key+1) . '] Source: ' . $row['source'] . PHP_EOL;
                        echo '[#' . ($key+1) . '] Translated: ' . $row['target'] . PHP_EOL;
                    }
                    echo PHP_EOL;
                }

                if ($this->commit === true) {
                    $this->write($xlfData, $filePath);
                    $filesWritten++;
                }

                if (!in_array($catalogue, $cataloguesTranslated, true)) {
                    $cataloguesTranslated[] = $catalogue;
                }

                // A locale usually spans several catalogues; count it once.
                if (!in_array($locale, $localesTranslated, true)) {
                    $localesTranslated[] = $locale;
                }

                $this->parsed[] = $filename;
            }
        } catch (\Exception $e) {
            // Do not leak the directory handle when a file cannot be processed.
            closedir($dh);

            throw $e;
        }

        closedir($dh);
    }

    if ($this->output) {
        echo PHP_EOL;
        echo 'Done' . PHP_EOL;
        echo '-----------------------------------------' . PHP_EOL;
        echo 'Total locales translated: ' . count($localesTranslated) . PHP_EOL;
        echo 'Total strings requested: ' . $strRequested . PHP_EOL;
        echo 'Total strings translated: ' . $strTranslated . PHP_EOL;
        echo 'Catalogues translated: ' . (count($cataloguesTranslated) === 0 ? '0' : implode(', ', $cataloguesTranslated)) . PHP_EOL;

        if ($cataloguesSkipped) {
            echo 'Catalogues skipped: ' . implode(', ', $cataloguesSkipped) . PHP_EOL;
        }

        if ($localesSkipped) {
            echo 'Locales skipped: ' .implode(', ', $localesSkipped) . PHP_EOL;
        }

        echo 'xlf updated: ' . $filesWritten . PHP_EOL;
    }

    return $this;
}

/**
 * Machine translates the tokens of an ICU formatted message.
 *
 * Tokens the translator fails on are left untouched and counted in mtFailCount,
 * instead of being replaced by an empty pair of braces.
 *
 * @param string $target ICU formatted message
 * @param string $locale locale to translate into
 * @return array the message with translated tokens substituted, followed by the number of translations requested
 */
protected function translateIcuTokens($target, $locale)
{
    $pairs = [];
    $requested = 0;

    foreach ($this->parseTokensFromICU($target) as $token) {
        $requested++;
        $result = $this->translator->translate($token, $this->sourceLocale, $locale);

        if (!$result) {
            $this->mtFailCount++;
            continue;
        }

        $pairs['{' . $token . '}'] = '{' . $result . '}';
    }

    // strtr() substitutes in a single pass, so an already translated token is never
    // replaced a second time. Before PHP 8 it returns false for an empty array.
    $translated = $pairs ? strtr($target, $pairs) : $target;

    return [$translated, $requested];
}
564 |
/**
 * Tells whether files of the given catalogue are to be translated.
 *
 * With no catalogues configured every catalogue qualifies, otherwise only
 * the configured ones do.
 *
 * @param string $catalogue
 * @return boolean
 */
protected function shouldParseCatalogue($catalogue)
{
    $unrestricted = count($this->catalogues) === 0;

    return $unrestricted || in_array($catalogue, $this->catalogues);
}
579 |
/**
 * Tells whether files of the given locale are to be translated.
 *
 * A locale qualifies when the translator can normalise it to a language code,
 * it is on the list of wanted locales (if there is one), it is not on the
 * list of excluded locales and it differs from the source locale.
 *
 * @param string $locale
 * @return boolean
 */
protected function shouldParseLocale($locale)
{
    $languageCode = $this->translator->normaliseLanguageCode($locale);

    if (!$languageCode) {
        return false;
    }

    $wanted = !$this->locales || in_array($locale, $this->locales);
    $excluded = $this->excludeLocales && in_array($locale, $this->excludeLocales);

    return $wanted && !$excluded && $this->sourceLocale !== $locale;
}
602 |
/**
 * Serialises the document and writes it to the given file, replacing its contents.
 *
 * @param SimpleXMLExtended $xmlData document to serialise
 * @param string $file path of the file to overwrite
 * @return XlfTranslator
 * @throws \RuntimeException when the document cannot be serialised or the file cannot be written
 */
protected function write(SimpleXMLExtended $xmlData, $file)
{
    $xml = $xmlData->asXML();

    if ($xml === false) {
        throw new \RuntimeException('Could not serialise xlf data for: ' . $file);
    }

    // file_put_contents() opens, truncates, writes and closes the file in one call,
    // so no file handle is left open and a failure cannot go unnoticed.
    if (file_put_contents($file, $xml) === false) {
        throw new \RuntimeException('Could not write xlf file: ' . $file);
    }

    return $this;
}
618 |
619 | protected function parseTokensFromICU( $target ) {
620 | $target = trim( $target );
621 |
622 | if ($target[0] == '{') {
623 | $tokens = []; $stack = 0; $cur = '';
624 |
625 | for($i=0; $i