├── .gitignore ├── Tests ├── bootstrap.php ├── testConfig.php ├── Classes │ ├── XlfTranslatorTest.php │ └── MicrosoftTranslatorTest.php └── xlf │ ├── messages.de.xlf │ ├── messages.es.xlf │ ├── messages.en_GB.xlf │ ├── validators.de.xlf │ ├── validators.en_GB.xlf │ └── validators.es.xlf ├── composer.json ├── phpunit.xml.dist ├── LICENSE ├── src └── Classes │ ├── SimpleXmlExtended.php │ ├── MachineTranslator.php │ ├── MicrosoftTranslator.php │ └── XlfTranslator.php └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | composer.lock 3 | vendor 4 | nbproject/ 5 | testConfig.php 6 | -------------------------------------------------------------------------------- /Tests/bootstrap.php: -------------------------------------------------------------------------------- 1 | =5.4.0", 14 | "guzzlehttp/guzzle": "~6.0" 15 | }, 16 | "require-dev": { 17 | "composer/composer": "1.0.*@dev", 18 | "phpunit/phpunit": "4.8.35" 19 | }, 20 | "autoload": { 21 | "psr-4": { "SMACP\\MachineTranslator\\": "src/" } 22 | }, 23 | "extra": { 24 | "branch-alias": { 25 | "dev-master": "1.0.x-dev" 26 | } 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /phpunit.xml.dist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 14 | 15 | 16 | 17 | ./Tests 18 | 19 | 20 | 21 | 22 | 23 | performance 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 Stuart MacPherson 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Tests/Classes/XlfTranslatorTest.php: -------------------------------------------------------------------------------- 1 | 'ar', 14 | 'ca_ES' => 'ca', 15 | 'cs_CZ' => 'cs', 16 | 'en_GB' => 'en', 17 | 'en_US' => 'en', 18 | 'es_ES' => 'es', 19 | 'he_HE' => 'he', 20 | 'zh_CN' => 'zh-CHS', 21 | 'zh_TW' => 'zh-CHT', 22 | ]; 23 | 24 | public function testTranslate() 25 | { 26 | $translator = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 27 | $translator->setLocaleMap($this->localeMap); 28 | 29 | $xlfTranslator = new XlfTranslator(); 30 | $xlfTranslator->setTranslator($translator) 31 | ->setSourceLocale('en_GB') 32 | ->setDir(dirname(__FILE__) . 
'/../xlf/') 33 | ->setMemory(false) 34 | ->setCommit(false) 35 | ->setOutput(true); 36 | 37 | $parsed = $xlfTranslator->translate(); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/Classes/SimpleXmlExtended.php: -------------------------------------------------------------------------------- 1 | ownerDocument; 44 | $node->appendChild($oNode->createCDATASection($str)); 45 | 46 | return $this; 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/Classes/MachineTranslator.php: -------------------------------------------------------------------------------- 1 | 'ar', 13 | 'ca_ES' => 'ca', 14 | 'cs_CZ' => 'cs', 15 | 'en_GB' => 'en', 16 | 'en_US' => 'en', 17 | 'es_ES' => 'es', 18 | 'he_HE' => 'he', 19 | 'zh_CN' => 'zh-CHS', 20 | 'zh_TW' => 'zh-CHT', 21 | ]; 22 | 23 | public function testSetLocaleMap() 24 | { 25 | $translator = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 26 | $translator->setLocaleMap($this->localeMap); 27 | 28 | $localeMap = $translator->getLocaleMap(); 29 | 30 | $this->assertEquals($this->localeMap, $localeMap); 31 | } 32 | 33 | public function testTranslate() 34 | { 35 | $translator = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 36 | $translator->setLocaleMap($this->localeMap); 37 | 38 | $result = $translator->translate('Hello', 'en_GB', 'es_ES'); 39 | 40 | $this->assertEquals('Hola', $result); 41 | } 42 | 43 | public function testTranslateRetainPlaceHolders() 44 | { 45 | $translator = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 46 | $translator->setLocaleMap($this->localeMap); 47 | 48 | $result = $translator->translate('Hello %name%', 'en_GB', 'es_ES'); 49 | 50 | $this->assertEquals('Hola %name%', $result); 51 | } 52 | 53 | public function testDetectLanguage() 54 | { 55 | $translator = new 
MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 56 | $translator->setLocaleMap($this->localeMap); 57 | 58 | $result = $translator->detectLanguage('Hola'); 59 | 60 | $this->assertEquals('es', $result); 61 | } 62 | 63 | public function testDetectLanguageAndReturnMyLanguageCode() 64 | { 65 | $translator = new MicrosoftTranslator(testConfig::MICROSOFT_KEY, testConfig::MICROSOFT_SECRET); 66 | $translator->setLocaleMap($this->localeMap); 67 | 68 | $result = $translator->detectLanguage('Hola', true); 69 | 70 | $this->assertEquals('es_ES', $result); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MachineTranslator 2 | MachineTranslator is a PHP component that uses the Microsoft Translator service to translate strings from one language to another. It is also able to machine translate xliff (.xlf) files. It currently supports the Microsoft service but other api providers may be also be implemented in future (e.g. Google Translate). 3 | 4 | PHP 5 | ---- 6 | v5.4.0+ 7 | 8 | Installation 9 | ---- 10 | Add the following to composer.json to install via composer: 11 | ```composer 12 | "require": { 13 | "smacp/machine-translator": "dev-master" 14 | }, 15 | "repositories": [ 16 | { 17 | "type": "vcs", 18 | "url": "https://github.com/smacp/machine-translator.git" 19 | } 20 | ] 21 | ``` 22 | A client key and secret is required to use Microsoft's Translation service api. Free or paid accounts can be created at [Microsoft Azure](https://azure.microsoft.com). 23 | 24 | MicrosoftTranslator 25 | ---- 26 | The MicrosoftTranslator requires a client key and client secret to access Microsoft's service. 
Example use for translating a string from English to Spanish: 27 | 28 | ```php 29 | use SMACP\MachineTranslator\Classes\MicrosoftTranslator; 30 | 31 | $translator = new MicrosoftTranslator($myMsTranslationClientId, $myMsTranslationClientSecret); 32 | $translated = $translator->translate('Hello %name%', 'en', 'es'); 33 | ``` 34 | 35 | It is also possible to detect the language of a given string 36 | ```php 37 | $detected = $translator->detectLanguage('Hola'); 38 | ``` 39 | 40 | XlfTranslator 41 | ---- 42 | The XlfTranslator machine translates xliff files found in a given directory. It machine translates files based on a naming convention of catalogue.locale.xlf (e.g. 'messages.ca_ES.xlf'). Example use: 43 | 44 | ```php 45 | use SMACP\MachineTranslator\Classes\MicrosoftTranslator; 46 | use SMACP\MachineTranslator\Classes\XlfTranslator; 47 | 48 | $translator = new MicrosoftTranslator($myMsTranslationClientId, $myMsTranslationClientSecret); 49 | // map my xlf file language codes to Microsoft's :) 50 | $translator->setLocaleMap([ 51 | 'ar_SY' => 'ar', 52 | 'ca_ES' => 'ca', 53 | 'cs_CZ' => 'cs', 54 | 'en_GB' => 'en', 55 | 'en_US' => 'en', 56 | 'es_ES' => 'es', 57 | 'no_NO' => 'no', 58 | 'he_HE' => 'he', 59 | 'zh_CN' => 'zh-CHS', 60 | 'zh_TW' => 'zh-CHT', 61 | ]); 62 | 63 | $xlfTranslator = new XlfTranslator(); 64 | $xlfTranslator->setTranslator($translator) 65 | ->setSourceLocale('en_GB') 66 | ->setDir('/home/me/xlf/') 67 | ->setOutput(true) 68 | ->translate(); 69 | ``` 70 | Known issues 71 | ---- 72 | Microsoft's free or paid plans for their Translation service are currently subject to word quotas and rate limits. When these constraints are applied to an individual account then the Microsoft service may not honour translation requests. 
73 | 74 | License 75 | ---- 76 | 77 | MIT 78 | 79 | Todo 80 | ---- 81 | Finish tests -------------------------------------------------------------------------------- /Tests/xlf/messages.de.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /Tests/xlf/messages.es.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /Tests/xlf/messages.en_GB.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /Tests/xlf/validators.de.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /Tests/xlf/validators.en_GB.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /Tests/xlf/validators.es.xlf: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 |
5 | 6 | The source node in most cases contains the sample message as written by the developer. If it looks like a dot-delimited string such as "form.label.firstname", then the developer has not provided a default message. 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | %s - password reset 15 | %s - password reset 16 | 17 | 18 | 1 hour 19 | 1 hour 20 | 21 | 22 | A paid session is required. 23 | A paid session is required. 24 | 25 | 26 | Welcome back %name%, good to see you again. 27 | Welcome %name%, good to see you again. 28 | 29 | 30 | Already paid? 31 | Already paid? 32 | 33 | 34 | Already registered? 35 | Already registered? 36 | 37 | 38 | Your password must be at least %num_chars% characters long. 39 | Your password must be at least %num_chars% characters long. 40 | 41 | 42 | Back to login 43 | Back to login 44 | 45 | 46 | Click here to choose your new password 47 | Click here to choose your new password 48 | 49 | 50 | Complete the details below 51 | Complete the details below 52 | 53 | 54 | Back 55 | Back 56 | 57 | 58 | Country 59 | Country 60 | 61 | 62 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 63 | Why not relax in the tranquil surroundings of our medieval garden and enjoy a glass of mead. 64 | 65 | 66 |
67 |
68 | -------------------------------------------------------------------------------- /src/Classes/MicrosoftTranslator.php: -------------------------------------------------------------------------------- 1 | 'Arabic', 65 | 'bs-Latn' => 'Bosnian (Latin)', 66 | 'bg' => 'Bulgarian', 67 | 'ca' => 'Catalan', 68 | 'zh-CHS' => 'Chinese Simplified', 69 | 'zh-CHT' => 'Chinese Traditional', 70 | 'hr' => 'Croatian', 71 | 'cs' => 'Czech', 72 | 'da' => 'Danish', 73 | 'nl' => 'Dutch', 74 | 'en' => 'English', 75 | 'et' => 'Estonian', 76 | 'fi' => 'Finnish', 77 | 'fr' => 'French', 78 | 'de' => 'German', 79 | 'el' => 'Greek', 80 | 'ht' => 'Haitian Creole', 81 | 'he' => 'Hebrew', 82 | 'hi' => 'Hindi', 83 | 'mww' => 'Hmong Daw', 84 | 'hu' => 'Hungarian', 85 | 'id' => 'Indonesian', 86 | 'it' => 'Italian', 87 | 'ja' => 'Japanese', 88 | 'sw' => 'Kiswahili', 89 | 'tlh' => 'Klingon', 90 | 'tlh-Qaak' => 'Klingon (pIqaD)', 91 | 'ko' => 'Korean', 92 | 'lv' => 'Latvian', 93 | 'lt' => 'Lithuanian', 94 | 'ms' => 'Malay', 95 | 'mt' => 'Maltese', 96 | 'no' => 'Norwegian', 97 | 'fa' => 'Persian', 98 | 'pl' => 'Polish', 99 | 'pt' => 'Portuguese', 100 | 'otq' => 'Querétaro Otomi', 101 | 'ro' => 'Romanian', 102 | 'ru' => 'Russian', 103 | 'sr-Cyrl' => 'Serbian (Cyrillic)', 104 | 'sr-Latn' => 'Serbian (Latin)', 105 | 'sk' => 'Slovak', 106 | 'sl' => 'Slovenian', 107 | 'es' => 'Spanish', 108 | 'sv' => 'Swedish', 109 | 'th' => 'Thai', 110 | 'tr' => 'Turkish', 111 | 'uk' => 'Ukrainian', 112 | 'ur' => 'Urdu', 113 | 'vi' => 'Vietnamese', 114 | 'cy' => 'Welsh', 115 | 'yua' => 'Yucatec Maya', 116 | ]; 117 | 118 | /** @var array */ 119 | protected $localeMap = []; 120 | 121 | /** 122 | * Constructor 123 | * 124 | * @param string $cid 125 | * @param string $secret 126 | * @param boolean $decodeHtmlEntities 127 | * @return MicrosoftTranslator 128 | */ 129 | public function __construct($cid, $secret, $decodeHtmlEntities = true) 130 | { 131 | $this->clientID = $cid; 132 | $this->clientSecret = $secret; 133 | 
$this->decodeHtmlEntities = $decodeHtmlEntities; 134 | } 135 | 136 | /** 137 | * Get provider 138 | * 139 | * @return string 140 | */ 141 | public function getProvider() 142 | { 143 | return self::PROVIDER; 144 | } 145 | 146 | /** 147 | * Gets locales 148 | * 149 | * @return array 150 | */ 151 | public function getLocales() 152 | { 153 | return $this->locales; 154 | } 155 | 156 | /** 157 | * Set localeMap 158 | * 159 | * @param array $localeMap 160 | * @return MicrosoftTranslator 161 | */ 162 | public function setLocaleMap(array $localeMap) 163 | { 164 | $this->localeMap = $localeMap; 165 | 166 | return $this; 167 | } 168 | 169 | /** 170 | * Get localeMap 171 | * 172 | * @param array $localeMap 173 | * @return MicrosoftTranslator 174 | */ 175 | public function getLocaleMap() 176 | { 177 | return $this->localeMap; 178 | } 179 | 180 | /** 181 | * Attempts to normalise the given language code to a Microsoft translation code 182 | * 183 | * @param string $code 184 | * @return string 185 | */ 186 | public function normaliseLanguageCode($code) 187 | { 188 | if (isset($this->locales[$code])) { 189 | return $code; 190 | } 191 | 192 | $locales = array_keys($this->getLocales()); 193 | 194 | $localeMap = $this->localeMap; 195 | 196 | if (count($localeMap) > 0) { 197 | return isset($localeMap[$code]) ? 
$localeMap[$code] : ''; 198 | } 199 | 200 | $code = str_replace('_', '-', strtolower($code)); 201 | $find = ['-cn', '-tw']; 202 | $replace = ['-chs', '-cht']; 203 | $code = str_replace($find, $replace, $code); 204 | 205 | foreach ($locales as $mLocale) { 206 | if ($code === strtolower($mLocale)) { 207 | return $mLocale; 208 | } 209 | } 210 | 211 | return ''; 212 | } 213 | 214 | /** 215 | * Gets an access token for the Microsoft Translator service 216 | * 217 | * @throws Exception 218 | * 219 | * @return string 220 | */ 221 | public function getAccessToken() 222 | { 223 | if ($this->accessToken) { 224 | return $this->accessToken; 225 | } 226 | 227 | $url = 'https://api.cognitive.microsoft.com/sts/v1.0/issueToken?Subscription-Key=' . urlencode($this->clientID); 228 | // Get a JWT for the Microsoft Translator API. 229 | $client = new Client(); 230 | 231 | $response = $client->post($url); 232 | 233 | $statusCode = $response->getStatusCode(); 234 | 235 | if ($statusCode !== self::HTTP_STATUS_CODE_OK) { 236 | throw new Exception('No access token could be obtained.'); 237 | } 238 | 239 | $this->accessToken = $response->getBody()->getContents(); 240 | 241 | return $this->accessToken; 242 | } 243 | 244 | 245 | /** 246 | * Translates a string 247 | * 248 | * @param string $word The source string to translate 249 | * @param string $from The locale code for the source string 250 | * @param string $to The locale to translate into 251 | * @return string 252 | */ 253 | public function translate($word, $from, $to) 254 | { 255 | if (!$word) { 256 | return ''; 257 | } 258 | 259 | $from = $this->normaliseLanguageCode($from); 260 | $to = $this->normaliseLanguageCode($to); 261 | 262 | if (!$from || !$to) { 263 | return ''; 264 | } 265 | 266 | if ($to === $from) { 267 | return $word; 268 | } 269 | 270 | // extract and preserve placeholders 271 | $extracted = $this->getPlaceholders($word); 272 | $placeholders = []; 273 | 274 | if (count($extracted) > 0) { 275 | $placeholders = 
$this->createPlaceholdersMap($extracted); 276 | } 277 | 278 | if (count($placeholders) > 0) { 279 | $word = str_replace(array_keys($placeholders), array_values($placeholders), $word); 280 | } 281 | 282 | $url = 'http://api.microsofttranslator.com/V2/Http.svc/Translate?text=' . urlencode($word) . '&from=' . $from . '&to=' . $to; 283 | $access_token = $this->getAccessToken(); 284 | 285 | $ch = curl_init(); 286 | curl_setopt($ch, CURLOPT_URL, $url); 287 | curl_setopt($ch, CURLOPT_HTTPHEADER, ['Authorization:bearer ' . $access_token]); 288 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 289 | $this->response = curl_exec($ch); 290 | 291 | preg_match_all('/(.*?)<\/string>/s', $this->response, $matches); 292 | 293 | if (isset($matches[2][0])) { 294 | $translated = $matches[2][0]; 295 | $translated = count($placeholders) > 0 ? str_replace(array_values($placeholders), array_keys($placeholders), $translated) : $translated; 296 | 297 | // fix any html entity conversion that may have been applied 298 | if ($this->decodeHtmlEntities === true) { 299 | $translated = $this->decodeEntities($translated); 300 | } 301 | 302 | return $translated; 303 | } 304 | 305 | return ''; 306 | } 307 | 308 | /** 309 | * Detects the language for the given string 310 | * 311 | * @param string $str 312 | * @param boolean $normaliseLocaleCode 313 | * @return string 314 | */ 315 | public function detectLanguage($str, $normaliseLocaleCode = false) 316 | { 317 | $access_token = $this->getAccessToken(); 318 | 319 | $url = 'http://api.microsofttranslator.com/V2/Http.svc/Detect?text=' . urlencode($str); 320 | 321 | $ch = curl_init(); 322 | curl_setopt($ch, CURLOPT_URL, $url); 323 | curl_setopt($ch, CURLOPT_HTTPHEADER, ['Authorization:bearer ' . 
$access_token]); 324 | curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); 325 | $this->response = curl_exec($ch); 326 | 327 | preg_match_all('/(.*?)<\/string>/s', $this->response, $matches); 328 | 329 | $result = ''; 330 | 331 | if (isset($matches[2][0]) && $matches[2][0]) { 332 | $result = $matches[2][0]; 333 | if ($normaliseLocaleCode && $this->localeMap) { 334 | $map = array_flip($this->localeMap); 335 | if (isset($map[$result])) { 336 | $result = $map[$result]; 337 | } 338 | } 339 | } 340 | 341 | return $result; 342 | } 343 | 344 | /** 345 | * Gets response 346 | * 347 | * @return string 348 | */ 349 | public function getResponse() 350 | { 351 | return $this->response; 352 | } 353 | 354 | /** 355 | * Sets decodeHtmlEntities 356 | * 357 | * @param boolean $decodeHtmlEntities 358 | * @return \SP\TranslationBundle\Classes\MicrosoftTranslator 359 | */ 360 | public function setDecodeHtmlEntities($decodeHtmlEntities) 361 | { 362 | $this->decodeHtmlEntities = $decodeHtmlEntities; 363 | 364 | return $this; 365 | } 366 | 367 | /** 368 | * Matches placeholders within a string 369 | * 370 | * @param string $str 371 | * @return array 372 | */ 373 | public function getPlaceholders($str) 374 | { 375 | preg_match_all('/%([^%\s]+)%/', $str, $matches); 376 | 377 | return isset($matches[0]) ? $matches[0] : []; 378 | } 379 | 380 | /** 381 | * Creates array of placeholder keys and values 382 | * 383 | * @param array $array 384 | * @return array 385 | */ 386 | public function createPlaceholdersMap($array) 387 | { 388 | $result = []; 389 | 390 | foreach ($array as $key => $val) { 391 | $result[$val] = '[[' . ($key+1) . 
']]'; 392 | } 393 | 394 | return $result; 395 | } 396 | 397 | /** 398 | * Determines whether string is a CDATA string 399 | * 400 | * @param string $str 401 | * @return boolean 402 | */ 403 | public function isCdata($str) 404 | { 405 | return strpos($str, ' -1; 406 | } 407 | 408 | /** 409 | * Determines whether string contains HTML tags 410 | * 411 | * @param string $str 412 | * @return boolean 413 | */ 414 | public function containsHtml($str) 415 | { 416 | return $str !== strip_tags($str) ? true : false; 417 | } 418 | 419 | /** 420 | * Fixes html encoding anomolies returned by the Microsoft translator service and decodes html entities 421 | * 422 | * @param string $str 423 | * @return string 424 | */ 425 | public function decodeEntities($str, $applyCdataTags = false) 426 | { 427 | // fix any odd html entity conversion 428 | $find = ['&lt;', '&gt;']; 429 | $replace = ['<', '>']; 430 | $str = str_replace($find, $replace, $str); 431 | 432 | $str = html_entity_decode($str, ENT_QUOTES, 'UTF-8'); 433 | 434 | if ($applyCdataTags === true && !$this->isCdata($str)) { 435 | // wrap in CDATA tags 436 | $str = ''; 437 | } else { 438 | // fix any trailing whitespace in CDATA tags 439 | $str = preg_replace('//', "", $str); 440 | } 441 | 442 | return $str; 443 | } 444 | } 445 | -------------------------------------------------------------------------------- /src/Classes/XlfTranslator.php: -------------------------------------------------------------------------------- 1 | 'machinetranslated', 76 | 'mt_date' => 'datemachinetranslated' 77 | ]; 78 | 79 | /** @var boolean */ 80 | protected $memory = true; 81 | 82 | /** @var boolean */ 83 | protected $output = true; 84 | 85 | /** @var boolean */ 86 | protected $outputTranslated = false; 87 | 88 | /** 89 | * Get translator 90 | * 91 | * @return MachineTranslator 92 | */ 93 | public function getTranslator() 94 | { 95 | return $this->translator; 96 | } 97 | 98 | /** 99 | * Set translator 100 | * 101 | * @param MachineTranslator 
$translator 102 | * @return XlfTranslator 103 | */ 104 | public function setTranslator(MachineTranslator $translator) 105 | { 106 | $this->translator = $translator; 107 | 108 | return $this; 109 | } 110 | 111 | /** 112 | * Get locales 113 | * 114 | * @return array 115 | */ 116 | public function getLocales() 117 | { 118 | return $this->locales; 119 | } 120 | 121 | /** 122 | * Set locales 123 | * 124 | * @param array $locales 125 | * @return XlfTranslator 126 | */ 127 | public function setLocales(array $locales) 128 | { 129 | $this->locales = $locales; 130 | 131 | return $this; 132 | } 133 | 134 | /** 135 | * Get excluded locales 136 | * 137 | * @return array 138 | */ 139 | public function getExcludedLocales() 140 | { 141 | return $this->excludedLocales; 142 | } 143 | 144 | /** 145 | * Set excludedLocales 146 | * 147 | * @param array $locales 148 | * @return XlfTranslator 149 | */ 150 | public function setExcludedLocales(array $locales) 151 | { 152 | $this->excludedLocales = $locales; 153 | 154 | return $this; 155 | } 156 | 157 | /** 158 | * Get dir 159 | * 160 | * @return string 161 | */ 162 | public function getDir() 163 | { 164 | return $this->dir; 165 | } 166 | 167 | /** 168 | * Set dir 169 | * 170 | * @param string $dir 171 | * @return XlfTranslator 172 | */ 173 | public function setDir($dir) 174 | { 175 | $this->dir = $dir; 176 | 177 | return $this; 178 | } 179 | 180 | /** 181 | * Get sourceLocale 182 | * 183 | * @return string 184 | */ 185 | public function getSourceLocale() 186 | { 187 | return $this->sourceLocale; 188 | } 189 | 190 | /** 191 | * Set sourceLocale 192 | * 193 | * @param string $locale 194 | * @return XlfTranslator 195 | */ 196 | public function setSourceLocale($locale) 197 | { 198 | $this->sourceLocale = $locale; 199 | 200 | return $this; 201 | } 202 | 203 | /** 204 | * Get catalogues 205 | * 206 | * @return string 207 | */ 208 | public function getCatalogues() 209 | { 210 | return $this->catalogues; 211 | } 212 | 213 | /** 214 | * Set 
catalogues
     *
     * @param array $catalogues
     * @return XlfTranslator
     */
    public function setCatalogues(array $catalogues)
    {
        $this->catalogues = $catalogues;

        return $this;
    }

    /**
     * Get newOnly
     *
     * When strictly true, translate() only processes trans-units whose
     * target carries state="new".
     *
     * @return boolean
     */
    public function getNewOnly()
    {
        return $this->newOnly;
    }

    /**
     * Set newOnly
     *
     * @param boolean $newOnly
     * @return XlfTranslator
     */
    public function setNewOnly($newOnly)
    {
        $this->newOnly = $newOnly;

        return $this;
    }

    /**
     * Get commit
     *
     * When strictly true, translate() writes modified files back to disk.
     *
     * @return boolean
     */
    public function getCommit()
    {
        return $this->commit;
    }

    /**
     * Set commit
     *
     * @param boolean $commit
     * @return XlfTranslator
     */
    public function setCommit($commit)
    {
        $this->commit = $commit;

        return $this;
    }

    /**
     * Get output
     *
     * When truthy, translate() echoes progress and a summary.
     *
     * @return boolean
     */
    public function getOutput()
    {
        return $this->output;
    }

    /**
     * Set output
     *
     * @param boolean $output
     * @return XlfTranslator
     */
    public function setOutput($output)
    {
        $this->output = $output;

        return $this;
    }

    /**
     * Get outputTranslated
     *
     * When truthy (and output is enabled), translate() also echoes every
     * source/translated pair it produced.
     *
     * @return boolean
     */
    public function getOutputTranslated()
    {
        return $this->outputTranslated;
    }

    /**
     * Set outputTranslated
     *
     * @param boolean $outputTranslated
     * @return XlfTranslator
     */
    public function setOutputTranslated($outputTranslated)
    {
        $this->outputTranslated = $outputTranslated;

        return $this;
    }

    /**
     * Get memory
     *
     * When truthy, translate() skips trans-units that already carry the
     * configured "mt" attribute.
     *
     * @return boolean
     */
    public function getMemory()
    {
        return $this->memory;
    }

    /**
     * Set memory
     *
     * @param boolean $memory
     * @return XlfTranslator
     */
    public function setMemory($memory)
    {
        $this->memory = $memory;

        return $this;
    }

    /**
     * Machine translates
     *
     * Iterates over every regular file in $this->dir, expecting names in the
     * form catalogue.locale.xlf. Files whose catalogue or locale is filtered
     * out are skipped. For the remaining files each trans-unit is sent to the
     * translator; successful results replace the unit's target and the unit
     * is flagged with the configured "mt" / "mt_date" attributes. Files with
     * at least one new translation are recorded in $this->parsed and, when
     * commit is enabled, written back to disk.
     *
     * @return XlfTranslator
     * @throws Exception when a file name does not match catalogue.locale.xlf
     */
    public function translate()
    {
        $this->parsed = [];
        $this->mtFailCount = 0;

        $provider = $this->translator->getProvider();
        $cataloguesTranslated = [];
        $cataloguesSkipped = [];
        $localesTranslated = [];
        $localesSkipped = [];
        $strRequested = 0;
        $strTranslated = 0;
        $filesWritten = 0;

        if ($this->output) {
            echo PHP_EOL;
            echo '-----------------------------------------' . PHP_EOL;
            echo 'XlfTranslator' . PHP_EOL;
            echo '-----------------------------------------' . PHP_EOL;
            echo 'MT provider: ' . $provider . PHP_EOL;
            echo PHP_EOL;
        }

        if ($dh = opendir($this->dir)) {

            if ($this->output) {
                echo 'Translating xlf in: ' . $this->dir . PHP_EOL;
                echo PHP_EOL;
            }

            while (false !== ($filename = readdir($dh))) {
                $filePath = $this->dir . $filename;

                if (is_file($filePath)) {
                    $parts = explode('.', $filename);

                    if (count($parts) !== 3) {
                        throw new Exception('Cannot parse file. Expected file in format catalogue.locale.xlf.');
                    }

                    // strpos() never returns a negative number, so the former
                    // "strpos(...) < 0" guard could not fire; test the
                    // extension part of the file name directly instead.
                    if ($parts[2] !== 'xlf') {
                        throw new Exception('Not a valid xlf file: ' . $filename);
                    }

                    $catalogue = $parts[0];
                    $locale = $parts[1];

                    // index into $new for this file
                    $i = 0;

                    if (!$this->shouldParseCatalogue($catalogue)) {
                        if (!in_array($catalogue, $cataloguesSkipped, true)) {
                            $cataloguesSkipped[] = $catalogue;
                        }
                        continue;
                    }

                    if (!$this->shouldParseLocale($locale)) {
                        if (!in_array($locale, $localesSkipped, true)) {
                            $localesSkipped[] = $locale;
                        }
                        continue;
                    }

                    if ($this->output) {
                        echo 'File: ' . $filename . PHP_EOL;
                        echo 'Catalogue: ' . $catalogue . PHP_EOL;
                        echo 'Locale: ' . $locale . PHP_EOL;
                        echo 'MT locale: ' . $this->translator->normaliseLanguageCode($locale) . PHP_EOL;
                        echo PHP_EOL;
                        echo 'P: ';
                    }

                    $contents = file_get_contents($filePath);
                    $xlfData = new SimpleXMLExtended($contents);
                    $new = [];

                    // the failure counter is tracked per file
                    $this->mtFailCount = 0;

                    foreach ($xlfData->file->body as $bItem) {
                        $xlfStrTranslated = 0;

                        foreach ($bItem as $bValue) {
                            if ($this->mtFailCount >= $this->maxMtFailCount) {
                                // stop processing units as we may have hit the flood limit
                                break;
                            }

                            $targetAttributes = $bValue->target->attributes();

                            if ($this->newOnly === true && (!isset($targetAttributes['state']) || (string) $targetAttributes['state'] !== 'new')) {
                                continue;
                            }

                            $source = (string) $bValue->source;
                            $target = (string) $bValue->target;
                            $attributes = $bValue->attributes();

                            if ($source || ($target && $this->isICU($target))) {

                                if ($source == '' && $this->isICU($target)) {
                                    // No source text but an ICU target: translate each
                                    // token individually and substitute it back into
                                    // the target, keeping the surrounding braces.
                                    $tokens = $this->parseTokensFromICU($target);
                                    $search = [];
                                    $replace = [];

                                    foreach ($tokens as $token) {
                                        $strRequested++;
                                        $search[] = '{' . $token . '}';
                                        $replace[] = '{' . $this->translator->translate($token, $this->sourceLocale, $locale) . '}';
                                    }

                                    $translated = str_replace($search, $replace, $target);
                                } elseif (!$source) {
                                    continue;
                                } else {
                                    // with memory enabled, units already flagged as
                                    // machine translated are left untouched
                                    if ($this->memory && isset($attributes[$this->attributes['mt']])) {
                                        continue;
                                    }

                                    $strRequested++;
                                    $translated = $this->translator->translate($source, $this->sourceLocale, $locale);
                                }

                                if ($translated) {
                                    $new[$i]['source'] = $source;
                                    $new[$i]['target'] = $translated;

                                    if (!isset($attributes[$this->attributes['mt']])) {
                                        $bValue->addAttribute($this->attributes['mt'], 1);
                                        $bValue->addAttribute($this->attributes['mt_date'], date('Y-m-d H:i:s'));
                                    }

                                    $bValue->attributes()->{$this->attributes['mt']} = 1;

                                    if ($this->translator->containsHtml($translated)) {
                                        // HTML content is stored as CDATA
                                        $bValue->target = null;
                                        $bValue->target->addCData($translated);
                                    } else {
                                        $bValue->target = $translated;
                                    }

                                    $i++;
                                    $xlfStrTranslated++;
                                    $strTranslated++;

                                    if ($this->output) {
                                        echo '.';
                                    }
                                } else {
                                    $this->mtFailCount++;
                                }
                            }
                        }

                        if ($this->output) {
                            if ($xlfStrTranslated === 0) {
                                echo 'No strings translated';
                            }
                            echo PHP_EOL;
                            echo 'T: ' . $xlfStrTranslated . PHP_EOL;
                            echo PHP_EOL;
                        }
                    }

                    if (count($new) > 0) {
                        if ($this->output && $this->outputTranslated) {
                            foreach ($new as $key => $row) {
                                echo '[#' . ($key+1) . '] Source: ' . $row['source'] . PHP_EOL;
                                echo '[#' . ($key+1) . '] Translated: ' . $row['target'] . PHP_EOL;
                            }
                            echo PHP_EOL;
                        }

                        if ($this->commit === true) {
                            $this->write($xlfData, $filePath);
                            $filesWritten++;
                        }

                        if (!in_array($catalogue, $cataloguesTranslated, true)) {
                            $cataloguesTranslated[] = $catalogue;
                        }

                        // count each locale once, even when several catalogues
                        // exist for it
                        if (!in_array($locale, $localesTranslated, true)) {
                            $localesTranslated[] = $locale;
                        }

                        $this->parsed[] = $filename;
                    }
                }
            }

            closedir($dh);
        }

        if ($this->output) {
            echo PHP_EOL;
            echo 'Done' . PHP_EOL;
            echo '-----------------------------------------' . PHP_EOL;
            echo 'Total locales translated: ' . count($localesTranslated) . PHP_EOL;
            echo 'Total strings requested: ' . $strRequested . PHP_EOL;
            echo 'Total strings translated: ' . $strTranslated . PHP_EOL;
            echo 'Catalogues translated: ' . (count($cataloguesTranslated) === 0 ? '0' : implode(', ', $cataloguesTranslated)) . PHP_EOL;

            if ($cataloguesSkipped) {
                echo 'Catalogues skipped: ' . implode(', ', $cataloguesSkipped) . PHP_EOL;
            }

            if ($localesSkipped) {
                echo 'Locales skipped: ' .implode(', ', $localesSkipped) . PHP_EOL;
            }

            echo 'xlf updated: ' . $filesWritten . PHP_EOL;
        }

        return $this;
    }

    /**
     * Determines whether catalogue should be parsed
     *
     * Every catalogue is accepted when no catalogue filter is configured;
     * otherwise the catalogue must be listed in the filter.
     *
     * @param string $catalogue
     * @return boolean
     */
    protected function shouldParseCatalogue($catalogue)
    {
        if ($this->catalogues && !in_array($catalogue, $this->catalogues)) {
            return false;
        }

        return true;
    }

    /**
     * Determines whether locale should be parsed
     *
     * A locale is rejected when the translator cannot normalise it, when it
     * is missing from a configured include list, when it is on the exclude
     * list, or when it equals the source locale.
     *
     * @param string $locale
     * @return boolean
     */
    protected function shouldParseLocale($locale)
    {
        if (!$this->translator->normaliseLanguageCode($locale)) {
            return false;
        }

        if ($this->locales && !in_array($locale, $this->locales)) {
            return false;
        }

        if ($this->excludeLocales && in_array($locale, $this->excludeLocales)) {
            return false;
        }

        return $this->sourceLocale !== $locale;
    }

    /**
     * Writes to file
     *
     * Serialises the XML document and overwrites $file with it.
     *
     * @param SimpleXMLExtended $xmlData
     * @param string $file
     * @return XlfTranslator
     * @throws \RuntimeException when the file cannot be opened for writing
     */
    protected function write(SimpleXMLExtended $xmlData, $file)
    {
        $xml = $xmlData->asXML();
        $fwh = fopen($file, 'w');

        if ($fwh === false) {
            // fail loudly rather than let the caller count an unwritten file
            throw new \RuntimeException('Unable to open file for writing: ' . $file);
        }

        fwrite($fwh, $xml);
        fclose($fwh);

        return $this;
    }

    protected function parseTokensFromICU( $target ) {
        $target = trim( $target );

        if ($target[0] == '{') {
            $tokens = []; $stack = 0; $cur = '';

            for($i=0; $i