├── bin ├── export-plural-rules.bat ├── import-cldr-data.bat ├── export-plural-rules └── import-cldr-data ├── src ├── autoloader.php ├── Exporter │ ├── Prettyjson.php │ ├── Po.php │ ├── Ruby.php │ ├── Php.php │ ├── Html.php │ ├── Xml.php │ ├── Json.php │ └── Exporter.php ├── Category.php ├── FormulaConverter.php ├── cldr-data │ └── main │ │ └── en-US │ │ ├── scripts.json │ │ ├── territories.json │ │ └── languages.json ├── CldrData.php └── Language.php ├── LICENSE ├── composer.json └── UNICODE-LICENSE.txt /bin/export-plural-rules.bat: -------------------------------------------------------------------------------- 1 | @php "%~dpn0" %* -------------------------------------------------------------------------------- /bin/import-cldr-data.bat: -------------------------------------------------------------------------------- 1 | @php "%~dpn0" %* -------------------------------------------------------------------------------- /src/autoloader.php: -------------------------------------------------------------------------------- 1 | id . '\n"'; 32 | $lines[] = '"Plural-Forms: nplurals=' . count($language->categories) . '; plural=' . $language->formula . 
'\n"'; 33 | $lines[] = ''; 34 | 35 | return implode("\n", $lines); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michele Locati 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /composer.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "gettext/languages", 3 | "description": "gettext languages with plural rules", 4 | "keywords": [ 5 | "localization", 6 | "l10n", 7 | "internationalization", 8 | "i18n", 9 | "translations", 10 | "translate", 11 | "php", 12 | "unicode", 13 | "cldr", 14 | "language", 15 | "languages", 16 | "plural", 17 | "plurals", 18 | "plural rules" 19 | ], 20 | "homepage": "https://github.com/php-gettext/Languages", 21 | "license": "MIT", 22 | "authors": [ 23 | { 24 | "name": "Michele Locati", 25 | "email": "mlocati@gmail.com", 26 | "role": "Developer" 27 | } 28 | ], 29 | "autoload": { 30 | "psr-4": { 31 | "Gettext\\Languages\\": "src/" 32 | } 33 | }, 34 | "autoload-dev": { 35 | "psr-4": { 36 | "Gettext\\Languages\\Test\\": "tests/test/" 37 | } 38 | }, 39 | "require": { 40 | "php": ">=5.3" 41 | }, 42 | "require-dev": { 43 | "phpunit/phpunit": "^4.8 || ^5.7 || ^6.5 || ^7.5 || ^8.4" 44 | }, 45 | "scripts": { 46 | "test": "phpunit" 47 | }, 48 | "bin": [ 49 | "bin/export-plural-rules", 50 | "bin/import-cldr-data" 51 | ] 52 | } -------------------------------------------------------------------------------- /src/Exporter/Ruby.php: -------------------------------------------------------------------------------- 1 | id . '\' => {'; 28 | $lines[] = ' \'name\' => \'' . addslashes($lc->name) . '\','; 29 | if (isset($lc->supersededBy)) { 30 | $lines[] = ' \'supersededBy\' => \'' . $lc->supersededBy . '\','; 31 | } 32 | if (isset($lc->script)) { 33 | $lines[] = ' \'script\' => \'' . addslashes($lc->script) . '\','; 34 | } 35 | if (isset($lc->territory)) { 36 | $lines[] = ' \'territory\' => \'' . addslashes($lc->territory) . '\','; 37 | } 38 | if (isset($lc->baseLanguage)) { 39 | $lines[] = ' \'baseLanguage\' => \'' . addslashes($lc->baseLanguage) . 
'\','; 40 | } 41 | $lines[] = ' \'formula\' => \'' . $lc->formula . '\','; 42 | $lines[] = ' \'plurals\' => ' . count($lc->categories) . ','; 43 | $catNames = array(); 44 | foreach ($lc->categories as $c) { 45 | $catNames[] = "'{$c->id}'"; 46 | } 47 | $lines[] = ' \'cases\' => [' . implode(', ', $catNames) . '],'; 48 | $lines[] = ' \'examples\' => {'; 49 | foreach ($lc->categories as $c) { 50 | $lines[] = ' \'' . $c->id . '\' => \'' . $c->examples . '\','; 51 | } 52 | $lines[] = ' },'; 53 | $lines[] = ' },'; 54 | } 55 | $lines[] = '}'; 56 | $lines[] = ''; 57 | 58 | return implode("\n", $lines); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/Exporter/Php.php: -------------------------------------------------------------------------------- 1 | id . '\' => array('; 29 | $lines[] = ' \'name\' => \'' . addslashes($lc->name) . '\','; 30 | if (isset($lc->supersededBy)) { 31 | $lines[] = ' \'supersededBy\' => \'' . $lc->supersededBy . '\','; 32 | } 33 | if (isset($lc->script)) { 34 | $lines[] = ' \'script\' => \'' . addslashes($lc->script) . '\','; 35 | } 36 | if (isset($lc->territory)) { 37 | $lines[] = ' \'territory\' => \'' . addslashes($lc->territory) . '\','; 38 | } 39 | if (isset($lc->baseLanguage)) { 40 | $lines[] = ' \'baseLanguage\' => \'' . addslashes($lc->baseLanguage) . '\','; 41 | } 42 | $lines[] = ' \'formula\' => \'' . $lc->formula . '\','; 43 | $lines[] = ' \'plurals\' => ' . count($lc->categories) . ','; 44 | $catNames = array(); 45 | foreach ($lc->categories as $c) { 46 | $catNames[] = "'{$c->id}'"; 47 | } 48 | $lines[] = ' \'cases\' => array(' . implode(', ', $catNames) . '),'; 49 | $lines[] = ' \'examples\' => array('; 50 | foreach ($lc->categories as $c) { 51 | $lines[] = ' \'' . $c->id . '\' => \'' . $c->examples . 
'\','; 52 | } 53 | $lines[] = ' ),'; 54 | $lines[] = ' ),'; 55 | } 56 | $lines[] = ');'; 57 | $lines[] = ''; 58 | 59 | return implode("\n", $lines); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/Exporter/Html.php: -------------------------------------------------------------------------------- 1 | '; 26 | $lines[] = ' '; 27 | $lines[] = ' '; 28 | $lines[] = ' Language code'; 29 | $lines[] = ' Language name'; 30 | $lines[] = ' # plurals'; 31 | $lines[] = ' Formula'; 32 | $lines[] = ' Plurals'; 33 | $lines[] = ' '; 34 | $lines[] = ' '; 35 | $lines[] = ' '; 36 | foreach ($languages as $lc) { 37 | $lines[] = ' '; 38 | $lines[] = ' ' . $lc->id . ''; 39 | $name = self::h($lc->name); 40 | if (isset($lc->supersededBy)) { 41 | $name .= '
Superseded by ' . $lc->supersededBy . ''; 42 | } 43 | $lines[] = ' ' . $name . ''; 44 | $lines[] = ' ' . count($lc->categories) . ''; 45 | $lines[] = ' ' . self::h($lc->formula) . ''; 46 | $cases = array(); 47 | foreach ($lc->categories as $c) { 48 | $cases[] = '
  • ' . $c->id . '' . self::h($c->examples) . '
  • '; 49 | } 50 | $lines[] = '
      ' . implode('', $cases) . '
    '; 51 | $lines[] = ' '; 52 | } 53 | $lines[] = ' '; 54 | $lines[] = ''; 55 | 56 | return implode("\n", $lines); 57 | } 58 | 59 | protected static function h($str) 60 | { 61 | return htmlspecialchars($str, ENT_COMPAT, 'UTF-8'); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /UNICODE-LICENSE.txt: -------------------------------------------------------------------------------- 1 | UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE 2 | 3 | See Terms of Use for definitions of Unicode Inc.'s 4 | Data Files and Software. 5 | 6 | NOTICE TO USER: Carefully read the following legal agreement. 7 | BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S 8 | DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"), 9 | YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE 10 | TERMS AND CONDITIONS OF THIS AGREEMENT. 11 | IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE 12 | THE DATA FILES OR SOFTWARE. 13 | 14 | COPYRIGHT AND PERMISSION NOTICE 15 | 16 | Copyright © 1991-2019 Unicode, Inc. All rights reserved. 17 | Distributed under the Terms of Use in https://www.unicode.org/copyright.html. 18 | 19 | Permission is hereby granted, free of charge, to any person obtaining 20 | a copy of the Unicode data files and any associated documentation 21 | (the "Data Files") or Unicode software and any associated documentation 22 | (the "Software") to deal in the Data Files or Software 23 | without restriction, including without limitation the rights to use, 24 | copy, modify, merge, publish, distribute, and/or sell copies of 25 | the Data Files or Software, and to permit persons to whom the Data Files 26 | or Software are furnished to do so, provided that either 27 | (a) this copyright and permission notice appear with all copies 28 | of the Data Files or Software, or 29 | (b) this copyright and permission notice appear in associated 30 | Documentation. 
31 | 32 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF 33 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE 34 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 35 | NONINFRINGEMENT OF THIRD PARTY RIGHTS. 36 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS 37 | NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL 38 | DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, 39 | DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER 40 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 41 | PERFORMANCE OF THE DATA FILES OR SOFTWARE. 42 | 43 | Except as contained in this notice, the name of a copyright holder 44 | shall not be used in advertising or otherwise to promote the sale, 45 | use or other dealings in these Data Files or Software without prior 46 | written authorization of the copyright holder. 47 | -------------------------------------------------------------------------------- /src/Exporter/Xml.php: -------------------------------------------------------------------------------- 1 | loadXML(''); 30 | $xLanguages = $xml->firstChild; 31 | foreach ($languages as $language) { 32 | $xLanguage = $xml->createElement('language'); 33 | $xLanguage->setAttribute('id', $language->id); 34 | $xLanguage->setAttribute('name', $language->name); 35 | if (isset($language->supersededBy)) { 36 | $xLanguage->setAttribute('supersededBy', $language->supersededBy); 37 | } 38 | if (isset($language->script)) { 39 | $xLanguage->setAttribute('script', $language->script); 40 | } 41 | if (isset($language->territory)) { 42 | $xLanguage->setAttribute('territory', $language->territory); 43 | } 44 | if (isset($language->baseLanguage)) { 45 | $xLanguage->setAttribute('baseLanguage', $language->baseLanguage); 46 | } 47 | $xLanguage->setAttribute('formula', $language->formula); 48 | foreach ($language->categories as $category) { 49 | $xCategory = 
$xml->createElement('category'); 50 | $xCategory->setAttribute('id', $category->id); 51 | $xCategory->setAttribute('examples', $category->examples); 52 | $xLanguage->appendChild($xCategory); 53 | } 54 | $xLanguages->appendChild($xLanguage); 55 | } 56 | $xml->formatOutput = true; 57 | 58 | return $xml->saveXML(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/Exporter/Json.php: -------------------------------------------------------------------------------- 1 | name; 56 | if (isset($language->supersededBy)) { 57 | $item['supersededBy'] = $language->supersededBy; 58 | } 59 | if (isset($language->script)) { 60 | $item['script'] = $language->script; 61 | } 62 | if (isset($language->territory)) { 63 | $item['territory'] = $language->territory; 64 | } 65 | if (isset($language->baseLanguage)) { 66 | $item['baseLanguage'] = $language->baseLanguage; 67 | } 68 | if (!empty($options['both-formulas'])) { 69 | $item['formulas'] = array( 70 | 'standard' => $language->buildFormula(true), 71 | 'php' => $language->formula, 72 | ); 73 | } else { 74 | $item['formula'] = $language->formula; 75 | } 76 | $item['plurals'] = count($language->categories); 77 | $item['cases'] = array(); 78 | $item['examples'] = array(); 79 | foreach ($language->categories as $category) { 80 | $item['cases'][] = $category->id; 81 | $item['examples'][$category->id] = $category->examples; 82 | } 83 | $list[$language->id] = $item; 84 | } 85 | 86 | return json_encode($list, static::getEncodeOptions()); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/Category.php: -------------------------------------------------------------------------------- 1 | id = $matches[1]; 51 | $cldrFormulaAndExamplesNormalized = trim(preg_replace('/\s+/', ' ', $cldrFormulaAndExamples)); 52 | if (!preg_match('/^([^@]*)(?:@integer([^@]+))?(?:@decimal(?:[^@]+))?$/', $cldrFormulaAndExamplesNormalized, $matches)) { 53 | throw 
/**
 * Return a list of numbers corresponding to the $examples value.
 *
 * @throws \Exception throws an Exception if we weren't able to expand the examples
 *
 * @return int[]
 */
public function getExampleIntegers()
{
    return self::expandExamples($this->examples);
}

/**
 * Expand a list of examples as defined by CLDR.
 *
 * Every comma-separated item may be:
 * - a plain integer ('5');
 * - an integer followed by 'c' or 'e' and an exponent ('1c3' is expanded to 1000);
 * - a range written as 'from~to' ('5~7' is expanded to 5, 6, 7). Wide ranges are
 *   sampled: the step is 1/100th of the range width (at least 1) and the upper
 *   bound is always included.
 * A trailing ', …' is ignored.
 *
 * @param string $examples A string like '1, 2, 5~7, …'.
 *
 * @throws \Exception throws an Exception if we weren't able to expand $examples
 *
 * @return int[]
 */
public static function expandExamples($examples)
{
    $result = array();
    $m = null;
    // Drop the trailing ellipsis that CLDR appends to open-ended sample lists
    if (substr($examples, -strlen(', …')) === ', …') {
        $examples = substr($examples, 0, strlen($examples) - strlen(', …'));
    }
    foreach (explode(',', str_replace(' ', '', $examples)) as $range) {
        // The named groups are required: 'num' and 'exp' are read just below.
        if (preg_match('/^(?<num>\d+)((c|e)(?<exp>\d+))?$/', $range, $m)) {
            // When the optional exponent is absent, 'exp' is not set in $m and the item is a plain integer
            $result[] = (int) (isset($m['exp']) ? ($m['num'] . str_repeat('0', (int) $m['exp'])) : $range);
        } elseif (preg_match('/^(\d+)~(\d+)$/', $range, $m)) {
            $from = (int) $m[1];
            $to = (int) $m[2];
            $delta = $to - $from;
            // Keep the expanded list small for wide ranges
            $step = (int) max(1, $delta / 100);
            for ($i = $from; $i < $to; $i += $step) {
                $result[] = $i;
            }
            $result[] = $to;
        } else {
            throw new Exception("Unhandled test range '{$range}' in '{$examples}'");
        }
    }
    if (empty($result)) {
        throw new Exception("No test numbers from '{$examples}'");
    }

    return $result;
}
/**
 * Convert a list of Language instances to string.
 *
 * When the 'us-ascii' option is set to a truthy value, every language is
 * replaced by the result of its getUSAsciiClone() method before exporting.
 *
 * @param \Gettext\Languages\Language[] $languages the Language instances to convert
 * @param array|null $options export options (anything that is not an array is treated as no options)
 *
 * @return string
 */
final public static function toString($languages, $options = null)
{
    if (!is_array($options)) {
        $options = array();
    }
    if (!empty($options['us-ascii'])) {
        $converted = array();
        foreach ($languages as $language) {
            $converted[] = $language->getUSAsciiClone();
        }
        $languages = $converted;
    }

    return static::toStringDoWithOptions($languages, $options);
}

/**
 * Save the Language instances to a file.
 *
 * @param \Gettext\Languages\Language[] $languages the Language instances to convert
 * @param string $filename the path of the file to be written
 * @param array|null $options export options, passed to toString()
 *
 * @throws \Exception throws an Exception if the file could not be written
 */
final public static function toFile($languages, $filename, $options = null)
{
    // Warnings are suppressed on purpose: a failure is reported through the exception below
    $written = @file_put_contents($filename, self::toString($languages, $options));
    if ($written === false) {
        throw new Exception("Error writing data to '{$filename}'");
    }
}

/**
 * Is this exporter for public use?
 *
 * @return bool
 */
public static function isForPublicUse()
{
    return true;
}

/**
 * Does this exporter support exporting formulas both with and without extra parentheses?
 *
 * @return bool
 */
public static function supportsFormulasWithAndWithoutParenthesis()
{
    return false;
}

/**
 * Return a short description of the exporter.
 *
 * This base implementation always throws: concrete exporters are expected to override it.
 *
 * @throws \Exception
 *
 * @return string
 */
public static function getDescription()
{
    $class = get_called_class();

    throw new Exception($class . ' does not implement the method ' . __FUNCTION__);
}

/**
 * Convert a list of Language instances to string.
 *
 * This default implementation ignores $options and delegates to the
 * toStringDo() method of the called class, if it exists.
 *
 * @param \Gettext\Languages\Language[] $languages the Language instances to convert
 * @param array $options export options
 *
 * @throws \Exception throws an Exception if the called class does not have a toStringDo() method
 *
 * @return string
 */
protected static function toStringDoWithOptions($languages, array $options)
{
    $class = get_called_class();
    if (!method_exists($class, 'toStringDo')) {
        throw new Exception($class . ' does not implement the method ' . __FUNCTION__);
    }

    return static::toStringDo($languages);
}
/**
 * Converts an atomic part of the CLDR formula to its gettext representation.
 *
 * Atoms on 'n' (or 'i', which is rewritten to 'n') are returned as gettext
 * expressions; atoms on the v, w, f, t, c and e operands are resolved to a
 * constant by treating the operand as 0.
 *
 * @param string $cldrAtom the CLDR formula atom to convert
 *
 * @throws \Exception if the atom is not in one of the recognized forms
 *
 * @return bool|string returns true if the gettext will always evaluate to true, false if gettext will always evaluate to false, return the gettext formula otherwise
 */
private static function convertAtom($cldrAtom)
{
    // Switch to the gettext spelling: '=' becomes '==' and the 'i' operand becomes 'n'
    $atom = str_replace(array(' = ', 'i'), array(' == ', 'n'), $cldrAtom);

    // Comparison of n (optionally with a modulo) against a single number: nothing else to do
    if (preg_match('/^n( % \d+)? (!=|==) \d+$/', $atom)) {
        return $atom;
    }
    // Comparison of n against a list of numbers and/or ranges
    if (preg_match('/^n( % \d+)? (!=|==) \d+(,\d+|\.\.\d+)+$/', $atom)) {
        return self::expandAtom($atom);
    }

    // Pattern => value to return when the first number of the comparison is 0
    $constantOperandPatterns = array(
        // For gettext: v == 0, w == 0
        '/^(?:v|w)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/' => true,
        '/^(?:v|w)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/' => false,
        // f == empty, t == empty, c == empty, e == empty
        '/^(?:f|t|c|e)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/' => true,
        '/^(?:f|t|c|e)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/' => false,
    );
    $m = null;
    foreach ($constantOperandPatterns as $pattern => $resultWhenZero) {
        if (preg_match($pattern, $atom, $m)) {
            return ((int) $m[1] === 0) === $resultWhenZero;
        }
    }

    throw new Exception("Unable to convert the formula chunk '{$cldrAtom}' from CLDR to gettext");
}
/**
 * Expands an atom containing a range (for instance: 'n == 1,3..5').
 *
 * The right-hand side is a comma-separated list whose items are either single
 * numbers or 'from..to' ranges. With '==' the items are alternatives (joined
 * by '||'); with '!=' every item must be excluded (joined by '&&').
 *
 * @param string $atom the atom to expand, in the form 'n[ % <number>] ==|!= <list>'
 *
 * @throws \Exception if $atom is not in the expected form or one of its ranges can't be handled
 *
 * @return string
 */
private static function expandAtom($atom)
{
    $m = null;
    if (!preg_match('/^(n(?: % \d+)?) (==|!=) (\d+(?:\.\.\d+|,\d+)+)$/', $atom, $m)) {
        throw new Exception("Unable to expand '{$atom}'");
    }
    $what = $m[1];
    $op = $m[2];
    $chunks = array();
    foreach (explode(',', $m[3]) as $range) {
        if (preg_match('/^\d+$/', $range)) {
            // Single value
            $chunks[] = "{$what} {$op} {$range}";
            continue;
        }
        if (!preg_match('/^(\d+)\.\.(\d+)$/', $range, $m)) {
            throw new Exception("Unhandled range '{$range}' in '{$atom}'");
        }
        $from = (int) $m[1];
        $to = (int) $m[2];
        if (($to - $from) === 1) {
            // Two adjacent values: list them explicitly.
            // With '!=' BOTH values must be excluded.
            $chunks[] = $op === '=='
                ? "({$what} == {$from} || {$what} == {$to})"
                : "{$what} != {$from} && {$what} != {$to}";
        } elseif ($op === '==') {
            $chunks[] = "{$what} >= {$from} && {$what} <= {$to}";
        } elseif ($what === 'n' && $from <= 0) {
            // The lower bound is not emitted when the range starts at 0 (or below) and the operand is plain n
            $chunks[] = "{$what} > {$to}";
        } else {
            $chunks[] = "({$what} < {$from} || {$what} > {$to})";
        }
    }
    if (count($chunks) === 1) {
        return $chunks[0];
    }

    return $op === '=='
        ? '(' . implode(' || ', $chunks) . ')'
        : implode(' && ', $chunks);
}
| "Gran": "Grantha", 67 | "Grek": "Greek", 68 | "Gujr": "Gujarati", 69 | "Gukh": "Gurung Khema", 70 | "Guru": "Gurmukhi", 71 | "Hanb": "Han with Bopomofo", 72 | "Hang": "Hangul", 73 | "Hani": "Han", 74 | "Hano": "Hanunoo", 75 | "Hans": "Simplified", 76 | "Hans-alt-stand-alone": "Simplified Han", 77 | "Hant": "Traditional", 78 | "Hant-alt-stand-alone": "Traditional Han", 79 | "Hatr": "Hatran", 80 | "Hebr": "Hebrew", 81 | "Hira": "Hiragana", 82 | "Hluw": "Anatolian Hieroglyphs", 83 | "Hmng": "Pahawh Hmong", 84 | "Hmnp": "Nyiakeng Puachue Hmong", 85 | "Hrkt": "Japanese syllabaries", 86 | "Hung": "Old Hungarian", 87 | "Inds": "Indus", 88 | "Ital": "Old Italic", 89 | "Jamo": "Jamo", 90 | "Java": "Javanese", 91 | "Jpan": "Japanese", 92 | "Jurc": "Jurchen", 93 | "Kali": "Kayah Li", 94 | "Kana": "Katakana", 95 | "Kawi": "Kawi", 96 | "Khar": "Kharoshthi", 97 | "Khmr": "Khmer", 98 | "Khoj": "Khojki", 99 | "Kits": "Khitan small script", 100 | "Knda": "Kannada", 101 | "Kore": "Korean", 102 | "Kpel": "Kpelle", 103 | "Krai": "Kirat Rai", 104 | "Kthi": "Kaithi", 105 | "Lana": "Lanna", 106 | "Laoo": "Lao", 107 | "Latf": "Fraktur Latin", 108 | "Latg": "Gaelic Latin", 109 | "Latn": "Latin", 110 | "Lepc": "Lepcha", 111 | "Limb": "Limbu", 112 | "Lina": "Linear A", 113 | "Linb": "Linear B", 114 | "Lisu": "Fraser", 115 | "Loma": "Loma", 116 | "Lyci": "Lycian", 117 | "Lydi": "Lydian", 118 | "Mahj": "Mahajani", 119 | "Maka": "Makasar", 120 | "Mand": "Mandaean", 121 | "Mani": "Manichaean", 122 | "Marc": "Marchen", 123 | "Maya": "Mayan hieroglyphs", 124 | "Medf": "Medefaidrin", 125 | "Mend": "Mende", 126 | "Merc": "Meroitic Cursive", 127 | "Mero": "Meroitic", 128 | "Mlym": "Malayalam", 129 | "Modi": "Modi", 130 | "Mong": "Mongolian", 131 | "Moon": "Moon", 132 | "Mroo": "Mro", 133 | "Mtei": "Meitei Mayek", 134 | "Mult": "Multani", 135 | "Mymr": "Myanmar", 136 | "Nagm": "Nag Mundari", 137 | "Nand": "Nandinagari", 138 | "Narb": "Old North Arabian", 139 | "Nbat": "Nabataean", 140 | "Newa": 
"Newa", 141 | "Nkgb": "Naxi Geba", 142 | "Nkoo": "N’Ko", 143 | "Nshu": "Nüshu", 144 | "Ogam": "Ogham", 145 | "Olck": "Ol Chiki", 146 | "Onao": "Ol Onal", 147 | "Orkh": "Orkhon", 148 | "Orya": "Odia", 149 | "Osge": "Osage", 150 | "Osma": "Osmanya", 151 | "Ougr": "Old Uyghur", 152 | "Palm": "Palmyrene", 153 | "Pauc": "Pau Cin Hau", 154 | "Perm": "Old Permic", 155 | "Phag": "Phags-pa", 156 | "Phli": "Inscriptional Pahlavi", 157 | "Phlp": "Psalter Pahlavi", 158 | "Phlv": "Book Pahlavi", 159 | "Phnx": "Phoenician", 160 | "Plrd": "Pollard Phonetic", 161 | "Prti": "Inscriptional Parthian", 162 | "Qaag": "Zawgyi", 163 | "Rjng": "Rejang", 164 | "Rohg": "Hanifi", 165 | "Rohg-alt-stand-alone": "Hanifi Rohingya", 166 | "Roro": "Rongorongo", 167 | "Runr": "Runic", 168 | "Samr": "Samaritan", 169 | "Sara": "Sarati", 170 | "Sarb": "Old South Arabian", 171 | "Saur": "Saurashtra", 172 | "Sgnw": "SignWriting", 173 | "Shaw": "Shavian", 174 | "Shrd": "Sharada", 175 | "Sidd": "Siddham", 176 | "Sind": "Khudawadi", 177 | "Sinh": "Sinhala", 178 | "Sogd": "Sogdian", 179 | "Sogo": "Old Sogdian", 180 | "Sora": "Sora Sompeng", 181 | "Soyo": "Soyombo", 182 | "Sund": "Sundanese", 183 | "Sunu": "Sunuwar", 184 | "Sylo": "Syloti Nagri", 185 | "Syrc": "Syriac", 186 | "Syre": "Estrangelo Syriac", 187 | "Syrj": "Western Syriac", 188 | "Syrn": "Eastern Syriac", 189 | "Tagb": "Tagbanwa", 190 | "Takr": "Takri", 191 | "Tale": "Tai Le", 192 | "Talu": "New Tai Lue", 193 | "Taml": "Tamil", 194 | "Tang": "Tangut", 195 | "Tavt": "Tai Viet", 196 | "Telu": "Telugu", 197 | "Teng": "Tengwar", 198 | "Tfng": "Tifinagh", 199 | "Tglg": "Tagalog", 200 | "Thaa": "Thaana", 201 | "Thai": "Thai", 202 | "Tibt": "Tibetan", 203 | "Tirh": "Tirhuta", 204 | "Tnsa": "Tangsa", 205 | "Todr": "Todhri", 206 | "Toto": "Toto", 207 | "Tutg": "Tulu-Tigalari", 208 | "Ugar": "Ugaritic", 209 | "Vaii": "Vai", 210 | "Visp": "Visible Speech", 211 | "Vith": "Vithkuqi", 212 | "Wara": "Varang Kshiti", 213 | "Wcho": "Wancho", 214 | "Wole": 
"Woleai", 215 | "Xpeo": "Old Persian", 216 | "Xsux": "Sumero-Akkadian Cuneiform", 217 | "Xsux-alt-short": "S-A Cuneiform", 218 | "Yezi": "Yezidi", 219 | "Yiii": "Yi", 220 | "Zanb": "Zanabazar Square", 221 | "Zinh": "Inherited", 222 | "Zmth": "Mathematical Notation", 223 | "Zsye": "Emoji", 224 | "Zsym": "Symbols", 225 | "Zxxx": "Unwritten", 226 | "Zyyy": "Common", 227 | "Zzzz": "Unknown Script" 228 | } 229 | } 230 | } 231 | } 232 | } -------------------------------------------------------------------------------- /bin/export-plural-rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | $arg) { 80 | if ($argi === 0) { 81 | continue; 82 | } 83 | if (is_string($arg)) { 84 | $argLC = trim(strtolower($arg)); 85 | switch ($argLC) { 86 | case '-h': 87 | case '--help': 88 | self::showSyntax(); 89 | exit(0); 90 | case '--us-ascii': 91 | self::$outputUSAscii = true; 92 | break; 93 | case '--reduce=yes': 94 | self::$reduce = true; 95 | break; 96 | case '--reduce=no': 97 | self::$reduce = false; 98 | break; 99 | case '--parenthesis=yes': 100 | self::$extraParenthesis = true; 101 | break; 102 | case '--parenthesis=no': 103 | self::$extraParenthesis = false; 104 | break; 105 | case '--parenthesis=both': 106 | self::$extraParenthesis = null; 107 | break; 108 | default: 109 | if (preg_match('/^--output=.+$/', $argLC)) { 110 | if (isset(self::$outputFilename)) { 111 | fwrite(STDERR, "The output file name has been specified more than once!\n"); 112 | self::showSyntax(); 113 | exit(3); 114 | } 115 | list(, self::$outputFilename) = explode('=', $arg, 2); 116 | self::$outputFilename = trim(self::$outputFilename); 117 | } elseif (preg_match('/^--languages?=.+$/', $argLC)) { 118 | list(, $s) = explode('=', $arg, 2); 119 | $list = explode(',', $s); 120 | if (is_array(self::$languages)) { 121 | self::$languages = array_merge(self::$languages, $list); 122 | } else { 123 | self::$languages = $list; 124 | } 125 | } elseif 
(isset($exporters[$argLC])) { 126 | if (isset(self::$outputFormat)) { 127 | fwrite(STDERR, "The output format has been specified more than once!\n"); 128 | self::showSyntax(); 129 | exit(3); 130 | } 131 | self::$outputFormat = $argLC; 132 | } else { 133 | fwrite(STDERR, "Unknown option: {$arg}\n"); 134 | self::showSyntax(); 135 | exit(2); 136 | } 137 | break; 138 | } 139 | } 140 | } 141 | } 142 | if (!isset(self::$outputFormat)) { 143 | self::showSyntax(); 144 | exit(1); 145 | } 146 | if (isset(self::$languages)) { 147 | self::$languages = array_values(array_unique(self::$languages)); 148 | } 149 | if (!isset(self::$reduce)) { 150 | self::$reduce = isset(self::$languages) ? false : true; 151 | } 152 | } 153 | 154 | /** 155 | * Write out the syntax. 156 | */ 157 | public static function showSyntax() 158 | { 159 | $basename = basename(__FILE__); 160 | $exporters = array_keys(Exporter::getExporters(true)); 161 | $exporterList = implode('|', $exporters); 162 | fwrite( 163 | STDERR, 164 | <<[,,...]] [--reduce=yes|no] [--parenthesis=yes|no] [--output=] <{$exporterList}> 167 | 168 | Where: 169 | --help 170 | show this help message. 171 | 172 | --us-ascii 173 | if specified, the output will contain only US-ASCII characters. 174 | 175 | --languages(or --language) 176 | export only the specified language codes. 177 | Separate languages with commas; you can also use this argument 178 | more than once; it's case insensitive and accepts both '_' and 179 | '-' as locale chunks separator (eg we accept 'it_IT' as well as 180 | 'it-it'). 181 | --reduce 182 | if set to yes the output won't contain languages with the same 183 | base language and rules. 184 | For instance nl_BE ('Flemish') will be omitted because it's the 185 | same as nl ('Dutch'). 186 | Defaults to 'no' if --languages is specified, to 'yes' otherwise. 187 | --parenthesis 188 | if set to no, extra parenthesis will be omitted in generated 189 | plural rules formulas. 
190 | Those extra parenthesis are needed to create a PHP-compatible 191 | formula. 192 | Some exporter may also export formulas both with and without 193 | The extra parenthesis: use --parenthesis=both in this case 194 | Defaults to 'yes' 195 | --output 196 | if specified, the output will be saved to . If not 197 | specified we'll output to standard output. 198 | 199 | Output formats 200 | 201 | EOT 202 | ); 203 | $len = max(array_map('strlen', $exporters)); 204 | foreach ($exporters as $exporter) { 205 | fwrite(STDERR, ' ' . str_pad($exporter, $len) . ': ' . Exporter::getExporterDescription($exporter) . "\n"); 206 | } 207 | fwrite(STDERR, "\n"); 208 | } 209 | 210 | /** 211 | * Reduce a language list to the minimum common denominator. 212 | * 213 | * @param Language[] $languages 214 | * 215 | * @return Language[] 216 | */ 217 | public static function reduce($languages) 218 | { 219 | for ($numChunks = 3; $numChunks >= 2; $numChunks--) { 220 | $filtered = array(); 221 | foreach ($languages as $language) { 222 | $chunks = explode('_', $language->id); 223 | $compatibleFound = false; 224 | if ($numChunks === count($chunks)) { 225 | $categoriesHash = serialize($language->categories); 226 | $otherIds = array(); 227 | $otherIds[] = $chunks[0]; 228 | for ($k = 2; $k < $numChunks; $k++) { 229 | $otherIds[] = $chunks[0] . '_' . 
$chunks[$numChunks - 1]; 230 | } 231 | 232 | foreach ($languages as $check) { 233 | foreach ($otherIds as $otherId) { 234 | if ($check->id === $otherId && $check->formula === $language->formula && $categoriesHash === serialize($check->categories)) { 235 | $compatibleFound = true; 236 | break; 237 | } 238 | } 239 | if ($compatibleFound === true) { 240 | break; 241 | } 242 | } 243 | } 244 | if (!$compatibleFound) { 245 | $filtered[] = $language; 246 | } 247 | } 248 | $languages = $filtered; 249 | } 250 | 251 | return $languages; 252 | } 253 | } 254 | 255 | // Parse the command line options 256 | Enviro::initialize(); 257 | 258 | try { 259 | if (isset(Enviro::$languages)) { 260 | $languages = array(); 261 | foreach (Enviro::$languages as $languageId) { 262 | $language = Language::getById($languageId); 263 | if (!isset($language)) { 264 | throw new Exception("Unable to find the language with id '{$languageId}'"); 265 | } 266 | $languages[] = $language; 267 | } 268 | } else { 269 | $languages = Language::getAll(); 270 | } 271 | if (Enviro::$reduce) { 272 | $languages = Enviro::reduce($languages); 273 | } 274 | if (Enviro::$extraParenthesis === false) { 275 | $languages = array_map( 276 | function (Language $language) { 277 | $language->formula = $language->buildFormula(true); 278 | 279 | return $language; 280 | }, 281 | $languages 282 | ); 283 | } 284 | $exporterClass = Exporter::getExporterClassName(Enviro::$outputFormat); 285 | $options = array( 286 | 'us-ascii' => Enviro::$outputUSAscii, 287 | 'both-formulas' => Enviro::$extraParenthesis === null, 288 | ); 289 | if ($options['both-formulas'] && !call_user_func(array($exporterClass, 'supportsFormulasWithAndWithoutParenthesis'))) { 290 | throw new Exception("The selected exporter doesn't support exporting data with and without extra paranthesis"); 291 | } 292 | if (isset(Enviro::$outputFilename)) { 293 | echo call_user_func(array($exporterClass, 'toFile'), $languages, Enviro::$outputFilename, $options); 294 | } else { 
295 | echo call_user_func(array($exporterClass, 'toString'), $languages, $options); 296 | } 297 | } catch (Exception $x) { 298 | fwrite(STDERR, $x->getMessage() . "\n"); 299 | fwrite(STDERR, "Trace:\n"); 300 | fwrite(STDERR, $x->getTraceAsString() . "\n"); 301 | exit(4); 302 | } 303 | 304 | exit(0); 305 | -------------------------------------------------------------------------------- /src/cldr-data/main/en-US/territories.json: -------------------------------------------------------------------------------- 1 | { 2 | "main": { 3 | "en-US": { 4 | "identity": { 5 | "version": { 6 | "_cldrVersion": "47" 7 | }, 8 | "language": "en", 9 | "territory": "US" 10 | }, 11 | "localeDisplayNames": { 12 | "territories": { 13 | "001": "world", 14 | "002": "Africa", 15 | "003": "North America", 16 | "005": "South America", 17 | "009": "Oceania", 18 | "011": "Western Africa", 19 | "013": "Central America", 20 | "014": "Eastern Africa", 21 | "015": "Northern Africa", 22 | "017": "Middle Africa", 23 | "018": "Southern Africa", 24 | "019": "Americas", 25 | "021": "Northern America", 26 | "029": "Caribbean", 27 | "030": "Eastern Asia", 28 | "034": "Southern Asia", 29 | "035": "Southeast Asia", 30 | "039": "Southern Europe", 31 | "053": "Australasia", 32 | "054": "Melanesia", 33 | "057": "Micronesian Region", 34 | "061": "Polynesia", 35 | "142": "Asia", 36 | "143": "Central Asia", 37 | "145": "Western Asia", 38 | "150": "Europe", 39 | "151": "Eastern Europe", 40 | "154": "Northern Europe", 41 | "155": "Western Europe", 42 | "202": "Sub-Saharan Africa", 43 | "419": "Latin America", 44 | "AC": "Ascension Island", 45 | "AD": "Andorra", 46 | "AE": "United Arab Emirates", 47 | "AF": "Afghanistan", 48 | "AG": "Antigua & Barbuda", 49 | "AI": "Anguilla", 50 | "AL": "Albania", 51 | "AM": "Armenia", 52 | "AO": "Angola", 53 | "AQ": "Antarctica", 54 | "AR": "Argentina", 55 | "AS": "American Samoa", 56 | "AT": "Austria", 57 | "AU": "Australia", 58 | "AW": "Aruba", 59 | "AX": "Åland Islands", 60 | 
"AZ": "Azerbaijan", 61 | "BA": "Bosnia & Herzegovina", 62 | "BA-alt-short": "Bosnia", 63 | "BB": "Barbados", 64 | "BD": "Bangladesh", 65 | "BE": "Belgium", 66 | "BF": "Burkina Faso", 67 | "BG": "Bulgaria", 68 | "BH": "Bahrain", 69 | "BI": "Burundi", 70 | "BJ": "Benin", 71 | "BL": "St. Barthélemy", 72 | "BM": "Bermuda", 73 | "BN": "Brunei", 74 | "BO": "Bolivia", 75 | "BQ": "Caribbean Netherlands", 76 | "BR": "Brazil", 77 | "BS": "Bahamas", 78 | "BT": "Bhutan", 79 | "BV": "Bouvet Island", 80 | "BW": "Botswana", 81 | "BY": "Belarus", 82 | "BZ": "Belize", 83 | "CA": "Canada", 84 | "CC": "Cocos (Keeling) Islands", 85 | "CC-alt-short": "Cocos Islands", 86 | "CD": "Congo - Kinshasa", 87 | "CD-alt-variant": "Congo (DRC)", 88 | "CF": "Central African Republic", 89 | "CG": "Congo - Brazzaville", 90 | "CG-alt-variant": "Congo (Republic)", 91 | "CH": "Switzerland", 92 | "CI": "Côte d’Ivoire", 93 | "CI-alt-variant": "Ivory Coast", 94 | "CK": "Cook Islands", 95 | "CL": "Chile", 96 | "CM": "Cameroon", 97 | "CN": "China", 98 | "CO": "Colombia", 99 | "CP": "Clipperton Island", 100 | "CQ": "Sark", 101 | "CR": "Costa Rica", 102 | "CU": "Cuba", 103 | "CV": "Cape Verde", 104 | "CV-alt-variant": "Cabo Verde", 105 | "CW": "Curaçao", 106 | "CX": "Christmas Island", 107 | "CY": "Cyprus", 108 | "CZ": "Czechia", 109 | "CZ-alt-variant": "Czech Republic", 110 | "DE": "Germany", 111 | "DG": "Diego Garcia", 112 | "DJ": "Djibouti", 113 | "DK": "Denmark", 114 | "DM": "Dominica", 115 | "DO": "Dominican Republic", 116 | "DZ": "Algeria", 117 | "EA": "Ceuta & Melilla", 118 | "EC": "Ecuador", 119 | "EE": "Estonia", 120 | "EG": "Egypt", 121 | "EH": "Western Sahara", 122 | "ER": "Eritrea", 123 | "ES": "Spain", 124 | "ET": "Ethiopia", 125 | "EU": "European Union", 126 | "EZ": "Eurozone", 127 | "FI": "Finland", 128 | "FJ": "Fiji", 129 | "FK": "Falkland Islands", 130 | "FK-alt-variant": "Falkland Islands (Islas Malvinas)", 131 | "FM": "Micronesia", 132 | "FO": "Faroe Islands", 133 | "FR": "France", 134 | 
"GA": "Gabon", 135 | "GB": "United Kingdom", 136 | "GB-alt-short": "UK", 137 | "GD": "Grenada", 138 | "GE": "Georgia", 139 | "GF": "French Guiana", 140 | "GG": "Guernsey", 141 | "GH": "Ghana", 142 | "GI": "Gibraltar", 143 | "GL": "Greenland", 144 | "GM": "Gambia", 145 | "GN": "Guinea", 146 | "GP": "Guadeloupe", 147 | "GQ": "Equatorial Guinea", 148 | "GR": "Greece", 149 | "GS": "South Georgia & South Sandwich Islands", 150 | "GT": "Guatemala", 151 | "GU": "Guam", 152 | "GW": "Guinea-Bissau", 153 | "GY": "Guyana", 154 | "HK": "Hong Kong SAR China", 155 | "HK-alt-short": "Hong Kong", 156 | "HM": "Heard & McDonald Islands", 157 | "HN": "Honduras", 158 | "HR": "Croatia", 159 | "HT": "Haiti", 160 | "HU": "Hungary", 161 | "IC": "Canary Islands", 162 | "ID": "Indonesia", 163 | "IE": "Ireland", 164 | "IL": "Israel", 165 | "IM": "Isle of Man", 166 | "IN": "India", 167 | "IO": "British Indian Ocean Territory", 168 | "IO-alt-biot": "British Indian Ocean Territory", 169 | "IO-alt-chagos": "Chagos Archipelago", 170 | "IQ": "Iraq", 171 | "IR": "Iran", 172 | "IS": "Iceland", 173 | "IT": "Italy", 174 | "JE": "Jersey", 175 | "JM": "Jamaica", 176 | "JO": "Jordan", 177 | "JP": "Japan", 178 | "KE": "Kenya", 179 | "KG": "Kyrgyzstan", 180 | "KH": "Cambodia", 181 | "KI": "Kiribati", 182 | "KM": "Comoros", 183 | "KN": "St. Kitts & Nevis", 184 | "KP": "North Korea", 185 | "KR": "South Korea", 186 | "KW": "Kuwait", 187 | "KY": "Cayman Islands", 188 | "KZ": "Kazakhstan", 189 | "LA": "Laos", 190 | "LB": "Lebanon", 191 | "LC": "St. Lucia", 192 | "LI": "Liechtenstein", 193 | "LK": "Sri Lanka", 194 | "LR": "Liberia", 195 | "LS": "Lesotho", 196 | "LT": "Lithuania", 197 | "LU": "Luxembourg", 198 | "LV": "Latvia", 199 | "LY": "Libya", 200 | "MA": "Morocco", 201 | "MC": "Monaco", 202 | "MD": "Moldova", 203 | "ME": "Montenegro", 204 | "MF": "St. 
Martin", 205 | "MG": "Madagascar", 206 | "MH": "Marshall Islands", 207 | "MK": "North Macedonia", 208 | "ML": "Mali", 209 | "MM": "Myanmar (Burma)", 210 | "MM-alt-short": "Myanmar", 211 | "MN": "Mongolia", 212 | "MO": "Macao SAR China", 213 | "MO-alt-short": "Macao", 214 | "MP": "Northern Mariana Islands", 215 | "MQ": "Martinique", 216 | "MR": "Mauritania", 217 | "MS": "Montserrat", 218 | "MT": "Malta", 219 | "MU": "Mauritius", 220 | "MV": "Maldives", 221 | "MW": "Malawi", 222 | "MX": "Mexico", 223 | "MY": "Malaysia", 224 | "MZ": "Mozambique", 225 | "NA": "Namibia", 226 | "NC": "New Caledonia", 227 | "NE": "Niger", 228 | "NF": "Norfolk Island", 229 | "NG": "Nigeria", 230 | "NI": "Nicaragua", 231 | "NL": "Netherlands", 232 | "NO": "Norway", 233 | "NP": "Nepal", 234 | "NR": "Nauru", 235 | "NU": "Niue", 236 | "NZ": "New Zealand", 237 | "NZ-alt-variant": "Aotearoa New Zealand", 238 | "OM": "Oman", 239 | "PA": "Panama", 240 | "PE": "Peru", 241 | "PF": "French Polynesia", 242 | "PG": "Papua New Guinea", 243 | "PH": "Philippines", 244 | "PK": "Pakistan", 245 | "PL": "Poland", 246 | "PM": "St. Pierre & Miquelon", 247 | "PN": "Pitcairn Islands", 248 | "PN-alt-short": "Pitcairn", 249 | "PR": "Puerto Rico", 250 | "PS": "Palestinian Territories", 251 | "PS-alt-short": "Palestine", 252 | "PT": "Portugal", 253 | "PW": "Palau", 254 | "PY": "Paraguay", 255 | "QA": "Qatar", 256 | "QO": "Outlying Oceania", 257 | "RE": "Réunion", 258 | "RO": "Romania", 259 | "RS": "Serbia", 260 | "RU": "Russia", 261 | "RW": "Rwanda", 262 | "SA": "Saudi Arabia", 263 | "SB": "Solomon Islands", 264 | "SC": "Seychelles", 265 | "SD": "Sudan", 266 | "SE": "Sweden", 267 | "SG": "Singapore", 268 | "SH": "St. 
Helena", 269 | "SI": "Slovenia", 270 | "SJ": "Svalbard & Jan Mayen", 271 | "SK": "Slovakia", 272 | "SL": "Sierra Leone", 273 | "SM": "San Marino", 274 | "SN": "Senegal", 275 | "SO": "Somalia", 276 | "SR": "Suriname", 277 | "SS": "South Sudan", 278 | "ST": "São Tomé & Príncipe", 279 | "SV": "El Salvador", 280 | "SX": "Sint Maarten", 281 | "SY": "Syria", 282 | "SZ": "Eswatini", 283 | "SZ-alt-variant": "Swaziland", 284 | "TA": "Tristan da Cunha", 285 | "TC": "Turks & Caicos Islands", 286 | "TD": "Chad", 287 | "TF": "French Southern Territories", 288 | "TG": "Togo", 289 | "TH": "Thailand", 290 | "TJ": "Tajikistan", 291 | "TK": "Tokelau", 292 | "TL": "Timor-Leste", 293 | "TL-alt-variant": "East Timor", 294 | "TM": "Turkmenistan", 295 | "TN": "Tunisia", 296 | "TO": "Tonga", 297 | "TR": "Türkiye", 298 | "TR-alt-variant": "Turkey", 299 | "TT": "Trinidad & Tobago", 300 | "TV": "Tuvalu", 301 | "TW": "Taiwan", 302 | "TZ": "Tanzania", 303 | "UA": "Ukraine", 304 | "UG": "Uganda", 305 | "UM": "U.S. Outlying Islands", 306 | "UN": "United Nations", 307 | "UN-alt-short": "UN", 308 | "US": "United States", 309 | "US-alt-short": "US", 310 | "UY": "Uruguay", 311 | "UZ": "Uzbekistan", 312 | "VA": "Vatican City", 313 | "VC": "St. Vincent & Grenadines", 314 | "VE": "Venezuela", 315 | "VG": "British Virgin Islands", 316 | "VI": "U.S. Virgin Islands", 317 | "VN": "Vietnam", 318 | "VU": "Vanuatu", 319 | "WF": "Wallis & Futuna", 320 | "WS": "Samoa", 321 | "XA": "Pseudo-Accents", 322 | "XB": "Pseudo-Bidi", 323 | "XK": "Kosovo", 324 | "YE": "Yemen", 325 | "YT": "Mayotte", 326 | "ZA": "South Africa", 327 | "ZM": "Zambia", 328 | "ZW": "Zimbabwe", 329 | "ZZ": "Unknown Region" 330 | } 331 | } 332 | } 333 | } 334 | } -------------------------------------------------------------------------------- /src/CldrData.php: -------------------------------------------------------------------------------- 1 |
     83 |      * "en": {
     84 |      *     "pluralRule-count-one": "i = 1 and v = 0 @integer 1",
     85 |      *     "pluralRule-count-other": " @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …"
     86 |      * }
     87 |      * 
    88 | * 89 | * @return array 90 | */ 91 | public static function getPlurals() 92 | { 93 | return self::getData('plurals'); 94 | } 95 | 96 | /** 97 | * Return a list of superseded language codes. 98 | * 99 | * @return array keys are the former language codes, values are the new language/locale codes 100 | */ 101 | public static function getSupersededLanguages() 102 | { 103 | return self::getData('supersededLanguages'); 104 | } 105 | 106 | /** 107 | * Retrieve the name of a language, as well as if a language code is deprecated in favor of another language code. 108 | * 109 | * @param string $id the language identifier 110 | * 111 | * @return array|null Returns an array with the keys 'id' (normalized), 'name', 'supersededBy' (optional), 'territory' (optional), 'script' (optional), 'baseLanguage' (optional), 'categories'. If $id is not valid returns null. 112 | */ 113 | public static function getLanguageInfo($id) 114 | { 115 | $result = null; 116 | $matches = array(); 117 | if (preg_match('/^([a-z]{2,3})(?:[_\-]([a-z]{4}))?(?:[_\-]([a-z]{2}|[0-9]{3}))?(?:$|-)/i', $id, $matches)) { 118 | $languageId = strtolower($matches[1]); 119 | $scriptId = (isset($matches[2]) && ($matches[2] !== '')) ? ucfirst(strtolower($matches[2])) : null; 120 | $territoryId = (isset($matches[3]) && ($matches[3] !== '')) ? strtoupper($matches[3]) : null; 121 | $normalizedId = $languageId; 122 | if (isset($scriptId)) { 123 | $normalizedId .= '_' . $scriptId; 124 | } 125 | if (isset($territoryId)) { 126 | $normalizedId .= '_' . 
$territoryId; 127 | } 128 | // Structure precedence: see Likely Subtags - http://www.unicode.org/reports/tr35/tr35-31/tr35.html#Likely_Subtags 129 | $variants = array(); 130 | $variantsWithScript = array(); 131 | $variantsWithTerritory = array(); 132 | if (isset($scriptId) && isset($territoryId)) { 133 | $variantsWithTerritory[] = $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}_{$territoryId}"; 134 | } 135 | if (isset($scriptId)) { 136 | $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}"; 137 | } 138 | if (isset($territoryId)) { 139 | $variantsWithTerritory[] = $variants[] = "{$languageId}_{$territoryId}"; 140 | } 141 | $variants[] = $languageId; 142 | $allGood = true; 143 | $scriptName = null; 144 | $scriptStandAloneName = null; 145 | if (isset($scriptId)) { 146 | $scriptNames = self::getScriptNames(false); 147 | if (isset($scriptNames[$scriptId])) { 148 | $scriptName = $scriptNames[$scriptId]; 149 | $scriptStandAloneNames = self::getScriptNames(true); 150 | $scriptStandAloneName = $scriptStandAloneNames[$scriptId]; 151 | } else { 152 | $allGood = false; 153 | } 154 | } 155 | $territoryName = null; 156 | if (isset($territoryId)) { 157 | $territoryNames = self::getTerritoryNames(); 158 | if (isset($territoryNames[$territoryId])) { 159 | if ($territoryId !== '001') { 160 | $territoryName = $territoryNames[$territoryId]; 161 | } 162 | } else { 163 | $allGood = false; 164 | } 165 | } 166 | $languageName = null; 167 | $languageNames = self::getLanguageNames(); 168 | foreach ($variants as $variant) { 169 | if (isset($languageNames[$variant])) { 170 | $languageName = $languageNames[$variant]; 171 | if (isset($scriptName) && (!in_array($variant, $variantsWithScript))) { 172 | $languageName = $scriptName . ' ' . $languageName; 173 | } 174 | if (isset($territoryName) && (!in_array($variant, $variantsWithTerritory))) { 175 | $languageName .= ' (' . $territoryNames[$territoryId] . 
')'; 176 | } 177 | break; 178 | } 179 | } 180 | if (!isset($languageName)) { 181 | $allGood = false; 182 | } 183 | $baseLanguage = null; 184 | if (isset($scriptId) || isset($territoryId)) { 185 | if (isset($languageNames[$languageId]) && ($languageNames[$languageId] !== $languageName)) { 186 | $baseLanguage = $languageNames[$languageId]; 187 | } 188 | } 189 | $plural = null; 190 | $plurals = self::getPlurals(); 191 | foreach ($variants as $variant) { 192 | if (isset($plurals[$variant])) { 193 | $plural = $plurals[$variant]; 194 | break; 195 | } 196 | } 197 | if (!isset($plural)) { 198 | $allGood = false; 199 | } 200 | $supersededBy = null; 201 | $supersededBys = self::getSupersededLanguages(); 202 | foreach ($variants as $variant) { 203 | if (isset($supersededBys[$variant])) { 204 | $supersededBy = $supersededBys[$variant]; 205 | break; 206 | } 207 | } 208 | if ($allGood) { 209 | $result = array(); 210 | $result['id'] = $normalizedId; 211 | $result['name'] = $languageName; 212 | if (isset($supersededBy)) { 213 | $result['supersededBy'] = $supersededBy; 214 | } 215 | if (isset($scriptStandAloneName)) { 216 | $result['script'] = $scriptStandAloneName; 217 | } 218 | if (isset($territoryName)) { 219 | $result['territory'] = $territoryName; 220 | } 221 | if (isset($baseLanguage)) { 222 | $result['baseLanguage'] = $baseLanguage; 223 | } 224 | $result['categories'] = $plural; 225 | } 226 | } 227 | 228 | return $result; 229 | } 230 | 231 | /** 232 | * Returns the loaded CLDR data. 
233 | * 234 | * @param string $key Can be 'languages', 'territories', 'plurals', 'supersededLanguages', 'scripts', 'standAloneScripts' 235 | * 236 | * @return array 237 | */ 238 | private static function getData($key) 239 | { 240 | if (!isset(self::$data)) { 241 | $fixKeys = function ($list, &$standAlone = null) { 242 | $result = array(); 243 | $standAlone = array(); 244 | $match = null; 245 | foreach ($list as $key => $value) { 246 | $variant = ''; 247 | if (preg_match('/^(.+)-alt-(short|variant|stand-alone|long|menu)$/', $key, $match)) { 248 | $key = $match[1]; 249 | $variant = $match[2]; 250 | } 251 | $key = str_replace('-', '_', $key); 252 | switch ($key) { 253 | case 'root': // Language: Root 254 | case 'und': // Language: Unknown Language 255 | case 'zxx': // Language: No linguistic content 256 | case 'ZZ': // Territory: Unknown Region 257 | case 'Zinh': // Script: Inherited 258 | case 'Zmth': // Script: Mathematical Notation 259 | case 'Zsym': // Script: Symbols 260 | case 'Zxxx': // Script: Unwritten 261 | case 'Zyyy': // Script: Common 262 | case 'Zzzz': // Script: Unknown Script 263 | break; 264 | default: 265 | switch ($variant) { 266 | case 'stand-alone': 267 | $standAlone[$key] = $value; 268 | break; 269 | case '': 270 | $result[$key] = $value; 271 | break; 272 | } 273 | break; 274 | } 275 | } 276 | 277 | return $result; 278 | }; 279 | $data = array(); 280 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/languages.json'), true); 281 | $data['languages'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['languages']); 282 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/territories.json'), true); 283 | $data['territories'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['territories']); 284 | $json = json_decode(file_get_contents(__DIR__ . 
'/cldr-data/supplemental/plurals.json'), true); 285 | $data['plurals'] = $fixKeys($json['supplemental']['plurals-type-cardinal']); 286 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/scripts.json'), true); 287 | $data['scripts'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['scripts'], $data['standAloneScripts']); 288 | $data['standAloneScripts'] = array_merge($data['scripts'], $data['standAloneScripts']); 289 | $data['scripts'] = array_merge($data['standAloneScripts'], $data['scripts']); 290 | $data['supersededLanguages'] = array(); 291 | // Remove the languages for which we don't have plurals 292 | $m = null; 293 | foreach (array_keys(array_diff_key($data['languages'], $data['plurals'])) as $missingPlural) { 294 | if (preg_match('/^([a-z]{2,3})_/', $missingPlural, $m)) { 295 | if (!isset($data['plurals'][$m[1]])) { 296 | unset($data['languages'][$missingPlural]); 297 | } 298 | } else { 299 | unset($data['languages'][$missingPlural]); 300 | } 301 | } 302 | // Fix the languages for which we have plurals 303 | $formerCodes = array( 304 | 'jw' => 'jv', // former Javanese 305 | 'mo' => 'ro_MD', // former Moldavian 306 | ); 307 | $knownMissingLanguages = array( 308 | 'guw' => 'Gun', 309 | 'hnj' => 'Hmong Njua', 310 | 'lld' => 'Dolomitic Ladin', 311 | 'nah' => 'Nahuatl', 312 | 'smi' => 'Sami', 313 | ); 314 | foreach (array_keys(array_diff_key($data['plurals'], $data['languages'])) as $missingLanguage) { 315 | if (isset($formerCodes[$missingLanguage]) && isset($data['languages'][$formerCodes[$missingLanguage]])) { 316 | $data['languages'][$missingLanguage] = $data['languages'][$formerCodes[$missingLanguage]]; 317 | $data['supersededLanguages'][$missingLanguage] = $formerCodes[$missingLanguage]; 318 | } else { 319 | if (isset($knownMissingLanguages[$missingLanguage])) { 320 | $data['languages'][$missingLanguage] = $knownMissingLanguages[$missingLanguage]; 321 | } else { 322 | throw new Exception("We have the plural rule for the language 
'{$missingLanguage}' but we don't have its language name"); 323 | } 324 | } 325 | } 326 | ksort($data['languages'], SORT_STRING); 327 | ksort($data['territories'], SORT_STRING); 328 | ksort($data['plurals'], SORT_STRING); 329 | ksort($data['scripts'], SORT_STRING); 330 | ksort($data['standAloneScripts'], SORT_STRING); 331 | ksort($data['supersededLanguages'], SORT_STRING); 332 | self::$data = $data; 333 | } 334 | if (!isset(self::$data[$key])) { 335 | throw new Exception("Invalid CLDR data key: '{$key}'"); 336 | } 337 | 338 | return self::$data[$key]; 339 | } 340 | } 341 | -------------------------------------------------------------------------------- /src/Language.php: -------------------------------------------------------------------------------- 1 | id = $info['id']; 78 | $this->name = $info['name']; 79 | $this->supersededBy = isset($info['supersededBy']) ? $info['supersededBy'] : null; 80 | $this->script = isset($info['script']) ? $info['script'] : null; 81 | $this->territory = isset($info['territory']) ? $info['territory'] : null; 82 | $this->baseLanguage = isset($info['baseLanguage']) ? 
$info['baseLanguage'] : null; 83 | // Let's build the category list 84 | $this->categories = array(); 85 | foreach ($info['categories'] as $cldrCategoryId => $cldrFormulaAndExamples) { 86 | $category = new Category($cldrCategoryId, $cldrFormulaAndExamples); 87 | foreach ($this->categories as $c) { 88 | if ($category->id === $c->id) { 89 | throw new Exception("The category '{$category->id}' is specified more than once"); 90 | } 91 | } 92 | $this->categories[] = $category; 93 | } 94 | if (empty($this->categories)) { 95 | throw new Exception("The language '{$info['id']}' does not have any plural category"); 96 | } 97 | // Let's sort the categories from 'zero' to 'other' 98 | usort($this->categories, function (Category $category1, Category $category2) { 99 | return array_search($category1->id, CldrData::$categories) - array_search($category2->id, CldrData::$categories); 100 | }); 101 | // The 'other' category should always be there 102 | if ($this->categories[count($this->categories) - 1]->id !== CldrData::OTHER_CATEGORY) { 103 | throw new Exception("The language '{$info['id']}' does not have the '" . CldrData::OTHER_CATEGORY . "' plural category"); 104 | } 105 | $this->checkAlwaysTrueCategories(); 106 | $this->checkAlwaysFalseCategories(); 107 | $this->checkAllCategoriesWithExamples(); 108 | $this->formula = $this->buildFormula(); 109 | } 110 | 111 | /** 112 | * Return a list of all languages available. 113 | * 114 | * @throws \Exception 115 | * 116 | * @return \Gettext\Languages\Language[] 117 | */ 118 | public static function getAll() 119 | { 120 | $result = array(); 121 | foreach (array_keys(CldrData::getLanguageNames()) as $cldrLanguageId) { 122 | $result[] = new self(CldrData::getLanguageInfo($cldrLanguageId)); 123 | } 124 | 125 | return $result; 126 | } 127 | 128 | /** 129 | * Return a Language instance given the language id. 
130 | * 131 | * @param string $id 132 | * 133 | * @return \Gettext\Languages\Language|null 134 | */ 135 | public static function getById($id) 136 | { 137 | $result = null; 138 | $info = CldrData::getLanguageInfo($id); 139 | if (isset($info)) { 140 | $result = new self($info); 141 | } 142 | 143 | return $result; 144 | } 145 | 146 | /** 147 | * Returns a clone of this instance with all the strings to US-ASCII. 148 | * 149 | * @return \Gettext\Languages\Language 150 | */ 151 | public function getUSAsciiClone() 152 | { 153 | $clone = clone $this; 154 | self::asciifier($clone->name); 155 | self::asciifier($clone->formula); 156 | $clone->categories = array(); 157 | foreach ($this->categories as $category) { 158 | $categoryClone = clone $category; 159 | self::asciifier($categoryClone->examples); 160 | $clone->categories[] = $categoryClone; 161 | } 162 | 163 | return $clone; 164 | } 165 | 166 | /** 167 | * Build the formula starting from the currently defined categories. 168 | * 169 | * @param bool $withoutParenthesis TRUE to build a formula in standard gettext format, FALSE (default) to build a PHP-compatible formula 170 | * 171 | * @return string 172 | */ 173 | public function buildFormula($withoutParenthesis = false) 174 | { 175 | $numCategories = count($this->categories); 176 | switch ($numCategories) { 177 | case 1: 178 | // Just one category 179 | return '0'; 180 | case 2: 181 | return self::reduceFormula(self::reverseFormula($this->categories[0]->formula)); 182 | default: 183 | $formula = (string) ($numCategories - 1); 184 | for ($i = $numCategories - 2; $i >= 0; $i--) { 185 | $f = self::reduceFormula($this->categories[$i]->formula); 186 | if (!$withoutParenthesis && !preg_match('/^\([^()]+\)$/', $f)) { 187 | $f = "({$f})"; 188 | } 189 | $formula = "{$f} ? 
{$i} : {$formula}"; 190 | if (!$withoutParenthesis && $i > 0) { 191 | $formula = "({$formula})"; 192 | } 193 | } 194 | 195 | return $formula; 196 | } 197 | } 198 | 199 | /** 200 | * Let's look for categories that will always occur. 201 | * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have just one case. 202 | * If we found that (single) category we reduce the categories to that one only. 203 | * 204 | * @throws \Exception 205 | */ 206 | private function checkAlwaysTrueCategories() 207 | { 208 | $alwaysTrueCategory = null; 209 | foreach ($this->categories as $category) { 210 | if ($category->formula === true) { 211 | if (!isset($category->examples)) { 212 | throw new Exception("The category '{$category->id}' should always occur, but it does not have examples (so for CLDR it will never occur for integers!)"); 213 | } 214 | $alwaysTrueCategory = $category; 215 | break; 216 | } 217 | } 218 | if (isset($alwaysTrueCategory)) { 219 | foreach ($this->categories as $category) { 220 | if (($category !== $alwaysTrueCategory) && isset($category->examples)) { 221 | throw new Exception("The category '{$category->id}' should never occur, but it has some examples (so for CLDR it will occur!)"); 222 | } 223 | } 224 | $alwaysTrueCategory->id = CldrData::OTHER_CATEGORY; 225 | $alwaysTrueCategory->formula = null; 226 | $this->categories = array($alwaysTrueCategory); 227 | } 228 | } 229 | 230 | /** 231 | * Let's look for categories that will never occur. 232 | * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have some less cases. 233 | * If we found those categories we strip them out. 
234 | * 235 | * @throws \Exception 236 | */ 237 | private function checkAlwaysFalseCategories() 238 | { 239 | $filtered = array(); 240 | foreach ($this->categories as $category) { 241 | if ($category->formula === false) { 242 | if (isset($category->examples)) { 243 | throw new Exception("The category '{$category->id}' should never occur, but it has examples (so for CLDR it may occur!)"); 244 | } 245 | } else { 246 | $filtered[] = $category; 247 | } 248 | } 249 | $this->categories = $filtered; 250 | } 251 | 252 | /** 253 | * Let's look for categories that don't have examples. 254 | * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have some less cases. 255 | * If we found those categories, we check that they never occur and we strip them out. 256 | * 257 | * @throws \Exception 258 | */ 259 | private function checkAllCategoriesWithExamples() 260 | { 261 | $allCategoriesIds = array(); 262 | $goodCategories = array(); 263 | $badCategories = array(); 264 | $badCategoriesIds = array(); 265 | foreach ($this->categories as $category) { 266 | $allCategoriesIds[] = $category->id; 267 | if (isset($category->examples)) { 268 | $goodCategories[] = $category; 269 | } else { 270 | $badCategories[] = $category; 271 | $badCategoriesIds[] = $category->id; 272 | } 273 | } 274 | if (empty($badCategories)) { 275 | return; 276 | } 277 | $removeCategoriesWithoutExamples = false; 278 | switch (implode(',', $badCategoriesIds) . '@' . implode(',', $allCategoriesIds)) { 279 | case CldrData::OTHER_CATEGORY . '@one,few,many,' . CldrData::OTHER_CATEGORY: 280 | switch ($this->buildFormula()) { 281 | case '(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n % 10 == 0 || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 11 && n % 100 <= 14) ? 
2 : 3))': 282 | // Numbers ending with 0 => case 2 ('many') 283 | // Numbers ending with 1 but not with 11 => case 0 ('one') 284 | // Numbers ending with 11 => case 2 ('many') 285 | // Numbers ending with 2 but not with 12 => case 1 ('few') 286 | // Numbers ending with 12 => case 2 ('many') 287 | // Numbers ending with 3 but not with 13 => case 1 ('few') 288 | // Numbers ending with 13 => case 2 ('many') 289 | // Numbers ending with 4 but not with 14 => case 1 ('few') 290 | // Numbers ending with 14 => case 2 ('many') 291 | // Numbers ending with 5 => case 2 ('many') 292 | // Numbers ending with 6 => case 2 ('many') 293 | // Numbers ending with 7 => case 2 ('many') 294 | // Numbers ending with 8 => case 2 ('many') 295 | // Numbers ending with 9 => case 2 ('many') 296 | // => the 'other' case never occurs: use 'other' for 'many' 297 | $removeCategoriesWithoutExamples = true; 298 | break; 299 | case '(n == 1) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n != 1 && (n % 10 == 0 || n % 10 == 1) || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 12 && n % 100 <= 14) ? 
2 : 3))': 300 | // Numbers ending with 0 => case 2 ('many') 301 | // Numbers ending with 1 but not number 1 => case 2 ('many') 302 | // Number 1 => case 0 ('one') 303 | // Numbers ending with 2 but not with 12 => case 1 ('few') 304 | // Numbers ending with 12 => case 2 ('many') 305 | // Numbers ending with 3 but not with 13 => case 1 ('few') 306 | // Numbers ending with 13 => case 2 ('many') 307 | // Numbers ending with 4 but not with 14 => case 1 ('few') 308 | // Numbers ending with 14 => case 2 ('many') 309 | // Numbers ending with 5 => case 2 ('many') 310 | // Numbers ending with 6 => case 2 ('many') 311 | // Numbers ending with 7 => case 2 ('many') 312 | // Numbers ending with 8 => case 2 ('many') 313 | // Numbers ending with 9 => case 2 ('many') 314 | // => the 'other' case never occurs: use 'other' for 'many' 315 | $removeCategoriesWithoutExamples = true; 316 | break; 317 | } 318 | } 319 | if (!$removeCategoriesWithoutExamples) { 320 | throw new Exception("Unhandled case of plural categories without examples '" . implode(', ', $badCategoriesIds) . "' out of '" . implode(', ', $allCategoriesIds) . "'"); 321 | } 322 | if ($badCategories[count($badCategories) - 1]->id === CldrData::OTHER_CATEGORY) { 323 | // We're removing the 'other' category: let's change the last good category to 'other' 324 | $lastGood = $goodCategories[count($goodCategories) - 1]; 325 | $lastGood->id = CldrData::OTHER_CATEGORY; 326 | $lastGood->formula = null; 327 | } 328 | $this->categories = $goodCategories; 329 | } 330 | 331 | /** 332 | * Reverse a formula. 333 | * 334 | * @param string $formula 335 | * 336 | * @throws \Exception 337 | * 338 | * @return string 339 | */ 340 | private static function reverseFormula($formula) 341 | { 342 | if (preg_match('/^n( % \d+)? == \d+(\.\.\d+|,\d+)*?$/', $formula)) { 343 | return str_replace(' == ', ' != ', $formula); 344 | } 345 | if (preg_match('/^n( % \d+)? 
!= \d+(\.\.\d+|,\d+)*?$/', $formula)) { 346 | return str_replace(' != ', ' == ', $formula); 347 | } 348 | if (preg_match('/^\(?n == \d+ \|\| n == \d+\)?$/', $formula)) { 349 | return trim(str_replace(array(' == ', ' || '), array(' != ', ' && '), $formula), '()'); 350 | } 351 | $m = null; 352 | if (preg_match('/^(n(?: % \d+)?) == (\d+) && (n(?: % \d+)?) != (\d+)$/', $formula, $m)) { 353 | return "{$m[1]} != {$m[2]} || {$m[3]} == {$m[4]}"; 354 | } 355 | switch ($formula) { 356 | case '(n == 1 || n == 2 || n == 3) || n % 10 != 4 && n % 10 != 6 && n % 10 != 9': 357 | return 'n != 1 && n != 2 && n != 3 && (n % 10 == 4 || n % 10 == 6 || n % 10 == 9)'; 358 | case '(n == 0 || n == 1) || n >= 11 && n <= 99': 359 | return 'n >= 2 && (n < 11 || n > 99)'; 360 | } 361 | throw new Exception("Unable to reverse the formula '{$formula}'"); 362 | } 363 | 364 | /** 365 | * Reduce some excessively complex formulas. 366 | * 367 | * @param string $formula 368 | * 369 | * @return string 370 | */ 371 | private static function reduceFormula($formula) 372 | { 373 | $map = array( 374 | 'n != 0 && n != 1' => 'n > 1', 375 | '(n == 0 || n == 1) && n != 0' => 'n == 1', 376 | ); 377 | 378 | return isset($map[$formula]) ? $map[$formula] : $formula; 379 | } 380 | 381 | /** 382 | * Take one variable and, if it's a string, we transliterate it to US-ASCII. 
383 | * 384 | * @param mixed $value the variable to work on 385 | * 386 | * @throws \Exception 387 | */ 388 | private static function asciifier(&$value) 389 | { 390 | if (is_string($value) && $value !== '') { 391 | // Avoid converting from 'Ÿ' to '"Y', let's prefer 'Y' 392 | $value = strtr($value, array( 393 | 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A', 394 | 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E', 395 | 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I', 396 | 'Ñ' => 'N', 397 | 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O', 398 | 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U', 399 | 'Ÿ' => 'Y', 'Ý' => 'Y', 400 | 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a', 401 | 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e', 402 | 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i', 403 | 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o', 404 | 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u', 405 | 'ý' => 'y', 'ÿ' => 'y', 406 | '…' => '...', 407 | 'ʼ' => "'", '’' => "'", 408 | )); 409 | } 410 | } 411 | } 412 | -------------------------------------------------------------------------------- /bin/import-cldr-data: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env php 2 | outputDir) && !mkdir($options->outputDir, 0777, true)) { 28 | throw new RuntimeException("Cannot create output directory: {$options->outputDir}\n"); 29 | } 30 | $options->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', realpath($options->outputDir)); 31 | $documentStorage = new DocumentStorage($options); 32 | echo 'Processing languages... '; 33 | $languages = new Languages($options, $documentStorage); 34 | echo "done.\n"; 35 | echo 'Processing scripts... '; 36 | $scripts = new Scripts($options, $documentStorage); 37 | echo "done.\n"; 38 | echo 'Processing territories... 
'; 39 | $territories = new Territories($options, $documentStorage); 40 | echo "done.\n"; 41 | echo 'Processing plural rules... '; 42 | $plurals = new Plurals($options, $documentStorage, $languages); 43 | echo "done.\n"; 44 | echo 'Saving... '; 45 | 46 | $languages->save(); 47 | $scripts->save(); 48 | $territories->save(); 49 | $plurals->save(); 50 | 51 | echo "done.\n"; 52 | } 53 | 54 | class Options 55 | { 56 | /** 57 | * @var string 58 | */ 59 | public $cldrVersion; 60 | 61 | /** 62 | * @var string 63 | */ 64 | public $outputDir; 65 | 66 | public function __construct(array $argv) 67 | { 68 | if (array_intersect($argv, array('-h', '--help'))) { 69 | $this->showSyntax($argv[0], 0); 70 | } 71 | $this->outputDir = $this->getDefaultOutputDir(); 72 | switch (count($argv)) { 73 | case 3: 74 | $this->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $argv[2]); 75 | // no break 76 | case 2: 77 | $this->cldrVersion = $argv[1]; 78 | if (!preg_match('/^\d+(\.\d+)?(-(alpha|beta)\d+)?$/', $this->cldrVersion)) { 79 | throw new RuntimeException("{$this->cldrVersion} is not a valid CLDR version identifier"); 80 | } 81 | break; 82 | default: 83 | $this->showSyntax($argv[0], 1); 84 | } 85 | } 86 | 87 | /** 88 | * @param string $programName 89 | * @param int $exitCode 90 | * 91 | * @return never 92 | */ 93 | private function showSyntax($programName, $exitCode) 94 | { 95 | $programName = str_replace('/', DIRECTORY_SEPARATOR, $programName); 96 | $defaultOutputDir = str_replace('/', DIRECTORY_SEPARATOR, $this->getDefaultOutputDir()); 97 | 98 | echo << [output-dir] 100 | 101 | Arguments: 102 | cldr-version: the version of the CLDR data. 
103 | Examples: 104 | 47 105 | 47-beta2 106 | 47-alpha1 107 | 46.1 108 | 46.1-beta1 109 | output-dir: the directory where the data will be written to 110 | Default: {$defaultOutputDir} 111 | 112 | EOT; 113 | exit($exitCode); 114 | } 115 | 116 | /** 117 | * @return string 118 | */ 119 | private function getDefaultOutputDir() 120 | { 121 | return str_replace(DIRECTORY_SEPARATOR, '/', dirname(__DIR__)) . '/src/cldr-data'; 122 | } 123 | } 124 | 125 | class DocumentStorage 126 | { 127 | /** 128 | * @var string 129 | */ 130 | private $baseUrl; 131 | 132 | private $context; 133 | 134 | private $cache; 135 | 136 | public function __construct(Options $options) 137 | { 138 | $this->baseUrl = 'https://raw.githubusercontent.com/unicode-org/cldr/refs/tags/release-' . str_replace('.', '-', $options->cldrVersion); 139 | $this->context = stream_context_create(array( 140 | 'http' => array( 141 | 'follow_location' => 1, 142 | 'ignore_errors' => false, 143 | ), 144 | )); 145 | $this->cache = array(); 146 | } 147 | 148 | /** 149 | * @param string $path 150 | * 151 | * @throws RuntimeException 152 | * 153 | * @return DOMDocument 154 | */ 155 | public function get($path) 156 | { 157 | if (!isset($this->cache[$path])) { 158 | $xml = $this->fetch($path); 159 | $doc = $this->loadXml($xml); 160 | $this->cache[$path] = $doc; 161 | } 162 | 163 | return $this->cache[$path]; 164 | } 165 | 166 | /** 167 | * @param string $path 168 | * 169 | * @throws RuntimeException 170 | * 171 | * @return string 172 | */ 173 | private function fetch($path) 174 | { 175 | $url = $this->baseUrl . '/' . 
ltrim($path, '/'); 176 | set_error_handler(function () {}, -1); 177 | $content = file_get_contents($url, false, $this->context); 178 | restore_error_handler(); 179 | if ($content === false) { 180 | $details = ''; 181 | /** @var array $http_response_header */ 182 | if (!empty($http_response_header)) { 183 | $details = " - {$http_response_header[0]}"; 184 | } 185 | throw new RuntimeException("Failed to download from {$url}{$details}"); 186 | } 187 | 188 | return $content; 189 | } 190 | 191 | /** 192 | * @param string $xml 193 | * 194 | * @throws RuntimeException 195 | * 196 | * @return DOMDocument 197 | */ 198 | private function loadXml($xml) 199 | { 200 | $doc = new DOMDocument(); 201 | libxml_clear_errors(); 202 | $restore = libxml_use_internal_errors(true); 203 | $loaded = $doc->loadXML($xml); 204 | $errors = libxml_get_errors(); 205 | libxml_use_internal_errors($restore); 206 | $lines = array(); 207 | foreach ($errors as $error) { 208 | $lines[] = "{$error->message} at line {$error->line}"; 209 | } 210 | if (!$loaded || $errors !== array()) { 211 | throw new RuntimeException("Failed to parse XML:\n" . 
implode("\n", $lines)); 212 | } 213 | 214 | return $doc; 215 | } 216 | } 217 | 218 | abstract class Processor 219 | { 220 | /** 221 | * @var Options 222 | */ 223 | protected $options; 224 | 225 | /** 226 | * @var array 227 | */ 228 | protected $data; 229 | 230 | /** 231 | * @var DocumentStorage 232 | */ 233 | private $documentStorage; 234 | 235 | /** 236 | * @var string 237 | */ 238 | private $path; 239 | 240 | protected function __construct(Options $options, DocumentStorage $documentStorage, $path) 241 | { 242 | $this->options = $options; 243 | $this->documentStorage = $documentStorage; 244 | $this->path = ltrim($path, '/'); 245 | $doc = $this->documentStorage->get($this->path); 246 | $this->data = $this->parse($doc); 247 | } 248 | 249 | /** 250 | * @return void 251 | */ 252 | public function save() 253 | { 254 | $file = $this->getOutputFile(); 255 | $dir = dirname($file); 256 | if (!is_dir($dir) && !mkdir($dir, 0777, true)) { 257 | throw new RuntimeException("Cannot create directory: {$dir}"); 258 | } 259 | $flags = 0; 260 | if (defined('JSON_UNESCAPED_SLASHES')) { 261 | $flags |= JSON_UNESCAPED_SLASHES; 262 | } 263 | if (defined('JSON_UNESCAPED_UNICODE')) { 264 | $flags |= JSON_UNESCAPED_UNICODE; 265 | } 266 | if (defined('JSON_PRETTY_PRINT')) { 267 | $flags |= JSON_PRETTY_PRINT; 268 | } 269 | if (defined('JSON_THROW_ON_ERROR')) { 270 | $flags |= JSON_THROW_ON_ERROR; 271 | } 272 | $json = json_encode($this->data, $flags); 273 | if ($json === false || file_put_contents($file, $json) === false) { // strict checks: never overwrite the file with a failed encoding, and only a false return is a write failure 274 | throw new RuntimeException("Failed to write to file: {$file}"); 275 | } 276 | } 277 | 278 | /** 279 | * @return array 280 | */ 281 | abstract protected function parse(DOMDocument $doc); 282 | 283 | /** 284 | * @return void 285 | */ 286 | protected function sortByKeyWithPossiblyAlt(array &$data) 287 | { 288 | uksort($data, function ($a, $b) { 289 | $aAlt = strpos($a, '-alt-') !== false; 290 | $bAlt = strpos($b, '-alt-') !== false; 291 | if ($aAlt !== $bAlt) { 292 | if (strpos($a, "{$b}-alt-") 
=== 0) { 293 | return 1; // $a is an "-alt-" variant of $b: the base key goes first 294 | } 295 | if (strpos($b, "{$a}-alt-") === 0) { 296 | return -1; // $b is an "-alt-" variant of $a: keep $a (the base key) first 297 | } 298 | } 299 | 300 | return strcasecmp($a, $b); 301 | }); 302 | } 303 | 304 | /** 305 | * @return string 306 | */ 307 | abstract protected function getOutputRelativeFileName(); 308 | 332 | /** 333 | * @return string 334 | */ 335 | private function getOutputFile() 336 | { 337 | return $this->options->outputDir . '/' . 
ltrim($this->getOutputRelativeFileName(), '/'); 338 | } 339 | } 340 | 341 | class Plurals extends Processor 342 | { 343 | /** 344 | * @var Languages 345 | */ 346 | private $languages; 347 | 348 | public function __construct(Options $options, DocumentStorage $documentStorage, Languages $languages) 349 | { 350 | $this->languages = $languages; 351 | parent::__construct($options, $documentStorage, 'common/supplemental/plurals.xml'); 352 | } 353 | 354 | /** 355 | * {@inheritdoc} 356 | * 357 | * @see Processor::parse() 358 | */ 359 | protected function parse(DOMDocument $doc) 360 | { 361 | $data = array(); 362 | $xpath = new DOMXPath($doc); 363 | $xPluralRulesList = $xpath->query('/supplementalData/plurals[@type="cardinal"]/pluralRules'); 364 | $definedLanguageIDs = $this->languages->getDefinedLanguageIDs(); 365 | $knownMissingLanguages = array( 366 | 'guw', // Gun 367 | 'lld', // Dolomitic Ladin 368 | 'hnj', // Hmong Njua 369 | 'nah', // Nahuatl 370 | 'smi', // Sami 371 | ); 372 | $replacements = array( 373 | 'in' => 'id', // Former Indonesian 374 | 'iw' => 'he', // Former Hebrew 375 | 'jw' => 'jv', // Former Javanese 376 | 'ji' => 'yi', // Former Yiddish 377 | 'mo' => 'ro-MD', // former Moldavian 378 | 'bh' => '', // Former Bihari: dismissed because it can be 'bho', 'mai' or 'mag' 379 | // Just a CLDR placeholder 380 | 'root' => '', 381 | ); 382 | $unrecognizedLocaleCodes = array(); 383 | foreach ($xPluralRulesList as $xPluralRules) { 384 | $locales = preg_split('/\s+/', (string) $xPluralRules->getAttribute('locales'), -1, PREG_SPLIT_NO_EMPTY); 385 | if ($locales === array()) { 386 | throw new RuntimeException('No locales found in pluralRules element'); 387 | } 388 | $elements = array( 389 | 'pluralRule-count-zero' => null, 390 | 'pluralRule-count-one' => null, 391 | 'pluralRule-count-two' => null, 392 | 'pluralRule-count-few' => null, 393 | 'pluralRule-count-many' => null, 394 | 'pluralRule-count-other' => null, 395 | ); 396 | foreach ($xPluralRules->childNodes as 
$xPluralRule) { 397 | if (!$xPluralRule instanceof DOMElement) { 398 | continue; 399 | } 400 | if ($xPluralRule->tagName !== 'pluralRule') { 401 | throw new RuntimeException("Unexpected element: {$xPluralRule->tagName}"); 402 | } 403 | $count = (string) $xPluralRule->getAttribute('count'); 404 | if ($count === '') { 405 | throw new RuntimeException('Missing count attribute'); 406 | } 407 | $key = "pluralRule-count-{$count}"; 408 | if (!array_key_exists($key, $elements)) { 409 | throw new RuntimeException("Unknown count: {$count}"); 410 | } 411 | if ($elements[$key] !== null) { 412 | throw new RuntimeException("Duplicate count: {$count}"); 413 | } 414 | $elements[$key] = $xPluralRule->textContent; 415 | } 416 | $elements = array_filter($elements, function ($value) { 417 | return $value !== null; 418 | }); 419 | if ($elements === array()) { 420 | throw new RuntimeException('No plural rules found'); 421 | } 422 | foreach ($locales as $locale) { 423 | $locale = str_replace('_', '-', $locale); 424 | $overwrite = true; 425 | if (isset($data[$locale]) && array_search($locale, $replacements, true) === false) { 426 | throw new RuntimeException("Duplicate locale: {$locale}"); 427 | } 428 | if (!in_array($locale, $definedLanguageIDs, true) && !in_array($locale, $knownMissingLanguages, true)) { 429 | if (!isset($replacements[$locale])) { 430 | $unrecognizedLocaleCodes[] = $locale; 431 | continue; 432 | } 433 | $locale = $replacements[$locale]; 434 | if ($locale === '') { 435 | continue; 436 | } 437 | $overwrite = false; 438 | } 439 | if ($overwrite || !isset($data[$locale])) { 440 | $data[$locale] = $elements; 441 | } 442 | } 443 | } 444 | if ($unrecognizedLocaleCodes !== array()) { 445 | throw new RuntimeException("The following locales are not defined:\n- " . 
implode("\n- ", $unrecognizedLocaleCodes)); 446 | } 447 | if ($data === array()) { 448 | throw new RuntimeException('No plural rules found'); 449 | } 450 | $this->sortByKeyWithPossiblyAlt($data); 451 | 452 | return array( 453 | 'supplemental' => array( 454 | 'version' => array( 455 | '_cldrVersion' => $this->options->cldrVersion, 456 | ), 457 | 'plurals-type-cardinal' => $data, 458 | ), 459 | ); 460 | } 461 | 462 | /** 463 | * {@inheritdoc} 464 | * 465 | * @see Processor::getOutputRelativeFileName() 466 | */ 467 | protected function getOutputRelativeFileName() 468 | { 469 | return 'supplemental/plurals.json'; 470 | } 471 | } 472 | 473 | abstract class LocaleDisplayName extends Processor 474 | { 475 | public function __construct(Options $options, DocumentStorage $documentStorage) 476 | { 477 | parent::__construct($options, $documentStorage, 'common/main/en.xml'); 478 | } 479 | 480 | /** 481 | * {@inheritdoc} 482 | * 483 | * @see Processor::parse() 484 | */ 485 | protected function parse(DOMDocument $doc) 486 | { 487 | $data = array(); 488 | $xpath = new DOMXPath($doc); 489 | $xElementList = $xpath->query($this->getXPathSelector()); 490 | foreach ($xElementList as $xElement) { 491 | $type = (string) $xElement->getAttribute('type'); 492 | if ($type === '') { 493 | throw new RuntimeException('Missing type attribute'); 494 | } 495 | $key = str_replace('_', '-', $type); 496 | $alt = (string) $xElement->getAttribute('alt'); 497 | if ($alt !== '') { 498 | $key = "{$key}-alt-{$alt}"; 499 | } 500 | if (isset($data[$key])) { 501 | throw new RuntimeException("Duplicate key: {$key}"); 502 | } 503 | $data[$key] = (string) $xElement->textContent; 504 | } 505 | if ($data === array()) { 506 | throw new RuntimeException('No elements found'); 507 | } 508 | $this->sortByKeyWithPossiblyAlt($data); 509 | 510 | return array( 511 | 'main' => array( 512 | 'en-US' => array( 513 | 'identity' => array( 514 | 'version' => array( 515 | '_cldrVersion' => $this->options->cldrVersion, 516 | ), 517 
| 'language' => 'en', 518 | 'territory' => 'US', 519 | ), 520 | 'localeDisplayNames' => array( 521 | $this->getExportedNodeName() => $data, 522 | ), 523 | ), 524 | ), 525 | ); 526 | } 527 | 528 | /** 529 | * @return string 530 | */ 531 | abstract protected function getXPathSelector(); 532 | 533 | /** 534 | * @return string 535 | */ 536 | abstract protected function getExportedNodeName(); 537 | } 538 | 539 | class Languages extends LocaleDisplayName 540 | { 541 | /** 542 | * @return string[] 543 | */ 544 | public function getDefinedLanguageIDs() 545 | { 546 | return array_values(array_filter( 547 | array_keys($this->data['main']['en-US']['localeDisplayNames'][$this->getExportedNodeName()]), 548 | function ($key) { 549 | return strpos((string) $key, '-alt-') === false; 550 | } 551 | )); 552 | } 553 | 554 | /** 555 | * {@inheritdoc} 556 | * 557 | * @see LocaleDisplayName::getXPathSelector() 558 | */ 559 | protected function getXPathSelector() 560 | { 561 | return '/ldml/localeDisplayNames/languages/language'; 562 | } 563 | 564 | /** 565 | * {@inheritdoc} 566 | * 567 | * @see LocaleDisplayName::getExportedNodeName() 568 | */ 569 | protected function getExportedNodeName() 570 | { 571 | return 'languages'; 572 | } 573 | 574 | /** 575 | * {@inheritdoc} 576 | * 577 | * @see Processor::getOutputRelativeFileName() 578 | */ 579 | protected function getOutputRelativeFileName() 580 | { 581 | return 'main/en-US/languages.json'; 582 | } 583 | } 584 | 585 | class Scripts extends LocaleDisplayName 586 | { 587 | /** 588 | * {@inheritdoc} 589 | * 590 | * @see LocaleDisplayName::getXPathSelector() 591 | */ 592 | protected function getXPathSelector() 593 | { 594 | return '/ldml/localeDisplayNames/scripts/script'; 595 | } 596 | 597 | /** 598 | * {@inheritdoc} 599 | * 600 | * @see LocaleDisplayName::getExportedNodeName() 601 | */ 602 | protected function getExportedNodeName() 603 | { 604 | return 'scripts'; 605 | } 606 | 607 | /** 608 | * {@inheritdoc} 609 | * 610 | * @see 
Processor::getOutputRelativeFileName() 611 | */ 612 | protected function getOutputRelativeFileName() 613 | { 614 | return 'main/en-US/scripts.json'; 615 | } 616 | } 617 | 618 | class Territories extends LocaleDisplayName 619 | { 620 | /** 621 | * {@inheritdoc} 622 | * 623 | * @see LocaleDisplayName::getXPathSelector() 624 | */ 625 | protected function getXPathSelector() 626 | { 627 | return '/ldml/localeDisplayNames/territories/territory'; 628 | } 629 | 630 | /** 631 | * {@inheritdoc} 632 | * 633 | * @see LocaleDisplayName::getExportedNodeName() 634 | */ 635 | protected function getExportedNodeName() 636 | { 637 | return 'territories'; 638 | } 639 | 640 | /** 641 | * {@inheritdoc} 642 | * 643 | * @see Processor::getOutputRelativeFileName() 644 | */ 645 | protected function getOutputRelativeFileName() 646 | { 647 | return 'main/en-US/territories.json'; 648 | } 649 | } 650 | 651 | try { 652 | main($argv); 653 | } catch (RuntimeException $e) { 654 | fwrite(STDERR, $e->getMessage() . "\n"); 655 | exit(1); 656 | } 657 | -------------------------------------------------------------------------------- /src/cldr-data/main/en-US/languages.json: -------------------------------------------------------------------------------- 1 | { 2 | "main": { 3 | "en-US": { 4 | "identity": { 5 | "version": { 6 | "_cldrVersion": "47" 7 | }, 8 | "language": "en", 9 | "territory": "US" 10 | }, 11 | "localeDisplayNames": { 12 | "languages": { 13 | "aa": "Afar", 14 | "ab": "Abkhazian", 15 | "ace": "Acehnese", 16 | "ach": "Acoli", 17 | "ada": "Adangme", 18 | "ady": "Adyghe", 19 | "ae": "Avestan", 20 | "aeb": "Tunisian Arabic", 21 | "af": "Afrikaans", 22 | "afh": "Afrihili", 23 | "agq": "Aghem", 24 | "ain": "Ainu", 25 | "ak": "Akan", 26 | "akk": "Akkadian", 27 | "akz": "Alabama", 28 | "ale": "Aleut", 29 | "aln": "Gheg Albanian", 30 | "alt": "Southern Altai", 31 | "am": "Amharic", 32 | "an": "Aragonese", 33 | "ang": "Old English", 34 | "ann": "Obolo", 35 | "anp": "Angika", 36 | "ar": "Arabic", 37 
| "ar-001": "Modern Standard Arabic", 38 | "arc": "Aramaic", 39 | "arn": "Mapuche", 40 | "aro": "Araona", 41 | "arp": "Arapaho", 42 | "arq": "Algerian Arabic", 43 | "ars": "Najdi Arabic", 44 | "ars-alt-menu": "Arabic, Najdi", 45 | "arw": "Arawak", 46 | "ary": "Moroccan Arabic", 47 | "arz": "Egyptian Arabic", 48 | "as": "Assamese", 49 | "asa": "Asu", 50 | "ase": "American Sign Language", 51 | "ast": "Asturian", 52 | "atj": "Atikamekw", 53 | "av": "Avaric", 54 | "avk": "Kotava", 55 | "awa": "Awadhi", 56 | "ay": "Aymara", 57 | "az": "Azerbaijani", 58 | "az-alt-short": "Azeri", 59 | "ba": "Bashkir", 60 | "bal": "Baluchi", 61 | "ban": "Balinese", 62 | "bar": "Bavarian", 63 | "bas": "Basaa", 64 | "bax": "Bamun", 65 | "bbc": "Batak Toba", 66 | "bbj": "Ghomala", 67 | "be": "Belarusian", 68 | "bej": "Beja", 69 | "bem": "Bemba", 70 | "bew": "Betawi", 71 | "bez": "Bena", 72 | "bfd": "Bafut", 73 | "bfq": "Badaga", 74 | "bg": "Bulgarian", 75 | "bgc": "Haryanvi", 76 | "bgn": "Western Balochi", 77 | "bho": "Bhojpuri", 78 | "bi": "Bislama", 79 | "bik": "Bikol", 80 | "bin": "Bini", 81 | "bjn": "Banjar", 82 | "bkm": "Kom", 83 | "bla": "Siksiká", 84 | "blo": "Anii", 85 | "blt": "Tai Dam", 86 | "bm": "Bambara", 87 | "bn": "Bangla", 88 | "bo": "Tibetan", 89 | "bpy": "Bishnupriya", 90 | "bqi": "Bakhtiari", 91 | "br": "Breton", 92 | "bra": "Braj", 93 | "brh": "Brahui", 94 | "brx": "Bodo", 95 | "bs": "Bosnian", 96 | "bss": "Akoose", 97 | "bua": "Buriat", 98 | "bug": "Buginese", 99 | "bum": "Bulu", 100 | "byn": "Blin", 101 | "byv": "Medumba", 102 | "ca": "Catalan", 103 | "cad": "Caddo", 104 | "car": "Carib", 105 | "cay": "Cayuga", 106 | "cch": "Atsam", 107 | "ccp": "Chakma", 108 | "ce": "Chechen", 109 | "ceb": "Cebuano", 110 | "cgg": "Chiga", 111 | "ch": "Chamorro", 112 | "chb": "Chibcha", 113 | "chg": "Chagatai", 114 | "chk": "Chuukese", 115 | "chm": "Mari", 116 | "chn": "Chinook Jargon", 117 | "cho": "Choctaw", 118 | "chp": "Chipewyan", 119 | "chr": "Cherokee", 120 | "chy": "Cheyenne", 
121 | "cic": "Chickasaw", 122 | "ckb": "Central Kurdish", 123 | "ckb-alt-menu": "Kurdish, Central", 124 | "ckb-alt-variant": "Kurdish, Sorani", 125 | "clc": "Chilcotin", 126 | "co": "Corsican", 127 | "cop": "Coptic", 128 | "cps": "Capiznon", 129 | "cr": "Cree", 130 | "cr-alt-long": "Woods Cree", 131 | "crg": "Michif", 132 | "crh": "Crimean Tatar", 133 | "crj": "Southern East Cree", 134 | "crk": "Plains Cree", 135 | "crl": "Northern East Cree", 136 | "crm": "Moose Cree", 137 | "crr": "Carolina Algonquian", 138 | "crs": "Seselwa Creole French", 139 | "cs": "Czech", 140 | "csb": "Kashubian", 141 | "csw": "Swampy Cree", 142 | "cu": "Church Slavic", 143 | "cv": "Chuvash", 144 | "cy": "Welsh", 145 | "da": "Danish", 146 | "dak": "Dakota", 147 | "dar": "Dargwa", 148 | "dav": "Taita", 149 | "de": "German", 150 | "de-AT": "Austrian German", 151 | "de-CH": "Swiss High German", 152 | "del": "Delaware", 153 | "den": "Slave", 154 | "dgr": "Dogrib", 155 | "din": "Dinka", 156 | "dje": "Zarma", 157 | "doi": "Dogri", 158 | "dsb": "Lower Sorbian", 159 | "dtp": "Central Dusun", 160 | "dua": "Duala", 161 | "dum": "Middle Dutch", 162 | "dv": "Divehi", 163 | "dyo": "Jola-Fonyi", 164 | "dyu": "Dyula", 165 | "dz": "Dzongkha", 166 | "dzg": "Dazaga", 167 | "ebu": "Embu", 168 | "ee": "Ewe", 169 | "efi": "Efik", 170 | "egl": "Emilian", 171 | "egy": "Ancient Egyptian", 172 | "eka": "Ekajuk", 173 | "el": "Greek", 174 | "elx": "Elamite", 175 | "en": "English", 176 | "en-AU": "Australian English", 177 | "en-CA": "Canadian English", 178 | "en-GB": "British English", 179 | "en-GB-alt-short": "UK English", 180 | "en-US": "American English", 181 | "en-US-alt-short": "US English", 182 | "enm": "Middle English", 183 | "eo": "Esperanto", 184 | "es": "Spanish", 185 | "es-419": "Latin American Spanish", 186 | "es-ES": "European Spanish", 187 | "es-MX": "Mexican Spanish", 188 | "esu": "Central Yupik", 189 | "et": "Estonian", 190 | "eu": "Basque", 191 | "ewo": "Ewondo", 192 | "ext": "Extremaduran", 193 | 
"fa": "Persian", 194 | "fa-AF": "Dari", 195 | "fan": "Fang", 196 | "fat": "Fanti", 197 | "ff": "Fula", 198 | "fi": "Finnish", 199 | "fil": "Filipino", 200 | "fit": "Tornedalen Finnish", 201 | "fj": "Fijian", 202 | "fo": "Faroese", 203 | "fon": "Fon", 204 | "fr": "French", 205 | "fr-CA": "Canadian French", 206 | "fr-CH": "Swiss French", 207 | "frc": "Cajun French", 208 | "frm": "Middle French", 209 | "fro": "Old French", 210 | "frp": "Arpitan", 211 | "frr": "Northern Frisian", 212 | "frs": "Eastern Frisian", 213 | "fur": "Friulian", 214 | "fy": "Western Frisian", 215 | "ga": "Irish", 216 | "gaa": "Ga", 217 | "gag": "Gagauz", 218 | "gan": "Gan Chinese", 219 | "gay": "Gayo", 220 | "gba": "Gbaya", 221 | "gbz": "Zoroastrian Dari", 222 | "gd": "Scottish Gaelic", 223 | "gez": "Geez", 224 | "gil": "Gilbertese", 225 | "gl": "Galician", 226 | "glk": "Gilaki", 227 | "gmh": "Middle High German", 228 | "gn": "Guarani", 229 | "goh": "Old High German", 230 | "gon": "Gondi", 231 | "gor": "Gorontalo", 232 | "got": "Gothic", 233 | "grb": "Grebo", 234 | "grc": "Ancient Greek", 235 | "gsw": "Swiss German", 236 | "gu": "Gujarati", 237 | "guc": "Wayuu", 238 | "gur": "Frafra", 239 | "guz": "Gusii", 240 | "gv": "Manx", 241 | "gwi": "Gwichʼin", 242 | "ha": "Hausa", 243 | "hai": "Haida", 244 | "hak": "Hakka Chinese", 245 | "haw": "Hawaiian", 246 | "hax": "Southern Haida", 247 | "he": "Hebrew", 248 | "hi": "Hindi", 249 | "hi-Latn": "Hindi (Latin)", 250 | "hi-Latn-alt-variant": "Hinglish", 251 | "hif": "Fiji Hindi", 252 | "hil": "Hiligaynon", 253 | "hit": "Hittite", 254 | "hmn": "Hmong", 255 | "hnj": "Hmong Njua", 256 | "ho": "Hiri Motu", 257 | "hr": "Croatian", 258 | "hsb": "Upper Sorbian", 259 | "hsn": "Xiang Chinese", 260 | "ht": "Haitian Creole", 261 | "hu": "Hungarian", 262 | "hup": "Hupa", 263 | "hur": "Halkomelem", 264 | "hy": "Armenian", 265 | "hz": "Herero", 266 | "ia": "Interlingua", 267 | "iba": "Iban", 268 | "ibb": "Ibibio", 269 | "id": "Indonesian", 270 | "ie": "Interlingue", 271 
| "ig": "Igbo", 272 | "ii": "Sichuan Yi", 273 | "ik": "Inupiaq", 274 | "ikt": "Western Canadian Inuktitut", 275 | "ilo": "Iloko", 276 | "inh": "Ingush", 277 | "io": "Ido", 278 | "is": "Icelandic", 279 | "it": "Italian", 280 | "iu": "Inuktitut", 281 | "izh": "Ingrian", 282 | "ja": "Japanese", 283 | "jam": "Jamaican Creole English", 284 | "jbo": "Lojban", 285 | "jgo": "Ngomba", 286 | "jmc": "Machame", 287 | "jpr": "Judeo-Persian", 288 | "jrb": "Judeo-Arabic", 289 | "jut": "Jutish", 290 | "jv": "Javanese", 291 | "ka": "Georgian", 292 | "kaa": "Kara-Kalpak", 293 | "kab": "Kabyle", 294 | "kac": "Kachin", 295 | "kaj": "Jju", 296 | "kam": "Kamba", 297 | "kaw": "Kawi", 298 | "kbd": "Kabardian", 299 | "kbl": "Kanembu", 300 | "kcg": "Tyap", 301 | "kde": "Makonde", 302 | "kea": "Kabuverdianu", 303 | "ken": "Kenyang", 304 | "kfo": "Koro", 305 | "kg": "Kongo", 306 | "kgp": "Kaingang", 307 | "kha": "Khasi", 308 | "kho": "Khotanese", 309 | "khq": "Koyra Chiini", 310 | "khw": "Khowar", 311 | "ki": "Kikuyu", 312 | "kiu": "Kirmanjki", 313 | "kj": "Kuanyama", 314 | "kk": "Kazakh", 315 | "kkj": "Kako", 316 | "kl": "Kalaallisut", 317 | "kln": "Kalenjin", 318 | "km": "Khmer", 319 | "kmb": "Kimbundu", 320 | "kn": "Kannada", 321 | "ko": "Korean", 322 | "koi": "Komi-Permyak", 323 | "kok": "Konkani", 324 | "kos": "Kosraean", 325 | "kpe": "Kpelle", 326 | "kr": "Kanuri", 327 | "krc": "Karachay-Balkar", 328 | "kri": "Krio", 329 | "krj": "Kinaray-a", 330 | "krl": "Karelian", 331 | "kru": "Kurukh", 332 | "ks": "Kashmiri", 333 | "ksb": "Shambala", 334 | "ksf": "Bafia", 335 | "ksh": "Colognian", 336 | "ku": "Kurdish", 337 | "kum": "Kumyk", 338 | "kut": "Kutenai", 339 | "kv": "Komi", 340 | "kw": "Cornish", 341 | "kwk": "Kwakʼwala", 342 | "kxv": "Kuvi", 343 | "ky": "Kyrgyz", 344 | "ky-alt-variant": "Kirghiz", 345 | "la": "Latin", 346 | "lad": "Ladino", 347 | "lag": "Langi", 348 | "lah": "Western Panjabi", 349 | "lam": "Lamba", 350 | "lb": "Luxembourgish", 351 | "lez": "Lezghian", 352 | "lfn": 
"Lingua Franca Nova", 353 | "lg": "Ganda", 354 | "li": "Limburgish", 355 | "lij": "Ligurian", 356 | "lil": "Lillooet", 357 | "liv": "Livonian", 358 | "lkt": "Lakota", 359 | "lmo": "Lombard", 360 | "ln": "Lingala", 361 | "lo": "Lao", 362 | "lol": "Mongo", 363 | "lou": "Louisiana Creole", 364 | "loz": "Lozi", 365 | "lrc": "Northern Luri", 366 | "lsm": "Saamia", 367 | "lt": "Lithuanian", 368 | "ltg": "Latgalian", 369 | "lu": "Luba-Katanga", 370 | "lua": "Luba-Lulua", 371 | "lui": "Luiseno", 372 | "lun": "Lunda", 373 | "luo": "Luo", 374 | "lus": "Mizo", 375 | "luy": "Luyia", 376 | "lv": "Latvian", 377 | "lzh": "Literary Chinese", 378 | "lzz": "Laz", 379 | "mad": "Madurese", 380 | "maf": "Mafa", 381 | "mag": "Magahi", 382 | "mai": "Maithili", 383 | "mak": "Makasar", 384 | "man": "Mandingo", 385 | "mas": "Masai", 386 | "mde": "Maba", 387 | "mdf": "Moksha", 388 | "mdr": "Mandar", 389 | "men": "Mende", 390 | "mer": "Meru", 391 | "mfe": "Morisyen", 392 | "mg": "Malagasy", 393 | "mga": "Middle Irish", 394 | "mgh": "Makhuwa-Meetto", 395 | "mgo": "Metaʼ", 396 | "mh": "Marshallese", 397 | "mi": "Māori", 398 | "mic": "Mi'kmaw", 399 | "min": "Minangkabau", 400 | "mk": "Macedonian", 401 | "ml": "Malayalam", 402 | "mn": "Mongolian", 403 | "mnc": "Manchu", 404 | "mni": "Manipuri", 405 | "moe": "Innu-aimun", 406 | "moh": "Mohawk", 407 | "mos": "Mossi", 408 | "mr": "Marathi", 409 | "mrj": "Western Mari", 410 | "ms": "Malay", 411 | "mt": "Maltese", 412 | "mua": "Mundang", 413 | "mul": "Multiple languages", 414 | "mus": "Muscogee", 415 | "mus-alt-official": "Mvskoke", 416 | "mus-alt-variant": "Muscogee", 417 | "mwl": "Mirandese", 418 | "mwr": "Marwari", 419 | "mwv": "Mentawai", 420 | "my": "Burmese", 421 | "my-alt-variant": "Myanmar Language", 422 | "mye": "Myene", 423 | "myv": "Erzya", 424 | "mzn": "Mazanderani", 425 | "na": "Nauru", 426 | "nan": "Min Nan Chinese", 427 | "nap": "Neapolitan", 428 | "naq": "Nama", 429 | "nb": "Norwegian Bokmål", 430 | "nd": "North Ndebele", 431 | "nds": 
"Low German", 432 | "nds-NL": "Low Saxon", 433 | "ne": "Nepali", 434 | "new": "Newari", 435 | "ng": "Ndonga", 436 | "nia": "Nias", 437 | "niu": "Niuean", 438 | "njo": "Ao Naga", 439 | "nl": "Dutch", 440 | "nl-BE": "Flemish", 441 | "nmg": "Kwasio", 442 | "nn": "Norwegian Nynorsk", 443 | "nnh": "Ngiemboon", 444 | "no": "Norwegian", 445 | "nog": "Nogai", 446 | "non": "Old Norse", 447 | "nov": "Novial", 448 | "nqo": "N’Ko", 449 | "nr": "South Ndebele", 450 | "nso": "Northern Sotho", 451 | "nus": "Nuer", 452 | "nv": "Navajo", 453 | "nwc": "Classical Newari", 454 | "ny": "Nyanja", 455 | "nym": "Nyamwezi", 456 | "nyn": "Nyankole", 457 | "nyo": "Nyoro", 458 | "nzi": "Nzima", 459 | "oc": "Occitan", 460 | "oj": "Ojibwa", 461 | "ojb": "Northwestern Ojibwa", 462 | "ojc": "Central Ojibwa", 463 | "ojs": "Oji-Cree", 464 | "ojw": "Western Ojibwa", 465 | "oka": "Okanagan", 466 | "om": "Oromo", 467 | "or": "Odia", 468 | "os": "Ossetic", 469 | "osa": "Osage", 470 | "ota": "Ottoman Turkish", 471 | "pa": "Punjabi", 472 | "pag": "Pangasinan", 473 | "pal": "Pahlavi", 474 | "pam": "Pampanga", 475 | "pap": "Papiamento", 476 | "pau": "Palauan", 477 | "pcd": "Picard", 478 | "pcm": "Nigerian Pidgin", 479 | "pdc": "Pennsylvania German", 480 | "pdt": "Plautdietsch", 481 | "peo": "Old Persian", 482 | "pfl": "Palatine German", 483 | "phn": "Phoenician", 484 | "pi": "Pali", 485 | "pis": "Pijin", 486 | "pl": "Polish", 487 | "pms": "Piedmontese", 488 | "pnt": "Pontic", 489 | "pon": "Pohnpeian", 490 | "pqm": "Maliseet-Passamaquoddy", 491 | "prg": "Prussian", 492 | "pro": "Old Provençal", 493 | "ps": "Pashto", 494 | "ps-alt-variant": "Pushto", 495 | "pt": "Portuguese", 496 | "pt-BR": "Brazilian Portuguese", 497 | "pt-PT": "European Portuguese", 498 | "qu": "Quechua", 499 | "quc": "Kʼicheʼ", 500 | "qug": "Chimborazo Highland Quichua", 501 | "raj": "Rajasthani", 502 | "rap": "Rapanui", 503 | "rar": "Rarotongan", 504 | "rgn": "Romagnol", 505 | "rhg": "Rohingya", 506 | "rif": "Riffian", 507 | "rm": 
"Romansh", 508 | "rn": "Rundi", 509 | "ro": "Romanian", 510 | "ro-MD": "Moldavian", 511 | "rof": "Rombo", 512 | "rom": "Romany", 513 | "rtm": "Rotuman", 514 | "ru": "Russian", 515 | "rue": "Rusyn", 516 | "rug": "Roviana", 517 | "rup": "Aromanian", 518 | "rw": "Kinyarwanda", 519 | "rwk": "Rwa", 520 | "sa": "Sanskrit", 521 | "sad": "Sandawe", 522 | "sah": "Yakut", 523 | "sam": "Samaritan Aramaic", 524 | "saq": "Samburu", 525 | "sas": "Sasak", 526 | "sat": "Santali", 527 | "saz": "Saurashtra", 528 | "sba": "Ngambay", 529 | "sbp": "Sangu", 530 | "sc": "Sardinian", 531 | "scn": "Sicilian", 532 | "sco": "Scots", 533 | "sd": "Sindhi", 534 | "sdc": "Sassarese Sardinian", 535 | "sdh": "Southern Kurdish", 536 | "se": "Northern Sami", 537 | "se-alt-menu": "Sami, Northern", 538 | "see": "Seneca", 539 | "seh": "Sena", 540 | "sei": "Seri", 541 | "sel": "Selkup", 542 | "ses": "Koyraboro Senni", 543 | "sg": "Sango", 544 | "sga": "Old Irish", 545 | "sgs": "Samogitian", 546 | "sh": "Serbo-Croatian", 547 | "shi": "Tachelhit", 548 | "shn": "Shan", 549 | "shu": "Chadian Arabic", 550 | "si": "Sinhala", 551 | "sid": "Sidamo", 552 | "sk": "Slovak", 553 | "sl": "Slovenian", 554 | "slh": "Southern Lushootseed", 555 | "sli": "Lower Silesian", 556 | "sly": "Selayar", 557 | "sm": "Samoan", 558 | "sma": "Southern Sami", 559 | "sma-alt-menu": "Sami, Southern", 560 | "smj": "Lule Sami", 561 | "smj-alt-menu": "Sami, Lule", 562 | "smn": "Inari Sami", 563 | "smn-alt-menu": "Sami, Inari", 564 | "sms": "Skolt Sami", 565 | "sms-alt-menu": "Sami, Skolt", 566 | "sn": "Shona", 567 | "snk": "Soninke", 568 | "so": "Somali", 569 | "sog": "Sogdien", 570 | "sq": "Albanian", 571 | "sr": "Serbian", 572 | "sr-ME": "Montenegrin", 573 | "srn": "Sranan Tongo", 574 | "srr": "Serer", 575 | "ss": "Swati", 576 | "ssy": "Saho", 577 | "st": "Southern Sotho", 578 | "stq": "Saterland Frisian", 579 | "str": "Straits Salish", 580 | "su": "Sundanese", 581 | "suk": "Sukuma", 582 | "sus": "Susu", 583 | "sux": "Sumerian", 584 | 
"sv": "Swedish", 585 | "sw": "Swahili", 586 | "sw-CD": "Congo Swahili", 587 | "swb": "Comorian", 588 | "syc": "Classical Syriac", 589 | "syr": "Syriac", 590 | "szl": "Silesian", 591 | "ta": "Tamil", 592 | "tce": "Southern Tutchone", 593 | "tcy": "Tulu", 594 | "te": "Telugu", 595 | "tem": "Timne", 596 | "teo": "Teso", 597 | "ter": "Tereno", 598 | "tet": "Tetum", 599 | "tg": "Tajik", 600 | "tgx": "Tagish", 601 | "th": "Thai", 602 | "tht": "Tahltan", 603 | "ti": "Tigrinya", 604 | "tig": "Tigre", 605 | "tiv": "Tiv", 606 | "tk": "Turkmen", 607 | "tkl": "Tokelau", 608 | "tkr": "Tsakhur", 609 | "tl": "Tagalog", 610 | "tlh": "Klingon", 611 | "tli": "Tlingit", 612 | "tly": "Talysh", 613 | "tmh": "Tamashek", 614 | "tn": "Tswana", 615 | "to": "Tongan", 616 | "tog": "Nyasa Tonga", 617 | "tok": "Toki Pona", 618 | "tpi": "Tok Pisin", 619 | "tr": "Turkish", 620 | "tru": "Turoyo", 621 | "trv": "Taroko", 622 | "trw": "Torwali", 623 | "ts": "Tsonga", 624 | "tsd": "Tsakonian", 625 | "tsi": "Tsimshian", 626 | "tt": "Tatar", 627 | "ttm": "Northern Tutchone", 628 | "ttt": "Muslim Tat", 629 | "tum": "Tumbuka", 630 | "tvl": "Tuvalu", 631 | "tw": "Twi", 632 | "twq": "Tasawaq", 633 | "ty": "Tahitian", 634 | "tyv": "Tuvinian", 635 | "tzm": "Central Atlas Tamazight", 636 | "udm": "Udmurt", 637 | "ug": "Uyghur", 638 | "ug-alt-variant": "Uighur", 639 | "uga": "Ugaritic", 640 | "uk": "Ukrainian", 641 | "umb": "Umbundu", 642 | "und": "Unknown language", 643 | "ur": "Urdu", 644 | "uz": "Uzbek", 645 | "vai": "Vai", 646 | "ve": "Venda", 647 | "vec": "Venetian", 648 | "vep": "Veps", 649 | "vi": "Vietnamese", 650 | "vls": "West Flemish", 651 | "vmf": "Main-Franconian", 652 | "vmw": "Makhuwa", 653 | "vo": "Volapük", 654 | "vot": "Votic", 655 | "vro": "Võro", 656 | "vun": "Vunjo", 657 | "wa": "Walloon", 658 | "wae": "Walser", 659 | "wal": "Wolaytta", 660 | "war": "Waray", 661 | "was": "Washo", 662 | "wbp": "Warlpiri", 663 | "wo": "Wolof", 664 | "wuu": "Wu Chinese", 665 | "xal": "Kalmyk", 666 | "xh": 
"Xhosa", 667 | "xmf": "Mingrelian", 668 | "xnr": "Kangri", 669 | "xog": "Soga", 670 | "yao": "Yao", 671 | "yap": "Yapese", 672 | "yav": "Yangben", 673 | "ybb": "Yemba", 674 | "yi": "Yiddish", 675 | "yo": "Yoruba", 676 | "yrl": "Nheengatu", 677 | "yue": "Cantonese", 678 | "yue-alt-menu": "Chinese, Cantonese", 679 | "za": "Zhuang", 680 | "zap": "Zapotec", 681 | "zbl": "Blissymbols", 682 | "zea": "Zeelandic", 683 | "zen": "Zenaga", 684 | "zgh": "Standard Moroccan Tamazight", 685 | "zh": "Chinese", 686 | "zh-alt-long": "Mandarin Chinese", 687 | "zh-alt-menu": "Chinese, Mandarin", 688 | "zh-Hans": "Simplified Chinese", 689 | "zh-Hans-alt-long": "Simplified Mandarin Chinese", 690 | "zh-Hant": "Traditional Chinese", 691 | "zh-Hant-alt-long": "Traditional Mandarin Chinese", 692 | "zu": "Zulu", 693 | "zun": "Zuni", 694 | "zxx": "No linguistic content", 695 | "zza": "Zaza" 696 | } 697 | } 698 | } 699 | } 700 | } --------------------------------------------------------------------------------