├── bin
├── export-plural-rules.bat
├── import-cldr-data.bat
├── export-plural-rules
└── import-cldr-data
├── src
├── autoloader.php
├── Exporter
│ ├── Prettyjson.php
│ ├── Po.php
│ ├── Ruby.php
│ ├── Php.php
│ ├── Html.php
│ ├── Xml.php
│ ├── Json.php
│ └── Exporter.php
├── Category.php
├── FormulaConverter.php
├── cldr-data
│ └── main
│ │ └── en-US
│ │ ├── scripts.json
│ │ ├── territories.json
│ │ └── languages.json
├── CldrData.php
└── Language.php
├── LICENSE
├── composer.json
└── UNICODE-LICENSE.txt
/bin/export-plural-rules.bat:
--------------------------------------------------------------------------------
1 | @php "%~dpn0" %*
--------------------------------------------------------------------------------
/bin/import-cldr-data.bat:
--------------------------------------------------------------------------------
1 | @php "%~dpn0" %*
--------------------------------------------------------------------------------
/src/autoloader.php:
--------------------------------------------------------------------------------
1 | id . '\n"';
32 | $lines[] = '"Plural-Forms: nplurals=' . count($language->categories) . '; plural=' . $language->formula . '\n"';
33 | $lines[] = '';
34 |
35 | return implode("\n", $lines);
36 | }
37 | }
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2015 Michele Locati
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
23 |
--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "gettext/languages",
3 | "description": "gettext languages with plural rules",
4 | "keywords": [
5 | "localization",
6 | "l10n",
7 | "internationalization",
8 | "i18n",
9 | "translations",
10 | "translate",
11 | "php",
12 | "unicode",
13 | "cldr",
14 | "language",
15 | "languages",
16 | "plural",
17 | "plurals",
18 | "plural rules"
19 | ],
20 | "homepage": "https://github.com/php-gettext/Languages",
21 | "license": "MIT",
22 | "authors": [
23 | {
24 | "name": "Michele Locati",
25 | "email": "mlocati@gmail.com",
26 | "role": "Developer"
27 | }
28 | ],
29 | "autoload": {
30 | "psr-4": {
31 | "Gettext\\Languages\\": "src/"
32 | }
33 | },
34 | "autoload-dev": {
35 | "psr-4": {
36 | "Gettext\\Languages\\Test\\": "tests/test/"
37 | }
38 | },
39 | "require": {
40 | "php": ">=5.3"
41 | },
42 | "require-dev": {
43 | "phpunit/phpunit": "^4.8 || ^5.7 || ^6.5 || ^7.5 || ^8.4"
44 | },
45 | "scripts": {
46 | "test": "phpunit"
47 | },
48 | "bin": [
49 | "bin/export-plural-rules",
50 | "bin/import-cldr-data"
51 | ]
52 | }
--------------------------------------------------------------------------------
/src/Exporter/Ruby.php:
--------------------------------------------------------------------------------
1 | id . '\' => {';
28 | $lines[] = ' \'name\' => \'' . addslashes($lc->name) . '\',';
29 | if (isset($lc->supersededBy)) {
30 | $lines[] = ' \'supersededBy\' => \'' . $lc->supersededBy . '\',';
31 | }
32 | if (isset($lc->script)) {
33 | $lines[] = ' \'script\' => \'' . addslashes($lc->script) . '\',';
34 | }
35 | if (isset($lc->territory)) {
36 | $lines[] = ' \'territory\' => \'' . addslashes($lc->territory) . '\',';
37 | }
38 | if (isset($lc->baseLanguage)) {
39 | $lines[] = ' \'baseLanguage\' => \'' . addslashes($lc->baseLanguage) . '\',';
40 | }
41 | $lines[] = ' \'formula\' => \'' . $lc->formula . '\',';
42 | $lines[] = ' \'plurals\' => ' . count($lc->categories) . ',';
43 | $catNames = array();
44 | foreach ($lc->categories as $c) {
45 | $catNames[] = "'{$c->id}'";
46 | }
47 | $lines[] = ' \'cases\' => [' . implode(', ', $catNames) . '],';
48 | $lines[] = ' \'examples\' => {';
49 | foreach ($lc->categories as $c) {
50 | $lines[] = ' \'' . $c->id . '\' => \'' . $c->examples . '\',';
51 | }
52 | $lines[] = ' },';
53 | $lines[] = ' },';
54 | }
55 | $lines[] = '}';
56 | $lines[] = '';
57 |
58 | return implode("\n", $lines);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/Exporter/Php.php:
--------------------------------------------------------------------------------
1 | id . '\' => array(';
29 | $lines[] = ' \'name\' => \'' . addslashes($lc->name) . '\',';
30 | if (isset($lc->supersededBy)) {
31 | $lines[] = ' \'supersededBy\' => \'' . $lc->supersededBy . '\',';
32 | }
33 | if (isset($lc->script)) {
34 | $lines[] = ' \'script\' => \'' . addslashes($lc->script) . '\',';
35 | }
36 | if (isset($lc->territory)) {
37 | $lines[] = ' \'territory\' => \'' . addslashes($lc->territory) . '\',';
38 | }
39 | if (isset($lc->baseLanguage)) {
40 | $lines[] = ' \'baseLanguage\' => \'' . addslashes($lc->baseLanguage) . '\',';
41 | }
42 | $lines[] = ' \'formula\' => \'' . $lc->formula . '\',';
43 | $lines[] = ' \'plurals\' => ' . count($lc->categories) . ',';
44 | $catNames = array();
45 | foreach ($lc->categories as $c) {
46 | $catNames[] = "'{$c->id}'";
47 | }
48 | $lines[] = ' \'cases\' => array(' . implode(', ', $catNames) . '),';
49 | $lines[] = ' \'examples\' => array(';
50 | foreach ($lc->categories as $c) {
51 | $lines[] = ' \'' . $c->id . '\' => \'' . $c->examples . '\',';
52 | }
53 | $lines[] = ' ),';
54 | $lines[] = ' ),';
55 | }
56 | $lines[] = ');';
57 | $lines[] = '';
58 |
59 | return implode("\n", $lines);
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/Exporter/Html.php:
--------------------------------------------------------------------------------
1 | ';
26 | $lines[] = ' ';
27 | $lines[] = ' ';
28 | $lines[] = ' | Language code | ';
29 | $lines[] = ' Language name | ';
30 | $lines[] = ' # plurals | ';
31 | $lines[] = ' Formula | ';
32 | $lines[] = ' Plurals | ';
33 | $lines[] = '
';
34 | $lines[] = ' ';
35 | $lines[] = '
';
36 | foreach ($languages as $lc) {
37 | $lines[] = ' ';
38 | $lines[] = ' | ' . $lc->id . ' | ';
39 | $name = self::h($lc->name);
40 | if (isset($lc->supersededBy)) {
41 | $name .= '
Superseded by ' . $lc->supersededBy . '';
42 | }
43 | $lines[] = ' ' . $name . ' | ';
44 | $lines[] = ' ' . count($lc->categories) . ' | ';
45 | $lines[] = ' ' . self::h($lc->formula) . ' | ';
46 | $cases = array();
47 | foreach ($lc->categories as $c) {
48 | $cases[] = '' . $c->id . '' . self::h($c->examples) . '';
49 | }
50 | $lines[] = ' ' . implode('', $cases) . ' | ';
51 | $lines[] = '
';
52 | }
53 | $lines[] = ' ';
54 | $lines[] = '';
55 |
56 | return implode("\n", $lines);
57 | }
58 |
protected static function h($str)
{
    // ENT_COMPAT converts &, <, > and double quotes; single quotes are left as they are
    $escaped = htmlspecialchars($str, ENT_COMPAT, 'UTF-8');

    return $escaped;
}
63 | }
64 |
--------------------------------------------------------------------------------
/UNICODE-LICENSE.txt:
--------------------------------------------------------------------------------
1 | UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
2 |
3 | See Terms of Use for definitions of Unicode Inc.'s
4 | Data Files and Software.
5 |
6 | NOTICE TO USER: Carefully read the following legal agreement.
7 | BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
8 | DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
9 | YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
10 | TERMS AND CONDITIONS OF THIS AGREEMENT.
11 | IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
12 | THE DATA FILES OR SOFTWARE.
13 |
14 | COPYRIGHT AND PERMISSION NOTICE
15 |
16 | Copyright © 1991-2019 Unicode, Inc. All rights reserved.
17 | Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
18 |
19 | Permission is hereby granted, free of charge, to any person obtaining
20 | a copy of the Unicode data files and any associated documentation
21 | (the "Data Files") or Unicode software and any associated documentation
22 | (the "Software") to deal in the Data Files or Software
23 | without restriction, including without limitation the rights to use,
24 | copy, modify, merge, publish, distribute, and/or sell copies of
25 | the Data Files or Software, and to permit persons to whom the Data Files
26 | or Software are furnished to do so, provided that either
27 | (a) this copyright and permission notice appear with all copies
28 | of the Data Files or Software, or
29 | (b) this copyright and permission notice appear in associated
30 | Documentation.
31 |
32 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
33 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
34 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
35 | NONINFRINGEMENT OF THIRD PARTY RIGHTS.
36 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
37 | NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
38 | DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
39 | DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
40 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
41 | PERFORMANCE OF THE DATA FILES OR SOFTWARE.
42 |
43 | Except as contained in this notice, the name of a copyright holder
44 | shall not be used in advertising or otherwise to promote the sale,
45 | use or other dealings in these Data Files or Software without prior
46 | written authorization of the copyright holder.
47 |
--------------------------------------------------------------------------------
/src/Exporter/Xml.php:
--------------------------------------------------------------------------------
1 | loadXML('');
30 | $xLanguages = $xml->firstChild;
31 | foreach ($languages as $language) {
32 | $xLanguage = $xml->createElement('language');
33 | $xLanguage->setAttribute('id', $language->id);
34 | $xLanguage->setAttribute('name', $language->name);
35 | if (isset($language->supersededBy)) {
36 | $xLanguage->setAttribute('supersededBy', $language->supersededBy);
37 | }
38 | if (isset($language->script)) {
39 | $xLanguage->setAttribute('script', $language->script);
40 | }
41 | if (isset($language->territory)) {
42 | $xLanguage->setAttribute('territory', $language->territory);
43 | }
44 | if (isset($language->baseLanguage)) {
45 | $xLanguage->setAttribute('baseLanguage', $language->baseLanguage);
46 | }
47 | $xLanguage->setAttribute('formula', $language->formula);
48 | foreach ($language->categories as $category) {
49 | $xCategory = $xml->createElement('category');
50 | $xCategory->setAttribute('id', $category->id);
51 | $xCategory->setAttribute('examples', $category->examples);
52 | $xLanguage->appendChild($xCategory);
53 | }
54 | $xLanguages->appendChild($xLanguage);
55 | }
56 | $xml->formatOutput = true;
57 |
58 | return $xml->saveXML();
59 | }
60 | }
61 |
--------------------------------------------------------------------------------
/src/Exporter/Json.php:
--------------------------------------------------------------------------------
1 | name;
56 | if (isset($language->supersededBy)) {
57 | $item['supersededBy'] = $language->supersededBy;
58 | }
59 | if (isset($language->script)) {
60 | $item['script'] = $language->script;
61 | }
62 | if (isset($language->territory)) {
63 | $item['territory'] = $language->territory;
64 | }
65 | if (isset($language->baseLanguage)) {
66 | $item['baseLanguage'] = $language->baseLanguage;
67 | }
68 | if (!empty($options['both-formulas'])) {
69 | $item['formulas'] = array(
70 | 'standard' => $language->buildFormula(true),
71 | 'php' => $language->formula,
72 | );
73 | } else {
74 | $item['formula'] = $language->formula;
75 | }
76 | $item['plurals'] = count($language->categories);
77 | $item['cases'] = array();
78 | $item['examples'] = array();
79 | foreach ($language->categories as $category) {
80 | $item['cases'][] = $category->id;
81 | $item['examples'][$category->id] = $category->examples;
82 | }
83 | $list[$language->id] = $item;
84 | }
85 |
86 | return json_encode($list, static::getEncodeOptions());
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/src/Category.php:
--------------------------------------------------------------------------------
1 | id = $matches[1];
51 | $cldrFormulaAndExamplesNormalized = trim(preg_replace('/\s+/', ' ', $cldrFormulaAndExamples));
52 | if (!preg_match('/^([^@]*)(?:@integer([^@]+))?(?:@decimal(?:[^@]+))?$/', $cldrFormulaAndExamplesNormalized, $matches)) {
53 | throw new Exception("Invalid CLDR category rule: {$cldrFormulaAndExamples}");
54 | }
55 | $cldrFormula = trim($matches[1]);
56 | $s = isset($matches[2]) ? trim($matches[2]) : '';
57 | $this->examples = ($s === '') ? null : $s;
58 | switch ($this->id) {
59 | case CldrData::OTHER_CATEGORY:
60 | if ($cldrFormula !== '') {
61 | throw new Exception("The '" . CldrData::OTHER_CATEGORY . "' category should not have any formula, but it has '{$cldrFormula}'");
62 | }
63 | $this->formula = null;
64 | break;
65 | default:
66 | if ($cldrFormula === '') {
67 | throw new Exception("The '{$this->id}' category does not have a formula");
68 | }
69 | $this->formula = FormulaConverter::convertFormula($cldrFormula);
70 | break;
71 | }
72 | }
73 |
74 | /**
75 | * Return a list of numbers corresponding to the $examples value.
76 | *
77 | * @throws \Exception throws an Exception if we weren't able to expand the examples
78 | *
79 | * @return int[]
80 | */
public function getExampleIntegers()
{
    // The stored examples string is handed as-is to the static expander
    $rawExamples = $this->examples;

    return self::expandExamples($rawExamples);
}
85 |
86 | /**
87 | * Expand a list of examples as defined by CLDR.
88 | *
89 | * @param string $examples A string like '1, 2, 5~7, …'.
90 | *
91 | * @throws \Exception throws an Exception if we weren't able to expand $examples
92 | *
93 | * @return int[]
94 | */
public static function expandExamples($examples)
{
    $result = array();
    $m = null;
    // A trailing ', …' only marks the list as open-ended: it carries no number, so strip it
    $ellipsis = ', …';
    if (substr($examples, -strlen($ellipsis)) === $ellipsis) {
        $examples = substr($examples, 0, strlen($examples) - strlen($ellipsis));
    }
    // Spaces are irrelevant: remove them and process every comma-separated item
    foreach (explode(',', str_replace(' ', '', $examples)) as $range) {
        if (preg_match('/^(?<num>\d+)((c|e)(?<exp>\d+))?$/', $range, $m)) {
            // Single value, optionally followed by 'c' or 'e' and an exponent:
            // the exponent is expanded by appending that many zeros ('1c3' => 1000)
            $result[] = (int) (isset($m['exp']) ? ($m['num'] . str_repeat('0', (int) $m['exp'])) : $range);
        } elseif (preg_match('/^(\d+)~(\d+)$/', $range, $m)) {
            // Inclusive range 'from~to', walked with a step of delta / 100 (never less than 1)
            // so that very wide ranges are sampled instead of being fully enumerated
            $from = (int) $m[1];
            $to = (int) $m[2];
            $delta = $to - $from;
            $step = (int) max(1, $delta / 100);
            for ($i = $from; $i < $to; $i += $step) {
                $result[] = $i;
            }
            // The upper bound is always part of the result, whatever the step
            $result[] = $to;
        } else {
            throw new Exception("Unhandled test range '{$range}' in '{$examples}'");
        }
    }
    if (empty($result)) {
        throw new Exception("No test numbers from '{$examples}'");
    }

    return $result;
}
124 | }
125 |
--------------------------------------------------------------------------------
/src/Exporter/Exporter.php:
--------------------------------------------------------------------------------
1 | $class) {
41 | if (call_user_func(self::getExporterClassName($handle) . '::isForPublicUse') === true) {
42 | $result[$handle] = $class;
43 | }
44 | }
45 | } else {
46 | $result = self::$exporters;
47 | }
48 |
49 | return $result;
50 | }
51 |
52 | /**
53 | * Return the description of a specific exporter.
54 | *
55 | * @param string $exporterHandle the handle of the exporter
56 | *
57 | * @throws \Exception throws an Exception if $exporterHandle is not valid
58 | *
59 | * @return string
60 | */
final public static function getExporterDescription($exporterHandle)
{
    $available = self::getExporters();
    if (isset($available[$exporterHandle])) {
        // Ask the exporter class itself for its description
        $describe = self::getExporterClassName($exporterHandle) . '::getDescription';

        return call_user_func($describe);
    }

    // The handle is not among the exporters returned by getExporters()
    throw new Exception("Invalid exporter handle: '{$exporterHandle}'");
}
70 |
71 | /**
72 | * Returns the fully qualified class name of an exporter given its handle.
73 | *
74 | * @param string $exporterHandle the exporter class handle
75 | *
76 | * @return string
77 | */
final public static function getExporterClassName($exporterHandle)
{
    // Class names are the handle with only its first letter upper-cased ('JSON' => 'Json')
    $shortName = ucfirst(strtolower($exporterHandle));

    // Exporter classes live in the same namespace as this class
    return __NAMESPACE__ . '\\' . $shortName;
}
82 |
83 | /**
84 | * Convert a list of Language instances to string.
85 | *
86 | * @param \Gettext\Languages\Language[] $languages the Language instances to convert
87 | * @param array|null $options
88 | *
89 | * @return string
90 | */
final public static function toString($languages, $options = null)
{
    // Anything that is not an array (including the default null) means "no options"
    if (!is_array($options)) {
        $options = array();
    }
    if (!empty($options['us-ascii'])) {
        // Export the clones returned by getUSAsciiClone() instead of the original instances
        $converted = array();
        foreach ($languages as $item) {
            $converted[] = $item->getUSAsciiClone();
        }
        $languages = $converted;
    }

    // The actual serialization is delegated to the called exporter class
    return static::toStringDoWithOptions($languages, $options);
}
106 |
107 | /**
108 | * Save the Language instances to a file.
109 | *
110 | * @param \Gettext\Languages\Language[] $languages the Language instances to convert
111 | * @param array|null $options
112 | *
113 | * @throws \Exception
114 | */
final public static function toFile($languages, $filename, $options = null)
{
    // $filename is the path handed to file_put_contents(); $options is forwarded untouched to toString()
    $contents = self::toString($languages, $options);
    // The native warning is silenced: a failed write is reported through the exception below
    $written = @file_put_contents($filename, $contents);
    if ($written === false) {
        throw new Exception("Error writing data to '{$filename}'");
    }
}
122 |
123 | /**
124 | * Is this exporter for public use?
125 | *
126 | * @return bool
127 | */
public static function isForPublicUse()
{
    // Base-class answer: the exporter is for public use
    $forPublicUse = true;

    return $forPublicUse;
}
132 |
133 | /**
134 | * Does this exporter support exporting formulas both with and without extra parentheses?
135 | *
136 | * @return bool
137 | */
public static function supportsFormulasWithAndWithoutParenthesis()
{
    // Base-class answer: the feature is not supported
    $supported = false;

    return $supported;
}
142 |
143 | /**
144 | * Return a short description of the exporter.
145 | *
146 | * @return string
147 | */
public static function getDescription()
{
    // The base class has no description of its own: name the called class in the error
    $message = sprintf('%s does not implement the method %s', get_called_class(), __FUNCTION__);

    throw new Exception($message);
}
152 |
153 | /**
154 | * Convert a list of Language instances to string.
155 | *
156 | * @param \Gettext\Languages\Language[] $languages the Language instances to convert
157 | * @param array $options export options
158 | *
159 | * @return string
160 | */
protected static function toStringDoWithOptions($languages, array $options)
{
    $calledClass = get_called_class();
    // Without a toStringDo() method on the called class there is nothing to fall back to
    if (!method_exists($calledClass, 'toStringDo')) {
        throw new Exception($calledClass . ' does not implement the method ' . __FUNCTION__);
    }

    // toStringDo() takes no options, so $options is not forwarded
    return static::toStringDo($languages);
}
168 | }
169 |
--------------------------------------------------------------------------------
/src/FormulaConverter.php:
--------------------------------------------------------------------------------
1 | the whole 'and' group is always false
34 | $gettextFormulaChunk = false;
35 | break;
36 | }
37 | if ($gettextAtom !== true) {
38 | $andSeparatedChunks[] = $gettextAtom;
39 | }
40 | }
41 | if (!isset($gettextFormulaChunk)) {
42 | if (empty($andSeparatedChunks)) {
43 | // All the atoms joined by 'and' always evaluate to true => the whole 'and' group is always true
44 | $gettextFormulaChunk = true;
45 | } else {
46 | $gettextFormulaChunk = implode(' && ', $andSeparatedChunks);
47 | // Special cases simplification
48 | switch ($gettextFormulaChunk) {
49 | case 'n >= 0 && n <= 2 && n != 2':
50 | $gettextFormulaChunk = 'n == 0 || n == 1';
51 | break;
52 | }
53 | }
54 | }
55 | if ($gettextFormulaChunk === true) {
56 | // One part of the formula joined with the others by 'or' always evaluates to true => the whole formula always evaluates to true
57 | return true;
58 | }
59 | if ($gettextFormulaChunk !== false) {
60 | $orSeparatedChunks[] = $gettextFormulaChunk;
61 | }
62 | }
63 | if (empty($orSeparatedChunks)) {
64 | // All the parts joined by 'or' always evaluate to false => the whole formula always evaluates to false
65 | return false;
66 | }
67 |
68 | return implode(' || ', $orSeparatedChunks);
69 | }
70 |
71 | /**
72 | * Converts an atomic part of the CLDR formula to its gettext representation.
73 | *
74 | * @param string $cldrAtom the CLDR formula atom to convert
75 | *
76 | * @throws \Exception
77 | *
78 | * @return bool|string returns true if the gettext will always evaluate to true, false if gettext will always evaluate to false, return the gettext formula otherwise
79 | */
private static function convertAtom($cldrAtom)
{
    $m = null;
    // Switch to the gettext spelling: ' = ' becomes ' == ' and every 'i' becomes 'n'
    $atom = str_replace('i', 'n', str_replace(' = ', ' == ', $cldrAtom));

    // Comparison of n (or n modulo something) with a single number: nothing else to do
    if (preg_match('/^n( % \d+)? (!=|==) \d+$/', $atom)) {
        return $atom;
    }
    // Comparison of n (or n modulo something) with a list and/or ranges of numbers
    if (preg_match('/^n( % \d+)? (!=|==) \d+(,\d+|\.\.\d+)+$/', $atom)) {
        return self::expandAtom($atom);
    }

    // The remaining operands (v, w, f, t, c, e) are evaluated as if they were always 0:
    // each pattern is mapped to the result it yields when the first compared number is 0
    $constantOperands = array(
        '/^(?:v|w)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/' => true,
        '/^(?:v|w)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/' => false,
        '/^(?:f|t|c|e)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/' => true,
        '/^(?:f|t|c|e)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/' => false,
    );
    foreach ($constantOperands as $pattern => $resultWhenZero) {
        if (preg_match($pattern, $atom, $m)) {
            return ((int) $m[1] === 0) === $resultWhenZero;
        }
    }

    throw new Exception("Unable to convert the formula chunk '{$cldrAtom}' from CLDR to gettext");
}
106 |
107 | /**
108 | * Expands an atom containing a range (for instance: 'n == 1,3..5').
109 | *
110 | * @param string $atom
111 | *
112 | * @throws \Exception
113 | *
114 | * @return string
115 | */
private static function expandAtom($atom)
{
    $m = null;
    if (preg_match('/^(n(?: % \d+)?) (==|!=) (\d+(?:\.\.\d+|,\d+)+)$/', $atom, $m)) {
        // Left operand ('n' or 'n % <number>') and comparison operator shared by every chunk
        $what = $m[1];
        $op = $m[2];
        $chunks = array();
        foreach (explode(',', $m[3]) as $range) {
            $chunk = null;
            if (preg_match('/^\d+$/', $range)) {
                // Single number: plain comparison
                $chunk = "{$what} {$op} {$range}";
            } elseif (preg_match('/^(\d+)\.\.(\d+)$/', $range, $m)) {
                $from = (int) $m[1];
                $to = (int) $m[2];
                if (($to - $from) === 1) {
                    // Two adjacent numbers: list them explicitly
                    switch ($op) {
                        case '==':
                            $chunk = "({$what} == {$from} || {$what} == {$to})";
                            break;
                        case '!=':
                            // Outside the range means different from BOTH of its bounds
                            $chunk = "{$what} != {$from} && {$what} != {$to}";
                            break;
                    }
                } else {
                    switch ($op) {
                        case '==':
                            $chunk = "{$what} >= {$from} && {$what} <= {$to}";
                            break;
                        case '!=':
                            if ($what === 'n' && $from <= 0) {
                                // The lower bound is dropped when the operand is a bare n and the range starts at 0
                                $chunk = "{$what} > {$to}";
                            } else {
                                $chunk = "({$what} < {$from} || {$what} > {$to})";
                            }
                            break;
                    }
                }
            }
            if (!isset($chunk)) {
                throw new Exception("Unhandled range '{$range}' in '{$atom}'");
            }
            $chunks[] = $chunk;
        }
        if (count($chunks) === 1) {
            return $chunks[0];
        }
        // Several chunks: 'equal to any of them' is an OR, 'different from all of them' is an AND
        switch ($op) {
            case '==':
                return '(' . implode(' || ', $chunks) . ')';
            case '!=':
                return implode(' && ', $chunks);
        }
    }
    throw new Exception("Unable to expand '{$atom}'");
}
172 | }
173 |
--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/scripts.json:
--------------------------------------------------------------------------------
1 | {
2 | "main": {
3 | "en-US": {
4 | "identity": {
5 | "version": {
6 | "_cldrVersion": "47"
7 | },
8 | "language": "en",
9 | "territory": "US"
10 | },
11 | "localeDisplayNames": {
12 | "scripts": {
13 | "Adlm": "Adlam",
14 | "Afak": "Afaka",
15 | "Aghb": "Caucasian Albanian",
16 | "Ahom": "Ahom",
17 | "Arab": "Arabic",
18 | "Arab-alt-variant": "Perso-Arabic",
19 | "Aran": "Nastaliq",
20 | "Armi": "Imperial Aramaic",
21 | "Armn": "Armenian",
22 | "Avst": "Avestan",
23 | "Bali": "Balinese",
24 | "Bamu": "Bamum",
25 | "Bass": "Bassa Vah",
26 | "Batk": "Batak",
27 | "Beng": "Bangla",
28 | "Bhks": "Bhaiksuki",
29 | "Blis": "Blissymbols",
30 | "Bopo": "Bopomofo",
31 | "Brah": "Brahmi",
32 | "Brai": "Braille",
33 | "Bugi": "Buginese",
34 | "Buhd": "Buhid",
35 | "Cakm": "Chakma",
36 | "Cans": "Unified Canadian Aboriginal Syllabics",
37 | "Cans-alt-short": "UCAS",
38 | "Cari": "Carian",
39 | "Cham": "Cham",
40 | "Cher": "Cherokee",
41 | "Chrs": "Chorasmian",
42 | "Cirt": "Cirth",
43 | "Copt": "Coptic",
44 | "Cpmn": "Cypro-Minoan",
45 | "Cprt": "Cypriot",
46 | "Cyrl": "Cyrillic",
47 | "Cyrs": "Old Church Slavonic Cyrillic",
48 | "Deva": "Devanagari",
49 | "Diak": "Dives Akuru",
50 | "Dogr": "Dogra",
51 | "Dsrt": "Deseret",
52 | "Dupl": "Duployan shorthand",
53 | "Egyd": "Egyptian demotic",
54 | "Egyh": "Egyptian hieratic",
55 | "Egyp": "Egyptian hieroglyphs",
56 | "Elba": "Elbasan",
57 | "Elym": "Elymaic",
58 | "Ethi": "Ethiopic",
59 | "Gara": "Garay",
60 | "Geok": "Georgian Khutsuri",
61 | "Geor": "Georgian",
62 | "Glag": "Glagolitic",
63 | "Gong": "Gunjala Gondi",
64 | "Gonm": "Masaram Gondi",
65 | "Goth": "Gothic",
66 | "Gran": "Grantha",
67 | "Grek": "Greek",
68 | "Gujr": "Gujarati",
69 | "Gukh": "Gurung Khema",
70 | "Guru": "Gurmukhi",
71 | "Hanb": "Han with Bopomofo",
72 | "Hang": "Hangul",
73 | "Hani": "Han",
74 | "Hano": "Hanunoo",
75 | "Hans": "Simplified",
76 | "Hans-alt-stand-alone": "Simplified Han",
77 | "Hant": "Traditional",
78 | "Hant-alt-stand-alone": "Traditional Han",
79 | "Hatr": "Hatran",
80 | "Hebr": "Hebrew",
81 | "Hira": "Hiragana",
82 | "Hluw": "Anatolian Hieroglyphs",
83 | "Hmng": "Pahawh Hmong",
84 | "Hmnp": "Nyiakeng Puachue Hmong",
85 | "Hrkt": "Japanese syllabaries",
86 | "Hung": "Old Hungarian",
87 | "Inds": "Indus",
88 | "Ital": "Old Italic",
89 | "Jamo": "Jamo",
90 | "Java": "Javanese",
91 | "Jpan": "Japanese",
92 | "Jurc": "Jurchen",
93 | "Kali": "Kayah Li",
94 | "Kana": "Katakana",
95 | "Kawi": "Kawi",
96 | "Khar": "Kharoshthi",
97 | "Khmr": "Khmer",
98 | "Khoj": "Khojki",
99 | "Kits": "Khitan small script",
100 | "Knda": "Kannada",
101 | "Kore": "Korean",
102 | "Kpel": "Kpelle",
103 | "Krai": "Kirat Rai",
104 | "Kthi": "Kaithi",
105 | "Lana": "Lanna",
106 | "Laoo": "Lao",
107 | "Latf": "Fraktur Latin",
108 | "Latg": "Gaelic Latin",
109 | "Latn": "Latin",
110 | "Lepc": "Lepcha",
111 | "Limb": "Limbu",
112 | "Lina": "Linear A",
113 | "Linb": "Linear B",
114 | "Lisu": "Fraser",
115 | "Loma": "Loma",
116 | "Lyci": "Lycian",
117 | "Lydi": "Lydian",
118 | "Mahj": "Mahajani",
119 | "Maka": "Makasar",
120 | "Mand": "Mandaean",
121 | "Mani": "Manichaean",
122 | "Marc": "Marchen",
123 | "Maya": "Mayan hieroglyphs",
124 | "Medf": "Medefaidrin",
125 | "Mend": "Mende",
126 | "Merc": "Meroitic Cursive",
127 | "Mero": "Meroitic",
128 | "Mlym": "Malayalam",
129 | "Modi": "Modi",
130 | "Mong": "Mongolian",
131 | "Moon": "Moon",
132 | "Mroo": "Mro",
133 | "Mtei": "Meitei Mayek",
134 | "Mult": "Multani",
135 | "Mymr": "Myanmar",
136 | "Nagm": "Nag Mundari",
137 | "Nand": "Nandinagari",
138 | "Narb": "Old North Arabian",
139 | "Nbat": "Nabataean",
140 | "Newa": "Newa",
141 | "Nkgb": "Naxi Geba",
142 | "Nkoo": "N’Ko",
143 | "Nshu": "Nüshu",
144 | "Ogam": "Ogham",
145 | "Olck": "Ol Chiki",
146 | "Onao": "Ol Onal",
147 | "Orkh": "Orkhon",
148 | "Orya": "Odia",
149 | "Osge": "Osage",
150 | "Osma": "Osmanya",
151 | "Ougr": "Old Uyghur",
152 | "Palm": "Palmyrene",
153 | "Pauc": "Pau Cin Hau",
154 | "Perm": "Old Permic",
155 | "Phag": "Phags-pa",
156 | "Phli": "Inscriptional Pahlavi",
157 | "Phlp": "Psalter Pahlavi",
158 | "Phlv": "Book Pahlavi",
159 | "Phnx": "Phoenician",
160 | "Plrd": "Pollard Phonetic",
161 | "Prti": "Inscriptional Parthian",
162 | "Qaag": "Zawgyi",
163 | "Rjng": "Rejang",
164 | "Rohg": "Hanifi",
165 | "Rohg-alt-stand-alone": "Hanifi Rohingya",
166 | "Roro": "Rongorongo",
167 | "Runr": "Runic",
168 | "Samr": "Samaritan",
169 | "Sara": "Sarati",
170 | "Sarb": "Old South Arabian",
171 | "Saur": "Saurashtra",
172 | "Sgnw": "SignWriting",
173 | "Shaw": "Shavian",
174 | "Shrd": "Sharada",
175 | "Sidd": "Siddham",
176 | "Sind": "Khudawadi",
177 | "Sinh": "Sinhala",
178 | "Sogd": "Sogdian",
179 | "Sogo": "Old Sogdian",
180 | "Sora": "Sora Sompeng",
181 | "Soyo": "Soyombo",
182 | "Sund": "Sundanese",
183 | "Sunu": "Sunuwar",
184 | "Sylo": "Syloti Nagri",
185 | "Syrc": "Syriac",
186 | "Syre": "Estrangelo Syriac",
187 | "Syrj": "Western Syriac",
188 | "Syrn": "Eastern Syriac",
189 | "Tagb": "Tagbanwa",
190 | "Takr": "Takri",
191 | "Tale": "Tai Le",
192 | "Talu": "New Tai Lue",
193 | "Taml": "Tamil",
194 | "Tang": "Tangut",
195 | "Tavt": "Tai Viet",
196 | "Telu": "Telugu",
197 | "Teng": "Tengwar",
198 | "Tfng": "Tifinagh",
199 | "Tglg": "Tagalog",
200 | "Thaa": "Thaana",
201 | "Thai": "Thai",
202 | "Tibt": "Tibetan",
203 | "Tirh": "Tirhuta",
204 | "Tnsa": "Tangsa",
205 | "Todr": "Todhri",
206 | "Toto": "Toto",
207 | "Tutg": "Tulu-Tigalari",
208 | "Ugar": "Ugaritic",
209 | "Vaii": "Vai",
210 | "Visp": "Visible Speech",
211 | "Vith": "Vithkuqi",
212 | "Wara": "Varang Kshiti",
213 | "Wcho": "Wancho",
214 | "Wole": "Woleai",
215 | "Xpeo": "Old Persian",
216 | "Xsux": "Sumero-Akkadian Cuneiform",
217 | "Xsux-alt-short": "S-A Cuneiform",
218 | "Yezi": "Yezidi",
219 | "Yiii": "Yi",
220 | "Zanb": "Zanabazar Square",
221 | "Zinh": "Inherited",
222 | "Zmth": "Mathematical Notation",
223 | "Zsye": "Emoji",
224 | "Zsym": "Symbols",
225 | "Zxxx": "Unwritten",
226 | "Zyyy": "Common",
227 | "Zzzz": "Unknown Script"
228 | }
229 | }
230 | }
231 | }
232 | }
--------------------------------------------------------------------------------
/bin/export-plural-rules:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | $arg) {
80 | if ($argi === 0) {
81 | continue;
82 | }
83 | if (is_string($arg)) {
84 | $argLC = trim(strtolower($arg));
85 | switch ($argLC) {
86 | case '-h':
87 | case '--help':
88 | self::showSyntax();
89 | exit(0);
90 | case '--us-ascii':
91 | self::$outputUSAscii = true;
92 | break;
93 | case '--reduce=yes':
94 | self::$reduce = true;
95 | break;
96 | case '--reduce=no':
97 | self::$reduce = false;
98 | break;
99 | case '--parenthesis=yes':
100 | self::$extraParenthesis = true;
101 | break;
102 | case '--parenthesis=no':
103 | self::$extraParenthesis = false;
104 | break;
105 | case '--parenthesis=both':
106 | self::$extraParenthesis = null;
107 | break;
108 | default:
109 | if (preg_match('/^--output=.+$/', $argLC)) {
110 | if (isset(self::$outputFilename)) {
111 | fwrite(STDERR, "The output file name has been specified more than once!\n");
112 | self::showSyntax();
113 | exit(3);
114 | }
115 | list(, self::$outputFilename) = explode('=', $arg, 2);
116 | self::$outputFilename = trim(self::$outputFilename);
117 | } elseif (preg_match('/^--languages?=.+$/', $argLC)) {
118 | list(, $s) = explode('=', $arg, 2);
119 | $list = explode(',', $s);
120 | if (is_array(self::$languages)) {
121 | self::$languages = array_merge(self::$languages, $list);
122 | } else {
123 | self::$languages = $list;
124 | }
125 | } elseif (isset($exporters[$argLC])) {
126 | if (isset(self::$outputFormat)) {
127 | fwrite(STDERR, "The output format has been specified more than once!\n");
128 | self::showSyntax();
129 | exit(3);
130 | }
131 | self::$outputFormat = $argLC;
132 | } else {
133 | fwrite(STDERR, "Unknown option: {$arg}\n");
134 | self::showSyntax();
135 | exit(2);
136 | }
137 | break;
138 | }
139 | }
140 | }
141 | }
142 | if (!isset(self::$outputFormat)) {
143 | self::showSyntax();
144 | exit(1);
145 | }
146 | if (isset(self::$languages)) {
147 | self::$languages = array_values(array_unique(self::$languages));
148 | }
149 | if (!isset(self::$reduce)) {
150 | self::$reduce = isset(self::$languages) ? false : true;
151 | }
152 | }
153 |
154 | /**
155 |  * Write the command line syntax (the options and the list of the available output formats) to STDERR.
156 |  */
157 | public static function showSyntax()
158 | {
159 |     $basename = basename(__FILE__);
160 |     $exporters = array_keys(Exporter::getExporters(true));
161 |     $exporterList = implode('|', $exporters);
162 |     fwrite(
163 |         STDERR,
164 |         <<<EOT
165 | Syntax:
166 | {$basename} [--us-ascii] [--languages=<LanguageId>[,<LanguageId>,...]] [--reduce=yes|no] [--parenthesis=yes|no|both] [--output=<file name>] <{$exporterList}>
167 |
168 | Where:
169 | --help
170 | show this help message.
171 |
172 | --us-ascii
173 | if specified, the output will contain only US-ASCII characters.
174 |
175 | --languages(or --language)
176 | export only the specified language codes.
177 | Separate languages with commas; you can also use this argument
178 | more than once; it's case insensitive and accepts both '_' and
179 | '-' as locale chunks separator (eg we accept 'it_IT' as well as
180 | 'it-it').
181 | --reduce
182 | if set to yes the output won't contain languages with the same
183 | base language and rules.
184 | For instance nl_BE ('Flemish') will be omitted because it's the
185 | same as nl ('Dutch').
186 | Defaults to 'no' if --languages is specified, to 'yes' otherwise.
187 | --parenthesis
188 | if set to no, extra parenthesis will be omitted in generated
189 | plural rules formulas.
190 | Those extra parenthesis are needed to create a PHP-compatible
191 | formula.
192 | Some exporter may also export formulas both with and without
193 | The extra parenthesis: use --parenthesis=both in this case
194 | Defaults to 'yes'
195 | --output
196 | if specified, the output will be saved to <file name>. If not
197 | specified we'll output to standard output.
198 |
199 | Output formats
200 |
201 | EOT
202 |     );
203 |     $len = max(array_map('strlen', $exporters)); // width of the longest exporter name, used to align the descriptions
204 |     foreach ($exporters as $exporter) {
205 |         fwrite(STDERR, ' ' . str_pad($exporter, $len) . ': ' . Exporter::getExporterDescription($exporter) . "\n");
206 |     }
207 |     fwrite(STDERR, "\n");
208 | }
209 |
210 | /**
211 |  * Drop the languages whose plural rules duplicate the ones of a shorter identifier.
212 |  * A language is dropped when its formula and categories are the same as those of the language identified
213 |  * by its first chunk or (for three-chunk identifiers) by its first chunk plus its last chunk.
214 |  *
215 |  * @param Language[] $languages the languages to be filtered
216 |  *
217 |  * @return Language[] the languages that survived, in their original order
218 |  */
219 | public static function reduce($languages)
220 | {
221 |     // First pass: identifiers made of three chunks; second pass: identifiers made of two chunks
222 |     foreach (array(3, 2) as $chunkCount) {
223 |         $survivors = array();
224 |         foreach ($languages as $candidate) {
225 |             $parts = explode('_', $candidate->id);
226 |             $redundant = false;
227 |             if (count($parts) === $chunkCount) {
228 |                 // Shorter identifiers that may make the candidate superfluous
229 |                 $parentIds = array($parts[0]);
230 |                 if ($chunkCount > 2) {
231 |                     $parentIds[] = $parts[0] . '_' . $parts[$chunkCount - 1];
232 |                 }
233 |                 $candidateHash = serialize($candidate->categories);
234 |                 foreach ($languages as $other) {
235 |                     $redundant = in_array($other->id, $parentIds, true)
236 |                         && $other->formula === $candidate->formula
237 |                         && $candidateHash === serialize($other->categories);
238 |                     if ($redundant) {
239 |                         break;
240 |                     }
241 |                 }
242 |             }
243 |             if (!$redundant) {
244 |                 $survivors[] = $candidate;
245 |             }
246 |         }
247 |         // The next pass works on what is left from this one
248 |         $languages = $survivors;
249 |     }
250 |
251 |     return $languages;
252 | }
253 | }
254 |
255 | // Parse the command line options
256 | Enviro::initialize();
257 |
258 | try {
259 |     // Resolve the languages to export: the requested ones, or all the available ones
260 |     if (isset(Enviro::$languages)) {
261 |         $languages = array();
262 |         foreach (Enviro::$languages as $languageId) {
263 |             $language = Language::getById($languageId);
264 |             if (!isset($language)) {
265 |                 throw new Exception("Unable to find the language with id '{$languageId}'");
266 |             }
267 |             $languages[] = $language;
268 |         }
269 |     } else {
270 |         $languages = Language::getAll();
271 |     }
272 |     if (Enviro::$reduce) {
273 |         $languages = Enviro::reduce($languages);
274 |     }
275 |     if (Enviro::$extraParenthesis === false) {
276 |         // Rebuild the formulas without the extra parenthesis
277 |         foreach ($languages as $language) {
278 |             $language->formula = $language->buildFormula(true);
279 |         }
280 |     }
281 |     // Resolve the exporter class and build the options to be passed to it
282 |     $exporterClass = Exporter::getExporterClassName(Enviro::$outputFormat);
283 |     $options = array(
284 |         'us-ascii' => Enviro::$outputUSAscii,
285 |         'both-formulas' => Enviro::$extraParenthesis === null, // null stands for --parenthesis=both
286 |     );
287 |     if ($options['both-formulas'] && !call_user_func(array($exporterClass, 'supportsFormulasWithAndWithoutParenthesis'))) {
288 |         throw new Exception("The selected exporter doesn't support exporting data with and without extra parenthesis");
289 |     }
290 |     // Save to file when --output has been specified, otherwise print the result
291 |     if (isset(Enviro::$outputFilename)) {
292 |         echo call_user_func(array($exporterClass, 'toFile'), $languages, Enviro::$outputFilename, $options);
293 |     } else {
294 |         echo call_user_func(array($exporterClass, 'toString'), $languages, $options);
295 |     }
296 | } catch (Exception $x) {
297 |     // Report the failure (message and stack trace) on STDERR and exit with code 4
298 |     fwrite(STDERR, $x->getMessage() . "\n");
299 |     fwrite(STDERR, "Trace:\n");
300 |     fwrite(STDERR, $x->getTraceAsString() . "\n");
301 |     exit(4);
302 | }
303 |
304 | exit(0);
305 |
--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/territories.json:
--------------------------------------------------------------------------------
1 | {
2 | "main": {
3 | "en-US": {
4 | "identity": {
5 | "version": {
6 | "_cldrVersion": "47"
7 | },
8 | "language": "en",
9 | "territory": "US"
10 | },
11 | "localeDisplayNames": {
12 | "territories": {
13 | "001": "world",
14 | "002": "Africa",
15 | "003": "North America",
16 | "005": "South America",
17 | "009": "Oceania",
18 | "011": "Western Africa",
19 | "013": "Central America",
20 | "014": "Eastern Africa",
21 | "015": "Northern Africa",
22 | "017": "Middle Africa",
23 | "018": "Southern Africa",
24 | "019": "Americas",
25 | "021": "Northern America",
26 | "029": "Caribbean",
27 | "030": "Eastern Asia",
28 | "034": "Southern Asia",
29 | "035": "Southeast Asia",
30 | "039": "Southern Europe",
31 | "053": "Australasia",
32 | "054": "Melanesia",
33 | "057": "Micronesian Region",
34 | "061": "Polynesia",
35 | "142": "Asia",
36 | "143": "Central Asia",
37 | "145": "Western Asia",
38 | "150": "Europe",
39 | "151": "Eastern Europe",
40 | "154": "Northern Europe",
41 | "155": "Western Europe",
42 | "202": "Sub-Saharan Africa",
43 | "419": "Latin America",
44 | "AC": "Ascension Island",
45 | "AD": "Andorra",
46 | "AE": "United Arab Emirates",
47 | "AF": "Afghanistan",
48 | "AG": "Antigua & Barbuda",
49 | "AI": "Anguilla",
50 | "AL": "Albania",
51 | "AM": "Armenia",
52 | "AO": "Angola",
53 | "AQ": "Antarctica",
54 | "AR": "Argentina",
55 | "AS": "American Samoa",
56 | "AT": "Austria",
57 | "AU": "Australia",
58 | "AW": "Aruba",
59 | "AX": "Åland Islands",
60 | "AZ": "Azerbaijan",
61 | "BA": "Bosnia & Herzegovina",
62 | "BA-alt-short": "Bosnia",
63 | "BB": "Barbados",
64 | "BD": "Bangladesh",
65 | "BE": "Belgium",
66 | "BF": "Burkina Faso",
67 | "BG": "Bulgaria",
68 | "BH": "Bahrain",
69 | "BI": "Burundi",
70 | "BJ": "Benin",
71 | "BL": "St. Barthélemy",
72 | "BM": "Bermuda",
73 | "BN": "Brunei",
74 | "BO": "Bolivia",
75 | "BQ": "Caribbean Netherlands",
76 | "BR": "Brazil",
77 | "BS": "Bahamas",
78 | "BT": "Bhutan",
79 | "BV": "Bouvet Island",
80 | "BW": "Botswana",
81 | "BY": "Belarus",
82 | "BZ": "Belize",
83 | "CA": "Canada",
84 | "CC": "Cocos (Keeling) Islands",
85 | "CC-alt-short": "Cocos Islands",
86 | "CD": "Congo - Kinshasa",
87 | "CD-alt-variant": "Congo (DRC)",
88 | "CF": "Central African Republic",
89 | "CG": "Congo - Brazzaville",
90 | "CG-alt-variant": "Congo (Republic)",
91 | "CH": "Switzerland",
92 | "CI": "Côte d’Ivoire",
93 | "CI-alt-variant": "Ivory Coast",
94 | "CK": "Cook Islands",
95 | "CL": "Chile",
96 | "CM": "Cameroon",
97 | "CN": "China",
98 | "CO": "Colombia",
99 | "CP": "Clipperton Island",
100 | "CQ": "Sark",
101 | "CR": "Costa Rica",
102 | "CU": "Cuba",
103 | "CV": "Cape Verde",
104 | "CV-alt-variant": "Cabo Verde",
105 | "CW": "Curaçao",
106 | "CX": "Christmas Island",
107 | "CY": "Cyprus",
108 | "CZ": "Czechia",
109 | "CZ-alt-variant": "Czech Republic",
110 | "DE": "Germany",
111 | "DG": "Diego Garcia",
112 | "DJ": "Djibouti",
113 | "DK": "Denmark",
114 | "DM": "Dominica",
115 | "DO": "Dominican Republic",
116 | "DZ": "Algeria",
117 | "EA": "Ceuta & Melilla",
118 | "EC": "Ecuador",
119 | "EE": "Estonia",
120 | "EG": "Egypt",
121 | "EH": "Western Sahara",
122 | "ER": "Eritrea",
123 | "ES": "Spain",
124 | "ET": "Ethiopia",
125 | "EU": "European Union",
126 | "EZ": "Eurozone",
127 | "FI": "Finland",
128 | "FJ": "Fiji",
129 | "FK": "Falkland Islands",
130 | "FK-alt-variant": "Falkland Islands (Islas Malvinas)",
131 | "FM": "Micronesia",
132 | "FO": "Faroe Islands",
133 | "FR": "France",
134 | "GA": "Gabon",
135 | "GB": "United Kingdom",
136 | "GB-alt-short": "UK",
137 | "GD": "Grenada",
138 | "GE": "Georgia",
139 | "GF": "French Guiana",
140 | "GG": "Guernsey",
141 | "GH": "Ghana",
142 | "GI": "Gibraltar",
143 | "GL": "Greenland",
144 | "GM": "Gambia",
145 | "GN": "Guinea",
146 | "GP": "Guadeloupe",
147 | "GQ": "Equatorial Guinea",
148 | "GR": "Greece",
149 | "GS": "South Georgia & South Sandwich Islands",
150 | "GT": "Guatemala",
151 | "GU": "Guam",
152 | "GW": "Guinea-Bissau",
153 | "GY": "Guyana",
154 | "HK": "Hong Kong SAR China",
155 | "HK-alt-short": "Hong Kong",
156 | "HM": "Heard & McDonald Islands",
157 | "HN": "Honduras",
158 | "HR": "Croatia",
159 | "HT": "Haiti",
160 | "HU": "Hungary",
161 | "IC": "Canary Islands",
162 | "ID": "Indonesia",
163 | "IE": "Ireland",
164 | "IL": "Israel",
165 | "IM": "Isle of Man",
166 | "IN": "India",
167 | "IO": "British Indian Ocean Territory",
168 | "IO-alt-biot": "British Indian Ocean Territory",
169 | "IO-alt-chagos": "Chagos Archipelago",
170 | "IQ": "Iraq",
171 | "IR": "Iran",
172 | "IS": "Iceland",
173 | "IT": "Italy",
174 | "JE": "Jersey",
175 | "JM": "Jamaica",
176 | "JO": "Jordan",
177 | "JP": "Japan",
178 | "KE": "Kenya",
179 | "KG": "Kyrgyzstan",
180 | "KH": "Cambodia",
181 | "KI": "Kiribati",
182 | "KM": "Comoros",
183 | "KN": "St. Kitts & Nevis",
184 | "KP": "North Korea",
185 | "KR": "South Korea",
186 | "KW": "Kuwait",
187 | "KY": "Cayman Islands",
188 | "KZ": "Kazakhstan",
189 | "LA": "Laos",
190 | "LB": "Lebanon",
191 | "LC": "St. Lucia",
192 | "LI": "Liechtenstein",
193 | "LK": "Sri Lanka",
194 | "LR": "Liberia",
195 | "LS": "Lesotho",
196 | "LT": "Lithuania",
197 | "LU": "Luxembourg",
198 | "LV": "Latvia",
199 | "LY": "Libya",
200 | "MA": "Morocco",
201 | "MC": "Monaco",
202 | "MD": "Moldova",
203 | "ME": "Montenegro",
204 | "MF": "St. Martin",
205 | "MG": "Madagascar",
206 | "MH": "Marshall Islands",
207 | "MK": "North Macedonia",
208 | "ML": "Mali",
209 | "MM": "Myanmar (Burma)",
210 | "MM-alt-short": "Myanmar",
211 | "MN": "Mongolia",
212 | "MO": "Macao SAR China",
213 | "MO-alt-short": "Macao",
214 | "MP": "Northern Mariana Islands",
215 | "MQ": "Martinique",
216 | "MR": "Mauritania",
217 | "MS": "Montserrat",
218 | "MT": "Malta",
219 | "MU": "Mauritius",
220 | "MV": "Maldives",
221 | "MW": "Malawi",
222 | "MX": "Mexico",
223 | "MY": "Malaysia",
224 | "MZ": "Mozambique",
225 | "NA": "Namibia",
226 | "NC": "New Caledonia",
227 | "NE": "Niger",
228 | "NF": "Norfolk Island",
229 | "NG": "Nigeria",
230 | "NI": "Nicaragua",
231 | "NL": "Netherlands",
232 | "NO": "Norway",
233 | "NP": "Nepal",
234 | "NR": "Nauru",
235 | "NU": "Niue",
236 | "NZ": "New Zealand",
237 | "NZ-alt-variant": "Aotearoa New Zealand",
238 | "OM": "Oman",
239 | "PA": "Panama",
240 | "PE": "Peru",
241 | "PF": "French Polynesia",
242 | "PG": "Papua New Guinea",
243 | "PH": "Philippines",
244 | "PK": "Pakistan",
245 | "PL": "Poland",
246 | "PM": "St. Pierre & Miquelon",
247 | "PN": "Pitcairn Islands",
248 | "PN-alt-short": "Pitcairn",
249 | "PR": "Puerto Rico",
250 | "PS": "Palestinian Territories",
251 | "PS-alt-short": "Palestine",
252 | "PT": "Portugal",
253 | "PW": "Palau",
254 | "PY": "Paraguay",
255 | "QA": "Qatar",
256 | "QO": "Outlying Oceania",
257 | "RE": "Réunion",
258 | "RO": "Romania",
259 | "RS": "Serbia",
260 | "RU": "Russia",
261 | "RW": "Rwanda",
262 | "SA": "Saudi Arabia",
263 | "SB": "Solomon Islands",
264 | "SC": "Seychelles",
265 | "SD": "Sudan",
266 | "SE": "Sweden",
267 | "SG": "Singapore",
268 | "SH": "St. Helena",
269 | "SI": "Slovenia",
270 | "SJ": "Svalbard & Jan Mayen",
271 | "SK": "Slovakia",
272 | "SL": "Sierra Leone",
273 | "SM": "San Marino",
274 | "SN": "Senegal",
275 | "SO": "Somalia",
276 | "SR": "Suriname",
277 | "SS": "South Sudan",
278 | "ST": "São Tomé & Príncipe",
279 | "SV": "El Salvador",
280 | "SX": "Sint Maarten",
281 | "SY": "Syria",
282 | "SZ": "Eswatini",
283 | "SZ-alt-variant": "Swaziland",
284 | "TA": "Tristan da Cunha",
285 | "TC": "Turks & Caicos Islands",
286 | "TD": "Chad",
287 | "TF": "French Southern Territories",
288 | "TG": "Togo",
289 | "TH": "Thailand",
290 | "TJ": "Tajikistan",
291 | "TK": "Tokelau",
292 | "TL": "Timor-Leste",
293 | "TL-alt-variant": "East Timor",
294 | "TM": "Turkmenistan",
295 | "TN": "Tunisia",
296 | "TO": "Tonga",
297 | "TR": "Türkiye",
298 | "TR-alt-variant": "Turkey",
299 | "TT": "Trinidad & Tobago",
300 | "TV": "Tuvalu",
301 | "TW": "Taiwan",
302 | "TZ": "Tanzania",
303 | "UA": "Ukraine",
304 | "UG": "Uganda",
305 | "UM": "U.S. Outlying Islands",
306 | "UN": "United Nations",
307 | "UN-alt-short": "UN",
308 | "US": "United States",
309 | "US-alt-short": "US",
310 | "UY": "Uruguay",
311 | "UZ": "Uzbekistan",
312 | "VA": "Vatican City",
313 | "VC": "St. Vincent & Grenadines",
314 | "VE": "Venezuela",
315 | "VG": "British Virgin Islands",
316 | "VI": "U.S. Virgin Islands",
317 | "VN": "Vietnam",
318 | "VU": "Vanuatu",
319 | "WF": "Wallis & Futuna",
320 | "WS": "Samoa",
321 | "XA": "Pseudo-Accents",
322 | "XB": "Pseudo-Bidi",
323 | "XK": "Kosovo",
324 | "YE": "Yemen",
325 | "YT": "Mayotte",
326 | "ZA": "South Africa",
327 | "ZM": "Zambia",
328 | "ZW": "Zimbabwe",
329 | "ZZ": "Unknown Region"
330 | }
331 | }
332 | }
333 | }
334 | }
--------------------------------------------------------------------------------
/src/CldrData.php:
--------------------------------------------------------------------------------
1 |
83 | * "en": {
84 | * "pluralRule-count-one": "i = 1 and v = 0 @integer 1",
85 | * "pluralRule-count-other": " @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …"
86 | * }
87 | *
88 | *
89 | * @return array
90 | */
91 | public static function getPlurals()
92 | {
93 | return self::getData('plurals'); // the 'plurals' section is built by getData() from cldr-data/supplemental/plurals.json
94 | }
95 |
96 | /**
97 | * Return the superseded (former) language codes, as collected by getData() in its 'supersededLanguages' section.
98 | *
99 | * @return array keys are the former language codes (eg 'jw'), values are the new language/locale codes (eg 'jv')
100 | */
101 | public static function getSupersededLanguages()
102 | {
103 | return self::getData('supersededLanguages');
104 | }
105 |
106 | /**
107 | * Retrieve the details of a language/locale: normalized identifier, display name, plural categories and, when applicable, script, territory, base language and the code that supersedes it.
108 | *
109 | * @param string $id the language identifier: language (2-3 letters), optional script (4 letters), optional territory (2 letters or 3 digits), separated by '_' or '-', case-insensitive
110 | *
111 | * @return array|null Returns an array with the keys 'id' (normalized), 'name', 'supersededBy' (optional), 'territory' (optional), 'script' (optional), 'baseLanguage' (optional), 'categories'. Returns null if $id is malformed or if its script, territory, language name or plural rules are not known.
112 | */
113 | public static function getLanguageInfo($id)
114 | {
115 | $result = null;
116 | $matches = array();
117 | if (preg_match('/^([a-z]{2,3})(?:[_\-]([a-z]{4}))?(?:[_\-]([a-z]{2}|[0-9]{3}))?(?:$|-)/i', $id, $matches)) {
118 | $languageId = strtolower($matches[1]);
119 | $scriptId = (isset($matches[2]) && ($matches[2] !== '')) ? ucfirst(strtolower($matches[2])) : null;
120 | $territoryId = (isset($matches[3]) && ($matches[3] !== '')) ? strtoupper($matches[3]) : null;
121 | $normalizedId = $languageId;
122 | if (isset($scriptId)) {
123 | $normalizedId .= '_' . $scriptId;
124 | }
125 | if (isset($territoryId)) {
126 | $normalizedId .= '_' . $territoryId;
127 | }
128 | // Build the lookup keys from the most specific one to the bare language. Structure precedence: see Likely Subtags - http://www.unicode.org/reports/tr35/tr35-31/tr35.html#Likely_Subtags
129 | $variants = array();
130 | $variantsWithScript = array();
131 | $variantsWithTerritory = array();
132 | if (isset($scriptId) && isset($territoryId)) {
133 | $variantsWithTerritory[] = $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}_{$territoryId}";
134 | }
135 | if (isset($scriptId)) {
136 | $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}";
137 | }
138 | if (isset($territoryId)) {
139 | $variantsWithTerritory[] = $variants[] = "{$languageId}_{$territoryId}";
140 | }
141 | $variants[] = $languageId;
142 | $allGood = true; // switched to false as soon as a required piece of data is missing
143 | $scriptName = null;
144 | $scriptStandAloneName = null;
145 | if (isset($scriptId)) {
146 | $scriptNames = self::getScriptNames(false);
147 | if (isset($scriptNames[$scriptId])) {
148 | $scriptName = $scriptNames[$scriptId];
149 | $scriptStandAloneNames = self::getScriptNames(true);
150 | $scriptStandAloneName = $scriptStandAloneNames[$scriptId];
151 | } else {
152 | $allGood = false;
153 | }
154 | }
155 | $territoryName = null;
156 | if (isset($territoryId)) {
157 | $territoryNames = self::getTerritoryNames();
158 | if (isset($territoryNames[$territoryId])) {
159 | if ($territoryId !== '001') { // 001 is accepted, but its name is neither appended to the language name nor returned
160 | $territoryName = $territoryNames[$territoryId];
161 | }
162 | } else {
163 | $allGood = false;
164 | }
165 | }
166 | $languageName = null;
167 | $languageNames = self::getLanguageNames();
168 | foreach ($variants as $variant) { // the first (most specific) variant with a name wins
169 | if (isset($languageNames[$variant])) {
170 | $languageName = $languageNames[$variant];
171 | if (isset($scriptName) && (!in_array($variant, $variantsWithScript))) { // the name found does not cover the script: prepend it
172 | $languageName = $scriptName . ' ' . $languageName;
173 | }
174 | if (isset($territoryName) && (!in_array($variant, $variantsWithTerritory))) { // the name found does not cover the territory: append it
175 | $languageName .= ' (' . $territoryNames[$territoryId] . ')';
176 | }
177 | break;
178 | }
179 | }
180 | if (!isset($languageName)) {
181 | $allGood = false;
182 | }
183 | $baseLanguage = null;
184 | if (isset($scriptId) || isset($territoryId)) {
185 | if (isset($languageNames[$languageId]) && ($languageNames[$languageId] !== $languageName)) {
186 | $baseLanguage = $languageNames[$languageId];
187 | }
188 | }
189 | $plural = null;
190 | $plurals = self::getPlurals();
191 | foreach ($variants as $variant) { // the first (most specific) variant with plural rules wins
192 | if (isset($plurals[$variant])) {
193 | $plural = $plurals[$variant];
194 | break;
195 | }
196 | }
197 | if (!isset($plural)) {
198 | $allGood = false;
199 | }
200 | $supersededBy = null;
201 | $supersededBys = self::getSupersededLanguages();
202 | foreach ($variants as $variant) {
203 | if (isset($supersededBys[$variant])) {
204 | $supersededBy = $supersededBys[$variant];
205 | break;
206 | }
207 | }
208 | if ($allGood) {
209 | $result = array();
210 | $result['id'] = $normalizedId;
211 | $result['name'] = $languageName;
212 | if (isset($supersededBy)) {
213 | $result['supersededBy'] = $supersededBy;
214 | }
215 | if (isset($scriptStandAloneName)) {
216 | $result['script'] = $scriptStandAloneName;
217 | }
218 | if (isset($territoryName)) {
219 | $result['territory'] = $territoryName;
220 | }
221 | if (isset($baseLanguage)) {
222 | $result['baseLanguage'] = $baseLanguage;
223 | }
224 | $result['categories'] = $plural;
225 | }
226 | }
227 |
228 | return $result;
229 | }
230 |
231 | /**
232 | * Returns a section of the CLDR data, loading and normalizing the bundled JSON files on first use (the result is kept in self::$data).
233 | *
234 | * @param string $key Can be 'languages', 'territories', 'plurals', 'supersededLanguages', 'scripts', 'standAloneScripts'
235 | * @throws Exception if $key is not valid, or if we have the plural rule of a language but not its name
236 | * @return array
237 | */
238 | private static function getData($key)
239 | {
240 | if (!isset(self::$data)) {
241 | $fixKeys = function ($list, &$standAlone = null) { // normalizes the keys of $list; the 'stand-alone' variants are collected in $standAlone
242 | $result = array();
243 | $standAlone = array();
244 | $match = null;
245 | foreach ($list as $key => $value) {
246 | $variant = '';
247 | if (preg_match('/^(.+)-alt-(short|variant|stand-alone|long|menu)$/', $key, $match)) {
248 | $key = $match[1];
249 | $variant = $match[2];
250 | }
251 | $key = str_replace('-', '_', $key);
252 | switch ($key) {
253 | case 'root': // Language: Root
254 | case 'und': // Language: Unknown Language
255 | case 'zxx': // Language: No linguistic content
256 | case 'ZZ': // Territory: Unknown Region
257 | case 'Zinh': // Script: Inherited
258 | case 'Zmth': // Script: Mathematical Notation
259 | case 'Zsym': // Script: Symbols
260 | case 'Zxxx': // Script: Unwritten
261 | case 'Zyyy': // Script: Common
262 | case 'Zzzz': // Script: Unknown Script
263 | break;
264 | default:
265 | switch ($variant) { // the other variants (short, variant, long, menu) are discarded
266 | case 'stand-alone':
267 | $standAlone[$key] = $value;
268 | break;
269 | case '':
270 | $result[$key] = $value;
271 | break;
272 | }
273 | break;
274 | }
275 | }
276 |
277 | return $result;
278 | };
279 | $data = array();
280 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/languages.json'), true);
281 | $data['languages'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['languages']);
282 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/territories.json'), true);
283 | $data['territories'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['territories']);
284 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/supplemental/plurals.json'), true);
285 | $data['plurals'] = $fixKeys($json['supplemental']['plurals-type-cardinal']);
286 | $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/scripts.json'), true);
287 | $data['scripts'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['scripts'], $data['standAloneScripts']);
288 | $data['standAloneScripts'] = array_merge($data['scripts'], $data['standAloneScripts']); // stand-alone names, falling back to the plain names
289 | $data['scripts'] = array_merge($data['standAloneScripts'], $data['scripts']); // plain names, falling back to the stand-alone names
290 | $data['supersededLanguages'] = array();
291 | // Remove the languages for which we don't have plurals (neither for the full code nor for its leading language chunk)
292 | $m = null;
293 | foreach (array_keys(array_diff_key($data['languages'], $data['plurals'])) as $missingPlural) {
294 | if (preg_match('/^([a-z]{2,3})_/', $missingPlural, $m)) {
295 | if (!isset($data['plurals'][$m[1]])) {
296 | unset($data['languages'][$missingPlural]);
297 | }
298 | } else {
299 | unset($data['languages'][$missingPlural]);
300 | }
301 | }
302 | // Add the names of the languages for which we have plurals but no language name
303 | $formerCodes = array(
304 | 'jw' => 'jv', // former Javanese
305 | 'mo' => 'ro_MD', // former Moldavian
306 | );
307 | $knownMissingLanguages = array(
308 | 'guw' => 'Gun',
309 | 'hnj' => 'Hmong Njua',
310 | 'lld' => 'Dolomitic Ladin',
311 | 'nah' => 'Nahuatl',
312 | 'smi' => 'Sami',
313 | );
314 | foreach (array_keys(array_diff_key($data['plurals'], $data['languages'])) as $missingLanguage) {
315 | if (isset($formerCodes[$missingLanguage]) && isset($data['languages'][$formerCodes[$missingLanguage]])) { // former code: reuse the name of the code that replaced it
316 | $data['languages'][$missingLanguage] = $data['languages'][$formerCodes[$missingLanguage]];
317 | $data['supersededLanguages'][$missingLanguage] = $formerCodes[$missingLanguage];
318 | } else {
319 | if (isset($knownMissingLanguages[$missingLanguage])) {
320 | $data['languages'][$missingLanguage] = $knownMissingLanguages[$missingLanguage];
321 | } else {
322 | throw new Exception("We have the plural rule for the language '{$missingLanguage}' but we don't have its language name");
323 | }
324 | }
325 | }
326 | ksort($data['languages'], SORT_STRING);
327 | ksort($data['territories'], SORT_STRING);
328 | ksort($data['plurals'], SORT_STRING);
329 | ksort($data['scripts'], SORT_STRING);
330 | ksort($data['standAloneScripts'], SORT_STRING);
331 | ksort($data['supersededLanguages'], SORT_STRING);
332 | self::$data = $data;
333 | }
334 | if (!isset(self::$data[$key])) {
335 | throw new Exception("Invalid CLDR data key: '{$key}'");
336 | }
337 |
338 | return self::$data[$key];
339 | }
340 | }
341 |
--------------------------------------------------------------------------------
/src/Language.php:
--------------------------------------------------------------------------------
1 | id = $info['id'];
78 | $this->name = $info['name'];
79 | $this->supersededBy = isset($info['supersededBy']) ? $info['supersededBy'] : null;
80 | $this->script = isset($info['script']) ? $info['script'] : null;
81 | $this->territory = isset($info['territory']) ? $info['territory'] : null;
82 | $this->baseLanguage = isset($info['baseLanguage']) ? $info['baseLanguage'] : null;
83 | // Let's build the category list
84 | $this->categories = array();
85 | foreach ($info['categories'] as $cldrCategoryId => $cldrFormulaAndExamples) {
86 | $category = new Category($cldrCategoryId, $cldrFormulaAndExamples);
87 | foreach ($this->categories as $c) {
88 | if ($category->id === $c->id) {
89 | throw new Exception("The category '{$category->id}' is specified more than once");
90 | }
91 | }
92 | $this->categories[] = $category;
93 | }
94 | if (empty($this->categories)) {
95 | throw new Exception("The language '{$info['id']}' does not have any plural category");
96 | }
97 | // Let's sort the categories from 'zero' to 'other'
98 | usort($this->categories, function (Category $category1, Category $category2) {
99 | return array_search($category1->id, CldrData::$categories) - array_search($category2->id, CldrData::$categories);
100 | });
101 | // The 'other' category should always be there
102 | if ($this->categories[count($this->categories) - 1]->id !== CldrData::OTHER_CATEGORY) {
103 | throw new Exception("The language '{$info['id']}' does not have the '" . CldrData::OTHER_CATEGORY . "' plural category");
104 | }
105 | $this->checkAlwaysTrueCategories();
106 | $this->checkAlwaysFalseCategories();
107 | $this->checkAllCategoriesWithExamples();
108 | $this->formula = $this->buildFormula();
109 | }
110 |
111 | /**
112 |  * Build a Language instance for every language identifier listed by CldrData::getLanguageNames().
113 |  *
114 |  * @throws \Exception
115 |  *
116 |  * @return \Gettext\Languages\Language[]
117 |  */
118 | public static function getAll()
119 | {
120 |     $languages = array();
121 |     foreach (CldrData::getLanguageNames() as $languageId => $unusedName) {
122 |         $info = CldrData::getLanguageInfo($languageId);
123 |         $languages[] = new self($info);
124 |     }
125 |     return $languages;
126 | }
127 |
128 | /**
129 |  * Look up a language by its identifier and wrap its data in a Language instance.
130 |  *
131 |  * @param string $id the language identifier, as accepted by CldrData::getLanguageInfo()
132 |  *
133 |  * @return \Gettext\Languages\Language|null null when CldrData has no data for $id
134 |  */
135 | public static function getById($id)
136 | {
137 |     $info = CldrData::getLanguageInfo($id);
138 |     if ($info === null) {
139 |         // No data available for this identifier
140 |         return null;
141 |     }
142 |
143 |     return new self($info);
144 | }
145 |
146 | /**
147 |  * Create a copy of this language (and of its categories) whose name, formula and category examples are passed through asciifier().
148 |  *
149 |  * @return \Gettext\Languages\Language
150 |  */
151 | public function getUSAsciiClone()
152 | {
153 |     $asciiLanguage = clone $this;
154 |     self::asciifier($asciiLanguage->name);
155 |     self::asciifier($asciiLanguage->formula);
156 |     $asciiCategories = array();
157 |     foreach ($this->categories as $original) {
158 |         $copy = clone $original;
159 |         self::asciifier($copy->examples);
160 |         $asciiCategories[] = $copy;
161 |     }
162 |     $asciiLanguage->categories = $asciiCategories;
163 |     return $asciiLanguage;
164 | }
165 |
166 | /**
167 |  * Compose the plural formula out of the formulas of the current categories.
168 |  *
169 |  * @param bool $withoutParenthesis TRUE for the standard gettext format (no extra parenthesis), FALSE (default) for a PHP-compatible formula
170 |  *
171 |  * @return string
172 |  */
173 | public function buildFormula($withoutParenthesis = false)
174 | {
175 |     $count = count($this->categories);
176 |     if ($count === 1) {
177 |         // Just one category
178 |         return '0';
179 |     }
180 |     if ($count === 2) {
181 |         return self::reduceFormula(self::reverseFormula($this->categories[0]->formula));
182 |     }
183 |     // Chain the ternary operators from the last-but-one category back to the first one:
184 |     // the index of the last category is the final fallback
185 |     $result = (string) ($count - 1);
186 |     for ($index = $count - 2; $index >= 0; $index--) {
187 |         $condition = self::reduceFormula($this->categories[$index]->formula);
188 |         if (!$withoutParenthesis && !preg_match('/^\([^()]+\)$/', $condition)) {
189 |             $condition = "({$condition})";
190 |         }
191 |         $result = "{$condition} ? {$index} : {$result}";
192 |         if (!$withoutParenthesis && $index > 0) {
193 |             $result = "({$result})";
194 |         }
195 |     }
196 |     return $result;
197 | }
198 |
199 | /**
200 |  * Look for a category whose formula is always true.
201 |  * CLDR rules also cover decimals, so they may define more cases than the ones gettext needs for integers.
202 |  * When such a category is found, it becomes the only category of this language (as the 'other' one).
203 |  *
204 |  * @throws \Exception
205 |  */
206 | private function checkAlwaysTrueCategories()
207 | {
208 |     foreach ($this->categories as $category) {
209 |         if ($category->formula !== true) {
210 |             continue;
211 |         }
212 |         // This category always occurs: it must have examples, and no other category may have them
213 |         if (!isset($category->examples)) {
214 |             throw new Exception("The category '{$category->id}' should always occur, but it does not have examples (so for CLDR it will never occur for integers!)");
215 |         }
216 |         foreach ($this->categories as $other) {
217 |             if ($other !== $category && isset($other->examples)) {
218 |                 throw new Exception("The category '{$other->id}' should never occur, but it has some examples (so for CLDR it will occur!)");
219 |             }
220 |         }
221 |         // Keep this category only, turning it into the catch-all one
222 |         $category->id = CldrData::OTHER_CATEGORY;
223 |         $category->formula = null;
224 |         $this->categories = array($category);
225 |
226 |         return;
227 |     }
228 | }
229 |
230 | /**
231 |  * Strip out the categories whose formula is always false.
232 |  * CLDR rules also cover decimals, so some of their cases may never apply to the integers handled by gettext.
233 |  * A category that never occurs must not come with examples: if it does, an exception is thrown.
234 |  *
235 |  * @throws \Exception
236 |  */
237 | private function checkAlwaysFalseCategories()
238 | {
239 |     $kept = array();
240 |     foreach ($this->categories as $category) {
241 |         if ($category->formula !== false) {
242 |             $kept[] = $category;
243 |             continue;
244 |         }
245 |         if (isset($category->examples)) {
246 |             throw new Exception("The category '{$category->id}' should never occur, but it has examples (so for CLDR it may occur!)");
247 |         }
248 |     }
249 |     $this->categories = $kept;
250 | }
251 |
/**
 * Strip the categories that come without examples, after checking that they can really never occur.
 *
 * CLDR rules also describe decimal numbers, whereas gettext only deals with integers:
 * a category may therefore have no (integer) example at all.
 * Only explicitly reviewed combinations of categories/formulas are accepted; anything else is an error.
 *
 * @throws \Exception when the combination of categories without examples has not been reviewed
 */
private function checkAllCategoriesWithExamples()
{
    $everyId = array();
    $withExamples = array();
    $withoutExamples = array();
    $withoutExamplesIds = array();
    foreach ($this->categories as $category) {
        $everyId[] = $category->id;
        if (!isset($category->examples)) {
            $withoutExamples[] = $category;
            $withoutExamplesIds[] = $category->id;
            continue;
        }
        $withExamples[] = $category;
    }
    if ($withoutExamples === array()) {
        return;
    }
    // "<ids without examples>@<all ids>" identifies the situation we are in
    $signature = implode(',', $withoutExamplesIds) . '@' . implode(',', $everyId);
    $safeToStrip = false;
    if ($signature == CldrData::OTHER_CATEGORY . '@one,few,many,' . CldrData::OTHER_CATEGORY) {
        // For both formulas below every integer falls in 'one', 'few' or 'many'
        // (case 3, 'other', is never reached), so 'other' can take the place of 'many':
        // - first formula: numbers ending with 1 (but not 11) are 'one';
        //   numbers ending with 2, 3, 4 (but not 12, 13, 14) are 'few';
        //   numbers ending with 0, 5, 6, 7, 8, 9, 11, 12, 13, 14 are 'many'.
        // - second formula: the number 1 is 'one';
        //   numbers ending with 2, 3, 4 (but not 12, 13, 14) are 'few';
        //   numbers ending with 0, 1 (but not the number 1), 5, 6, 7, 8, 9, 12, 13, 14 are 'many'.
        $reviewedFormulas = array(
            '(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n % 10 == 0 || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 11 && n % 100 <= 14) ? 2 : 3))',
            '(n == 1) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n != 1 && (n % 10 == 0 || n % 10 == 1) || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 12 && n % 100 <= 14) ? 2 : 3))',
        );
        $safeToStrip = in_array($this->buildFormula(), $reviewedFormulas);
    }
    if (!$safeToStrip) {
        throw new Exception("Unhandled case of plural categories without examples '" . implode(', ', $withoutExamplesIds) . "' out of '" . implode(', ', $everyId) . "'");
    }
    $lastWithoutExamples = end($withoutExamples);
    if ($lastWithoutExamples->id === CldrData::OTHER_CATEGORY) {
        // The 'other' category is going away: the last surviving category becomes the new 'other'
        $newOther = $withExamples[count($withExamples) - 1];
        $newOther->id = CldrData::OTHER_CATEGORY;
        $newOther->formula = null;
    }
    $this->categories = $withExamples;
}
330 |
/**
 * Build the logical negation of a formula.
 *
 * Only a handful of formula shapes (plus two hard-coded formulas) are supported.
 *
 * @param string $formula
 *
 * @throws \Exception when the formula has a shape that is not supported
 *
 * @return string
 */
private static function reverseFormula($formula)
{
    // "n == ..." / "n % x == ..."  <=>  "n != ..." / "n % x != ..."
    if (preg_match('/^n( % \d+)? == \d+(\.\.\d+|,\d+)*?$/', $formula)) {
        return strtr($formula, array(' == ' => ' != '));
    }
    if (preg_match('/^n( % \d+)? != \d+(\.\.\d+|,\d+)*?$/', $formula)) {
        return strtr($formula, array(' != ' => ' == '));
    }
    // "(n == a || n == b)"  =>  "n != a && n != b"
    if (preg_match('/^\(?n == \d+ \|\| n == \d+\)?$/', $formula)) {
        $negated = strtr($formula, array(' == ' => ' != ', ' || ' => ' && '));

        return trim($negated, '()');
    }
    // "A == x && B != y"  =>  "A != x || B == y"
    $parts = null;
    if (preg_match('/^(n(?: % \d+)?) == (\d+) && (n(?: % \d+)?) != (\d+)$/', $formula, $parts)) {
        return sprintf('%s != %s || %s == %s', $parts[1], $parts[2], $parts[3], $parts[4]);
    }
    // Formulas that are too complex to be negated automatically
    $handCrafted = array(
        '(n == 1 || n == 2 || n == 3) || n % 10 != 4 && n % 10 != 6 && n % 10 != 9' => 'n != 1 && n != 2 && n != 3 && (n % 10 == 4 || n % 10 == 6 || n % 10 == 9)',
        '(n == 0 || n == 1) || n >= 11 && n <= 99' => 'n >= 2 && (n < 11 || n > 99)',
    );
    if (isset($handCrafted[$formula])) {
        return $handCrafted[$formula];
    }
    throw new Exception("Unable to reverse the formula '{$formula}'");
}
363 |
/**
 * Replace a few needlessly verbose formulas with a shorter equivalent.
 *
 * Formulas that are not in the list of known simplifications are returned untouched.
 *
 * @param string $formula
 *
 * @return string
 */
private static function reduceFormula($formula)
{
    $simplifications = array(
        'n != 0 && n != 1' => 'n > 1',
        '(n == 0 || n == 1) && n != 0' => 'n == 1',
    );
    if (!isset($simplifications[$formula])) {
        return $formula;
    }

    return $simplifications[$formula];
}
380 |
/**
 * Replace, in place, a fixed set of accented letters and typographic marks with US-ASCII characters.
 *
 * Values that are not strings, as well as empty strings, are left untouched.
 *
 * @param mixed $value the variable to work on (received by reference)
 */
private static function asciifier(&$value)
{
    if (!is_string($value) || $value === '') {
        return;
    }
    // Explicit table: we want 'Ÿ' to become 'Y' and not '"Y'
    $asciiEquivalents = array(
        'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A',
        'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
        'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
        'Ñ' => 'N',
        'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
        'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
        'Ÿ' => 'Y', 'Ý' => 'Y',
        'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
        'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
        'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
        'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o',
        'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
        'ý' => 'y', 'ÿ' => 'y',
        '…' => '...',
        'ʼ' => "'", '’' => "'",
    );
    $value = strtr($value, $asciiEquivalents);
}
411 | }
412 |
--------------------------------------------------------------------------------
/bin/import-cldr-data:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env php
2 | outputDir) && !mkdir($options->outputDir, 0777, true)) {
28 | throw new RuntimeException("Cannot create output directory: {$options->outputDir}\n");
29 | }
30 | $options->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', realpath($options->outputDir));
31 | $documentStorage = new DocumentStorage($options);
32 | echo 'Processing languages... ';
33 | $languages = new Languages($options, $documentStorage);
34 | echo "done.\n";
35 | echo 'Processing scripts... ';
36 | $scripts = new Scripts($options, $documentStorage);
37 | echo "done.\n";
38 | echo 'Processing territories... ';
39 | $territories = new Territories($options, $documentStorage);
40 | echo "done.\n";
41 | echo 'Processing plural rules... ';
42 | $plurals = new Plurals($options, $documentStorage, $languages);
43 | echo "done.\n";
44 | echo 'Saving... ';
45 |
46 | $languages->save();
47 | $scripts->save();
48 | $territories->save();
49 | $plurals->save();
50 |
51 | echo "done.\n";
52 | }
53 |
/**
 * The command line options of the import script.
 */
class Options
{
    /**
     * The CLDR version to be imported (for example: 47, 46.1, 47-beta2).
     *
     * @var string
     */
    public $cldrVersion;

    /**
     * The directory where the data will be written to (with '/' as directory separator).
     *
     * @var string
     */
    public $outputDir;

    /**
     * Parse the command line arguments.
     *
     * Prints the syntax and terminates the script when help is requested (exit code 0)
     * or when the number of arguments is wrong (exit code 1).
     *
     * @param string[] $argv the program name followed by the CLDR version and, optionally, the output directory
     *
     * @throws RuntimeException when the CLDR version is malformed
     */
    public function __construct(array $argv)
    {
        if (array_intersect($argv, array('-h', '--help'))) {
            $this->showSyntax($argv[0], 0);
        }
        $this->outputDir = $this->getDefaultOutputDir();
        switch (count($argv)) {
            case 3:
                $this->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $argv[2]);
                // no break
            case 2:
                $this->cldrVersion = $argv[1];
                // The D modifier makes "$" match only at the very end of the string:
                // without it a trailing line feed would be accepted and would end up in the download URL.
                if (!preg_match('/^\d+(\.\d+)?(-(alpha|beta)\d+)?$/D', $this->cldrVersion)) {
                    throw new RuntimeException("{$this->cldrVersion} is not a valid CLDR version identifier");
                }
                break;
            default:
                $this->showSyntax($argv[0], 1);
        }
    }

    /**
     * Print the command line syntax and terminate the script.
     *
     * @param string $programName
     * @param int $exitCode
     *
     * @return never
     */
    private function showSyntax($programName, $exitCode)
    {
        $programName = str_replace('/', DIRECTORY_SEPARATOR, $programName);
        $defaultOutputDir = str_replace('/', DIRECTORY_SEPARATOR, $this->getDefaultOutputDir());

        echo <<<EOT
Syntax: {$programName} <cldr-version> [output-dir]

Arguments:
  cldr-version: the version of the CLDR data.
                Examples:
                  47
                  47-beta2
                  47-alpha1
                  46.1
                  46.1-beta1
  output-dir: the directory where the data will be written to
              Default: {$defaultOutputDir}

EOT;
        exit($exitCode);
    }

    /**
     * Get the default output directory: src/cldr-data under the parent of the directory containing this script.
     *
     * @return string the path, with '/' as directory separator
     */
    private function getDefaultOutputDir()
    {
        return str_replace(DIRECTORY_SEPARATOR, '/', dirname(__DIR__)) . '/src/cldr-data';
    }
}
124 |
/**
 * Downloads the XML documents of a CLDR release and keeps the parsed documents in memory.
 */
class DocumentStorage
{
    /**
     * The URL prefix shared by every document of the requested CLDR release.
     *
     * @var string
     */
    private $baseUrl;

    /**
     * The stream context used for the downloads.
     *
     * @var resource
     */
    private $context;

    /**
     * The documents already downloaded and parsed (array keys are the requested paths).
     *
     * @var DOMDocument[]
     */
    private $cache;

    public function __construct(Options $options)
    {
        // Release tags use dashes instead of dots (for example: 46.1 => release-46-1)
        $this->baseUrl = 'https://raw.githubusercontent.com/unicode-org/cldr/refs/tags/release-' . str_replace('.', '-', $options->cldrVersion);
        $this->context = stream_context_create(array(
            'http' => array(
                'follow_location' => 1,
                'ignore_errors' => false,
            ),
        ));
        $this->cache = array();
    }

    /**
     * Get a parsed document, downloading it only the first time it is requested.
     *
     * @param string $path the path of the document, relative to the root of the release
     *
     * @throws RuntimeException when the download or the parsing fails
     *
     * @return DOMDocument
     */
    public function get($path)
    {
        if (!isset($this->cache[$path])) {
            $xml = $this->fetch($path);
            $doc = $this->loadXml($xml);
            $this->cache[$path] = $doc;
        }

        return $this->cache[$path];
    }

    /**
     * Download a document.
     *
     * @param string $path
     *
     * @throws RuntimeException when the download fails
     *
     * @return string
     */
    private function fetch($path)
    {
        $url = $this->baseUrl . '/' . ltrim($path, '/');
        // Silence the PHP warnings: failures are detected by checking the returned value
        set_error_handler(function () {}, -1);
        $content = file_get_contents($url, false, $this->context);
        restore_error_handler();
        if ($content === false) {
            $details = '';
            /** @var array $http_response_header */
            if (!empty($http_response_header)) {
                // The first response header is the HTTP status line
                $details = " - {$http_response_header[0]}";
            }
            throw new RuntimeException("Failed to download from {$url}{$details}");
        }

        return $content;
    }

    /**
     * Parse an XML document, failing on any problem reported by libxml.
     *
     * @param string $xml
     *
     * @throws RuntimeException when the document is empty or when libxml reports problems
     *
     * @return DOMDocument
     */
    private function loadXml($xml)
    {
        // DOMDocument::loadXML() raises a ValueError (PHP 8+) or a warning (older versions)
        // for empty documents: report this case like any other parsing failure.
        if ((string) $xml === '') {
            throw new RuntimeException("Failed to parse XML:\nthe document is empty");
        }
        $doc = new DOMDocument();
        libxml_clear_errors();
        $restore = libxml_use_internal_errors(true);
        $loaded = $doc->loadXML($xml);
        $errors = libxml_get_errors();
        // Don't leave our errors in the libxml buffer once we have read them
        libxml_clear_errors();
        libxml_use_internal_errors($restore);
        $lines = array();
        foreach ($errors as $error) {
            $lines[] = "{$error->message} at line {$error->line}";
        }
        if (!$loaded || $errors !== array()) {
            throw new RuntimeException("Failed to parse XML:\n" . implode("\n", $lines));
        }

        return $doc;
    }
}
217 |
/**
 * Base class of the importers: each one turns a CLDR XML document into a JSON file.
 */
abstract class Processor
{
    /**
     * @var Options
     */
    protected $options;

    /**
     * The data extracted from the XML document (it's what save() writes).
     *
     * @var array
     */
    protected $data;

    /**
     * @var DocumentStorage
     */
    private $documentStorage;

    /**
     * The path of the XML document (without leading slashes).
     *
     * @var string
     */
    private $path;

    /**
     * Load the XML document and parse it right away.
     *
     * Since parse() is invoked here, subclasses must initialize whatever parse() needs
     * before calling this constructor.
     *
     * @param string $path the path of the XML document inside the CLDR release
     */
    protected function __construct(Options $options, DocumentStorage $documentStorage, $path)
    {
        $this->options = $options;
        $this->documentStorage = $documentStorage;
        $this->path = ltrim($path, '/');
        $doc = $this->documentStorage->get($this->path);
        $this->data = $this->parse($doc);
    }

    /**
     * Write the parsed data to the output file, in JSON format.
     *
     * @throws RuntimeException when the directory can't be created, when the data can't be encoded or when the file can't be fully written
     *
     * @return void
     */
    public function save()
    {
        $file = $this->getOutputFile();
        $dir = dirname($file);
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            throw new RuntimeException("Cannot create directory: {$dir}");
        }
        // Every flag is used only if the running PHP version defines it
        $flags = 0;
        if (defined('JSON_UNESCAPED_SLASHES')) {
            $flags |= JSON_UNESCAPED_SLASHES;
        }
        if (defined('JSON_UNESCAPED_UNICODE')) {
            $flags |= JSON_UNESCAPED_UNICODE;
        }
        if (defined('JSON_PRETTY_PRINT')) {
            $flags |= JSON_PRETTY_PRINT;
        }
        if (defined('JSON_THROW_ON_ERROR')) {
            $flags |= JSON_THROW_ON_ERROR;
        }
        $json = json_encode($this->data, $flags);
        // Without JSON_THROW_ON_ERROR failures are signaled by false: writing it would silently create an empty file
        if ($json === false) {
            throw new RuntimeException("Failed to encode in JSON format the data for the file: {$file}");
        }
        $written = file_put_contents($file, $json);
        if ($written === false || $written < strlen($json)) {
            throw new RuntimeException("Failed to write to file: {$file}");
        }
    }

    /**
     * Extract the data from the XML document.
     *
     * @return array
     */
    abstract protected function parse(DOMDocument $doc);

    /**
     * Sort an array by its keys (case-insensitively), keeping every "<key>-alt-<variant>" key after its "<key>".
     *
     * @return void
     */
    protected function sortByKeyWithPossiblyAlt(array &$data)
    {
        uksort($data, function ($a, $b) {
            // Keys made only of digits are turned into integers by PHP
            $a = (string) $a;
            $b = (string) $b;
            $aAlt = strpos($a, '-alt-') !== false;
            $bAlt = strpos($b, '-alt-') !== false;
            if ($aAlt !== $bAlt) {
                // The two branches mirror each other, so that swapping $a and $b always flips the sign of the result
                if ($bAlt && strpos($b, "{$a}-alt-") === 0) {
                    // $b is an alternative of $a: $a goes first
                    return -1;
                }
                if ($aAlt && strpos($a, "{$b}-alt-") === 0) {
                    // $a is an alternative of $b: $b goes first
                    return 1;
                }
            }

            return strcasecmp($a, $b);
        });
    }

    /**
     * Get the path of the output file, relative to the output directory.
     *
     * @return string
     */
    abstract protected function getOutputRelativeFileName();

    /**
     * Get the full path of the output file.
     *
     * @return string
     */
    private function getOutputFile()
    {
        return $this->options->outputDir . '/' . ltrim($this->getOutputRelativeFileName(), '/');
    }
}
340 |
/**
 * Imports the cardinal plural rules (common/supplemental/plurals.xml).
 */
class Plurals extends Processor
{
    /**
     * The already imported languages (used to recognize the locale identifiers).
     *
     * @var Languages
     */
    private $languages;

    public function __construct(Options $options, DocumentStorage $documentStorage, Languages $languages)
    {
        // To be done before calling the parent constructor, since it invokes parse()
        $this->languages = $languages;
        parent::__construct($options, $documentStorage, 'common/supplemental/plurals.xml');
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $rulesByLocale = array();
        $xpath = new DOMXPath($doc);
        $xPluralRulesList = $xpath->query('/supplementalData/plurals[@type="cardinal"]/pluralRules');
        $definedLanguageIDs = $this->languages->getDefinedLanguageIDs();
        // Locales accepted even if they don't have a language name
        $knownMissingLanguages = array(
            'guw', // Gun
            'lld', // Dolomitic Ladin
            'hnj', // Hmong Njua
            'nah', // Nahuatl
            'smi', // Sami
        );
        // Locales to be renamed (or skipped, when the new name is empty)
        $replacements = array(
            'in' => 'id', // Former Indonesian
            'iw' => 'he', // Former Hebrew
            'jw' => 'jv', // Former Javanese
            'ji' => 'yi', // Former Yiddish
            'mo' => 'ro-MD', // former Moldavian
            'bh' => '', // Former Bihari: dismissed because it can be 'bho', 'mai' or 'mag'
            // Just a CLDR placeholder
            'root' => '',
        );
        $unrecognizedLocaleCodes = array();
        foreach ($xPluralRulesList as $xPluralRules) {
            $locales = preg_split('/\s+/', (string) $xPluralRules->getAttribute('locales'), -1, PREG_SPLIT_NO_EMPTY);
            if ($locales === array()) {
                throw new RuntimeException('No locales found in pluralRules element');
            }
            $rules = $this->collectRules($xPluralRules);
            foreach ($locales as $rawLocale) {
                $locale = str_replace('_', '-', $rawLocale);
                // Only the targets of a replacement may legitimately be found twice
                if (isset($rulesByLocale[$locale]) && array_search($locale, $replacements, true) === false) {
                    throw new RuntimeException("Duplicate locale: {$locale}");
                }
                if (in_array($locale, $definedLanguageIDs, true) || in_array($locale, $knownMissingLanguages, true)) {
                    $rulesByLocale[$locale] = $rules;
                    continue;
                }
                if (!isset($replacements[$locale])) {
                    $unrecognizedLocaleCodes[] = $locale;
                    continue;
                }
                $renamedLocale = $replacements[$locale];
                // Rules coming from a former code never replace the ones already stored for the current code
                if ($renamedLocale !== '' && !isset($rulesByLocale[$renamedLocale])) {
                    $rulesByLocale[$renamedLocale] = $rules;
                }
            }
        }
        if ($unrecognizedLocaleCodes !== array()) {
            throw new RuntimeException("The following locales are not defined:\n- " . implode("\n- ", $unrecognizedLocaleCodes));
        }
        if ($rulesByLocale === array()) {
            throw new RuntimeException('No plural rules found');
        }
        $this->sortByKeyWithPossiblyAlt($rulesByLocale);

        return array(
            'supplemental' => array(
                'version' => array(
                    '_cldrVersion' => $this->options->cldrVersion,
                ),
                'plurals-type-cardinal' => $rulesByLocale,
            ),
        );
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'supplemental/plurals.json';
    }

    /**
     * Read the rules contained in a pluralRules element.
     *
     * @param \DOMElement $xPluralRules
     *
     * @throws RuntimeException in case of unexpected elements, missing/unknown/duplicated counts, or when no rule is found
     *
     * @return array array keys are "pluralRule-count-<count>" (always in the zero, one, two, few, many, other order), array values are the rules
     */
    private function collectRules($xPluralRules)
    {
        $slots = array(
            'pluralRule-count-zero' => null,
            'pluralRule-count-one' => null,
            'pluralRule-count-two' => null,
            'pluralRule-count-few' => null,
            'pluralRule-count-many' => null,
            'pluralRule-count-other' => null,
        );
        foreach ($xPluralRules->childNodes as $xChild) {
            if (!$xChild instanceof DOMElement) {
                continue;
            }
            if ($xChild->tagName !== 'pluralRule') {
                throw new RuntimeException("Unexpected element: {$xChild->tagName}");
            }
            $count = (string) $xChild->getAttribute('count');
            if ($count === '') {
                throw new RuntimeException('Missing count attribute');
            }
            $slot = "pluralRule-count-{$count}";
            if (!array_key_exists($slot, $slots)) {
                throw new RuntimeException("Unknown count: {$count}");
            }
            if ($slots[$slot] !== null) {
                throw new RuntimeException("Duplicate count: {$count}");
            }
            $slots[$slot] = $xChild->textContent;
        }
        // Keep only the counts actually found
        $rules = array();
        foreach ($slots as $slot => $rule) {
            if ($rule !== null) {
                $rules[$slot] = $rule;
            }
        }
        if ($rules === array()) {
            throw new RuntimeException('No plural rules found');
        }

        return $rules;
    }
}
472 |
/**
 * Base class of the importers of the English display names (common/main/en.xml).
 */
abstract class LocaleDisplayName extends Processor
{
    public function __construct(Options $options, DocumentStorage $documentStorage)
    {
        parent::__construct($options, $documentStorage, 'common/main/en.xml');
    }

    /**
     * Get the XPath expression that selects the elements holding the names.
     *
     * @return string
     */
    abstract protected function getXPathSelector();

    /**
     * Get the name of the node (child of localeDisplayNames) that will contain the names in the exported data.
     *
     * @return string
     */
    abstract protected function getExportedNodeName();

    /**
     * {@inheritdoc}
     *
     * @see Processor::parse()
     */
    protected function parse(DOMDocument $doc)
    {
        $names = array();
        $xpath = new DOMXPath($doc);
        foreach ($xpath->query($this->getXPathSelector()) as $xElement) {
            $type = (string) $xElement->getAttribute('type');
            if ($type === '') {
                throw new RuntimeException('Missing type attribute');
            }
            // Alternative names are stored under "<type>-alt-<alt>"
            $alt = (string) $xElement->getAttribute('alt');
            $key = str_replace('_', '-', $type) . ($alt === '' ? '' : "-alt-{$alt}");
            if (isset($names[$key])) {
                throw new RuntimeException("Duplicate key: {$key}");
            }
            $names[$key] = (string) $xElement->textContent;
        }
        if ($names === array()) {
            throw new RuntimeException('No elements found');
        }
        $this->sortByKeyWithPossiblyAlt($names);
        $identity = array(
            'version' => array(
                '_cldrVersion' => $this->options->cldrVersion,
            ),
            'language' => 'en',
            'territory' => 'US',
        );
        $localeData = array(
            'identity' => $identity,
            'localeDisplayNames' => array(
                $this->getExportedNodeName() => $names,
            ),
        );

        return array(
            'main' => array(
                'en-US' => $localeData,
            ),
        );
    }
}
538 |
/**
 * Imports the English names of the languages.
 */
class Languages extends LocaleDisplayName
{
    /**
     * Get the identifiers of the imported languages, skipping the "-alt-" alternative names.
     *
     * @return string[]
     */
    public function getDefinedLanguageIDs()
    {
        $names = $this->data['main']['en-US']['localeDisplayNames'][$this->getExportedNodeName()];
        $ids = array();
        foreach (array_keys($names) as $key) {
            if (strpos((string) $key, '-alt-') !== false) {
                continue;
            }
            $ids[] = $key;
        }

        return $ids;
    }

    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/languages.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'languages';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/languages/language';
    }
}
584 |
/**
 * Imports the English names of the scripts.
 */
class Scripts extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/scripts.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'scripts';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/scripts/script';
    }
}
617 |
/**
 * Imports the English names of the territories.
 */
class Territories extends LocaleDisplayName
{
    /**
     * {@inheritdoc}
     *
     * @see Processor::getOutputRelativeFileName()
     */
    protected function getOutputRelativeFileName()
    {
        return 'main/en-US/territories.json';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getExportedNodeName()
     */
    protected function getExportedNodeName()
    {
        return 'territories';
    }

    /**
     * {@inheritdoc}
     *
     * @see LocaleDisplayName::getXPathSelector()
     */
    protected function getXPathSelector()
    {
        return '/ldml/localeDisplayNames/territories/territory';
    }
}
650 |
// Entry point: run the import; RuntimeExceptions are reported on the standard error and lead to exit code 1.
try {
    main($argv);
} catch (RuntimeException $failure) {
    fwrite(STDERR, "{$failure->getMessage()}\n");
    exit(1);
}
657 |
--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/languages.json:
--------------------------------------------------------------------------------
1 | {
2 | "main": {
3 | "en-US": {
4 | "identity": {
5 | "version": {
6 | "_cldrVersion": "47"
7 | },
8 | "language": "en",
9 | "territory": "US"
10 | },
11 | "localeDisplayNames": {
12 | "languages": {
13 | "aa": "Afar",
14 | "ab": "Abkhazian",
15 | "ace": "Acehnese",
16 | "ach": "Acoli",
17 | "ada": "Adangme",
18 | "ady": "Adyghe",
19 | "ae": "Avestan",
20 | "aeb": "Tunisian Arabic",
21 | "af": "Afrikaans",
22 | "afh": "Afrihili",
23 | "agq": "Aghem",
24 | "ain": "Ainu",
25 | "ak": "Akan",
26 | "akk": "Akkadian",
27 | "akz": "Alabama",
28 | "ale": "Aleut",
29 | "aln": "Gheg Albanian",
30 | "alt": "Southern Altai",
31 | "am": "Amharic",
32 | "an": "Aragonese",
33 | "ang": "Old English",
34 | "ann": "Obolo",
35 | "anp": "Angika",
36 | "ar": "Arabic",
37 | "ar-001": "Modern Standard Arabic",
38 | "arc": "Aramaic",
39 | "arn": "Mapuche",
40 | "aro": "Araona",
41 | "arp": "Arapaho",
42 | "arq": "Algerian Arabic",
43 | "ars": "Najdi Arabic",
44 | "ars-alt-menu": "Arabic, Najdi",
45 | "arw": "Arawak",
46 | "ary": "Moroccan Arabic",
47 | "arz": "Egyptian Arabic",
48 | "as": "Assamese",
49 | "asa": "Asu",
50 | "ase": "American Sign Language",
51 | "ast": "Asturian",
52 | "atj": "Atikamekw",
53 | "av": "Avaric",
54 | "avk": "Kotava",
55 | "awa": "Awadhi",
56 | "ay": "Aymara",
57 | "az": "Azerbaijani",
58 | "az-alt-short": "Azeri",
59 | "ba": "Bashkir",
60 | "bal": "Baluchi",
61 | "ban": "Balinese",
62 | "bar": "Bavarian",
63 | "bas": "Basaa",
64 | "bax": "Bamun",
65 | "bbc": "Batak Toba",
66 | "bbj": "Ghomala",
67 | "be": "Belarusian",
68 | "bej": "Beja",
69 | "bem": "Bemba",
70 | "bew": "Betawi",
71 | "bez": "Bena",
72 | "bfd": "Bafut",
73 | "bfq": "Badaga",
74 | "bg": "Bulgarian",
75 | "bgc": "Haryanvi",
76 | "bgn": "Western Balochi",
77 | "bho": "Bhojpuri",
78 | "bi": "Bislama",
79 | "bik": "Bikol",
80 | "bin": "Bini",
81 | "bjn": "Banjar",
82 | "bkm": "Kom",
83 | "bla": "Siksiká",
84 | "blo": "Anii",
85 | "blt": "Tai Dam",
86 | "bm": "Bambara",
87 | "bn": "Bangla",
88 | "bo": "Tibetan",
89 | "bpy": "Bishnupriya",
90 | "bqi": "Bakhtiari",
91 | "br": "Breton",
92 | "bra": "Braj",
93 | "brh": "Brahui",
94 | "brx": "Bodo",
95 | "bs": "Bosnian",
96 | "bss": "Akoose",
97 | "bua": "Buriat",
98 | "bug": "Buginese",
99 | "bum": "Bulu",
100 | "byn": "Blin",
101 | "byv": "Medumba",
102 | "ca": "Catalan",
103 | "cad": "Caddo",
104 | "car": "Carib",
105 | "cay": "Cayuga",
106 | "cch": "Atsam",
107 | "ccp": "Chakma",
108 | "ce": "Chechen",
109 | "ceb": "Cebuano",
110 | "cgg": "Chiga",
111 | "ch": "Chamorro",
112 | "chb": "Chibcha",
113 | "chg": "Chagatai",
114 | "chk": "Chuukese",
115 | "chm": "Mari",
116 | "chn": "Chinook Jargon",
117 | "cho": "Choctaw",
118 | "chp": "Chipewyan",
119 | "chr": "Cherokee",
120 | "chy": "Cheyenne",
121 | "cic": "Chickasaw",
122 | "ckb": "Central Kurdish",
123 | "ckb-alt-menu": "Kurdish, Central",
124 | "ckb-alt-variant": "Kurdish, Sorani",
125 | "clc": "Chilcotin",
126 | "co": "Corsican",
127 | "cop": "Coptic",
128 | "cps": "Capiznon",
129 | "cr": "Cree",
130 | "cr-alt-long": "Woods Cree",
131 | "crg": "Michif",
132 | "crh": "Crimean Tatar",
133 | "crj": "Southern East Cree",
134 | "crk": "Plains Cree",
135 | "crl": "Northern East Cree",
136 | "crm": "Moose Cree",
137 | "crr": "Carolina Algonquian",
138 | "crs": "Seselwa Creole French",
139 | "cs": "Czech",
140 | "csb": "Kashubian",
141 | "csw": "Swampy Cree",
142 | "cu": "Church Slavic",
143 | "cv": "Chuvash",
144 | "cy": "Welsh",
145 | "da": "Danish",
146 | "dak": "Dakota",
147 | "dar": "Dargwa",
148 | "dav": "Taita",
149 | "de": "German",
150 | "de-AT": "Austrian German",
151 | "de-CH": "Swiss High German",
152 | "del": "Delaware",
153 | "den": "Slave",
154 | "dgr": "Dogrib",
155 | "din": "Dinka",
156 | "dje": "Zarma",
157 | "doi": "Dogri",
158 | "dsb": "Lower Sorbian",
159 | "dtp": "Central Dusun",
160 | "dua": "Duala",
161 | "dum": "Middle Dutch",
162 | "dv": "Divehi",
163 | "dyo": "Jola-Fonyi",
164 | "dyu": "Dyula",
165 | "dz": "Dzongkha",
166 | "dzg": "Dazaga",
167 | "ebu": "Embu",
168 | "ee": "Ewe",
169 | "efi": "Efik",
170 | "egl": "Emilian",
171 | "egy": "Ancient Egyptian",
172 | "eka": "Ekajuk",
173 | "el": "Greek",
174 | "elx": "Elamite",
175 | "en": "English",
176 | "en-AU": "Australian English",
177 | "en-CA": "Canadian English",
178 | "en-GB": "British English",
179 | "en-GB-alt-short": "UK English",
180 | "en-US": "American English",
181 | "en-US-alt-short": "US English",
182 | "enm": "Middle English",
183 | "eo": "Esperanto",
184 | "es": "Spanish",
185 | "es-419": "Latin American Spanish",
186 | "es-ES": "European Spanish",
187 | "es-MX": "Mexican Spanish",
188 | "esu": "Central Yupik",
189 | "et": "Estonian",
190 | "eu": "Basque",
191 | "ewo": "Ewondo",
192 | "ext": "Extremaduran",
193 | "fa": "Persian",
194 | "fa-AF": "Dari",
195 | "fan": "Fang",
196 | "fat": "Fanti",
197 | "ff": "Fula",
198 | "fi": "Finnish",
199 | "fil": "Filipino",
200 | "fit": "Tornedalen Finnish",
201 | "fj": "Fijian",
202 | "fo": "Faroese",
203 | "fon": "Fon",
204 | "fr": "French",
205 | "fr-CA": "Canadian French",
206 | "fr-CH": "Swiss French",
207 | "frc": "Cajun French",
208 | "frm": "Middle French",
209 | "fro": "Old French",
210 | "frp": "Arpitan",
211 | "frr": "Northern Frisian",
212 | "frs": "Eastern Frisian",
213 | "fur": "Friulian",
214 | "fy": "Western Frisian",
215 | "ga": "Irish",
216 | "gaa": "Ga",
217 | "gag": "Gagauz",
218 | "gan": "Gan Chinese",
219 | "gay": "Gayo",
220 | "gba": "Gbaya",
221 | "gbz": "Zoroastrian Dari",
222 | "gd": "Scottish Gaelic",
223 | "gez": "Geez",
224 | "gil": "Gilbertese",
225 | "gl": "Galician",
226 | "glk": "Gilaki",
227 | "gmh": "Middle High German",
228 | "gn": "Guarani",
229 | "goh": "Old High German",
230 | "gon": "Gondi",
231 | "gor": "Gorontalo",
232 | "got": "Gothic",
233 | "grb": "Grebo",
234 | "grc": "Ancient Greek",
235 | "gsw": "Swiss German",
236 | "gu": "Gujarati",
237 | "guc": "Wayuu",
238 | "gur": "Frafra",
239 | "guz": "Gusii",
240 | "gv": "Manx",
241 | "gwi": "Gwichʼin",
242 | "ha": "Hausa",
243 | "hai": "Haida",
244 | "hak": "Hakka Chinese",
245 | "haw": "Hawaiian",
246 | "hax": "Southern Haida",
247 | "he": "Hebrew",
248 | "hi": "Hindi",
249 | "hi-Latn": "Hindi (Latin)",
250 | "hi-Latn-alt-variant": "Hinglish",
251 | "hif": "Fiji Hindi",
252 | "hil": "Hiligaynon",
253 | "hit": "Hittite",
254 | "hmn": "Hmong",
255 | "hnj": "Hmong Njua",
256 | "ho": "Hiri Motu",
257 | "hr": "Croatian",
258 | "hsb": "Upper Sorbian",
259 | "hsn": "Xiang Chinese",
260 | "ht": "Haitian Creole",
261 | "hu": "Hungarian",
262 | "hup": "Hupa",
263 | "hur": "Halkomelem",
264 | "hy": "Armenian",
265 | "hz": "Herero",
266 | "ia": "Interlingua",
267 | "iba": "Iban",
268 | "ibb": "Ibibio",
269 | "id": "Indonesian",
270 | "ie": "Interlingue",
271 | "ig": "Igbo",
272 | "ii": "Sichuan Yi",
273 | "ik": "Inupiaq",
274 | "ikt": "Western Canadian Inuktitut",
275 | "ilo": "Iloko",
276 | "inh": "Ingush",
277 | "io": "Ido",
278 | "is": "Icelandic",
279 | "it": "Italian",
280 | "iu": "Inuktitut",
281 | "izh": "Ingrian",
282 | "ja": "Japanese",
283 | "jam": "Jamaican Creole English",
284 | "jbo": "Lojban",
285 | "jgo": "Ngomba",
286 | "jmc": "Machame",
287 | "jpr": "Judeo-Persian",
288 | "jrb": "Judeo-Arabic",
289 | "jut": "Jutish",
290 | "jv": "Javanese",
291 | "ka": "Georgian",
292 | "kaa": "Kara-Kalpak",
293 | "kab": "Kabyle",
294 | "kac": "Kachin",
295 | "kaj": "Jju",
296 | "kam": "Kamba",
297 | "kaw": "Kawi",
298 | "kbd": "Kabardian",
299 | "kbl": "Kanembu",
300 | "kcg": "Tyap",
301 | "kde": "Makonde",
302 | "kea": "Kabuverdianu",
303 | "ken": "Kenyang",
304 | "kfo": "Koro",
305 | "kg": "Kongo",
306 | "kgp": "Kaingang",
307 | "kha": "Khasi",
308 | "kho": "Khotanese",
309 | "khq": "Koyra Chiini",
310 | "khw": "Khowar",
311 | "ki": "Kikuyu",
312 | "kiu": "Kirmanjki",
313 | "kj": "Kuanyama",
314 | "kk": "Kazakh",
315 | "kkj": "Kako",
316 | "kl": "Kalaallisut",
317 | "kln": "Kalenjin",
318 | "km": "Khmer",
319 | "kmb": "Kimbundu",
320 | "kn": "Kannada",
321 | "ko": "Korean",
322 | "koi": "Komi-Permyak",
323 | "kok": "Konkani",
324 | "kos": "Kosraean",
325 | "kpe": "Kpelle",
326 | "kr": "Kanuri",
327 | "krc": "Karachay-Balkar",
328 | "kri": "Krio",
329 | "krj": "Kinaray-a",
330 | "krl": "Karelian",
331 | "kru": "Kurukh",
332 | "ks": "Kashmiri",
333 | "ksb": "Shambala",
334 | "ksf": "Bafia",
335 | "ksh": "Colognian",
336 | "ku": "Kurdish",
337 | "kum": "Kumyk",
338 | "kut": "Kutenai",
339 | "kv": "Komi",
340 | "kw": "Cornish",
341 | "kwk": "Kwakʼwala",
342 | "kxv": "Kuvi",
343 | "ky": "Kyrgyz",
344 | "ky-alt-variant": "Kirghiz",
345 | "la": "Latin",
346 | "lad": "Ladino",
347 | "lag": "Langi",
348 | "lah": "Western Panjabi",
349 | "lam": "Lamba",
350 | "lb": "Luxembourgish",
351 | "lez": "Lezghian",
352 | "lfn": "Lingua Franca Nova",
353 | "lg": "Ganda",
354 | "li": "Limburgish",
355 | "lij": "Ligurian",
356 | "lil": "Lillooet",
357 | "liv": "Livonian",
358 | "lkt": "Lakota",
359 | "lmo": "Lombard",
360 | "ln": "Lingala",
361 | "lo": "Lao",
362 | "lol": "Mongo",
363 | "lou": "Louisiana Creole",
364 | "loz": "Lozi",
365 | "lrc": "Northern Luri",
366 | "lsm": "Saamia",
367 | "lt": "Lithuanian",
368 | "ltg": "Latgalian",
369 | "lu": "Luba-Katanga",
370 | "lua": "Luba-Lulua",
371 | "lui": "Luiseno",
372 | "lun": "Lunda",
373 | "luo": "Luo",
374 | "lus": "Mizo",
375 | "luy": "Luyia",
376 | "lv": "Latvian",
377 | "lzh": "Literary Chinese",
378 | "lzz": "Laz",
379 | "mad": "Madurese",
380 | "maf": "Mafa",
381 | "mag": "Magahi",
382 | "mai": "Maithili",
383 | "mak": "Makasar",
384 | "man": "Mandingo",
385 | "mas": "Masai",
386 | "mde": "Maba",
387 | "mdf": "Moksha",
388 | "mdr": "Mandar",
389 | "men": "Mende",
390 | "mer": "Meru",
391 | "mfe": "Morisyen",
392 | "mg": "Malagasy",
393 | "mga": "Middle Irish",
394 | "mgh": "Makhuwa-Meetto",
395 | "mgo": "Metaʼ",
396 | "mh": "Marshallese",
397 | "mi": "Māori",
398 | "mic": "Mi'kmaw",
399 | "min": "Minangkabau",
400 | "mk": "Macedonian",
401 | "ml": "Malayalam",
402 | "mn": "Mongolian",
403 | "mnc": "Manchu",
404 | "mni": "Manipuri",
405 | "moe": "Innu-aimun",
406 | "moh": "Mohawk",
407 | "mos": "Mossi",
408 | "mr": "Marathi",
409 | "mrj": "Western Mari",
410 | "ms": "Malay",
411 | "mt": "Maltese",
412 | "mua": "Mundang",
413 | "mul": "Multiple languages",
414 | "mus": "Muscogee",
415 | "mus-alt-official": "Mvskoke",
416 | "mus-alt-variant": "Muscogee",
417 | "mwl": "Mirandese",
418 | "mwr": "Marwari",
419 | "mwv": "Mentawai",
420 | "my": "Burmese",
421 | "my-alt-variant": "Myanmar Language",
422 | "mye": "Myene",
423 | "myv": "Erzya",
424 | "mzn": "Mazanderani",
425 | "na": "Nauru",
426 | "nan": "Min Nan Chinese",
427 | "nap": "Neapolitan",
428 | "naq": "Nama",
429 | "nb": "Norwegian Bokmål",
430 | "nd": "North Ndebele",
431 | "nds": "Low German",
432 | "nds-NL": "Low Saxon",
433 | "ne": "Nepali",
434 | "new": "Newari",
435 | "ng": "Ndonga",
436 | "nia": "Nias",
437 | "niu": "Niuean",
438 | "njo": "Ao Naga",
439 | "nl": "Dutch",
440 | "nl-BE": "Flemish",
441 | "nmg": "Kwasio",
442 | "nn": "Norwegian Nynorsk",
443 | "nnh": "Ngiemboon",
444 | "no": "Norwegian",
445 | "nog": "Nogai",
446 | "non": "Old Norse",
447 | "nov": "Novial",
448 | "nqo": "N’Ko",
449 | "nr": "South Ndebele",
450 | "nso": "Northern Sotho",
451 | "nus": "Nuer",
452 | "nv": "Navajo",
453 | "nwc": "Classical Newari",
454 | "ny": "Nyanja",
455 | "nym": "Nyamwezi",
456 | "nyn": "Nyankole",
457 | "nyo": "Nyoro",
458 | "nzi": "Nzima",
459 | "oc": "Occitan",
460 | "oj": "Ojibwa",
461 | "ojb": "Northwestern Ojibwa",
462 | "ojc": "Central Ojibwa",
463 | "ojs": "Oji-Cree",
464 | "ojw": "Western Ojibwa",
465 | "oka": "Okanagan",
466 | "om": "Oromo",
467 | "or": "Odia",
468 | "os": "Ossetic",
469 | "osa": "Osage",
470 | "ota": "Ottoman Turkish",
471 | "pa": "Punjabi",
472 | "pag": "Pangasinan",
473 | "pal": "Pahlavi",
474 | "pam": "Pampanga",
475 | "pap": "Papiamento",
476 | "pau": "Palauan",
477 | "pcd": "Picard",
478 | "pcm": "Nigerian Pidgin",
479 | "pdc": "Pennsylvania German",
480 | "pdt": "Plautdietsch",
481 | "peo": "Old Persian",
482 | "pfl": "Palatine German",
483 | "phn": "Phoenician",
484 | "pi": "Pali",
485 | "pis": "Pijin",
486 | "pl": "Polish",
487 | "pms": "Piedmontese",
488 | "pnt": "Pontic",
489 | "pon": "Pohnpeian",
490 | "pqm": "Maliseet-Passamaquoddy",
491 | "prg": "Prussian",
492 | "pro": "Old Provençal",
493 | "ps": "Pashto",
494 | "ps-alt-variant": "Pushto",
495 | "pt": "Portuguese",
496 | "pt-BR": "Brazilian Portuguese",
497 | "pt-PT": "European Portuguese",
498 | "qu": "Quechua",
499 | "quc": "Kʼicheʼ",
500 | "qug": "Chimborazo Highland Quichua",
501 | "raj": "Rajasthani",
502 | "rap": "Rapanui",
503 | "rar": "Rarotongan",
504 | "rgn": "Romagnol",
505 | "rhg": "Rohingya",
506 | "rif": "Riffian",
507 | "rm": "Romansh",
508 | "rn": "Rundi",
509 | "ro": "Romanian",
510 | "ro-MD": "Moldavian",
511 | "rof": "Rombo",
512 | "rom": "Romany",
513 | "rtm": "Rotuman",
514 | "ru": "Russian",
515 | "rue": "Rusyn",
516 | "rug": "Roviana",
517 | "rup": "Aromanian",
518 | "rw": "Kinyarwanda",
519 | "rwk": "Rwa",
520 | "sa": "Sanskrit",
521 | "sad": "Sandawe",
522 | "sah": "Yakut",
523 | "sam": "Samaritan Aramaic",
524 | "saq": "Samburu",
525 | "sas": "Sasak",
526 | "sat": "Santali",
527 | "saz": "Saurashtra",
528 | "sba": "Ngambay",
529 | "sbp": "Sangu",
530 | "sc": "Sardinian",
531 | "scn": "Sicilian",
532 | "sco": "Scots",
533 | "sd": "Sindhi",
534 | "sdc": "Sassarese Sardinian",
535 | "sdh": "Southern Kurdish",
536 | "se": "Northern Sami",
537 | "se-alt-menu": "Sami, Northern",
538 | "see": "Seneca",
539 | "seh": "Sena",
540 | "sei": "Seri",
541 | "sel": "Selkup",
542 | "ses": "Koyraboro Senni",
543 | "sg": "Sango",
544 | "sga": "Old Irish",
545 | "sgs": "Samogitian",
546 | "sh": "Serbo-Croatian",
547 | "shi": "Tachelhit",
548 | "shn": "Shan",
549 | "shu": "Chadian Arabic",
550 | "si": "Sinhala",
551 | "sid": "Sidamo",
552 | "sk": "Slovak",
553 | "sl": "Slovenian",
554 | "slh": "Southern Lushootseed",
555 | "sli": "Lower Silesian",
556 | "sly": "Selayar",
557 | "sm": "Samoan",
558 | "sma": "Southern Sami",
559 | "sma-alt-menu": "Sami, Southern",
560 | "smj": "Lule Sami",
561 | "smj-alt-menu": "Sami, Lule",
562 | "smn": "Inari Sami",
563 | "smn-alt-menu": "Sami, Inari",
564 | "sms": "Skolt Sami",
565 | "sms-alt-menu": "Sami, Skolt",
566 | "sn": "Shona",
567 | "snk": "Soninke",
568 | "so": "Somali",
569 | "sog": "Sogdien",
570 | "sq": "Albanian",
571 | "sr": "Serbian",
572 | "sr-ME": "Montenegrin",
573 | "srn": "Sranan Tongo",
574 | "srr": "Serer",
575 | "ss": "Swati",
576 | "ssy": "Saho",
577 | "st": "Southern Sotho",
578 | "stq": "Saterland Frisian",
579 | "str": "Straits Salish",
580 | "su": "Sundanese",
581 | "suk": "Sukuma",
582 | "sus": "Susu",
583 | "sux": "Sumerian",
584 | "sv": "Swedish",
585 | "sw": "Swahili",
586 | "sw-CD": "Congo Swahili",
587 | "swb": "Comorian",
588 | "syc": "Classical Syriac",
589 | "syr": "Syriac",
590 | "szl": "Silesian",
591 | "ta": "Tamil",
592 | "tce": "Southern Tutchone",
593 | "tcy": "Tulu",
594 | "te": "Telugu",
595 | "tem": "Timne",
596 | "teo": "Teso",
597 | "ter": "Tereno",
598 | "tet": "Tetum",
599 | "tg": "Tajik",
600 | "tgx": "Tagish",
601 | "th": "Thai",
602 | "tht": "Tahltan",
603 | "ti": "Tigrinya",
604 | "tig": "Tigre",
605 | "tiv": "Tiv",
606 | "tk": "Turkmen",
607 | "tkl": "Tokelau",
608 | "tkr": "Tsakhur",
609 | "tl": "Tagalog",
610 | "tlh": "Klingon",
611 | "tli": "Tlingit",
612 | "tly": "Talysh",
613 | "tmh": "Tamashek",
614 | "tn": "Tswana",
615 | "to": "Tongan",
616 | "tog": "Nyasa Tonga",
617 | "tok": "Toki Pona",
618 | "tpi": "Tok Pisin",
619 | "tr": "Turkish",
620 | "tru": "Turoyo",
621 | "trv": "Taroko",
622 | "trw": "Torwali",
623 | "ts": "Tsonga",
624 | "tsd": "Tsakonian",
625 | "tsi": "Tsimshian",
626 | "tt": "Tatar",
627 | "ttm": "Northern Tutchone",
628 | "ttt": "Muslim Tat",
629 | "tum": "Tumbuka",
630 | "tvl": "Tuvalu",
631 | "tw": "Twi",
632 | "twq": "Tasawaq",
633 | "ty": "Tahitian",
634 | "tyv": "Tuvinian",
635 | "tzm": "Central Atlas Tamazight",
636 | "udm": "Udmurt",
637 | "ug": "Uyghur",
638 | "ug-alt-variant": "Uighur",
639 | "uga": "Ugaritic",
640 | "uk": "Ukrainian",
641 | "umb": "Umbundu",
642 | "und": "Unknown language",
643 | "ur": "Urdu",
644 | "uz": "Uzbek",
645 | "vai": "Vai",
646 | "ve": "Venda",
647 | "vec": "Venetian",
648 | "vep": "Veps",
649 | "vi": "Vietnamese",
650 | "vls": "West Flemish",
651 | "vmf": "Main-Franconian",
652 | "vmw": "Makhuwa",
653 | "vo": "Volapük",
654 | "vot": "Votic",
655 | "vro": "Võro",
656 | "vun": "Vunjo",
657 | "wa": "Walloon",
658 | "wae": "Walser",
659 | "wal": "Wolaytta",
660 | "war": "Waray",
661 | "was": "Washo",
662 | "wbp": "Warlpiri",
663 | "wo": "Wolof",
664 | "wuu": "Wu Chinese",
665 | "xal": "Kalmyk",
666 | "xh": "Xhosa",
667 | "xmf": "Mingrelian",
668 | "xnr": "Kangri",
669 | "xog": "Soga",
670 | "yao": "Yao",
671 | "yap": "Yapese",
672 | "yav": "Yangben",
673 | "ybb": "Yemba",
674 | "yi": "Yiddish",
675 | "yo": "Yoruba",
676 | "yrl": "Nheengatu",
677 | "yue": "Cantonese",
678 | "yue-alt-menu": "Chinese, Cantonese",
679 | "za": "Zhuang",
680 | "zap": "Zapotec",
681 | "zbl": "Blissymbols",
682 | "zea": "Zeelandic",
683 | "zen": "Zenaga",
684 | "zgh": "Standard Moroccan Tamazight",
685 | "zh": "Chinese",
686 | "zh-alt-long": "Mandarin Chinese",
687 | "zh-alt-menu": "Chinese, Mandarin",
688 | "zh-Hans": "Simplified Chinese",
689 | "zh-Hans-alt-long": "Simplified Mandarin Chinese",
690 | "zh-Hant": "Traditional Chinese",
691 | "zh-Hant-alt-long": "Traditional Mandarin Chinese",
692 | "zu": "Zulu",
693 | "zun": "Zuni",
694 | "zxx": "No linguistic content",
695 | "zza": "Zaza"
696 | }
697 | }
698 | }
699 | }
700 | }
--------------------------------------------------------------------------------