├── bin
    ├── export-plural-rules.bat
    ├── import-cldr-data.bat
    ├── export-plural-rules
    └── import-cldr-data
├── src
    ├── autoloader.php
    ├── Exporter
    │   ├── Prettyjson.php
    │   ├── Po.php
    │   ├── Ruby.php
    │   ├── Php.php
    │   ├── Html.php
    │   ├── Xml.php
    │   ├── Json.php
    │   └── Exporter.php
    ├── Category.php
    ├── FormulaConverter.php
    ├── cldr-data
    │   └── main
    │   │   └── en-US
    │   │       ├── scripts.json
    │   │       ├── territories.json
    │   │       └── languages.json
    ├── CldrData.php
    └── Language.php
├── LICENSE
├── composer.json
└── UNICODE-LICENSE.txt


/bin/export-plural-rules.bat:
--------------------------------------------------------------------------------
1 | @php "%~dpn0" %*


--------------------------------------------------------------------------------
/bin/import-cldr-data.bat:
--------------------------------------------------------------------------------
1 | @php "%~dpn0" %*


--------------------------------------------------------------------------------
/src/autoloader.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | // Load the classes of the Gettext\Languages namespace from the files stored next to this script.
 4 | spl_autoload_register(function ($className) {
 5 |     $rootNamespace = 'Gettext\\Languages';
 6 |     if (strpos($className, $rootNamespace . '\\') === 0) {
 7 |         $relativeName = substr($className, strlen($rootNamespace));
 8 |         $path = __DIR__ . str_replace('\\', DIRECTORY_SEPARATOR, $relativeName) . '.php';
 9 |         if (is_file($path)) {
10 |             require_once $path;
11 |         }
12 |     }
13 | });
14 | 


--------------------------------------------------------------------------------
/src/Exporter/Prettyjson.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | use Exception;
 6 | 
 7 | class Prettyjson extends Json
 8 | {
 9 |     /**
10 |      * Return a short description of this exporter.
11 |      *
12 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
13 |      *
14 |      * @return string
15 |      */
16 |     public static function getDescription()
17 |     {
18 |         return 'Build an uncompressed JSON-encoded file (PHP 5.4 or later is needed)';
19 |     }
20 | 
21 |     /**
22 |      * Return the json_encode options: pretty-printed output, with slashes and Unicode characters left unescaped.
23 |      *
24 |      * @see \Gettext\Languages\Exporter\Json::getEncodeOptions()
25 |      *
26 |      * @throws \Exception if any of the required JSON_* constants is not defined
27 |      *
28 |      * @return int
29 |      */
30 |     protected static function getEncodeOptions()
31 |     {
32 |         $hasAllFlags = defined('\JSON_PRETTY_PRINT') && defined('\JSON_UNESCAPED_SLASHES') && defined('\JSON_UNESCAPED_UNICODE');
33 |         if ($hasAllFlags) {
34 |             return \JSON_PRETTY_PRINT | \JSON_UNESCAPED_SLASHES | \JSON_UNESCAPED_UNICODE;
35 |         }
36 | 
37 |         throw new Exception('PHP 5.4 or later is required to export uncompressed JSON');
38 |     }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/Exporter/Po.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | use Exception;
 6 | 
 7 | class Po extends Exporter
 8 | {
 9 |     /**
10 |      * Return a short description of this exporter.
11 |      *
12 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
13 |      *
14 |      * @return string
15 |      */
16 |     public static function getDescription()
17 |     {
18 |         return 'Build a string to be used for gettext .po files';
19 |     }
20 | 
21 |     /**
22 |      * Build the "Language" and "Plural-Forms" header lines of a .po file for a single language.
23 |      *
24 |      * @param \Gettext\Languages\Language[] $languages a list containing exactly one language
25 |      * @param array $options the export options (not used by this exporter)
26 |      *
27 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
28 |      *
29 |      * @throws \Exception if $languages does not contain exactly one language
30 |      *
31 |      * @return string
32 |      */
33 |     protected static function toStringDoWithOptions($languages, array $options)
34 |     {
35 |         if (count($languages) !== 1) {
36 |             throw new Exception('The ' . get_called_class() . ' exporter can only export one language');
37 |         }
38 |         // The only language may be stored under a key other than 0 (associative or filtered arrays).
39 |         $language = isset($languages[0]) ? $languages[0] : reset($languages);
40 |         $lines = array();
41 |         $lines[] = '"Language: ' . $language->id . '\n"';
42 |         $lines[] = '"Plural-Forms: nplurals=' . count($language->categories) . '; plural=' . $language->formula . '\n"';
43 |         $lines[] = '';
44 | 
45 |         return implode("\n", $lines);
46 |     }
47 | }
48 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Michele Locati
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
23 | 


--------------------------------------------------------------------------------
/composer.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "gettext/languages",
 3 |     "description": "gettext languages with plural rules",
 4 |     "keywords": [
 5 |         "localization",
 6 |         "l10n",
 7 |         "internationalization",
 8 |         "i18n",
 9 |         "translations",
10 |         "translate",
11 |         "php",
12 |         "unicode",
13 |         "cldr",
14 |         "language",
15 |         "languages",
16 |         "plural",
17 |         "plurals",
18 |         "plural rules"
19 |     ],
20 |     "homepage": "https://github.com/php-gettext/Languages",
21 |     "license": "MIT",
22 |     "authors": [
23 |         {
24 |             "name": "Michele Locati",
25 |             "email": "mlocati@gmail.com",
26 |             "role": "Developer"
27 |         }
28 |     ],
29 |     "autoload": {
30 |         "psr-4": {
31 |             "Gettext\\Languages\\": "src/"
32 |         }
33 |     },
34 |     "autoload-dev": {
35 |         "psr-4": {
36 |             "Gettext\\Languages\\Test\\": "tests/test/"
37 |         }
38 |     },
39 |     "require": {
40 |         "php": ">=5.3"
41 |     },
42 |     "require-dev": {
43 |         "phpunit/phpunit": "^4.8 || ^5.7 || ^6.5 || ^7.5 || ^8.4"
44 |     },
45 |     "scripts": {
46 |         "test": "phpunit"
47 |     },
48 |     "bin": [
49 |         "bin/export-plural-rules",
50 |         "bin/import-cldr-data"
51 |     ]
52 | }


--------------------------------------------------------------------------------
/src/Exporter/Ruby.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | class Ruby extends Exporter
 6 | {
 7 |     /**
 8 |      * Return a short description of this exporter.
 9 |      *
10 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
11 |      *
12 |      * @return string
13 |      */
14 |     public static function getDescription()
15 |     {
16 |         return 'Build a Ruby hash';
17 |     }
18 | 
19 |     /**
20 |      * Build the Ruby source code of a PLURAL_RULES hash describing the languages.
21 |      *
22 |      * @param \Gettext\Languages\Language[] $languages the languages to be exported
23 |      * @param array $options the export options (not used by this exporter)
24 |      *
25 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
26 |      *
27 |      * @return string
28 |      */
29 |     protected static function toStringDoWithOptions($languages, array $options)
30 |     {
31 |         $lines = array();
32 |         $lines[] = 'PLURAL_RULES = {';
33 |         foreach ($languages as $lc) {
34 |             $lines[] = '  \'' . $lc->id . '\' => {';
35 |             $lines[] = '    \'name\' => \'' . self::escape($lc->name) . '\',';
36 |             if (isset($lc->supersededBy)) {
37 |                 $lines[] = '    \'supersededBy\' => \'' . $lc->supersededBy . '\',';
38 |             }
39 |             if (isset($lc->script)) {
40 |                 $lines[] = '    \'script\' => \'' . self::escape($lc->script) . '\',';
41 |             }
42 |             if (isset($lc->territory)) {
43 |                 $lines[] = '    \'territory\' => \'' . self::escape($lc->territory) . '\',';
44 |             }
45 |             if (isset($lc->baseLanguage)) {
46 |                 $lines[] = '    \'baseLanguage\' => \'' . self::escape($lc->baseLanguage) . '\',';
47 |             }
48 |             $lines[] = '    \'formula\' => \'' . $lc->formula . '\',';
49 |             $lines[] = '    \'plurals\' => ' . count($lc->categories) . ',';
50 |             $catNames = array();
51 |             foreach ($lc->categories as $c) {
52 |                 $catNames[] = "'{$c->id}'";
53 |             }
54 |             $lines[] = '    \'cases\' => [' . implode(', ', $catNames) . '],';
55 |             $lines[] = '    \'examples\' => {';
56 |             foreach ($lc->categories as $c) {
57 |                 $lines[] = '      \'' . $c->id . '\' => \'' . $c->examples . '\',';
58 |             }
59 |             $lines[] = '    },';
60 |             $lines[] = '  },';
61 |         }
62 |         $lines[] = '}';
63 |         $lines[] = '';
64 | 
65 |         return implode("\n", $lines);
66 |     }
67 | 
68 |     /**
69 |      * Escape a string so that it can be placed inside a single-quoted Ruby string literal.
70 |      *
71 |      * Only backslashes and single quotes are special there: addslashes() would also prefix
72 |      * double quotes and NUL bytes with a backslash, which Ruby keeps as part of the value.
73 |      *
74 |      * @param string $value
75 |      *
76 |      * @return string
77 |      */
78 |     private static function escape($value)
79 |     {
80 |         return addcslashes($value, "'\\");
81 |     }
82 | }
83 | 


--------------------------------------------------------------------------------
/src/Exporter/Php.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | class Php extends Exporter
 6 | {
 7 |     /**
 8 |      * Return a short description of this exporter.
 9 |      *
10 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
11 |      *
12 |      * @return string
13 |      */
14 |     public static function getDescription()
15 |     {
16 |         return 'Build a PHP array';
17 |     }
18 | 
19 |     /**
20 |      * Build the source code of a PHP file that returns an array describing the languages.
21 |      *
22 |      * @param \Gettext\Languages\Language[] $languages the languages to be exported
23 |      * @param array $options the export options (not used by this exporter)
24 |      *
25 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
26 |      *
27 |      * @return string
28 |      */
29 |     protected static function toStringDoWithOptions($languages, array $options)
30 |     {
31 |         $lines = array();
32 |         $lines[] = '<?php';
33 |         $lines[] = 'return array(';
34 |         foreach ($languages as $lc) {
35 |             $lines[] = '    \'' . $lc->id . '\' => array(';
36 |             $lines[] = '        \'name\' => \'' . self::escape($lc->name) . '\',';
37 |             if (isset($lc->supersededBy)) {
38 |                 $lines[] = '        \'supersededBy\' => \'' . $lc->supersededBy . '\',';
39 |             }
40 |             if (isset($lc->script)) {
41 |                 $lines[] = '        \'script\' => \'' . self::escape($lc->script) . '\',';
42 |             }
43 |             if (isset($lc->territory)) {
44 |                 $lines[] = '        \'territory\' => \'' . self::escape($lc->territory) . '\',';
45 |             }
46 |             if (isset($lc->baseLanguage)) {
47 |                 $lines[] = '        \'baseLanguage\' => \'' . self::escape($lc->baseLanguage) . '\',';
48 |             }
49 |             $lines[] = '        \'formula\' => \'' . $lc->formula . '\',';
50 |             $lines[] = '        \'plurals\' => ' . count($lc->categories) . ',';
51 |             $catNames = array();
52 |             foreach ($lc->categories as $c) {
53 |                 $catNames[] = "'{$c->id}'";
54 |             }
55 |             $lines[] = '        \'cases\' => array(' . implode(', ', $catNames) . '),';
56 |             $lines[] = '        \'examples\' => array(';
57 |             foreach ($lc->categories as $c) {
58 |                 $lines[] = '            \'' . $c->id . '\' => \'' . $c->examples . '\',';
59 |             }
60 |             $lines[] = '        ),';
61 |             $lines[] = '    ),';
62 |         }
63 |         $lines[] = ');';
64 |         $lines[] = '';
65 | 
66 |         return implode("\n", $lines);
67 |     }
68 | 
69 |     /**
70 |      * Escape a string so that it can be placed inside a single-quoted PHP string literal.
71 |      *
72 |      * Only backslashes and single quotes are special there: addslashes() would also prefix
73 |      * double quotes and NUL bytes with a backslash, which PHP keeps as part of the value.
74 |      *
75 |      * @param string $value
76 |      *
77 |      * @return string
78 |      */
79 |     private static function escape($value)
80 |     {
81 |         return addcslashes($value, "'\\");
82 |     }
83 | }
84 | 


--------------------------------------------------------------------------------
/src/Exporter/Html.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | class Html extends Exporter
 6 | {
 7 |     /**
 8 |      * Return a short description of this exporter.
 9 |      *
10 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
11 |      *
12 |      * @return string
13 |      */
14 |     public static function getDescription()
15 |     {
16 |         return 'Build a HTML table';
17 |     }
18 | 
19 |     /**
20 |      * Render the languages as an HTML table, with one row per language.
21 |      *
22 |      * @param \Gettext\Languages\Language[] $languages the languages to be exported
23 |      * @param array $options the export options (not used by this exporter)
24 |      *
25 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
26 |      *
27 |      * @return string
28 |      */
29 |     protected static function toStringDoWithOptions($languages, array $options)
30 |     {
31 |         $html = array(
32 |             '<table>',
33 |             '    <thead>',
34 |             '        <tr>',
35 |             '            <th>Language code</th>',
36 |             '            <th>Language name</th>',
37 |             '            <th># plurals</th>',
38 |             '            <th>Formula</th>',
39 |             '            <th>Plurals</th>',
40 |             '        </tr>',
41 |             '    </thead>',
42 |             '    <tbody>',
43 |         );
44 |         foreach ($languages as $language) {
45 |             $nameCell = self::h($language->name);
46 |             if (isset($language->supersededBy)) {
47 |                 $nameCell .= '<br /><small><span>Superseded by</span> ' . $language->supersededBy . '</small>';
48 |             }
49 |             $pluralItems = '';
50 |             foreach ($language->categories as $category) {
51 |                 $pluralItems .= '<li><span>' . $category->id . '</span><code>' . self::h($category->examples) . '</code></li>';
52 |             }
53 |             array_push(
54 |                 $html,
55 |                 '        <tr>',
56 |                 '            <td>' . $language->id . '</td>',
57 |                 '            <td>' . $nameCell . '</td>',
58 |                 '            <td>' . count($language->categories) . '</td>',
59 |                 '            <td>' . self::h($language->formula) . '</td>',
60 |                 '            <td><ol start="0">' . $pluralItems . '</ol></td>',
61 |                 '        </tr>'
62 |             );
63 |         }
64 |         $html[] = '    </tbody>';
65 |         $html[] = '</table>';
66 | 
67 |         return implode("\n", $html);
68 |     }
69 | 
70 |     /**
71 |      * Convert the HTML special characters of a UTF-8 string to entities (single quotes are left as they are).
72 |      *
73 |      * @param string $str
74 |      *
75 |      * @return string
76 |      */
77 |     protected static function h($str)
78 |     {
79 |         return htmlspecialchars($str, ENT_COMPAT, 'UTF-8');
80 |     }
81 | }
82 | 


--------------------------------------------------------------------------------
/UNICODE-LICENSE.txt:
--------------------------------------------------------------------------------
 1 | UNICODE, INC. LICENSE AGREEMENT - DATA FILES AND SOFTWARE
 2 | 
 3 | See Terms of Use for definitions of Unicode Inc.'s
 4 | Data Files and Software.
 5 | 
 6 | NOTICE TO USER: Carefully read the following legal agreement.
 7 | BY DOWNLOADING, INSTALLING, COPYING OR OTHERWISE USING UNICODE INC.'S
 8 | DATA FILES ("DATA FILES"), AND/OR SOFTWARE ("SOFTWARE"),
 9 | YOU UNEQUIVOCALLY ACCEPT, AND AGREE TO BE BOUND BY, ALL OF THE
10 | TERMS AND CONDITIONS OF THIS AGREEMENT.
11 | IF YOU DO NOT AGREE, DO NOT DOWNLOAD, INSTALL, COPY, DISTRIBUTE OR USE
12 | THE DATA FILES OR SOFTWARE.
13 | 
14 | COPYRIGHT AND PERMISSION NOTICE
15 | 
16 | Copyright © 1991-2019 Unicode, Inc. All rights reserved.
17 | Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
18 | 
19 | Permission is hereby granted, free of charge, to any person obtaining
20 | a copy of the Unicode data files and any associated documentation
21 | (the "Data Files") or Unicode software and any associated documentation
22 | (the "Software") to deal in the Data Files or Software
23 | without restriction, including without limitation the rights to use,
24 | copy, modify, merge, publish, distribute, and/or sell copies of
25 | the Data Files or Software, and to permit persons to whom the Data Files
26 | or Software are furnished to do so, provided that either
27 | (a) this copyright and permission notice appear with all copies
28 | of the Data Files or Software, or
29 | (b) this copyright and permission notice appear in associated
30 | Documentation.
31 | 
32 | THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
33 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
34 | WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
35 | NONINFRINGEMENT OF THIRD PARTY RIGHTS.
36 | IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
37 | NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
38 | DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
39 | DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
40 | TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
41 | PERFORMANCE OF THE DATA FILES OR SOFTWARE.
42 | 
43 | Except as contained in this notice, the name of a copyright holder
44 | shall not be used in advertising or otherwise to promote the sale,
45 | use or other dealings in these Data Files or Software without prior
46 | written authorization of the copyright holder.
47 | 


--------------------------------------------------------------------------------
/src/Exporter/Xml.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | class Xml extends Exporter
 6 | {
 7 |     /**
 8 |      * Return a short description of this exporter.
 9 |      *
10 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
11 |      *
12 |      * @return string
13 |      */
14 |     public static function getDescription()
15 |     {
16 |         return 'Build an XML file - schema available at https://php-gettext.github.io/Languages/GettextLanguages.xsd';
17 |     }
18 | 
19 |     /**
20 |      * Build an XML document with one "language" element per language and one "category" child per plural category.
21 |      *
22 |      * @param \Gettext\Languages\Language[] $languages the languages to be exported
23 |      * @param array $options the export options (not used by this exporter)
24 |      *
25 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
26 |      *
27 |      * @return string
28 |      */
29 |     protected static function toStringDoWithOptions($languages, array $options)
30 |     {
31 |         $document = new \DOMDocument('1.0', 'UTF-8');
32 |         $document->loadXML('<languages
33 |             xmlns="https://github.com/mlocati/cldr-to-gettext-plural-rules"
34 |             xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
35 |             xsi:schemaLocation="https://github.com/mlocati/cldr-to-gettext-plural-rules https://php-gettext.github.io/Languages/GettextLanguages.xsd"
36 |         />');
37 |         $root = $document->firstChild;
38 |         foreach ($languages as $language) {
39 |             $languageNode = $document->createElement('language');
40 |             $languageNode->setAttribute('id', $language->id);
41 |             $languageNode->setAttribute('name', $language->name);
42 |             foreach (array('supersededBy', 'script', 'territory', 'baseLanguage') as $optionalAttribute) {
43 |                 if (isset($language->{$optionalAttribute})) {
44 |                     $languageNode->setAttribute($optionalAttribute, $language->{$optionalAttribute});
45 |                 }
46 |             }
47 |             $languageNode->setAttribute('formula', $language->formula);
48 |             foreach ($language->categories as $category) {
49 |                 $categoryNode = $document->createElement('category');
50 |                 $categoryNode->setAttribute('id', $category->id);
51 |                 $categoryNode->setAttribute('examples', $category->examples);
52 |                 $languageNode->appendChild($categoryNode);
53 |             }
54 |             $root->appendChild($languageNode);
55 |         }
56 |         $document->formatOutput = true;
57 | 
58 |         return $document->saveXML();
59 |     }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/Exporter/Json.php:
--------------------------------------------------------------------------------
 1 | <?php
 2 | 
 3 | namespace Gettext\Languages\Exporter;
 4 | 
 5 | class Json extends Exporter
 6 | {
 7 |     /**
 8 |      * Return a short description of this exporter.
 9 |      *
10 |      * @see \Gettext\Languages\Exporter\Exporter::getDescription()
11 |      *
12 |      * @return string
13 |      */
14 |     public static function getDescription()
15 |     {
16 |         return 'Build a compressed JSON-encoded file';
17 |     }
18 | 
19 |     /**
20 |      * This exporter can emit formulas both with and without extra parenthesis.
21 |      *
22 |      * @see \Gettext\Languages\Exporter\Exporter::supportsFormulasWithAndWithoutParenthesis()
23 |      *
24 |      * @return bool
25 |      */
26 |     public static function supportsFormulasWithAndWithoutParenthesis()
27 |     {
28 |         return true;
29 |     }
30 | 
31 |     /**
32 |      * Return the options for json_encode: slashes and Unicode characters are left unescaped when PHP defines the flags.
33 |      *
34 |      * @return int
35 |      */
36 |     protected static function getEncodeOptions()
37 |     {
38 |         $unescapedSlashes = defined('\JSON_UNESCAPED_SLASHES') ? \JSON_UNESCAPED_SLASHES : 0;
39 |         $unescapedUnicode = defined('\JSON_UNESCAPED_UNICODE') ? \JSON_UNESCAPED_UNICODE : 0;
40 | 
41 |         return $unescapedSlashes | $unescapedUnicode;
42 |     }
43 | 
44 |     /**
45 |      * JSON-encode a map of the languages, indexed by language identifier.
46 |      *
47 |      * @param \Gettext\Languages\Language[] $languages the languages to be exported
48 |      * @param array $options if 'both-formulas' is set and not empty, a 'formulas' pair ('standard' and 'php') replaces 'formula'
49 |      *
50 |      * @see \Gettext\Languages\Exporter\Exporter::toStringDoWithOptions()
51 |      *
52 |      * @return string
53 |      */
54 |     protected static function toStringDoWithOptions($languages, array $options)
55 |     {
56 |         $bothFormulas = isset($options['both-formulas']) && $options['both-formulas'];
57 |         $exported = array();
58 |         foreach ($languages as $language) {
59 |             $entry = array('name' => $language->name);
60 |             foreach (array('supersededBy', 'script', 'territory', 'baseLanguage') as $optionalField) {
61 |                 if (isset($language->{$optionalField})) {
62 |                     $entry[$optionalField] = $language->{$optionalField};
63 |                 }
64 |             }
65 |             if ($bothFormulas) {
66 |                 $entry['formulas'] = array(
67 |                     'standard' => $language->buildFormula(true),
68 |                     'php' => $language->formula,
69 |                 );
70 |             } else {
71 |                 $entry['formula'] = $language->formula;
72 |             }
73 |             $entry['plurals'] = count($language->categories);
74 |             $entry['cases'] = array();
75 |             $entry['examples'] = array();
76 |             foreach ($language->categories as $category) {
77 |                 $entry['cases'][] = $category->id;
78 |                 $entry['examples'][$category->id] = $category->examples;
79 |             }
80 |             $exported[$language->id] = $entry;
81 |         }
82 | 
83 |         return json_encode($exported, static::getEncodeOptions());
84 |     }
85 | }
86 | 


--------------------------------------------------------------------------------
/src/Category.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Gettext\Languages;
  4 | 
  5 | use Exception;
  6 | 
  7 | /**
  8 |  * A helper class that handles the rules of a single category (eg 'zero', 'one', ...), together with its formula and examples.
  9 |  */
 10 | class Category
 11 | {
 12 |     /**
 13 |      * The identifier of the category (eg 'zero', 'one', ..., 'other').
 14 |      *
 15 |      * @var string
 16 |      */
 17 |     public $id;
 18 | 
 19 |     /**
 20 |      * The gettext formula selecting this category (it is null if and only if the category is 'other').
 21 |      *
 22 |      * @var string|null
 23 |      */
 24 |     public $formula;
 25 | 
 26 |     /**
 27 |      * Some exemplar numeric ranges that satisfy this category, in the CLDR representation.
 28 |      *
 29 |      * @var string|null
 30 |      */
 31 |     public $examples;
 32 | 
 33 |     /**
 34 |      * Parse a CLDR category identifier and its rule, populating the id, the formula and the examples.
 35 |      *
 36 |      * @param string $cldrCategoryId the CLDR category identifier (eg 'pluralRule-count-one')
 37 |      * @param string $cldrFormulaAndExamples the CLDR formula and examples (eg 'i = 1 and v = 0 @integer 1')
 38 |      *
 39 |      * @throws \Exception if the category identifier or its rule are not valid
 40 |      */
 41 |     public function __construct($cldrCategoryId, $cldrFormulaAndExamples)
 42 |     {
 43 |         $idParts = array();
 44 |         if (!preg_match('/^pluralRule-count-(.+)$/', $cldrCategoryId, $idParts) || !in_array($idParts[1], CldrData::$categories)) {
 45 |             throw new Exception("Invalid CLDR category: '{$cldrCategoryId}'");
 46 |         }
 47 |         $this->id = $idParts[1];
 48 |         // Collapse every run of whitespace to a single space before splitting the rule.
 49 |         $rule = trim(preg_replace('/\s+/', ' ', $cldrFormulaAndExamples));
 50 |         $ruleParts = array();
 51 |         if (!preg_match('/^([^@]*)(?:@integer([^@]+))?(?:@decimal(?:[^@]+))?$/', $rule, $ruleParts)) {
 52 |             throw new Exception("Invalid CLDR category rule: {$cldrFormulaAndExamples}");
 53 |         }
 54 |         $cldrFormula = trim($ruleParts[1]);
 55 |         // Only the @integer samples are kept: the @decimal ones are matched but not captured.
 56 |         $integerExamples = isset($ruleParts[2]) ? trim($ruleParts[2]) : '';
 57 |         $this->examples = $integerExamples === '' ? null : $integerExamples;
 58 |         if ($this->id == CldrData::OTHER_CATEGORY) {
 59 |             if ($cldrFormula !== '') {
 60 |                 throw new Exception("The '" . CldrData::OTHER_CATEGORY . "' category should not have any formula, but it has '{$cldrFormula}'");
 61 |             }
 62 |             $this->formula = null;
 63 | 
 64 |             return;
 65 |         }
 66 |         if ($cldrFormula === '') {
 67 |             throw new Exception("The '{$this->id}' category does not have a formula");
 68 |         }
 69 |         $this->formula = FormulaConverter::convertFormula($cldrFormula);
 70 |     }
 71 | 
 72 |     /**
 73 |      * Expand the examples of this category to a list of numbers.
 74 |      *
 75 |      * @throws \Exception throws an Exception if we weren't able to expand the examples
 76 |      *
 77 |      * @return int[]
 78 |      */
 79 |     public function getExampleIntegers()
 80 |     {
 81 |         return self::expandExamples($this->examples);
 82 |     }
 83 | 
 84 |     /**
 85 |      * Expand a CLDR list of examples (single numbers and 'from~to' ranges) to the corresponding numbers.
 86 |      *
 87 |      * @param string $examples A string like '1, 2, 5...7, …'.
 88 |      *
 89 |      * @throws \Exception throws an Exception if we weren't able to expand $examples
 90 |      *
 91 |      * @return int[]
 92 |      */
 93 |     public static function expandExamples($examples)
 94 |     {
 95 |         $ellipsis = ', …';
 96 |         if (substr($examples, -strlen($ellipsis)) === $ellipsis) {
 97 |             $examples = substr($examples, 0, strlen($examples) - strlen($ellipsis));
 98 |         }
 99 |         $numbers = array();
100 |         $match = null;
101 |         foreach (explode(',', str_replace(' ', '', $examples)) as $range) {
102 |             if (preg_match('/^(?<num>\d+)((c|e)(?<exp>\d+))?$/', $range, $match)) {
103 |                 // A 'c' or 'e' suffix followed by N means that N zeros have to be appended to the number.
104 |                 $digits = isset($match['exp']) ? ($match['num'] . str_repeat('0', (int) $match['exp'])) : $range;
105 |                 $numbers[] = (int) $digits;
106 | 
107 |                 continue;
108 |             }
109 |             if (!preg_match('/^(\d+)~(\d+)$/', $range, $match)) {
110 |                 throw new Exception("Unhandled test range '{$range}' in '{$examples}'");
111 |             }
112 |             $first = (int) $match[1];
113 |             $last = (int) $match[2];
114 |             // Wide ranges advance by 1/100th of their width (rounded down, minimum 1) instead of listing every integer.
115 |             $increment = (int) max(1, ($last - $first) / 100);
116 |             for ($current = $first; $current < $last; $current += $increment) {
117 |                 $numbers[] = $current;
118 |             }
119 |             $numbers[] = $last;
120 |         }
121 |         if (empty($numbers)) {
122 |             throw new Exception("No test numbers from '{$examples}'");
123 |         }
124 | 
125 |         return $numbers;
126 |     }
127 | }
128 | 


--------------------------------------------------------------------------------
/src/Exporter/Exporter.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Gettext\Languages\Exporter;
  4 | 
  5 | use Exception;
  6 | 
  7 | /**
  8 |  * Base class for all the exporters.
  9 |  */
 10 | abstract class Exporter
 11 | {
 12 |     /**
 13 |      * @var array
 14 |      */
 15 |     private static $exporters;
 16 | 
 17 |     /**
 18 |      * Return the list of all the available exporters. Keys are the exporter handles, values are the exporter class names.
 19 |      *
 20 |      * @param bool $onlyForPublicUse if true, internal exporters will be omitted
 21 |      *
 22 |      * @return string[]
 23 |      */
 24 |     final public static function getExporters($onlyForPublicUse = false)
 25 |     {
 26 |         if (!isset(self::$exporters)) {
 27 |             $exporters = array();
 28 |             $m = null;
 29 |             foreach (scandir(__DIR__) as $f) {
 30 |                 if (preg_match('/^(\w+)\.php$/', $f, $m)) {
 31 |                     if ($f !== basename(__FILE__)) {
 32 |                         $exporters[strtolower($m[1])] = $m[1];
 33 |                     }
 34 |                 }
 35 |             }
 36 |             self::$exporters = $exporters;
 37 |         }
 38 |         if ($onlyForPublicUse) {
 39 |             $result = array();
 40 |             foreach (self::$exporters as $handle => $class) {
 41 |                 if (call_user_func(self::getExporterClassName($handle) . '::isForPublicUse') === true) {
 42 |                     $result[$handle] = $class;
 43 |                 }
 44 |             }
 45 |         } else {
 46 |             $result = self::$exporters;
 47 |         }
 48 | 
 49 |         return $result;
 50 |     }
 51 | 
 52 |     /**
 53 |      * Return the description of a specific exporter.
 54 |      *
 55 |      * @param string $exporterHandle the handle of the exporter
 56 |      *
 57 |      * @throws \Exception throws an Exception if $exporterHandle is not valid
 58 |      *
 59 |      * @return string
 60 |      */
 61 |     final public static function getExporterDescription($exporterHandle)
 62 |     {
 63 |         $exporters = self::getExporters();
 64 |         if (!isset($exporters[$exporterHandle])) {
 65 |             throw new Exception("Invalid exporter handle: '{$exporterHandle}'");
 66 |         }
 67 | 
 68 |         return call_user_func(self::getExporterClassName($exporterHandle) . '::getDescription');
 69 |     }
 70 | 
 71 |     /**
 72 |      * Returns the fully qualified class name of an exporter given its handle.
 73 |      *
 74 |      * @param string $exporterHandle the exporter class handle
 75 |      *
 76 |      * @return string
 77 |      */
 78 |     final public static function getExporterClassName($exporterHandle)
 79 |     {
 80 |         return __NAMESPACE__ . '\\' . ucfirst(strtolower($exporterHandle));
 81 |     }
 82 | 
 83 |     /**
 84 |      * Export a list of Language instances to a string.
 85 |      *
 86 |      * @param \Gettext\Languages\Language[] $languages the Language instances to export
 87 |      * @param array|null $options export options; when 'us-ascii' is set to a truthy value the US-ASCII clones of the languages are exported
 88 |      *
 89 |      * @return string
 90 |      */
 91 |     final public static function toString($languages, $options = null)
 92 |     {
 93 |         // Anything that is not an array (null included) means "no options".
 94 |         $exportOptions = is_array($options) ? $options : array();
 95 |         if (!empty($exportOptions['us-ascii'])) {
 96 |             // Export the US-ASCII clones instead of the instances we received.
 97 |             $converted = array();
 98 |             foreach ($languages as $language) {
 99 |                 $converted[] = $language->getUSAsciiClone();
100 |             }
101 |             $languages = $converted;
102 |         }
103 | 
104 |         return static::toStringDoWithOptions($languages, $exportOptions);
105 |     }
106 | 
107 |     /**
108 |      * Export the Language instances and write the result to a file.
109 |      *
110 |      * @param \Gettext\Languages\Language[] $languages the Language instances to convert
111 |      * @param string $filename the path of the file to be written
112 |      * @param array|null $options the export options, passed as they are to toString()
113 |      * @throws \Exception if the exported data can't be written to $filename
114 |      */
115 |     final public static function toFile($languages, $filename, $options = null)
116 |     {
117 |         $contents = self::toString($languages, $options);
118 |         if (false === @file_put_contents($filename, $contents)) {
119 |             throw new Exception("Error writing data to '{$filename}'");
120 |         }
121 |     }
122 | 
123 |     /**
124 |      * Is this exporter for public use? getExporters() uses this answer to filter its result when asked for public exporters only.
125 |      *
126 |      * @return bool this base implementation always answers true
127 |      */
128 |     public static function isForPublicUse()
129 |     {
130 |         return true;
131 |     }
132 | 
133 |     /**
134 |      * Does this exporter support exporting formulas both with and without extra parenthesis?
135 |      *
136 |      * @return bool this base implementation always answers false
137 |      */
138 |     public static function supportsFormulasWithAndWithoutParenthesis()
139 |     {
140 |         return false;
141 |     }
142 | 
143 |     /**
144 |      * Return a short description of the exporter; this base implementation only throws an Exception naming the called class.
145 |      *
146 |      * @return string
147 |      */
148 |     public static function getDescription()
149 |     {
150 |         throw new Exception(sprintf('%s does not implement the method %s', get_called_class(), __FUNCTION__));
151 |     }
152 | 
153 |     /**
154 |      * Export the languages by forwarding them to the toStringDo() method of the called class ($options is not used here).
155 |      *
156 |      * @param \Gettext\Languages\Language[] $languages the Language instances to convert
157 |      * @param array $options export options
158 |      * @throws \Exception if the called class does not have a toStringDo() method
159 |      * @return string
160 |      */
161 |     protected static function toStringDoWithOptions($languages, array $options)
162 |     {
163 |         if (!method_exists(get_called_class(), 'toStringDo')) {
164 |             throw new Exception(sprintf('%s does not implement the method %s', get_called_class(), __FUNCTION__));
165 |         }
166 |         return static::toStringDo($languages);
167 |     }
168 | }
169 | 


--------------------------------------------------------------------------------
/src/FormulaConverter.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Gettext\Languages;
  4 | 
  5 | use Exception;
  6 | 
  7 | /**
  8 |  * A helper class to convert a CLDR plural rule formula to a gettext formula.
  9 |  */
 10 | class FormulaConverter
 11 | {
 12 |     /**
 13 |      * Converts a formula from the CLDR representation to the gettext representation.
 14 |      *
 15 |      * @param string $cldrFormula the CLDR formula to convert: atoms joined by ' and ', groups of atoms joined by ' or '
 16 |      *
 17 |      * @throws \Exception if the formula contains parenthesis or an atom that can't be converted
 18 |      *
 19 |      * @return bool|string true if the formula always evaluates to true in gettext, false if it always evaluates to false, the gettext formula otherwise
 20 |      */
 21 |     public static function convertFormula($cldrFormula)
 22 |     {
 23 |         if (strpbrk($cldrFormula, '()') !== false) {
 24 |             throw new Exception("Unable to convert the formula '{$cldrFormula}': parenthesis handling not implemented");
 25 |         }
 26 |         $orSeparatedChunks = array();
 27 |         foreach (explode(' or ', $cldrFormula) as $cldrFormulaChunk) {
 28 |             $gettextFormulaChunk = null;
 29 |             $andSeparatedChunks = array();
 30 |             foreach (explode(' and ', $cldrFormulaChunk) as $cldrAtom) {
 31 |                 $gettextAtom = self::convertAtom($cldrAtom);
 32 |                 if ($gettextAtom === false) {
 33 |                     // One atom joined by 'and' always evaluates to false => the whole 'and' group is always false
 34 |                     $gettextFormulaChunk = false;
 35 |                     break;
 36 |                 }
 37 |                 if ($gettextAtom !== true) {
 38 |                     $andSeparatedChunks[] = $gettextAtom;
 39 |                 }
 40 |             }
 41 |             if (!isset($gettextFormulaChunk)) {
 42 |                 if (empty($andSeparatedChunks)) {
 43 |                     // All the atoms joined by 'and' always evaluate to true => the whole 'and' group is always true
 44 |                     $gettextFormulaChunk = true;
 45 |                 } else {
 46 |                     $gettextFormulaChunk = implode(' && ', $andSeparatedChunks);
 47 |                     // Special cases simplification
 48 |                     switch ($gettextFormulaChunk) {
 49 |                         case 'n >= 0 && n <= 2 && n != 2':
 50 |                             $gettextFormulaChunk = 'n == 0 || n == 1';
 51 |                             break;
 52 |                     }
 53 |                 }
 54 |             }
 55 |             if ($gettextFormulaChunk === true) {
 56 |                 // One part of the formula joined with the others by 'or' always evaluates to true => the whole formula always evaluates to true
 57 |                 return true;
 58 |             }
 59 |             if ($gettextFormulaChunk !== false) {
 60 |                 $orSeparatedChunks[] = $gettextFormulaChunk;
 61 |             }
 62 |         }
 63 |         if (empty($orSeparatedChunks)) {
 64 |             // All the parts joined by 'or' always evaluate to false => the whole formula always evaluates to false
 65 |             return false;
 66 |         }
 67 | 
 68 |         return implode(' || ', $orSeparatedChunks);
 69 |     }
 70 | 
 71 |     /**
 72 |      * Converts an atomic part of the CLDR formula (a single comparison) to its gettext representation.
 73 |      *
 74 |      * @param string $cldrAtom the CLDR formula atom to convert
 75 |      *
 76 |      * @throws \Exception if the atom doesn't match any of the handled patterns
 77 |      *
 78 |      * @return bool|string true if the atom always evaluates to true in gettext, false if it always evaluates to false, the gettext atom otherwise
 79 |      */
 80 |     private static function convertAtom($cldrAtom)
 81 |     {
 82 |         $m = null;
 83 |         $gettextAtom = $cldrAtom;
 84 |         $gettextAtom = str_replace(' = ', ' == ', $gettextAtom);
 85 |         $gettextAtom = str_replace('i', 'n', $gettextAtom); // The CLDR operand i is handled exactly like n
 86 |         if (preg_match('/^n( % \d+)? (!=|==) \d+$/', $gettextAtom)) {
 87 |             return $gettextAtom;
 88 |         }
 89 |         if (preg_match('/^n( % \d+)? (!=|==) \d+(,\d+|\.\.\d+)+$/', $gettextAtom)) {
 90 |             return self::expandAtom($gettextAtom);
 91 |         }
 92 |         if (preg_match('/^(?:v|w)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/', $gettextAtom, $m)) { // For gettext: v == 0, w == 0
 93 |             return (int) $m[1] === 0 ? true : false;
 94 |         }
 95 |         if (preg_match('/^(?:v|w)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/', $gettextAtom, $m)) { // For gettext: v == 0, w == 0
 96 |             return (int) $m[1] === 0 ? false : true;
 97 |         }
 98 |         if (preg_match('/^(?:f|t|c|e)(?: % 10+)? == (\d+)(?:\.\.\d+)?$/', $gettextAtom, $m)) { // f == empty, t == empty, c == empty, e == empty
 99 |             return (int) $m[1] === 0 ? true : false;
100 |         }
101 |         if (preg_match('/^(?:f|t|c|e)(?: % 10+)? != (\d+)(?:\.\.\d+)?$/', $gettextAtom, $m)) { // f == empty, t == empty, c == empty, e == empty
102 |             return (int) $m[1] === 0 ? false : true;
103 |         }
104 |         throw new Exception("Unable to convert the formula chunk '{$cldrAtom}' from CLDR to gettext");
105 |     }
106 | 
107 |     /**
108 |      * Expands an atom containing ranges and/or lists of values (for instance: 'n == 1,3..5').
109 |      *
110 |      * @param string $atom the atom to expand, already using the gettext operators (== or !=)
111 |      *
112 |      * @throws \Exception if the atom or one of its ranges can't be handled
113 |      *
114 |      * @return string
115 |      */
116 |     private static function expandAtom($atom)
117 |     {
118 |         $m = null;
119 |         if (preg_match('/^(n(?: % \d+)?) (==|!=) (\d+(?:\.\.\d+|,\d+)+)$/', $atom, $m)) {
120 |             $what = $m[1];
121 |             $op = $m[2];
122 |             $chunks = array();
123 |             foreach (explode(',', $m[3]) as $range) {
124 |                 $chunk = null;
125 |                 if ((!isset($chunk)) && preg_match('/^\d+$/', $range)) {
126 |                     $chunk = "{$what} {$op} {$range}";
127 |                 }
128 |                 if ((!isset($chunk)) && preg_match('/^(\d+)\.\.(\d+)$/', $range, $m)) {
129 |                     $from = (int) $m[1];
130 |                     $to = (int) $m[2];
131 |                     if (($to - $from) === 1) {
132 |                         switch ($op) {
133 |                             case '==':
134 |                                 $chunk = "({$what} == {$from} || {$what} == {$to})";
135 |                                 break;
136 |                             case '!=': // Out of a two-value range: the value must differ from both the bounds
137 |                                 $chunk = "{$what} != {$from} && {$what} != {$to}";
138 |                                 break;
139 |                         }
140 |                     } else {
141 |                         switch ($op) {
142 |                             case '==':
143 |                                 $chunk = "{$what} >= {$from} && {$what} <= {$to}";
144 |                                 break;
145 |                             case '!=':
146 |                                 if ($what === 'n' && $from <= 0) {
147 |                                     $chunk = "{$what} > {$to}";
148 |                                 } else {
149 |                                     $chunk = "({$what} < {$from} || {$what} > {$to})";
150 |                                 }
151 |                                 break;
152 |                         }
153 |                     }
154 |                 }
155 |                 if (!isset($chunk)) {
156 |                     throw new Exception("Unhandled range '{$range}' in '{$atom}'");
157 |                 }
158 |                 $chunks[] = $chunk;
159 |             }
160 |             if (count($chunks) === 1) {
161 |                 return $chunks[0];
162 |             }
163 |             switch ($op) {
164 |                 case '==':
165 |                     return '(' . implode(' || ', $chunks) . ')';
166 |                 case '!=':
167 |                     return implode(' && ', $chunks);
168 |             }
169 |         }
170 |         throw new Exception("Unable to expand '{$atom}'");
171 |     }
172 | }
173 | 


--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/scripts.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "main": {
  3 |         "en-US": {
  4 |             "identity": {
  5 |                 "version": {
  6 |                     "_cldrVersion": "47"
  7 |                 },
  8 |                 "language": "en",
  9 |                 "territory": "US"
 10 |             },
 11 |             "localeDisplayNames": {
 12 |                 "scripts": {
 13 |                     "Adlm": "Adlam",
 14 |                     "Afak": "Afaka",
 15 |                     "Aghb": "Caucasian Albanian",
 16 |                     "Ahom": "Ahom",
 17 |                     "Arab": "Arabic",
 18 |                     "Arab-alt-variant": "Perso-Arabic",
 19 |                     "Aran": "Nastaliq",
 20 |                     "Armi": "Imperial Aramaic",
 21 |                     "Armn": "Armenian",
 22 |                     "Avst": "Avestan",
 23 |                     "Bali": "Balinese",
 24 |                     "Bamu": "Bamum",
 25 |                     "Bass": "Bassa Vah",
 26 |                     "Batk": "Batak",
 27 |                     "Beng": "Bangla",
 28 |                     "Bhks": "Bhaiksuki",
 29 |                     "Blis": "Blissymbols",
 30 |                     "Bopo": "Bopomofo",
 31 |                     "Brah": "Brahmi",
 32 |                     "Brai": "Braille",
 33 |                     "Bugi": "Buginese",
 34 |                     "Buhd": "Buhid",
 35 |                     "Cakm": "Chakma",
 36 |                     "Cans": "Unified Canadian Aboriginal Syllabics",
 37 |                     "Cans-alt-short": "UCAS",
 38 |                     "Cari": "Carian",
 39 |                     "Cham": "Cham",
 40 |                     "Cher": "Cherokee",
 41 |                     "Chrs": "Chorasmian",
 42 |                     "Cirt": "Cirth",
 43 |                     "Copt": "Coptic",
 44 |                     "Cpmn": "Cypro-Minoan",
 45 |                     "Cprt": "Cypriot",
 46 |                     "Cyrl": "Cyrillic",
 47 |                     "Cyrs": "Old Church Slavonic Cyrillic",
 48 |                     "Deva": "Devanagari",
 49 |                     "Diak": "Dives Akuru",
 50 |                     "Dogr": "Dogra",
 51 |                     "Dsrt": "Deseret",
 52 |                     "Dupl": "Duployan shorthand",
 53 |                     "Egyd": "Egyptian demotic",
 54 |                     "Egyh": "Egyptian hieratic",
 55 |                     "Egyp": "Egyptian hieroglyphs",
 56 |                     "Elba": "Elbasan",
 57 |                     "Elym": "Elymaic",
 58 |                     "Ethi": "Ethiopic",
 59 |                     "Gara": "Garay",
 60 |                     "Geok": "Georgian Khutsuri",
 61 |                     "Geor": "Georgian",
 62 |                     "Glag": "Glagolitic",
 63 |                     "Gong": "Gunjala Gondi",
 64 |                     "Gonm": "Masaram Gondi",
 65 |                     "Goth": "Gothic",
 66 |                     "Gran": "Grantha",
 67 |                     "Grek": "Greek",
 68 |                     "Gujr": "Gujarati",
 69 |                     "Gukh": "Gurung Khema",
 70 |                     "Guru": "Gurmukhi",
 71 |                     "Hanb": "Han with Bopomofo",
 72 |                     "Hang": "Hangul",
 73 |                     "Hani": "Han",
 74 |                     "Hano": "Hanunoo",
 75 |                     "Hans": "Simplified",
 76 |                     "Hans-alt-stand-alone": "Simplified Han",
 77 |                     "Hant": "Traditional",
 78 |                     "Hant-alt-stand-alone": "Traditional Han",
 79 |                     "Hatr": "Hatran",
 80 |                     "Hebr": "Hebrew",
 81 |                     "Hira": "Hiragana",
 82 |                     "Hluw": "Anatolian Hieroglyphs",
 83 |                     "Hmng": "Pahawh Hmong",
 84 |                     "Hmnp": "Nyiakeng Puachue Hmong",
 85 |                     "Hrkt": "Japanese syllabaries",
 86 |                     "Hung": "Old Hungarian",
 87 |                     "Inds": "Indus",
 88 |                     "Ital": "Old Italic",
 89 |                     "Jamo": "Jamo",
 90 |                     "Java": "Javanese",
 91 |                     "Jpan": "Japanese",
 92 |                     "Jurc": "Jurchen",
 93 |                     "Kali": "Kayah Li",
 94 |                     "Kana": "Katakana",
 95 |                     "Kawi": "Kawi",
 96 |                     "Khar": "Kharoshthi",
 97 |                     "Khmr": "Khmer",
 98 |                     "Khoj": "Khojki",
 99 |                     "Kits": "Khitan small script",
100 |                     "Knda": "Kannada",
101 |                     "Kore": "Korean",
102 |                     "Kpel": "Kpelle",
103 |                     "Krai": "Kirat Rai",
104 |                     "Kthi": "Kaithi",
105 |                     "Lana": "Lanna",
106 |                     "Laoo": "Lao",
107 |                     "Latf": "Fraktur Latin",
108 |                     "Latg": "Gaelic Latin",
109 |                     "Latn": "Latin",
110 |                     "Lepc": "Lepcha",
111 |                     "Limb": "Limbu",
112 |                     "Lina": "Linear A",
113 |                     "Linb": "Linear B",
114 |                     "Lisu": "Fraser",
115 |                     "Loma": "Loma",
116 |                     "Lyci": "Lycian",
117 |                     "Lydi": "Lydian",
118 |                     "Mahj": "Mahajani",
119 |                     "Maka": "Makasar",
120 |                     "Mand": "Mandaean",
121 |                     "Mani": "Manichaean",
122 |                     "Marc": "Marchen",
123 |                     "Maya": "Mayan hieroglyphs",
124 |                     "Medf": "Medefaidrin",
125 |                     "Mend": "Mende",
126 |                     "Merc": "Meroitic Cursive",
127 |                     "Mero": "Meroitic",
128 |                     "Mlym": "Malayalam",
129 |                     "Modi": "Modi",
130 |                     "Mong": "Mongolian",
131 |                     "Moon": "Moon",
132 |                     "Mroo": "Mro",
133 |                     "Mtei": "Meitei Mayek",
134 |                     "Mult": "Multani",
135 |                     "Mymr": "Myanmar",
136 |                     "Nagm": "Nag Mundari",
137 |                     "Nand": "Nandinagari",
138 |                     "Narb": "Old North Arabian",
139 |                     "Nbat": "Nabataean",
140 |                     "Newa": "Newa",
141 |                     "Nkgb": "Naxi Geba",
142 |                     "Nkoo": "N’Ko",
143 |                     "Nshu": "Nüshu",
144 |                     "Ogam": "Ogham",
145 |                     "Olck": "Ol Chiki",
146 |                     "Onao": "Ol Onal",
147 |                     "Orkh": "Orkhon",
148 |                     "Orya": "Odia",
149 |                     "Osge": "Osage",
150 |                     "Osma": "Osmanya",
151 |                     "Ougr": "Old Uyghur",
152 |                     "Palm": "Palmyrene",
153 |                     "Pauc": "Pau Cin Hau",
154 |                     "Perm": "Old Permic",
155 |                     "Phag": "Phags-pa",
156 |                     "Phli": "Inscriptional Pahlavi",
157 |                     "Phlp": "Psalter Pahlavi",
158 |                     "Phlv": "Book Pahlavi",
159 |                     "Phnx": "Phoenician",
160 |                     "Plrd": "Pollard Phonetic",
161 |                     "Prti": "Inscriptional Parthian",
162 |                     "Qaag": "Zawgyi",
163 |                     "Rjng": "Rejang",
164 |                     "Rohg": "Hanifi",
165 |                     "Rohg-alt-stand-alone": "Hanifi Rohingya",
166 |                     "Roro": "Rongorongo",
167 |                     "Runr": "Runic",
168 |                     "Samr": "Samaritan",
169 |                     "Sara": "Sarati",
170 |                     "Sarb": "Old South Arabian",
171 |                     "Saur": "Saurashtra",
172 |                     "Sgnw": "SignWriting",
173 |                     "Shaw": "Shavian",
174 |                     "Shrd": "Sharada",
175 |                     "Sidd": "Siddham",
176 |                     "Sind": "Khudawadi",
177 |                     "Sinh": "Sinhala",
178 |                     "Sogd": "Sogdian",
179 |                     "Sogo": "Old Sogdian",
180 |                     "Sora": "Sora Sompeng",
181 |                     "Soyo": "Soyombo",
182 |                     "Sund": "Sundanese",
183 |                     "Sunu": "Sunuwar",
184 |                     "Sylo": "Syloti Nagri",
185 |                     "Syrc": "Syriac",
186 |                     "Syre": "Estrangelo Syriac",
187 |                     "Syrj": "Western Syriac",
188 |                     "Syrn": "Eastern Syriac",
189 |                     "Tagb": "Tagbanwa",
190 |                     "Takr": "Takri",
191 |                     "Tale": "Tai Le",
192 |                     "Talu": "New Tai Lue",
193 |                     "Taml": "Tamil",
194 |                     "Tang": "Tangut",
195 |                     "Tavt": "Tai Viet",
196 |                     "Telu": "Telugu",
197 |                     "Teng": "Tengwar",
198 |                     "Tfng": "Tifinagh",
199 |                     "Tglg": "Tagalog",
200 |                     "Thaa": "Thaana",
201 |                     "Thai": "Thai",
202 |                     "Tibt": "Tibetan",
203 |                     "Tirh": "Tirhuta",
204 |                     "Tnsa": "Tangsa",
205 |                     "Todr": "Todhri",
206 |                     "Toto": "Toto",
207 |                     "Tutg": "Tulu-Tigalari",
208 |                     "Ugar": "Ugaritic",
209 |                     "Vaii": "Vai",
210 |                     "Visp": "Visible Speech",
211 |                     "Vith": "Vithkuqi",
212 |                     "Wara": "Varang Kshiti",
213 |                     "Wcho": "Wancho",
214 |                     "Wole": "Woleai",
215 |                     "Xpeo": "Old Persian",
216 |                     "Xsux": "Sumero-Akkadian Cuneiform",
217 |                     "Xsux-alt-short": "S-A Cuneiform",
218 |                     "Yezi": "Yezidi",
219 |                     "Yiii": "Yi",
220 |                     "Zanb": "Zanabazar Square",
221 |                     "Zinh": "Inherited",
222 |                     "Zmth": "Mathematical Notation",
223 |                     "Zsye": "Emoji",
224 |                     "Zsym": "Symbols",
225 |                     "Zxxx": "Unwritten",
226 |                     "Zyyy": "Common",
227 |                     "Zzzz": "Unknown Script"
228 |                 }
229 |             }
230 |         }
231 |     }
232 | }


--------------------------------------------------------------------------------
/bin/export-plural-rules:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env php
  2 | <?php
  3 | 
  4 | use Gettext\Languages\Exporter\Exporter;
  5 | use Gettext\Languages\Language;
  6 | 
  7 | // Let's start by imposing that we don't accept any error or warning: every one reported to the
  8 | // handler below is written to STDERR (message, file, line, code) and ends the script with exit code 5.
  9 | error_reporting(E_ALL);
 10 | set_error_handler(function ($errno, $errstr, $errfile, $errline) {
 11 |     fwrite(STDERR, "{$errstr}\nFile: {$errfile}\nLine: {$errline}\nCode: {$errno}\n");
 12 |     exit(5);
 13 | });
 14 | 
 15 | require_once dirname(__DIR__) . '/src/autoloader.php';
 16 | 
 17 | /**
 18 |  * Helper class to handle command line options.
 19 |  */
 20 | class Enviro
 21 | {
 22 |     /**
 23 |      * Shall the output contain only US-ASCII characters?
 24 |      *
 25 |      * @var bool
 26 |      */
 27 |     public static $outputUSAscii;
 28 | 
 29 |     /**
 30 |      * The output format (the handle of one of the available exporters).
 31 |      *
 32 |      * @var string
 33 |      */
 34 |     public static $outputFormat;
 35 | 
 36 |     /**
 37 |      * Output file name.
 38 |      *
 39 |      * @var string
 40 |      */
 41 |     public static $outputFilename;
 42 | 
 43 |     /**
 44 |      * List of wanted language IDs; if not set: all languages will be returned.
 45 |      *
 46 |      * @var array|null
 47 |      */
 48 |     public static $languages;
 49 | 
 50 |     /**
 51 |      * Reduce the language list to the minimum common denominator.
 52 |      *
 53 |      * @var bool
 54 |      */
 55 |     public static $reduce;
 56 | 
 57 |     /**
 58 |      * Keep the extra parenthesis in plural rule formulas (if false: they will be omitted).
 59 |      * If null: formulas will be exported with and without extra parenthesis (if supported by the exporter).
 60 |      *
 61 |      * @var bool|null
 62 |      */
 63 |     public static $extraParenthesis;
 64 | 
 65 |     /**
 66 |      * Parse the command line options (the script exits on --help and on invalid or missing options).
 67 |      */
 68 |     public static function initialize()
 69 |     {
 70 |         global $argv;
 71 |         self::$outputUSAscii = false;
 72 |         self::$outputFormat = null;
 73 |         self::$outputFilename = null;
 74 |         self::$languages = null;
 75 |         self::$reduce = null;
 76 |         self::$extraParenthesis = true;
 77 |         $exporters = Exporter::getExporters();
 78 |         if (isset($argv) && is_array($argv)) {
 79 |             foreach ($argv as $argi => $arg) {
 80 |                 if ($argi === 0) {
 81 |                     continue;
 82 |                 }
 83 |                 if (is_string($arg)) {
 84 |                     $argLC = trim(strtolower($arg));
 85 |                     switch ($argLC) {
 86 |                         case '-h':
 87 |                         case '--help':
 88 |                             self::showSyntax();
 89 |                             exit(0);
 90 |                         case '--us-ascii':
 91 |                             self::$outputUSAscii = true;
 92 |                             break;
 93 |                         case '--reduce=yes':
 94 |                             self::$reduce = true;
 95 |                             break;
 96 |                         case '--reduce=no':
 97 |                             self::$reduce = false;
 98 |                             break;
 99 |                         case '--parenthesis=yes':
100 |                             self::$extraParenthesis = true;
101 |                             break;
102 |                         case '--parenthesis=no':
103 |                             self::$extraParenthesis = false;
104 |                             break;
105 |                         case '--parenthesis=both':
106 |                             self::$extraParenthesis = null;
107 |                             break;
108 |                         default:
109 |                             if (preg_match('/^--output=.+$/', $argLC)) {
110 |                                 if (isset(self::$outputFilename)) {
111 |                                     fwrite(STDERR, "The output file name has been specified more than once!\n");
112 |                                     self::showSyntax();
113 |                                     exit(3);
114 |                                 }
115 |                                 list(, self::$outputFilename) = explode('=', $arg, 2);
116 |                                 self::$outputFilename = trim(self::$outputFilename);
117 |                             } elseif (preg_match('/^--languages?=.+$/', $argLC)) {
118 |                                 list(, $s) = explode('=', $arg, 2);
119 |                                 $list = explode(',', $s);
120 |                                 if (is_array(self::$languages)) {
121 |                                     self::$languages = array_merge(self::$languages, $list);
122 |                                 } else {
123 |                                     self::$languages = $list;
124 |                                 }
125 |                             } elseif (isset($exporters[$argLC])) {
126 |                                 if (isset(self::$outputFormat)) {
127 |                                     fwrite(STDERR, "The output format has been specified more than once!\n");
128 |                                     self::showSyntax();
129 |                                     exit(3);
130 |                                 }
131 |                                 self::$outputFormat = $argLC;
132 |                             } else {
133 |                                 fwrite(STDERR, "Unknown option: {$arg}\n");
134 |                                 self::showSyntax();
135 |                                 exit(2);
136 |                             }
137 |                             break;
138 |                     }
139 |                 }
140 |             }
141 |         }
142 |         if (!isset(self::$outputFormat)) {
143 |             self::showSyntax();
144 |             exit(1);
145 |         }
146 |         if (isset(self::$languages)) {
147 |             self::$languages = array_values(array_unique(self::$languages));
148 |         }
149 |         if (!isset(self::$reduce)) {
150 |             self::$reduce = isset(self::$languages) ? false : true;
151 |         }
152 |     }
153 | 
154 |     /**
155 |      * Write out to STDERR the syntax of the command and the list of the exporters for public use.
156 |      */
157 |     public static function showSyntax()
158 |     {
159 |         $basename = basename(__FILE__);
160 |         $exporters = array_keys(Exporter::getExporters(true));
161 |         $exporterList = implode('|', $exporters);
162 |         fwrite(
163 |             STDERR,
164 |             <<<EOT
165 | Syntax:
166 |     {$basename} [-h|--help] [--us-ascii] [--languages=<LanguageId>[,<LanguageId>,...]] [--reduce=yes|no] [--parenthesis=yes|no|both] [--output=<file name>] <{$exporterList}>
167 | 
168 | Where:
169 |     --help
170 |         show this help message.
171 | 
172 |     --us-ascii
173 |         if specified, the output will contain only US-ASCII characters.
174 | 
175 |     --languages(or --language)
176 |         export only the specified language codes.
177 |         Separate languages with commas; you can also use this argument
178 |         more than once; it's case insensitive and accepts both '_' and
179 |         '-' as locale chunks separator (eg we accept 'it_IT' as well as
180 |         'it-it').
181 |     --reduce
182 |         if set to yes the output won't contain languages with the same
183 |         base language and rules.
184 |         For instance nl_BE ('Flemish') will be omitted because it's the
185 |         same as nl ('Dutch').
186 |         Defaults to 'no' if --languages is specified, to 'yes' otherwise.
187 |     --parenthesis
188 |         if set to no, extra parenthesis will be omitted in generated
189 |         plural rules formulas.
190 |         Those extra parenthesis are needed to create a PHP-compatible
191 |         formula.
192 |         Some exporters may also export formulas both with and without
193 |         the extra parenthesis: use --parenthesis=both in this case
194 |         Defaults to 'yes'
195 |     --output
196 |         if specified, the output will be saved to <file name>. If not
197 |         specified we'll output to standard output.
198 | 
199 | Output formats
200 | 
201 | EOT
202 |         );
203 |         $len = max(array_map('strlen', $exporters));
204 |         foreach ($exporters as $exporter) {
205 |             fwrite(STDERR, '    ' . str_pad($exporter, $len) . ': ' . Exporter::getExporterDescription($exporter) . "\n");
206 |         }
207 |         fwrite(STDERR, "\n");
208 |     }
209 | 
210 |     /**
211 |      * Reduce a language list to the minimum common denominator: a language whose ID has 3 or 2 chunks is dropped
212 |      * when the list also contains a related language with a shorter ID and with the same formula and categories.
213 |      * @param Language[] $languages
214 |      *
215 |      * @return Language[]
216 |      */
217 |     public static function reduce($languages)
218 |     {
219 |         for ($numChunks = 3; $numChunks >= 2; $numChunks--) {
220 |             $filtered = array();
221 |             foreach ($languages as $language) {
222 |                 $chunks = explode('_', $language->id);
223 |                 $compatibleFound = false;
224 |                 if ($numChunks === count($chunks)) {
225 |                     $categoriesHash = serialize($language->categories);
226 |                     $otherIds = array();
227 |                     $otherIds[] = $chunks[0];
228 |                     for ($k = 2; $k < $numChunks; $k++) {
229 |                         $otherIds[] = $chunks[0] . '_' . $chunks[$numChunks - 1];
230 |                     }
231 | 
232 |                     foreach ($languages as $check) {
233 |                         foreach ($otherIds as $otherId) {
234 |                             if ($check->id === $otherId && $check->formula === $language->formula && $categoriesHash === serialize($check->categories)) {
235 |                                 $compatibleFound = true;
236 |                                 break;
237 |                             }
238 |                         }
239 |                         if ($compatibleFound === true) {
240 |                             break;
241 |                         }
242 |                     }
243 |                 }
244 |                 if (!$compatibleFound) {
245 |                     $filtered[] = $language;
246 |                 }
247 |             }
248 |             $languages = $filtered;
249 |         }
250 | 
251 |         return $languages;
252 |     }
253 | }
254 | 
// Parse the command line options
Enviro::initialize();

try {
    // Collect the languages to be exported: the requested ones, or all of them if none was specified
    if (isset(Enviro::$languages)) {
        $languages = array();
        foreach (Enviro::$languages as $languageId) {
            $language = Language::getById($languageId);
            if (!isset($language)) {
                throw new Exception("Unable to find the language with id '{$languageId}'");
            }
            $languages[] = $language;
        }
    } else {
        $languages = Language::getAll();
    }
    // Drop the languages that are made redundant by a less specific one
    if (Enviro::$reduce) {
        $languages = Enviro::reduce($languages);
    }
    // When the extra parenthesis are explicitly disabled, rebuild every formula accordingly
    if (Enviro::$extraParenthesis === false) {
        $languages = array_map(
            function (Language $language) {
                $language->formula = $language->buildFormula(true);

                return $language;
            },
            $languages
        );
    }
    $exporterClass = Exporter::getExporterClassName(Enviro::$outputFormat);
    $options = array(
        'us-ascii' => Enviro::$outputUSAscii,
        // A null value for the extra parenthesis option means that both the formula flavors are wanted
        'both-formulas' => Enviro::$extraParenthesis === null,
    );
    if ($options['both-formulas'] && !call_user_func(array($exporterClass, 'supportsFormulasWithAndWithoutParenthesis'))) {
        throw new Exception("The selected exporter doesn't support exporting data with and without extra parenthesis");
    }
    // Write to the requested file, or to the standard output if no file name was specified
    if (isset(Enviro::$outputFilename)) {
        echo call_user_func(array($exporterClass, 'toFile'), $languages, Enviro::$outputFilename, $options);
    } else {
        echo call_user_func(array($exporterClass, 'toString'), $languages, $options);
    }
} catch (Exception $x) {
    // Report the problem on the standard error and terminate with a failure exit code
    fwrite(STDERR, $x->getMessage() . "\n");
    fwrite(STDERR, "Trace:\n");
    fwrite(STDERR, $x->getTraceAsString() . "\n");
    exit(4);
}

exit(0);
305 | 


--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/territories.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "main": {
  3 |         "en-US": {
  4 |             "identity": {
  5 |                 "version": {
  6 |                     "_cldrVersion": "47"
  7 |                 },
  8 |                 "language": "en",
  9 |                 "territory": "US"
 10 |             },
 11 |             "localeDisplayNames": {
 12 |                 "territories": {
 13 |                     "001": "world",
 14 |                     "002": "Africa",
 15 |                     "003": "North America",
 16 |                     "005": "South America",
 17 |                     "009": "Oceania",
 18 |                     "011": "Western Africa",
 19 |                     "013": "Central America",
 20 |                     "014": "Eastern Africa",
 21 |                     "015": "Northern Africa",
 22 |                     "017": "Middle Africa",
 23 |                     "018": "Southern Africa",
 24 |                     "019": "Americas",
 25 |                     "021": "Northern America",
 26 |                     "029": "Caribbean",
 27 |                     "030": "Eastern Asia",
 28 |                     "034": "Southern Asia",
 29 |                     "035": "Southeast Asia",
 30 |                     "039": "Southern Europe",
 31 |                     "053": "Australasia",
 32 |                     "054": "Melanesia",
 33 |                     "057": "Micronesian Region",
 34 |                     "061": "Polynesia",
 35 |                     "142": "Asia",
 36 |                     "143": "Central Asia",
 37 |                     "145": "Western Asia",
 38 |                     "150": "Europe",
 39 |                     "151": "Eastern Europe",
 40 |                     "154": "Northern Europe",
 41 |                     "155": "Western Europe",
 42 |                     "202": "Sub-Saharan Africa",
 43 |                     "419": "Latin America",
 44 |                     "AC": "Ascension Island",
 45 |                     "AD": "Andorra",
 46 |                     "AE": "United Arab Emirates",
 47 |                     "AF": "Afghanistan",
 48 |                     "AG": "Antigua & Barbuda",
 49 |                     "AI": "Anguilla",
 50 |                     "AL": "Albania",
 51 |                     "AM": "Armenia",
 52 |                     "AO": "Angola",
 53 |                     "AQ": "Antarctica",
 54 |                     "AR": "Argentina",
 55 |                     "AS": "American Samoa",
 56 |                     "AT": "Austria",
 57 |                     "AU": "Australia",
 58 |                     "AW": "Aruba",
 59 |                     "AX": "Åland Islands",
 60 |                     "AZ": "Azerbaijan",
 61 |                     "BA": "Bosnia & Herzegovina",
 62 |                     "BA-alt-short": "Bosnia",
 63 |                     "BB": "Barbados",
 64 |                     "BD": "Bangladesh",
 65 |                     "BE": "Belgium",
 66 |                     "BF": "Burkina Faso",
 67 |                     "BG": "Bulgaria",
 68 |                     "BH": "Bahrain",
 69 |                     "BI": "Burundi",
 70 |                     "BJ": "Benin",
 71 |                     "BL": "St. Barthélemy",
 72 |                     "BM": "Bermuda",
 73 |                     "BN": "Brunei",
 74 |                     "BO": "Bolivia",
 75 |                     "BQ": "Caribbean Netherlands",
 76 |                     "BR": "Brazil",
 77 |                     "BS": "Bahamas",
 78 |                     "BT": "Bhutan",
 79 |                     "BV": "Bouvet Island",
 80 |                     "BW": "Botswana",
 81 |                     "BY": "Belarus",
 82 |                     "BZ": "Belize",
 83 |                     "CA": "Canada",
 84 |                     "CC": "Cocos (Keeling) Islands",
 85 |                     "CC-alt-short": "Cocos Islands",
 86 |                     "CD": "Congo - Kinshasa",
 87 |                     "CD-alt-variant": "Congo (DRC)",
 88 |                     "CF": "Central African Republic",
 89 |                     "CG": "Congo - Brazzaville",
 90 |                     "CG-alt-variant": "Congo (Republic)",
 91 |                     "CH": "Switzerland",
 92 |                     "CI": "Côte d’Ivoire",
 93 |                     "CI-alt-variant": "Ivory Coast",
 94 |                     "CK": "Cook Islands",
 95 |                     "CL": "Chile",
 96 |                     "CM": "Cameroon",
 97 |                     "CN": "China",
 98 |                     "CO": "Colombia",
 99 |                     "CP": "Clipperton Island",
100 |                     "CQ": "Sark",
101 |                     "CR": "Costa Rica",
102 |                     "CU": "Cuba",
103 |                     "CV": "Cape Verde",
104 |                     "CV-alt-variant": "Cabo Verde",
105 |                     "CW": "Curaçao",
106 |                     "CX": "Christmas Island",
107 |                     "CY": "Cyprus",
108 |                     "CZ": "Czechia",
109 |                     "CZ-alt-variant": "Czech Republic",
110 |                     "DE": "Germany",
111 |                     "DG": "Diego Garcia",
112 |                     "DJ": "Djibouti",
113 |                     "DK": "Denmark",
114 |                     "DM": "Dominica",
115 |                     "DO": "Dominican Republic",
116 |                     "DZ": "Algeria",
117 |                     "EA": "Ceuta & Melilla",
118 |                     "EC": "Ecuador",
119 |                     "EE": "Estonia",
120 |                     "EG": "Egypt",
121 |                     "EH": "Western Sahara",
122 |                     "ER": "Eritrea",
123 |                     "ES": "Spain",
124 |                     "ET": "Ethiopia",
125 |                     "EU": "European Union",
126 |                     "EZ": "Eurozone",
127 |                     "FI": "Finland",
128 |                     "FJ": "Fiji",
129 |                     "FK": "Falkland Islands",
130 |                     "FK-alt-variant": "Falkland Islands (Islas Malvinas)",
131 |                     "FM": "Micronesia",
132 |                     "FO": "Faroe Islands",
133 |                     "FR": "France",
134 |                     "GA": "Gabon",
135 |                     "GB": "United Kingdom",
136 |                     "GB-alt-short": "UK",
137 |                     "GD": "Grenada",
138 |                     "GE": "Georgia",
139 |                     "GF": "French Guiana",
140 |                     "GG": "Guernsey",
141 |                     "GH": "Ghana",
142 |                     "GI": "Gibraltar",
143 |                     "GL": "Greenland",
144 |                     "GM": "Gambia",
145 |                     "GN": "Guinea",
146 |                     "GP": "Guadeloupe",
147 |                     "GQ": "Equatorial Guinea",
148 |                     "GR": "Greece",
149 |                     "GS": "South Georgia & South Sandwich Islands",
150 |                     "GT": "Guatemala",
151 |                     "GU": "Guam",
152 |                     "GW": "Guinea-Bissau",
153 |                     "GY": "Guyana",
154 |                     "HK": "Hong Kong SAR China",
155 |                     "HK-alt-short": "Hong Kong",
156 |                     "HM": "Heard & McDonald Islands",
157 |                     "HN": "Honduras",
158 |                     "HR": "Croatia",
159 |                     "HT": "Haiti",
160 |                     "HU": "Hungary",
161 |                     "IC": "Canary Islands",
162 |                     "ID": "Indonesia",
163 |                     "IE": "Ireland",
164 |                     "IL": "Israel",
165 |                     "IM": "Isle of Man",
166 |                     "IN": "India",
167 |                     "IO": "British Indian Ocean Territory",
168 |                     "IO-alt-biot": "British Indian Ocean Territory",
169 |                     "IO-alt-chagos": "Chagos Archipelago",
170 |                     "IQ": "Iraq",
171 |                     "IR": "Iran",
172 |                     "IS": "Iceland",
173 |                     "IT": "Italy",
174 |                     "JE": "Jersey",
175 |                     "JM": "Jamaica",
176 |                     "JO": "Jordan",
177 |                     "JP": "Japan",
178 |                     "KE": "Kenya",
179 |                     "KG": "Kyrgyzstan",
180 |                     "KH": "Cambodia",
181 |                     "KI": "Kiribati",
182 |                     "KM": "Comoros",
183 |                     "KN": "St. Kitts & Nevis",
184 |                     "KP": "North Korea",
185 |                     "KR": "South Korea",
186 |                     "KW": "Kuwait",
187 |                     "KY": "Cayman Islands",
188 |                     "KZ": "Kazakhstan",
189 |                     "LA": "Laos",
190 |                     "LB": "Lebanon",
191 |                     "LC": "St. Lucia",
192 |                     "LI": "Liechtenstein",
193 |                     "LK": "Sri Lanka",
194 |                     "LR": "Liberia",
195 |                     "LS": "Lesotho",
196 |                     "LT": "Lithuania",
197 |                     "LU": "Luxembourg",
198 |                     "LV": "Latvia",
199 |                     "LY": "Libya",
200 |                     "MA": "Morocco",
201 |                     "MC": "Monaco",
202 |                     "MD": "Moldova",
203 |                     "ME": "Montenegro",
204 |                     "MF": "St. Martin",
205 |                     "MG": "Madagascar",
206 |                     "MH": "Marshall Islands",
207 |                     "MK": "North Macedonia",
208 |                     "ML": "Mali",
209 |                     "MM": "Myanmar (Burma)",
210 |                     "MM-alt-short": "Myanmar",
211 |                     "MN": "Mongolia",
212 |                     "MO": "Macao SAR China",
213 |                     "MO-alt-short": "Macao",
214 |                     "MP": "Northern Mariana Islands",
215 |                     "MQ": "Martinique",
216 |                     "MR": "Mauritania",
217 |                     "MS": "Montserrat",
218 |                     "MT": "Malta",
219 |                     "MU": "Mauritius",
220 |                     "MV": "Maldives",
221 |                     "MW": "Malawi",
222 |                     "MX": "Mexico",
223 |                     "MY": "Malaysia",
224 |                     "MZ": "Mozambique",
225 |                     "NA": "Namibia",
226 |                     "NC": "New Caledonia",
227 |                     "NE": "Niger",
228 |                     "NF": "Norfolk Island",
229 |                     "NG": "Nigeria",
230 |                     "NI": "Nicaragua",
231 |                     "NL": "Netherlands",
232 |                     "NO": "Norway",
233 |                     "NP": "Nepal",
234 |                     "NR": "Nauru",
235 |                     "NU": "Niue",
236 |                     "NZ": "New Zealand",
237 |                     "NZ-alt-variant": "Aotearoa New Zealand",
238 |                     "OM": "Oman",
239 |                     "PA": "Panama",
240 |                     "PE": "Peru",
241 |                     "PF": "French Polynesia",
242 |                     "PG": "Papua New Guinea",
243 |                     "PH": "Philippines",
244 |                     "PK": "Pakistan",
245 |                     "PL": "Poland",
246 |                     "PM": "St. Pierre & Miquelon",
247 |                     "PN": "Pitcairn Islands",
248 |                     "PN-alt-short": "Pitcairn",
249 |                     "PR": "Puerto Rico",
250 |                     "PS": "Palestinian Territories",
251 |                     "PS-alt-short": "Palestine",
252 |                     "PT": "Portugal",
253 |                     "PW": "Palau",
254 |                     "PY": "Paraguay",
255 |                     "QA": "Qatar",
256 |                     "QO": "Outlying Oceania",
257 |                     "RE": "Réunion",
258 |                     "RO": "Romania",
259 |                     "RS": "Serbia",
260 |                     "RU": "Russia",
261 |                     "RW": "Rwanda",
262 |                     "SA": "Saudi Arabia",
263 |                     "SB": "Solomon Islands",
264 |                     "SC": "Seychelles",
265 |                     "SD": "Sudan",
266 |                     "SE": "Sweden",
267 |                     "SG": "Singapore",
268 |                     "SH": "St. Helena",
269 |                     "SI": "Slovenia",
270 |                     "SJ": "Svalbard & Jan Mayen",
271 |                     "SK": "Slovakia",
272 |                     "SL": "Sierra Leone",
273 |                     "SM": "San Marino",
274 |                     "SN": "Senegal",
275 |                     "SO": "Somalia",
276 |                     "SR": "Suriname",
277 |                     "SS": "South Sudan",
278 |                     "ST": "São Tomé & Príncipe",
279 |                     "SV": "El Salvador",
280 |                     "SX": "Sint Maarten",
281 |                     "SY": "Syria",
282 |                     "SZ": "Eswatini",
283 |                     "SZ-alt-variant": "Swaziland",
284 |                     "TA": "Tristan da Cunha",
285 |                     "TC": "Turks & Caicos Islands",
286 |                     "TD": "Chad",
287 |                     "TF": "French Southern Territories",
288 |                     "TG": "Togo",
289 |                     "TH": "Thailand",
290 |                     "TJ": "Tajikistan",
291 |                     "TK": "Tokelau",
292 |                     "TL": "Timor-Leste",
293 |                     "TL-alt-variant": "East Timor",
294 |                     "TM": "Turkmenistan",
295 |                     "TN": "Tunisia",
296 |                     "TO": "Tonga",
297 |                     "TR": "Türkiye",
298 |                     "TR-alt-variant": "Turkey",
299 |                     "TT": "Trinidad & Tobago",
300 |                     "TV": "Tuvalu",
301 |                     "TW": "Taiwan",
302 |                     "TZ": "Tanzania",
303 |                     "UA": "Ukraine",
304 |                     "UG": "Uganda",
305 |                     "UM": "U.S. Outlying Islands",
306 |                     "UN": "United Nations",
307 |                     "UN-alt-short": "UN",
308 |                     "US": "United States",
309 |                     "US-alt-short": "US",
310 |                     "UY": "Uruguay",
311 |                     "UZ": "Uzbekistan",
312 |                     "VA": "Vatican City",
313 |                     "VC": "St. Vincent & Grenadines",
314 |                     "VE": "Venezuela",
315 |                     "VG": "British Virgin Islands",
316 |                     "VI": "U.S. Virgin Islands",
317 |                     "VN": "Vietnam",
318 |                     "VU": "Vanuatu",
319 |                     "WF": "Wallis & Futuna",
320 |                     "WS": "Samoa",
321 |                     "XA": "Pseudo-Accents",
322 |                     "XB": "Pseudo-Bidi",
323 |                     "XK": "Kosovo",
324 |                     "YE": "Yemen",
325 |                     "YT": "Mayotte",
326 |                     "ZA": "South Africa",
327 |                     "ZM": "Zambia",
328 |                     "ZW": "Zimbabwe",
329 |                     "ZZ": "Unknown Region"
330 |                 }
331 |             }
332 |         }
333 |     }
334 | }


--------------------------------------------------------------------------------
/src/CldrData.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Gettext\Languages;
  4 | 
  5 | use Exception;
  6 | 
  7 | /**
  8 |  * Holds the CLDR data.
  9 |  */
 10 | class CldrData
 11 | {
 12 |     /**
 13 |      * Super-special plural category: this should always be present for any language.
 14 |      *
 15 |      * @var string
 16 |      */
 17 |     const OTHER_CATEGORY = 'other';
 18 | 
 19 |     /**
 20 |      * The list of the plural categories, sorted from 'zero' to 'other'.
 21 |      *
 22 |      * @var string[]
 23 |      */
 24 |     public static $categories = array('zero', 'one', 'two', 'few', 'many', self::OTHER_CATEGORY);
 25 | 
 26 |     /**
 27 |      * The loaded CLDR data.
 28 |      *
 29 |      * @var array
 30 |      */
 31 |     private static $data;
 32 | 
 33 |     /**
 34 |      * @var array
 35 |      */
 36 |     private static $plurals;
 37 | 
 38 |     /**
 39 |      * Returns a dictionary containing the language names.
 40 |      * The keys are the language identifiers.
 41 |      * The values are the language names in US English.
 42 |      *
 43 |      * @return string[]
 44 |      */
 45 |     public static function getLanguageNames()
 46 |     {
 47 |         return self::getData('languages');
 48 |     }
 49 | 
 50 |     /**
 51 |      * Return a dictionary containing the territory names (in US English).
 52 |      * The keys are the territory identifiers.
 53 |      * The values are the territory names in US English.
 54 |      *
 55 |      * @return string[]
 56 |      */
 57 |     public static function getTerritoryNames()
 58 |     {
 59 |         return self::getData('territories');
 60 |     }
 61 | 
 62 |     /**
 63 |      * Return a dictionary containing the script names (in US English).
 64 |      * The keys are the script identifiers.
 65 |      * The values are the script names in US English.
 66 |      *
 67 |      * @param bool $standAlone set to true to retrieve the stand-alone script names, false otherwise
 68 |      *
 69 |      * @return string[]
 70 |      */
 71 |     public static function getScriptNames($standAlone)
 72 |     {
 73 |         return self::getData($standAlone ? 'standAloneScripts' : 'scripts');
 74 |     }
 75 | 
 76 |     /**
 77 |      * A dictionary containing the plural rules.
 78 |      * The keys are the language identifiers.
 79 |      * The values are arrays whose keys are the CLDR category names and the values are the CLDR category definition.
 80 |      *
 81 |      * @example The English key-value pair is somethink like this:
 82 |      * <code><pre>
 83 |      * "en": {
 84 |      *     "pluralRule-count-one": "i = 1 and v = 0 @integer 1",
 85 |      *     "pluralRule-count-other": " @integer 0, 2~16, 100, 1000, 10000, 100000, 1000000, … @decimal 0.0~1.5, 10.0, 100.0, 1000.0, 10000.0, 100000.0, 1000000.0, …"
 86 |      * }
 87 |      * </pre></code>
 88 |      *
 89 |      * @return array
 90 |      */
 91 |     public static function getPlurals()
 92 |     {
 93 |         return self::getData('plurals');
 94 |     }
 95 | 
 96 |     /**
 97 |      * Return a list of superseded language codes.
 98 |      *
 99 |      * @return array keys are the former language codes, values are the new language/locale codes
100 |      */
101 |     public static function getSupersededLanguages()
102 |     {
103 |         return self::getData('supersededLanguages');
104 |     }
105 | 
106 |     /**
107 |      * Retrieve the name of a language, as well as if a language code is deprecated in favor of another language code.
108 |      *
109 |      * @param string $id the language identifier
110 |      *
111 |      * @return array|null Returns an array with the keys 'id' (normalized), 'name', 'supersededBy' (optional), 'territory' (optional), 'script' (optional), 'baseLanguage' (optional), 'categories'. If $id is not valid returns null.
112 |      */
113 |     public static function getLanguageInfo($id)
114 |     {
115 |         $result = null;
116 |         $matches = array();
117 |         if (preg_match('/^([a-z]{2,3})(?:[_\-]([a-z]{4}))?(?:[_\-]([a-z]{2}|[0-9]{3}))?(?:$|-)/i', $id, $matches)) {
118 |             $languageId = strtolower($matches[1]);
119 |             $scriptId = (isset($matches[2]) && ($matches[2] !== '')) ? ucfirst(strtolower($matches[2])) : null;
120 |             $territoryId = (isset($matches[3]) && ($matches[3] !== '')) ? strtoupper($matches[3]) : null;
121 |             $normalizedId = $languageId;
122 |             if (isset($scriptId)) {
123 |                 $normalizedId .= '_' . $scriptId;
124 |             }
125 |             if (isset($territoryId)) {
126 |                 $normalizedId .= '_' . $territoryId;
127 |             }
128 |             // Structure precedence: see Likely Subtags - http://www.unicode.org/reports/tr35/tr35-31/tr35.html#Likely_Subtags
129 |             $variants = array();
130 |             $variantsWithScript = array();
131 |             $variantsWithTerritory = array();
132 |             if (isset($scriptId) && isset($territoryId)) {
133 |                 $variantsWithTerritory[] = $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}_{$territoryId}";
134 |             }
135 |             if (isset($scriptId)) {
136 |                 $variantsWithScript[] = $variants[] = "{$languageId}_{$scriptId}";
137 |             }
138 |             if (isset($territoryId)) {
139 |                 $variantsWithTerritory[] = $variants[] = "{$languageId}_{$territoryId}";
140 |             }
141 |             $variants[] = $languageId;
142 |             $allGood = true;
143 |             $scriptName = null;
144 |             $scriptStandAloneName = null;
145 |             if (isset($scriptId)) {
146 |                 $scriptNames = self::getScriptNames(false);
147 |                 if (isset($scriptNames[$scriptId])) {
148 |                     $scriptName = $scriptNames[$scriptId];
149 |                     $scriptStandAloneNames = self::getScriptNames(true);
150 |                     $scriptStandAloneName = $scriptStandAloneNames[$scriptId];
151 |                 } else {
152 |                     $allGood = false;
153 |                 }
154 |             }
155 |             $territoryName = null;
156 |             if (isset($territoryId)) {
157 |                 $territoryNames = self::getTerritoryNames();
158 |                 if (isset($territoryNames[$territoryId])) {
159 |                     if ($territoryId !== '001') {
160 |                         $territoryName = $territoryNames[$territoryId];
161 |                     }
162 |                 } else {
163 |                     $allGood = false;
164 |                 }
165 |             }
166 |             $languageName = null;
167 |             $languageNames = self::getLanguageNames();
168 |             foreach ($variants as $variant) {
169 |                 if (isset($languageNames[$variant])) {
170 |                     $languageName = $languageNames[$variant];
171 |                     if (isset($scriptName) && (!in_array($variant, $variantsWithScript))) {
172 |                         $languageName = $scriptName . ' ' . $languageName;
173 |                     }
174 |                     if (isset($territoryName) && (!in_array($variant, $variantsWithTerritory))) {
175 |                         $languageName .= ' (' . $territoryNames[$territoryId] . ')';
176 |                     }
177 |                     break;
178 |                 }
179 |             }
180 |             if (!isset($languageName)) {
181 |                 $allGood = false;
182 |             }
183 |             $baseLanguage = null;
184 |             if (isset($scriptId) || isset($territoryId)) {
185 |                 if (isset($languageNames[$languageId]) && ($languageNames[$languageId] !== $languageName)) {
186 |                     $baseLanguage = $languageNames[$languageId];
187 |                 }
188 |             }
189 |             $plural = null;
190 |             $plurals = self::getPlurals();
191 |             foreach ($variants as $variant) {
192 |                 if (isset($plurals[$variant])) {
193 |                     $plural = $plurals[$variant];
194 |                     break;
195 |                 }
196 |             }
197 |             if (!isset($plural)) {
198 |                 $allGood = false;
199 |             }
200 |             $supersededBy = null;
201 |             $supersededBys = self::getSupersededLanguages();
202 |             foreach ($variants as $variant) {
203 |                 if (isset($supersededBys[$variant])) {
204 |                     $supersededBy = $supersededBys[$variant];
205 |                     break;
206 |                 }
207 |             }
208 |             if ($allGood) {
209 |                 $result = array();
210 |                 $result['id'] = $normalizedId;
211 |                 $result['name'] = $languageName;
212 |                 if (isset($supersededBy)) {
213 |                     $result['supersededBy'] = $supersededBy;
214 |                 }
215 |                 if (isset($scriptStandAloneName)) {
216 |                     $result['script'] = $scriptStandAloneName;
217 |                 }
218 |                 if (isset($territoryName)) {
219 |                     $result['territory'] = $territoryName;
220 |                 }
221 |                 if (isset($baseLanguage)) {
222 |                     $result['baseLanguage'] = $baseLanguage;
223 |                 }
224 |                 $result['categories'] = $plural;
225 |             }
226 |         }
227 | 
228 |         return $result;
229 |     }
230 | 
231 |     /**
232 |      * Returns the loaded CLDR data.
233 |      *
234 |      * @param string $key Can be 'languages', 'territories', 'plurals', 'supersededLanguages', 'scripts', 'standAloneScripts'
235 |      *
236 |      * @return array
237 |      */
238 |     private static function getData($key)
239 |     {
240 |         if (!isset(self::$data)) {
241 |             $fixKeys = function ($list, &$standAlone = null) {
242 |                 $result = array();
243 |                 $standAlone = array();
244 |                 $match = null;
245 |                 foreach ($list as $key => $value) {
246 |                     $variant = '';
247 |                     if (preg_match('/^(.+)-alt-(short|variant|stand-alone|long|menu)$/', $key, $match)) {
248 |                         $key = $match[1];
249 |                         $variant = $match[2];
250 |                     }
251 |                     $key = str_replace('-', '_', $key);
252 |                     switch ($key) {
253 |                         case 'root': // Language: Root
254 |                         case 'und': // Language: Unknown Language
255 |                         case 'zxx': // Language: No linguistic content
256 |                         case 'ZZ': // Territory: Unknown Region
257 |                         case 'Zinh': // Script: Inherited
258 |                         case 'Zmth': // Script: Mathematical Notation
259 |                         case 'Zsym': // Script: Symbols
260 |                         case 'Zxxx': // Script: Unwritten
261 |                         case 'Zyyy': // Script: Common
262 |                         case 'Zzzz': // Script: Unknown Script
263 |                             break;
264 |                         default:
265 |                             switch ($variant) {
266 |                                 case 'stand-alone':
267 |                                     $standAlone[$key] = $value;
268 |                                     break;
269 |                                 case '':
270 |                                     $result[$key] = $value;
271 |                                     break;
272 |                             }
273 |                             break;
274 |                     }
275 |                 }
276 | 
277 |                 return $result;
278 |             };
279 |             $data = array();
280 |             $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/languages.json'), true);
281 |             $data['languages'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['languages']);
282 |             $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/territories.json'), true);
283 |             $data['territories'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['territories']);
284 |             $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/supplemental/plurals.json'), true);
285 |             $data['plurals'] = $fixKeys($json['supplemental']['plurals-type-cardinal']);
286 |             $json = json_decode(file_get_contents(__DIR__ . '/cldr-data/main/en-US/scripts.json'), true);
287 |             $data['scripts'] = $fixKeys($json['main']['en-US']['localeDisplayNames']['scripts'], $data['standAloneScripts']);
288 |             $data['standAloneScripts'] = array_merge($data['scripts'], $data['standAloneScripts']);
289 |             $data['scripts'] = array_merge($data['standAloneScripts'], $data['scripts']);
290 |             $data['supersededLanguages'] = array();
291 |             // Remove the languages for which we don't have plurals
292 |             $m = null;
293 |             foreach (array_keys(array_diff_key($data['languages'], $data['plurals'])) as $missingPlural) {
294 |                 if (preg_match('/^([a-z]{2,3})_/', $missingPlural, $m)) {
295 |                     if (!isset($data['plurals'][$m[1]])) {
296 |                         unset($data['languages'][$missingPlural]);
297 |                     }
298 |                 } else {
299 |                     unset($data['languages'][$missingPlural]);
300 |                 }
301 |             }
302 |             // Fix the languages for which we have plurals
303 |             $formerCodes = array(
304 |                 'jw' => 'jv', // former Javanese
305 |                 'mo' => 'ro_MD', // former Moldavian
306 |             );
307 |             $knownMissingLanguages = array(
308 |                 'guw' => 'Gun',
309 |                 'hnj' => 'Hmong Njua',
310 |                 'lld' => 'Dolomitic Ladin',
311 |                 'nah' => 'Nahuatl',
312 |                 'smi' => 'Sami',
313 |             );
314 |             foreach (array_keys(array_diff_key($data['plurals'], $data['languages'])) as $missingLanguage) {
315 |                 if (isset($formerCodes[$missingLanguage]) && isset($data['languages'][$formerCodes[$missingLanguage]])) {
316 |                     $data['languages'][$missingLanguage] = $data['languages'][$formerCodes[$missingLanguage]];
317 |                     $data['supersededLanguages'][$missingLanguage] = $formerCodes[$missingLanguage];
318 |                 } else {
319 |                     if (isset($knownMissingLanguages[$missingLanguage])) {
320 |                         $data['languages'][$missingLanguage] = $knownMissingLanguages[$missingLanguage];
321 |                     } else {
322 |                         throw new Exception("We have the plural rule for the language '{$missingLanguage}' but we don't have its language name");
323 |                     }
324 |                 }
325 |             }
326 |             ksort($data['languages'], SORT_STRING);
327 |             ksort($data['territories'], SORT_STRING);
328 |             ksort($data['plurals'], SORT_STRING);
329 |             ksort($data['scripts'], SORT_STRING);
330 |             ksort($data['standAloneScripts'], SORT_STRING);
331 |             ksort($data['supersededLanguages'], SORT_STRING);
332 |             self::$data = $data;
333 |         }
334 |         if (!isset(self::$data[$key])) {
335 |             throw new Exception("Invalid CLDR data key: '{$key}'");
336 |         }
337 | 
338 |         return self::$data[$key];
339 |     }
340 | }
341 | 


--------------------------------------------------------------------------------
/src/Language.php:
--------------------------------------------------------------------------------
  1 | <?php
  2 | 
  3 | namespace Gettext\Languages;
  4 | 
  5 | use Exception;
  6 | 
  7 | /**
  8 |  * Main class to convert the plural rules of a language from CLDR to gettext.
  9 |  */
 10 | class Language
 11 | {
 12 |     /**
 13 |      * The language ID.
 14 |      *
 15 |      * @var string
 16 |      */
 17 |     public $id;
 18 | 
 19 |     /**
 20 |      * The language name.
 21 |      *
 22 |      * @var string
 23 |      */
 24 |     public $name;
 25 | 
 26 |     /**
 27 |      * If this language is deprecated: the gettext code of the new language.
 28 |      *
 29 |      * @var string|null
 30 |      */
 31 |     public $supersededBy;
 32 | 
 33 |     /**
 34 |      * The script name.
 35 |      *
 36 |      * @var string|null
 37 |      */
 38 |     public $script;
 39 | 
 40 |     /**
 41 |      * The territory name.
 42 |      *
 43 |      * @var string|null
 44 |      */
 45 |     public $territory;
 46 | 
 47 |     /**
 48 |      * The name of the base language.
 49 |      *
 50 |      * @var string|null
 51 |      */
 52 |     public $baseLanguage;
 53 | 
 54 |     /**
 55 |      * The list of categories.
 56 |      *
 57 |      * @var \Gettext\Languages\Category[]
 58 |      */
 59 |     public $categories;
 60 | 
 61 |     /**
 62 |      * The gettext formula to decide which category should be applied.
 63 |      *
 64 |      * @var string
 65 |      */
 66 |     public $formula;
 67 | 
 68 |     /**
 69 |      * Initialize the instance and parse the language code.
 70 |      *
 71 |      * @param array $info The result of CldrData::getLanguageInfo()
 72 |      *
 73 |      * @throws \Exception throws an Exception if $fullId is not valid
 74 |      */
 75 |     private function __construct($info)
 76 |     {
 77 |         $this->id = $info['id'];
 78 |         $this->name = $info['name'];
 79 |         $this->supersededBy = isset($info['supersededBy']) ? $info['supersededBy'] : null;
 80 |         $this->script = isset($info['script']) ? $info['script'] : null;
 81 |         $this->territory = isset($info['territory']) ? $info['territory'] : null;
 82 |         $this->baseLanguage = isset($info['baseLanguage']) ? $info['baseLanguage'] : null;
 83 |         // Let's build the category list
 84 |         $this->categories = array();
 85 |         foreach ($info['categories'] as $cldrCategoryId => $cldrFormulaAndExamples) {
 86 |             $category = new Category($cldrCategoryId, $cldrFormulaAndExamples);
 87 |             foreach ($this->categories as $c) {
 88 |                 if ($category->id === $c->id) {
 89 |                     throw new Exception("The category '{$category->id}' is specified more than once");
 90 |                 }
 91 |             }
 92 |             $this->categories[] = $category;
 93 |         }
 94 |         if (empty($this->categories)) {
 95 |             throw new Exception("The language '{$info['id']}' does not have any plural category");
 96 |         }
 97 |         // Let's sort the categories from 'zero' to 'other'
 98 |         usort($this->categories, function (Category $category1, Category $category2) {
 99 |             return array_search($category1->id, CldrData::$categories) - array_search($category2->id, CldrData::$categories);
100 |         });
101 |         // The 'other' category should always be there
102 |         if ($this->categories[count($this->categories) - 1]->id !== CldrData::OTHER_CATEGORY) {
103 |             throw new Exception("The language '{$info['id']}' does not have the '" . CldrData::OTHER_CATEGORY . "' plural category");
104 |         }
105 |         $this->checkAlwaysTrueCategories();
106 |         $this->checkAlwaysFalseCategories();
107 |         $this->checkAllCategoriesWithExamples();
108 |         $this->formula = $this->buildFormula();
109 |     }
110 | 
111 |     /**
112 |      * Return a list of all languages available.
113 |      *
114 |      * @throws \Exception
115 |      *
116 |      * @return \Gettext\Languages\Language[]
117 |      */
118 |     public static function getAll()
119 |     {
120 |         $result = array();
121 |         foreach (array_keys(CldrData::getLanguageNames()) as $cldrLanguageId) {
122 |             $result[] = new self(CldrData::getLanguageInfo($cldrLanguageId));
123 |         }
124 | 
125 |         return $result;
126 |     }
127 | 
128 |     /**
129 |      * Return a Language instance given the language id.
130 |      *
131 |      * @param string $id
132 |      *
133 |      * @return \Gettext\Languages\Language|null
134 |      */
135 |     public static function getById($id)
136 |     {
137 |         $result = null;
138 |         $info = CldrData::getLanguageInfo($id);
139 |         if (isset($info)) {
140 |             $result = new self($info);
141 |         }
142 | 
143 |         return $result;
144 |     }
145 | 
146 |     /**
147 |      * Returns a clone of this instance with all the strings to US-ASCII.
148 |      *
149 |      * @return \Gettext\Languages\Language
150 |      */
151 |     public function getUSAsciiClone()
152 |     {
153 |         $clone = clone $this;
154 |         self::asciifier($clone->name);
155 |         self::asciifier($clone->formula);
156 |         $clone->categories = array();
157 |         foreach ($this->categories as $category) {
158 |             $categoryClone = clone $category;
159 |             self::asciifier($categoryClone->examples);
160 |             $clone->categories[] = $categoryClone;
161 |         }
162 | 
163 |         return $clone;
164 |     }
165 | 
166 |     /**
167 |      * Build the formula starting from the currently defined categories.
168 |      *
169 |      * @param bool $withoutParenthesis TRUE to build a formula in standard gettext format, FALSE (default) to build a PHP-compatible formula
170 |      *
171 |      * @return string
172 |      */
173 |     public function buildFormula($withoutParenthesis = false)
174 |     {
175 |         $numCategories = count($this->categories);
176 |         switch ($numCategories) {
177 |             case 1:
178 |                 // Just one category
179 |                 return '0';
180 |             case 2:
181 |                 return self::reduceFormula(self::reverseFormula($this->categories[0]->formula));
182 |             default:
183 |                 $formula = (string) ($numCategories - 1);
184 |                 for ($i = $numCategories - 2; $i >= 0; $i--) {
185 |                     $f = self::reduceFormula($this->categories[$i]->formula);
186 |                     if (!$withoutParenthesis && !preg_match('/^\([^()]+\)$/', $f)) {
187 |                         $f = "({$f})";
188 |                     }
189 |                     $formula = "{$f} ? {$i} : {$formula}";
190 |                     if (!$withoutParenthesis && $i > 0) {
191 |                         $formula = "({$formula})";
192 |                     }
193 |                 }
194 | 
195 |                 return $formula;
196 |         }
197 |     }
198 | 
199 |     /**
200 |      * Let's look for categories that will always occur.
201 |      * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have just one case.
202 |      * If we found that (single) category we reduce the categories to that one only.
203 |      *
204 |      * @throws \Exception
205 |      */
206 |     private function checkAlwaysTrueCategories()
207 |     {
208 |         $alwaysTrueCategory = null;
209 |         foreach ($this->categories as $category) {
210 |             if ($category->formula === true) {
211 |                 if (!isset($category->examples)) {
212 |                     throw new Exception("The category '{$category->id}' should always occur, but it does not have examples (so for CLDR it will never occur for integers!)");
213 |                 }
214 |                 $alwaysTrueCategory = $category;
215 |                 break;
216 |             }
217 |         }
218 |         if (isset($alwaysTrueCategory)) {
219 |             foreach ($this->categories as $category) {
220 |                 if (($category !== $alwaysTrueCategory) && isset($category->examples)) {
221 |                     throw new Exception("The category '{$category->id}' should never occur, but it has some examples (so for CLDR it will occur!)");
222 |                 }
223 |             }
224 |             $alwaysTrueCategory->id = CldrData::OTHER_CATEGORY;
225 |             $alwaysTrueCategory->formula = null;
226 |             $this->categories = array($alwaysTrueCategory);
227 |         }
228 |     }
229 | 
230 |     /**
231 |      * Let's look for categories that will never occur.
232 |      * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have some less cases.
233 |      * If we found those categories we strip them out.
234 |      *
235 |      * @throws \Exception
236 |      */
237 |     private function checkAlwaysFalseCategories()
238 |     {
239 |         $filtered = array();
240 |         foreach ($this->categories as $category) {
241 |             if ($category->formula === false) {
242 |                 if (isset($category->examples)) {
243 |                     throw new Exception("The category '{$category->id}' should never occur, but it has examples (so for CLDR it may occur!)");
244 |                 }
245 |             } else {
246 |                 $filtered[] = $category;
247 |             }
248 |         }
249 |         $this->categories = $filtered;
250 |     }
251 | 
252 |     /**
253 |      * Let's look for categories that don't have examples.
254 |      * This because with decimals (CLDR) we may have more cases, with integers (gettext) we have some less cases.
255 |      * If we found those categories, we check that they never occur and we strip them out.
256 |      *
257 |      * @throws \Exception
258 |      */
259 |     private function checkAllCategoriesWithExamples()
260 |     {
261 |         $allCategoriesIds = array();
262 |         $goodCategories = array();
263 |         $badCategories = array();
264 |         $badCategoriesIds = array();
265 |         foreach ($this->categories as $category) {
266 |             $allCategoriesIds[] = $category->id;
267 |             if (isset($category->examples)) {
268 |                 $goodCategories[] = $category;
269 |             } else {
270 |                 $badCategories[] = $category;
271 |                 $badCategoriesIds[] = $category->id;
272 |             }
273 |         }
274 |         if (empty($badCategories)) {
275 |             return;
276 |         }
277 |         $removeCategoriesWithoutExamples = false;
278 |         switch (implode(',', $badCategoriesIds) . '@' . implode(',', $allCategoriesIds)) {
279 |             case CldrData::OTHER_CATEGORY . '@one,few,many,' . CldrData::OTHER_CATEGORY:
280 |                 switch ($this->buildFormula()) {
281 |                     case '(n % 10 == 1 && n % 100 != 11) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n % 10 == 0 || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 11 && n % 100 <= 14) ? 2 : 3))':
282 |                         // Numbers ending with 0                 => case 2 ('many')
283 |                         // Numbers ending with 1 but not with 11 => case 0 ('one')
284 |                         // Numbers ending with 11                => case 2 ('many')
285 |                         // Numbers ending with 2 but not with 12 => case 1 ('few')
286 |                         // Numbers ending with 12                => case 2 ('many')
287 |                         // Numbers ending with 3 but not with 13 => case 1 ('few')
288 |                         // Numbers ending with 13                => case 2 ('many')
289 |                         // Numbers ending with 4 but not with 14 => case 1 ('few')
290 |                         // Numbers ending with 14                => case 2 ('many')
291 |                         // Numbers ending with 5                 => case 2 ('many')
292 |                         // Numbers ending with 6                 => case 2 ('many')
293 |                         // Numbers ending with 7                 => case 2 ('many')
294 |                         // Numbers ending with 8                 => case 2 ('many')
295 |                         // Numbers ending with 9                 => case 2 ('many')
296 |                         // => the 'other' case never occurs: use 'other' for 'many'
297 |                         $removeCategoriesWithoutExamples = true;
298 |                         break;
299 |                     case '(n == 1) ? 0 : ((n % 10 >= 2 && n % 10 <= 4 && (n % 100 < 12 || n % 100 > 14)) ? 1 : ((n != 1 && (n % 10 == 0 || n % 10 == 1) || n % 10 >= 5 && n % 10 <= 9 || n % 100 >= 12 && n % 100 <= 14) ? 2 : 3))':
300 |                         // Numbers ending with 0                  => case 2 ('many')
301 |                         // Numbers ending with 1 but not number 1 => case 2 ('many')
302 |                         // Number 1                               => case 0 ('one')
303 |                         // Numbers ending with 2 but not with 12  => case 1 ('few')
304 |                         // Numbers ending with 12                 => case 2 ('many')
305 |                         // Numbers ending with 3 but not with 13  => case 1 ('few')
306 |                         // Numbers ending with 13                 => case 2 ('many')
307 |                         // Numbers ending with 4 but not with 14  => case 1 ('few')
308 |                         // Numbers ending with 14                 => case 2 ('many')
309 |                         // Numbers ending with 5                  => case 2 ('many')
310 |                         // Numbers ending with 6                  => case 2 ('many')
311 |                         // Numbers ending with 7                  => case 2 ('many')
312 |                         // Numbers ending with 8                  => case 2 ('many')
313 |                         // Numbers ending with 9                  => case 2 ('many')
314 |                         // => the 'other' case never occurs: use 'other' for 'many'
315 |                         $removeCategoriesWithoutExamples = true;
316 |                         break;
317 |                 }
318 |         }
319 |         if (!$removeCategoriesWithoutExamples) {
320 |             throw new Exception("Unhandled case of plural categories without examples '" . implode(', ', $badCategoriesIds) . "' out of '" . implode(', ', $allCategoriesIds) . "'");
321 |         }
322 |         if ($badCategories[count($badCategories) - 1]->id === CldrData::OTHER_CATEGORY) {
323 |             // We're removing the 'other' cagory: let's change the last good category to 'other'
324 |             $lastGood = $goodCategories[count($goodCategories) - 1];
325 |             $lastGood->id = CldrData::OTHER_CATEGORY;
326 |             $lastGood->formula = null;
327 |         }
328 |         $this->categories = $goodCategories;
329 |     }
330 | 
331 |     /**
332 |      * Reverse a formula.
333 |      *
334 |      * @param string $formula
335 |      *
336 |      * @throws \Exception
337 |      *
338 |      * @return string
339 |      */
340 |     private static function reverseFormula($formula)
341 |     {
342 |         if (preg_match('/^n( % \d+)? == \d+(\.\.\d+|,\d+)*?$/', $formula)) {
343 |             return str_replace(' == ', ' != ', $formula);
344 |         }
345 |         if (preg_match('/^n( % \d+)? != \d+(\.\.\d+|,\d+)*?$/', $formula)) {
346 |             return str_replace(' != ', ' == ', $formula);
347 |         }
348 |         if (preg_match('/^\(?n == \d+ \|\| n == \d+\)?$/', $formula)) {
349 |             return trim(str_replace(array(' == ', ' || '), array(' != ', ' && '), $formula), '()');
350 |         }
351 |         $m = null;
352 |         if (preg_match('/^(n(?: % \d+)?) == (\d+) && (n(?: % \d+)?) != (\d+)$/', $formula, $m)) {
353 |             return "{$m[1]} != {$m[2]} || {$m[3]} == {$m[4]}";
354 |         }
355 |         switch ($formula) {
356 |             case '(n == 1 || n == 2 || n == 3) || n % 10 != 4 && n % 10 != 6 && n % 10 != 9':
357 |                 return 'n != 1 && n != 2 && n != 3 && (n % 10 == 4 || n % 10 == 6 || n % 10 == 9)';
358 |             case '(n == 0 || n == 1) || n >= 11 && n <= 99':
359 |                 return 'n >= 2 && (n < 11 || n > 99)';
360 |         }
361 |         throw new Exception("Unable to reverse the formula '{$formula}'");
362 |     }
363 | 
364 |     /**
365 |      * Reduce some excessively complex formulas.
366 |      *
367 |      * @param string $formula
368 |      *
369 |      * @return string
370 |      */
371 |     private static function reduceFormula($formula)
372 |     {
373 |         $map = array(
374 |             'n != 0 && n != 1' => 'n > 1',
375 |             '(n == 0 || n == 1) && n != 0' => 'n == 1',
376 |         );
377 | 
378 |         return isset($map[$formula]) ? $map[$formula] : $formula;
379 |     }
380 | 
381 |     /**
382 |      * Take one variable and, if it's a string, we transliterate it to US-ASCII.
383 |      *
384 |      * @param mixed $value the variable to work on
385 |      *
386 |      * @throws \Exception
387 |      */
388 |     private static function asciifier(&$value)
389 |     {
390 |         if (is_string($value) && $value !== '') {
391 |             // Avoid converting from 'Ÿ' to '"Y', let's prefer 'Y'
392 |             $value = strtr($value, array(
393 |                 'À' => 'A', 'Á' => 'A', 'Â' => 'A', 'Ã' => 'A', 'Ä' => 'A', 'Å' => 'A',
394 |                 'È' => 'E', 'É' => 'E', 'Ê' => 'E', 'Ë' => 'E',
395 |                 'Ì' => 'I', 'Í' => 'I', 'Î' => 'I', 'Ï' => 'I',
396 |                 'Ñ' => 'N',
397 |                 'Ò' => 'O', 'Ó' => 'O', 'Ô' => 'O', 'Õ' => 'O', 'Ö' => 'O',
398 |                 'Ù' => 'U', 'Ú' => 'U', 'Û' => 'U', 'Ü' => 'U',
399 |                 'Ÿ' => 'Y', 'Ý' => 'Y',
400 |                 'à' => 'a', 'á' => 'a', 'â' => 'a', 'ã' => 'a', 'ä' => 'a', 'å' => 'a',
401 |                 'è' => 'e', 'é' => 'e', 'ê' => 'e', 'ë' => 'e',
402 |                 'ì' => 'i', 'í' => 'i', 'î' => 'i', 'ï' => 'i',
403 |                 'ñ' => 'n', 'ò' => 'o', 'ó' => 'o', 'ô' => 'o', 'õ' => 'o', 'ö' => 'o',
404 |                 'ù' => 'u', 'ú' => 'u', 'û' => 'u', 'ü' => 'u',
405 |                 'ý' => 'y', 'ÿ' => 'y',
406 |                 '…' => '...',
407 |                 'ʼ' => "'", '’' => "'",
408 |             ));
409 |         }
410 |     }
411 | }
412 | 


--------------------------------------------------------------------------------
/bin/import-cldr-data:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env php
  2 | <?php
  3 | 
  4 | set_error_handler(function ($errno, $errstr, $errfile = null, $errline = null) {
  5 |     $message = $errstr ? $errstr : "Error {$errno}";
  6 |     if ($errfile) {
  7 |         $message .= "\nFile: {$errfile}";
  8 |         if ($errline) {
  9 |             $message .= "\nLine: {$errline}";
 10 |         }
 11 |     }
 12 |     throw new RuntimeException($message);
 13 | });
 14 | 
 15 | error_reporting(-1);
 16 | 
 17 | /**
 18 |  * @param string[] $argv
 19 |  *
 20 |  * @throws RuntimeException
 21 |  *
 22 |  * @return void
 23 |  */
 24 | function main(array $argv)
 25 | {
 26 |     $options = new Options($argv);
 27 |     if (!is_dir($options->outputDir) && !mkdir($options->outputDir, 0777, true)) {
 28 |         throw new RuntimeException("Cannot create output directory: {$options->outputDir}\n");
 29 |     }
 30 |     $options->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', realpath($options->outputDir));
 31 |     $documentStorage = new DocumentStorage($options);
 32 |     echo 'Processing languages... ';
 33 |     $languages = new Languages($options, $documentStorage);
 34 |     echo "done.\n";
 35 |     echo 'Processing scripts... ';
 36 |     $scripts = new Scripts($options, $documentStorage);
 37 |     echo "done.\n";
 38 |     echo 'Processing territories... ';
 39 |     $territories = new Territories($options, $documentStorage);
 40 |     echo "done.\n";
 41 |     echo 'Processing plural rules... ';
 42 |     $plurals = new Plurals($options, $documentStorage, $languages);
 43 |     echo "done.\n";
 44 |     echo 'Saving... ';
 45 | 
 46 |     $languages->save();
 47 |     $scripts->save();
 48 |     $territories->save();
 49 |     $plurals->save();
 50 | 
 51 |     echo "done.\n";
 52 | }
 53 | 
 54 | class Options
 55 | {
 56 |     /**
 57 |      * @var string
 58 |      */
 59 |     public $cldrVersion;
 60 | 
 61 |     /**
 62 |      * @var string
 63 |      */
 64 |     public $outputDir;
 65 | 
 66 |     public function __construct(array $argv)
 67 |     {
 68 |         if (array_intersect($argv, array('-h', '--help'))) {
 69 |             $this->showSyntax($argv[0], 0);
 70 |         }
 71 |         $this->outputDir = $this->getDefaultOutputDir();
 72 |         switch (count($argv)) {
 73 |             case 3:
 74 |                 $this->outputDir = str_replace(DIRECTORY_SEPARATOR, '/', $argv[2]);
 75 |                 // no break
 76 |             case 2:
 77 |                 $this->cldrVersion = $argv[1];
 78 |                 if (!preg_match('/^\d+(\.\d+)?(-(alpha|beta)\d+)?$/', $this->cldrVersion)) {
 79 |                     throw new RuntimeException("{$this->cldrVersion} is not a valid CLDR version identifier");
 80 |                 }
 81 |                 break;
 82 |             default:
 83 |                 $this->showSyntax($argv[0], 1);
 84 |         }
 85 |     }
 86 | 
 87 |     /**
 88 |      * @param string $programName
 89 |      * @param int $exitCode
 90 |      *
 91 |      * @return never
 92 |      */
 93 |     private function showSyntax($programName, $exitCode)
 94 |     {
 95 |         $programName = str_replace('/', DIRECTORY_SEPARATOR, $programName);
 96 |         $defaultOutputDir = str_replace('/', DIRECTORY_SEPARATOR, $this->getDefaultOutputDir());
 97 | 
 98 |         echo <<<EOT
 99 | Syntax: {$programName} <cldr-version> [output-dir]
100 | 
101 | Arguments:
102 |   cldr-version: the version of the CLDR data.
103 |     Examples:
104 |       47
105 |       47-beta2
106 |       47-alpha1
107 |       46.1
108 |       46.1-beta1
109 |   output-dir: the directory where the data will be written to
110 |     Default: {$defaultOutputDir}
111 | 
112 | EOT;
113 |         exit($exitCode);
114 |     }
115 | 
116 |     /**
117 |      * @return string
118 |      */
119 |     private function getDefaultOutputDir()
120 |     {
121 |         return str_replace(DIRECTORY_SEPARATOR, '/', dirname(__DIR__)) . '/src/cldr-data';
122 |     }
123 | }
124 | 
/**
 * Downloads the XML documents of a CLDR release from GitHub and keeps the parsed documents in memory.
 */
class DocumentStorage
{
    /**
     * The URL prefix of the CLDR release (without trailing slash).
     *
     * @var string
     */
    private $baseUrl;

    /**
     * The stream context used for the downloads.
     *
     * @var resource
     */
    private $context;

    /**
     * The documents already parsed (array keys are the paths received by get()).
     *
     * @var DOMDocument[]
     */
    private $cache;

    public function __construct(Options $options)
    {
        $this->cache = array();
        // Dots in the CLDR version become dashes in the name of the release tag
        $this->baseUrl = 'https://raw.githubusercontent.com/unicode-org/cldr/refs/tags/release-' . str_replace('.', '-', $options->cldrVersion);
        $httpOptions = array(
            'follow_location' => 1,
            'ignore_errors' => false,
        );
        $this->context = stream_context_create(array('http' => $httpOptions));
    }

    /**
     * Get a parsed document, downloading it only the first time it is requested.
     *
     * @param string $path the path of the document, relative to the root of the CLDR release
     *
     * @throws RuntimeException
     *
     * @return DOMDocument
     */
    public function get($path)
    {
        if (isset($this->cache[$path])) {
            return $this->cache[$path];
        }
        $doc = $this->loadXml($this->fetch($path));
        $this->cache[$path] = $doc;

        return $doc;
    }

    /**
     * Download a document.
     *
     * @param string $path
     *
     * @throws RuntimeException if the download fails
     *
     * @return string
     */
    private function fetch($path)
    {
        $url = $this->baseUrl . '/' . ltrim($path, '/');
        // Silence the PHP warnings during the download: failures are detected from the return value
        set_error_handler(function () {}, -1);
        $content = file_get_contents($url, false, $this->context);
        restore_error_handler();
        if ($content !== false) {
            return $content;
        }
        // When available, add the HTTP status line to the error message
        /** @var array $http_response_header */
        $details = empty($http_response_header) ? '' : " - {$http_response_header[0]}";
        throw new RuntimeException("Failed to download from {$url}{$details}");
    }

    /**
     * Parse an XML string, considering any libxml error as a failure.
     *
     * @param string $xml
     *
     * @throws RuntimeException if the XML can't be loaded or libxml reports errors
     *
     * @return DOMDocument
     */
    private function loadXml($xml)
    {
        $doc = new DOMDocument();
        libxml_clear_errors();
        // Collect the libxml errors instead of emitting them as PHP warnings
        $restore = libxml_use_internal_errors(true);
        $loaded = $doc->loadXML($xml);
        $errors = libxml_get_errors();
        libxml_use_internal_errors($restore);
        if ($loaded && $errors === array()) {
            return $doc;
        }
        $lines = array();
        foreach ($errors as $error) {
            $lines[] = "{$error->message} at line {$error->line}";
        }
        throw new RuntimeException("Failed to parse XML:\n" . implode("\n", $lines));
    }
}
217 | 
/**
 * Base class of the processors: each of them loads one CLDR document, extracts some data from it
 * and is able to save that data to a JSON file.
 */
abstract class Processor
{
    /**
     * @var Options
     */
    protected $options;

    /**
     * The data extracted from the CLDR document by parse().
     *
     * @var array
     */
    protected $data;

    /**
     * @var DocumentStorage
     */
    private $documentStorage;

    /**
     * The path of the CLDR document (without leading slashes).
     *
     * @var string
     */
    private $path;

    /**
     * Load the CLDR document and parse it right away.
     * Since parse() is invoked here, subclasses must initialize whatever parse() needs before calling this constructor.
     *
     * @param Options $options
     * @param DocumentStorage $documentStorage
     * @param string $path the path of the CLDR document to be processed
     *
     * @throws RuntimeException
     */
    protected function __construct(Options $options, DocumentStorage $documentStorage, $path)
    {
        $this->options = $options;
        $this->documentStorage = $documentStorage;
        $this->path = ltrim($path, '/');
        $doc = $this->documentStorage->get($this->path);
        $this->data = $this->parse($doc);
    }

    /**
     * Save the parsed data to the output file, in JSON format.
     *
     * @throws RuntimeException if the output directory can't be created, or if the data can't be encoded or written
     *
     * @return void
     */
    public function save()
    {
        $file = $this->getOutputFile();
        $dir = dirname($file);
        if (!is_dir($dir) && !mkdir($dir, 0777, true)) {
            throw new RuntimeException("Cannot create directory: {$dir}");
        }
        // Some flags don't exist in old PHP versions: use only the available ones
        $flags = 0;
        foreach (array('JSON_UNESCAPED_SLASHES', 'JSON_UNESCAPED_UNICODE', 'JSON_PRETTY_PRINT', 'JSON_THROW_ON_ERROR') as $flagName) {
            if (defined($flagName)) {
                $flags |= constant($flagName);
            }
        }
        $json = json_encode($this->data, $flags);
        if ($json === false) {
            // Without JSON_THROW_ON_ERROR json_encode() signals failures by returning false:
            // stop here, so that we don't overwrite the existing file with an empty one
            throw new RuntimeException("Failed to encode as JSON the data for the file: {$file} (JSON error code: " . json_last_error() . ')');
        }
        if (!file_put_contents($file, $json)) {
            throw new RuntimeException("Failed to write to file: {$file}");
        }
    }

    /**
     * Extract the data from the CLDR document.
     *
     * @return array
     */
    abstract protected function parse(DOMDocument $doc);

    /**
     * Sort an array by its keys (case-insensitively), making sure that a key always
     * comes before its alternative forms (that is, "xx" comes before "xx-alt-...").
     *
     * @param array $data the array to be sorted in place
     *
     * @return void
     */
    protected function sortByKeyWithPossiblyAlt(array &$data)
    {
        uksort($data, function ($a, $b) {
            $aAlt = strpos($a, '-alt-') !== false;
            $bAlt = strpos($b, '-alt-') !== false;
            if ($aAlt !== $bAlt) {
                if ($bAlt && strpos($b, "{$a}-alt-") === 0) {
                    // $b is an alternative form of $a: $a comes first
                    return -1;
                }
                if ($aAlt && strpos($a, "{$b}-alt-") === 0) {
                    // $a is an alternative form of $b: $b comes first
                    return 1;
                }
            }

            return strcasecmp($a, $b);
        });
    }

    /**
     * Get the name of the output file, relative to the output directory.
     *
     * @return string
     */
    abstract protected function getOutputRelativeFileName();

    /**
     * Get the full path of the output file.
     *
     * @return string
     */
    private function getOutputFile()
    {
        return $this->options->outputDir . '/' . ltrim($this->getOutputRelativeFileName(), '/');
    }
}
340 | 
341 | class Plurals extends Processor
342 | {
343 |     /**
344 |      * The processor used to get the identifiers of the defined languages.
345 |      *
346 |      * @var Languages
347 |      */
348 |     private $languages;
349 | 
350 |     public function __construct(Options $options, DocumentStorage $documentStorage, Languages $languages)
351 |     {
352 |         $this->languages = $languages;
353 |         parent::__construct($options, $documentStorage, 'common/supplemental/plurals.xml');
354 |     }
355 | 
356 |     /**
357 |      * Build the cardinal plural rules of every locale listed in the document.
358 |      *
359 |      * {@inheritdoc}
360 |      *
361 |      * @see Processor::parse()
362 |      */
363 |     protected function parse(DOMDocument $doc)
364 |     {
365 |         $xpath = new DOMXPath($doc);
366 |         $ruleSets = $xpath->query('/supplementalData/plurals[@type="cardinal"]/pluralRules');
367 |         $languageIDs = $this->languages->getDefinedLanguageIDs();
368 |         // Locale codes accepted even though they are not among the defined language identifiers
369 |         $acceptedAnyway = array(
370 |             'guw', // Gun
371 |             'lld', // Dolomitic Ladin
372 |             'hnj', // Hmong Njua
373 |             'nah', // Nahuatl
374 |             'smi', // Sami
375 |         );
376 |         // Locale codes to be replaced with another one (an empty string means: skip the locale)
377 |         $aliases = array(
378 |             'in' => 'id', // Former Indonesian
379 |             'iw' => 'he', // Former Hebrew
380 |             'jw' => 'jv', // Former Javanese
381 |             'ji' => 'yi', // Former Yiddish
382 |             'mo' => 'ro-MD', // Former Moldavian
383 |             'bh' => '', // Former Bihari: dismissed because it can be 'bho', 'mai' or 'mag'
384 |             'root' => '', // Just a CLDR placeholder
385 |         );
386 |         $categories = array('zero', 'one', 'two', 'few', 'many', 'other');
387 |         $data = array();
388 |         $undefined = array();
389 |         foreach ($ruleSets as $ruleSet) {
390 |             $locales = preg_split('/\s+/', (string) $ruleSet->getAttribute('locales'), -1, PREG_SPLIT_NO_EMPTY);
391 |             if ($locales === array()) {
392 |                 throw new RuntimeException('No locales found in pluralRules element');
393 |             }
394 |             // Collect the rules of this set, indexed by plural category
395 |             $found = array();
396 |             foreach ($ruleSet->childNodes as $node) {
397 |                 if (!$node instanceof DOMElement) {
398 |                     continue;
399 |                 }
400 |                 if ($node->tagName !== 'pluralRule') {
401 |                     throw new RuntimeException("Unexpected element: {$node->tagName}");
402 |                 }
403 |                 $count = (string) $node->getAttribute('count');
404 |                 if ($count === '') {
405 |                     throw new RuntimeException('Missing count attribute');
406 |                 }
407 |                 if (!in_array($count, $categories, true)) {
408 |                     throw new RuntimeException("Unknown count: {$count}");
409 |                 }
410 |                 if (isset($found[$count])) {
411 |                     throw new RuntimeException("Duplicate count: {$count}");
412 |                 }
413 |                 $found[$count] = $node->textContent;
414 |             }
415 |             $elements = array();
416 |             foreach ($categories as $category) {
417 |                 if (isset($found[$category])) {
418 |                     $elements["pluralRule-count-{$category}"] = $found[$category];
419 |                 }
420 |             }
421 |             if ($elements === array()) {
422 |                 throw new RuntimeException('No plural rules found');
423 |             }
424 |             foreach ($locales as $locale) {
425 |                 $locale = str_replace('_', '-', $locale);
426 |                 if (isset($data[$locale]) && !in_array($locale, $aliases, true)) {
427 |                     throw new RuntimeException("Duplicate locale: {$locale}");
428 |                 }
429 |                 if (in_array($locale, $languageIDs, true) || in_array($locale, $acceptedAnyway, true)) {
430 |                     $data[$locale] = $elements;
431 |                     continue;
432 |                 }
433 |                 if (!isset($aliases[$locale])) {
434 |                     $undefined[] = $locale;
435 |                     continue;
436 |                 }
437 |                 // Aliased locales never overwrite the rules already associated to their replacement
438 |                 $target = $aliases[$locale];
439 |                 if ($target !== '' && !isset($data[$target])) {
440 |                     $data[$target] = $elements;
441 |                 }
442 |             }
443 |         }
444 |         if ($undefined !== array()) {
445 |             throw new RuntimeException("The following locales are not defined:\n- " . implode("\n- ", $undefined));
446 |         }
447 |         if ($data === array()) {
448 |             throw new RuntimeException('No plural rules found');
449 |         }
450 |         $this->sortByKeyWithPossiblyAlt($data);
451 | 
452 |         return array(
453 |             'supplemental' => array(
454 |                 'version' => array(
455 |                     '_cldrVersion' => $this->options->cldrVersion,
456 |                 ),
457 |                 'plurals-type-cardinal' => $data,
458 |             ),
459 |         );
460 |     }
461 | 
462 |     /**
463 |      * {@inheritdoc}
464 |      *
465 |      * @see Processor::getOutputRelativeFileName()
466 |      */
467 |     protected function getOutputRelativeFileName()
468 |     {
469 |         return 'supplemental/plurals.json';
470 |     }
471 | }
472 | 
473 | abstract class LocaleDisplayName extends Processor
474 | {
475 |     public function __construct(Options $options, DocumentStorage $documentStorage)
476 |     {
477 |         parent::__construct($options, $documentStorage, 'common/main/en.xml');
478 |     }
479 | 
480 |     /**
481 |      * Collect the text of the elements selected by getXPathSelector(), indexed by their (possibly "-alt-" suffixed) type.
482 |      *
483 |      * {@inheritdoc}
484 |      *
485 |      * @see Processor::parse()
486 |      */
487 |     protected function parse(DOMDocument $doc)
488 |     {
489 |         $names = array();
490 |         $xpath = new DOMXPath($doc);
491 |         foreach ($xpath->query($this->getXPathSelector()) as $node) {
492 |             $type = (string) $node->getAttribute('type');
493 |             if ($type === '') {
494 |                 throw new RuntimeException('Missing type attribute');
495 |             }
496 |             // Alternative names get an "-alt-<variant>" suffix, so that they don't clash with the main name
497 |             $alt = (string) $node->getAttribute('alt');
498 |             $key = str_replace('_', '-', $type) . ($alt === '' ? '' : "-alt-{$alt}");
499 |             if (isset($names[$key])) {
500 |                 throw new RuntimeException("Duplicate key: {$key}");
501 |             }
502 |             $names[$key] = (string) $node->textContent;
503 |         }
504 |         if ($names === array()) {
505 |             throw new RuntimeException('No elements found');
506 |         }
507 |         $this->sortByKeyWithPossiblyAlt($names);
508 |         $identity = array(
509 |             'version' => array(
510 |                 '_cldrVersion' => $this->options->cldrVersion,
511 |             ),
512 |             'language' => 'en',
513 |             'territory' => 'US',
514 |         );
515 | 
516 |         return array(
517 |             'main' => array(
518 |                 'en-US' => array(
519 |                     'identity' => $identity,
520 |                     'localeDisplayNames' => array(
521 |                         $this->getExportedNodeName() => $names,
522 |                     ),
523 |                 ),
524 |             ),
525 |         );
526 |     }
527 | 
528 |     /**
529 |      * @return string the XPath expression selecting the elements whose text has to be exported
530 |      */
531 |     abstract protected function getXPathSelector();
532 | 
533 |     /**
534 |      * @return string the name of the node, under "localeDisplayNames", that contains the exported names
535 |      */
536 |     abstract protected function getExportedNodeName();
537 | }
538 | 
539 | class Languages extends LocaleDisplayName
540 | {
541 |     /**
542 |      * @return string[] the keys of the exported language names, excluding the "-alt-" alternative names
543 |      */
544 |     public function getDefinedLanguageIDs()
545 |     {
546 |         $isMainName = function ($key) {
547 |             return strpos((string) $key, '-alt-') === false;
548 |         };
549 |         $names = $this->data['main']['en-US']['localeDisplayNames'][$this->getExportedNodeName()];
550 | 
551 |         return array_values(array_filter(array_keys($names), $isMainName));
552 |     }
553 | 
554 |     /**
555 |      * {@inheritdoc} Here: every language element listed in the locale display names.
556 |      *
557 |      * @see LocaleDisplayName::getXPathSelector()
558 |      */
559 |     protected function getXPathSelector()
560 |     {
561 |         return '/ldml/localeDisplayNames/languages/language';
562 |     }
563 | 
564 |     /**
565 |      * {@inheritdoc} Here: the names are exported under the "languages" node.
566 |      *
567 |      * @see LocaleDisplayName::getExportedNodeName()
568 |      */
569 |     protected function getExportedNodeName()
570 |     {
571 |         return 'languages';
572 |     }
573 | 
574 |     /**
575 |      * {@inheritdoc} Here: main/en-US/languages.json.
576 |      *
577 |      * @see Processor::getOutputRelativeFileName()
578 |      */
579 |     protected function getOutputRelativeFileName()
580 |     {
581 |         return 'main/en-US/languages.json';
582 |     }
583 | }
584 | 
585 | class Scripts extends LocaleDisplayName
586 | {
587 |     /**
588 |      * {@inheritdoc} Here: every script element listed in the locale display names.
589 |      *
590 |      * @see LocaleDisplayName::getXPathSelector()
591 |      */
592 |     protected function getXPathSelector()
593 |     {
594 |         return '/ldml/localeDisplayNames/scripts/script';
595 |     }
596 | 
597 |     /**
598 |      * {@inheritdoc} Here: the names are exported under the "scripts" node.
599 |      *
600 |      * @see LocaleDisplayName::getExportedNodeName()
601 |      */
602 |     protected function getExportedNodeName()
603 |     {
604 |         return 'scripts';
605 |     }
606 | 
607 |     /**
608 |      * {@inheritdoc} Here: main/en-US/scripts.json.
609 |      *
610 |      * @see Processor::getOutputRelativeFileName()
611 |      */
612 |     protected function getOutputRelativeFileName()
613 |     {
614 |         return 'main/en-US/scripts.json';
615 |     }
616 | }
617 | 
618 | class Territories extends LocaleDisplayName
619 | {
620 |     /**
621 |      * {@inheritdoc} Here: every territory element listed in the locale display names.
622 |      *
623 |      * @see LocaleDisplayName::getXPathSelector()
624 |      */
625 |     protected function getXPathSelector()
626 |     {
627 |         return '/ldml/localeDisplayNames/territories/territory';
628 |     }
629 | 
630 |     /**
631 |      * {@inheritdoc} Here: the names are exported under the "territories" node.
632 |      *
633 |      * @see LocaleDisplayName::getExportedNodeName()
634 |      */
635 |     protected function getExportedNodeName()
636 |     {
637 |         return 'territories';
638 |     }
639 | 
640 |     /**
641 |      * {@inheritdoc} Here: main/en-US/territories.json.
642 |      *
643 |      * @see Processor::getOutputRelativeFileName()
644 |      */
645 |     protected function getOutputRelativeFileName()
646 |     {
647 |         return 'main/en-US/territories.json';
648 |     }
649 | }
650 | 
651 | try {
652 |     main($argv);
653 | } catch (RuntimeException $failure) {
654 |     fwrite(STDERR, "{$failure->getMessage()}\n"); // report the problem on the standard error
655 |     exit(1);
656 | }
657 | 


--------------------------------------------------------------------------------
/src/cldr-data/main/en-US/languages.json:
--------------------------------------------------------------------------------
  1 | {
  2 |     "main": {
  3 |         "en-US": {
  4 |             "identity": {
  5 |                 "version": {
  6 |                     "_cldrVersion": "47"
  7 |                 },
  8 |                 "language": "en",
  9 |                 "territory": "US"
 10 |             },
 11 |             "localeDisplayNames": {
 12 |                 "languages": {
 13 |                     "aa": "Afar",
 14 |                     "ab": "Abkhazian",
 15 |                     "ace": "Acehnese",
 16 |                     "ach": "Acoli",
 17 |                     "ada": "Adangme",
 18 |                     "ady": "Adyghe",
 19 |                     "ae": "Avestan",
 20 |                     "aeb": "Tunisian Arabic",
 21 |                     "af": "Afrikaans",
 22 |                     "afh": "Afrihili",
 23 |                     "agq": "Aghem",
 24 |                     "ain": "Ainu",
 25 |                     "ak": "Akan",
 26 |                     "akk": "Akkadian",
 27 |                     "akz": "Alabama",
 28 |                     "ale": "Aleut",
 29 |                     "aln": "Gheg Albanian",
 30 |                     "alt": "Southern Altai",
 31 |                     "am": "Amharic",
 32 |                     "an": "Aragonese",
 33 |                     "ang": "Old English",
 34 |                     "ann": "Obolo",
 35 |                     "anp": "Angika",
 36 |                     "ar": "Arabic",
 37 |                     "ar-001": "Modern Standard Arabic",
 38 |                     "arc": "Aramaic",
 39 |                     "arn": "Mapuche",
 40 |                     "aro": "Araona",
 41 |                     "arp": "Arapaho",
 42 |                     "arq": "Algerian Arabic",
 43 |                     "ars": "Najdi Arabic",
 44 |                     "ars-alt-menu": "Arabic, Najdi",
 45 |                     "arw": "Arawak",
 46 |                     "ary": "Moroccan Arabic",
 47 |                     "arz": "Egyptian Arabic",
 48 |                     "as": "Assamese",
 49 |                     "asa": "Asu",
 50 |                     "ase": "American Sign Language",
 51 |                     "ast": "Asturian",
 52 |                     "atj": "Atikamekw",
 53 |                     "av": "Avaric",
 54 |                     "avk": "Kotava",
 55 |                     "awa": "Awadhi",
 56 |                     "ay": "Aymara",
 57 |                     "az": "Azerbaijani",
 58 |                     "az-alt-short": "Azeri",
 59 |                     "ba": "Bashkir",
 60 |                     "bal": "Baluchi",
 61 |                     "ban": "Balinese",
 62 |                     "bar": "Bavarian",
 63 |                     "bas": "Basaa",
 64 |                     "bax": "Bamun",
 65 |                     "bbc": "Batak Toba",
 66 |                     "bbj": "Ghomala",
 67 |                     "be": "Belarusian",
 68 |                     "bej": "Beja",
 69 |                     "bem": "Bemba",
 70 |                     "bew": "Betawi",
 71 |                     "bez": "Bena",
 72 |                     "bfd": "Bafut",
 73 |                     "bfq": "Badaga",
 74 |                     "bg": "Bulgarian",
 75 |                     "bgc": "Haryanvi",
 76 |                     "bgn": "Western Balochi",
 77 |                     "bho": "Bhojpuri",
 78 |                     "bi": "Bislama",
 79 |                     "bik": "Bikol",
 80 |                     "bin": "Bini",
 81 |                     "bjn": "Banjar",
 82 |                     "bkm": "Kom",
 83 |                     "bla": "Siksiká",
 84 |                     "blo": "Anii",
 85 |                     "blt": "Tai Dam",
 86 |                     "bm": "Bambara",
 87 |                     "bn": "Bangla",
 88 |                     "bo": "Tibetan",
 89 |                     "bpy": "Bishnupriya",
 90 |                     "bqi": "Bakhtiari",
 91 |                     "br": "Breton",
 92 |                     "bra": "Braj",
 93 |                     "brh": "Brahui",
 94 |                     "brx": "Bodo",
 95 |                     "bs": "Bosnian",
 96 |                     "bss": "Akoose",
 97 |                     "bua": "Buriat",
 98 |                     "bug": "Buginese",
 99 |                     "bum": "Bulu",
100 |                     "byn": "Blin",
101 |                     "byv": "Medumba",
102 |                     "ca": "Catalan",
103 |                     "cad": "Caddo",
104 |                     "car": "Carib",
105 |                     "cay": "Cayuga",
106 |                     "cch": "Atsam",
107 |                     "ccp": "Chakma",
108 |                     "ce": "Chechen",
109 |                     "ceb": "Cebuano",
110 |                     "cgg": "Chiga",
111 |                     "ch": "Chamorro",
112 |                     "chb": "Chibcha",
113 |                     "chg": "Chagatai",
114 |                     "chk": "Chuukese",
115 |                     "chm": "Mari",
116 |                     "chn": "Chinook Jargon",
117 |                     "cho": "Choctaw",
118 |                     "chp": "Chipewyan",
119 |                     "chr": "Cherokee",
120 |                     "chy": "Cheyenne",
121 |                     "cic": "Chickasaw",
122 |                     "ckb": "Central Kurdish",
123 |                     "ckb-alt-menu": "Kurdish, Central",
124 |                     "ckb-alt-variant": "Kurdish, Sorani",
125 |                     "clc": "Chilcotin",
126 |                     "co": "Corsican",
127 |                     "cop": "Coptic",
128 |                     "cps": "Capiznon",
129 |                     "cr": "Cree",
130 |                     "cr-alt-long": "Woods Cree",
131 |                     "crg": "Michif",
132 |                     "crh": "Crimean Tatar",
133 |                     "crj": "Southern East Cree",
134 |                     "crk": "Plains Cree",
135 |                     "crl": "Northern East Cree",
136 |                     "crm": "Moose Cree",
137 |                     "crr": "Carolina Algonquian",
138 |                     "crs": "Seselwa Creole French",
139 |                     "cs": "Czech",
140 |                     "csb": "Kashubian",
141 |                     "csw": "Swampy Cree",
142 |                     "cu": "Church Slavic",
143 |                     "cv": "Chuvash",
144 |                     "cy": "Welsh",
145 |                     "da": "Danish",
146 |                     "dak": "Dakota",
147 |                     "dar": "Dargwa",
148 |                     "dav": "Taita",
149 |                     "de": "German",
150 |                     "de-AT": "Austrian German",
151 |                     "de-CH": "Swiss High German",
152 |                     "del": "Delaware",
153 |                     "den": "Slave",
154 |                     "dgr": "Dogrib",
155 |                     "din": "Dinka",
156 |                     "dje": "Zarma",
157 |                     "doi": "Dogri",
158 |                     "dsb": "Lower Sorbian",
159 |                     "dtp": "Central Dusun",
160 |                     "dua": "Duala",
161 |                     "dum": "Middle Dutch",
162 |                     "dv": "Divehi",
163 |                     "dyo": "Jola-Fonyi",
164 |                     "dyu": "Dyula",
165 |                     "dz": "Dzongkha",
166 |                     "dzg": "Dazaga",
167 |                     "ebu": "Embu",
168 |                     "ee": "Ewe",
169 |                     "efi": "Efik",
170 |                     "egl": "Emilian",
171 |                     "egy": "Ancient Egyptian",
172 |                     "eka": "Ekajuk",
173 |                     "el": "Greek",
174 |                     "elx": "Elamite",
175 |                     "en": "English",
176 |                     "en-AU": "Australian English",
177 |                     "en-CA": "Canadian English",
178 |                     "en-GB": "British English",
179 |                     "en-GB-alt-short": "UK English",
180 |                     "en-US": "American English",
181 |                     "en-US-alt-short": "US English",
182 |                     "enm": "Middle English",
183 |                     "eo": "Esperanto",
184 |                     "es": "Spanish",
185 |                     "es-419": "Latin American Spanish",
186 |                     "es-ES": "European Spanish",
187 |                     "es-MX": "Mexican Spanish",
188 |                     "esu": "Central Yupik",
189 |                     "et": "Estonian",
190 |                     "eu": "Basque",
191 |                     "ewo": "Ewondo",
192 |                     "ext": "Extremaduran",
193 |                     "fa": "Persian",
194 |                     "fa-AF": "Dari",
195 |                     "fan": "Fang",
196 |                     "fat": "Fanti",
197 |                     "ff": "Fula",
198 |                     "fi": "Finnish",
199 |                     "fil": "Filipino",
200 |                     "fit": "Tornedalen Finnish",
201 |                     "fj": "Fijian",
202 |                     "fo": "Faroese",
203 |                     "fon": "Fon",
204 |                     "fr": "French",
205 |                     "fr-CA": "Canadian French",
206 |                     "fr-CH": "Swiss French",
207 |                     "frc": "Cajun French",
208 |                     "frm": "Middle French",
209 |                     "fro": "Old French",
210 |                     "frp": "Arpitan",
211 |                     "frr": "Northern Frisian",
212 |                     "frs": "Eastern Frisian",
213 |                     "fur": "Friulian",
214 |                     "fy": "Western Frisian",
215 |                     "ga": "Irish",
216 |                     "gaa": "Ga",
217 |                     "gag": "Gagauz",
218 |                     "gan": "Gan Chinese",
219 |                     "gay": "Gayo",
220 |                     "gba": "Gbaya",
221 |                     "gbz": "Zoroastrian Dari",
222 |                     "gd": "Scottish Gaelic",
223 |                     "gez": "Geez",
224 |                     "gil": "Gilbertese",
225 |                     "gl": "Galician",
226 |                     "glk": "Gilaki",
227 |                     "gmh": "Middle High German",
228 |                     "gn": "Guarani",
229 |                     "goh": "Old High German",
230 |                     "gon": "Gondi",
231 |                     "gor": "Gorontalo",
232 |                     "got": "Gothic",
233 |                     "grb": "Grebo",
234 |                     "grc": "Ancient Greek",
235 |                     "gsw": "Swiss German",
236 |                     "gu": "Gujarati",
237 |                     "guc": "Wayuu",
238 |                     "gur": "Frafra",
239 |                     "guz": "Gusii",
240 |                     "gv": "Manx",
241 |                     "gwi": "Gwichʼin",
242 |                     "ha": "Hausa",
243 |                     "hai": "Haida",
244 |                     "hak": "Hakka Chinese",
245 |                     "haw": "Hawaiian",
246 |                     "hax": "Southern Haida",
247 |                     "he": "Hebrew",
248 |                     "hi": "Hindi",
249 |                     "hi-Latn": "Hindi (Latin)",
250 |                     "hi-Latn-alt-variant": "Hinglish",
251 |                     "hif": "Fiji Hindi",
252 |                     "hil": "Hiligaynon",
253 |                     "hit": "Hittite",
254 |                     "hmn": "Hmong",
255 |                     "hnj": "Hmong Njua",
256 |                     "ho": "Hiri Motu",
257 |                     "hr": "Croatian",
258 |                     "hsb": "Upper Sorbian",
259 |                     "hsn": "Xiang Chinese",
260 |                     "ht": "Haitian Creole",
261 |                     "hu": "Hungarian",
262 |                     "hup": "Hupa",
263 |                     "hur": "Halkomelem",
264 |                     "hy": "Armenian",
265 |                     "hz": "Herero",
266 |                     "ia": "Interlingua",
267 |                     "iba": "Iban",
268 |                     "ibb": "Ibibio",
269 |                     "id": "Indonesian",
270 |                     "ie": "Interlingue",
271 |                     "ig": "Igbo",
272 |                     "ii": "Sichuan Yi",
273 |                     "ik": "Inupiaq",
274 |                     "ikt": "Western Canadian Inuktitut",
275 |                     "ilo": "Iloko",
276 |                     "inh": "Ingush",
277 |                     "io": "Ido",
278 |                     "is": "Icelandic",
279 |                     "it": "Italian",
280 |                     "iu": "Inuktitut",
281 |                     "izh": "Ingrian",
282 |                     "ja": "Japanese",
283 |                     "jam": "Jamaican Creole English",
284 |                     "jbo": "Lojban",
285 |                     "jgo": "Ngomba",
286 |                     "jmc": "Machame",
287 |                     "jpr": "Judeo-Persian",
288 |                     "jrb": "Judeo-Arabic",
289 |                     "jut": "Jutish",
290 |                     "jv": "Javanese",
291 |                     "ka": "Georgian",
292 |                     "kaa": "Kara-Kalpak",
293 |                     "kab": "Kabyle",
294 |                     "kac": "Kachin",
295 |                     "kaj": "Jju",
296 |                     "kam": "Kamba",
297 |                     "kaw": "Kawi",
298 |                     "kbd": "Kabardian",
299 |                     "kbl": "Kanembu",
300 |                     "kcg": "Tyap",
301 |                     "kde": "Makonde",
302 |                     "kea": "Kabuverdianu",
303 |                     "ken": "Kenyang",
304 |                     "kfo": "Koro",
305 |                     "kg": "Kongo",
306 |                     "kgp": "Kaingang",
307 |                     "kha": "Khasi",
308 |                     "kho": "Khotanese",
309 |                     "khq": "Koyra Chiini",
310 |                     "khw": "Khowar",
311 |                     "ki": "Kikuyu",
312 |                     "kiu": "Kirmanjki",
313 |                     "kj": "Kuanyama",
314 |                     "kk": "Kazakh",
315 |                     "kkj": "Kako",
316 |                     "kl": "Kalaallisut",
317 |                     "kln": "Kalenjin",
318 |                     "km": "Khmer",
319 |                     "kmb": "Kimbundu",
320 |                     "kn": "Kannada",
321 |                     "ko": "Korean",
322 |                     "koi": "Komi-Permyak",
323 |                     "kok": "Konkani",
324 |                     "kos": "Kosraean",
325 |                     "kpe": "Kpelle",
326 |                     "kr": "Kanuri",
327 |                     "krc": "Karachay-Balkar",
328 |                     "kri": "Krio",
329 |                     "krj": "Kinaray-a",
330 |                     "krl": "Karelian",
331 |                     "kru": "Kurukh",
332 |                     "ks": "Kashmiri",
333 |                     "ksb": "Shambala",
334 |                     "ksf": "Bafia",
335 |                     "ksh": "Colognian",
336 |                     "ku": "Kurdish",
337 |                     "kum": "Kumyk",
338 |                     "kut": "Kutenai",
339 |                     "kv": "Komi",
340 |                     "kw": "Cornish",
341 |                     "kwk": "Kwakʼwala",
342 |                     "kxv": "Kuvi",
343 |                     "ky": "Kyrgyz",
344 |                     "ky-alt-variant": "Kirghiz",
345 |                     "la": "Latin",
346 |                     "lad": "Ladino",
347 |                     "lag": "Langi",
348 |                     "lah": "Western Panjabi",
349 |                     "lam": "Lamba",
350 |                     "lb": "Luxembourgish",
351 |                     "lez": "Lezghian",
352 |                     "lfn": "Lingua Franca Nova",
353 |                     "lg": "Ganda",
354 |                     "li": "Limburgish",
355 |                     "lij": "Ligurian",
356 |                     "lil": "Lillooet",
357 |                     "liv": "Livonian",
358 |                     "lkt": "Lakota",
359 |                     "lmo": "Lombard",
360 |                     "ln": "Lingala",
361 |                     "lo": "Lao",
362 |                     "lol": "Mongo",
363 |                     "lou": "Louisiana Creole",
364 |                     "loz": "Lozi",
365 |                     "lrc": "Northern Luri",
366 |                     "lsm": "Saamia",
367 |                     "lt": "Lithuanian",
368 |                     "ltg": "Latgalian",
369 |                     "lu": "Luba-Katanga",
370 |                     "lua": "Luba-Lulua",
371 |                     "lui": "Luiseno",
372 |                     "lun": "Lunda",
373 |                     "luo": "Luo",
374 |                     "lus": "Mizo",
375 |                     "luy": "Luyia",
376 |                     "lv": "Latvian",
377 |                     "lzh": "Literary Chinese",
378 |                     "lzz": "Laz",
379 |                     "mad": "Madurese",
380 |                     "maf": "Mafa",
381 |                     "mag": "Magahi",
382 |                     "mai": "Maithili",
383 |                     "mak": "Makasar",
384 |                     "man": "Mandingo",
385 |                     "mas": "Masai",
386 |                     "mde": "Maba",
387 |                     "mdf": "Moksha",
388 |                     "mdr": "Mandar",
389 |                     "men": "Mende",
390 |                     "mer": "Meru",
391 |                     "mfe": "Morisyen",
392 |                     "mg": "Malagasy",
393 |                     "mga": "Middle Irish",
394 |                     "mgh": "Makhuwa-Meetto",
395 |                     "mgo": "Metaʼ",
396 |                     "mh": "Marshallese",
397 |                     "mi": "Māori",
398 |                     "mic": "Mi'kmaw",
399 |                     "min": "Minangkabau",
400 |                     "mk": "Macedonian",
401 |                     "ml": "Malayalam",
402 |                     "mn": "Mongolian",
403 |                     "mnc": "Manchu",
404 |                     "mni": "Manipuri",
405 |                     "moe": "Innu-aimun",
406 |                     "moh": "Mohawk",
407 |                     "mos": "Mossi",
408 |                     "mr": "Marathi",
409 |                     "mrj": "Western Mari",
410 |                     "ms": "Malay",
411 |                     "mt": "Maltese",
412 |                     "mua": "Mundang",
413 |                     "mul": "Multiple languages",
414 |                     "mus": "Muscogee",
415 |                     "mus-alt-official": "Mvskoke",
416 |                     "mus-alt-variant": "Muscogee",
417 |                     "mwl": "Mirandese",
418 |                     "mwr": "Marwari",
419 |                     "mwv": "Mentawai",
420 |                     "my": "Burmese",
421 |                     "my-alt-variant": "Myanmar Language",
422 |                     "mye": "Myene",
423 |                     "myv": "Erzya",
424 |                     "mzn": "Mazanderani",
425 |                     "na": "Nauru",
426 |                     "nan": "Min Nan Chinese",
427 |                     "nap": "Neapolitan",
428 |                     "naq": "Nama",
429 |                     "nb": "Norwegian Bokmål",
430 |                     "nd": "North Ndebele",
431 |                     "nds": "Low German",
432 |                     "nds-NL": "Low Saxon",
433 |                     "ne": "Nepali",
434 |                     "new": "Newari",
435 |                     "ng": "Ndonga",
436 |                     "nia": "Nias",
437 |                     "niu": "Niuean",
438 |                     "njo": "Ao Naga",
439 |                     "nl": "Dutch",
440 |                     "nl-BE": "Flemish",
441 |                     "nmg": "Kwasio",
442 |                     "nn": "Norwegian Nynorsk",
443 |                     "nnh": "Ngiemboon",
444 |                     "no": "Norwegian",
445 |                     "nog": "Nogai",
446 |                     "non": "Old Norse",
447 |                     "nov": "Novial",
448 |                     "nqo": "N’Ko",
449 |                     "nr": "South Ndebele",
450 |                     "nso": "Northern Sotho",
451 |                     "nus": "Nuer",
452 |                     "nv": "Navajo",
453 |                     "nwc": "Classical Newari",
454 |                     "ny": "Nyanja",
455 |                     "nym": "Nyamwezi",
456 |                     "nyn": "Nyankole",
457 |                     "nyo": "Nyoro",
458 |                     "nzi": "Nzima",
459 |                     "oc": "Occitan",
460 |                     "oj": "Ojibwa",
461 |                     "ojb": "Northwestern Ojibwa",
462 |                     "ojc": "Central Ojibwa",
463 |                     "ojs": "Oji-Cree",
464 |                     "ojw": "Western Ojibwa",
465 |                     "oka": "Okanagan",
466 |                     "om": "Oromo",
467 |                     "or": "Odia",
468 |                     "os": "Ossetic",
469 |                     "osa": "Osage",
470 |                     "ota": "Ottoman Turkish",
471 |                     "pa": "Punjabi",
472 |                     "pag": "Pangasinan",
473 |                     "pal": "Pahlavi",
474 |                     "pam": "Pampanga",
475 |                     "pap": "Papiamento",
476 |                     "pau": "Palauan",
477 |                     "pcd": "Picard",
478 |                     "pcm": "Nigerian Pidgin",
479 |                     "pdc": "Pennsylvania German",
480 |                     "pdt": "Plautdietsch",
481 |                     "peo": "Old Persian",
482 |                     "pfl": "Palatine German",
483 |                     "phn": "Phoenician",
484 |                     "pi": "Pali",
485 |                     "pis": "Pijin",
486 |                     "pl": "Polish",
487 |                     "pms": "Piedmontese",
488 |                     "pnt": "Pontic",
489 |                     "pon": "Pohnpeian",
490 |                     "pqm": "Maliseet-Passamaquoddy",
491 |                     "prg": "Prussian",
492 |                     "pro": "Old Provençal",
493 |                     "ps": "Pashto",
494 |                     "ps-alt-variant": "Pushto",
495 |                     "pt": "Portuguese",
496 |                     "pt-BR": "Brazilian Portuguese",
497 |                     "pt-PT": "European Portuguese",
498 |                     "qu": "Quechua",
499 |                     "quc": "Kʼicheʼ",
500 |                     "qug": "Chimborazo Highland Quichua",
501 |                     "raj": "Rajasthani",
502 |                     "rap": "Rapanui",
503 |                     "rar": "Rarotongan",
504 |                     "rgn": "Romagnol",
505 |                     "rhg": "Rohingya",
506 |                     "rif": "Riffian",
507 |                     "rm": "Romansh",
508 |                     "rn": "Rundi",
509 |                     "ro": "Romanian",
510 |                     "ro-MD": "Moldavian",
511 |                     "rof": "Rombo",
512 |                     "rom": "Romany",
513 |                     "rtm": "Rotuman",
514 |                     "ru": "Russian",
515 |                     "rue": "Rusyn",
516 |                     "rug": "Roviana",
517 |                     "rup": "Aromanian",
518 |                     "rw": "Kinyarwanda",
519 |                     "rwk": "Rwa",
520 |                     "sa": "Sanskrit",
521 |                     "sad": "Sandawe",
522 |                     "sah": "Yakut",
523 |                     "sam": "Samaritan Aramaic",
524 |                     "saq": "Samburu",
525 |                     "sas": "Sasak",
526 |                     "sat": "Santali",
527 |                     "saz": "Saurashtra",
528 |                     "sba": "Ngambay",
529 |                     "sbp": "Sangu",
530 |                     "sc": "Sardinian",
531 |                     "scn": "Sicilian",
532 |                     "sco": "Scots",
533 |                     "sd": "Sindhi",
534 |                     "sdc": "Sassarese Sardinian",
535 |                     "sdh": "Southern Kurdish",
536 |                     "se": "Northern Sami",
537 |                     "se-alt-menu": "Sami, Northern",
538 |                     "see": "Seneca",
539 |                     "seh": "Sena",
540 |                     "sei": "Seri",
541 |                     "sel": "Selkup",
542 |                     "ses": "Koyraboro Senni",
543 |                     "sg": "Sango",
544 |                     "sga": "Old Irish",
545 |                     "sgs": "Samogitian",
546 |                     "sh": "Serbo-Croatian",
547 |                     "shi": "Tachelhit",
548 |                     "shn": "Shan",
549 |                     "shu": "Chadian Arabic",
550 |                     "si": "Sinhala",
551 |                     "sid": "Sidamo",
552 |                     "sk": "Slovak",
553 |                     "sl": "Slovenian",
554 |                     "slh": "Southern Lushootseed",
555 |                     "sli": "Lower Silesian",
556 |                     "sly": "Selayar",
557 |                     "sm": "Samoan",
558 |                     "sma": "Southern Sami",
559 |                     "sma-alt-menu": "Sami, Southern",
560 |                     "smj": "Lule Sami",
561 |                     "smj-alt-menu": "Sami, Lule",
562 |                     "smn": "Inari Sami",
563 |                     "smn-alt-menu": "Sami, Inari",
564 |                     "sms": "Skolt Sami",
565 |                     "sms-alt-menu": "Sami, Skolt",
566 |                     "sn": "Shona",
567 |                     "snk": "Soninke",
568 |                     "so": "Somali",
569 |                     "sog": "Sogdien",
570 |                     "sq": "Albanian",
571 |                     "sr": "Serbian",
572 |                     "sr-ME": "Montenegrin",
573 |                     "srn": "Sranan Tongo",
574 |                     "srr": "Serer",
575 |                     "ss": "Swati",
576 |                     "ssy": "Saho",
577 |                     "st": "Southern Sotho",
578 |                     "stq": "Saterland Frisian",
579 |                     "str": "Straits Salish",
580 |                     "su": "Sundanese",
581 |                     "suk": "Sukuma",
582 |                     "sus": "Susu",
583 |                     "sux": "Sumerian",
584 |                     "sv": "Swedish",
585 |                     "sw": "Swahili",
586 |                     "sw-CD": "Congo Swahili",
587 |                     "swb": "Comorian",
588 |                     "syc": "Classical Syriac",
589 |                     "syr": "Syriac",
590 |                     "szl": "Silesian",
591 |                     "ta": "Tamil",
592 |                     "tce": "Southern Tutchone",
593 |                     "tcy": "Tulu",
594 |                     "te": "Telugu",
595 |                     "tem": "Timne",
596 |                     "teo": "Teso",
597 |                     "ter": "Tereno",
598 |                     "tet": "Tetum",
599 |                     "tg": "Tajik",
600 |                     "tgx": "Tagish",
601 |                     "th": "Thai",
602 |                     "tht": "Tahltan",
603 |                     "ti": "Tigrinya",
604 |                     "tig": "Tigre",
605 |                     "tiv": "Tiv",
606 |                     "tk": "Turkmen",
607 |                     "tkl": "Tokelau",
608 |                     "tkr": "Tsakhur",
609 |                     "tl": "Tagalog",
610 |                     "tlh": "Klingon",
611 |                     "tli": "Tlingit",
612 |                     "tly": "Talysh",
613 |                     "tmh": "Tamashek",
614 |                     "tn": "Tswana",
615 |                     "to": "Tongan",
616 |                     "tog": "Nyasa Tonga",
617 |                     "tok": "Toki Pona",
618 |                     "tpi": "Tok Pisin",
619 |                     "tr": "Turkish",
620 |                     "tru": "Turoyo",
621 |                     "trv": "Taroko",
622 |                     "trw": "Torwali",
623 |                     "ts": "Tsonga",
624 |                     "tsd": "Tsakonian",
625 |                     "tsi": "Tsimshian",
626 |                     "tt": "Tatar",
627 |                     "ttm": "Northern Tutchone",
628 |                     "ttt": "Muslim Tat",
629 |                     "tum": "Tumbuka",
630 |                     "tvl": "Tuvalu",
631 |                     "tw": "Twi",
632 |                     "twq": "Tasawaq",
633 |                     "ty": "Tahitian",
634 |                     "tyv": "Tuvinian",
635 |                     "tzm": "Central Atlas Tamazight",
636 |                     "udm": "Udmurt",
637 |                     "ug": "Uyghur",
638 |                     "ug-alt-variant": "Uighur",
639 |                     "uga": "Ugaritic",
640 |                     "uk": "Ukrainian",
641 |                     "umb": "Umbundu",
642 |                     "und": "Unknown language",
643 |                     "ur": "Urdu",
644 |                     "uz": "Uzbek",
645 |                     "vai": "Vai",
646 |                     "ve": "Venda",
647 |                     "vec": "Venetian",
648 |                     "vep": "Veps",
649 |                     "vi": "Vietnamese",
650 |                     "vls": "West Flemish",
651 |                     "vmf": "Main-Franconian",
652 |                     "vmw": "Makhuwa",
653 |                     "vo": "Volapük",
654 |                     "vot": "Votic",
655 |                     "vro": "Võro",
656 |                     "vun": "Vunjo",
657 |                     "wa": "Walloon",
658 |                     "wae": "Walser",
659 |                     "wal": "Wolaytta",
660 |                     "war": "Waray",
661 |                     "was": "Washo",
662 |                     "wbp": "Warlpiri",
663 |                     "wo": "Wolof",
664 |                     "wuu": "Wu Chinese",
665 |                     "xal": "Kalmyk",
666 |                     "xh": "Xhosa",
667 |                     "xmf": "Mingrelian",
668 |                     "xnr": "Kangri",
669 |                     "xog": "Soga",
670 |                     "yao": "Yao",
671 |                     "yap": "Yapese",
672 |                     "yav": "Yangben",
673 |                     "ybb": "Yemba",
674 |                     "yi": "Yiddish",
675 |                     "yo": "Yoruba",
676 |                     "yrl": "Nheengatu",
677 |                     "yue": "Cantonese",
678 |                     "yue-alt-menu": "Chinese, Cantonese",
679 |                     "za": "Zhuang",
680 |                     "zap": "Zapotec",
681 |                     "zbl": "Blissymbols",
682 |                     "zea": "Zeelandic",
683 |                     "zen": "Zenaga",
684 |                     "zgh": "Standard Moroccan Tamazight",
685 |                     "zh": "Chinese",
686 |                     "zh-alt-long": "Mandarin Chinese",
687 |                     "zh-alt-menu": "Chinese, Mandarin",
688 |                     "zh-Hans": "Simplified Chinese",
689 |                     "zh-Hans-alt-long": "Simplified Mandarin Chinese",
690 |                     "zh-Hant": "Traditional Chinese",
691 |                     "zh-Hant-alt-long": "Traditional Mandarin Chinese",
692 |                     "zu": "Zulu",
693 |                     "zun": "Zuni",
694 |                     "zxx": "No linguistic content",
695 |                     "zza": "Zaza"
696 |                 }
697 |             }
698 |         }
699 |     }
700 | }


--------------------------------------------------------------------------------