├── Makefile ├── README.md ├── pluralize └── __init__.py ├── pyproject.toml └── tests └── test_simple.py /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test build deploy 2 | 3 | clean: 4 | rm -rf dist build 5 | 6 | test: 7 | python -m unittest tests/test*.py 8 | 9 | build: clean 10 | python -m build 11 | 12 | deploy: build 13 | python -m twine upload dist/* 14 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Pluralize 2 | 3 | Pluralize is a Python library for Internationalization (i18n) and Pluralization. 4 | 5 | The library assumes a folder (for exaple "translations") that contains files like: 6 | 7 | ```sh 8 | it.json 9 | it-IT.json 10 | fr.json 11 | fr-FR.json 12 | (etc) 13 | ``` 14 | 15 | Each file has the following structure, for example for Italian (it.json): 16 | 17 | ```json 18 | {"dog": {"0": "no cane", "1": "un cane", "2": "{n} cani", "10": "tantissimi cani"}} 19 | ``` 20 | 21 | The top level keys are the expressions to be translated and the associated value/dictionary maps a number to a translation. 22 | Different translations correspond to different plural forms of the expression, 23 | 24 | Here is another example for the word "bed" in Czech 25 | 26 | ```json 27 | {"bed": {"0": "no postel", "1": "postel", "2": "postele", "5": "postelí"}} 28 | ``` 29 | 30 | To translate and pluralize a string "dog" one simply wraps the string in the T operator as follows: 31 | 32 | ```python 33 | >>> from pluralize import Translator 34 | >>> T = Translator('translations') 35 | >>> dog = T("dog") 36 | >>> print(dog) 37 | dog 38 | >>> T.select('it') 39 | >>> print(dog) 40 | un cane 41 | >>> print(dog.format(n=0)) 42 | no cane 43 | >>> print(dog.format(n=1)) 44 | un cane 45 | >>> print(dog.format(n=5)) 46 | 5 cani 47 | >>> print(dog.format(n=20)) 48 | tantissimi cani 49 | ``` 50 | 51 | The string can contain multiple placeholders but the {n} placeholder is special because 52 | the variable called "n" is used to determine the pluralization by best match (max dict key <= n). 53 | 54 | T(...) objects can be added together with each other and with string, like regular strings. 55 | 56 | T.select(s) can parse a string s following the HTTP accept language format. 57 | 58 | ## Update the translation files 59 | 60 | Find all strings wrapped in T(...) in .py, .html, and .js files: 61 | ```python 62 | matches = T.find_matches('path/to/app/folder') 63 | ``` 64 | 65 | Add newly discovered entries in all supported languages 66 | ```python 67 | T.update_languages(matches) 68 | ``` 69 | 70 | Add a new supported language (for example german, "de") 71 | 72 | ```python 73 | T.languages['de'] = {} 74 | ``` 75 | 76 | Make sure all languages contain the same origin expressions 77 | ```python 78 | known_expressions = set() 79 | for language in T.languages.values(): 80 | for expression in language: 81 | known_expressions.add(expression) 82 | T.update_languages(known_expressions)) 83 | ``` 84 | 85 | Finally save the changes: 86 | 87 | ```python 88 | T.save('translations') 89 | ``` 90 | -------------------------------------------------------------------------------- /pluralize/__init__.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import json 3 | import os 4 | import re 5 | import threading 6 | 7 | __version__ = "20240519.3" 8 | 9 | re_language = re.compile(r"^\w\w(-\w+)*.json$") 10 | 11 | 12 | class lazyT(object): 13 | 14 | """accesssory object used to represent a T("string")""" 15 | 16 | def __init__(self, translator, text, **kwargs): 17 | self.translator = translator 18 | self.text = text 19 | self.kwargs = kwargs 20 | 21 | def format(self, **other): 22 | """T('hello {n}').format(n=2)""" 23 | kwargs = dict(self.kwargs) 24 | kwargs.update(**other) 25 | return lazyT(self.translator, self.text, **kwargs) 26 | 27 | def __add__(self, other): 28 | """T('hello') + ' ' + T('world')""" 29 | return str(self) + str(other) 30 | 31 | def __radd__(self, other): 32 | """T('hello') + ' ' + T('world')""" 33 | return str(other) + str(self) 34 | 35 | def __str__(self): 36 | """str(T('dog')) -> 'cane'""" 37 | return self.xml() 38 | 39 | def __mod__(self, obj): 40 | """T('route %d') % 66 -> 'route 66'""" 41 | return self.xml() % obj 42 | 43 | def xml(self): 44 | """same as str but for interoperability with yatl helpers""" 45 | return self.translator(self.text, **self.kwargs) 46 | 47 | 48 | class Translator(object): 49 | def __init__(self, folder=None, encoding="utf-8", comment_marker=None): 50 | """ 51 | creates a translator object loading languages and pluralizations from translations/en-US.py files 52 | usage: 53 | 54 | T = Translator('translations') 55 | print(T('dog')) 56 | """ 57 | self.local = threading.local() 58 | self.languages = {} 59 | self.local.tag = None 60 | self.local.language = None 61 | self.missing = set() 62 | self.folder = folder 63 | self.encoding = encoding 64 | self.comment_marker = comment_marker 65 | if folder: 66 | self.load(folder) 67 | 68 | def load(self, folder): 69 | """loads languages and pluralizations from folder/en-US.json files""" 70 | self.languages = {} 71 | for filename in os.listdir(folder): 72 | if re_language.match(filename): 73 | with open( 74 | os.path.join(folder, filename), "r", encoding=self.encoding 75 | ) as fp: 76 | self.languages[filename[:-5].lower()] = json.load(fp) 77 | 78 | def save(self, folder=None, ensure_ascii=True): 79 | """save the loaded translation files""" 80 | folder = folder or self.folder 81 | for key in self.languages: 82 | filename = "%s.json" % key 83 | with open( 84 | os.path.join(folder, filename), "w", encoding=self.encoding 85 | ) as fp: 86 | json.dump( 87 | self.languages[key], 88 | fp, 89 | sort_keys=True, 90 | indent=4, 91 | ensure_ascii=ensure_ascii, 92 | ) 93 | 94 | def select(self, accepted_languages="fr-CH, fr;q=0.9, en;q=0.8, de;q=0.7, *;q=0.5"): 95 | """given appected_langauges string from HTTP header, picks the best match""" 96 | if isinstance(accepted_languages, str): 97 | accepted_languages = [ 98 | tag.split(";")[0].replace("_", "-").strip() 99 | for tag in accepted_languages.split(",") 100 | ] 101 | for tag in accepted_languages: 102 | for k in range(tag.count("-"), 0, -1): 103 | subtag = "-".join(tag.split("-")[:k]) 104 | if not subtag in accepted_languages: 105 | accepted_languages.append(subtag) 106 | self.local.tag = None 107 | self.local.language = None 108 | for tag in accepted_languages: 109 | if tag.lower() in self.languages: 110 | self.local.tag = tag.lower() 111 | self.local.language = self.languages[tag.lower()] 112 | break 113 | 114 | def __call__(self, text): 115 | """retuns a lazyT object""" 116 | if isinstance(text, lazyT): 117 | return text 118 | return lazyT(self._translator, text) 119 | 120 | def _translator(self, text, **kwargs): 121 | """translates/pluralizes""" 122 | if not isinstance(text, str): 123 | text = str(text) 124 | using_original = True 125 | if getattr(self.local, "language", None): 126 | n = kwargs.get("n", 1) 127 | translations = self.local.language.get(text) 128 | if translations is None: 129 | self.missing.add(text) 130 | elif isinstance(translations, dict) and translations: 131 | k = max(int(i) for i in translations.keys() if int(i) <= n) 132 | text = translations[str(k)].format(**kwargs) 133 | using_original = False 134 | if text and using_original and self.comment_marker: 135 | text = text.split(self.comment_marker)[0] 136 | return text.format(**kwargs) 137 | 138 | @staticmethod 139 | def find_matches( 140 | folder, name="T", extensions=["py", "js", "html"], encoding="utf-8" 141 | ): 142 | """finds all strings in files in folder needing translations""" 143 | matches_found = set() 144 | re_string_t = ( 145 | r"(?<=[^\w]%s\()(?P" 146 | r"[uU]?[rR]?(?:'''(?:[^']|'{1,2}(?!'))*''')" 147 | r"|(?:'(?:[^'\\]|\\.)*')" 148 | r'|(?:"""(?:[^"]|"{1,2}(?!"))*""")' 149 | r'|(?:"(?:[^"\\]|\\.)*"))' 150 | ) % name 151 | regex_t = re.compile(re_string_t) 152 | for root, dirs, files in os.walk(folder): 153 | for name in files: 154 | if name.split(".")[-1] in extensions: 155 | path = os.path.join(root, name) 156 | with open(path, encoding=encoding) as fp: 157 | data = fp.read() 158 | items = regex_t.findall(data) 159 | matches_found |= set(map(ast.literal_eval, items)) 160 | return list(matches_found) 161 | 162 | def update_languages(self, items): 163 | """updates all loaded language files with the items, typically items returned by find_matches 164 | example of workflow: 165 | T = Translator() 166 | T.load(laguage_folder) 167 | T.update_languages(T.find_matches(app_folder)) 168 | T.save(languages_folder) 169 | """ 170 | for tag in self.languages: 171 | language = self.languages[tag] 172 | for item in items: 173 | if not item in language: 174 | language[item] = {} 175 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "pluralize" 3 | version = "20240519.3" 4 | authors = [{ name="Massimo Di Pierro", email="massimo.dipierro@gmail.com" },] 5 | description = "i18n + pluralization library with multi-plural form support and thread safe for web apps" 6 | readme = "README.md" 7 | requires-python = ">=3.7" 8 | classifiers = [ 9 | "Programming Language :: Python :: 3", 10 | "License :: OSI Approved :: BSD License", 11 | "Operating System :: OS Independent", 12 | ] 13 | 14 | [project.urls] 15 | "Homepage" = "https://github.com/web2py/pluralize" 16 | "Bug Tracker" = "https://github.com/web2py/yatl/pluralize" 17 | -------------------------------------------------------------------------------- /tests/test_simple.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pluralize import Translator 4 | 5 | 6 | class TestPluralization(unittest.TestCase): 7 | def setUp(self): 8 | T = Translator(comment_marker="##") 9 | T.update_languages(T.find_matches("./")) 10 | T.languages = { 11 | "it": { 12 | "dog": { 13 | "0": "no cane", 14 | "1": "un cane", 15 | "2": "due cani", 16 | "3": "alcuni cani", 17 | "10": "tanti cani", 18 | }, 19 | "dog##dialect": { 20 | "0": "nisciuno cane", 21 | }, 22 | } 23 | } 24 | T.select("en;q=0.9,it-IT;q=0.1") 25 | self.T = T 26 | 27 | def test_simple(self): 28 | T = self.T 29 | dog = T("dog") 30 | self.assertEqual(str(dog.format(n=0)), "no cane") 31 | self.assertEqual(str(dog.format(n=1)), "un cane") 32 | self.assertEqual(str(dog.format(n=2)), "due cani") 33 | self.assertEqual(str(dog.format(n=3)), "alcuni cani") 34 | self.assertEqual(str(dog.format(n=5)), "alcuni cani") 35 | self.assertEqual(str(dog.format(n=100)), "tanti cani") 36 | 37 | plus = T("plus") 38 | T.languages["it"]["plus"] = {"0": "piu'"} 39 | self.assertEqual( 40 | dog + " " + plus + " " + dog.format(n=2), "un cane piu' due cani" 41 | ) 42 | 43 | def test_comments(self): 44 | T = self.T 45 | dog = T("dog") 46 | self.assertEqual(str(dog.format(n=0)), "no cane") 47 | dog = T("dog##dialect") 48 | self.assertEqual(str(dog.format(n=0)), "nisciuno cane") 49 | 50 | def test_idempotency(self): 51 | T = self.T 52 | text = "dog" 53 | a = T(text) 54 | b = T(a) 55 | c = T(b) 56 | self.assertEqual(str(c.format(n=1)), "un cane") 57 | --------------------------------------------------------------------------------