├── .gitignore ├── .travis.yml ├── LICENSE ├── MANIFEST.in ├── NOTICE ├── README.md ├── langdetect ├── __init__.py ├── detector.py ├── detector_factory.py ├── lang_detect_exception.py ├── language.py ├── profiles │ ├── af │ ├── ar │ ├── bg │ ├── bn │ ├── ca │ ├── cs │ ├── cy │ ├── da │ ├── de │ ├── el │ ├── en │ ├── es │ ├── et │ ├── fa │ ├── fi │ ├── fr │ ├── gu │ ├── he │ ├── hi │ ├── hr │ ├── hu │ ├── id │ ├── it │ ├── ja │ ├── kn │ ├── ko │ ├── lt │ ├── lv │ ├── mk │ ├── ml │ ├── mr │ ├── ne │ ├── nl │ ├── no │ ├── pa │ ├── pl │ ├── pt │ ├── ro │ ├── ru │ ├── sk │ ├── sl │ ├── so │ ├── sq │ ├── sv │ ├── sw │ ├── ta │ ├── te │ ├── th │ ├── tl │ ├── tr │ ├── uk │ ├── ur │ ├── vi │ ├── zh-cn │ └── zh-tw ├── tests │ ├── __init__.py │ ├── test_detector.py │ ├── test_language.py │ └── utils │ │ ├── __init__.py │ │ ├── test_lang_profile.py │ │ ├── test_ngram.py │ │ └── test_unicode_block.py └── utils │ ├── __init__.py │ ├── lang_profile.py │ ├── messages.properties │ ├── messages.py │ ├── ngram.py │ └── unicode_block.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | 3 | arch: 4 | - amd64 5 | - ppc64le 6 | python: 7 | - "2.7" 8 | - "3.4" 9 | - "3.5" 10 | - "3.6" 11 | - "3.7" 12 | - "3.8" 13 | - "pypy" 14 | - "pypy3" 15 | jobs: 16 | exclude: 17 | - arch : ppc64le 18 | python : pypy 19 | - arch : ppc64le 20 | python : pypy3 21 | # Use container-based infrastructure 22 | sudo: false 23 | 24 | install: 25 | - pip install . 26 | - pip install -r requirements.txt 27 | - pip install coverage 28 | 29 | script: 30 | - coverage run --source=langdetect --omit=langdetect/tests/* -m unittest discover 31 | 32 | after_success: 33 | - pip install coveralls 34 | - coveralls 35 | 36 | after_script: 37 | - coverage report 38 | - pip install pep8 pyflakes 39 | - pyflakes .| tee >(wc -l) 40 | - pep8 --statistics --count . 41 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2014-2015 Michal "Mimino" Danilak 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include LICENSE 3 | include NOTICE 4 | include MANIFEST.in 5 | include requirements.txt 6 | include langdetect/utils/messages.properties 7 | recursive-include langdetect/profiles * 8 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | language-detection license 2 | ========================== 3 | 4 | Copyright (c) 2010-2014 Cybozu Labs, Inc. All rights reserved. 5 | 6 | Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 11 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | langdetect 2 | ========== 3 | 4 | [![Build Status](https://travis-ci.org/Mimino666/langdetect.svg?branch=master)](https://travis-ci.org/Mimino666/langdetect) 5 | 6 | Port of Nakatani Shuyo's [language-detection](https://github.com/shuyo/language-detection) library (version from 03/03/2014) to Python. 7 | 8 | 9 | Installation 10 | ============ 11 | 12 | $ pip install langdetect 13 | 14 | Supported Python versions 2.7, 3.4+. 
15 | 16 | 17 | Languages 18 | ========= 19 | 20 | ``langdetect`` supports 55 languages out of the box ([ISO 639-1 codes](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)): 21 | 22 | af, ar, bg, bn, ca, cs, cy, da, de, el, en, es, et, fa, fi, fr, gu, he, 23 | hi, hr, hu, id, it, ja, kn, ko, lt, lv, mk, ml, mr, ne, nl, no, pa, pl, 24 | pt, ro, ru, sk, sl, so, sq, sv, sw, ta, te, th, tl, tr, uk, ur, vi, zh-cn, zh-tw 25 | 26 | 27 | Basic usage 28 | =========== 29 | 30 | To detect the language of the text: 31 | 32 | ```python 33 | >>> from langdetect import detect 34 | >>> detect("War doesn't show who's right, just who's left.") 35 | 'en' 36 | >>> detect("Ein, zwei, drei, vier") 37 | 'de' 38 | ``` 39 | 40 | To find out the probabilities for the top languages: 41 | 42 | ```python 43 | >>> from langdetect import detect_langs 44 | >>> detect_langs("Otec matka syn.") 45 | [sk:0.572770823327, pl:0.292872522702, cs:0.134356653968] 46 | ``` 47 | 48 | **NOTE** 49 | 50 | The language detection algorithm is non-deterministic, which means that if you run it on a text that is either too short or too ambiguous, you might get different results every time you run it. 51 | 52 | To enforce consistent results, call the following code before the first language detection: 53 | 54 | ```python 55 | from langdetect import DetectorFactory 56 | DetectorFactory.seed = 0 57 | ``` 58 | 59 | How to add new language? 60 | ======================== 61 | 62 | You need to create a new language profile. The easiest way to do it is to use the [langdetect.jar](https://github.com/shuyo/language-detection/raw/master/lib/langdetect.jar) tool, which can generate language profiles from Wikipedia abstract database files or plain text. 63 | 64 | Wikipedia abstract database files can be retrieved from "Wikipedia Downloads" ([http://download.wikimedia.org/](http://download.wikimedia.org/)). Their filenames have the form '(language code)wiki-(version)-abstract.xml' (e.g. 'enwiki-20101004-abstract.xml'). 
65 | 66 | usage: ``java -jar langdetect.jar --genprofile -d [directory path] [language codes]`` 67 | 68 | - Specify the directory that contains the abstract databases with the -d option. 69 | - This tool can handle gzip-compressed files. 70 | 71 | Remark: The Chinese database filenames look like 'zhwiki-(version)-abstract-zh-cn.xml' or 'zhwiki-(version)-abstract-zh-tw.xml', so they must be renamed to 'zh-cnwiki-(version)-abstract.xml' or 'zh-twwiki-(version)-abstract.xml'. 72 | 73 | To generate a language profile from plain text, use the genprofile-text command. 74 | 75 | usage: ``java -jar langdetect.jar --genprofile-text -l [language code] [text file path]`` 76 | 77 | For more details see [language-detection Wiki](https://code.google.com/archive/p/language-detection/wikis/Tools.wiki). 78 | 79 | 80 | Original project 81 | ================ 82 | 83 | This library is a direct port of Google's [language-detection](https://code.google.com/p/language-detection/) library from Java to Python. All the classes and methods are unchanged, so for more information see the project's website or wiki. 84 | 85 | Presentation of the language detection algorithm: [http://www.slideshare.net/shuyo/language-detection-library-for-java](http://www.slideshare.net/shuyo/language-detection-library-for-java). 
86 | -------------------------------------------------------------------------------- /langdetect/__init__.py: -------------------------------------------------------------------------------- 1 | from .detector_factory import DetectorFactory, PROFILES_DIRECTORY, detect, detect_langs 2 | from .lang_detect_exception import LangDetectException 3 | -------------------------------------------------------------------------------- /langdetect/detector.py: -------------------------------------------------------------------------------- 1 | import random 2 | import re 3 | 4 | import six 5 | from six.moves import zip, xrange 6 | 7 | from .lang_detect_exception import ErrorCode, LangDetectException 8 | from .language import Language 9 | from .utils.ngram import NGram 10 | from .utils.unicode_block import unicode_block 11 | 12 | 13 | class Detector(object): 14 | ''' 15 | Detector class is to detect language from specified text. 16 | Its instance is able to be constructed via the factory class DetectorFactory. 17 | 18 | After appending a target text to the Detector instance with .append(string), 19 | the detector provides the language detection results for target text via .detect() or .get_probabilities(). 20 | 21 | .detect() method returns a single language name which has the highest probability. 22 | .get_probabilities() methods returns a list of multiple languages and their probabilities. 23 | 24 | The detector has some parameters for language detection. 25 | See set_alpha(double), .set_max_text_length(int) .set_prior_map(dict). 
26 | 27 | Example: 28 | 29 | from langdetect.detector_factory import DetectorFactory 30 | factory = DetectorFactory() 31 | factory.load_profile('/path/to/profile/directory') 32 | 33 | def detect(text): 34 | detector = factory.create() 35 | detector.append(text) 36 | return detector.detect() 37 | 38 | def detect_langs(text): 39 | detector = factory.create() 40 | detector.append(text) 41 | return detector.get_probabilities() 42 | ''' 43 | 44 | ALPHA_DEFAULT = 0.5 45 | ALPHA_WIDTH = 0.05 46 | 47 | ITERATION_LIMIT = 1000 48 | PROB_THRESHOLD = 0.1 49 | CONV_THRESHOLD = 0.99999 50 | BASE_FREQ = 10000 51 | UNKNOWN_LANG = 'unknown' 52 | 53 | URL_RE = re.compile(r'https?://[-_.?&~;+=/#0-9A-Za-z]{1,2076}') 54 | MAIL_RE = re.compile(r'[-_.0-9A-Za-z]{1,64}@[-_0-9A-Za-z]{1,255}[-_.0-9A-Za-z]{1,255}') 55 | 56 | def __init__(self, factory): 57 | self.word_lang_prob_map = factory.word_lang_prob_map 58 | self.langlist = factory.langlist 59 | self.seed = factory.seed 60 | self.random = random.Random() 61 | self.text = '' 62 | self.langprob = None 63 | 64 | self.alpha = self.ALPHA_DEFAULT 65 | self.n_trial = 7 66 | self.max_text_length = 10000 67 | self.prior_map = None 68 | self.verbose = False 69 | 70 | def set_verbose(self): 71 | self.verbose = True 72 | 73 | def set_alpha(self, alpha): 74 | self.alpha = alpha 75 | 76 | def set_prior_map(self, prior_map): 77 | '''Set prior information about language probabilities.''' 78 | self.prior_map = [0.0] * len(self.langlist) 79 | sump = 0.0 80 | for i in xrange(len(self.prior_map)): 81 | lang = self.langlist[i] 82 | if lang in prior_map: 83 | p = prior_map[lang] 84 | if p < 0: 85 | raise LangDetectException(ErrorCode.InitParamError, 'Prior probability must be non-negative.') 86 | self.prior_map[i] = p 87 | sump += p 88 | if sump <= 0.0: 89 | raise LangDetectException(ErrorCode.InitParamError, 'More one of prior probability must be non-zero.') 90 | for i in xrange(len(self.prior_map)): 91 | self.prior_map[i] /= sump 92 | 93 | def 
set_max_text_length(self, max_text_length): 94 | '''Specify max size of target text to use for language detection. 95 | The default value is 10000(10KB). 96 | ''' 97 | self.max_text_length = max_text_length 98 | 99 | def append(self, text): 100 | '''Append the target text for language detection. 101 | If the total size of target text exceeds the limit size specified by 102 | Detector.set_max_text_length(int), the rest is cut down. 103 | ''' 104 | text = self.URL_RE.sub(' ', text) 105 | text = self.MAIL_RE.sub(' ', text) 106 | text = NGram.normalize_vi(text) 107 | pre = 0 108 | for i in xrange(min(len(text), self.max_text_length)): 109 | ch = text[i] 110 | if ch != ' ' or pre != ' ': 111 | self.text += ch 112 | pre = ch 113 | 114 | def cleaning_text(self): 115 | '''Cleaning text to detect 116 | (eliminate URL, e-mail address and Latin sentence if it is not written in Latin alphabet). 117 | ''' 118 | latin_count, non_latin_count = 0, 0 119 | for ch in self.text: 120 | if 'A' <= ch <= 'z': 121 | latin_count += 1 122 | elif ch >= six.u('\u0300') and unicode_block(ch) != 'Latin Extended Additional': 123 | non_latin_count += 1 124 | 125 | if latin_count * 2 < non_latin_count: 126 | text_without_latin = '' 127 | for ch in self.text: 128 | if ch < 'A' or 'z' < ch: 129 | text_without_latin += ch 130 | self.text = text_without_latin 131 | 132 | def detect(self): 133 | '''Detect language of the target text and return the language name 134 | which has the highest probability. 
135 | ''' 136 | probabilities = self.get_probabilities() 137 | if probabilities: 138 | return probabilities[0].lang 139 | return self.UNKNOWN_LANG 140 | 141 | def get_probabilities(self): 142 | if self.langprob is None: 143 | self._detect_block() 144 | return self._sort_probability(self.langprob) 145 | 146 | def _detect_block(self): 147 | self.cleaning_text() 148 | ngrams = self._extract_ngrams() 149 | if not ngrams: 150 | raise LangDetectException(ErrorCode.CantDetectError, 'No features in text.') 151 | 152 | self.langprob = [0.0] * len(self.langlist) 153 | 154 | self.random.seed(self.seed) 155 | for t in xrange(self.n_trial): 156 | prob = self._init_probability() 157 | alpha = self.alpha + self.random.gauss(0.0, 1.0) * self.ALPHA_WIDTH 158 | 159 | i = 0 160 | while True: 161 | self._update_lang_prob(prob, self.random.choice(ngrams), alpha) 162 | if i % 5 == 0: 163 | if self._normalize_prob(prob) > self.CONV_THRESHOLD or i >= self.ITERATION_LIMIT: 164 | break 165 | if self.verbose: 166 | six.print_('>', self._sort_probability(prob)) 167 | i += 1 168 | for j in xrange(len(self.langprob)): 169 | self.langprob[j] += prob[j] / self.n_trial 170 | if self.verbose: 171 | six.print_('==>', self._sort_probability(prob)) 172 | 173 | def _init_probability(self): 174 | '''Initialize the map of language probabilities. 175 | If there is the specified prior map, use it as initial map. 
176 | ''' 177 | if self.prior_map is not None: 178 | return list(self.prior_map) 179 | else: 180 | return [1.0 / len(self.langlist)] * len(self.langlist) 181 | 182 | def _extract_ngrams(self): 183 | '''Extract n-grams from target text.''' 184 | RANGE = list(xrange(1, NGram.N_GRAM + 1)) 185 | 186 | result = [] 187 | ngram = NGram() 188 | for ch in self.text: 189 | ngram.add_char(ch) 190 | if ngram.capitalword: 191 | continue 192 | for n in RANGE: 193 | # optimized w = ngram.get(n) 194 | if len(ngram.grams) < n: 195 | break 196 | w = ngram.grams[-n:] 197 | if w and w != ' ' and w in self.word_lang_prob_map: 198 | result.append(w) 199 | return result 200 | 201 | def _update_lang_prob(self, prob, word, alpha): 202 | '''Update language probabilities with N-gram string(N=1,2,3).''' 203 | if word is None or word not in self.word_lang_prob_map: 204 | return False 205 | 206 | lang_prob_map = self.word_lang_prob_map[word] 207 | if self.verbose: 208 | six.print_('%s(%s): %s' % (word, self._unicode_encode(word), self._word_prob_to_string(lang_prob_map))) 209 | 210 | weight = alpha / self.BASE_FREQ 211 | for i in xrange(len(prob)): 212 | prob[i] *= weight + lang_prob_map[i] 213 | return True 214 | 215 | def _word_prob_to_string(self, prob): 216 | result = '' 217 | for j in xrange(len(prob)): 218 | p = prob[j] 219 | if p >= 0.00001: 220 | result += ' %s:%.5f' % (self.langlist[j], p) 221 | return result 222 | 223 | def _normalize_prob(self, prob): 224 | '''Normalize probabilities and check convergence by the maximun probability. 
225 | ''' 226 | maxp, sump = 0.0, sum(prob) 227 | for i in xrange(len(prob)): 228 | p = prob[i] / sump 229 | if maxp < p: 230 | maxp = p 231 | prob[i] = p 232 | return maxp 233 | 234 | def _sort_probability(self, prob): 235 | result = [Language(lang, p) for (lang, p) in zip(self.langlist, prob) if p > self.PROB_THRESHOLD] 236 | result.sort(reverse=True) 237 | return result 238 | 239 | def _unicode_encode(self, word): 240 | buf = '' 241 | for ch in word: 242 | if ch >= six.u('\u0080'): 243 | st = hex(0x10000 + ord(ch))[2:] 244 | while len(st) < 4: 245 | st = '0' + st 246 | buf += r'\u' + st[1:5] 247 | else: 248 | buf += ch 249 | return buf 250 | -------------------------------------------------------------------------------- /langdetect/detector_factory.py: -------------------------------------------------------------------------------- 1 | import os 2 | from os import path 3 | import sys 4 | 5 | try: 6 | import simplejson as json 7 | except ImportError: 8 | import json 9 | 10 | from .detector import Detector 11 | from .lang_detect_exception import ErrorCode, LangDetectException 12 | from .utils.lang_profile import LangProfile 13 | 14 | 15 | class DetectorFactory(object): 16 | ''' 17 | Language Detector Factory Class. 18 | 19 | This class manages an initialization and constructions of Detector. 20 | 21 | Before using language detection library, 22 | load profiles with DetectorFactory.load_profile(str) 23 | and set initialization parameters. 24 | 25 | When the language detection, 26 | construct Detector instance via DetectorFactory.create(). 27 | See also Detector's sample code. 
28 | ''' 29 | seed = None 30 | 31 | def __init__(self): 32 | self.word_lang_prob_map = {} 33 | self.langlist = [] 34 | 35 | def load_profile(self, profile_directory): 36 | list_files = os.listdir(profile_directory) 37 | if not list_files: 38 | raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Not found profile: ' + profile_directory) 39 | 40 | langsize, index = len(list_files), 0 41 | for filename in list_files: 42 | if filename.startswith('.'): 43 | continue 44 | filename = path.join(profile_directory, filename) 45 | if not path.isfile(filename): 46 | continue 47 | 48 | f = None 49 | try: 50 | if sys.version_info[0] < 3: 51 | f = open(filename, 'r') 52 | else: 53 | f = open(filename, 'r', encoding='utf-8') 54 | json_data = json.load(f) 55 | profile = LangProfile(**json_data) 56 | self.add_profile(profile, index, langsize) 57 | index += 1 58 | except IOError: 59 | raise LangDetectException(ErrorCode.FileLoadError, 'Cannot open "%s"' % filename) 60 | except Exception: 61 | raise LangDetectException(ErrorCode.FormatError, 'Profile format error in "%s"' % filename) 62 | finally: 63 | if f: 64 | f.close() 65 | 66 | def load_json_profile(self, json_profiles): 67 | langsize, index = len(json_profiles), 0 68 | if langsize < 2: 69 | raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Need more than 2 profiles.') 70 | 71 | for json_profile in json_profiles: 72 | try: 73 | json_data = json.loads(json_profile) 74 | profile = LangProfile(**json_data) 75 | self.add_profile(profile, index, langsize) 76 | index += 1 77 | except Exception: 78 | raise LangDetectException(ErrorCode.FormatError, 'Profile format error.') 79 | 80 | def add_profile(self, profile, index, langsize): 81 | lang = profile.name 82 | if lang in self.langlist: 83 | raise LangDetectException(ErrorCode.DuplicateLangError, 'Duplicate the same language profile.') 84 | self.langlist.append(lang) 85 | 86 | for word in profile.freq: 87 | if word not in self.word_lang_prob_map: 88 | 
self.word_lang_prob_map[word] = [0.0] * langsize 89 | length = len(word) 90 | if 1 <= length <= 3: 91 | prob = 1.0 * profile.freq.get(word) / profile.n_words[length - 1] 92 | self.word_lang_prob_map[word][index] = prob 93 | 94 | def clear(self): 95 | self.langlist = [] 96 | self.word_lang_prob_map = {} 97 | 98 | def create(self, alpha=None): 99 | '''Construct Detector instance with smoothing parameter.''' 100 | detector = self._create_detector() 101 | if alpha is not None: 102 | detector.set_alpha(alpha) 103 | return detector 104 | 105 | def _create_detector(self): 106 | if not self.langlist: 107 | raise LangDetectException(ErrorCode.NeedLoadProfileError, 'Need to load profiles.') 108 | return Detector(self) 109 | 110 | def set_seed(self, seed): 111 | self.seed = seed 112 | 113 | def get_lang_list(self): 114 | return list(self.langlist) 115 | 116 | 117 | PROFILES_DIRECTORY = path.join(path.dirname(__file__), 'profiles') 118 | _factory = None 119 | 120 | def init_factory(): 121 | global _factory 122 | if _factory is None: 123 | _factory = DetectorFactory() 124 | _factory.load_profile(PROFILES_DIRECTORY) 125 | 126 | def detect(text): 127 | init_factory() 128 | detector = _factory.create() 129 | detector.append(text) 130 | return detector.detect() 131 | 132 | 133 | def detect_langs(text): 134 | init_factory() 135 | detector = _factory.create() 136 | detector.append(text) 137 | return detector.get_probabilities() 138 | -------------------------------------------------------------------------------- /langdetect/lang_detect_exception.py: -------------------------------------------------------------------------------- 1 | _error_codes = { 2 | 'NoTextError': 0, 3 | 'FormatError': 1, 4 | 'FileLoadError': 2, 5 | 'DuplicateLangError': 3, 6 | 'NeedLoadProfileError': 4, 7 | 'CantDetectError': 5, 8 | 'CantOpenTrainData': 6, 9 | 'TrainDataFormatError': 7, 10 | 'InitParamError': 8, 11 | } 12 | 13 | ErrorCode = type('ErrorCode', (), _error_codes) 14 | 15 | 16 | class 
LangDetectException(Exception): 17 | def __init__(self, code, message): 18 | super(LangDetectException, self).__init__(message) 19 | self.code = code 20 | 21 | def get_code(self): 22 | return self.code 23 | -------------------------------------------------------------------------------- /langdetect/language.py: -------------------------------------------------------------------------------- 1 | class Language(object): 2 | ''' 3 | Language is to store the detected language. 4 | Detector.get_probabilities() returns a list of Languages. 5 | ''' 6 | 7 | def __init__(self, lang, prob): 8 | self.lang = lang 9 | self.prob = prob 10 | 11 | def __repr__(self): 12 | if self.lang is None: 13 | return '' 14 | return '%s:%s' % (self.lang, self.prob) 15 | 16 | def __lt__(self, other): 17 | return self.prob < other.prob 18 | -------------------------------------------------------------------------------- /langdetect/profiles/gu: -------------------------------------------------------------------------------- 1 | {"freq":{"ૈદિ":382,"g":235,"d":312,"e":960,"c":304,"a":1076,"n":720,"o":584,"l":382,"m":289,"h":369,"i":764,"u":324,"t":728,"s":517,"r":627,"ોટ ":345,"ેસા":764,"ોલ ":730,"ોર ":574,"ોદ ":1827,"ેત્":227,"ેતી":2222,"ેતમ":2186,"ેતપ":357,"ેડબ":253,"ેડા":1366,"ેડી":300,"ેગા":229,"ેગો":375,"ેઘર":242,"ૈકી":6300,"ેશમ":527,"ેશન":12436,"ેવી":831,"ેવા":710,"ઇડર":265,"ેરા":387,"ેરી":718,"આહવ":288,"ેલા":24917,"ેલી":519,"ેલુ":9935,"ેલો":381,"ોઇ 
":458,"ેન્":278,"ેપુ":457,"આવે":34862,"ા":337683,"િ":47127,"સ":31472,"હ":20294,"શ":32541,"ષ":5409,"વ":91695,"લ":111041,"ળ":3931,"ર":102867,"ય":39143,"મ":113670,"ભ":35403,"બ":10569,"ફ":1198,"પ":49237,"ન":84304,"ધ":9131,"દ":38743,"થ":6321,"ત":89107,"ણ":9770,"ઢ":1233,"ડ":18443,"ઠ":3507,"જ":54268,"ઝ":1439,"ટ":6287,"ઘ":2525,"ચ":20557,"છ":25106,"ક":72592,"ખ":14557,"ગ":61691,"ઓ":8101,"એ":23599,"ઉ":5095,"ઈ":409,"અ":6168,"ઇ":1975,"આ":43598,"ં":82987,"૫":1391,"૪":875,"૩":2115,"૨":1146,"૯":1054,"૮":946,"૭":1034,"૬":461,"૧":5611,"૦":1269,"ૈ":7159,"ો":34921,"્":89060,"ૌ":562,"ુ":82336,"ી":42473,"ૃ":539,"ૂ":4236,"ે":108368,"આણં":435,"આદિ":1564,"ેશ ":483,"અને":1581,"અન્":344,"e ":271,"અમદ":630,"ેમ ":2812,"ેર ":1681,"ેલ ":1022," ૧":4255," ૩":409," ૨":679," ૫":978," ૪":492," ૭":700," ૯":551," ૮":625,"અગિ":1051," વ":14987," શ":3197," ર":16267," લ":5654," સ":11956," હ":3244," થ":1672," ત":31864," ધ":1718," દ":21808," ડ":1738," ઠ":222," ભ":34182," બ":4095," ય":383," મ":24848," ન":6795," ફ":765," પ":35455," છ":24245," ચ":2656," ઘ":628," ટ":479," ઝ":829," જ":21642," ઓ":682," ગ":30845," ખ":8068," ક":14981," ઉ":4757," એ":23366," આ":43205," ઇ":661," અ":6143,"આંગ":703,"્ચિ":11645,"્ટ્":549,"ોતર":377,"ોદર":1858,"ોનગ":236,"ોટા":473,"ોટી":225,"ોડા":794,"આઠ ":685,"ોની":800,"ોનો":2167,"ોરી":514,"ોળી":244,"ોલી":442,"ંવત":748,"ંબા":470,"ંબુ":281,"ંમત":254,"ંઠા":1406,"ંડવ":256,"ંદુ":455,"્ધ ":712,"ંદો":312,"ંધી":504,"ંતર":406,"ંચા":1418,"ંચમ":1337,"ંગા":221,"્ર ":966,"ોકો":3591,"્ય ":7092,"ંગણ":713,"ંખે":303,"ંગર":288,"્ષ ":789,"્વ ":2508,"એવા":6093,"્ષન":1137,"્ષિ":1509,"્વા":430,"્વે":772,"્વન":6820,"્વર":251,"્યન":12109,"્યત":720,"્યમ":432,"્યપ":428,"્યવ":2272,"્યા":2476,"્યુ":248,"્મદ":563,"્મા":375,"્લો":1068,"્લા":13052,"્રો":432,"્રે":602,"્રમ":649,"્રદ":871,"્રા":2175,"્રિ":476,"્રી":593,"્રહ":375,"્દ્":258,"્થા":288,"્તા":298,"્તી":856,"્તર":2535,"એક 
":15869,"ઉદે":246,"ઉપલ":606,"ઉપર":392,"ઉત્":2557,"ઉમર":329,"િત":853,"િણ":1494,"વિજ":359,"ીં":343,"િમ":11976,"િપ":550,"િન":1570,"વાય":500,"િવ":4622,"વાર":525,"િશ":322,"ીક":534,"વામ":999,"િલ":14752,"ીઓ":697,"િય":2671,"િર":803,"વાસ":1799,"ીજ":386,"િહ":232,"વિક":492,"િસ":578,"વાલ":357,"ીત":406,"ીદ":281,"ું":22062,"વાદ":862,"વાન":404,"ીય":2619,"ીમ":860,"ીન":8731,"વાડ":2612,"ીપ":459,"ુક":18441,"ીવ":394,"ુખ":4304,"ીર":426,"ીલ":229,"વાગ":247,"વાઘ":254,"ીસ":510,"ુચ":222,"ુજ":13015,"ાં":48849,"ાઉ":298,"ાઇ":474,"ાક":921,"ાઓ":6449,"ાઘ":283,"ાખ":279,"ાગ":19994,"ાજ":13842,"ાચ":245,"ાટ":1064,"ાડ":4936,"િં":1255,"ાણ":2500,"ાથ":1359,"ાત":14028,"ાદ":2250,"ાન":14000,"ાપ":2091,"ાબ":1765,"ામ":34603,"ાય":4603,"ાર":20818,"ાલ":24953,"ાળ":1774,"વિર":286,"િક":2870,"ાવ":3657,"ાષ":849,"ાસ":4564,"ાહ":1263,"િજ":517,"વિસ":266,"હત":7478,"સી":2165,"સુ":1962,"સે":1528,"સા":8757,"સિ":770,"હવ":567,"સો":764,"હર":240,"સ્":3025,"સૌ":277,"હુ":529,"સા ":522,"હે":3680,"હા":2824,"હિ":2043,"હી":349,"હો":1278,"હ્":385,"શ્":12458,"ષન":1142,"સગ":988,"સર 
":276,"શહ":485,"શિ":374,"શા":1696,"શુ":2253,"શી":262,"સં":2001,"ષ્":864,"સમ":828,"સન":417,"સવ":569,"સર":997,"ષા":458,"સદ":328,"સત":251,"સણ":439,"ષિ":1579,"વદ":549,"વન":7118,"વર":2659,"વલ":1055,"શક":498,"વગ":256,"વસા":2722,"વડ":3280,"વત":1294,"વણ":314,"વે":36512,"શન":12531,"વૈ":520,"શમ":627,"વ્":2839,"વસ":7405,"વી":2081,"વિ":2302,"વા":17902,"લો":6663,"લ્":14395,"લે":641,"લા":40018,"લિ":942,"લી":2736,"લુ":28591,"લસ":736,"લવ":315,"વસ્":826,"વસે":1125,"ળી":539,"વં":274,"ળા":1514,"રો":1975,"ર્":7275,"રુ":979,"રી":7304,"રૂ":908,"રે":1966,"રસ":718,"વાં":660,"રહ":1839,"રા":36128,"રિ":1193,"રવ":1135,"લબ":632,"લપ":735,"લય":580,"લન":2427,"લક":304,"લગ":215,"રક":1797,"રગ":229,"રખ":218,"યવ":2310,"રજ":562,"યા":7096,"રડ":414,"યુ":510,"રણ":534,"રત":14232,"રથ":239,"રદ":1032,"શમા":566,"યે":847,"રન":646,"રપ":682,"યો":569,"રબ":221,"રમ":2425,"મર":703,"મમ":2823,"મલ":319,"મહ":10705,"રં":397,"મી":561,"યડ":242,"મુ":6197,"મા":44661,"મિ":1415,"યત":1490,"યપ":480,"યન":12830,"મે":837,"યમ":548,"મ્":803,"મો":2623,"બ્":1122,"ભર":899,"મજ":2296,"મગ":222,"મખ":281,"મણ":336,"મત":581,"મથ":816,"ભા":32622,"ભિ":388,"મપ":589,"ભો":320,"મદ":1417,"મધ":3415,"મન":2890,"બર":1834,"બહ":293,"મં":287,"બી":424,"બુ":465,"બા":2768,"બિ":244,"બો":581,"બે":417,"પો":705,"પ્":4053,"બન":257,"પલ":826,"પહ":227,"પશ":13852,"પર":1594,"પૂ":2675,"પૈ":6312,"પે":306,"પુ":5227,"પી":742,"પિ":245,"પા":6287,"ન્":1623,"નો":5861,"પણ":494,"પત":282,"પટ":291,"પડ":321,"પછ":431,"નવ":1459,"નર":658,"ધ્":3586,"ધો":283,"નપ":579,"નન":237,"ને":4269,"નુ":10812,"પં":2789,"ની":3990,"નિ":691,"ના":45180,"નસ":606,"ધા":1073,"ધુ":315,"ધી":641,"દે":14545,"ધન":837,"દ્":1191,"દો":459,"ધર":916,"સી 
":1244,"નગ":2570,"દશ":215,"દસ":731,"દહ":218,"દા":3639,"દિ":5058,"દી":575,"દુ":1289,"દર":3345,"થવ":580,"વેલ":34942,"શના":12337,"થી":1041,"થા":882,"તો":531,"વૈદ":382,"તે":6014,"દક":1517,"થય":782,"ત્":12219,"થમ":1093,"થક":777,"તી":4432,"તુ":553,"તા":26724,"તિ":1123,"તન":712,"ણે":327,"તપ":511,"તર":3967,"તલ":356,"તમ":2718,"ણા":2366,"ણી":846,"ણવ":795,"તઘ":640,"ડો":2767,"ડુ":286,"ડે":1027,"ણં":571,"ડી":3535,"ડિ":410,"ડા":5615,"ડવ":598,"ડર":315,"ડભ":218,"ડબ":277,"ઠા":1831,"ટ્":673,"ટે":774,"વ્ય":2778,"ટિ":249,"સે ":1144,"ટી":761,"છે":23574,"જન":365,"છી":486,"ઝઘ":245,"જય":244,"છો":305,"જબ":1127,"જર":11973,"જે":2020,"જો":465,"જિ":13950,"જા":1399,"જુ":2555,"જી":794,"જ્":13119,"ઝર":271,"શુપ":2187,"ઝા":567,"સંવ":756,"ટક":225,"સંત":320,"સંખ":325,"ટા":840,"ટલ":348,"ગા":17980,"ગુ":12125,"ગિ":1160,"ઘડ":264,"ગી":427,"૯ ":627,"ગ્":891,"ગો":1143,"ઘર":948,"ઘો":548,"ચર":560,"ચમ":1383,"ચા":2321,"ચિ":12038,"ચી":259,"જં":215,"ચો":437,"ચ્":251,"જક":251,"શહે":470,"૫ ":1091,"કર":2446,"કમ":270,"કલ":513,"કપ":478,"ખં":251,"કડ":644,"ખલ":227,"ક્":3347,"કો":5466,"કે":1500,"૭ ":792,"કુ":6686,"કૃ":229,"કા":21625,"કી":6922,"કિ":350,"કહ":564,"કવ":583,"ગવ":1115,"ગલ":272,"ગર":2876,"ગમ":18397,"ખ્":4351,"૮ ":673,"ખે":6309,"ગન":575,"ગણ":1117,"ગઢ":779,"ખા":2071,"૧ ":1168,"શાળ":1055,"શાસ":252,"એવ":6158,"૨ ":320,"૩ ":1541,"૪ ":593,"ઓન":283,"એક":16184,"૦ ":810,"ઉદ":317,"ઉત":2595,"ઉપ":1216,"સગવ":904,"ઉમ":378,"આં":1102,"અગ":1187,"અં":436,"ઇડ":287,"ષના":751,"આહ":296,"ષનો":362,"આવ":35199,"આદ":1695,"આઠ":804,"આણ":440,"અમ":826,"અર":256,"અન":2071,"ંવ":912,"ંસ":486,"ંત":1429,"ંથ":310,"ંદ":2195,"ંધ":939,"ંબ":1038,"ંભ":281,"ંમ":287,"ંક":540,"શ્ચ":11655,"ંગ":2855,"ંખ":377,"ંચ":4016,"ંજ":464,"ંટ":564,"ંડ":982,"ંઠ":1433,"હે 
":1152,"શ્ર":320,"શ્વ":348,"૧૩":1467,"૧૧":1091,"૧૯":283,"૧૦":694,"વડો":2345,"ોટ":1277,"ોડ":1517,"ોજ":373,"વલી":267,"ોન":3584,"ોધ":322,"ોત":616,"ોદ":3809,"ોગ":284,"ોક":3826,"ોઇ":523,"્ટ":1147,"્ત":4385,"્ણ":325,"્દ":602,"્થ":651,"્ધ":1001,"્પ":336,"્બ":262,"વર્":2136,"્ક":609,"્ગ":267,"્ચ":11827,"ોમ":362,"ોલ":1612,"ોય":246,"ોર":2045,"ોવ":257,"વલસ":582,"ોળ":645,"્સ":302,"્ષ":3855,"્વ":11263,"્લ":14321,"્ર":8621,"્ય":26212,"્મ":1641,"ૂર":2771,"ુદ":699,"ુધ":815,"ુન":510,"ુણ":524,"ુત":213,"ુમ":369,"ુર":8763,"ુપ":2377,"ુવ":804,"ુસ":354,"ુલ":6351,"ૂચ":717,"વનો":376,"વનુ":6376,"ૃત":300,"ેક":249,"ેત":5179,"ેડ":2071,"ેટ":618,"ેઠ":219,"ેજ":326,"ેગ":653,"ેઘ":271,"ષા ":222,"વતા":228,"ેર":3345,"ેલ":37195,"ૈક":6315,"ેશ":13804,"ેવ":2024,"ેન":1094,"ેપ":540,"ેમ":3602,"ૈદ":389,"ેસ":990,"હિં":776,"હાલ":1485,"હાર":571,"હિન":919,"ઇ ":1018,"આ ":2702,"ાં ":41350,"ઓ ":7041,"હેર":525,"હેલ":239,"હેવ":536,"હેસ":730,"એ ":663,"ાઇ ":270,"હોદ":867,"હ્મ":326,"ાઓ ":6349,"ં ":62940,"ાગ ":214,"ાડ ":990,"ાદ ":1109,"ાણ ":231,"ાત ":12086,"ાન ":979,"ામ ":12051,"ાલ ":1653,"ાર ":2510,"ાય ":3046,"ાવ ":357,"િક ":1785,"ાસ ":1162,"ષિણ":1462,"ે ":34827,"ો ":11870,"સણા":264,"ષ ":917,"સ ":4627,"સમો":292,"વ ":3614,"શ ":598,"સરા":281,"ષ્ટ":602,"િ ":698,"ુ ":1292,"ી ":24520,"ા ":103799,"સવા":466,"સુર":1060,"દ ":4991,"થ ":242,"સીઓ":544,"ન ":4557,"સુદ":463,"ધ ":850,"સાડ":665,"સાત":801,"સાણ":973,"સાગ":213,"પ ":563,"સાય":2235,"સામ":236,"સાર":662,"સાવ":281,"સાબ":1406,"બ ":1292,"મ ":27791,"સોન":293,"ર ":16908,"ય ":11483,"લ ":10111,"હતા":268,"હત્":6808,"ળ ":723,"ક ":19636,"ગ ":1433,"સ્વ":268,"સ્થ":430,"ચ ":2010,"સ્ટ":235,"સ્ત":1316,"સ્ક":359,"જ ":4325,"ટ ":1263,"ડ ":2185,"ઠ ":958,"ઢ ":645,"હવે":238,"ણ ":3405,"હવા":327,"ત ":27700,"ૂચ ":710,"િત્":251,"ાસા":294,"ાસિ":270,"ાસી":1732,"ાહો":869,"ાષા":325,"ાસણ":337,"ુલ ":6018,"ાસર":237,"ાષ્":514,"ાલન":2232,"ાલપ":364,"ાલય":562,"ંગ ":904,"ારે":564,"ાર્":428,"ારો":350,"ારી":908,"ારા":1301,"ારત":13028,"ારમ":342,"ારડ":261,"ાયત":689,"ાયડ":229,"ામા":16128,"ુર 
":3220,"ાવી":500,"ાવા":1445,"ાવલ":274,"િકે":222,"િક્":471,"ંચ ":1075,"ાવત":219,"ાળા":1219,"ાલો":667,"ાલુ":18139,"ાલી":427,"ાલિ":223,"ાલા":242,"ાનો":1081,"ંટ ":317,"ાનપ":429,"ાના":6337,"ાનુ":3493,"ાની":787,"ંજ ":235,"ાદર":601,"ામપ":347,"ંત ":433,"ામન":2336,"ામમ":2770,"ાબર":1409,"ાપ્":387,"ુદ ":457,"ાપી":523,"ાપુ":503,"ાપા":321,"ંદ ":612,"ીદા":229,"ીનગ":468,"ીના":6429,"ીને":1070,"ીની":270,"ીનો":264,"િસ્":315,"િલ્":13910,"િલો":366,"િવસ":2730,"િવા":1650,"િનો":226,"િના":989,"િયા":2403,"ાંટ":436,"ાંઠ":1422,"ાંડ":461,"ાંગ":1094,"ાંચ":1295,"ાંત":590,"ાંધ":547,"ાંદ":419,"ાંસ":263,"ીઓ ":502,"િમ ":11653,"િપ ":367,"િત ":360,"િણ ":1447,"ાણા":1231,"ાણી":404,"ાતી":550,"ાત્":264,"ાથમ":992,"ાતે":354,"ાટી":295,"ાટે":280,"િંમ":251,"િંદ":527,"ાડી":1596,"ાડા":2060,"ાજક":237,"ાજી":257,"ાજ્":12491,"ીય ":442,"ાકી":371,"ાગમ":18270,"ાગન":475,"ાઉદ":238,"ું ":21442,"ૂર્":2560,"ેટ ":236,"ુણા":437,"ુજબ":1107,"ુજર":11840,"ુપા":2208,"ુધન":587,"ીયન":389,"ીમા":379,"ીયા":1731,"ીમખ":262,"ીસમ":292,"ુકા":17641,"ુકો":529,"ુખ્":4233,"ુરી":2300,"ુરુ":440,"ુરા":1358,"ુરત":829,"ુવા":696,"તઘર":640,"ણવા":740,"દસ ":512,"ણાવ":406,"દા ":938,"તપુ":465,"તનગ":252,"દી ":294,"તના":216,"દુ ":479,"તમજ":2184,"તમા":321,"તરી":269,"તરા":350,"તો ":423,"થા ":316,"થી ":996,"નવ ":475,"થવા":574,"ના ":41544,"ને ":3899,"ની ":3744,"નો ":5537,"દરા":2352,"દરમ":423,"તું":277,"તાલ":18057,"તાર":306,"તાપ":566,"તાન":302,"ધા ":271,"તેમ":3056,"તેર":1418,"તેન":315,"દક્":1484,"ત્ત":2727,"થમિ":987,"ત્વ":7552,"ત્ય":281,"ત્ર":1452,"થયે":642,"નપુ":541,"પી ":564,"ધીન":468,"ધાર":249,"ધાન":290,"નવસ":483,"નસવ":370,"ધ્ય":3481,"નર્":560,"દુધ":605,"દેપ":250,"દેશ":13431,"દેવ":535,"ધની":595,"દાવ":797,"દિક":394,"દાર":330,"દાદ":262,"દિવ":4303,"દાહ":863,"નગર":2214,"નગઢ":247,"પર ":286,"પણ ":420,"દોદ":293,"દ્વ":331,"દ્ર":524,"ધરા":484,"બા 
":381,"પટે":233,"પાવ":384,"પાર":273,"પાલ":2355,"પાટ":403,"પાડ":735,"પાં":1274,"પશ્":11653,"પશુ":2197,"પલબ":596,"પરા":655,"પંચ":2575,"નું":10559,"નાં":923,"નાર":240,"નામ":306,"નાન":1138,"પછી":428,"ન્ય":559,"ન્દ":395,"બહુ":235,"બાક":373,"બાર":800,"બાય":217,"રજ ":269,"મા ":715,"મી ":253,"યડ ":217,"યન ":455,"બરક":1386,"મો ":1343,"પૂર":2616,"પુર":4959,"પૈક":6303," આ ":2656,"પોર":340," એ ":485,"પ્ર":3459,"પ્ય":389,"માં":41157,"માટ":294,"માન":402,"માણ":276,"માત":359,"માલ":325,"માર":263,"મિક":1063,"મહત":6802,"મહા":1848,"મહિ":850,"મહુ":233,"મહે":917,"યત્":701,"મેઘ":271,"મુખ":4261,"મુજ":1125,"મુવ":353,"યતઘ":640,"મપુ":552,"રે ":767,"મમા":2808,"મધ્":3365,"મદા":1324,"રી ":4562,"મના":2419,"રો ":232,"મજુ":2185,"મખે":264,"રા ":5544,"મતન":247,"મથક":765,"ભિલ":318,"ભાર":12956,"ભાગ":18607,"ભાષ":335,"બોર":287,"રત ":13401,"યા ":2563,"રમ ":485,"યો ":254,"ભરૂ":710,"બ્ર":421,"બ્ધ":597,"ળા ":1138,"રેગ":384,"ળી ":385,"રોત":365,"રાં":597,"રાય":238,"રામ":591,"રાવ":421,"રિક":325,"રાષ":510,"રાડ":230,"રાણ":287,"રાત":11870,"રાથ":991,"રાપ":541,"રાજ":13365,"રીય":783,"રીન":986,"રું":396,"રીક":260,"રિય":414,"રૂચ":710,"રવા":911,"રહવ":219,"રહે":1153,"રહ્":327,"રપુ":361,"રમ્":382,"રમા":804,"લો ":1417,"લા ":25318,"રના":235,"યેલ":764,"લી ":1812,"રદે":816,"રડી":227,"યાલ":588,"યાર":1934,"યાન":576,"યાપ":305,"રકા":1582,"લય ":553,"યવસ":2216,"યપૂ":410,"મોડ":217,"મોટ":454,"યનો":326,"યના":11717,"મ્ય":415,"યમા":407,"લન ":2219,"મ્બ":232,"લ્લ":14131,"વે ":1140,"લોલ":465,"લોડ":396,"લોદ":449,"લોક":3642,"વા ":7780,"વી ":1602,"લુક":18002,"લીમ":321,"લુણ":389,"લિય":312,"લું":9913,"વસ ":2535,"લાન":6932,"લિપ":369,"લાસ":286,"લાવ":223,"લાલ":307,"લામ":5764,"લસા":677,"શક ":371,"વર ":236,"લબ્":597,"વદ ":505,"લપુ":412,"વત ":759,"વડ ":417,"ર્ષ":1963,"ર્વ":2626,"ર્ય":411,"ર્મ":900,"કી ":503,"કા ":605,"કે ":834,"કો ":1354," ૧૦":680,"૧૦ ":535,"૧૧ ":1036,"૧૩ ":1412,"ગઢ ":538," ૧૩":1456," ૧૧":1079," ૧૯":274,"ગર ":1959,"કડી":283,"કડા":244,"કરવ":314,"કરી":1162,"કવા":445," હો":303," હિ":944," હા":540," સો":475," સૌ":273," સ્":714," 
સિ":322," સા":3562," સુ":1717," હત":654," સમ":395," સર":370," સત":226,"કાલ":238,"કામ":9196,"કાર":676,"કીન":6304,"કુલ":6004," લુ":420," લી":386," લિ":414," લા":253,"કહે":527," લો":3728,"કાં":1539," રહ":1203," રા":13805,"કાન":2517,"કાઓ":6055," સં":1919," શા":1268," શિ":264," શહ":475," સગ":922,"ઘર ":662," શ્":290," વા":1551," વિ":1924," વસ":1926," વ્":2581," વૈ":515," વે":236," વડ":2142," વર":2148," શક":474," વલ":624," વદ":498," પછ":431," પટ":261," પણ":391," નો":313," પા":2926," પુ":759," પૂ":2113," પૈ":6312," પર":651," પશ":13847," પહ":226," ધા":386," દ્":305," દે":13156," નગ":894," ધર":567," ના":1491," નિ":408," નસ":380," ને":386," પં":2780," ધો":248," નવ":1299," નર":628," મધ":3389," ભિ":335," ભા":32144," મથ":749," ભર":862," મે":521," મો":1041," મા":2519," મુ":5993," મહ":9472," પ્":3289," પો":348," બો":476," બે":295," મં":217," બી":247," બા":1559," બહ":292," ડા":372,"કોન":2841," ડે":860,"કોળ":215,"કોટ":356,"ક્ષ":1887,"ક્ર":890," ત્":349," થય":778," દક":1478," તે":5306," થવ":379," દર":551," દુ":722," દિ":2858," દા":1294,"કેટ":221," દસ":704," તર":348," તિ":233," તા":24717," ૯ ":497," ગો":553," ગ્":552," ગુ":12029,"ખેડ":1595,"ખેત":4414," ગા":16841,"ગના":399," ૮ ":570," ખે":5659," ખા":1675," ગણ":237," ચર":392," ઘો":266," છો":266,"ગણવ":693," ઝઘ":243," છે":23548," ચો":383," જં":214," ચા":714," ચિ":300," ઝા":412," જ્":480," જુ":259," જા":631," જિ":13843," જો":332," જે":1921,"ખાસ":931," એવ":6158,"ખાન":217," એક":16184,"ખાત":404," ૫ ":907," ૪ ":431," કહ":562," કવ":319," કુ":6308," કા":1279," કો":831," ૭ ":640," કે":929," ક્":383," કડ":504," કપ":373," કલ":240," કર":2121," ઇડ":271," આહ":296," આવ":35196," આદ":1582," ઉત":2589," ઉપ":1212," ઉમ":372,"ગવડ":906,"ગરહ":219,"ગમા":18349,"ખ્ય":4323," અં":432," અગ":1187," આં":1036," અન":2069," અર":252," અમ":825," આઠ":804," આણ":440,"ગાં":635,"ગામ":16798,"ગિય":1057,"ગુજ":11824,"ઘડી":254,"ગોર":435,"ગોધ":222,"ગ્ર":743,"ઘરજ":242,"છી ":438,"ઘોડ":335," જ ":2898,"છે ":23415,"જબ ":1105,"ચરો":362,"ચાર":560,"ચાય":582,"ચિમ":11652,"ચાં":471,"ચાગ":369,"જી 
":285,"જા ":331,"ચમહ":1210,"જે ":397,"જકો":214,"ઝઘડ":245,"જિલ":13824,"જુર":2204,"છોટ":245,"જરા":11822,"ઝાલ":270,"ઠા ":1548,"ટી ":375,"ટા ":336,"જેવ":714,"જેત":374,"જ્ય":12917,"ટે ":225,"ડી ":2334,"ડા ":4375,"ડર ":279,"ટેલ":254,"ટાઉ":239,"ડો ":637,"ણી ":405,"ણા ":1630,"ટ્ર":578,"ડેર":603,"ડેડ":232,"તી ":3871,"ડોદ":1844,"તે ":714,"ડિય":272,"ડાસ":222,"ણંદ":569,"ડીય":740,"તિ ":370,"તા ":7106,"તર ":2912,"થક ":760,"ડાં":355,"ડબ્":254,"ણે ":229},"n_words":[2118540,2468202,1874859],"name":"gu"} -------------------------------------------------------------------------------- /langdetect/profiles/ja: -------------------------------------------------------------------------------- 1 | {"freq":{"é":1545,"и":1279,"а":1241," 『":8564," 』":1624," 。":2126," 、":3623,"あ":3435630,"。":214195,"、":312995," ":2941,"々":3019,"』":21225,"『":21300,"」":29519,"「":29647,"〜":2758,"ア":2611969," あ":52272," ア":58543,"乱":13623,"九":1493,"乗":1644,"久":1407,"主":13646,"丼":69358,"中":24049,"両":2394,"並":1334,"丞":80623,"丕":5670,"世":14223,"丈":118136,"三":6461,"上":13976,"下":7225,"不":54571,"与":2254,"一":26873,"丁":71935,"万":1628,"任":2340,"以":6436,"令":1766,"代":19956,"他":3009,"付":3689,"人":34784,"交":21043,"京":8000,"五":1508,"井":2181,"争":3320,"予":2003,"事":34820,"二":5188,"使":59250,"住":2454,"位":7990,"作":23623,"何":10362,"体":13204,"佐":59544,"伊":1406,"企":2899,"会":27025,"伝":3991,"休":80474,"信":13921,"係":2046,"保":5439,"価":32733,"供":2235," 分":1406,"営":4802,"問":2693,"商":2606,"員":5415,"品":9370,"和":9812,"周":2042,"呼":9439,"命":2467,"味":3638,"含":3079,"名":22983,"同":12059,"吉":1498,"合":22154,"各":2831,"向":3485," 号":4482,"域":5560,"城":2545,"基":5538,"土":3313,"園":3175,"地":19745,"在":13117,"回":5170,"四":2207,"団":7524,"因":1258,"国":40277,"器":3147,"写":1418,"再":1714,"内":9686," 丞":3334," 世":5944," 丈":5457," 
丁":2886,"処":1526,"優":2947,"共":7413,"具":1237,"入":4949,"全":10040,"八":1560,"公":10338,"児":1791,"党":2623,"元":7877,"光":3006,"先":1806,"催":4293,"倫":19778,"個":1623,"原":7882,"受":3764,"取":4325,"反":2686,"及":4976,"参":3018,"司":1601,"号":9329,"台":3682,"可":2330,"口":3431,"化":11592," 佐":1439,"区":8504,"医":2840,"南":6511,"協":4447,"博":1814,"単":3157,"千":2088,"十":1946,"半":2617,"劇":2223,"力":7128," 人":2934,"加":4595," 代":2394,"務":6021,"動":13267," 休":1652,"分":13117," 丼":3391,"初":7471,"別":4976,"利":4213,"制":7969,"則":1331,"前":8950,"創":2681,"崎":2237," 大":1534,"工":4508,"州":9754,"川":7871,"山":10018,"属":5936,"展":2319,"屋":2375,"局":4799,"少":2464,"小":30514,"導":2344,"将":1891,"専":2793,"島":7916,"岩":1342,"当":6576,"形":7101,"役":2410,"影":1589,"式":10389,"引":1979,"張":1336,"強":2073,"応":3170,"念":2270,"律":2571,"後":10708,"得":2357,"年":104829,"平":6377,"帝":2972,"布":1306,"常":2950,"師":2527,"建":3787,"店":2005,"庁":1327,"広":4783,"度":5771,"座":1332,"大":35199,"央":2180,"天":5593,"太":2851,"変":3794,"外":5603,"多":7893,"女":5796,"始":3033,"委":1623,"場":13501,"報":4192,"境":2223,"売":8204,"声":8316,"子":14064,"存":4270,"学":37448,"安":3733,"定":14058,"実":7367,"宗":1706,"宮":21279,"客":1436,"家":48873,"富":1327,"察":1679,"対":8681," 回":2984,"曲":6533,"書":6577,"曜":3792,"昭":3661,"映":4870,"星":2861,"時":14954,"果":2006,"査":2002,"木":3174,"本":36912,"朝":4495,"期":7726,"月":55829,"有":6525,"最":8063,"松":2561,"東":13912,"来":4168,"条":3471,"村":4202,"料":2840,"文":11450,"於":1575,"施":4063,"旅":1301,"族":3755,"旧":3190,"日":77379,"放":12785,"改":2906,"支":3198,"教":10820,"数":8554,"整":1308,"技":5374,"投":1259,"所":11171,"手":10767," 年":95958,"戦":13450,"戸":3149,"成":12592,"提":2781,"推":1511,"接":2250,"挙":1713,"持":4610,"指":8229,"情":3302,"急":1371,"感":1443,"愛":3173,"港":2061,"済":2489,"清":1263,"湾":1437,"源":1888,"演":3153,"気":4829,"民":7826,"水":5727,"江":2655,"決":2408,"河":1989,"治":7454,"波":1773,"派":2998,"活":6032,"流":4367,"浜":1603,"消":1291,"深":1258,"機":12320," 時":1629,"権":6128,"横":1498,"標":2070," 
月":53298,"武":2753,"止":2016,"正":6725,"死":5205,"歌":3328,"次":4580,"欧":1451,"母":1932,"毎":2982,"比":1391,"殺":5325,"校":9373,"株":3423,"業":15026,"楽":6757,"植":1547,"検":1870,"構":5270," 日":47110,"様":2976,"石":2879,"知":6637,"県":18156,"省":2816,"着":1339,"皇":3000,"的":19048,"目":10668,"直":2224,"白":1686,"発":20771,"登":4069,"病":1859,"症":1381,"町":8545,"画":10139,"田":7594,"由":2916,"用":19757,"産":5875,"生":16324,"番":8301,"略":5228,"界":7422,"環":2235,"理":11169,"球":4459,"現":13270,"王":7473,"独":3082,"状":2926,"物":13688,"特":7417,"照":1250,"然":1440,"無":2728,"点":4161,"火":1398,"置":8277,"美":2548,"群":2041,"義":5850,"習":1334,"素":3072,"約":3788,"紀":4911,"級":2880,"統":4477,"経":5810,"組":11006,"結":4672,"続":3548,"編":3445,"総":5306,"線":8126,"米":2827,"系":6844,"等":8344,"策":1400,"第":13434,"算":1686,"積":1366,"究":4684,"空":6782,"程":1685,"種":6006,"立":12201,"競":4491," 番":1554,"神":6975,"社":17675,"示":2628,"移":1880,"称":14999,"科":5836,"福":3372,"要":4830,"規":3792,"視":1426,"親":1689,"観":2376,"解":3594,"西":7073,"補":1253,"裁":1509,"製":5884,"衛":3099,"術":5314,"行":22080,"衆":5487,"表":10987,"警":1923,"議":4992,"護":2333,"調":2776,"読":1601,"説":4782,"語":21957,"認":2658,"論":4375,"設":10824,"記":10247,"計":5326,"言":6896,"話":3787,"評":1446,"路":5085,"超":1256,"足":1256,"起":2835,"賞":3546,"資":3066,"質":3136,"象":3259,"谷":1834,"近":4077,"農":1628,"載":2905,"転":2711,"車":9144,"身":6802,"自":10797,"者":18526,"聞":12087,"聖":2242,"聯":10502,"育":4279,"能":5243,"華":1394,"般":3860,"航":2825,"興":1400,"艦":4184,"色":1827,"英":9312,"信ああ":2064,"葉":3002,"著":2162,"風":1713,"食":1713,"領":3103,"項":2228,"類":3952,"馬":3385,"駅":2902,"館":2783,"高":11470,"連":10914,"造":5408,"進":3210,"送":12678,"通":11192,"速":1858,"遺":1795,"選":8114,"過":1669,"運":6072,"達":1651,"郡":4845,"部":17131,"都":8516,"郎":2090,"配":2845,"金":5425,"野":8402,"量":2343,"重":4451,"鉄":5783,"銀":1566,"録":3006,"関":12598,"間":11298,"開":13989,"門":4039,"降":1592,"限":2003,"院":3827,"陸":3351,"阪":2944,"防":2270,"離":1558,"電":7996,"隊":3468,"際":6340,"青":1618,"非":1816,"面":3571,"響":1740,"音":7019,"始ああ":1913,"殺ああ":2295,")":148109,"(":149030,":":12250,"=":3484,"~":3083,
"交味あ":2597,"使ああ":9134,"使アア":1628," (":4296," )":9852,"価ああ":5083,"行ああ":10289,"組ああ":2099,"表ああ":2782,"一種あ":1572,"類ああ":1338,"手ああ":1245,"場合あ":2928,"世界大":1450,"大ああ":1950,"多あ。":1570,"多ああ":2150,"構成あ":1846,"倫ああ":2719,"成ああ":4644,"部ああ":1870,"戦ああ":1623,"続ああ":1414,"等学校":2276,"売ああ":5009,"声ああ":1329,"通ああ":1426,"送ああ":5822,"生ああ":4620,"ア語:":1902,"造ああ":1673,"ア連休":1985,"世紀あ":1284,"用アア":1501,"作品あ":2764,"ア選手":1921,"用ああ":9493,"佐売あ":1480,"。 ":13103,"、 ":31983,"』 ":1427,"不聯あ":1405,"あ ":66354,"地域あ":1988,"ア ":32799,"催ああ":3126,"あ連":2419,"あ通":3320,"あ選":1976,"あ運":3219,"あ都":1640,"あ金":1288,"あ重":1969,"ア語":8308,"あ電":1906,"あ際":1362,"あ音":2155,"あ関":6111,"あ開":8435,"あ間":2317,"あ認":1333,"あ記":3845,"あ設":5283,"ア系":1829,"あ製":2477,"あ表":4499,"あ行":10711,"あ規":1437,"あ言":4181,"あ解":1569,"あ西":1435,"あ近":1371,"あ起":2247," 『ア":2743,"ア連":2828,"あ高":3598,"ア郡":1249,"ア選":2094,"ア教":1868,"ア放":1489,"ア文":1270,"あ無":1446,"あ特":3612,"あ物":2014,"あ独":1271,"ア朝":1242,"、第":1727,"あ現":2680,"あ王":1453,"あ用":4828,"あ生":5118,"あ略":1610,"あ登":3137,"あ発":11033,"あ目":3554,"あ知":3796,"あ構":3048,"ア州":5899,"、特":1256,"あ機":2062,"あ毎":1746,"あ殺":2375,"あ死":2199,"あ正":1615,"。現":1855,"あ水":1343,"、現":2451,"あ活":3109,"あ流":1320,"ア番":2807,"あ自":4181,"あ聞":2418,"あ聯":1675,"あ者":1261,"))あ":1638,"あ著":1250,"ア社":1403,"あ移":1304,"あ称":1389,"あ神":1585,"あ社":1649," あ 
":1635,"あ第":4428,"、英":3619,"あ立":1390,"、自":1350,"あ続":1310,"あ総":2605,"あ結":2983,"あ経":2414,"あ組":1899,"あ統":1376,"ア王":2616,"あ置":2229,"あ国":6666,"あ基":3151,"あ地":4866,"あ呼":9152,"、小":1990,"あ含":2942,"あ同":5129,"あ名":6504,"あ合":3012,"あ各":1270,"あ加":1535,"あ務":1340,"あ動":1382,"あ初":2158,"あ分":4627,"あ制":2425,"あ利":1785,"あ前":1691,"あ創":1816,"あ原":2367,"あ参":1928,"あ取":2481,"あ受":2846,"あ単":1416,"あ南":1637,"あ倫":4845,"あ内":1663,"あ入":1803,"あ全":3374,"あ公":3342,"あ共":2253,"あ元":1846,"、大":2783,"あ乱":3962,"あ上":3029,"あ下":1378,"あ不":14639,"あ丈":32792,"あ三":1486,"あ丞":19819,"あ世":2331,"あ中":9465,"あ丼":18051,"あ主":4771,"あ他":2009,"あ付":1399,"あ代":2198,"あ企":1331,"あ事":5294,"あ交":5865,"あ人":7782,"あ作":6813,"あ何":3230,"あ佐":13423,"あ位":2504,"あ使":13500,"あ伝":1495,"あ会":1351,"、国":2207,"あ休":17591,"あ信":2085,"あ保":1720,"あ価":8783,"、同":2022,"あ一":16360,"あ丁":17347,"あ東":3287,"あ本":3519,"あ有":3083,"あ書":1761,"あ最":4488,"あ時":2327,"あ映":1487,"あ日":8737,"あ文":2466,"あ教":1976,"あ数":1984,"あ支":1653,"あ放":6620,"あ改":1615,"ア大":3211,"あ提":2042,"あ指":5951,"ア国":2219,"あ持":3443,"ア地":1445,"あ所":2803,"あ戦":3270,"あ成":1753,"あ手":1859,"。本":2143,"あ形":2406,"あ当":1618,"、本":1964,"ア合":4791,"、東":2694,"あ後":3036,"。日":2366,"、日":7073,"あ建":2122,"あ広":2188,"ア共":1242,"ア公":1517,"あ属":1897,"あ小":6851,"ア使":2442,"あ学":2960,"あ存":2964,"あ子":1693,"あ家":4670,"あ宮":4858,"あ定":2671,"あ実":3992,"あ対":5932,"ア事":1661,"ア人":2775,"ア休":2911,"ア佐":2436,"ア作":1731,"あ外":1294,"あ多":5332,"あ変":2304,"あ大":8249,"あ天":1336,"あ女":1848,"ア丁":1757,"ア丞":2294,"ア不":5250,"ア丈":4131,"ア丼":2746,"あ始":1317,"あ場":4272,"あ声":3268,"あア":156757,"ああ":1909463,"あ。":138896,"あ『":3993,"あ』":1518,"あ「":15609,"あ」":3632,"第二次":1255,"あ、":216446,"、あ":21490,"。「":2184,"、『":1828,"、「":4201,"。ア":19354,"、ア":64525,"々あ":2319,"。あ":13106,"『あ":1353,"」あ":19753,"「あ":2018,"」、":1525,"」。":1249,"『ア":7054,"』あ":7906,"「ア":7862,"、使":1676,"、価":1248,"、休":3597,"、佐":2726,"。休":1576,"、人":2001,"、丞":3869,"。丁":1388,"、不":3337,"、丁":3572,"、丈":7156,"、一":1931,"。丼":1300,"。丞":1247,"、主":1393,"、丼":4218,"、中":2406,"。丈":2918,"アア":2154092,"アあ":155866,"ア」":6771,"ア『":1584,"ア』":6210,"ア。":10635,"ア、":16488,"ア)":16388,
"ア(":33336,"ア=":3214,"場ああ":3434,"入ああ":1917,"現在あ":4587,"、)":1834,"』(":8152,"」(":2340,"あ)":37153,"あ(":5782,"会社あ":2364," ああ":28860," あ、":7361," あア":3298," アア":57211,"次世界":1361,"日)あ":7258,"不身あ":2968,"日( ":2303,"一部あ":1349,"作曲家":1274,"基ああ":1599,"録ああ":1363,"対ああ":2786,"あ行あ":9249,"分ああ":1730,"設ああ":2171,"使究あ":1369,"記ああ":2072,"ア番組":2746,"位置あ":2308,"株式会":2868,"小ああ":3121,"言ああ":1584,"家ああ":5177,"あ置あ":2118,"あ総称":1606,"所属あ":1341,"主あ":3885,"丼ア":1464,"丼あ":19189,"使用あ":2638,"乱あ":3541,"与あ":1930,"不あ":8122,"丈ア":3173,"下あ":2803,"上あ":5913,"丈あ":27006,"丈、":1500,"丁ア":1890,"丁あ":18102,"一あ":5817,"丞あ":21257,"丕あ":1261,"世あ":2310,"不ア":1421,"中あ":5030,"丞ア":1844,"人あ":9122,"人。":1476,"人ア":1242,"他あ":1896,"付あ":2208,"代あ":6798,"代ア":1373,"争あ":1758,"事あ":9346,"交あ":4806,"不家":3233,"中国":2514,"作あ":7059,"何あ":2412,"体あ":6471,"位あ":2264,"佐あ":15860,"丼休":1592,"丼丞":2055,"丼丈":2151,"丼丁":1684,"丼丼":1418,"中使":3153,"会あ":6508,"丞佐":1535,"不使":1765,"休。":1523,"休あ":23904,"丞丈":2889,"丞丁":2038,"休ア":2507,"丞丞":1742,"不交":1422,"丈休":2694,"丈使":1243,"丈丈":5575,"丈丞":2374,"丈丼":3223,"丁休":1545,"一価":1575,"不不":1538,"丁丞":1826,"丁丈":3406,"丁丼":1794,"丈丁":3527,"信あ":4017,"交味":2982,"全 ":1304,"以下":1516,"、英語":1539,"係あ":1321,"供あ":1476,"価あ":9568,"使あ":19446,"中央":2121,"使ア":1968,"事交":2728,"丞家":1297,"一種":2197,"事業":2439,"二次":1389,"世界":5865,"佐丁":2480,"佐価":1425,"佐佐":1269,"倫あ":5771,"休丈":2771,"不治":2327,"休佐":1324,"休休":1921,"主義":2043,"人物":1891,"企業":2138,"一般":3547,"不聯":2198,"催あ":3689,"前 ":1371,"宮ああ":1909,"作家":1397,"作品":5289,"世紀":2728,"価使":2601,"佐売":2092,"あ目的":2533,"あ発表":1274,"あ第 ":2940,"内ああ":1292,"丞 ":1754,"丈 ":1322,"丁 ":1266,"丼 ":1638,"在あ":8533,"地あ":4409,"国あ":10614,"国ア":2658,"団あ":1724,"あ音楽":1319,"合衆":4737,"問宮":1556,"可能":1378,"名称":3513,"器あ":1492,"使(":1266,"動車":2200,"営あ":2139,"休(":1962,"和国":2518,"会(":2933,"分類":1336,"分野":1390,"多あ":4892,"大あ":3257,"あ開発":3310,"外あ":1908,"声あ":2287,"売あ":5710,"地区":1390,"地域":3263,"学ああ":2317,"国家":3119,"地不":3338,"在位":1282,"場あ":5556,"域あ":3413,"団体":2330,"あ開催":2692,"基あ":1982,"ア語あ":2144,"あ間あ":1859,"あ関あ":3802,"全国":1795,"加あ":2048,"力あ":3213,"共和":2791," 
人あ":1389,"共同":1280,"化あ":4450,"和 ":3168,"動あ":4507,"務あ":2872,"前あ":3176,"制あ":1242,"定ああ":5359,"別あ":1751,"初あ":3667,"交通":1522,"代表":2575,"使用":3017,"使理":2476,"京都":3402,"佐県":1437," 世あ":1718,"会議":1333,"分あ":3773,"位置":2501,"ア語 ":2251,"使究":4609,"元あ":1489,"一部":2055,"不身":4258,"作曲":2528,"会社":6167,"内あ":4013,"語ああ":3397,"休画":2321,"入あ":2695,"全あ":1427,"品。":1238,"品あ":4556,"丈(":2884,"丁(":1789,"参加":1290,"員あ":2076,"丞(":2210,"丼(":1739,"含あ":3008,"協会":2160,"味あ":2957,"呼あ":8214,"化学":1687,"利用":1828," 世紀":2518,"制度":1248,"及あ":4790,"受あ":2030,"取あ":2208,"号あ":3953,"名あ":9335,"同あ":1953,"合あ":7017,"向あ":2543,"制作":1942,"区あ":2907,"家路":2125,"度あ":3326,"年ア":1913,"年あ":20990,"年、":1688,"広あ":1712,"帝国":1623,"形あ":1829,"年代":2422,"島県":1368,"専門":1642,"当あ":2190,"あ運営":1448,"小説":1987,"対象":1561,"式あ":3352,"川あ":1268,"州あ":2489,"州ア":2755,"常あ":1310,"小治":1760,"学者":3831," 年 ":49544,"成 ":1708,"当時":1631,"応あ":1648,"念あ":1283,"得あ":1713,"後あ":4649,"式会":2869,"平成":1905,"大学":6830,"あ設置":1507,"大戦":1559,"委員":1355,"女小":1372,"女子":1397,"子ア":1884,"子あ":3923,"あ起あ":1522,"場合":3557,"始あ":2180,"あ製造":1274,"子ああ":1318,"国際":4256,"子アア":1695,"あ設立":2139,"家 ":3003,"大会":3699,"物ああ":1862,"大阪":2112,"存在":3074,"家人":2908,"あ記休":1293,"年 ":51327,"島あ":1817,"あ表記":1710,"学校":6834,"家律":2208,"実施":1278,"天皇":1405,"学あ":6550,"定あ":7748,"宮あ":4230,"家ア":2140,"家、":2464,"家。":3423,"家あ":12300,"業ああ":1572,"あ言あ":2286,"小あ":7489,"対あ":3405,"属あ":3978,"局あ":2353,"東不":1238,"東京":5181,"本名":1618,"放送":10914,"映画":3085,"曲家":1330,"施設":1651,"曜日":2035,"教育":2974,"校あ":3206," 日 ":12276,"最初":1340,"楽あ":1408,"時間":1682,"業あ":4045," 月 ":44960," 日あ":15057,"株式":3061," 年(":5941," 年)":9287,"時あ":3451,"教会":1452,"教休":1250,"文使":1758,"文化":2253,"族あ":1560,"日あ":17278,"化ああ":2739,"昭和":3444,"時代":5531,"来あ":2732,"立ああ":3753,"あ放送":5748,"あ東京":1323,"果あ":1257,"日本":21674,"曲あ":2900,"あ日本":7358,"文学":1588,"月あ":5637,"有あ":1810,"最あ":1431,"書あ":2853,"選手権":2431,"本ア":2290,"本あ":10875,"期あ":4322,"日 ":12954,"手権":2431,"、東京":1480,"所属":2119,"月 ":45103,"数あ":3827,"あ戦あ":1312,"教あ":1272,"技術":2097,"務ああ":1643,"年(":6245,"年)":9425,"提供":1355,"指あ。":1899," 年あ":19179," 
年、":1517," 年ア":1448,"家(":2915,"成あ":6012,"情報":2756,"戦あ":3394,"あ持あ":3321,"手あ":2666,"あ指あ":3829,"所あ":3296,"持ああ":1475,"ア合衆":4629,"戦争":2199," 年代":2242,"指あ":4364,"持あ":3953,"あ属あ":1826," 日)":9957," 日(":2924,"あ存在":2820,"点あ":3120,"ア不身":1594,"あ家律":1357,"江戸":1490,"あ広あ":1286,"活丞":1276,"活動":3023,"校(":1251,"動ああ":2193,"、日本":6502,"。日本":2210,"あ国家":1342,"あ地域":1419,"次世":1446,"構造":1383,"毎佐":1485,"日)":10122,"日(":3117,"加ああ":1583,"あ場合":3488,"派あ":1250,"流あ":1921,"正式":1441,"あ多あ":3964,"あ大あ":1447,"機能":1261,"機関":2979,"権あ":1708,"あ国際":1794,"機あ":2329,"共和国":2495," 月あ":5223,"あ対あ":3165,"殺あ":2872,"構成":2233,"止あ":1308,"称あ":9039,"称。":1304,"種あ":3083,"社会":2785,"発表":1469,"種ああ":1425,"第 ":9134,"目的":2889,"社あ":6402,"示あ":1874,"社ア":1383,"あ用あ":3710,"あ生あ":1801,"県家":2001,"発生":1334,"知あ":3526,"番組":5337,"発売":5037,"登場":2565,"略称":3022,"用語":1380,"目あ":4296,"あ知あ":3198,"あ登場":2375,"県あ":2733,"あ発売":3969,"界大":1455,"発あ":3815,"的あ":13077,"環境":1290,"理学":1713,"町あ":2460,"画あ":2581,"現在":6030,"用あ":12045,"用ア":1581,"自動車":2063,"界あ":1868,"独立":1445,"生あ":6030,"産あ":1671,"現ア":1355,"現あ":1628,"理あ":2327,"運営あ":1562,"特別":1260,"大学あ":1847,"衆国あ":2595,"ア放送":1421,"特あ":1759,"物あ":5383,"あ活丞":1241,"般あ":1349,"あ活動":1498,"艦あ":1387,"能あ":2381,"、現在":1773,"英 ":1379,"。現在":1490,"義あ":2621,"大会あ":2008,"総称":1629,"者。":2220,"者、":1458,"者あ":10349,"者ア":1305,"聯あ":3977,"聞あ":3281,"総合":1331,"置あ":6978,"経済":1991,"線あ":3253,"続あ":2112,"組佐":2309,"競馬":1452,"系使":1769,"等学":2286,"競技":1586,"組あ":3887,"組。":1569,"ア州あ":1713,"結あ":1657,"ア州ア":2558,"紀あ":1438,"系あ":2247,"素あ":1485,"あ殺あ":1580,"約 ":1619,"第二":1891,"立あ":5248,"究あ":1391,"空あ":1393,"等あ":3921,"あ構成":1941,"科学":1994,"象あ":2019,"設立":2500,"設置":1708,"賞あ":1603,"製造":1966,"計画":1610,"あ。 ":6469,"あ、 ":22576,"ああ 
":39188,"論あ":1674,"記休":1374,"表記":2830,"身あ":4282,"車あ":2896,"路あ":1577,"象ああ":1255,"開ああ":1704,"起あ":1745,"設計":1269,"言語":1842,"質あ":1680,"郡あ":1437,"部あ":6774,"関ああ":4613,"。アア":19255,"日本ア":2092,"日本あ":9641,"、アア":64286,"路線":1359,"設立あ":2048,"送あ":7525,"通あ":2755,"造あ":2715,"連あ":1360,"進あ":1274,"『アア":7047,"近あ":1367,"』ああ":2158,"「アア":7790,"」ああ":7507,"間ああ":1397,"『ああ":1282,"載あ":2293,"称ああ":4464,"「ああ":1846,"通称":1887,"選手":4697,"、ああ":21181,"都事":2774,"。ああ":12750,"野あ":1575,"運営":1939,"運動":1644,"語:":3289,"連合":1965,"通信":1344,"連休":3247,"自治":1375,"(昭和":2128,"線(":1472,"航空":2161,"営ああ":1448,"自動":2405,"あ、)":1236,"ああ)":35187,"ああ(":4198,"英語":4067,"ああ自":1831,"ああ行":2651,"ああ表":1577,"ああ記":1483,"ああ言":2300,"ああ設":1654,"ああ開":2170,"あ、同":1707,"ああ丞":6562,"ああ一":3244,"ああ丁":5744,"ああ不":4855,"ああ丈":10564,"ああ丼":5871,"ああ主":1236,"ああ中":2062,"ああ価":2294,"ああ使":4600,"ああ作":2558,"ああ佐":5045,"ああ倫":1458,"ああ交":2092,"ああ人":3676,"ああ事":3171,"ああ休":6525,"あ、国":1738,"ああ他":1319,"あ、丁":2287,"あ、一":1711,"あ、不":2123,"あ、丈":4708,"あ、丞":2433,"あ、佐":1739,"あ、休":2616,"あ、人":1506,"あ、主":1290,"あ、丼":2884,"あ、中":1791,"あ。丈":1605,"ああ大":3222,"ああ多":2733,"ああ学":1481,"ああ家":2341,"ああ宮":1422,"ああ小":2540,"あ。日":1290,"あ、日":6150,"あ、東":1989,"ああ後":1790,"術あ":1484,"ああ分":1352,"ああ全":1444,"ああ公":1285,"あ、大":2131,"行あ":14000,"ああ名":2507,"ああ同":2111,"あ、小":1427,"ああ呼":3646,"ああ地":2280,"ああ国":2987,"ああ場":2933,"表あ":3761,"あ、現":2057,"ああ指":1536,"ああ戦":1238,"ああ最":1780,"ああ日":4356,"ああ放":1785,"あ、英":2463,"ああ第":1398,"ああ特":1792,"ああ現":1367,"あ、第":1316,"ああ目":1662,"ああ発":3602,"ああ知":2352,"ああ用":1755,"ああ生":1691,"設あ":3061,"記あ":3151,"言あ":3035,"製作":1647,"説あ":2100,"語あ":8268,"話あ":1899,"ああ。":128417,"ああ、":114147,"ああ『":1790,"ああ「":7669,"ああ」":2702,"行不":1963,"語 ":3303,"あ『ア":1268,"あ「ア":4056,"あ」あ":2473,"あ、ア":42684,"あ。ア":9708,"あ。あ":10112,"あ、あ":15664,"あ、「":2860,"あアア":151645,"あアあ":3890,"衆国":4735,"要あ":2289,"ああア":66491,"あああ":1201680,"時代あ":3504,"発表あ":1306,"高等":2629,"昭和 
":2986,"社アア":1256,"社ああ":2086,"丞(あ":1427,"本あア":1472,"本ああ":2442,"州アア":2688,"品ああ":1797,"本アア":2199,"分野あ":1258,"録あ":1741,"書ああ":1478,"鉄家":3515,"野球":2343,"設置あ":1442,"月ああ":2434,"間あ":6535,"開あ":1967,"関あ":5450,"有ああ":1305,"隊あ":1519,"関係":1758,"際あ":1785,"開催":3071,"面あ":1724,"項あ":1394,"開発":5246,"類あ":2486,"目的あ":2661,"電気":1300,"駅あ":1276,"音楽":3430,"曲ああ":1651,"高あ":1592,"属ああ":2285,"あ事あ":1426,"あ丼あ":5850,"あ丁あ":4319,"あ一あ":4453,"あ丈あ":8854,"あ上あ":1542,"あ不あ":2773,"あ中あ":2093,"あ丞あ":5276,"あ使あ":4014,"あ価あ":2254,"あ佐あ":3168,"あ作あ":1416,"利用あ":1328,"あ丈丁":1285,"あ丈丈":1864,"あ休あ":5479,"あ中使":2747,"あ他あ":1258,"あ丁丈":1300,"点ああ":1562,"あ利用":1457,"あ含あ":2902,"あ名あ":1668,"あ呼あ":8161,"記休あ":1285,"あ名称":2151,"高等学":2175,"丈(あ":1828,"存在あ":2940,"あ基あ":1594,"東京都":2018,"あ交味":2831,"呼ああ":6929,"あ不治":1330,"あ世界":2001,"あ一種":2121,"あ作品":1789,"あ価使":1553,"あ一般":1707,"あ人物":1374,"味ああ":1844,"あ一部":1521,"あ作曲":1773,"あ休画":1355,"あ代表":1400,"あ使用":2721,"あ位置":2381,"あ使究":1788,"あ務あ":1339,"表記あ":2011,"あ取あ":1899,"あ受あ":1927,"アあ作":1244,"アあ丼":1341,"アあ丁":1432,"アあ一":1999,"アあ不":1243,"アあ丈":2548,"アあ丞":1512,"アア、":16393,"アア。":10631,"アア」":6705,"アア』":6191,"アア『":1500,"アアア":1729078,"アアあ":151645,"アあア":19640,"アああ":55529,"アあ、":4837,"ア』あ":2640,"ア。ア":1791,"ア」あ":4452,"ア、ア":8332,"アあ開":1642,"アア語":8298,"アア系":1825,"アア社":1262,"アア番":2788,"受ああ":1416,"アア王":2579,"アア教":1744,"アア放":1421,"アア州":5894,"アア大":2784,"アア国":1719,"放送あ":6907,"アア合":4757,"アア公":1422,"アア休":2327,"アア事":1564,"アア人":2578,"アア作":1525,"アア佐":1706,"アア使":2109,"アア丼":1996,"アア丞":1556,"アア丈":2749,"アア不":4656,"アア ":31584,"ア。 ":1580,"アあ ":4215,"名ああ":2758,"最初あ":1276,"学校あ":2147,"番組。":1557,"番組あ":2493,"向ああ":1587,"知ああ":3322,"校ああ":1456,"含ああ":1792,"アア連":2243,"アア選":2079,"号ああ":1619,"ア』(":1980,"アア=":3088,"アア)":16080,"アア(":33305,"合ああ":3958,"=アア":3113,"年代あ":1428,"開発あ":3113,"登場あ":2088,"略称あ":1402,"発売あ":3963,")あ ":2592,"義ああ":1444,"機関あ":1580,"あ)あ":31163,"あ(あ":1574,"あ)、":1476,"あ)。":2164,")あア":7291,")ああ":30137,")あ、":47800,"(ああ":48329,")、ア":1251,"(アア":20979,"ア=ア":3175,"ア(ア":4269,"ア)あ":13245,"ア(あ":3617,"団体あ":1276,"ア( 
":1748,"置ああ":4412,"合衆国":4735,"丁アア":1421,"上ああ":2005,"丈ああ":11397,"丈あア":1286,"丁ああ":8466,"一ああ":3370,"一あ。":1355,"平成 ":1642,"年) ":3918,"年( ":2247,"年)あ":3507,"式会社":2868,"国あア":1384,"国ああ":2054,"国アア":2374,"年(昭":1867,"載ああ":2044,"地ああ":1883,"者ああ":4317,"丼ああ":8612,"在ああ":4158,"中ああ":1478,"丞アア":1255,"目ああ":1258,"丞ああ":10116,"不ああ":3457,"車ああ":1306,"丈アア":2349,"与ああ":1754,"界大戦":1365,"名称あ":2696,"事ああ":3528,"起ああ":1658,"開催あ":2711,"年あア":3603,"年ああ":7456,"年アア":1814,"乱ああ":1859,"人ああ":3885,"活動あ":2178,"的ああ":3866,"発ああ":2687,"交ああ":1899,"組佐あ":1397,"会(あ":1757,"))":1846,"()":1332,"丈丁あ":1330,"丈丈あ":1676,"休アア":1927," )あ":8664,"会ああ":2436,"休ああ":11780,"(昭":2135,"代ああ":2253,"代アア":1273,"(現":2804,"(英":2015,"付ああ":1616,"=ア":3230,")ア":1792,"(ア":21128,")あ":114771,")」":1247,"(あ":48841,")。":7779,")、":5848,"対象あ":1241,"( ":12526,") ":6625,": ":1725,"~ ":2043,"作ああ":4059,"何ああ":1301,"佐ああ":8185,"体ああ":2330,"当ああ":1803,"日ああ":8218,"日あア":2005,"、)あ":1731,"』(あ":3555,"』(ア":2402,"中使あ":2391},"n_words":[10754229,8353071,5774482],"name":"ja"} -------------------------------------------------------------------------------- /langdetect/profiles/pa: -------------------------------------------------------------------------------- 1 | {"freq":{"ਾਜ਼":17,"ਾਜਧ":12,"ਾਜਾ":13,"ੀਲ ":18,"D":18,"E":20,"F":29,"G":23,"A":52,"B":35,"C":59,"L":38,"M":47,"N":37,"O":37,"H":31,"I":60,"U":14,"T":45,"W":18,"P":47,"S":55,"R":24,"f":103,"g":125,"d":221,"e":655,"b":81,"c":194,"ੀਰ ":33,"a":561,"n":492,"o":398,"l":256,"m":260,"j":16,"k":37,"h":252,"i":537,"w":71,"v":42,"u":236,"t":467,"s":328,"r":400,"p":124,"z":13,"y":106,"x":19,"ਿਆ।":22,"ਾਤਰ":17,"ਾਤਾ":27,"ਾਤੀ":12,"ਿਆਣ":31,"ਿਆਦ":20,"ਿਆਨ":47,"ਿਆਲ":13,"ਿਆਰ":28,"ਿਆਵ":14,"ਿਆਸ":12,"ਾਣਕ":12,"ਾਣੂ":104,"ਿਆਂ":76,"ਾਣੀ":40,"ਾਣਿ":27,"ਾਣਾ":22,"ਾਨੇ":13,"ਾਨਾ":31,"ਿਊਟ":14,"ਾਨੀ":48,"੩੧ ":18," । ":128,"੩੦ ":23,"ਾਦੀ":22,"ਾਦਾ":19,"ਾਨਕ":26,"ੁਝ ":21,"।ਇਸ":14,"ium":27,"is ":22,"ion":36,"ੀਤ ":15,"਼੍ਰ":22,"੦੦੮":24,"ੀਪ ":375,"ੀਨ ":57,"ੀਮ ":51,"ਟੀ ":57,"ਜੋਂ":19,"ਾਗਰ":20,"ਾਕੀ":370,"ਾਕਿ":34,"ਟਾ ":22,"੨੩ ":13," m":24," o":57," h":17," i":57," 
d":21," e":15," f":30," a":133," b":21," c":34," t":120," w":40," p":41," s":54,"੨੨ ":14," r":16,"੨੧ ":14,"੨੫ ":14,"਼ਸੀ":29," H":25," I":54," N":32," O":31," L":21," M":43,"੨੪ ":13," B":32," C":54," A":46," F":17," G":21," D":15," E":17,"ਿਰ ":73," S":47," R":22,"਼ਹਿ":50," P":44," W":15," T":36,"ਜੀਵ":20,"੨੭ ":13,"੨੬ ":13,"ਿਲ ":25,"ਜੁਲ":54,"ਜਿੰ":30,"੨੯ ":16,"ਜੂਨ":35,"਼ਾਂ":36,"਼ਾਹ":32,"੨੮ ":15,"਼ਿਆ":14,"ੀਕ ":34,"ਜ਼ਮ":12,"਼ਿਲ":75,"ਜ਼ਾ":33,"ਜ਼ਿ":88,"ਾਂਦ":190,"ਜਾਂ":231,"ਜ਼ੀ":72,"ਾਂਤ":20,"ਾਂਸ":16,"ਜਾਬ":125,"਼ੁਰ":27,"ਜਾਤ":13,"ਜਾਣ":55,"ਜਾਦ":21,"ਜਿਮ":22,"ਜਿਲ":12,"ਜੀਅ":19,"ਿਸ ":72,"ਾਅਦ":30,"ਜਿਸ":70,"ਜਿਹ":19,"ਾਇਆ":59,"ਾਇਣ":62,"੨੦ ":14,"ਾਇਲ":17,"ਟਰ ":67,"ਾਈਟ":14,"ਾਈਡ":27,"ਾਉਂ":14,"ਾਈਨ":29,"ਾਉਣ":23,"਼ਬਦ":26,"ੀਂ ":52,"ਿਤ ":82,"ਟਨ ":17,"ਚੰਦ":21,"੧੮ ":12,"ਛੋਟ":13,"੧੯ ":14,"ੀਆ ":53,"ਿਨ ":1101,"਼ਰਵ":39,"ਜਨਮ":32,"ਜਨਵ":39,"ਜਨਸ":13,"ਜਦੋ":15,"ਜਧਾ":12,"ਿਬ ":69,"ੀਤਾ":41,"Co":14,"ੀਤੀ":42,"ੁਆਰ":46," In":28," Ma":13,"he ":69,"ਾੜੀ":12,"ੀਟਰ":38,"Ma":13,"Ol":12," Co":14,"In":29,"L ":13,"।":1840,"ੀਕਲ":22,"ੂਪ ":25,"ੀਕਨ":13,"ਿਲੀ":21,"ਿਲਾ":83,"Th":15,"ਿਲ੍":13,"ਿਲੋ":46,"ਿਲੇ":38,"ੂਨ ":35,"ਿਲਦ":12,"ਵ":3164,"ਲ":4004,"ਰ":5772,"ਿ":5178,"ਾ":10004,"਼":1111,"ਹ":4455,"ਸ":5034,"ਦ":5926,"ਧ":297,"ਤ":3377,"ਥ":271,"ਢ":42,"ਣ":777,"ਠ":69,"ਡ":853,"ਮ":2207,"ਯ":188,"ਬ":2031,"ਭ":430,"ਪ":1926,"ਫ":303,"ਨ":4410,"ਕ":3798,"ਗ":2047,"ਖ":660,"ਐ":62,"ਓ":66,"ਝ":61,"ਜ":2041,"ਟ":625,"ਘ":134,"ਛ":70,"ਚ":1522,"ਅ":1321,"ਆ":1270,"ਇ":2053,"ਂ":2880,"ਏ":233,"ਈ":490,"ਉ":422,"ਊ":48,"ੱ":1924,"ੰ":2555," a ":18,"੦":200,"੧":273,"੪":62,"ਿਮਨ":20,"੫":77,"੨":239,"੩":93,"੮":92,"੯":96,"੬":69,"੭":58,"੨੦੦":38,"ੜ":191,"ੀ":4636,"ੁ":1825,"ੂ":1115,"ੇ":3761,"ੈ":2081,"ੋ":1676,"ੌ":206,"੍":1046,"ੂਲ ":22,"ੀਜਿ":21," Ol":12,"ੂਰ ":25,"b ":18,"ਿਹਾ":91,"a ":87,"ਿਸੇ":29,"ਿਸ਼":57,"ਿਸਾ":23," Th":15,"ਿਸਤ":34,"ਚੌਂ":12,"ਚੋਂ":38,"ਿਵੇ":63,"ਿਵਾ":13,"ੀਕਾ":45,"i ":27,"ਿਟੀ":38,"ge":13," in":40,"ic ":12,"fi":14," is":12,"fo":16,"ਚਾਈ":23,"he":95,"ha":27,"gh":19,"go":13,"g ":34,"ea":30,"ਚਾਰ":34,"ec":17,"ਚਾਲ":16,"ed":45,"de":37,"di":36,"ia ":20,"ev":13,"h 
":33,"Ind":16,"ee":14,"el":41,"ei":12,"en":68,"em":14,"et":19,"ੀਆਂ":178,"es":53,"er":114,"ੀਅਤ":34,"ca":15,"e ":169,"ਚੀਨ":33,"be":16,"da":20,"f ":44," of":43,"ct":18,"cs":14,"co":29,"ck":12,"ch":22,"ce":32,"c ":16,"ics":14,"d ":98,"at":58,"as":29,"ar":52,"al":54,"ai":16,"am":68,"an":97,"ac":18,"ad":16,"ab":12,"ਿਨਾ":26,"nt":47,"ns":26," am":51," an":24,"ਿਨੇ":13,"ੈ। ":755,"of":44,"om":42,"on":91,"ol":28," ਅ":1094,"os":19," ਇ":1749," ਆ":274,"ou":18,"or":54," ਏ":119,"r ":72," ਉ":313,"ow":13," ਈ":30," ਕ":1460,"pe":18," ਖ":182," ਗ":918,"ਿਨ੍":12," ਐ":59," ਓ":47," ਜ":1208," ਝ":16," ਟ":112,"po":12," ਘ":42,"pi":19," ਚ":259," ਛ":39,"lo":16,"ਜੋ ":82,"ll":20,"igh":12,"ly":15,"o ":26,"ma":17,"mb":17,"me":42,"mi":19,"mp":27,"mu":48,"na":37,"nc":21,"nd":68,"ne":28,"ng":50,"ni":32,"ਿਤਾ":77,"ਿਤੀ":12," ।":190,"ੀਅਮ":40,"m ":58,"ੀਅਨ":27,"ਿਥਿ":27,"ine":16,"ing":33,"li":35,"le":33,"ld":21,"la":31,"n ":137," co":22,"ht":18,"hu":17,"hi":29,"ho":16,"id":13,"ic":60,"ia":36,"ig":20,"in ":38,"ie":22,"k ":17,"ir":14,"is":39,"it":46,"iu":27,"il":21,"in":129,"io":41,"l ":48,"ਾਰਕ":16,"ਾਰਚ":41,"ਾਰਤ":101,"ਾਰਨ":27,"wo":13,"ਾਮਿ":15,"y ":61,"wa":12,"ve":29,"ਾਰੀ":54,"ur":21,"us":14,"ਾਰਾ":51,"ut":16,"um":56,"un":15,"ty":14,"ਾਰੇ":39,"ua":16,"to":25,"ts":12,"tr":30,"te":65,"ti":67,"ਾਬਕ":377,"th":116,"ta":28,"st":52,"se":26,"sh":12,"si":30," ੨":195," ੩":56," ੪":26," ੫":29,"u ":55,"ਚਰਲ":20," ੬":17," ੭":14," ੮":15," ੯":19,"rs":22,"rt":24,"ry":21," ੧":210,"ro":42,"ri":64,"ਚਲਾ":12,"re":47,"rd":17,"ੁਤ ":26,"ra":24,"t ":72," ਹ":2872," ਸ":2699," ਵ":2408,"ਾਬਲ":23,"ht ":12," ਰ":498," ਲ":715," ਬ":868," ਭ":339,"s ":155,"ੁਣ ":18," ਮ":1134," ਯ":133," ਨ":1023," ਪ":1056," ਫ":184," ਤ":951," ਥ":21,"ਜੇ ":27," ਦ":4245," ਧ":86," ਡ":55,"ਾਬੀ":48," ਢ":16,"pr":12,"ਾਬਾ":23,"ਿਗਿ":35,"ੁਰ ":13," s ":16,"ਾਹੀ":40,"ਾਹਿ":88,"ਿਚਾ":17,"hum":14,"ਾਸਿ":34,"ਾਸ਼":82,"ਾਸੇ":16,"ਜਾ ":63,"ਚਨਾ":19,"ਾਲੇ":56,"ਾਲਾ":46,"ਾਲੀ":54,"ਜ਼ ":52,"ਜੀ ":155,"ਿਖੇ":23,"ਾਵਾ":47,"ਿਖਾ":19,"ਾਵਲ":13,"ਿਕਾ":29," 
th":90,"ym":13,"ਿਕਸ":14,"ਹਾਂ":94,"ਹੀਰ":14,"ਹੀਨ":13,"ਹੁਣ":12,"ਹੁਤ":24,"ਹਿਰ":60,"ਹਿਲ":50,"ਹਿਨ":24,"ਹਿਬ":75,"ਹਿਸ":19,"ਹਾਲ":13,"ਹਾਰ":25,"ਹਾਨ":18,"ਹਿਤ":27,"ਹੀਂ":44,"ਹਾਸ":57,"ਹਾਈ":26,"ਜਨ ":22,"਼ਨ ":23,"ਹਰਿ":36,"ਹਨਾ":25,"er ":48,"es ":28,"ਸ੍ਰ":19,"ers":16,"en ":20,"ਸਿੱ":77,"ਸਿੰ":78,"ਸੂਰ":32,"ਸੂਬ":12,"ਸਿਧ":13,"ਸਿਰ":19,"ਹਨ।":438,"ent":21,"ght":14,"ਸੀ।":71,"ਸ਼ਨ":37,"ਸ਼ਤ":17,"ਸ਼ਰ":12,"ਸ਼ਬ":26,"ਸ਼ਹ":65,"ਸ਼ੁ":34,"ਸ਼ੀ":30,"ਸਾਂ":13,"ਸ਼ਿ":19,"ਸ਼ਾ":149,"ਸ਼ਖ":29,"ਸ਼ਟ":21,"ਸਾਨ":18,"ਸਾਰ":46,"ਸਿਕ":33,"ਸਿਖ":14,"ਸਾਲ":1144,"ਸਾਹ":90,"ਸਿਟ":36,"ਸੀਅ":29,"ਸਾਇ":73,"ਸ਼ੇ":16,"ਸ਼੍":23,"ਸਿਆ":13,"ਸਰਕ":13,"ਸਮੇ":23,"ਸਮਾ":24,"ਗੜ੍":19,"ਸਰੀ":12,"ਸਲਾ":21,"ਸਤੰ":39,"ਸਦੇ":13,"ਸਦਾ":17,"ਸਦੀ":15,"ਸਨੂ":15,"ਗੁਰ":167,"ਗੋਬ":26,"for":13,"ਸਟਾ":14,"ਗ੍ਰ":502,"ਸਨ।":37,"ਚਾ ":12,"ਸਤਾ":54,"ਸਥਾ":37,"ਸਥਿ":13,"ਗਰੇ":16,"ਗਰਾ":30,"ਗਰੀ":415,"ਿਚ ":162,"ਾਹ ":17,"ਾਸ ":54,"਼ਟਰ":18,"ਿਕ ":134,"ਕੰਪ":19,"ਾਲ ":1369,"੧੬ ":14,"cti":12,"ਗਸਤ":63,"੧੭ ":15,"ਾਰ ":238,"੧੪ ":14,"੧੫ ":13,"੧੧ ":12,"ਾਮ ":62,"ਗਿਆ":104,"੧੨ ":16,"ਗਾਂ":14,"੧੩ ":12,"਼ਖ਼":29,"ਾਬ ":99,"੧੦ ":14,"ਹੱਦ":12,"ਖਾਂ":21,"ਖ਼ਸ":29,"ਖਿਆ":40,"ਾਨ ":155,"com":13,"ਗਣਿ":17,"ਿਆ ":232,"ਾਦ ":29,"੦੮ ":24,"cs ":14,"ਾਣ ":28,"ਾਤ ":23,"ਖੇਡ":37,"ਖੇਤ":37,"ਾਜ ":52,"ਖੋਂ":14,"ed ":32,"ਕੌਮ":15,"ਾਗ ":23,"ਕ੍ਰ":37,"ਸੰਸ":37,"੦੦ ":21,"ਘਰ ":13,"ਾਈ ":117,"ਸੰਖ":18,"ਕਾਂ":22,"ਸੰਬ":52,"ਸੰਤ":12,"ਕਿਤ":57,"ਾਂ ":1353,"਼ੀ ":81,"ਹੋਏ":48,"ਹੋਇ":70,"ਹੋਈ":20,"ਕਾਲ":30,"ਕਾਸ":23,"ਕਾਬ":25,"ਕਾਰ":103,"ਕੀਤ":87,"ਹੋਰ":25,"ਹੋਣ":23,"ਕਿਲ":58,"ਕਿਸ":74,"ਕਿਹ":19,"ਕੁਝ":20,"ਕੁੱ":13,"dia":15,"਼ਾ ":51,"ਹਿੰ":31,"ਹੈ।":922,"ਕੇਸ":12,"ਹੁੰ":468,"ਕੋਈ":24," ਅਪ":47," ਅਨ":20," ਆਉ":20," ਅਧ":17,"ੱਖੀ":14,"ੱਖਾ":16," ਅਮ":64," ਅਰ":39," ਅਸ":17,"ੱਖਰ":18,"ਕਦੀ":15," ਅਜ":33," ਅਗ":67," ਅਕ":54," ਅਤ":411,"ੰਸਥ":15,"ਗਾ ":22,"ਕਨੀ":13,"ੰਸਾ":14,"re ":16,"ੱਚੋ":29,"ੱਛਮ":12,"ਕਤੀ":13,"ਕਤੂ":34,"ੱਜੋ":16,"rs ":13," ਉੱ":56," ਉਹ":51," ਉਸ":69," ਇੱ":255," ਇੰ":25," ਉਪ":25," ਉਨ":48," ਉਤ":19," ਏ।":18," ਇਹ":533," ਇਸ":723," ਇਲ":20," ਅੰ":217," ਅੱ":34,"ਾ। ":18," ਇਨ":39," ਇਤ":23," ਇਥ":21,"rig":16,"ਗਰ ":22," ਆਮ":22,"ੱਖਣ":15," ਆਰ":19," ਇਕ":94," ਆਦ":14," ਆਪ":76," ਆਬ":12,"ਸਟ ":13," ਕਰ":183," ੨ 
":19," ਕਲ":399," ਕਹ":19," ਕਾ":86," ਕੀ":109," ਕਿ":260," ਕੁ":68," ਗਈ":32," ਕੇ":73," ਕੈ":22," ਕੋ":64," ਕੌ":24," ੩ ":14," ਕ੍":23," ਗਏ":21,"ਕਸ਼":26," ਓਲ":31," ਕਈ":21," ੧ ":21,"ry ":17,"ਸਤ ":69,"ਕਲਾ":14,"ਵਰਗ":12,"ਕਲੰ":379," ਏਫ":13," ਏਨ":15," ਏਸ":15,"ਵਰਸ":36," ਐਗ":22," ਏਲ":14,"ਵਰਤ":32,"ਵਰੀ":76," ਜ਼":109," ਜਿ":159," ਜਾ":364," ਜੁ":64," ਜੀ":148," ਜੂ":37," ਜੇ":15," ਜੋ":96," ਜੰ":39,"ਸਨ ":38,"ਕਰਕ":17," ੮ ":14,"ਕਰਦ":41,"ੱਡਾ":23,"ਕਰਨ":55," ਚਿ":16," ਚਾ":51," ਚੁ":16," ਚੀ":38," ਚੰ":37," ਚੱ":12," ਜਰ":13," ਛੋ":14,"ਕਰੀ":18," ੯ ":15,"ਕਰੋ":16," ਜਨ":89," ਜਦ":19,"ਕਲਚ":23," ੬ ":12,"ੱਤਰ":42," ਚਲ":15," ੭ ":13,"ਗੀ ":17,"ਸਭ ":25," ਗਰ":23," ਕੰ":44," ੪ ":15," ਖੇ":84," ਗਣ":16," ਖਾ":28," ਖਿ":13,"ੱਥੇ":12," ਖ਼":15,"ੱਤਾ":19," ਘਰ":13," ਗ੍":436," ੫ ":15," ਗੋ":35," ਗੁ":188," ਗਿ":86,"ੱਤੇ":24," ਤਰ":31,"ੰਖਿ":16," ਤਾ":41," ਤਿ":24,"ੰਗਾ":15,"ੰਗਰ":20," ਤਕ":27," ਤੱ":66," ਦਰ":56," ਦੂ":37," ਦਾ":950," ਦਿ":1171," ਦੀ":419," ਦੁ":51," ਦਸ":54," ਤੌ":38," ਤੋ":497," ਤੇ":160," ਟੀ":56," ਟਰ":13,"ੰਗ੍":60," ਮਈ":37," ਬੇ":15," ਬੋ":36," ਬਾ":492," ਬਿ":25," ਬੀ":32," ਬੁ":18," ਭਗ":25," ਬਹ":34," ਫ੍":14," ਪੱ":30," ਪੰ":163," ਬਲ":14," ਬਰ":25," ਬਨ":14," ਫ਼":66," ਪੜ":16," ਫਾ":12," ਫਿ":18," ਫੁ":20," ਬਣ":71," ਪੋ":18," ਪ੍":133," ਮੌ":17," ਮੋ":24," ਮੈ":37," ਮੀ":19," ਮੁ":481," ਮਾ":131," ਮਿ":97," ਮਹ":95," ਮਸ":25," ਬੰ":15,"st ":12," ਭੌ":12," ਮਨ":31," ਮਤ":12," ਭਾ":230," ਭੀ":16," ਬ੍":13," ਭਰ":19," ਨਹ":38," ਨਿ":106," ਨਾ":302," ਨੂ":313," ਨੇ":127," ਦੱ":14," ਨਵ":51,"ਗਏ ":15," ਦੇ":1425," ਦੋ":31," ਧਰ":56," ਪੁ":69," ਪੀ":18," ਪਿ":76," ਪਾ":125," ਪੈ":40," ਪੇ":13," ਪੂ":34," ਪਰ":200," ਪਹ":57," ਪਟ":13,"ਕੋ ":15," ਲੱ":19,"ਈ ":351,"ੰਦੇ":65," ਵਖ":19,"ੰਦੀ":31,"ੰਦਾ":409," ਵਰ":63," ਵਧ":14," ਵੀ":103," ਵਿ":1204," ਵਾ":793,"ੰਦਰ":40," ਵਸ":13," ਵੇ":12," ਵੈ":16," ਵੰ":18," ਵੱ":108," ਸਕ":45,"ੰਬਰ":125," ਯਾ":23," ਰਚ":14," ਯੂ":72,"ਖੇ ":23," ਮੰ":56," ਰਾ":142," ਰਿ":33," ਰਸ":75," ਰਹ":56," ਰੇ":12," ਲਈ":76," ਰੁ":23,"str":14,"ੰਪਿ":40," ਰੂ":37," ਰੋ":38," ਲਗ":21," ਰੱ":19," ਲਾ":29," ਲਿ":55," ਲੀ":380,"ਏ ":114," ਲੇ":15," ਲੈ":20," ਲੋ":37,"Oly":12,"ਗਤ ":30,"ਲੰਪ":29,"ਲੰਡ":379,"ਲੱਗ":14,"ਂ 
":2409,"ੰਜੀ":16,"ੰਜਾ":127," ਸਭ":32," ਸਬ":19," ਸਮ":69," ਸਨ":75," ਸਪ":16," ਸਰ":55," ਸਟ":13," ਸਤ":61," ਸਥ":20," ਸਦ":12,"ੰਤਰ":46," ਹਰ":55," ਸ੍":21," ਸੋ":27,"ਅ ":12," ਸਾ":1296," ਸਿ":210," ਸ਼":257,"ਖੀ ":31," ਹਨ":519," ਸੇ":33," ਸੂ":56," ਸੀ":135," ਸੁ":40," ਹੋ":233,"ੰਡਲ":14,"ੰਡੀ":26," ਸੱ":24,"ਆ ":416," ਸੰ":107,"ੰਡਾ":15," ਹਾ":41," ਹਿ":46," ਹੀ":66," ਹੁ":491," ਹੇ":15," ਹੈ":1352,"ੰਡਰ":381,"ਟ ":103,"ਝ ":29," ਏ ":29,"ਜ ":145,"ਕਸ ":17,"ਚ ":1074,"ਘ ":72,"ਹਿ ":31,"ਗ ":162," ਚ ":20,"ਹਾ ":59,"ਖ ":140,"ਕ ":1139,"ਹੀ ":112,"ਓ ":12,"ੀ। ":55,"pic":12,"ਕਰ ":44,"ਵੰਡ":19,"ਰ ":1488,"ਵੱਖ":25,"ਹੇ ":27,"ਵੰਬ":36,"ਵੱਜ":17,"ਵੱਡ":34,"ਭ ":31,"ਹੈ ":423,"ਵੱਲ":20,"ਸਕਦ":19,"ਗਈ ":25,"ਮ ":350,"ਫ ":31,"ਕੇ ":100,"ਬ ":211,"ਓਲੰ":29,"ਪ ":461,"ਧ ":64,"ਨ ":1907,"ਥ ":48,"ਕਾ ":68,"ਦ ":171,"ਕਿ ":66,"ਣ ":217,"ਕੀ ":396,"ਹੋ ":30,"ਤ ":537,"ਠ ":22,"ਡ ":132,"ਖਣ ":24,"ਾ ":2802,"ਵਿਸ":21,"ਵਾਰ":49,"ਵਿਕ":13,"ੁ ":14,"ਵਾਲ":96,"ਵਾਸ":13,"ਵਿਗ":34,"ਵਿਖ":22,"ਵਿਚ":200,"ੀ ":2902,"ਵਿਦ":16,"ਿ ":127,"ਵਾਨ":14,"ਵਾਂ":742,"ਹ ":633,"ਸਰ ":20,"਼ ":152,"ਵ ":71,"ਸਾ ":23,"ਵੇਦ":48,"ਸ ":997,"ਵੇਂ":22,"ਸ਼ ":84,"ਲ ":1735,"ਸਸ ":13,"ਵਿੱ":870,"ਸੇ ":56,"ਹਨ ":79,"ਂ।":15,"ਕਨ ":15,"ੋ ":213,"ਸੀ ":89,"੍ ":22,"ਹਰ ":16,"ੇ ":2940,"ੈ ":444,"ੂ ":303,"ਕਟ ":14,"ਰਸ਼":34,"ਰਸਾ":69,"ਰਸਿ":50,"ਰਹਿ":56,"ng ":24,"ਰਹੇ":17,"nce":16,"ne ":14,"ndi":15,"ਰਾਂ":87,"ਰਾਜ":89,"ਰਾਬ":13,"ਰਾਨ":31,"ਰਿਆ":72,"ਰਾਣ":19,"ਰਾਸ":25,"ਰਾਹ":24,"ਰਿਕ":21,"ਰਾਵ":14,"ਰਾਮ":35,"nd ":32,"ਰਿਤ":27,"ਰੀਆ":20,"ਰਮਾ":139,"ਮੰਡ":22,"ਮੰਨ":14,"ਲੇ ":121,"ਰਵਾ":29,"ਰਵਰ":37,"ਰੈਲ":39,"ਰੋਜ":15,"ਰੋਮ":30,"ਰੋੜ":12,"nte":17,"ੰਕ ":58,"ns ":12,"ਰੀਸ":12,"ਰੀਬ":18,"ਰੀਕ":94,"ਰਿਸ":18,"ਰਿਹ":15,"ੰਗ ":69,"। ":1125,"ਕਈ ":21,"ਰੂਪ":26,"ੰਘ ":71,"ਰੈਗ":367,"ਰੈਕ":12,"ਰੇਗ":15,"ਰੇਜ":79,"ੰਜ ":22,"ਲਮ ":13,"ਰਕੇ":20,"of ":41,"ਲਣ ":22,"ਰਕਾ":23,"ਐਗਰ":22,"ਰਨਾ":16,"ਲਾ ":163,"ਰਦੁ":28,"ਰਦੀ":16,"ਰਦਾ":42,"ਰਦੇ":29,"ਰਮਨ":15," ਈ ":16,"ਰਬੀ":20,"ਲੀ ":116,"ਯੋਗ":18," ਆ ":21,"on ":41,"ਰਤਾ":15,"ਰਤਿ":15,"ਰਤੀ":63,"ona":13,"ons":12,"ਯੂਨ":34,"ਯੂਰ":14,"ਯੂਲ":19,"ਲੀਪ":371,"ੱਤ ":62,"ਲੀਵ":25,"ੱਦ ":13,"ਲਾਂ":56,"ਲਾਈ":52,"ਵਲ ":18,"ਲਿਆ":33,"ਲੀਅ":22,"ld 
":18,"ਲੀਆ":17,"ਲਾਵ":16,"ਲਿਖ":25,"ੱਧ ":29,"ਵੇ ":16,"ਲ੍ਹ":16,"ੱਲ ":23,"ਵਾ ":26,"ਲੋਮ":22,"ਲੋਂ":20,"ਲੋਕ":25,"ਲੋਗ":23,"ਵੀ ":125,"ਲੇਖ":15,"ਲੈਂ":19,"ੰਤ ":18,"mb ":14,"ਵਖ ":12,"ੰਡ ":46,"ਲਚਰ":23,"mer":15,"ਲਗਾ":13,"ੰਨ ":21,"lym":12,"ੰਧ ":16,"।ਇ":25,"ੰਦ ":43,"ੰਥ ":14,"mpi":14,"ਰੱਖ":17,"ੱਕ ":255,"ਰੰਥ":15,"ਵਨ ":15,"ੱਖ ":91,"mu ":47,"ੱਚ ":827,"ਆਉਂ":16,"ਅਨੁ":13,"੍ਹਾ":96,"ਇਸ ":693,"ਰਾ ":103,"ਮਨੇ":20,"ਇਹ ":527,"ਮਨੁ":16,"ਮਨੀ":13,"ਅਤੇ":407,"ਰਲ ":24,"ਭਾਈ":15,"ਭਾਗ":17,"ਭਾਰ":147,"ਭਾਵ":14,"ਭਾਸ":44,"ਅਜਿ":15,"ਇਲ ":16,"ਰਨ ":81,"ਰਮ ":38,"ਅਰਥ":18,"ਅਮਰ":51,"ਯਾ ":16,"ਬੋਲ":29,"ਰਡ ":23,"ਅਪ੍":39,"ਰਤ ":82,"ਬ੍ਰ":15,"੍ਰੋ":27,"੍ਰੇ":90,"੍ਰੈ":416,"ਰਣ ":20,"੍ਰਮ":14,"੍ਰਦ":16,"੍ਰੀ":65,"੍ਰਿ":60,"੍ਰਾ":30,"੍ਰਸ":16,"੍ਰਹ":30,"ਰਥ ":20,"੍ਰੰ":13,"ਮੇਂ":24,"ਆਨੀ":21,"ਏ। ":23,"ਆਪਣ":51,"ਆਦਿ":12,"ਆਦਾ":18,"ਆਣਾ":27,"ਮੈਨ":17,"ਮਾਨ":46,"ਮਾਤ":26,"ਮਾਣ":116,"ਮਿਕ":21,"ਮਾਰ":62,"ਮਾਲ":18,"ਮਿਥ":28,"ਮਾਂ":39,"ਇਆ।":31,"ਮੁੱ":38,"ਮਿਲ":41,"ਮਿਸ":19,"ਮੀਟ":34,"ਮੁਖ":14,"ਮੁਕ":29,"ਮੁਤ":380,"ਰੋ ":14,"ਮਹਾ":43,"ਮਹਿ":32,"ਮਹੀ":13,"ਆਵਾ":13,"ਰੂ ":145,"ਭੌਤ":12,"ਆਰਾ":46,"ਰੀ ":651,"ਬੰਧ":25,"ਰੇ ":75,"ਲਈ ":78,"ਉਣ ":23,"ਮਰੀ":43,"ਇਥੇ":16,"ਬਣਾ":43,"ਇਨ੍":18,"ਪੜ੍":12,"ਫੁਟ":13,"ਫ਼ਰ":40,"ਇਣਕ":56,"ਉਸ ":51,"ਉਹ ":40,"ਈਆਂ":13,"ਇਤਿ":19,"ਮਨ ":35,"ਇਸਦ":16,"ਪ੍ਰ":176,"ਪੈਦ":15,"ਅੰਤ":31,"ਅੰਦ":15,"ਅੰਗ":91,"ਅੰਕ":65,"ਭੀ ":15,"ਇਲਾ":16,"ਈਨਾ":22,"ਰਜ ":32,"ਰਚ ":44,"ਬੀਜ":26,"ਰਗ ":14,"ਬਿੰ":25,"ਬਾਦ":22,"ਬਾਰ":32,"ਬਾਬ":12,"ਰਕ ":18,"ਬਾਕ":372,"ਬਾਲ":27,"ymp":12,"ਈਡਰ":15,"ਬਾਅ":27,"ਬਾਈ":14,"ਉਂਦ":22,"ਬਹੁ":25,"ਭਗਤ":25,"ਮੇ ":12,"ਬਲਾ":20,"ਪੱਛ":12,"ਪੰਜ":152,"ਬਰਾ":15,"ਫ੍ਰ":14,"ਮੀ ":18,"ਮਾ ":32,"ਸਮ":81,"ਸਭ":32,"ਸਬ":22,"ਸਫ":15,"ਸਪ":32,"ਸਨ":101,"ਸਵ":19,"ਸਲ":51,"ਸਰ":94,"ਾ।":27,"ਸਟ":58,"ਸਦ":49,"ਸਤ":207,"ਸਥ":50,"ਸਕ":73,"ਵੱ":108,"ਵੰ":62,"ਵੈ":22,"ਵੇ":119,"ਉਨ੍":43,"ਵਸ":16,"ਵਿ":1234,"ਵੀ":154,"ਵਾ":1015,"ਵਨ":21,"ਵਧ":14,"ਵਰ":201,"ਵਲ":39,"ਵਖ":19,"ਲੰ":416,"ਲੱ":23,"ਲੜ":14,"ੌਰ 
":34,"ਲੋ":133,"ਲ੍":24,"ਲੇ":163,"ਲੈ":43,"ਲਿ":106,"ਲਾ":368,"ਲੁ":15,"ਲੀ":580,"ਲਹ":12,"ਰੱ":21,"ਰੰ":40,"ਲਵ":25,"ਲਬ":13,"ਲਮ":25,"ਲਤ":16,"ਲਣ":25,"ਲਦ":24,"ਲਚ":23,"ਲਕ":25,"ਲਗ":28,"ਰੈ":428,"ਰੋ":128,"ਰ੍":22,"ਰੀ":839,"ਰੁ":36,"ਰੂ":205,"ਲਈ":79,"ਰੇ":213,"ਰਹ":94,"ਰਸ":179,"ਰਿ":221,"ਰਾ":502,"ਮੱ":15,"ਮੰ":60,"ਰਲ":37,"ਰਵ":92,"ੀਤ":114,"ੁਆ":59,"ੀਦ":20,"ੀਬ":34,"ੀਮ":70,"ੀਨ":92,"ੀਪ":396,"ੁਖ":20,"ੀਵ":67,"ੁਕ":56,"ੀਰ":67,"ੀਲ":39,"ੇ।":14,"ੁਝ":24,"ੁਟ":25,"ੀਸ":29,"ੁਜ":22,"ੀਆ":241,"ਿਥ":45,"ਿਦ":23,"ਿਣ":20,"ੀਅ":117,"ਿਤ":204,"ੀਂ":64,"ਿਡ":14,"ਿਟ":58,"ਿਮ":51,"ਿਬ":77,"ਬਰ ":175,"ਿਪ":16,"ਿਧ":20,"ਿਨ":1162,"ੀਗ":18,"ਿਵ":98,"ੀਕ":136,"ਿਲ":265,"ਿਰ":130,"ੀਟ":47,"ਾੜ":27,"ੀਜ":44,"ਿਹ":111,"ਿਸ":263,"ਾਡ":16,"ਇੰਟ":13,"ਾਣ":249,"ਿਅ":43,"ਾਤ":101,"ਿਆ":517,"ਾਦ":96,"ਾਧ":15,"ਿਉ":16,"ਾਨ":318,"ਿਊ":33,"ਾਪ":44,"ਾਬ":594,"ਾਮ":104,"ਾਰ":660,"ਾਲ":1569,"ਾਵ":109,"ਿਖ":72,"ਿਕ":215,"ਿਗ":45,"ਾਹ":169,"ਿਚ":202,"ਾਸ":245,"ਿਜ":25,"਼ਾ":203,"਼ਿ":116,"ਾਅ":32,"਼ੀ":109,"ਾਂ":1630,"਼ੁ":43,"ਾਈ":221,"ਾਉ":53,"਼ੇ":19,"ਾਇ":185,"਼ੋ":16,"਼ੈ":21,"ਬਲ ":16,"਼੍":25,"ਾਕ":443,"ਾਗ":63,"ਾਖ":18,"ਾਜ":126,"ਾਚ":13,"਼ਨ":40,"਼ਤ":27,"਼ਟ":24,"਼ਸ":37,"਼ਹ":70,"਼ਵ":12,"਼ਰ":65,"਼ਬ":33,"਼ਮ":21,"਼ਖ":29,"ਹੱ":25,"ਹੰ":15,"ਹੂ":17,"ਹੁ":520,"ਹੈ":1355,"ਹੇ":48,"ਹਾ":329,"ੀ।":87,"ਹੀ":203,"ਹਿ":360,"ਸੰ":167,"ਸੱ":26,"ਹੋ":237,"ਹੌ":13,"ਸੂ":66,"ਸੁ":43,"ਸੀ":222,"ਸੇ":97,"ਹਨ":550,"ਸਹ":12,"ਸਸ":15,"ਸਿ":313,"ਸਾ":1477,"ਸ਼":623,"ਹਲ":14,"ਸੋ":30,"ਹਰ":67,"ਹਮ":13,"ਸ੍":26,"ਦਸ":57,"ਦਿ":1225,"ਦਾ":1748,"ਦੁ":90,"ਦੀ":615,"ਦੂ":48,"ਥੇ":53,"ਦਨ":53,"ਥੋ":13,"ਦਰ":129,"ਤੰ":44,"ਦਲ":21,"ਤੱ":67,"ਧਾ":77,"ਨਜ":12,"ਨਡ":15,"ਧੀ":16,"ਧਿ":28,"ਨਦ":13,"ਨਤ":17,"ਦੇ":1613,"ਦੋ":54,"ਨੁਸ":12,"ਧਰ":65,"ਨਕ":44,"ਤਸ":17,"ਤਵ":19,"ਤੀ":231,"ਤੂ":46,"ਤਿ":96,"ਤਾ":728,"ਣੇ":53,"ਤਨ":20,"ਤਪ":13,"ਤਤ":12,"ਤਰ":186,"ਤਲ":37,"ਤਮ":27,"ਉਹਨ":12,"ਥੀ":15,"ਥਿ":46,"ਥਾ":70,"ਤੋ":518,"ਤੇ":631,"ਨੁੱ":16,"ਤੌ":43,"ਇੱਕ":245,"ਣਕ":72,"ਨੂੰ":329,"ਡੇ":30,"ਣਾ":137,"ਣੂ":108,"ਣਿ":62,"ਣੀ":85,"ਤਕ":48,"ਨੇਂ":14,"ਟ੍":17,"ਟੇ":26,"ਨੇਜ":20,"ਟੀ":130,"ਟਿ":29,"ਡਿ":29,"ਡੀ":68,"ਨ।":476,"ਡਾ":86,"ਡਲ":20,"ਡਰ":410,"ਡਦ":12,"ਮਰ":69,"ਮਲ":18,"ਬੰ":44,"ਮਸ":29,"ਮਹ":96,"ਮੁ":499,"ਮੀ":82,"ਮਿ":154,"ਮਾ":391,"ਮੂ
":19,"ਮੈ":49,"ਮੇ":59,"ਮ੍":15,"ਮੌ":19,"ਮੋ":28,"ਰਕ":76,"ਰਖ":17,"ਰਗ":45,"ਰਚ":72,"ਰਜ":81,"ਰਟ":18,"ਯਾ":37,"ਰਡ":28,"ਯੂ":73,"ਰਣ":36,"ਯੁ":14,"ਰਥ":37,"ਰਤ":198,"ਰਦ":131,"ਰਨ":116,"ਰਫ":22,"ਰਪ":20,"ਯੋ":27,"ਰਬ":46,"ਰਮ":236,"ਪੰ":164,"ਬਲ":53,"ਪੱ":37,"ਬਰ":215,"ਫ੍":15,"ਬਹ":34,"ਭਗ":29,"ਬੁ":20,"ਬੀ":120,"ਬਿ":55,"ਬਾ":578,"ਬੋ":46,"ਬੈ":14,"ਬੇ":32,"ਮਈ":41,"ਮਕ":17,"ਬ੍":15,"ਭਰ":22,"ਮਜ":12,"ਮਤ":30,"ਭਾ":256,"ਭੀ":18,"ਭਿ":14,"ਭੌ":12,"ਮਦ":18,"ਮਨ":98,"ਪਲ":21,"ਨੰ":12,"ਪਹ":57,"ਪਸ":12,"ਪਰ":222,"ਪੂ":52,"ਪੈ":44,"ਪੇ":23,"ਪੀ":37,"ਪੁ":90,"ਪਾ":168,"ਪਿ":130,"ਬਕ":380,"ਪੋ":22,"ਪ੍":176,"ਨ੍ਹ":75,"ਫਰ":17,"ਫਲ":16,"ਬਦ":46,"ਬਨ":25,"ਫ਼":106,"ਪੜ":21,"ਫਾ":21,"ਫਿ":21,"ਫੁ":23,"ਬਣ":71,"ਨਵ":102,"ਦੱ":14,"ਨਲ":14,"ਨਰ":14,"ਨਮ":39,"ਨੇ":196,"ਨੂ":341,"ਨੀ":200,"ਨੁ":33,"ਨਾ":539,"ਨਿ":163,"ਨਸ":36,"ਨਹ":40,"ਨ੍":89,"ਨੈ":12,"ਨੋ":19,"ਪਨ":15,"ਪਣ":56,"ਪਤ":28,"ਪਟ":24,"ਬਦ ":20,"ਕਰ":232,"ਕਮ":20,"੨ ":59,"ਕਲ":439,"ਕਨ":33,"ਕਦ":33,"ਕਟ":31,"ਕਤ":65,"੧ ":77,"ਕਈ":21,"ਕੱ":19,"ਕੰ":48,"ਗਲ":25,"ਗਰ":511,"੪ ":44,"ਖੋ":29,"ਖੇ":111,"ਗਦ":22,"ਗਣ":22,"ਗਤ":41,"ਖੀ":35,"ਖਾ":81,"ਖਿ":54,"ਖ਼":60,"ਕੜ":15,"ਕ੍":37,"ਗਏ":21,"ਖਰ":32,"ਕੌ":24,"੩ ":43,"ਕੋ":104,"ਗਈ":32,"ਖਦ":15,"ਕੈ":22,"ਕੇ":135,"ਕੁ":80,"ਕੂ":15,"ਖਣ":34,"ਖਤ":12,"ਕਾ":323,"ਕਿ":322,"ਕੀ":517,"ਕਹ":19,"ਕਵ":12,"ਕਸ":64,"ਏਲ":14,"ਐਗ":22,"ਪਹਿ":45,"ਏਸ":17,"ਓਲ":31,"੦ ":87,"ਜੇ":38,"ਜੈ":24,"ਜੋ":120,"ਜ਼":304,"ਜਾ":547,"ਜਿ":213,"ਜੀ":234,"ਜੁ":64,"ਜੂ":46,"ਪਿਤ":20,"ੰ ":328,"ਪਾਸ":19,"ਪਿਕ":29,"ਜਦ":23,"ਪਾਰ":18,"ਜਧ":12,"ਜਨ":116,"ਪਿਊ":14,"ਪਾਣ":18,"੯ ":52,"ਛੋ":17,"ਚੱ":12,"ਪਾਕ":32,"ਚੰ":38,"ਜਲ":16,"ਜਰ":27,"ਟਬ":13,"ਟਨ":25,"ਪਾਈ":14,"ਟਾ":78,"ਟਰ":121,"ਜੰ":41,"ੋਗਰ":27,"ਗੜ":29,"੬ ":50,"ਚਕ":12,"ਗਸ":65,"ਗਾ":75,"ਗੂ":16,"ਗੁ":197,"ਗੀ":24,"ਗਿ":132,"ਪੂਰ":40,"ਗੇ":24,"ਗ੍":505,"ਗੋ":52,"੫ ":50,"ਘਰ":19,"ਚਿ":28,"ਚਾ":99,"ਚੁ":16,"ਚੀ":48,"ਚੇ":17,"ਪਿੰ":41,"ਪੁਰ":63,"ਚੋ":42,"ਚੌ":23,"੮ ":73,"ਛਮ":12,"ਜਗ":13,"ਚਨ":19,"ਚਰ":32,"੭ 
":49,"ਚਲ":24,"ਅਤ":454,"ਆਂ":297,"ਅਜ":34,"ਅਗ":69,"ਅਕ":61,"ਆਣ":33,"ਇਆ":135,"ਆਖ":12,"ਅਸ":23,"ਅਮ":113,"ਅਰ":63,"ਅਲ":18,"ਅਦ":42,"ਆਇ":12,"ਅਧ":17,"ਆਉ":22,"ਅਨ":55,"ਅਪ":47,"ਪਣੇ":34,"ਈਆ":13,"ਇਥ":21,"ਇਤ":25,"ਇਣ":62,"ਆਸ":22,"ਪਣੀ":15,"ਆਰ":95,"ਆਮ":24,"ਇਕ":101,"ਆਵ":17,"ਆਲ":16,"ਆਨ":54,"ਆਦ":35,"ਆਬ":13,"ਆਪ":78,"ਆ।":54,"ਬਾ ":22,"ਂਕ":18,"ਂਗ":31,"ਈ।":12,"ਂਟ":25,"ਂਡ":36,"ਂਸ":30,"ਂਤ":24,"ਂਦ":244,"ਉੱ":56,"ੋੜ ":16,"ਬੀ ":78,"ਏਨ":15,"ਏਫ":13,"ਇਨ":48,"ਅੱ":34,"ਇਲ":37,"ਅੰ":220,"ਈਟ":14,"ਇਹ":542,"ਮਈ ":41,"ਇਸ":741,"ਉਂ":46,"ਈਡ":27,"ਈਨ":32,"ਏ।":33,"ਬੇ ":13,"ਉਦ":21,"ਉਨ":51,"ੋਇਆ":71,"ਉਣ":28,"ਉਤ":19,"ਉਪ":26,"ੜ ":44,"ਇੱ":255,"ਇੰ":32,"ਉਸ":76,"ਉਹ":56,"ਊਟ":14,"ਪਰਮ":113,"ਨਕਸ":15,"ਪਤ ":12,"ਧਰਮ":31,"ਧਰਤ":22,"ਪਰ ":49,"ਧਾਂ":14,"ਉੱਤ":40,"ੋਬਿ":25,"ਦੁਆ":48,"ਦਿੱ":27,"ਦੁਨ":24,"ੋਮੀ":24,"ਦੂਸ":12,"ਦੂਜ":16,"ੋਮਨ":25,"੦੦":66,"੦੮":25,"੧੦":19,"੧੩":12,"੧੪":15,"੧੧":14,"੧੨":18,"੧੭":17,"੧੮":24,"੧੫":17,"੧੬":17,"੧੯":42,"੨੧":14,"੨੦":53,"ਦਾਂ":14," ੧੮":24," ੧੭":16,"ਦਾਨ":17," ੧੬":17," ੧੫":16," ੧੪":15," ੧੩":12," ੧੨":16," ੧੧":13,"ੋਲੀ":19,"ਦਾਰ":26," ੧੯":41," ੨੦":53," ੨੧":14,"ਦਿਆ":31," ੨੭":12," ੨੬":12," ੨੯":17," ੨੮":14," ੨੩":13," ੨੨":14," ੨੫":13," ੨੪":14,"ਦਾਸ":19," ੩੦":25," ੩੧":16,"ਦੀਆ":75,"ਦਿਨ":1097,"umb":16,"੍ਹ ":21," ੧੦":19,"ਦੋਂ":17,"੩੧":18,"੩੦":26,"um ":31,"੨੯":17,"੨੮":15,"੨੭":13,"੨੬":13,"੨੫":14,"੨੪":14,"੨੩":13,"੨੨":14,"ੱਤ":172,"ੱਢ":13,"ੱਡ":45,"ੱਧ":49,"ੱਦ":25,"ੱਥ":25,"ੱਗ":23,"ੱਕ":286,"ੱਖ":185,"ੱਛ":18,"ੱਜ":30,"ੰਸ":52,"ੱਚ":879,"ੱਟ":28,"ੱਠ":21,"ੱਲ":52,"ੱਸ":22,"ੰਥ":18,"ੰਤ":85,"ੰਧ":36,"ੰਦ":618,"ੰਡ":502,"ੰਬ":155,"ੰਮ":32,"ੰਨ":45,"ੰਪ":49,"ੰਕ":75,"ੰਖ":19,"ੰਗ":218,"ਦੇਸ":44,"ਦੇਵ":25,"ੰਜ":177,"ੰਟ":22,"ੰਘ":85,"ਨਵਰ":42,"ਨੀਆ":32,"ਨੀਅ":13,"ਨਿਵ":62,"ਨੀਕ":15,"ਨਿਆ":24,"ਬਕ ":378,"ਨਾਨ":32,"ਨਾਲ":179,"ਨਾਵ":16,"ਨਿਕ":18,"ਨਾਮ":38,"ੜ੍":38,"ਨਾਂ":88,"ੜੀ":29,"ੜਾ":29,"ੜੇ":20,"ty ":13,"ਨਸੰ":13,"ਨਹੀ":36,"ਏਫ ":13,"ਨਵੰ":36,"ੁਦ":13,"ੂਆ":13,"ੁਨ":42,"ੁਤ":412,"ੁਣ":26,"ੁਮ":18,"ੁਰ":279,"ੁਸ":47,"ਿੰ":258,"ੁਲ":97,"ਿੱ":1026,"ੈ।":922,"ੂਜ":17,"ੂਨ":78,"ੂਦ":14,"ੂਰ":124,"ਏਲ 
":13,"ੂਬ":50,"ੂਪ":31,"ੂਸ":23,"ੂਲ":53,"ੁੰ":490,"ੁੱ":117,"ੂੰ":335,"ੇਂ":94,"ੇਕ":12,"ੇਖ":36,"ੇਦ":53,"ੇਤ":52,"ੇਡ":47,"ੈਂ":49,"ੇਟ":14,"ੇਜ":108,"ੇਗ":24,"ੇਰ":47,"ੇਲ":44,"ੈਕ":39,"ੇਵ":52,"ੈਗ":377,"ਧਾਰ":30,"ੇਨ":27,"ਧਾਨ":15,"ਧਿਆ":15,"ੈਣ":13,"ੈਦ":24,"ੇਸ":109,"ੈਟ":17,"ੈਲ":73,"ੈਰ":17,"ੈਨ":38,"ੈਸ":23,"ੋਂ":617,"ੋਇ":77,"ੋਈ":46,"tio":34,"thu":15,"ੋਟ":27,"ੌਂ":47,"ੋਡ":13,"ੋਜ":32,"ੋਧ":14,"ੋਨ":32,"ੋਪ":22,"ੋਣ":30,"ੋਤ":20,"ੋਏ":51,"ੋਗ":55,"ੋਚ":14,"ੋਕ":33,"ੌਜ":14,"ੌਤ":23,"ੋਮ":61,"ੋਬ":36,"ੋਲ":71,"ੋਰ":92,"ੋਵ":23,"ੋਹ":24,"ੋਸ":21,"ੋੜ":31,"ੌਮ":17,"ੌਰ":50,"ted":14,"੍ਹ":146,"੍ਰ":834,"ter":25,"the":71,"ਆ। ":38,"ਤੂਬ":34,"ੇਲ ":13,"ੇਰ ":12,"ਤੀਆ":19,"ੇਸ ":12,"ਤਿਹ":21,"ਤਿਆ":30,"ਤਾਨ":49,"ਤਿਕ":18,"ਤਾਬ":383,"ਨਕ ":14,"ਤਾਰ":18,"ਤਾਂ":61,"ੇਵ ":19,"ਨਮ ":32,"ਤੌਂ":25,"ਤੋਂ":485,"ੈਨ ":18,"ਤੌਰ":15,"ੈਲ ":41,"ਨਾ ":113,"ਥਾਂ":13,"ਥਿਤ":12,"ਥਿਹ":26,"ਥਾਨ":18,"ਦਸੰ":39,"ਨੀ ":107,"ੇਗਰ":13,"ਦਰਿ":26,"ਨੇ ":143,"ਤੰਬ":41,"ਤੱਕ":12,"ਤੱਤ":53,"ਂਟ ":12,"ੇਜੀ":27,"ੇਜ਼":73,"ੇਡਾ":24,"ਂਡ ":22,"ੈਂਡ":21,"ੇਡਦ":12,"ਂਗ ":18,"ੇਦਨ":48,"ੇਤੀ":16,"ੇਤਰ":23,"ੋਂ ":614,"ਤੋ ":20,"ਣਕਾ":14,"ਤਕਨ":13,"ਥੇ ":48,"ਦਨ ":52,"ੈਕਟ":16,"ੋਈ ":39,"ce ":16,"ੈਗਰ":366,"ੋਏ ":45,"ਥਾ ":23,"ੇਵਾ":19,"ੇਸ਼":80,"am ":12,"ੋਕ ":20,"al ":27,"ਣਿਤ":17,"ਣਿਆ":40,"and":32,"amu":47,"an ":23,"ੈਦਾ":18,"ਦਰ ":34,"ਣਾਇ":20,"ੌਂ ":41,"ੋਣ ":17,"ਦੋ ":25,"ਤਰਾ":24,"ਤਰੀ":36,"ੋਪ ":15,"ਦੇ ":1528,"at ":19,"ਦੀ ":510,"as ":15,"ੋਰ ":44,"ੋਲ ":12,"ਦਾ ":1619,"ati":17,"ਦਿ ":15,"ੜਾ ":14,"ੁਟਬ":13,"ੀਸਟ":12,"ੀਵਨ":13,"ੀਵਰ":32,"ੁਕਾ":27,"ਤਕ ":19,"ੜੀ ":26,"ੀਮਾ":12,"�":48,"ੁਰਦ":34,"ਡਦੀ":12,"ੜੇ ":18,"ੁਨਿ":12,"ੁਨੀ":20,"ਣੀ ":69,"ੁਤਾ":380,"ਣਾ ":86,"ਡਰਾ":13,"ਣੂ ":105,"ਣੇ ":51,"ਆਂ ":277,"ੁਸ਼":14,"ਂਦਰ":14,"ਂਦੀ":34,"ਂਦਾ":168,"ਂਦੇ":20,"ੈ।ਇ":14,"ੂੰ ":327,"ਤਰ ":83,"ਅਨ ":34,"ਡਾਂ":31,"ੁਰੂ":142,"ੁਰਾ":23,"ੁਰਸ":21,"ਿੰਦ":69,"ਿੰਡ":45,"ਿੰਘ":68,"ਿੰਗ":49,"ਡੀਅ":12,"ਿੱਤ":44,"ਿੱਧ":18,"ਿੱਖ":61,"ਅਤ ":35,"ਿੱਚ":863,"ੁਲਾ":52,"ੁਲੀ":15,"ਅਦ ":27,"ੂਰਜ":24,"ੂਰਬ":14,"ੂਬਰ":34,"ਤਾ ":184,"ਤੀ ":175,"ੂਨਿ":14,"ੂਨੀ":19,"ਤੇ ":613,"ਜੰਤ":20,"ਜੰਗ":15,"ਡਰ ":388,"ਆਨ ":22,"ੂਲੀ":19,"ਇਆ ":100,"ਆਪ ":24,"ਅਮ ":45,"ੁੱਖ":54,"ਅਰ ":17,"ੁੰਦ":474,"ਡਲ ":18,"ਟਬਾ":13,"ਡੀ ":25,"ਡਾ 
":35,"ਨ। ":77,"ੇਂ ":52,"ਡੇ ":21,"ੜ੍ਹ":37,"ਟਰੀ":23,"ਟਾਂ":13,"ਟਾਇ":14,"ਅਕਤ":41,"ਅਕਾ":15,"ਅਗਸ":63,"ਆਮ ":19,"ਆਰ ":21,"ਣਕ ":58,"ਟਿਆ":13,"ਇਕ ":86,"ਟੀਮ":54,"ਟ੍ਰ":17},"n_words":[112478,136533,89577],"name":"pa"} -------------------------------------------------------------------------------- /langdetect/profiles/so: -------------------------------------------------------------------------------- 1 | {"freq":{"YO ":13,"jec":34,"jee":32,"D":313,"E":183,"F":66,"G":214,"A":673,"B":249,"C":240,"L":152,"M":367,"N":163,"O":122,"H":180,"I":236,"J":129,"K":173,"U":82,"T":107,"W":226,"V":11,"Q":76,"P":22,"S":486,"R":114,"Y":96,"X":120,"Z":10,"f":458,"g":2154,"d":5233,"e":4497,"b":2102,"c":900,"a":24510,"n":3878,"o":5982,"l":3786,"m":2460,"j":397,"k":2897,"h":3132,"i":6615,"w":2306,"v":27,"u":3829,"t":1545,"s":2871,"r":2895,"q":718,"p":77,"z":23,"y":3607,"x":1698,"jaa":13,"jab":16,"jar":10,"jam":12,"Xam":11,"joo":14,"Xas":10,"jis":14,"jir":95,"jii":13,"jid":17,"jo ":15,"Far":12,"isk":69,"ism":12,"isl":25,"iso":22,"isu":42,"ist":67,"ita":17,"is ":71,"ion":20,"ir ":84,"irs":56,"irt":28,"iro":22,"irk":32,"iri":56,"isi":32,"ish":96,"ise":18,"isb":17,"Wux":23,"isa":134,"ire":16,"ira":131,"iyi":10,"iyo":394,"iya":423,"iye":65,"ixi":16," l":598," m":880,"kii":161," n":189," o":537," h":365," i":795," j":267," k":1328," d":1214," e":328," f":95," g":401," a":1317," b":593," c":361," y":296," x":283," u":599," t":376," w":1834," q":291," p":20," s":807," r":112,"km ":14," J":125," K":142," H":119," I":161," N":93," O":34," L":81," M":322," B":217,"khd":24," C":229,"kha":11," A":275," F":59," G":169," D":236," E":41," Z":10," Y":40," X":90," S":438," R":66," Q":69," P":18," W":211," U":33," T":83,"kee":20,"key":11,"kh ":38,"Web":10,"Waa":56,"ku 
":434,"kor":15,"Wax":40,"koo":94,"War":17,"XEE":11,"مد":16,"Gal":22,"و":25,"ي":76,"ف":13,"ق":12,"ل":77,"م":62,"ن":31,"ه":13,"د":46,"ح":26,"ب":37,"ة":21,"ا":98,"أ":11,"ع":29,"ش":21,"س":23,"ر":49,"kar":49,"kas":30,"kan":49,"kal":143,"kam":32,"kad":48,"kac":14,"kab":10,"kaa":81,"ka ":1268,"A ":83," Ga":53," Ge":18,"Da":59,"DU":11,"Cu":18,"Co":13,"DE":11," Fi":17,"Ce":13,"DH":15,"Ci":23," Ha":35,"Du":13,"EY":13," Go":61," Gu":12,"EG":11,"De":45,"EE":45,"EL":14,"Di":29,"Dh":36,"H ":16,"GA":19,"Fa":23," IY":15,"Er":12," Ho":29,"ha ":334," Hi":37,"Ge":18," Ji":25,"Ga":53,"حم":18,"HA":35,"I ":13," Ja":63," KA":16," Is":32," It":29,"GM":12," In":35,"Fi":17,"ham":43,"han":102," Ka":28,"hal":48,"haw":17,"hax":44,"haq":58," Ki":19,"har":45,"has":76," Kh":10," Ju":19,"hah":12,"hab":77,"haa":189,"had":144,"hac":36,"AS":15,"AR":23," MA":17,"AX":27," La":22,"AY":15,"BA":11," Li":11,"C ":10,"AD":43,"AA":51,"AB":14,"AG":11," Ko":23,"AH":23,"hay":333,"AL":37," Ku":26,"AM":13,"AN":35," Ma":180,"Ax":18,"Ar":12,"D ":22,"بن":10," Mi":27,"Ba":101,"CA":15,"Af":65,"بد":10,"he ":25,"Aa":22,"Ab":33,"Ad":10,"Am":17," Lu":25,"Al":38," Ne":14,"Bu":30," Na":32,"Ca":127,"DA":43,"E ":30,"Bi":19,"Be":25,"hda":27,"Bo":30,"Hin":18," Mu":78,"hel":22,"Ku":26,"hee":112,"Ko":23,"hey":26,"hex":72,"Li":11,"N ":26,"her":11,"MA":41,"La":22,"Lu":25,"hi ":27,"Mi":27,"NK":10,"ال":51,"O ":34,"NA":12,"Ma":180,"Mu":79,"Ne":14,"Na":32," Am":16," Al":38,"Nu":16," Af":65,"No":12,"OO":18," Ad":10," Aa":22," Ab":33," Ba":101," CA":12," Ax":18," Ar":12,"hig":23," Be":25,"hid":12," Bi":19,"hin":40,"Go":61,"him":17,"Gu":12," Bo":30,"hii":170," Bu":30,"his":24,"hir":31,"Ha":35," Ca":127,"Hi":37," Ce":13," DE":10," Ci":23,"IN":12,"Ho":29," DH":13,"IS":10," Co":12," Cu":18,"IY":20," Da":59," Di":29," Dh":36,"In":36," De":45,"Is":32,"It":30,"Ja":63,"KA":33," Du":13,"Ji":25," Er":12,"Ju":19,"LA":35,"Ka":28,"Kh":10,"ho ":53,"Har":14,"Ki":19,"LE":16," Fa":23,"gma":64,"go ":32," Xi":13," Xa":51,"UU":11,"yuu":26," 
Wu":23,"To":11,"Th":10," Wi":15," We":12,"Ta":37," Wa":133,"St":13,"Su":23,"Wu":23,"gob":97,"Wi":16,"Wa":133,"XA":19,"We":12,"XE":12,"Y ":18,"yst":29," Yu":14,"yso":15," Ya":10,"WA":26,"gmo":41,"ysa":93,"Qa":26,"Qo":17," م":12,"RA":10,"S ":18," ع":21," ا":48,"goo":52,"R ":20," ب":13,"gsa":14,"gu ":229,"Si":17,"Sh":86,"gsi":12,"So":180,"Ru":12,"U ":11,"Sa":70,"TA":13,"Re":13,"SH":11,"Ro":11,"yoo":24,"Qu":16,"SA":16,"Ra":20,"gud":22," Nu":16," No":12,"gta":43," Ra":20," Qu":16,"b ":130," Ro":11," Re":13,"guu":20,"gun":12,"a ":5909," Qo":17," Qa":26,"شي":10," Su":23," St":13," Ta":37,"Ya":10," Th":10,"Yu":14," To":11," Ru":12," Sa":70,"Xa":51,"YO":15," Sh":86," Si":17,"Xi":13," So":180," WA":20,"ري":12,"Gob":48," ja":60,"i ":853,"ye ":36,"ian":11," iy":365," ji":127,"ge":93," je":47,"ga":1135,"fk":16,"Ing":16," im":15," in":148," il":54," ii":23,"ic ":14,"fi":49,"fr":45,"fu":47,"ft":29,"fo":18," is":155," ka":688," kh":13,"hd":44,"he":286," ki":46," ke":11,"ha":1580,"gn":11,"gm":108," jo":14,"gl":15,"gi":72,"id ":171,"gu":305,"iba":32,"gt":52,"gs":27,"gr":15," ju":17,"go":196,"du":188,"dw":36,"dy":13,"g ":83," ha":190,"ea":16,"eb":72,"yee":61,"ec":51," he":28,"ed":360,"de":252,"dd":113,"di":494,"dh":632,"dk":189,"dl":33," go":117,"do":234,"dn":22," gu":55,"ia ":36,"ex":102,"ey":554,"fa":110,"h ":441," id":15,"fe":17,"eh":54,"ib ":32,"eg":202," hi":20,"ee":1263,"el":242,"ek":35," ho":120,"ei":12,"yey":26,"en":172,"em":31,"et":26,"es":93,"er":287,"ya ":266,"ca":427," ni":37,"e ":881," ne":15,"bs":21," na":54,"br":36,"bu":104,"bt":55,"bn":18,"bo":234,"bk":30,"bl":13," mu":48,"ig ":10,"bi":355,"bb":15,"bd":41,"be":201,"db":11,"da":2087," og":18,"f ":98,"cy":18," of":16,"cu":41,"ct":11,"cs":27,"co":62,"cm":24,"cn":13,"cl":19,"ci":73," nu":10,"ch":33," no":73,"ce":64,"cd":20,"yad":111,"yag":10," le":91,"c ":51,"yaa":287," la":334,"icm":22," ku":465,"ici":14," km":14,"ica":25," ko":88," me":49,"az":10,"ay":1458,"idu":13," mi":187,"ba":817,"d ":893,"at":134,"as":580,"yd 
":29,"ido":43,"ar":1307,"aq":237," ma":590,"ax":1066,"aw":157,"idk":12,"yay":52," lu":25,"ak":76,"al":1647,"idi":35,"yaw":11,"idh":19,"ai":29,"aj":59,"yar":45,"am":590,"an":1951,"yaq":50,"yan":13,"ac":260,"ida":140,"ad":2243,"aa":4171," lo":138,"ab":630,"ag":664,"ah":1152,"yah":134,"af":128,"iib":15,"nu":38,"iic":11,"nt":263," af":45,"ns":59," ah":473," aa":208,"iig":13," ab":31,"iid":50,"no":160,"nn":18," ad":49,"q ":34," am":103," an":18,"iik":48,"iin":164,"ny":57,"yka":17,"iil":93," al":21,"iim":26,"iis":199,"iir":65,"of":78,"iiq":14,"oc":29," ax":10,"od":156," ar":26,"ob":291," aq":21," as":29,"om":340,"on":186," ba":344,"ok":16,"ol":273," ay":246,"og":129,"il ":80,"ot":41,"os":90," bi":107,"op":10,"oo":1738," be":63,"or":236,"oq":49,"yn ":105," bo":34,"r ":475,"ox":10,"ow":125,"oy":128," bu":35,"pa":14," ca":238,"im ":21,"ika":50,"lo":386,"ige":10,"lm":39,"ll":110,"ls":27,"iga":247,"ii ":339,"lw":14,"lu":48,"igi":31,"yo ":488,"ly":56,"igu":13,"igt":12,"o ":2012,"ma":1465,"mb":52,"mh":21,"me":199,"mk":39,"mi":333,"mp":19,"mo":102,"yna":98,"mu":85,"ihi":82,"yni":14,"na":851,"nb":30,"yne":30,"nc":10,"nd":137,"ne":107,"nf":30,"ng":58,"ynt":29,"ni":213,"nk":312,"nl":21,"imo":20,"ju":17,"jo":31," ee":295,"imi":21,"ki":203,"kh":95,"ke":48,"ind":29,"ina":80," fa":48,"yga":15,"ka":1778,"yi ":19,"m ":103," fu":10,"ino":13,"kt":20," fo":12,"ku":558,"int":102,"ins":10,"ko":130,"ine":14,"ing":16," fi":17,"ini":10,"km":16,"ink":82," ge":36,"li":577,"lk":332,"le":352," ga":186,"ld":23,"lg":22,"inu":15,"la":1306,"lb":52,"iny":13,"n ":1478," co":22,"ht":11,"hu":92,"ikh":54," ce":15,"hi":387,"hn":16,"ho":217," ci":36,"ila":160,"id":471,"ic":103,"yin":59,"ib":108,"ia":61,"ih":88,"in ":262,"ig":350," da":424,"if":21,"yih":49,"yig":21," cu":34,"hy":12,"k ":24,"iq":21," do":45,"ilo":13,"ir":438,"is":630,"it":49,"ill":18,"ilk":32,"ix":28,"ilm":12,"ii":1062,"ij":21,"ik":134," de":120,"ili":51,"il":385,"im":170,"in":663,"io":30," di":70,"yir":13," 
dh":511,"ima":76,"je":69,"ji":178,"iy":896," du":39,"l ":398,"ja":82,"xi":123,"xo":56,"xm":34,"xw":27,"xu":185,"xb":18,"xa":850,"xe":161,"xd":67,"wg":11,"wi":81,"how":15,"wl":60,"wo":26,"wu":102,"hog":13,"y ":1137,"wa":1722,"wd":13,"hoo":55,"we":185,"hor":60," yi":55," yu":13,"uy":12,"ux":164,"uw":34,"uu":720," ye":13,"ve":10," ya":211,"x ":140," xo":33,"uj":15,"uk":28,"ul":200,"uf":20," xi":90,"ug":210,"uh":16,"uq":90,"ur":259,"hna":12," xu":39,"us":114,"ut":54,"um":90,"un":214,"tu":47,"ub":104,"ua":11,"ud":145,"uc":17," xe":16,"w ":59," xa":103,"to":175,"hul":37,"tr":25,"te":120,"ti":246,"th":37,"ta":784,"su":111,"ss":19,"st":173,"sw":12,"sl":47,"sk":106,"sm":25,"so":371,"sr":10,"sc":17,"se":101,"sh":456,"ي ":20,"xme":19,"si":404,"xma":13,"u ":1296,"sa":722,"sb":21,"rr":20,"rs":115,"rt":160,"ru":77,"rw":11,"rx":11,"ry":27,"ro":144,"rn":40,"rm":32,"rl":22,"rk":200,"ri":397,"hu ":11,"rg":35,"re":258,"rd":49,"rc":12,"rb":25,"ra":754,"t ":51,"qu":35,"qs":10,"xoo":44,"qo":163,"IYO":15,"qi":33,"qe":23,"qa":334,"qd":61,"s ":240,"pu":15,"pr":14," ru":12," u ":194," sa":221," se":17," si":157," sh":112," so":259," qu":21,"xya":13," ra":48," re":33,"ن ":17," ro":11," qe":14," qa":168," qo":69," qi":18," oo":464," or":10,"huu":29," wa":1582," we":88," wo":12," wu":102," wi":39," uu":195,"xud":12,"xuu":133,"Hoo":12," tu":36," us":16," ur":10,"م ":11," um":12," un":11," ug":131,"yg":19," ta":231,"ye":133,"yd":48,"ya":998,"yb":27,"xwe":21,"xy":17," su":25,"yu":34,"ys":166," to":18," th":15," ti":62,"yo":522,"yn":280," te":11,"yk":19,"yi":189,"fee":11,"xey":58,"xee":54,"far":32,"fad":21,"faa":24,"Suu":12,"Axm":14,"xir":17,"xis":13,"xil":26,"xii":17,"xid":14,"xig":24,"Sta":10,"xa ":169,"eyb":17,"eya":63,"eys":74,"Tal":11,"eyn":163,"eyo":14,"eyk":10,"xda":51,"eyd":16,"eye":14,"exa":10,"exd":12,"exe":51,"xe ":46,"xar":38,"Ban":18,"Baa":14,"Bad":22,"xam":54,"xan":16,"Bar":23,"xay":166,"xba":16,"xaa":341,"xad":27,"xag":13,"wux":100,"Aas":11,"Shi":22,"She":12,"Sha":50,"ex ":21,"Af 
":19,"ey ":159,"er ":103,"es ":21,"eri":33,"ere":30,"era":49,"Afr":32,"esh":28,"esa":10,"ers":11,"ern":14,"ekh":16,"en ":89,"ela":47,"ele":26,"eli":17,"ell":42,"elo":15,"emb":19,"ena":28,"wla":53,"eny":12,"egm":90,"ego":14,"egt":11,"Som":32,"Soo":136,"woq":10,"el ":65,"wda":13,"Buu":11,"Bur":11,"we ":12,"gir":17,"gii":26,"wey":124,"wee":27,"gey":15,"gee":44,"wi ":14,"wis":10,"wii":22,"Sal":11,"gab":12,"gac":45,"gad":26,"DA ":20,"gaa":436,"gar":35,"gay":21,"gal":70,"gan":69,"ga ":388,"San":27,"wa ":22,"Cab":27,"waq":26,"wan":30,"wal":39,"wax":715,"way":45,"Cal":18,"war":52,"was":18,"Car":40,"waa":581,"wad":168,"Bel":10,"fur":37,"Bis":12,"fri":39,"fii":15,"Boo":10,"fka":13,"da ":918,"de ":22,"dad":131,"daa":159,"dab":19,"dal":113,"WAX":16,"dag":65,"dah":101,"dar":51,"dan":291,"dam":39,"day":61,"dax":79,"daw":32,"Cum":10,"dda":74,"dde":11,"ddi":17,"cun":14,"EEY":13,"EEL":14,"EGM":11,"Deg":30,"cyo":15,"uxu":126,"Daa":22,"Dag":10,"Dal":10,"uxa":15,"uun":88,"uul":63,"uum":13,"uug":15,"uud":50,"uux":10,"ux ":12,"uus":29,"uur":74,"uuq":18,"uut":24,"uwa":28,"co ":26,"cma":23,"ush":13,"usi":11,"use":13,"uu ":316,"usu":26,"uso":11,"uti":16,"uta":19,"cod":10,"com":11,"uqa":33,"uqd":36,"ura":37,"ure":10,"uri":31,"urk":17,"urt":32,"uru":37,"ur ":39,"csi":14,"uma":56,"unt":32,"unk":27,"uni":11,"una":85,"cel":30,"uka":13,"cee":17,"uls":10,"ulo":20,"ull":14,"ulk":27,"uli":14,"ule":16,"ula":26,"un ":29,"che":12,"ul ":36,"ciy":12,"cii":28,"uga":40,"ugu":128,"ugs":11,"ed ":184,"ebi":20,"uf ":13,"uda":33,"udi":12,"eb ":12,"udu":37,"ug ":18,"ega":53,"ub ":32,"eek":25,"een":99,"eel":138,"eem":18,"eeb":23,"eeg":65,"eed":229,"eey":113,"eh ":42,"ees":56,"eer":157,"edk":18,"edi":12,"ede":22,"eda":72,"uba":39,"ubb":11,"edu":15,"ud ":36,"edo":11,"ecl":12,"ece":25,"ee ":319,"dwe":25,"dwa":11,"duu":57,"tuu":22,"doo":96,"dow":37,"tri":10,"The":10,"dna":12,"to ":75,"Dhe":14,"Dhu":12,"dun":12,"dul":20,"dug":23,"too":69,"du 
":45,"tii":59,"tig":10,"tir":66,"dha":335,"tio":16,"tic":26,"dhu":33,"dib":25,"dhi":112,"dhe":122,"dho":21,"der":19,"dex":18,"dey":16,"dee":48,"deg":96,"den":15,"di ":38,"dle":11,"dla":17,"tee":36,"dku":14,"dki":33,"do ":77,"ter":36,"diy":39,"din":26,"ti ":29,"dir":60,"dis":51,"dig":42,"dii":165,"dil":12,"dka":134,"the":16,"rga":14,"ri ":48,"rge":14,"rey":42,"ree":110,"rda":15,"rdh":16,"re ":77,"rco":10,"rax":25,"ray":99,"rar":15,"ras":44,"rat":10,"rba":11,"rah":41,"ran":54,"ram":17,"rak":12,"rab":82,"raa":165,"rad":87,"rs ":11,"roo":48,"rna":16,"rne":11,"rni":10,"ro ":63,"rma":23,"Nab":15,"rla":13,"rku":10,"rko":10,"rki":41,"rke":18,"rka":117,"riy":58,"ris":28,"rig":31,"rii":110,"rik":46,"rin":21,"ric":16,"rya":13,"rur":10,"run":18,"ruu":10,"ry ":11,"rsi":16,"rsa":63,"rsh":15,"rta":110,"rto":18,"rte":11,"rti":11,"rub":12,"saa":120,"sab":11,"sad":52,"sag":23,"sah":11,"sal":49,"sam":47,"sbi":14,"san":191,"sas":14,"sar":33,"say":43,"sa ":99,"sha":242,"sho":46,"she":41,"shi":83,"si ":68,"siy":42,"sid":91,"shu":10,"sil":13,"sim":38,"sii":82,"sig":32,"se ":61,"sh ":17,"see":14,"sow":16,"som":59,"soo":214,"soc":14,"su ":25,"sla":30,"sku":37,"ska":59,"so ":55,"sma":15,"حمد":15,"ste":15,"sta":66,"sto":28,"sti":41,"sub":11,"suf":12,"sug":13,"sul":11,"suu":22,"tal":42,"tag":10,"tah":87,"taa":194,"tad":13,"tay":60,"tar":33,"tan":31,"tam":13,"te ":13,"ta ":272,"bka":23,"biy":71,"bis":28,"bir":12,"bil":48,"bin":31,"big":38,"bii":37,"bo ":47,"bol":129,"bna":15,"boo":24,"bba":12,"be ":19,"ban":61,"bal":43,"bah":27,"bad":232,"baa":96,"bab":12,"bay":35,"bax":34,"bas":10,"bar":156,"bdi":25,"bdu":11,"bi ":69,"bee":145,"ber":11,"bey":12,"ca ":55,"car":35,"cas":13,"can":24,"cay":13,"cab":20,"cad":53,"caa":145,"cal":33,"cag":16,"bri":13,"bra":15,"bsa":11,"bta":33,"bti":13,"bur":20,"bul":12,"buu":52,"aka":19,"am ":40,"aki":23,"aji":27,"ajo":16,"qa ":12,"al 
":136,"ahi":41,"qar":20,"qay":16,"aho":10,"qad":44,"qab":47,"qaa":149,"ahd":20,"qan":14,"qal":17,"ahe":26,"aha":697,"agm":13,"agt":24,"agu":76,"ago":29,"aq ":22,"qdi":38,"qda":17,"any":23,"ano":51,"ann":10,"ant":70,"ans":32,"ane":21,"ang":10," ال":46,"ani":87,"ank":185,"ana":385,"anb":26,"and":92,"amu":23,"amo":10,"amk":32,"amh":19,"ami":82,"ame":93,"amb":16,"ama":257,"aly":20,"qey":14,"alo":160,"alm":17,"all":22,"alk":165,"alg":17,"ali":424,"ald":14,"ale":110,"ala":480,"alb":42,"an ":924,"aba":194,"abd":37,"abe":56,"abi":146,"abk":18,"abo":40,"abt":38,"abu":36,"aca":130,"aab":114,"aac":13,"aaa":15,"aaf":38,"aag":64,"aad":398,"aaj":28,"aak":21,"aah":75,"aan":742,"aal":743,"aam":113,"aas":211,"aar":259,"aaq":41,"aaw":32,"aat":37,"aay":89,"aax":19,"ad ":334,"qiy":15,"ac ":19,"aa ":1110,"qii":10,"ab ":33,"afr":11,"aft":15,"afi":18,"aga":458,"age":12,"ah ":325,"afa":38,"ado":85,"adl":23,"adk":153,"adn":12,"adh":26,"adi":223,"add":96,"ade":66,"ag ":29,"adw":22,"adu":44,"aci":16,"ace":10,"Qar":12,"acd":15,"ada":1138,"af ":19,"acy":15,"acs":19,"qor":48,"qoo":60,"qof":24,"axi":13,"axm":15,"axo":15,"axu":15,"axa":702,"axb":16,"axd":50,"axe":90,"ayi":11,"ayo":52,"ayn":115,"ays":84,"ayu":13,"axy":16,"axw":26,"ayb":10,"aya":151,"ayg":11,"ayd":32,"aye":26,"ba ":84,"qur":24,"at ":11,"arg":25,"are":96,"ard":30,"arb":14,"ara":357,"aro":72,"arn":19,"arm":17,"arl":10,"ark":135,"ari":153,"aru":20,"ars":39,"art":72,"asa":99,"ary":14,"asi":106,"ash":156,"ase":12,"aso":31,"ask":17,"ar ":198,"as ":80,"aqa":111,"aqi":13,"aqo":51,"ax ":98,"awe":20,"ay ":932,"awa":46,"awl":31,"awi":33,"ata":37,"asu":12,"ast":33,"ato":18,"ate":17,"ra ":58,"ati":34,"ngi":20,"ni ":47,"Isl":11,"neh":11,"ng ":11,"nee":16,"nfu":25,"ney":14,"ne ":43,"ndh":18,"ndi":22,"nan":17,"nac":45,"nad":83,"nah":41,"nab":18,"naa":131,"Ito":28,"nbe":15,"nd ":69,"AXE":10,"AY ":10,"nba":11,"AXA":12,"nay":47,"nax":11,"na ":412,"Jab":13,"Jan":13,"Jam":22,"KA 
":11,"KAL":10,"nya":38,"AAL":13,"ADA":25,"nuu":21,"nto":13,"nti":37,"nta":176,"nte":24,"nsi":15,"nsa":22,"AHA":14,"noo":67,"noq":18,"nna":11,"ALA":17,"nle":12,"no ":59,"nki":22,"nka":271,"AN ":16,"nii":13,"nih":11,"nig":39,"niy":10,"nis":15,"nim":17,"nin":39,"ogu":24,"oga":60,"Jub":11,"ol ":60,"oco":11,"odi":15,"of ":38,"oda":43,"ofe":10,"LA ":12,"د ":29,"oba":86,"od ":60,"obo":134,"obi":38,"ة ":21,"oyi":94,"oya":10,"owl":29,"ow ":45,"ost":14,"ota":10,"ose":28,"os ":15,"oon":114,"ool":98,"oom":198,"oof":13,"oog":60,"ood":123,"oob":124,"or ":39,"ooy":111,"oow":16,"oot":14,"oos":65,"oor":31,"Koo":13,"ore":44,"ori":14,"osa":11,"ort":21,"oqo":37,"oqd":11,"ora":61,"ola":52,"on ":52,"olk":99,"ole":20,"olo":14,"oly":10,"ona":28,"onf":25,"oni":16,"onk":11,"ons":12,"ont":14,"oma":298,"oo ":749,"omp":12,"la ":241,"le ":159,"laa":281,"lab":61,"lac":11,"lad":232,"laf":10,"lah":96,"lag":116,"lal":23,"lan":88,"lam":27,"las":21,"lay":70,"lba":15,"lbe":31,"kuw":22,"kuu":18,"kun":22,"kul":14,"kto":17,"MAD":13,"lom":11,"loo":176,"lmo":12,"lmi":13,"lma":10,"lsh":13,"Luu":11,"li ":92,"lga":16,"ley":29,"leh":35,"lee":98,"lo ":165,"lla":49,"lle":32,"lka":311,"lki":14,"lis":19,"lin":48,"lim":15,"liy":204,"lid":28,"lia":24,"lib":24,"lil":40,"lii":17,"lig":30,"ma ":133,"maa":361,"mac":36,"mah":24,"mad":229,"mag":226,"mar":193,"mas":14,"mal":133,"man":32,"may":23,"max":25,"mba":26,"mbe":10,"me ":19,"med":68,"mee":72,"mey":24,"luq":12,"luu":17,"مد ":15,"lya":33,"lyo":10,"Mar":22,"Mas":10,"Mag":51,"Mad":20,"Maa":17,"Max":25,"moo":35,"muq":17,"muu":16,"mul":10,"Mux":13,"mhu":20,"Muq":24,"Mud":14,"mi ":19,"min":17,"mil":14,"mis":11,"miy":27,"mig":18,"mid":170,"mij":10,"mii":25,"mo ":60,"mka":33},"n_words":[94077,109135,83288],"name":"so"} -------------------------------------------------------------------------------- /langdetect/profiles/sw: -------------------------------------------------------------------------------- 1 | {"freq":{"jer":348,"jen":305,"ji 
":6234,"D":1805,"E":874,"F":1081,"G":1202,"A":4461,"B":2717,"C":2251,"L":1530,"M":12761,"N":2782,"O":860,"H":1677,"I":2605,"J":2641,"K":12188,"U":3120,"T":5185,"W":4730,"V":1116,"P":2090,"S":3343,"R":1632,"Y":517,"Z":395,"f":11048,"g":13829,"d":15034,"e":46694,"Feb":214,"b":19688,"c":9784,"a":289584,"n":90468,"o":57043,"l":42025,"m":53651,"j":21456,"k":76835,"h":32492,"i":164978,"w":60984,"v":3863,"u":57506,"t":40551,"s":35298,"r":27443,"p":13501,"z":18893,"y":38832,"x":501,"jar":185,"jan":137,"jaw":201,"é":167,"jim":1500,"jin":4267,"jil":163,"jij":492,"jia":221,"jib":3854,"ito":288,"itu":317,"itw":269,"isp":140,"ist":592,"ita":1061,"ite":213,"iti":334,"ivy":133,"iwa":2430,"ius":183,"ipo":224,"ipi":265,"is ":521,"ion":720,"iop":279,"ipa":165,"ipe":219,"iro":173,"iri":997,"isi":902,"ish":5756,"isa":694,"ire":164,"ira":314,"ja ":1529,"iyo":4644,"iye":227,"izo":242,"izi":413,"iza":568," l":8602,"kif":518," m":27935," n":19872," o":327,"kik":333," h":7652," i":9059,"kij":166,"kim":258," j":5212,"kil":389," k":27977," d":1010," e":802," f":914,"kia":390," g":257," a":6533," b":1252," c":2191,"kiw":279," y":17767," z":2257,"kin":442," u":4361,"kio":148," t":2402,"kip":379," w":34366," v":1482,"kis":520," p":2154,"kit":315," s":6097," r":837,"ki ":2193," J":2627," K":12017," H":1638," I":2128," N":2678," O":803," L":1487," M":12665," B":2646," C":2112," A":4277," F":1046," G":1172," D":1740," E":782," Z":375," Y":513,"и":142," S":3229," R":1588,"а":137," P":2015," W":4707," V":1031," U":3052," T":5117,"kea":156,"kem":150,"ke ":1988,"ku ":187,"kri":520,"kon":141,"koa":3734,"ko ":1214,"ل":165,"ا":240,"juu":155,"jul":257,"jum":177,"kaz":5045,"kaw":137,"kat":14149,"kar":374,"kas":316,"kan":2795,"kao":197,"kal":354,"kam":1048,"kad":160,"kab":375,"ka ":19783," Ga":196,"Da":365," Ge":229,"Co":364," Fr":177,"Ch":770," Ha":622," He":218," Go":142,"Do":469," Gr":177," Gu":142,"De":497,"Di":169,"Fe":311," Id":148,"Fa":160," Hu":173," Ho":177," II":154,"ha ":2668," Hi":392,"Ge":229," 
Ji":535,"Ga":198," Je":286,"I ":397," Ja":792,"Fr":177," Ir":284," Is":141," It":181," In":316," Ik":143," Il":224,"ham":522,"han":444,"hap":154," Ka":2225,"hai":238,"haj":163,"hak":611,"hal":314," Ke":708," Ki":3568,"har":1714,"has":255,"hat":148," Jo":255,"II ":207," Ju":691,"hag":267,"hab":181,"had":740," La":231," Le":207," Li":441," Ko":414," Ku":695," Kw":4009,"Au":181," Ma":4258," Mb":461,"Ar":475,"As":222," Mk":3388,"Ba":771," Mi":685," Mj":478," Me":615,"Af":445,"he ":544,"Ag":372," Lo":213,"Am":241,"An":463,"Ap":290," Lu":315,"Al":840," Ne":518,"Bu":429,"Br":278," Na":464,"Ca":592," Ni":435,"Bi":308," Mt":420,"Be":362," Mp":146," Mo":643,"Bo":282," Mu":471," Mw":545,"Ku":695,"Kw":4009,"Ko":415,"hez":299,"Le":210,"Li":441,"hes":336,"her":275,"hen":226,"hem":395,"La":231,"Lu":315,"Lo":213,"Me":621,"hi ":3880,"Mi":690,"Mj":478,"Mk":3388,"Ma":4263,"Mb":461,"Mw":546,"Mu":475,"Mt":420,"Mp":146,"Mo":643,"Ni":437,"Ne":518,"Na":466," Ap":290," Am":240," An":463," Al":833,"Ny":247," Ag":372," Af":443,"No":466," Ba":766,"Ok":277," Au":181," As":222," Ar":474," Be":362," Bi":308,"hio":2603,"Gr":177,"Go":143,"hin":1991,"him":244,"hil":432,"Gu":142," Bo":282,"hii":230," Br":278," Bu":429,"his":266,"hir":394,"Ha":622," Ca":582,"hiy":239,"He":219,"II":286,"Hi":393," Ch":768,"Ho":179,"Hu":173," Co":362,"K ":152,"Id":148," Da":365," Di":167,"In":317," De":495,"Ik":143,"Il":226,"Is":141,"It":181," Do":469,"Ir":284,"Ja":792,"Ji":536,"Je":286,"Jo":255,"Ju":691,"Ka":2234,"Has":225,"ho ":334," Fe":311,"Ki":3577," Fa":159,"Ke":708,"Us":172,"Ut":325,"Ur":181,"go ":920,"Un":355,"Uk":150,"Ul":189,"Ui":244,"Uj":249,"Uh":170,"Uf":251,"Uc":175,"Tu":237,"To":205,"Th":275,"Te":258," Wi":3377,"Ta":3841," We":188," Wa":1003,"St":260,"Su":178,"Wi":3380,"Wa":1003,"We":189," Zi":141," Za":152,"Vi":670," Yo":250,"Pr":150,"Pe":270,"goz":233,"Pa":858,"Po":195,"Pi":163,"gom":190,"gon":205,"gos":279,"gor":306,"Se":532,"gu 
":424,"Si":424,"Sh":518,"So":239,"Ru":370,"Sa":668,"Re":188,"Ri":138,"Ro":385,"Ra":354," Po":195,"guj":253," Pi":163," Pe":270," Pa":857," Ny":247," No":466," Ok":277," Ra":354,"b ":211," Ro":385,"gwe":166," Re":188," Ri":138,"gwa":280,"guz":429," Pr":150,"a ":143240," Su":178," St":248," Ta":3838," Th":274,"Yo":250," Te":257," To":205," Ru":370," Sa":668," Sh":517," Si":421," Se":528," So":239," Vi":666," Tu":231,"Za":152,"Zi":141," Uc":175," Uf":251," Uh":170," Ui":243," Uj":249," Uk":150," Ul":189," Un":355," Ur":181," Us":172," Ut":325," ja":134,"iak":142,"i ":52347,"ian":874," ji":4522,"ias":364,"ge":1928,"iar":235," je":226,"ga":2900," im":145," in":3363," ik":274," il":4878,"fi":1075,"fr":504,"fu":1927,"fo":752,"ibl":142,"ibi":603," ka":16147,"gw":483," ki":3027,"he":2541,"ibu":4111,"ha":8898,"gl":145,"gi":1836,"gh":1233,"gu":1858,"iba":566," ju":300,"go":2336,"du":838,"dw":136,"g ":607," ha":1606,"ea":1091,"eb":539," he":144,"ec":251,"ed":686,"de":1841,"di":4816,"dh":617,"do":1639,"ia ":9119,"dr":203,"ew":912,"ex":163,"eu":261,"ev":332,"ey":739,"ez":1828,"fa":6104,"h ":704," id":219,"fe":174,"eh":737," hi":990,"eg":644,"ef":303,"ee":307,"el":2120,"ek":2577,"ej":155," ho":139,"ei":650,"ep":643,"eo":1165,"en":9965,"em":2423,"et":1296," hu":4749,"es":2258,"er":4147," nj":147,"ca":364," ni":9330,"e ":10467," ng":147," nd":690,"bw":843," nc":2455," na":6269,"br":408,"bu":5373,"bo":2905," mw":6857,"bl":321," mu":4335," mt":648," ms":331,"bi":2134," mp":280," mo":680," mn":1501,"be":1280," mm":157,"ifu":393,"da":3239,"f ":246,"ifo":606," of":164,"co":390," ny":523,"ck":301,"ci":283,"ch":7388,"ce":365,"ifa":585," le":184,"c ":192," li":859," la":7153," ku":5668,"ich":830," kw":2736," km":140,"ica":140," ko":150," me":184," mf":368,"az":6015,"ay":5308," mi":1257,"ba":6016," mj":5191," mk":1617,"d ":1205,"at":22079,"as":4908,"ar":9773," ma":3449," mb":469,"aw":1490," mc":155,"av":414,"au":1667," 
lu":341,"ak":14678,"al":8458,"idi":551,"ai":5267,"aj":1998,"ao":6210,"ap":5739,"ide":157,"am":9111,"an":29556,"ac":1224,"ad":3126,"ida":813,"aa":1773,"ab":2568,"ag":1596,"ah":1414,"ae":682,"af":1092,"nu":591,"nt":1270,"ns":4895,"no":1318,"nn":478," am":1335," an":488,"nz":5093," ai":153,"iin":242,"ny":7307," aj":134," ak":183," al":2589,"of":4380," au":941,"oc":308,"od":678,"oa":4118,"ob":631," at":195," as":220,"om":1846,"on":3853,"ok":2328," ba":679,"ol":1930,"oi":1488,"oj":1425,"og":855,"oh":360,"ija":140,"ot":1280," bi":222,"os":1066,"ov":580,"ou":534,"ije":137,"op":845,"oo":318,"or":2938,"iji":1232,"r ":1622,"ow":244,"oz":397,"oy":154,"pe":836,"pa":6921,"po":1264,"ph":151,"pi":2193,"ika":13864,"lo":1408,"lm":337,"Ida":135,"ll":791,"ls":182,"iga":224,"ii ":525,"lu":868,"lt":178,"igh":170,"igi":384,"ly":147,"o ":24303,"mc":173,"igo":169,"ma":8274,"mb":6660,"mh":261,"me":2630,"mf":564,"mk":1733,"ml":210,"mi":3477,"mj":5199,"mn":1546,"mm":321,"mp":578,"ihe":138,"mo":6079,"mr":140,"mt":753,"ms":447,"mu":6394,"mw":6988,"ihi":187,"p ":352,"na":23279,"nc":2788,"nd":5575,"ne":2353,"ng":6858,"ni":24361,"nj":567,"nk":135,"imo":196," es":141," en":369,"ju":713,"imf":161,"ime":354," el":223,"jo":133,"imi":180,"ki":6922,"kh":154,"ind":834,"ke":2748,"ina":8001," fa":353,"ka":45110,"imu":392,"m ":727," fu":177,"kw":3124,"ino":181,"ks":210,"kt":463,"ku":10532,"ins":133,"ko":5804,"ine":479,"ing":1959,"kr":669," fi":274,"ini":4598,"km":156,"li":17984,"le":2997,"ld":221,"lf":159,"la":14880,"lb":250,"iny":275,"n ":3144,"iko":612,"hw":492,"ht":198,"hu":6825,"iki":2488,"hi":11111," ch":2090,"hn":150,"ho":1180,"ila":4379,"id":1813,"ic":1403,"ib":5595,"ia":11251,"ih":490,"in ":378,"ig":1252," da":146,"if":1790,"ie":672,"iku":2496,"k ":628,"ilo":373,"ir":1982,"is":9376,"it":2904,"ill":288,"iu":466,"iv":385,"iw":2556,"ii":989,"ij":1580,"ik":19966," de":224,"ili":8251,"il":13887,"im":4832,"in":17333,"io":4395,"ile":321,"ip":1169,"ima":914,"je":934,"imb":2471,"io 
":2960,"ji":17145,"iz":1362,"iy":4997," du":302,"l ":1018,"ja":2368,"z ":191,"wi":1773,"wo":202,"vy":671," za":1702,"y ":1239,"wa":56175," zi":456,"we":2203,"vi":1632,"vu":418,"vo":138,"uz":1451,"uw":2877,"uv":252,"uu":3068," ye":258,"ve":578," ya":17428,"va":328,"x ":213,"ui":563,"uj":4429,"uk":1643,"ul":2575,"ue":357,"uf":741,"ug":901,"uh":626,"ur":1919,"us":3274,"ut":2784,"um":5397,"un":5099,"uo":368,"up":1077,"ty":166,"tu":2287,"tt":391,"tw":473,"ub":1112,"ua":2111,"ud":534,"uc":476,"w ":435,"to":4407,"huk":345,"hul":146,"tl":220,"ts":343,"tr":455,"te":2280,"ti":12092,"th":999,"ta":14867,"su":644,"ss":500,"st":1842,"sw":308,"sl":142,"sk":865,"sm":139,"sp":289,"so":683,"sc":179,"se":5649,"sh":8151,"si":4764,"u ":13704,"sa":7736,"rr":220,"rs":467,"rt":620,"ru":2279,"ry":287,"ro":1786,"rn":619,"rm":257,"rl":223,"rk":320,"ri":8157,"rg":403,"re":3855,"rd":556,"rc":143,"rb":136,"ra":5018,"t ":1231,"s ":3025,"pt":348,"pu":357,"pw":193,"pr":381," sa":589," se":4480," si":369," sh":318," ra":432," ri":188,"hwa":473,"huo":175,"hum":2789,"hun":282,"hus":506,"hur":418,"huu":1333," pe":176," pa":632," pi":931," wa":33135," we":275," vy":396," wi":862," vi":1013," uc":144,"zi":8597,"ze":368,"za":8043," tu":189,"zw":257," us":165," ut":249," up":502," um":247,"zu":272," un":1571," uk":210,"zo":952," ul":573," uh":139," ta":1410,"ye":2395,"ya":24129,"yu":306," to":170," th":289,"yo":5888," te":201,"yi":4283,"Apr":266,"Asi":146,"Aru":195,"far":316,"fam":283,"fan":4203,"fal":292,"fa ":488,"eya":259,"Bah":237,"Bar":140,"eza":1136,"ezo":172,"ezi":237,"eta":229,"ete":154,"eti":253,"est":247,"ett":212,"ew ":355,"evi":165,"ewe":148,"ey ":361,"ewa":358,"er ":615,"epa":149,"es ":640,"ept":299,"eri":650,"ere":660,"era":456,"Afr":406,"esh":359,"ese":306,"esa":279,"eru":498,"Ago":254,"ert":152,"ers":339,"eku":184,"en ":297,"ela":204,"ele":786,"eli":360,"ell":177,"eo 
":852,"emb":1055,"ema":157,"eme":314,"emi":276,"emu":365,"ene":704,"eng":671,"ena":283,"end":498,"eno":221,"eni":486,"ens":4087,"ent":441,"eny":1803,"Ali":478,"ege":351,"Ame":158,"ehe":647,"Ana":176,"el ":260,"eke":267,"eka":1754,"giz":193,"gir":232,"gin":349,"gid":165,"ght":136,"gha":925,"gi ":572,"gen":204,"ger":781,"ge ":611,"gaz":140,"gar":155,"gan":693,"ga ":1334,"Cal":307,"fup":194,"Bib":137,"fua":317,"fum":143,"fun":167,"fri":445,"fu ":810,"for":356,"fo ":342,"fil":269,"fik":168,"fiz":146,"da ":1525,"de ":752,"dad":386,"dae":220,"dar":151,"dan":305,"dam":173,"Des":272,"Dar":167,"Chi":216,"Chu":136,"Cha":300,"ch ":165,"cha":2430,"chu":596,"ck ":143,"che":571,"chi":3152,"cho":370,"ed ":154,"ebr":313,"ea ":663,"ei ":346,"efu":197,"edi":297,"ee ":156,"don":150,"dom":308,"dol":151,"dog":335,"dun":335,"dha":302,"dia":330,"dhi":240,"der":146,"deg":261,"del":152,"di ":2661,"do ":429,"Dod":240,"diy":201,"din":291,"dis":387,"dik":302,"ri ":2373,"rez":420,"rea":148,"ref":154,"reh":266,"ren":163,"rek":1672,"re ":305,"rd ":213,"ras":256,"rat":173,"Ni ":218,"New":381,"rai":160,"ran":867,"ram":226,"rab":297,"rad":150,"ron":135,"rog":253,"rne":169,"rni":283,"ro ":593,"riw":166,"ris":508,"ril":300,"rik":1688,"rin":373,"ria":769,"rib":1011,"ric":160,"rk ":191,"ruf":262,"rum":452,"ruk":315,"rus":423,"ry ":194,"rse":228,"Nya":144,"rua":234,"rt ":160,"ru ":273,"sab":458,"sac":139,"san":482,"sas":180,"sa ":5643,"Nov":242,"sha":1745,"sho":271,"she":240,"shi":5099,"si ":1365,"siw":355,"sia":608,"shw":458,"shu":187,"sis":157,"sin":881,"sil":283,"sim":158,"sik":319,"sey":212,"ser":175,"set":147,"Okt":259,"seh":319,"sen":4083,"sem":335,"spa":151,"son":242,"su ":198,"st ":167,"sko":136,"ska":599,"so ":134,"ssa":198,"ste":192,"sta":295,"sto":444,"sti":401,"str":197,"swa":181,"tai":280,"taj":233,"tak":462,"tal":339,"taa":220,"tab":242,"taw":344,"tat":292,"tar":668,"tao":3872,"tan":641,"tam":288,"te ":507,"ta ":6480,"pa ":765,"pat":4120,"pak":235,"pap":248,"pam":300,"pan":895,"pi 
":233,"ped":156,"Pap":368,"pia":789,"pil":189,"pin":267,"pis":162,"pit":144,"po ":743,"pte":287,"pri":298,"pwa":189,"Rai":176,"ra ":1932,"ngo":958,"ngi":1065,"ngu":1084,"ngw":363,"ni ":18823,"Iri":209,"nge":937,"nga":1742,"Ita":147,"neo":505,"nes":161,"ng ":405,"nch":2504,"ne ":911,"ndu":263,"ndo":574,"ndi":1835,"nde":1085,"nda":1162,"nak":251,"nal":257,"nam":1855,"nan":221,"nao":1457,"nap":185,"nac":183,"nad":288,"naf":402,"nai":158,"naj":196,"nd ":409,"nat":353,"nas":439,"nay":454,"na ":15738,"Jan":271,"Jam":281,"nya":1379,"Jer":215,"nye":1338,"nyi":4239,"nus":133,"nua":282,"Jim":174,"Jin":277,"nti":403,"nta":151,"nte":177,"nsi":211,"nsa":4269,"nt ":232,"ns ":140,"nne":236,"no ":948,"nji":138,"nja":269,"Joh":134,"nia":4199,"nis":530,"ogo":593,"ois":1291,"oji":173,"oja":1149,"Jul":285,"Jun":259,"odo":288,"of ":150,"ofu":134,"ofa":3991,"oa ":3810,"oan":188,"oba":375,"nza":3817,"nzi":1111,"Kai":144,"Kag":175,"Kal":167,"Kan":354,"Kat":474,"Kas":372,"Kar":232,"Ken":632,"ozi":165,"Kis":329,"Kir":165,"Kit":204,"Kin":148,"Kib":138,"Kia":309,"ote":378,"Kik":287,"Kil":453,"Kim":202,"oto":331,"Kig":295,"Kii":249,"ost":309,"ota":195,"ove":320,"opo":325,"os ":178,"or ":161,"Kon":197,"orn":300,"oro":673,"ore":188,"ori":369,"ort":147,"ora":378,"ola":427,"on ":838,"oli":431,"ole":357,"olo":331,"oka":1580,"oke":163,"oko":236,"oku":141,"ona":230,"ond":383,"one":151,"ong":860,"oni":784,"oma":766,"omb":303,"omi":249,"omo":182,"op ":143,"la ":8089,"le ":1011,"Kwa":3975,"laa":157,"lai":293,"lak":564,"lan":660,"lam":497,"lat":186,"lay":3727,"Kus":393,"lba":165,"kuz":236,"kuw":2713,"kuu":1305,"kut":1795,"kus":492,"kur":190,"kup":186,"kun":409,"kum":210,"kul":297,"kuj":187,"kwe":591,"kwa":2512,"kub":762,"kuf":233,"kuh":134,"kua":620,"kto":308,"lom":136,"loj":136,"lme":241,"Lin":225,"lug":350,"lu ":155,"li ":2787,"lez":192,"lew":193,"lev":140,"les":155,"leo":178,"lem":198,"len":254,"lek":133,"lo ":347,"lla":138,"lle":153,"lli":198,"ll 
":147,"lit":241,"lis":337,"lip":257,"lio":738,"lin":627,"lim":922,"liz":411,"liy":4415,"liw":979,"lic":340,"lia":1497,"lik":2742,"lil":529,"lih":179,"lif":397,"ma ":2611,"mb ":139,"maa":449,"maj":397,"mak":522,"mad":206,"mae":140,"mag":342,"mar":439,"mas":613,"mal":159,"mam":161,"man":1055,"mat":406,"mba":3047,"mbi":361,"mbe":389,"mbo":2343,"me ":516,"mbu":267,"mch":170,"met":211,"mer":252,"men":492,"mfa":152,"mez":387,"mfu":373,"Mei":250,"Man":216,"Mar":1940,"Mas":472,"Mag":282,"Mak":206,"Mac":287,"Mbe":273,"mpi":142,"mon":163,"moj":1127,"mpa":160,"Mor":279,"mu ":1602,"mtu":175,"mto":226,"Mic":182,"Mis":147,"msh":144,"mta":228,"mwe":383,"mwi":345,"Mko":3178,"mwa":6205,"Mku":138,"Mji":464,"muj":3839,"muz":374,"mhu":232,"Mtw":147,"mi ":359,"mji":5175,"min":192,"mil":749,"Mwa":460,"mit":295,"mia":630,"mik":321,"mo ":4413,"mku":1038,"mko":539,"mna":1501,"mmo":145,"Wik":149,"Wil":3077,"Wan":148,"zwa":252,"zi ":5785,"zai":249,"zaj":254,"zam":177,"zan":3194,"zal":783,"zar":173,"zo ":612,"zia":533,"zin":815,"zil":197,"zik":548,"zis":240,"一":303,"yof":3874,"yot":286,"za ":2981,"ye ":1320,"yen":237,"ya ":21762,"yar":252,"yan":567,"yao":167,"yam":250,"yak":657,"yo ":973,"yin":213,"yik":3954,"一一":144,"Tan":3407,"Tab":164,"Shi":315,"Sin":201,"Sep":283,"we ":401,"wez":265,"wen":1037,"wim":286,"wil":741,"Sal":197,"vyo":257,"wa ":33121,"wap":4111,"wan":3901,"wal":617,"wam":169,"wak":9923,"way":141,"wat":368,"war":238,"was":172,"wai":2667,"wah":176,"vu ":165,"vya":351,"vil":200,"vin":183,"vit":187,"vis":284,"Rom":180,"vem":244,"Vij":328,"uzi":743,"uza":470,"Uje":235,"uwa":2760,"uvu":174,"ush":417,"usi":1319,"use":183,"usa":176,"uu ":2892,"usu":216,"ust":207,"uso":141,"uti":211,"ute":137,"uta":560,"Uin":218,"utu":215,"uto":1436,"us ":536,"Ung":252,"ura":183,"ure":140,"uri":491,"uru":630,"unz":137,"Ula":150,"upa":554,"upi":311,"umu":162,"umi":484,"umo":2705,"uma":686,"umb":661,"ume":297,"uo 
":238,"uni":940,"und":747,"una":1741,"ung":1193,"uku":302,"uko":457,"uki":429,"uka":247,"ulu":258,"uli":1405,"ule":192,"ula":478,"ukw":139,"uhu":267,"uji":4010,"uja":302,"Utu":261,"ugh":514,"ufu":352,"uhi":136,"ugu":137,"udi":174,"ubw":695,"uch":343,"ufa":176,"ufi":189,"ua ":369,"uat":317,"uar":494,"uan":690,"uba":185,"Uch":175,"ty ":146,"twa":450,"tur":369,"tun":270,"tum":424,"Ufa":219,"ts ":214,"tu ":896,"The":164,"tts":142,"to ":986,"tob":268,"tom":167,"ton":281,"tok":1553,"tol":482,"tor":246,"tik":8147,"tis":158,"tin":351,"tio":199,"thu":171,"tia":156,"tem":384,"ten":273,"tel":171,"th ":160,"ter":432,"ti ":2389,"the":225,"thi":213,"biw":209,"bis":191,"bil":315,"bin":256,"bo ":2326,"bli":173,"bor":262,"be ":229,"bam":230,"ban":516,"bal":619,"bah":147,"baa":227,"bab":179,"bay":333,"bar":432,"bao":277,"bi ":662,"ber":216,"bel":151,"bey":251,"bia":222,"ce ":176,"bu ":4649,"bru":221,"bur":149,"bun":177,"bwa":786,"aka":10583,"am ":337,"ake":1982,"aki":644,"aji":1355,"aju":170,"al ":304,"aja":293,"ain":393,"air":222,"ais":2933,"aif":267,"aid":437,"ahi":308,"aha":751,"agh":475,"agu":395,"aoi":1233,"anu":344,"anz":4756,"any":4453,"ano":638,"ann":141,"ant":323,"ans":490,"ane":261,"ang":1660,"ani":7747,"anj":260,"ana":4702,"anc":133,"and":2300,"amu":1047,"amo":1890,"amp":179,"amh":222,"ami":838,"ame":637,"amb":1658,"ama":1868,"ao ":4649,"alo":269,"alm":262,"all":133,"ali":5324,"ale":476,"ala":1026,"alb":152,"an ":1167,"akr":376,"aku":502,"ako":215,"aba":751,"abe":140,"abi":660,"abo":208,"abu":582,"ae ":291,"aad":302,"aan":389,"aal":140,"aam":185,"aar":236,"aa ":361,"afi":303,"ai ":477,"aga":223,"age":227,"afu":225,"aen":162,"ael":172,"afa":411,"ado":269,"adh":288,"adi":1538,"ach":840,"ada":637,"azo":205,"azi":5401,"aza":186,"ayo":638,"aya":4140,"aye":284,"ba ":2178,"are":1998,"ard":317,"ara":2057,"aro":249,"ari":3153,"aru":316,"art":243,"au ":993,"asa":1084,"asi":1169,"ash":895,"ask":665,"ar ":568,"apa":4869,"api":162,"apo":406,"as 
":271,"aut":148,"awa":1126,"awi":190,"ata":10070,"ast":167,"ass":197,"ato":634,"ate":225,"ati":9962,"ath":135,"atu":749},"n_words":[1316698,1560317,1165243],"name":"sw"} -------------------------------------------------------------------------------- /langdetect/profiles/tl: -------------------------------------------------------------------------------- 1 | {"freq":{"D":3787,"E":3422,"F":1488,"G":3274,"A":19564,"B":7360,"C":4965,"L":4772,"M":7066,"N":4375,"O":1801,"H":3508,"I":8185,"J":1743,"K":5752,"U":1449,"T":5401,"W":1008,"V":1273,"Q":420,"P":11919,"S":10977,"R":3177,"Y":471,"X":334,"Z":463,"f":2669,"g":178562,"d":29955,"e":64572,"b":36938,"c":9968,"a":433329,"n":268000,"o":112013,"l":93919,"m":59846,"j":824,"k":54159,"h":28813,"i":161924,"w":15439,"v":3263,"u":56864,"t":84874,"s":113569,"r":58943,"q":542,"p":56795,"z":2158,"y":61992,"x":703,"Fil":231,"í":225,"é":238,"á":213,"ü":252,"ā":268,"Est":484,"Eng":311,"Ene":272," l":8609," m":26008," n":63523," o":8411," h":4923," i":23816," k":20018," d":6208," e":1886," f":514," g":3652," a":41230," b":12249," c":1285," y":708," u":3275," t":9124," w":1107," v":245," p":25524," s":37805," r":2505," J":1713," K":5573," H":3377," I":7706," N":3834," O":1616," L":4623," M":6813," B":6692," C":4389," A":18851," F":1372," G":3134," D":3532," E":3171," Z":430," Y":427," X":260," S":10261," R":2927," Q":408," P":11641," W":933," V":1041," U":1379," T":5149,"ا":230,"A ":369,"Da":1003,"Co":1457,"Ce":320,"Ch":481,"Do":225,"De":356,"Di":1582,"Fe":231,"Eu":252,"Es":754,"En":687,"Em":287,"Ge":222,"Ga":499,"I ":336,"Fr":271,"Fi":327,"C ":400,"Au":300,"Ar":695,"As":627,"Ba":3221,"Ay":1525,"Ag":601,"BC":219,"Ab":447,"Ad":339,"Am":736,"BN":257,"An":10961,"Ap":381,"Ak":282,"Al":957,"Bu":508,"Br":412,"Ca":1287,"Bi":744,"Be":555,"Bo":628,"Hil":266,"Ku":269,"Kr":232,"Ko":739,"Le":701,"Li":502,"N 
":411,"La":1279,"Lu":1605,"Lo":364,"Me":527,"Mi":786,"Ma":4128,"Mu":386,"Mo":519,"Ni":284,"Ne":669,"Na":1605,"No":859,"Ok":212,"Ol":246,"Gi":363,"Gr":1131,"Go":284,"Gu":257,"Ha":926,"He":607,"Hi":663,"Ho":302,"Hu":700,"Im":288,"In":3099,"Ik":390,"Il":402,"Is":900,"It":1478,"Ir":258,"Ja":583,"Jo":504,"Ju":288,"Ka":3627,"Hap":403,"Ki":336,"Un":1045,"Tu":257,"Tr":725,"Ts":305,"To":291,"Th":568,"Ti":1025,"Te":370,"Ta":1438,"St":601,"Su":844,"Wi":302,"Wa":226,"Vi":399,"Va":226,"Pu":319,"Pr":784,"S ":239,"Pe":753,"Pa":3232,"Po":515,"Pi":5199,"Ph":519,"Or":329,"Se":751,"Sc":225,"Si":3949,"Sh":296,"So":479,"Sa":2369,"Re":1018,"Ri":307,"Ro":908,"Qu":349,"Ra":347,"Gre":522,"Gri":303,"b ":919,"a ":94136,"Sü":216,"Za":253,"i ":12212,"gd":846,"ge":1642,"ga":29192,"gb":804,"Ing":2213,"fi":306,"fo":309,"gy":549,"he":2166,"ha":12797,"gn":612,"gm":735,"gl":3792,"gk":3515,"gi":5586,"gh":1271,"gg":2300,"gu":3141,"gt":1399,"gs":3014,"gr":1126,"gp":2172,"go":3120,"dt":243,"du":1243,"dy":626,"g ":113439,"ea":1899,"eb":1161,"ec":685,"ed":1334,"de":3129,"di":5171,"do":3993,"Ilo":317,"dr":607,"ew":354,"ex":243,"eu":223,"ev":449,"ey":804,"ez":509,"fa":228,"h ":1453,"Ind":341,"fe":234,"eh":1900,"eg":2092,"ee":503,"el":4423,"ek":1455,"ei":449,"ep":1032,"eo":1328,"Imp":228,"en":9846,"em":2892,"et":2497,"es":7771,"er":9807,"ca":1369,"Ika":371,"e ":10545,"by":545,"br":2325,"bu":3483,"bo":2508,"bl":1265,"bi":7420,"be":1655,"da":7330,"f ":892,"cu":348,"ct":832,"cr":267,"co":1527,"ck":419,"ci":1114,"ch":1408,"ce":1134,"cc":244,"c ":679,"az":285,"ay":33855,"ba":16262,"d 
":6470,"at":29146,"as":20781,"ar":16773,"aw":10001,"av":702,"au":1712,"ak":11317,"al":29791,"ai":3152,"ao":3807,"ap":8378,"am":13495,"an":101085,"ac":1440,"ad":5449,"aa":5389,"ab":8765,"ag":22898,"ah":9461,"ae":824,"nu":2551,"nt":6347,"ns":5167,"no":9894,"nn":491,"ny":2397,"of":922,"oc":711,"od":3388,"oa":467,"ob":1692,"om":3049,"on":28949,"ok":1666,"ol":5417,"oi":423,"og":2491,"oh":829,"ot":1896,"os":4877,"ov":562,"ou":1121,"op":3555,"oo":5633,"or":6366,"r ":5313,"ow":489,"oy":1082,"pe":2826,"pa":25955,"pl":758,"po":5042,"ph":416,"pi":11096,"lo":7059,"lm":376,"ll":2040,"ls":232,"lp":216,"lu":4808,"lt":766,"ly":1884,"o ":35938,"ma":23630,"mb":3361,"mg":9391,"me":3130,"mi":5214,"mm":511,"mp":3371,"mo":2915,"mu":4995,"p ":2357,"na":50921,"nc":944,"nd":5861,"ne":3717,"ng":124226,"ni":11322,"nl":1083,"ki":5108,"kh":660,"ke":653,"kb":240,"ka":28062,"m ":2592,"ky":234,"ks":1102,"kt":1232,"ku":3832,"ko":5455,"kr":397,"kl":2445,"li":16771,"le":7298,"ld":536,"la":41710,"lb":521,"n ":41545,"hr":229,"hu":1485,"hi":7425,"hn":275,"ho":2024,"id":2936,"ic":2306,"ib":3763,"ia":4281,"ih":1209,"ig":7316,"if":239,"ie":1122,"hy":251,"k ":4018,"ir":2572,"is":21654,"it":15297,"iu":510,"iv":697,"iw":1139,"ii":644,"ik":10689,"il":18574,"im":4535,"in":32448,"io":2262,"ip":8303,"iz":356,"iy":6303,"l ":8749,"ja":305,"z ":468,"wi":4192,"wo":282,"ws":238,"y ":26266,"wa":7331,"we":663,"vi":929,"vo":293,"uz":222,"uy":540,"uw":938,"uu":278,"ve":1065,"va":717,"x ":270,"ui":867,"uk":2223,"ul":10829,"ue":1048,"ug":2281,"uh":1222,"ur":4663,"us":3997,"ut":2718,"um":4571,"un":11124,"uo":841,"up":1559,"ty":936,"tu":5349,"tt":423,"tw":245,"ub":2276,"ua":2314,"ud":744,"uc":353,"w ":2307,"to":12113,"tn":494,"tl":670,"ts":639,"tr":2947,"te":5598,"ti":12280,"th":1499,"ta":23206,"su":2916,"ss":697,"st":7061,"sy":6204,"sl":357,"sk":592,"sm":643,"sp":948,"so":5456,"sc":415,"se":6243,"sh":1082,"si":9719,"u 
":1011,"sa":50153,"rr":466,"rs":1459,"rt":2397,"ru":1218,"ry":2421,"rp":226,"ro":6522,"rn":1128,"rm":690,"rl":534,"rk":708,"ri":11065,"rg":616,"re":7270,"rd":981,"rc":435,"rb":496,"ra":14328,"t ":17794,"qu":482,"s ":20143,"py":238,"pt":257,"pu":5533,"pp":386,"pr":1443,"Hul":231,"za":466,"zo":453,"ye":2458,"ya":16851,"yb":251,"yu":966,"yt":264,"ys":635,"yr":507,"yo":11759,"yn":980,"Ara":274,"Apr":218,"Asy":352,"Ayo":1506,"Bag":274,"Ban":328,"Bay":1488,"Bat":317,"Abr":300,"Adi":220,"Ago":307,"BN ":257,"Ale":251,"Alt":229,"Ame":579,"Ang":10467,"Car":262,"Bib":239,"Com":233,"Col":584,"Dis":592,"üd":216,"Nat":309,"New":234,"Nag":476,"Nor":372,"Nob":295,"Pin":243,"Pil":4628,"Phi":298,"Per":237,"Pas":236,"Par":288,"Pag":236,"Pan":1029,"Pam":480,"Pal":436,"Pro":250,"Pra":302,"Que":246,"Isa":506,"Ita":502,"Ito":837,"Jam":280,"Jos":222,"Kab":349,"Kag":252,"Kal":869,"Kan":214,"Kat":442,"Kas":598,"Kar":312,"Kon":276,"Leo":280,"Lat":224,"Lun":1069,"Man":377,"Mal":287,"Mar":952,"May":1105,"Mat":220,"Min":268,"Süd":214,"Zam":212,"一":621,"一一":311,"Sur":412,"Sta":256,"Tag":668,"Siy":420,"Sil":411,"Set":224,"Si ":2472,"Sam":277,"Sal":320,"San":795,"Sa ":539,"Rey":251,"Rep":257,"Rom":461,"Uni":749,"The":357,"Tim":465,"Tin":234,"Tre":501,"Tsi":277,"bis":856,"bit":236,"bil":2673,"bin":1170,"big":1028,"bo ":562,"bli":613,"bla":330,"bol":310,"bon":590,"ban":3506,"bak":379,"bal":915,"bag":1029,"bah":2518,"bae":247,"bab":1067,"bay":2496,"baw":340,"bat":1046,"bas":870,"bar":318,"bi ":266,"ber":655,"bel":279,"bib":346,"bid":220,"ca ":245,"can":254,"ce ":518,"bri":332,"bra":317,"bre":1519,"buo":641,"bul":259,"bun":360,"bum":222,"buh":438,"but":324,"bye":332,"aka":4840,"am ":867,"aki":2053,"akh":385,"al ":4667,"ail":347,"ain":942,"ais":377,"ak 
":1818,"aig":364,"ahi":1712,"ahu":567,"aho":893,"aha":5905,"agk":1800,"agl":815,"agm":409,"agg":261,"agh":599,"agi":2389,"ags":1175,"agt":890,"agu":765,"ago":873,"agp":2086,"anu":622,"any":1534,"ano":2033,"ant":1925,"ans":2023,"ane":370,"ang":54220,"ani":2704,"anl":847,"ap ":1248,"ana":5631,"anc":342,"and":2377,"amu":411,"amo":399,"amp":1054,"ami":2241,"ame":660,"amb":1287,"ama":6244,"ao ":2560,"aly":681,"alu":885,"alo":1422,"alm":256,"all":403,"ali":4268,"ale":940,"ala":15201,"alb":295,"an ":25875,"aku":295,"akt":378,"ako":347,"akl":349,"aba":5223,"abe":285,"abi":1848,"abo":410,"abu":558,"ae ":378,"aca":217,"aaa":370,"aan":2340,"aal":228,"aas":476,"aar":1310,"ad ":1547,"aga":6180,"agb":737,"agd":421,"ado":1685,"adi":344,"ade":215,"ag ":2962,"ada":1065,"ayo":1284,"ayn":676,"ays":426,"ayr":437,"ayu":244,"ayb":228,"aya":7316,"ba ":1007,"at ":11450,"are":434,"ard":406,"arc":218,"ara":6094,"aro":928,"arl":281,"ark":258,"ari":2740,"ars":347,"art":1318,"asa":4127,"ary":1252,"asi":1053,"ase":1923,"aso":401,"aon":1107,"ar ":1507,"apa":4168,"api":926,"apo":968,"apu":525,"as ":8041,"avi":292,"ay ":22870,"awa":4711,"awi":3261,"ata":9859,"asu":540,"ast":1119,"asy":2814,"atl":420,"ato":908,"ate":772,"ati":3674,"ath":277,"aw ":1870,"atu":1238,"aun":478,"aug":319,"itn":327,"ito":5794,"itu":365,"ity":376,"üdt":214,"ism":492,"ist":2727,"isy":1436,"ita":2453,"ite":522,"iti":1834,"iwa":1084,"ius":321,"ive":465,"ipo":219,"ipp":279,"ipi":5953,"is ":1468,"ion":1384,"ipa":894,"iro":325,"iri":318,"isi":1179,"ish":487,"ise":486,"isa":12350,"ire":294,"ira":1126,"it ":3154,"iyo":1973,"iya":3692,"iye":538,"kik":335,"kil":1497,"kin":1435,"kip":244,"kit":510,"ki ":400,"kha":550,"koy":367,"kop":294,"kon":1105,"kom":504,"kol":972,"ko ":1504,"kla":2111,"kay":687,"kat":4082,"kau":481,"kar":1361,"kas":2326,"kap":1524,"kan":3294,"kal":2502,"kam":987,"kak":1114,"kah":780,"kai":697,"kag":306,"kad":618,"kab":2686,"kaa":306,"ka ":4085," Ga":496," Ge":219," Fr":268," Fi":324," Ha":926," He":600," 
Go":283," Gr":1130," Gu":254," Gi":361," Hu":696," Ho":300,"ha ":509," Hi":657," Ja":583," Ir":258," Is":900," It":1471," Im":284," In":3092," Ik":389," Il":402,"ham":439,"han":3970," Ka":3625,"hal":1546," Ki":335,"har":726,"has":418,"hat":613," Jo":502," Ju":282,"hah":296,"hag":932,"hab":327," La":1273," Le":698," Li":486," Ko":738," Kr":232,"hay":2401," Ku":269," Ma":4113," Mi":784," Me":525,"he ":726," Lo":362," Lu":1602," Ne":660," Na":1601," Ni":283," Mo":516," Mu":380,"her":391,"hen":301,"hi ":256," Ap":381," Am":732," An":10945," Ak":280," Al":957," Ag":601," Ad":338," Ab":444," Ba":3213," Ay":1524," Au":300," As":625," Ar":684,"hig":262," Be":553," Bi":731,"hip":233,"hin":2102,"him":317," Bo":624,"hil":1124," Br":411," Bu":508,"hit":252,"hiy":1822," Ca":1275," Ce":318," Ch":477," Co":1435," Da":1001," Di":1559," De":354," Do":218," Es":753," En":683," Em":287," Eu":251," Fe":229,"gma":404,"go ":1131," Sü":216,"gle":2258,"gli":561,"gla":763," Wi":299,"gko":362," Wa":224," Za":253,"gna":429,"gmu":288,"gpu":1231,"gpa":814,"gon":726,"gos":419,"gor":456,"gsa":661,"gsi":395,"gra":396,"gre":308," Or":329," Po":510,"gui":242," Pi":5194,"gum":355," Ph":516,"gul":619," Pe":751," Pa":3220,"gsu":217,"gso":1676," No":856," Ol":245," Ok":212,"gta":876," Ra":342," Qu":347," Ro":902," Re":1013," Ri":307," Pr":781,"gus":319," Pu":319,"gun":828," Su":840," St":567," Ta":1427,"gya":341," Th":565," Ti":1025," Te":369," Tr":725," Ts":304," To":289," Sa":2358," Sh":292," Si":3946," Sc":224," Se":748," So":478," Va":226," Vi":396," Tu":256," Un":1043,"ial":214,"ian":1841," ip":821," im":295," in":1205," ik":2724," il":559,"ic ":391," is":12317," it":4118,"ibl":298,"ibi":852," ka":12843,"ibo":539," ki":1772,"id ":624,"iba":1373,"ibe":241," ha":2342," he":266," gi":1221," gr":273,"ia ":1668," gu":642," ib":1270," hi":1628," hu":502," ni":3203," ng":29501," ne":259,"ien":313," na":26725," mu":2405,"ig ":1084," mo":343," of":748," no":3658," le":482,"ict":361," li":1169," la":4719," 
ku":1926,"ich":298," kl":1518,"ica":454," ko":1618," me":432," mg":9378," mi":835," o ":6532,"ido":754," ma":12562," lu":1621,"ide":429,"ida":693," lo":567," ag":287," aa":479," an":11487," ap":351," ak":588," al":688," aw":224," ar":1782," at":8491,"iit":294," as":396," ba":6398," ay":15608,"il ":1019," bi":3286," be":279," bo":263," bl":303," bu":1574," ca":215,"im ":631,"ika":5845,"igd":340,"ige":351,"iga":3450,"igm":226,"igi":608,"iha":656,"ihi":488,"ik ":530,"imo":757," es":477," em":253,"imp":821," el":218,"ime":284," ek":212,"imi":396,"ip ":377,"ind":1253,"ina":12544,"imu":413,"ino":1637,"int":735,"ins":494,"ine":925,"ing":7215,"ini":1991," ga":1201,"inu":1256,"iko":1955," co":595,"iki":665,"ila":8566,"in ":3710," da":2205,"iku":803,"iks":322,"ilo":675,"ill":466," de":1318,"ili":6551," di":1938,"ima":551,"imb":583,"io ":525,"ily":587," du":359,"hol":219,"hon":897," ye":394,"hul":550," sa":28976," se":2207," si":4160," so":365," t ":331," re":1237," ri":835," pu":1239," pr":891," s ":272,"hum":213," op":300," or":499," pe":850," pa":17344," pl":235," po":2264," pi":2508," wa":403," wi":495," tu":1581," ur":1186," up":417," um":236," un":787," ta":3807," su":1227," tr":542," th":440," ti":1513," te":623,"eyn":227,"eta":377,"eti":227,"esp":220,"eso":216,"est":873,"ess":243,"esy":238,"eto":217,"etr":240,"ety":252,"ey ":259,"er ":1701,"es ":4679,"epu":270,"eri":1261,"ere":336,"era":1743,"erb":250,"et ":365,"esi":397,"ery":584,"ert":284,"ers":877,"ern":685,"erm":213,"ero":994,"eks":315,"ekt":395,"en ":605,"ela":487,"ele":614,"eli":929,"ell":447,"eo ":676,"emb":1031,"ema":617,"eme":244,"emo":250,"emi":251,"emp":247,"ene":540,"eng":2031,"ena":338,"end":702,"enc":265,"ens":1983,"ent":2381,"ego":798,"ege":495,"ehi":1394,"el ":1262,"eka":275,"gka":2797,"git":1186,"gis":283,"gil":253,"gin":2170,"gha":696,"ggi":233,"gga":1607,"gi ":930,"gen":305,"gda":265,"gdi":448,"ge 
":872,"gbi":301,"gba":311,"gag":358,"gah":212,"gas":721,"gar":389,"gat":1029,"gaw":951,"gay":1184,"gam":1160,"gal":2016,"gan":8911,"gap":698,"ga ":10893,"da ":1215,"de ":630,"dad":487,"daa":229,"dal":1278,"dai":337,"dag":294,"dah":789,"dat":526,"dar":615,"dan":716,"dam":364,"cti":321,"co ":247,"com":405,"ch ":248,"cha":297,"cia":252,"ck ":232,"che":214,"ed ":364,"ebr":372,"ean":218,"ear":435,"eap":318,"ea ":269,"ega":369,"edi":305,"dya":287,"dor":701,"don":511,"dos":801,"dti":217,"dul":227,"duk":249,"dia":320,"der":377,"des":307,"del":555,"dek":245,"den":404,"di ":862,"do ":1342,"diy":230,"din":1025,"dis":689,"dit":243,"dig":691,"rga":250,"ri ":1883,"res":909,"rea":358,"reg":609,"reh":760,"ren":823,"rel":229,"rer":338,"re ":1785,"raw":1292,"rd ":279,"rap":411,"ras":562,"rat":708,"rag":342,"ran":3019,"ram":928,"ral":1710,"rab":295,"raa":290,"rad":734,"rs ":234,"ros":487,"rot":255,"ron":875,"roo":656,"rop":486,"rod":249,"rol":389,"rna":412,"rne":221,"ro ":2000,"rma":307,"riy":449,"rit":895,"ris":641,"rig":228,"ril":614,"rik":932,"rin":2120,"ria":1493,"ric":296,"rie":322,"rk ":265,"rya":408,"rup":247,"rus":239,"ry ":568,"rsi":370,"rso":362,"rte":436,"rti":1012,"saa":543,"sab":432,"sag":340,"sah":246,"sak":534,"sal":1402,"sam":846,"sap":373,"san":13859,"sas":644,"sar":588,"say":842,"sa ":29066,"ryo":1185,"shi":267,"si ":909,"siy":1352,"sid":252,"sia":279,"sit":454,"sis":871,"sip":588,"sin":2020,"sil":886,"sim":530,"sik":667,"sig":229,"se ":703,"ser":748,"ses":480,"sh ":511,"sen":3264,"spe":263,"spa":238,"son":453,"sod":1704,"st ":395,"ss ":216,"sla":213,"smo":455,"so ":2182,"sye":492,"sya":1319,"syo":3972,"syu":310,"ste":723,"sta":2233,"sto":847,"sti":1338,"str":1197,"sub":226,"sul":377,"sum":409,"suk":222,"sun":606,"sus":423,"tak":378,"tal":1825,"tag":2356,"taa":516,"tab":313,"tad":729,"tay":910,"taw":1462,"tat":2169,"tas":1294,"tar":426,"tap":454,"tao":2928,"tan":3973,"tam":283,"te ":1421,"ta ":2620,"pa 
":793,"par":2056,"pat":1511,"pas":473,"pay":244,"paa":836,"pab":282,"pag":5944,"pah":588,"pak":582,"pal":1553,"pap":964,"pam":2183,"pan":7375,"pi ":308,"per":1160,"pel":619,"pla":312,"pik":487,"pil":381,"pin":8127,"pis":517,"pit":481,"por":477,"pop":1613,"pos":639,"pon":910,"pol":471,"ppi":245,"po ":494,"pua":1158,"pub":334,"pri":362,"pre":283,"pro":666,"put":222,"pun":775,"pul":2278,"ra ":2777,"ngo":322,"ngi":702,"ngl":2579,"ngk":1509,"ngu":1141,"ngr":246,"ngs":1763,"ni ":1903,"nge":218,"ngg":1999,"ngh":357,"nga":5141,"nel":268,"ner":635,"net":328,"nes":773,"ng ":107416,"nce":344,"ne ":832,"ndu":336,"ndo":855,"ndi":1104,"nde":357,"nda":2122,"nak":3214,"nal":1987,"nam":1069,"nan":5284,"nao":281,"nap":1249,"nar":667,"nad":394,"nag":3410,"nah":1201,"nai":343,"nab":490,"nd ":696,"nau":287,"nat":2025,"nas":5521,"nay":628,"naw":416,"na ":22155,"nya":1473,"nul":380,"num":251,"nun":621,"nus":223,"nut":274,"nub":254,"nto":1170,"ntu":218,"ntr":369,"nti":1113,"nta":1619,"nte":1041,"nsy":292,"nso":1561,"nst":288,"nse":338,"nsi":398,"nsa":1639,"nt ":493,"ns ":315,"nod":444,"noo":3554,"nom":260,"non":1292,"nla":384,"no ":3206,"nlu":471,"nid":488,"nib":262,"nia":263,"niy":508,"niw":515,"niv":230,"nis":952,"nit":1670,"nim":327,"nin":947,"nik":402,"nil":1831,"ogr":242,"ohi":487,"ok ":961,"ol ":1113,"oby":351,"ode":251,"of ":743,"og ":1738,"ob ":490,"od ":2512,"obe":281,"nyo":656,"oto":243,"ost":571,"ota":234,"osi":296,"ose":518,"oso":256,"oy ":861,"oun":298,"opo":245,"opi":377,"ope":331,"os ":2497,"opu":1605,"oon":4273,"ook":406,"oob":454,"or ":1386,"ork":236,"orm":244,"oro":293,"ord":364,"ore":565,"org":297,"ori":875,"osa":225,"ort":603,"ory":303,"ot ":787,"ora":483,"ola":327,"on ":10511,"oli":798,"oll":560,"ole":828,"olo":1158,"ona":1108,"ond":258,"one":582,"ong":14203,"oni":381,"ono":505,"ons":429,"ont":363,"ony":253,"oma":662,"ome":348,"omi":484,"omm":343,"omp":399,"omo":218,"op ":492,"la ":7051,"le 
":657,"laa":472,"lab":1035,"lad":618,"lah":600,"lag":1451,"lal":5124,"lak":2054,"lan":8966,"lam":1407,"lap":417,"lar":1484,"lat":1650,"las":3731,"law":4043,"lay":1014,"lba":235,"ld ":260,"kuy":222,"kun":1182,"kum":367,"kul":1301,"ksy":373,"ksi":218,"ktu":302,"kto":418,"lon":1346,"loo":506,"lor":222,"loh":474,"log":979,"los":411,"lto":257,"lug":603,"li ":620,"les":2603,"lem":360,"len":771,"leh":645,"leg":553,"lea":415,"lo ":1738,"lla":492,"lle":760,"ll ":320,"lit":1575,"lis":938,"lip":5499,"lin":1820,"lim":971,"liy":225,"lic":224,"lid":294,"lia":495,"lib":291,"lik":1738,"lil":346,"lii":293,"lig":604,"lih":227,"ma ":1406,"maa":656,"mab":485,"mah":1161,"mai":353,"mak":856,"mad":287,"mag":1774,"map":283,"mar":866,"mas":904,"mal":1856,"mam":1144,"man":4514,"may":4380,"mat":2454,"mba":1424,"mbr":990,"mbo":357,"me ":332,"med":215,"met":267,"mes":455,"mer":827,"men":704,"luk":293,"lup":252,"lun":1075,"lum":601,"lut":234,"lus":323,"lur":429,"lya":1073,"lyo":437,"mpi":405,"mpe":729,"mpo":376,"mpu":307,"mog":636,"mon":703,"mot":220,"mpa":1165,"mus":340,"mut":297,"mul":2510,"mun":975,"mga":9380,"min":1128,"mil":657,"mis":408,"mit":1305,"mik":544,"mo ":604,"mmu":273,"zon":372,"yun":445,"ysa":370,"yro":432,"yos":430,"yon":8581,"yea":322,"yeg":287,"yen":248,"yem":924,"ya ":5481,"yag":346,"yar":349,"yan":8889,"yal":500,"yo ":2106,"yna":235,"yni":658,"wit":490,"wig":2597,"wik":476,"wa ":1340,"wan":2646,"wal":851,"wak":321,"wat":242,"war":240,"wag":936,"ver":481,"ve ":253,"va ":222,"uya":331,"uwe":288,"uwa":445,"usi":309,"usa":537,"usy":243,"usu":363,"ust":456,"uti":335,"ute":297,"uta":417,"utu":829,"uto":373,"us ":1413,"ura":1051,"ure":277,"uri":1726,"uro":457,"uny":224,"uon":264,"upa":766,"ur ":441,"upo":374,"ump":364,"umu":925,"umi":469,"uma":1769,"umb":268,"uly":233,"uo ":439,"unt":520,"unu":378,"uni":882,"uno":1281,"und":796,"una":2530,"ung":3950,"uku":387,"uko":610,"um 
":503,"uka":453,"ulu":1033,"ult":291,"ulo":1229,"uli":710,"ula":6857,"uin":299,"ugn":288,"uga":1124,"uha":936,"ubo":309,"ubr":217,"ubu":535,"ue ":245,"uez":255,"uan":1897,"ubi":307,"ubl":385,"uba":263,"tye":228,"ty ":596,"tur":814,"tut":349,"tul":834,"tuk":407,"tun":595,"tum":594,"tub":614,"tra":868,"tri":838,"tro":879,"to ":7587,"tna":387,"tom":241,"ton":2028,"tol":467,"tor":922,"til":777,"tik":1397,"tig":242,"tir":721,"tit":772,"tis":888,"tin":3062,"tim":775,"tip":230,"tio":1023,"tib":304,"tid":407,"tiy":331,"tlo":414,"tem":511,"ten":352,"tel":392,"th ":280,"tes":226,"ter":1925,"ti ":555,"the":612,"tha":237},"n_words":[2110634,2489828,1864789],"name":"tl"} -------------------------------------------------------------------------------- /langdetect/profiles/zh-cn: -------------------------------------------------------------------------------- 1 | {"freq":{"·":11798,"é":695,"и":659,"о":642,"а":705," 《":2860," 。":4044," 、":2042,"あ":2229,"。":93258,"、":80590,"》":12787,"《":12801,"ア":2133,"乱":17692,"书":4419,"习":841,"乡":1808,"九":2519,"也":8266,"乘":28571,"乐":5294,"乌":1217,"义":4779,"之":17358,"久":747,"主":15497,"为":43069,"举":3076,"丽":1041,"丼":1241,"丰":817,"临":983,"个":19411,"中":47239,"两":4794,"严":592,"丛":1367,"业":8072,"东":12690,"丞":9778,"专":3303,"丕":15196,"世":9256,"丈":146015,"三":7927,"上":15305,"下":7962,"不":69712,"与":14330,"一":51232,"丁":117148,"七":1693,"万":2760,"价":1006,"份":14596,"任":5559,"以":21926,"令":1161,"代":10675,"们":2426,"他":7464,"仙":673,"仅":1129,"仍":888,"从":3822,"今":2840,"亲":1398,"人":38353,"亡":667,"产":5405,"亦":2742,"交":20817,"京":3333,"云":2725,"五":3431,"亚":46408,"些":2757,"了":8577,"争":2347,"予":591,"事":19248,"二":6378,"于":42781,"使":59599,"低":1173,"住":1250,"位":13424,"但":4207,"作":14815,"何":21331,"体":8040,"伯":2492,"传":4668,"伦":1894,"伊":2356,"企":1149,"会":16875,"优":845,"休":48239,"众":1421,"信":7840,"俄":2213,"保":3387,"侧":721,"供":2204," 分":744," 
公":1819,"商":3626,"哥":1688,"品":4389,"响":1404,"和":23304,"周":2010,"呼":663,"命":2331,"员":7153,"启":817,"含":1431,"吴":771,"名":20347,"同":8476,"后":12165,"吉":2174,"合":7745,"各":3613,"向":3047," 号":1564," 台":812,"域":2609,"城":5315,"培":3476,"基":6619," 名":675,"址":719,"坦":1302,"坡":2693,"土":2149,"圆":692,"场":5846,"地":26817,"在":35727,"圣":2664,"回":1392,"四":5911,"团":4373,"因":6183,"园":2775,"围":1767,"图":2777,"国":50445,"器":3296,"况":701,"冰":646,"决":1729,"冲":743,"农":1200,"军":7136,"写":2568,"册":628,"再":1221,"内":8287,"击":1569," 丕":929," 世":1671," 丈":5741," 不":1283," 一":646," 丁":3946," 万":1431,"兰":5551,"共":7134,"关":4776,"兴":1795,"其":12214,"具":2485,"兹":634,"养":660,"入":4429,"全":7377,"八":1932,"六":2119,"公":17034,"党":2412,"元":4144,"克":7641,"光":2911,"先":2098,"免":623,"停":661,"原":6718,"压":796,"厂":831,"历":4017,"厅":672,"去":1176,"县":5347,"厦":614,"变":2770,"受":2511,"取":2479,"发":12789,"双":1530,"反":2487,"及":17913,"友":732,"又":5333,"参":2872,"司":7147,"号":5641,"台":13293,"可":7699,"只":2126,"口":4422,"化":7453,"包":4823," 何":732,"区":18524,"医":1928," 位":590,"南":15302,"单":3024," 使":1450,"博":1783,"升":971,"千":1226,"十":5342,"协":2640,"华":6868,"半":2037,"卷":760,"印":3243,"即":2844,"卡":3152,"卫":1639,"力":4944," 人":2100,"办":2818,"动":10155,"助":1407,"加":7203,"务":5246,"势":871,"劳":804,"勞":1639," 休":920,"分":15267," 个":2399," 中":1862,"刘":669,"则":3203,"创":4231,"初":2398," 之":647,"别":3560,"利":8681,"到":6365,"制":7035," 乘":758,"前":10878," 乱":770,"剧":3129,"副":860," 亚":733," 天":581," 大":1004,"工":9317,"已":3013,"巴":5253,"州":7529,"川":3028,"山":12651,"属":10137,"展":3788,"屋":1097,"届":1515,"局":2483,"层":1590,"居":1947,"尼":5208,"就":2894,"尚":3502,"少":2443,"小":25275,"将":4225," 多":650,"岸":1431,"岭":590,"岩":988,"岛":4996,"岁":688,"录":1700,"归":696,"当":5254,"形":4076,"役":939,"影":4410,"式":6888,"异":925,"开":7255,"引":4831,"张":2153,"弹":947,"强":1613," 小":894,"念":1709,"往":1560,"律":1732,"得":4936," 
家":600,"德":8327,"广":7680,"并":7400,"年":47488,"干":1556,"平":5989,"帝":2672,"帕":667,"师":2671,"希":2317,"布":7549,"常":6171,"席":1225,"带":2534,"延":769,"建":8867,"库":1470,"底":1106,"应":3484,"店":1431,"庙":584,"庆":953,"康":1164,"度":6769,"座":2603,"大":34268,"央":1321,"天":7035,"夫":2315,"太":3274,"头":2350,"失":939,"备":1259,"处":4116,"复":1700,"外":5857,"多":11036,"奥":3388,"女":3642,"好":1250,"奖":2274,"如":3936,"始":3351,"委":2437," 和":850,"增":971,"境":2254,"声":4998," 在":1140,"子":9623,"存":2538,"学":28915,"安":5174,"它":4574,"宁":1619,"定":6314,"实":4032,"宝":999,"宗":2085,"宫":1054,"客":2272,"宣":1095,"宾":780,"家":33525,"富":1265,"密":1755,"察":855,"导":2989,"对":7380,"威":2129,"媒":887," 国":713,"区,一":689,"更":2085,"曲":2749,"曾":3299,"是":61140,"映":608,"春":892,"显":972,"星":4368,"易":1898,"普":2707,"智":818,"晚":728,"架":926,"林":7036,"果":2196,"极":1459,"构":3349,"查":1580,"机":7949,"未":4221,"木":1876,"术":4302,"本":13564,"望":1007,"朝":3840,"期":7068,"朗":953,"月":15728,"有":24992,"服":2639,"最":10271,"松":1118,"杰":812,"杨":679,"来":9550,"条":3385,"村":1739,"李":1577,"杀":947,"杂":1265,"权":2775,"播":2492,"·丁":954,"·丈":771,"摄":716,"料":1677,"文":14966,"施":1293,"斯":13962,"断":823,"旁":959,"旅":1334,"族":3701," 或":968,"时":14785,"无":3416,"旧":1079,"日":21151,"早":2138,"放":2159,"改":3194,"收":1975,"支":2892,"教":9551,"故":1906,"数":7124,"整":1497,"括":3402,"拥":1168,"拉":7109,"拔":3550,"报":2620,"护":1665,"技":2723,"抗":915,"投":1684,"执":1062,"扩":636,"承":1002,"批":885,"所":11521,"手":3277," 年":38848," 平":1187,"打":1400,"户":1297,"房":1033,"戏":2673,"我":858,"成":16718,"或":10994,"战":6655,"提":4221,"推":2158,"控":1211,"接":3776,"排":1222,"换":868,"据":3024,"持":2131,"指":5889,"情":1949,"息":1214,"态":1462,"总":5550,"感":998,"游":3622,"温":1133,"港":10900,"清":2634,"湾":8336,"湖":3858,"源":3031,"满":827,"演":3628,"区,多":1252,"澳":2170,"气":1964,"民":10950,"水":5383,"江":5857,"汉":2630,"求":1033,"汇":1020,"沟":938,"沙":2368,"河":5488,"油":1041,"治":5001,"没":1471,"泽":750,"波":2738,"派":1913,"活":2831,"洲":5289,"测":1385,"济":2241,"流":4509,"消":1003,"深":1422,"横":631," 是":2568," 
月":13702,"区,常":666,"武":2208,"此":5466,"止":1051,"正":4102,"死":6678,"歌":2416,"次":4906,"欧":2758,"款":923,"母":1743,"比":4805,"毕":833,"毒":762,"案":1633,"桥":1371,"树":1023,"标":3335,"样":1395,"栽":2997,"根":2240,"核":1380,"校":3605,"楼":1695,"植":5185," 日":10844,"概":864,"石":3358,"知":2149,"省":5247,"着":1639,"皇":2205,"的":145656,"盖":717,"监":811,"盘":598,"目":9318,"直":3003,"白":2529,"百":1846,"登":1073,"病":1344,"町":605,"画":2451,"甸":900,"电":10706,"田":1994,"由":17497,"用":13911,"生":19204,"甘":1358,"略":871,"留":781,"界":6597,"理":9553,"球":6996,"班":1750,"玛":789,"王":5331,"现":8430,"环":2438,"独":1820,"状":1446,"片":2527,"牌":1136,"物":13958,"特":10095,"爱":2167,"照":1002,"然":2677,"热":1483,"点":3443,"火":1816,"灵":903,"缅":631,"缘":1067,"编":2591,"缩":977,"罗":6436,"网":4079,"置":1765,"署":901,"美":10457,"群":2218,"纳":2534,"纽":906,"线":7551,"红":1720,"约":5717,"级":4065,"纪":4115,"结":3008,"统":6645,"绝":650,"络":1469,"给":1157,"细":1259,"织":2183,"组":5795,"经":8789,"终":1135,"维":3625,"综":661,"绿":800,"继":1038,"续":1408,"索":1233,"素":1524,"类":5149,"米":9738,"系":8292,"等":12322,"策":957,"笔":614,"第":11099,"篇":721,"简":4298,"算":1987,"究":2923,"空":3846," 
的":2781,"程":4573,"竞":659,"站":6147,"立":9651,"童":907,"突":730,"票":886,"神":3463,"社":4420,"示":1536,"积":2267,"移":1075,"称":17001,"私":738,"科":10282,"离":1553,"福":3324,"确":1094,"础":692,"破":649,"码":1376,"要":8323,"视":4552,"规":2234,"观":2303,"见":2171,"览":667,"角":3015,"解":2300,"西":17559,"被":7905,"街":2230,"行":14959,"表":5479,"认":2753,"计":5438,"训":622,"讯":1488,"议":3045,"让":773,"记":2357,"设":6237,"许":1740,"论":3372,"该":4765,"说":3972,"诸":678,"诺":1644,"读":946,"证":1510,"识":1123,"评":1093,"词":2012,"译":3205,"诗":967,"试":1000,"话":1875,"警":737,"言":2970,"路":9754,"越":2100,"超":1907,"足":2749,"资":4130,"赛":5990,"起":4293,"负":1207,"贝":1346,"财":832,"责":1208,"败":623,"货":901,"质":2689,"购":664,"贵":1643,"费":1407,"象":1888,"调":1492,"谷":1826,"过":6354,"进":5216,"还":1606,"这":6952,"连":2790,"远":1319,"运":6417,"近":3105,"辽":711,"达":4214,"边":2785,"较":2013,"辖":1462,"输":1210,"辑":1374,"车":6585,"轨":655,"转":1925,"轮":727,"软":1659,"轻":756,"载":1404,"身":2869,"脑":1283,"脉":651,"腊":1076,"致":1313,"至":9855,"自":8792,"而":9496,"者":7418,"职":2157,"联":6161," 米":6968,"肃":1126,"股":1262,"育":2953,"胜":1031,"能":5645,"药":977,"荣":690,"草":3994,"获":2004,"莱":1327,"菲":926,"般":2569,"航":2316,"舰":942,"艺":2269,"艾":805,"色":3575,"花":3115,"节":2402,"英":9686,"苏":3117,"著":2998,"营":2531,"萨":1866,"落":1487,"蓝":701,"蒂":676,"虽":721," 英":812,"频":899,"题":1997,"预":942,"领":2926,"食":1377,"飞":1436,"风":2500,"顿":1362,"顺":631,"项":2377,"顶":716,"页":780,"馆":2371,"香":9519,"验":1104,"高":8988," 阿":664,"马":7083,"鲜":1464,"鲁":1516,"鱼":955," 
香":692,"黑":1900,"黄":2293,"龙":3881,"造":2929,"选":3181,"送":619,"适":644,"通":7604,"速":2166,"遗":895,"邻":1194,"郡":912,"部":14531,"都":5243,"配":1056,"金":5677,"野":1099,"量":3834,"里":8179,"重":5583,"释":634,"银":1422,"铁":4586,"针":594,"钟":734,"长":12428,"镜":635,"镇":2371,"锡":594,"锦":662,"销":827,"闻":1352,"间":8313,"问":1446,"门":4685,"际":4272,"陆":5534,"陈":1215,"降":626,"限":2379,"陕":1187,"院":5166,"除":1569,"险":710,"队":3990,"阶":1080,"阴":725,"防":1166,"阳":2041,"阿":4439,"随":1152,"难":627,"隶":660,"青":2398,"非":3368,"面":6531,"需":1195,"音":4816,"韩":1018,"가":589,")":53342,"(":53630,"-":1381,",":211761,":":15751,";":5849," (":2857," )":2917," ,":8290,"国的特":945,"植物。":2891,"植物,":1199,"基丁教":662,"等地,":3010,"民共和":1261,"。 ":4085,"、 ":2429,"》 ":659,"地区,":3536,"。这":1769,"、陕":800," 、 ":1160,"、福":680,"。现":602,"、甘":833,"。由":929,"、西":1056,"、贵":865,"。该":1506,"四川、":1228,"、广":1717,"、山":2033,"国大陆":3832,"、安":622,"。它":1480,"、四":1328,"。在":1915,"、台":689,"。分":2961,"、印":861,"。其":1173,"、湖":1562,"、河":1285,"、江":1104,"。此":760,"》是":802,"、日":634,"ああ":1707,"、《":1198,"》、":1065,"》中":595,"《丈":900,"、休":601,"。他":1369,"、云":1236,"。乘":742,"、人":909,"。丁":940,"、不":1336,"、丁":2633,"、丈":4209,"、乘":1367,"、中":959,"。丈":1034,"アア":1643,"地区的":687,"》(":2220,"》,":1819,"在中国":1180,"栽培。":2912,"、贵州":848,"是香港":1721,"立于 ":1061,"从 ":609,"人 ":623,"以 ":631,"于 ":10544,"了 ":641,"亚·":946,"亚 ":1478,"休 ":1064,"会 ":652,"使 
":1090,"不、":2097,"不。":742,"丈》":600,"丈、":2959,"丈。":2119,"丁、":2210,"丁。":2121,"一。":1805,"东、":1223,"中、":800,"人。":1182,"事。":826,"亚、":1600,"亚。":687,"交。":780,"丈山":758,"丈属":948,"不家":1629,"之后":1285,"丈小":1097,"丈家":1182,"丁属":1251,"中国":13581,"丈子":859,"丈学":961,"一家":830,"乱丈":1142,"为台":739,"丈大":637,"举办":651,"丁大":616,"何。":612,"乘丁":814,"乘一":737,"乘丈":920,"中华":3016,"乘丛":864,"丈地":713,"之一":4710,"丁国":1036,"丁地":630,"东南":852,"主义":1789,"为主":1114,"为中":1501,"为丈":1939,"为一":949,"为丁":953,"丈和":636,"为了":902,"不同":2049,"中使":2667,"不区":1235,"中一":647,"不军":620,"个人":1067,"丈克":1016,"丈其":691,"不公":924,"两个":961,"不使":1931,"休。":1293,"休、":1110,"丁克":614,"东不":1592,"丛中":711,"丁军":992,"丁兰":618,"不交":3736,"不京":1465,"不事":949,"丈休":1776,"丈使":1087,"不休":658,"丕丈":949,"丈不":1018,"丈丈":10643,"丈丕":925,"丈丞":767,"丈中":605,"丁休":2172,"丈为":629,"丈之":694,"上不":1488,"一位":1048,"丈乘":1009,"丈乱":1342,"丁作":779,"丈事":642,"丁使":3178,"丈交":632,"不丈":1417,"不丁":1361,"丈亚":1470,"不不":1140,"与丈":673,"一丈":695,"丁丈":5202,"丁丁":3673,"丁不":1113,"一个":9742,"丁乘":875,"一亚":4760,"一些":1051,"丁乱":789,"丁亚":2029,"份、":719,"丈一":1128,"丈丁":6021,"主教":1120,"任何":656,"于山":605,"交大":1013,"企业":965,"为是":626,"中文":1113,"人口":1818,"一次":948,"丈林":809,"他们":991,"产品":768,"事场":701,"以丈":590,"人使":892,"丁是":769,"丈是":785,"一条":1013,"不拔":3372,"二十":706,"人乘":690,"云南":1737,"人丁":945,"人丈":706,"主席":689,"丁教":806,"于台":1035,"元 
":583,"丁斯":907,"丁文":660,"一所":719,"亚使":601,"交休":767,"事务":628,"乘大":1199,"亚丈":1056,"不式":1110,"亚丁":910,"中学":1267,"亚亚":814,"交不":722,"使。":1718,"使、":1407,"丁店":632,"一座":888,"中央":1281,"于丁":1020,"于不":3550,"于丈":1169,"事交":1241,"于中":1731,"中的":2873,"休如":834,"丈科":1509,"亚栽":2911,"东省":644,"份年":663,"使休":784,"使代":599,"使中":607,"二次":594,"使丈":1433,"使上":903,"使丁":935,"下的":912,"世界":3851,"上的":1685,"何丁":623,"丈的":3617,"于日":763,"乘江":1087,"位于":6910,"丁的":3074,"不生":996,"作为":1517,"不的":1824,"也有":715,"不现":1191,"份围":818,"人工":3099,"丈球":620,"休丈":1574,"休丁":1348,"以及":5720,"不治":1654,"也是":2441,"休休":1177,"休使":948,"交的":754,"何成":650,"但是":741,"人的":1301,"事的":883,"人物":960,"丁西":582,"产生":952,"亚的":1008,"一般":2436,"人民":2356,"丈至":636,"丈草":591,"使团":1466,"前 ":886,"作家":983,"作品":1438,"亚洲":979,"使兰":933,"世纪":2136,"乘的":934,"到 ":1169,"丈 ":2639,"丈·":660,"丁 ":1901,"丁·":957,"为 ":5249,"地、":838,"国、":813,"又译":658,"发行":1130,"可能":964,"南部":1207,"名称":1365,"后的":584,"名的":1485,"同的":919,"商业":792,"使(":777,"使,":2980,"又称":2280,"台湾":6649,"位,":691,"发现":998,"体,":607,"何,":1182,"作,":878,"发生":1318,"后来":990,"只有":591,"南等":606,"休(":899,"休,":2609,"和国":2085,"会,":853,"会(":981,"同时":1360,"在 ":4342,"命名":845,"份,":752,"国 ":738,"区的":1567,"和丈":1029,"和丁":641,"前身":709,"员会":1240,"交,":1266,"制造":737,"反应":687,"人,":2362,"化的":604,"事,":1473,"亚,":1453,"亚(":751,"亚:":3961,"华民":1238,"发展":2345,"多生长":627,"国的":3079,"国王":741,"城事":1744,"因此":1473,"国民":1000,"基丁":1219,"分,":694,"四川":1896,"地区":6955,"国大":4205,"在台":652," 公里":1220,"国家":4191,"在不":766,"在丈":1143,"在丁":899,"地不":1517,"在中":1500,"内,":588,"国国":1280,"因为":1284,"团体":692,"国丁":732,"国丈":695,"国不":1345,"国人":990,"培。":2913,"公园":1063,"全国":1074,"共和":2100,"共同":711,"军事":699,"公司":6085,"和 ":1051,"克斯":708,"分丁":674,"分为":837," 丈丈":703,"及 
":757,"内信":885,"其他":1807,"内丁":823,"公共":779,"其中":1933,"俄罗":1205,"交通":1010,"代表":1920,"使用":3270,"使理":2624,"使的":2300,"何的":1007,"之间":1601,"作用":691,"何用":717,"传统":899,"作的":824,"何能":734,"九龙":820,"于香":923,"会议":766,"人闻":930,"体育":809,"使究":2862,"不部":1509,"代的":607,"人类":940,"使本":654,"丁近":677,"举行":1293,"主要":4270,"于美":614,"一部":1591,"丈车":720,"他的":941,"专辑":636,"信息":699,"保护":898,"亚言":1720,"休画":658,"休的":1686,"会的":752,"不面":1014,"及丈":729,"及中":2186,"下,":946,"上,":1461,"丈(":2586,"丈,":5165,"参与":744,"半岛":715,"不,":1460,"一,":2377,"丁)":1039,"丁,":4237,"丁(":1706,"包括":3325,"参加":688,"动画":653,"中,":3371,"及其":661,"业,":586,"原名":724,"动物":1359,"名为":1425,"乘,":704,"各亚":724,"又名":874,"印度":2180,"可以":2687,"台不":1436,"乱,":730,"名使":755,"合作":595,"包含":732,"单位":907,"员。":605,"华人":1471,"协会":1039,"南不":934,"公路":1194,"区域":737," 世纪":1447,"化学":839,"分类":693,"利用":693,"医学":647,"历使":2363,"创立":811,"公里":2086,"前尚":2725,"内的":698,"关系":1044,"名。":910,"创作":725,"分别":959,"利亚":1729,"共有":696,"制作":1175,"创办":644,"具有":1055,"区、":855,"区。":806," 中国":719,"南、":2847,"分子":582,"分布":3839,"全球":752,"加乘":908,"州事":599,"工业":864,"工作":1348,"实际":605,"巴乘":819,"属的":3366,"学院":1919,"度、":646,"帝国":1098,"工引":2908,"平不":1296,"山谷":602,"并丁":847,"年代":1820,"广东":1877,"广乘":652,"布在":1655,"山西":700,"小都":652,"布于":1877,"建、":593,"巴使":874,"小说":1496,"、陕西":793,"场,":659,"尚未":2839,"定的":781,"州、":1204,"地,":4149,"川、":1254,"它的":633,"山坡":1678,"学的":914,"国,":704,"学生":932,"小的":1375,"德·":778,"就是":947,"家的":1013,"应用":955," 年 ":13492,"形式":842,"形成":784,"德国":1532,"或 ":819,"影响":1080,"德亚":680,"希腊":1007,"年的":1358,"广西":1228,"建立":1106,"当时":1404,"年至":809,"拔 ":3155,"外,":1036,"广场":674,"广州":765,"建于":690,"建丁":1787,"年在":775,"处,":614,"开始":2055,"工程":1106,"已经":808,"常生":724,"引亚":2921,"广播":596,"开发":1428,"大学":4751,"名:":4034,"后,":2099,"名,":804,"大战":639,"号,":798,"司,":904,"司(":706,"太平":714,"员,":1310,"委员":1628,"多生":1267,"处理":904,"国立":626,"动,":770,"基本":664,"境内":659,"声任":875,"区,":4940,"基础":690,"国际":3114,"大利":1318,"大使":1012,"家 
":943,"天休":592,"天主":779,"大丈":869,"学使":616,"大陆":4400,"它们":581,"安丁":901,"学名":3799,"定义":596,"存在":833,"岛、":590,"家亚":645,"家丈":713,"家丁":849,"学家":1872,"对于":704,"年 ":13993,"小丈":990,"家国":1673,"尼亚":1342,"学校":1549,"它是":712,"家律":640,"属于":2022,"宗教":606,"山东":748,"大的":1819,"品,":659,"未由人":2713,"学、":1095,"媒体":593,"家、":1079,"家。":1319,"学中":593,"大部":622,"是由":1948,"最大":1527,"成,":1007,"最早":864,"是美":1276,"日至":621,"林中":937,"林下":866,"林丈":747,"是日":749,"最丈":683,"教育":1765,"斯特":879,"是指":1893,"最后":916,"服务":2010,"时的":619," 日 ":1067,"有一":1032,"有丈":634,"有关":606,"栽培":2968,"曾经":583," 月 ":10460,"期的":814,"未由":2714,"有的":742,"来的":817,"是香":1732," 年,":2603," 年)":2400,"机构":1217,"有植":939,"有时":593,"时间":1469,"标何":1254,"故事":840,"教丁":619,"教会":604,"教休":765,"文使":804,"、湖不":767,"斯丁":803,"文化":2111,"、江西":640,"式,":710,"是台":693,"是在":1454,"早期":592,"时期":1763,"日本":5547,"数学":761,"、湖南":720,"名:)":3257,"时代":1179,"斯坦":867,"文学":1074,"数据":868,"是位":626,"是以":583,"是中":2851,"日在":675,"是丁":1437,"是一":8458,"是不":724,"是丈":2026,"是 ":2352,"推不":777," 年的":914,"或称":642,"成立":2637,"技术":1644,"成的":1249,"日 ":1181,"斯·":1031,"所有":1061,"拉丁":1110,"委员会":1211,"投资":588,"月 ":10522,"有 ":2350,"年(":1135,"年)":2514,"年,":3020," 年至":781,"提不":749,"提供":1571,"控制":687,"或者":862,"拥有":1075,"家(":883,"家,":2992,"学(":696,"学,":1273,"子,":932,"成。":674,"年间":605," 年在":695," 年代":1447," 平不":1027,"所以":844,"成员":1194,"属(":1286,"成为":2403,"战争":1282,"成何":673,"车站,":720,"总统":750,"小,":1060,"流行":630," 日)":775," 日,":1367,"没有":1287,"活动":1238,"比赛":1111,"江苏":807,"江西":972,"湖不":1117,"湖南":1294,"源于":655,"游戏":1924,"江、":1010,"斯(":605,"文:":2387,"毕业":667,"时,":1279,"死关":792,"日,":1548,"日)":813," 日至":616,"、广西":861,"民国":1569,"民主":899,"欧洲":1589,"民共":1266,"正式":1275,"河南":853,"月,":734,"民族":1066,"期,":613,"河不":810,"称为 ":1204,"根据":1261,"来自":905,"朝鲜":1221,"最高":965,"期间":1013,"有限":1519,"植物":4924," 是一":701,"概念":628," 日在":655,"社会":2250,"的西":629,"的重":753,"第 ":1806,"的第":1619,"物,":2113,"目的":1041,"的是":1254,"的最":739,"的植":2910,"称 ":1447,"积 
":789,"生长":4500,"直接":612,"的特":1539,"的电":996,"的小":1525,"的家":638,"电脑":1011,"的大":1230,"的国":1201,"的基":581,"的地":4246,"电视":2674,"目前":4402,"的丁":3401,"的一":9462,"用的":1236,"生的":842,"球队":888,"生物":1143,"甘肃":1053,"的名":795,"的发":604,"的休":1722,"的使":1923,"的何":678,"的作":618,"的主":1363,"的中":1197,"的丈":5079,"的不":2462,"的乱":628,"的乘":866,"的人":1919,"的交":1075,"的亚":598,"的份":596,"用来":773,"电影":2044,"理论":1071,"生活":843,"生在":746,"电子":1067,"的。":902,"的《":764,"由人":3013,"由于":1683,"用于":1226,"生于":2477,"生产":971,"理学":1070,"王朝":657,"独立":1054,"现在":1080,"环境":892,"的 ":3036,"用。":732,"现代":899,"班丁":801,"物理":701,"特有":1087,"由 ":647,"特别":1034,"澳门":921,"、福建":587,"物。":3636,"而成":666,"网络":1319,"等,":840,"。由于":637,"结构":811,"称,":606,"统治":615,"美国":6262,"有限公":1342,"经济":1788,"线的":727,"统的":590,"群岛":661,"罗斯":1403,"组织":2012,"联休":1568,"联合":1420,"经营":801,"美洲":847,"站,":1417,"站(":605,"立,":948,"约丈":612,"系统":2897,"、甘肃":804,"结丁":584,"纪念":701,"缩写":612,"至 ":5691,"肃、":800,"自 ":728,"组成":1458,"米的":3087,"米至":2745,"空间":581,"简称":3399,"目,":621,"系使":1617,"等地":3350,"的,":1232,"算机":592,"约 ":1747,"天主教":743,"立的":1250,"立于":1279,"生,":616,"用,":772,"科技":622,"第三":1132,"第一":3441,"第二":2122,"科丈":1055,"等。":1256,"科学":1716,"福建":1133,"称为":4237,"站。":627,"角色":608,"认为":1515,"西部":624,"计份":1242,"译为":652,"西班":773,"要的":1104,"视台":583,"被称":1048,"设计":1764,"越南":953,"贵州":1074,"计算":1077,"设立":673,"赛事":631,"过 ":590,"达 ":635,"许多":1084,"这亚":795,"这些":921,"这个":1485,"运动":2493,"选举":733,"超过":649,"软交":1060,"路线":871,"负责":846,"资讯":628,"足球":1709,"行,":1162,"资料":726,"都是":704,"说,":693,"进行":2271,"华民国":1215,"过程":791,"部分":2341,"部份":779,"车站":2410,"通常":1655,"连接":661,"系,":584,"英亚":2018,"自治":726,"自然":791,"英国":2374,"艺术":1217,"自由":876,"般生":684,"航空":1141,"草地":757," 米的":3008,"罗马":1148,"自丁":786," 米至":2742,"米,":928,"联赛":872,"者,":759,"节目":923,"英文":2834,"苏联":584,"获得":1131,"线,":677,"著名":1735,"是台湾":602,"虽然":581,"行。":771,"规份":679,"西南":748,"行的":1255,"西份":1008,"西亚":1199,"西不":1211,"行为":642,"行不":2204,"西、":2735,"中,目":813,"马来":712,"香港":8632,"高丁":607,"间,":1121,"队,":590,"高速":865," 
香港":632,"及中国":2044,"公里,":1108,"是一个":2236,"是一亚":2252,"是位于":587,"丈(学":778,"是中国":2142,"部的":1067,"通过":1270,"赛,":689,"长 ":666,"路,":847,"重要":1745,"的第一":601,"铁家":726,"重要的":693,"长于":3283,"长在":1116,"部,":1121,"除了":587,"限公":1343,"阿亚":611,"间的":1117,"阿拉":760,"铁路":1902,"银行":879,"陆的":3096,"里,":1518,"陕西":1134,"音乐":1761,"面积":1528,"问题":913,"非洲":689,"领域":706,"需要":625,"项目":737,"有植物":938,"的特有":970,"华人民":1156,"是美国":1183,"生长在":1097,"的植物":2905,"生长于":3254,"日至 ":606,"前尚未":2719,"最大的":950,"加乘大":716,"是日本":698,"目前尚":2723,"的地区":3123,"甘肃、":797,"的一部":774,"的主要":669,"年),":737,"的一亚":1477,"的一个":3092,"限公司":1342,"年( ":710,"长于不":2913," )是":697," ,是":1072,"南等地":586,"),":14139,"()":3758,"(,":1176,",)":1561,":,":733,":)":4083,"由人工":2914,",有":1486,",最":1122,",曾":1063,",是":13170,")是":10848,",此":660,",总":746,",当":886,",并":4017,",常":1051,",故":644,",指":584,",所":997,",或":1328,",成":1293,")的":2201,",目":3648,",第":777,",简":2029,",现":1341,",由":3911,",用":638,",生":3542,",西":833,",被":862,",该":1018,",经":598,",美":598,",而":3223,",英":974,"(英":2411,",香":721,",这":1581,",通":884,",《":690,")。":3713,")、":2880,"(丁":582,"(学":3658,",它":1417,",属":841,",小":789,",因":2416,",在":4524,",多":1573,",大":803,",如":965,",前":662,",分":865,",南":723,",即":1029,",包":1287,")和":952,",后":1255,",同":895,",台":750,",可":1101,",又":2947,",原":1270,",与":1258,",不":2776,",东":861,",中":1697,",主":1658,",为":3616,"(今":666,",乘":1464,",也":3256,",乱":593,"(丈":745,")为":2108,",丈":5138,",丁":4254,",一":2351,",休":1379,",但":3353,",位":2756,",使":1942,",交":917,",亦":1195,",于":2786,",人":883,",以":3498,",他":1341,",从":880,",份":844,",共":756,",其":3552,",全":1258,"( ":4029,") ":1194,", ":8207,"- ":821,": ":1177,"于台湾":746,"云南、":1087,"丁属(":793,"中国的":1325,"中国大":3854,"之一,":2267,"不拔 ":3152,"于不拔":2931,"于中国":1363,"丈属的":710,"人工引":2908," ),":840,"乘江、":593,"特有植":935,"位于香":588,"于日本":718,"交大利":915,"以及中":2006,"一般生":684,"英文:":1509,"亚栽培":2910,"陕西、":802,"人民共":1261,"广西、":800,":)为":1432,":)是":1905,"),又":980,"),是":2411,"()是":1113,",),":1007,"著名的":779,"(),":1527,"年至 ":713,"面积 
":773,"于香港":916,"之间的":629,"内丁使":615,"成立于":917,"英亚:":1340,"俄罗斯":1205,",香港":708,"物。分":2814,"公司(":690,"公司,":865,"行不区":1022,"共和国":1949,"分布于":1823,"分布在":1639,"》、《":946,")是丈":621,")是一":2670,",是一":2180,",是中":1286,"湖南、":689,"あああ":1321,"尚未由":2713,"地,生":2896,",并丁":652,",常生":669,",所以":699,",又称":1386,",台湾":584,",因此":1152,",多生":1256,"(学名":3651,"。分布":2922,"、印度":793,"、乘江":625,"、云南":1182,"被称为":866,"、山坡":669,"、广东":743,"、四川":1251,"大陆的":3080,"アアア":1275,"学名:":3527,"(英文":1335,"(英亚":1020,",目前":3438,",生长":2942,"西班丁":773,",简称":1929,"工引亚":2908,"平不公":791,"贵州、":836,"广东、":761,"引亚栽":2908,",包括":1127,"米的地":2908,",又名":585,",在 ":658,",其中":943,",以及":1003,",也是":1477,",位于":2555,",一般":1217,",主要":1207,",为中":658,",中国":911,"属的植":2824,",于 ":2118,"丁使、":641,"丈丈丁":631,"计算机":592,"江西、":618,"湖不、":763,"不同的":722,"米至 ":2743,"之一。":1753,"中华民":1237,"中华人":1156,"为中国":948,"不公里":767,"丈丈,":642},"n_words":[4792118,1709982,314544],"name":"zh-cn"} -------------------------------------------------------------------------------- /langdetect/profiles/zh-tw: -------------------------------------------------------------------------------- 1 | {"freq":{"·":11773,"é":695,"區,常":664,"и":659,"о":642,"а":705," 《":2780," 。":1867," 、":1053,"あ":2229,"。":93215,"、":80530,"》":12775,"《":12789,"」":16776,"「":16978,"ア":2133,"九":2518,"也":8240,"乘":27365,"之":17356,"久":750,"主":15490,"丼":1230,"中":47219,"並":6623,"丞":85837,"丕":96789,"世":9230,"丈":143774,"三":7943,"上":15307,"下":7965,"不":69580,"一":51222,"丁":112603,"七":1691,"份":8990,"任":5556,"以":21889,"令":1163,"代":10588,"他":7458,"仙":669,"仍":884,"今":2840,"人":38053,"亡":666,"亦":2741,"交":18627,"京":3334,"五":3430,"些":2759,"亞":10406,"了":8549,"予":591,"事":19252,"二":6375,"使":59515,"低":1174,"住":1250,"位":13512,"但":4206,"作":14816,"何":16276,"佐":33991,"伯":2485,"伊":2316,"企":1149,"休":48236,"信":7776,"俄":2093,"係":1210,"保":3393,"來":9528,"供":2206," 分":709,"單":3071,"問":1448," 公":2117,"商":3624,"員":7098,"哥":1699,"品":4392,"和":23261,"周":1446,"呼":664,"命":2330,"含":1432,"名":20323,"同":8443,"吉":2089,"合":7724," 
倫":1516,"各":3613,"向":3031,"域":2612,"城":5349,"執":1078,"培":3458,"基":6608," 名":633,"址":721,"坦":1100,"坡":2688,"團":4364,"土":2148,"園":2775,"圖":2763,"國":50470,"地":26714,"在":35708,"回":1107,"四":5909,"因":6124,"嚴":592," 勞":1513,"器":3287,"冰":646,"再":1221," 丞":2854," 丕":3070," 世":1592," 丈":4439," 不":917," 丁":3186,"優":845,"共":7054,"其":12215,"具":2485,"入":4421,"內":8167,"兩":4796,"全":7445,"八":1932,"六":2118,"公":17482,"兒":1569,"元":4231,"克":7487,"光":2846,"先":2097,"免":625,"傳":4645,"價":1006,"倫":51626,"個":19430,"們":2429,"停":661,"原":6791,"去":1179,"受":2509,"取":2465,"反":2477,"及":17959,"友":732,"又":5332,"參":2854,"司":7145,"台":11564,"可":7754,"只":1856," 個":2397,"口":4174,"化":7454,"包":4825," 佐":611," 位":594,"南":15370,"協":2646," 使":1198,"博":1769,"升":830,"千":1228,"十":5341,"區":18522,"半":2039,"卷":655,"印":3241,"即":2842,"卡":3151,"劃":2056,"劇":3028,"力":4955," 人":1795,"助":1406,"加":7145,"勞":44204,"務":5038,"動":10173," 休":691,"分":15261," 中":1418,"初":2396,"別":3548,"利":8617,"到":6362,"制":3819," 乘":601,"則":3204,"前":10854,"副":861,"創":4236," 大":752,"工":9304,"已":3034,"巴":5247,"州":7520,"川":3027,"山":12644,"屬":10111,"展":3787,"屋":1099,"局":2380,"居":1946,"尼":4905,"就":2895,"尚":3522,"少":2442,"對":12131,"小":25320,"導":2988,"專":3315,"將":4225,"島":4996," 多":609,"岸":1457,"岩":955,"彈":1036,"形":4081,"役":939,"影":4543,"式":7142,"引":4806,"張":2155,"強":1613," 小":712,"念":1686,"往":1561,"律":1728,"後":11603,"得":4825,"從":3819,"德":8443,"幹":610,"年":47473,"平":5943,"帝":2671,"帕":627,"希":2358,"布":6774,"常":6175,"帶":2450,"師":2700,"席":1237,"延":769,"建":8865,"廣":7680,"廠":829,"底":1070,"店":1431,"康":1164,"度":6845,"座":2605,"大":34205,"央":1335,"天":6785,"夫":2310,"太":3614,"失":939,"外":5866,"多":11013,"奧":3348,"女":3636,"好":1255,"如":3939,"始":3424,"委":2437,"場":5840,"報":2620," 和":619,"增":971,"境":2254," 
在":1006,"子":9582,"存":2418,"學":28894,"安":5183,"它":4320,"定":6314,"宗":2121,"宮":11665,"客":2255,"宣":1095,"家":33259,"富":1263,"密":1751,"察":902,"寫":2568,"實":4042,"威":2163,"媒":887,"更":2088,"曲":2693,"曾":3297,"書":4421,"是":61135,"映":606,"春":891,"星":4383,"易":1898,"普":2726,"智":821,"時":14789,"晚":728,"架":926,"林":7015,"果":2193,"查":1544,"未":4199,"木":1871,"本":13516,"望":1009,"朝":3760,"期":7068,"朗":950,"月":15722,"有":25006,"服":2638,"最":10258,"會":16872,"松":993,"東":12692,"村":1739,"李":1578,"播":2488,"擊":1563,"據":2801,"·丁":953,"·丕":684,"·丈":742,"料":1908,"文":14926,"於":85512,"施":1293,"斯":13761,"旁":958,"旅":1333,"族":3698," 或":592,"日":21067,"早":2137,"放":2161,"改":3194,"收":1971,"支":2879,"教":9547,"故":1906,"數":7013,"整":1506,"括":3403,"拉":7094,"拔":3534,"技":2726,"抗":915,"投":1689,"承":1001,"批":886,"所":11497,"手":3280," 年":38814," 平":1137,"打":1356,"戰":6668,"戲":2673,"房":1031,"我":858,"成":16686,"或":10999,"提":4217,"推":2160,"控":1213,"接":3506,"排":1256,"持":2131,"指":5892,"情":1951,"息":1210,"應":3488,"感":1000,"愛":2219,"游":601,"測":1417,"港":10903,"清":2633,"湖":3859,"源":2955,"滿":823,"漢":2629,"演":3758,"澳":2170,"濟":2246,"民":10942,"水":5379,"氣":1960,"江":5858,"求":1019,"決":1728,"沒":1469,"沙":2360,"河":5480,"油":1041,"治":5045,"波":2725,"派":1934,"活":2827,"洲":5278,"流":4541,"消":1003,"深":1422,"機":7988,"樓":1698,"標":3283,"樂":5292," 是":2270," 月":13683,"武":2208,"此":5462,"止":1051,"正":4102,"歷":3537,"歲":679,"死":6673,"歌":2412,"歐":2776,"次":4908,"款":922,"權":2775,"母":1810,"比":4792,"毒":761,"殺":3118,"案":1653,"栽":2979,"根":2237,"核":1380,"校":3603,"條":3385,"楊":679,"業":8348,"植":5169,"構":3354," 
日":10789,"概":795,"石":3331,"知":2143,"省":5241,"眾":1402,"皇":2166,"的":145617,"目":9288,"直":3002,"發":12643,"白":2538,"百":1845,"登":1101,"病":1342,"町":605,"甸":897,"田":1992,"由":17477,"用":13939,"產":5389,"生":19162,"甘":1400,"當":5199,"畫":2821,"略":871,"留":781,"界":6587,"環":2430,"理":9565,"球":7020,"區,一":683,"現":8423,"班":1762,"王":5328,"獎":2259,"獲":1989,"片":2524,"牌":1136,"物":13950,"特":9797,"爭":2331,"爾":10487,"營":2531,"照":1001,"然":2724,"無":3430,"灣":8336,"火":1815,"置":1766,"署":979,"羅":6361,"美":10443,"群":2244,"義":5695,"習":841,"總":5592,"縣":5350,"繼":1038,"續":1407,"索":1368,"素":1525,"納":2465,"約":5697,"紀":4111,"級":4056,"統":6648,"組":5814,"結":3076,"綠":801,"維":3676,"網":4429,"經":8789,"綜":642,"編":2471,"線":7038,"簡":4320,"米":9228,"系":6773,"等":12258,"策":935,"第":11129,"篇":721,"節":2370,"算":1983,"積":2283,"究":2927,"空":4143," 的":1486,"程":4595,"稱":17006,"種":13270,"站":6147,"立":9659,"競":659,"童":907,"突":731,"票":889,"神":3459,"區,多":1245,"社":4421,"示":1588,"移":1020,"私":736,"科":10232,"福":3334,"破":649,"要":8322,"規":2224,"視":4488,"親":1395,"觀":2300,"角":3003,"解":2370,"西":17796,"被":7902,"製":3213," 萬":1397,"衛":1641,"街":2230,"術":4294,"行":14995,"表":5436,"變":2770,"譯":3201,"警":737,"議":3043,"護":1664,"證":1490,"調":1479,"說":3940,"語":12000,"認":2740,"論":3370," 號":1554,"設":6262,"記":2497,"計":5475,"訊":1564,"言":2973,"該":4761,"話":1875,"評":1093,"路":11367,"越":2102,"超":1905,"足":2739,"起":4291,"賓":786,"資":4396,"賽":6003,"質":2688,"費":1408,"貨":906,"責":1207,"象":1864,"谷":1752,"近":3107,"辦":2818,"農":1197,"轉":1925,"較":2015,"載":1405,"車":6562,"身":2869,"致":1208,"至":9837,"自":8751,"而":9484,"者":7426,"聞":13745,"聖":2663," 米":6617,"聯":14082,"聲":1069,"肅":1122,"股":1263,"育":2954,"能":5599,"草":3974,"菲":888,"華":6888,"般":2564,"航":2033,"興":1795,"舉":3075,"與":14328,"艦":940,"艾":758,"色":3577,"花":3113,"英":9681,"藝":2269,"藥":973,"葉":1953,"著":4637,"萬":2757,"落":1486,"蒂":673,"處":4121,"號":5640,"蘭":5557,"蘇":3088," 英":759,"風":2494,"食":1389,"飛":1667,"領":2927,"預":955,"項":2370,"類":5147,"馬":6983,"香":9518,"館":2367,"體":9444,"高":8889," 阿":586," 
香":621,"點":3460,"黑":1831,"黃":2289,"黨":2390,"龍":3877,"連":2837,"造":2926,"進":5235,"送":620,"這":6954,"通":7591,"速":2179,"遺":895,"選":3181,"過":6349,"運":6466,"遊":3024,"達":4169,"還":1604,"邊":2782,"郡":912,"部":14529,"都":5256,"配":1052,"醫":1928,"金":5699,"野":1096,"量":3732,"里":6615,"重":5591,"銀":1423,"錄":1696,"鎮":2372,"鐵":4592,"間":8271,"開":7281,"門":4672,"降":626,"限":2369,"院":5168,"除":1568,"陳":1215,"陸":5523,"陽":2042,"防":1167,"阿":4360,"離":1505,"難":627,"電":10735,"雲":2700,"隊":3986,"際":4571,"青":2396,"非":3371,"面":6432,"需":1196,"響":1404,"音":4823,"가":589,")":53309,"(":53597,"-":1381,",":211682,":":15729,";":5850," (":1546," )":2103," ,":4859,"車站,":720,"國的特":944,"植物。":2875,"植物,":1199,"基丁教":662,"等地,":2994,"民共和":1261,"。 ":3480,"、 ":1427,"」 ":591,"》 ":624,"國大陸":3816,"地區,":3519,"。這":1771,"、雲":1235,"」的":1264,"、福":680,"。現":601,"、甘":835,"。由":929,"、西":1055,"。該":1503,"四川、":1228,"、廣":1717,"、山":2030,"、安":622,"。它":1379,"、四":1327,"。在":1916,"、台":622,"。分":2945,"、勞":792,"、印":867,"、倫":1945,"。其":1173,"、湖":1562,"、河":1284,"、江":1104,"。此":759,"》是":802,"、日":634,"ああ":1707,"、《":1196,"、「":974,"》、":1063,"」、":966,"」。":2150,"》中":594,"「丕":727,"「丞":592,"《丈":880,"、休":613,"。他":1366,"「丈":1131,"《丕":588,"。乘":718,"、人":863,"、丞":3044,"。丁":930,"、丕":3444,"、不":1334,"、丁":2563,"、丈":4244,"、乘":1329,"。丞":586,"。丕":1165,"、中":959,"。丈":1017,"アア":1643,"地區的":687,"」(":1271,"」,":3388,"」)":800,"》(":2216,"》,":1818,"在中國":1179,"立於 ":1061,"栽培。":2896,"是香港":1720,"休 ":665,"使 ":756,"不、":2094,"不。":739,"丈」":981,"丈》":585,"丈、":2906,"丈。":2027,"丁、":2136,"丁。":1921,"一。":1804,"丞、":1947,"丞。":1850,"丕」":693,"丕。":1789,"丕、":2245,"中、":800,"人。":1182,"事。":826,"交。":660,"亞、":852,"倫 
":745,"丈山":729,"丈屬":937,"不家":1629,"丈小":1088,"丈家":1181,"丁屬":1243,"中國":13565,"丈學":945,"丈子":859,"一家":831,"丕大":655,"乘倫":1335,"丈大":605,"丕地":870,"丁大":588,"乘丁":705,"乘一":736,"佐。":1221,"乘丈":663,"佐、":761,"丈地":748,"丞和":584,"之一":4708,"丁國":1032,"丞勞":850,"丕勞":1045,"丈和":615,"不同":2051,"不勞":723,"中使":2667,"不區":1235,"中一":646,"丞使":1157,"丞倫":1278,"丈勞":1514,"丞人":604,"丈克":1050,"丈其":690,"丕使":877,"一勞":758,"丞佐":1297,"不公":954,"丁勞":1850,"並丁":839,"丕倫":1815,"丞休":1067,"丈倫":1781,"不使":1931,"休。":1287,"休、":1097,"丁克":614,"丞丈":3784,"丞不":839,"丞丁":2627,"丞丕":3712,"丕休":1238,"丞丞":2873,"丕佐":865,"不交":3733,"不京":1465,"不事":953,"丈休":1744,"一個":9748,"丈使":1114,"丈佐":1124,"丁倫":2045,"丕丞":2819,"丕丕":4439,"不休":656,"丕不":779,"丕丈":4929,"丕丁":2910,"丕乘":689,"丈不":986,"丈丈":10533,"丈丕":5787,"丈丞":4591,"丁休":2124,"丈之":678,"上不":1486,"一位":1048,"丈乘":1046,"丁佐":630,"丈事":641,"丁使":3115,"不丈":1385,"不丁":1326,"不丕":1047,"不不":1140,"不丞":1080,"一丈":689,"丁丕":3008,"丁丞":3158,"丁丈":5062,"丁丁":3771,"丁不":1124,"丁乘":842,"一些":1051,"份、":614,"丈一":1125,"丈丁":5951,"丕樂":705,"主教":1120,"任何":653,"丞業":742,"中文":1114,"人口":1812,"一次":950,"丈林":808,"一條":1013,"之後":1287,"種栽培":2894,"丕於":989,"他們":991,"事場":701,"丞於":627,"丈於":1045,"人使":893,"丁是":707,"丈是":753,"不拔":3356,"二十":705,"人丁":915,"人丈":695,"主席":690,"丁教":802,"丁斯":911,"丁於":991,"丁文":610,"一所":718,"交休":759,"乘大":1193,"事務":628,"不式":1114,"中學":1267,"交不":718,"使。":1730,"使、":1391,"丁店":631,"一座":888,"中央":1281,"丕小":677,"丕屬":955,"事交":1241,"丞家":1096,"丞寫":642,"佐勞":651,"中的":2868,"休如":836,"丈科":1482,"一種":4747,"丞的":2226,"份年":660,"使休":786,"使代":599,"使中":614,"二次":594,"使丈":1261,"使上":902,"使丁":966,"使丞":904,"使丕":680,"下的":914,"世界":3842,"上的":1685,"丈的":3553,"乘江":1087,"佐丞":598,"佐丕":890,"丁的":2951,"佐不":741,"佐丈":1040,"佐丁":1316,"不生":995,"交於":631,"丕的":2516,"不的":1824,"倫。":904,"倫、":1101,"丈爾":927,"丕然":602,"丁爾":1156,"也有":714,"不現":1191,"人工":3084,"丈球":618,"休丈":1588,"休丁":1289,"休丕":1088,"休丞":1470,"以及":5705,"不治":1653,"休倫":752,"也是":2445,"乘斯":612,"休佐":739,"休休":1180,"休使":949,"丁語":647,"交的":662,"何成":648,"丕西":1266,"但是":741,"人的":1304,"丕語":843,"主義":1789,"丁蘭":617,"事的":883,"人物":959,"企業
":965,"丁西":590,"中華":3016,"一般":2431,"不聯":718,"人民":2356,"丈至":635,"丈草":589,"使團":1465,"前 ":869,"作家":983,"作品":1437,"使倫":938,"世紀":2133,"亞洲":978,"丁聯":1080,"乘的":932,"到 ":1125,"丞 ":1327,"丕 ":1248,"稱於「":1031,"丈 ":1426,"丈·":665,"丁 ":1214,"丁·":957,"地、":843,"商業":792,"國、":913,"倫,":2201,"又譯":655,"問宮":915,"單位":908,"員會":1240,"可能":966,"南部":1208,"名稱":1366,"參與":743,"各種":710,"名的":1486,"同的":917,"又稱":2281,"使(":775,"使,":3019,"位,":692,"佐,":2446,"台灣":5826,"何,":895,"作,":878,"只有":589,"南等":606,"休(":898,"休,":2604,"同時":1361,"和國":2079,"名於":1540,"在 ":4228,"命名":845,"和丕":658,"和丞":617,"區的":1565,"創辦":644,"和丈":1045,"和丁":617,"前身":709,"交,":1096,"人,":2364,"反應":687,"分類":693,"化的":604,"事,":1473,"勞立":1122,"國王":742,"城事":1733,"因此":1472,"國民":1001,"基丁":1219,"基丞":778,"因於":1311,"多生聞":623,"分,":730,"國大":4188,"四川":1895,"地區":6934,"國國":1282,"在台":591,"國家":4180," 公里":1210,"國丁":723,"國丈":683,"國不":1346,"國人":979,"內,":588,"在丕":590,"在不":766,"在丈":1169,"在丁":857,"地不":1518,"在中":1499,"培。":2897,"保護":897,"公園":1063,"全國":1075,"共和":2094,"共同":713,"公司":6083,"和 ":817,"分丁":734,"勞。":848,"勞、":713,"使蘭":928,"佐責":846,"倫爾":803,"及 
":587,"來自":904,"人類":938,"其他":1807,"公共":721,"倫用":717,"內信":886,"其中":1932,"兩個":962,"倫的":1758,"俄羅":1193,"內丁":817,"交通":965,"代表":1917,"使用":3276,"使理":2623,"來的":817,"使的":2298,"何的":820,"佐的":1260,"作用":690,"之間":1600,"倫州":1179,"作的":823,"何能":735,"倫有":1307,"九龍":820,"倫斯":769,"使究":2861,"倫何":704,"不部":1510,"倫休":862,"代的":605,"倫倫":983,"倫克":676,"使於":674,"使本":654,"位於":6989,"個人":1069,"丁近":678,"作於":1594,"丈車":636,"主要":4266,"一部":1589,"他的":940,"倫丈":1719,"倫丁":1808,"倫丞":1219,"倫丕":1126,"倫中":896,"信息":690,"丈體":712,"倫多":1308,"倫勞":1159,"休畫":658,"人聞":943,"休的":1677,"不面":1013,"及丈":747,"及中":2170,"丕(":1556,"丕,":3869,"下,":946,"上,":1460,"丈(":2498,"丈,":5085,"半島":716,"不,":1457,"一,":2375,"丁)":1026,"丁,":3944,"丁(":1688,"包括":3326,"參加":686,"員。":603,"中,":3370,"及其":661,"創立":812,"原名":722,"丞,":4229,"丞(":1428,"乘,":704,"動物":1355,"又名":874,"勞灣":831,"印度":2180,"可以":2687,"台不":1162,"勞的":1695,"協會":1036,"名使":755,"動畫":650,"合作":595,"包含":733,"南不":932,"公路":1194,"化學":839,"於香港":1347,"利用":694," 世紀":1443,"區域":760,"公里":2070,"內的":698,"倫體":1121,"前尚":2709,"分於":856,"勞丞":1027,"勞休":691,"勞任":888,"勞不":751,"勞丈":1379,"勞丁":1206,"勞丕":1033,"勞倫":1202,"勞佐":918,"名。":909,"分別":960,"利亞":1460,"傳統":899,"共有":696,"具有":1054,"區。":805,"區、":855,"南、":2848,"創作":724,"分布":3537,"全球":780,"加乘":907,"場,":657,"州事":599,"工作":1351,"巴乘":819,"學院":1919,"屬的":3350,"度、":646,"帝國":1099,"工引":2892,"平不":1295,"山谷":592,"年代":1818,"布在":1588,"山西":699,"小都":651,"實際":604,"建、":593,"小說":1488,"巴使":882,"希丕":1033,"尚未":2823,"定的":781,"州、":1207,"地,":4121,"川、":1254,"、雲南":1181,"對於":1252,"它的":613,"學的":914,"山坡":1674,"國,":702,"學生":932,"小的":1375,"山東":747,"屬於":2028,"德·":729,"定義":598,"媒體":593,"就是":946,"家的":1003," 年 ":13455,"形式":845,"形成":781,"德國":1532,"後來":987,"廣播":596,"廣東":1877,"建於":727,"年的":1359,"建立":1105,"引種":2905,"年至":809,"拔 
":3138,"廣乘":652,"外,":1036,"工業":864,"布於":1775,"建佐":1764,"年在":773,"廣州":765,"工程":1106,"已經":809,"常生":722,"廣場":674,"團體":692,"大學":4750,"國際":3079,"名:":4017,"名,":805,"大戰":639,"司,":903,"司(":707,"太平":715,"委員":1628,"多生":1260,"動,":770,"勞,":1822,"國立":626,"國的":3067,"基本":664,"境內":659,"區,":4922,"大利":1318,"大使":1011,"家 ":671,"天休":592,"天主":779,"大丈":858,"大丕":898,"學中":593,"大陸":4384,"學使":615,"安丁":874,"存在":831,"學名":3781,"家丈":660,"家丁":836,"學家":1871,"島、":590,"年 ":13925,"小丕":726,"小丈":935,"家國":1674,"專倫":637,"對係":1024,"尼亞":1180,"學校":1548,"它是":687,"家律":639,"宗教":606,"山丞":856,"大的":1820,"品,":659,"學、":1095,"未由人":2697,"員,":1301,"太空":651,"家、":1074,"家。":1302,"大部":622,"東不":1591,"是由":1946,"有對":649,"於美":890,"最大":1529,"成,":1008,"最早":863,"是美":1276,"日至":621,"最後":917,"時的":620,"朝宮":1146,"林中":937,"林下":865,"林丈":744,"林丕":654,"東南":851,"是於":666,"是日":750,"最丈":682,"教育":1764,"斯特":828,"是指":1896,"服務":1801," 日 ":1059,"時期":1763,"有一":1032,"有丈":631,"栽培":2950,"時間":1470," 月 ":10453,"期的":813,"未由":2698,"有的":742,"東省":644,"是香":1731," 年,":2603," 年)":2399,"有植":939,"有時":596,"會的":752,"於香":1366,"故事":841,"後,":2045,"教丁":618,"教休":765,"文使":808,"、湖不":767,"斯丁":773,"文化":2109,"、江西":640,"於丁":1931,"於一":1279,"於丈":3104,"於不":3971,"於丕":2015,"於丞":1568,"於中":3228,"於主":1138,"式,":713,"於「":3160,"是「":751,"是台":621,"時代":1179,"是在":1454,"東、":1223,"於是":782,"於日":1024,"早期":592,"日本":5548,"於倫":790,"、湖南":720,"名:)":3240,"於了":898,"於人":919,"斯坦":674,"文學":1074,"於台":1510,"數學":761,"於勞":777,"數據":647,"於山":668,"是位":626,"是丕":1220,"是丞":853,"是中":2852,"日在":676,"是丁":1400,"是一":8457,"是不":724,"是丈":2046,"教會":604,"是 ":1586,"推不":777," 年的":915,"或稱":642,"成立":2635,"戰爭":1266,"成的":1247,"日 ":1136,"於 ":12568,"斯·":1030,"所有":1063,"應用":957,"拉丁":1113,"成於":2501,"投資":589,"月 ":10505,"有 ":1922,"技術":1647,"年(":1135,"年)":2513,"年,":3020," 年至":781,"提不":748,"提供":1573,"控制":697,"或者":865,"宮,":689,"家(":883,"家,":2956,"學(":695,"學,":1273,"子,":930,"成。":673,"年間":605,"廣西":1228," 年在":693,"屬(":1280," 年代":1445," 平不":1026,"成員":1192,"所以":843,"成何":668,"影響":1080,"小,":1059,"委員會":1211,"流行":628,"業,":586," 日)":775," 
日,":1367,"源於":682,"爾·":762,"沒有":1285,"活動":1238,"比賽":1111,"江蘇":807,"江西":972,"湖不":1117,"湖南":1294,"江、":1010,"斯(":607,"文:":2388,"機構":1217,"死對":1361,"、廣東":743,"日,":1544,"日)":813,"歷使":2356," 日至":616,"民國":1569,"歐洲":1588,"民主":899,"民共":1266,"正式":1274,"時,":1278,"河南":853,"月,":734,"、廣西":861,"會,":853,"會(":980,"民族":1066,"期,":613,"河不":810,"根據":1260,"會議":766,"最高":966,"期間":1013,"有限":1519,"標佐":1052,"植物":4908," 是一":681,"概念":627," 日在":656,"的電":999,"稱「":1043,"的西":633,"發行":1127,"的重":753,"第 ":1807,"的發":596,"的第":1621,"物,":2112,"目的":1040,"的是":1255,"的最":739,"的植":2894,"直接":612,"發現":996,"的特":1537,"積 ":785,"發生":1316,"的對":648,"的小":1527,"的家":632,"發展":2338,"的大":1229,"的國":1195,"的地":4227,"目前":4386,"的丁":3310,"的一":9461,"用的":1238,"生的":842,"產生":949,"生產":964,"球隊":888,"生物":1143,"生聞":4477,"甘肅":1049,"的名":796,"的勞":1368,"的倫":1653,"的休":1719,"的使":1916,"的佐":1046,"的作":651,"的主":1361,"的中":1197,"的丕":3244,"的丞":2994,"的丈":5094,"的不":2456,"的乘":747,"的人":1899,"的交":1021,"生於":2502,"用於":1261,"由於":1686,"理論":1080,"當時":1405,"生活":843,"生在":746,"產品":771,"的。":900,"的《":764,"的「":1273,"環境":892,"由人":2997,"用來":777,"理學":1070,"現在":1079,"王朝":657,"現代":897,"獲得":1128,"的 ":1662,"用。":731,"班丁":801,"物理":701,"特有":1087,"特別":1033,"澳門":921,"、福建":587,"物。":3616,"聯合":1423,"而成":666,"聞在":1114,"等,":840,"聞於":3447,"肅、":797,"網路":1657,"總統":750,"稱,":608,"種,":743,"美國":6254,"有限公":1342,"義大":928,"群島":661,"羅斯":1391,"聯事":701,"美洲":846,"站,":1417,"站(":605,"聯休":1628,"立,":947,"經濟":1788,"統的":590,"經營":801,"、甘肅":803,"線的":712,"至 ":5663,"自 ":699,"紀念":700,"簡稱":3399,"組成":1456,"。由於":638,"聞 ":661,"米的":3024,"結構":812,"米至":2731,"系統":2899,"統治":615,"目,":621,"結丁":586,"組佐":2037,"約丈":598,"系使":1485,"節目":923,"立於":1349,"等地":3334,"的,":1232,"天主教":743,"立的":1251,"生,":616,"用,":772,"程式":650,"科技":622,"約 
":1720,"第三":1132,"第一":3441,"第二":2120,"稱於":4215,"種栽":2894,"科丈":1047,"等。":1259,"科學":1716,"福建":1133,"社會":2251,"站。":627,"角色":606,"計算":1063,"設立":673,"西部":622,"製造":737,"認於":1497,"西班":773,"要的":1104,"被稱":1048,"計劃":875,"越南":953,"資料":957,"賽事":631,"語言":1720,"設計":1777,"號,":798,"處,":614,"譯於":650,"超過":651,"路線":849,"足球":1703,"資訊":664,"行,":1162,"醫學":647,"進行":2272,"都是":704,"過程":791,"這種":792,"部分":2342,"部份":776,"遊戲":1924,"連接":662,"通常":1653,"運動":2496,"語:":3929,"說,":690,"車站":2410,"這個":1488,"這些":923,"於日本":917,"自治":728,"自然":792,"英國":2374,"自由":877,"般生":678,"線,":642,"航空":1143,"草地":752," 米的":2958,"自丁":786,"羅馬":1149,"與丈":666," 米至":2728,"米,":753,"聯賽":916,"華民":1238,"英語":2016,"者,":759,"舉行":1293,"英文":2836,"舉辦":651,"華人":1471,"著名":1732,"蘇聯":599,"處理":911,"藝術":1217,"行。":771,"聞,":628,"行於":853,"製作":1162,"西南":747,"行的":1255,"西份":1005,"西亞":1235,"西不":1211,"行不":2204,"語 ":675,"西、":2735,"中,目":813,"是位於":586,"間,":1121,"是一種":2255,"香港":8631,"高丁":607,"體的":647,"隊,":588,"高速":865,"體育":810,"及中國":2028,"公里,":1100,"體,":795,"是一個":2235,"丈(學":765,"是中國":2143,"於台灣":1145,"選舉":733,"通過":1270,"部的":1066,"賽,":689,"路,":930,"重要":1746,"銀行":879,"鐵家":726,"的第一":601,"重要的":694,"開始":2056,"部,":1123,"除了":587,"限公":1343,"鐵路":1902,"電勞":1154,"雲南":1736,"阿拉":770,"間的":1117,"開發":1451,"陸的":3080,"里,":1236,"電影":2042,"阿爾":603,"電子":1071,"面積":1525,"非洲":681,"領域":708,"項目":726,"電視":2609,"音樂":1761,"需要":625,"馬來":712,"有植物":938,"的特有":970,"是美國":1183,"的植物":2889,"日至 ":606,"前尚未":2703,"最大的":952,"於美國":799,"加乘大":716,"是日本":699,"目前尚":2707,"的地區":3107,"生聞在":1091,"生聞於":3237,"甘肅、":794,"的一部":774,"的主要":668,"的一種":1474,"年),":737,"的一個":3094,"限公司":1342,"年( ":710,"於不拔":2919,"於中國":2310," 
,是":653,"南等地":586,"),":14128,"()":3756,"(,":1173,",)":1558,":,":731,":)":4068,"由人工":2898,",東":861,",有":1487,",最":1123,",曾":1063,",是":13163,")是":10839,",於":6392,",此":660,",後":1253,",從":882,",常":1047,",故":644,")於":2446,",指":584,",所":993,",或":1331,",成":1293,")的":2203,",目":3631,",簡":2031,",第":778,",現":1338,",由":3905,",用":642,",生":3524,",當":880,",西":834,",被":860,",該":1017,",經":598,",總":744,",美":597,",而":3222,",英":973,",與":1255,"(英":2412,",香":721,",通":885,",這":1582,",「":615,",《":690,")。":3711,")、":2876,"(學":3641,",它":1379,",屬":841,",小":787,",對":650,",因":2415,",在":4525,",多":1566,",大":803,",如":966,",前":662,",分":861,",南":727,",即":1028,",勞":1219,",包":1288,")和":952,",同":897,",台":652,",可":1102,",又":2944,",原":1271,",不":2771,",丕":3583,",丞":2297,",並":3993,",中":1698,",主":1659,"(今":666,",乘":1385,",也":3255,"(丈":751,",丈":5090,",丁":3996,",一":2348,",休":1371,",但":3353,",位":2751,",佐":784,",使":1949,",交":834,",亦":1195,",人":871,",以":3497,",他":1340,",份":756,",倫":1535,",共":754,",其":3556,",全":1257,"( ":3933,") ":996,", ":7569,"- ":815,"雲南、":1086,"丁屬(":787,"華民國":1215,"廣西、":800,"中國的":1325,"中國大":3838,"之一,":2265,"英語:":1339,"不拔 ":3137,"引種栽":2892,"丈屬的":703,"人工引":2892," ),":596,"乘江、":593,"特有植":935,"以及中":1990,"一般生":678,"中華人":1156,"英文:":1510,"丕西、":804,"中華民":1237,"廣東、":761,"華人民":1156,"人民共":1261,":)於":1432,":)是":1898,"),又":978,"),是":2411,"()是":1113,",),":1004,"著名的":780,"(),":1524,"位於香":588,"年至 ":713,"面積 ":770,"倫州、":844,"之間的":628,"成立於":922,"內丁使":611,"俄羅斯":1193,",香港":708,"物。分":2798,"公司(":691,"公司,":864,"行不區":1022,"共和國":1943,"分布於":1717,"分布在":1578,"》、《":944,"」、「":864,",於中":674,")是丈":645,")是一":2670,",是一":2178,",是中":1286,"湖南、":689,"あああ":1321,"尚未由":2697,"地,生":2880,",常生":667,",於 ":2197,",所以":698,",又稱":1386,",因此":1151,",多生":1249,"聞於不":2897,"(學名":3634,"。分布":2903,"、印度":793,"、乘江":625,"、丕西":798,"、倫州":850,"、山坡":668,"被稱於":856,"、四川":1250,"大陸的":3064,"アアア":1275,"學名:":3509,"(英語":1020,"(英文":1336,"義大利":914,",目前":3420,"西班丁":773,",生聞":2926,",簡稱":1929,"平不公":818,"工引種":2892,",包括":1128,"米的地":2889,",又名":585,",在 
":642,",其中":943,",以及":1003,",也是":1481,",一般":1212,",主要":1207,",位於":2550,",並丁":649,",中國":911,"屬的植":2808,"丁使、":633,"丈丈丁":628,"江西、":618,"湖不、":763,"不同的":723,"米至 ":2729,"之一。":1752,"不公里":761,"丈丈,":616},"n_words":[4924775,1867501,309785],"name":"zh-tw"} -------------------------------------------------------------------------------- /langdetect/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mimino666/langdetect/5071871742170034557c0e6ec8d6e410f3d9652f/langdetect/tests/__init__.py -------------------------------------------------------------------------------- /langdetect/tests/test_detector.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import six 4 | 5 | from langdetect.detector_factory import DetectorFactory 6 | from langdetect.utils.lang_profile import LangProfile 7 | 8 | 9 | class DetectorTest(unittest.TestCase): 10 | TRAINING_EN = 'a a a b b c c d e' 11 | TRAINING_FR = 'a b b c c c d d d' 12 | TRAINING_JA = six.u('\u3042 \u3042 \u3042 \u3044 \u3046 \u3048 \u3048') 13 | JSON_LANG1 = '{"freq":{"A":3,"B":6,"C":3,"AB":2,"BC":1,"ABC":2,"BBC":1,"CBA":1},"n_words":[12,3,4],"name":"lang1"}' 14 | JSON_LANG2 = '{"freq":{"A":6,"B":3,"C":3,"AA":3,"AB":2,"ABC":1,"ABA":1,"CAA":1},"n_words":[12,5,3],"name":"lang2"}' 15 | 16 | def setUp(self): 17 | self.factory = DetectorFactory() 18 | 19 | profile_en = LangProfile('en') 20 | for w in self.TRAINING_EN.split(): 21 | profile_en.add(w) 22 | self.factory.add_profile(profile_en, 0, 3) 23 | 24 | profile_fr = LangProfile('fr') 25 | for w in self.TRAINING_FR.split(): 26 | profile_fr.add(w) 27 | self.factory.add_profile(profile_fr, 1, 3) 28 | 29 | profile_ja = LangProfile('ja') 30 | for w in self.TRAINING_JA.split(): 31 | profile_ja.add(w) 32 | self.factory.add_profile(profile_ja, 2, 3) 33 | 34 | def test_detector1(self): 35 | detect = self.factory.create() 36 | detect.append('a') 37 | 
self.assertEqual(detect.detect(), 'en') 38 | 39 | def test_detector2(self): 40 | detect = self.factory.create() 41 | detect.append('b d') 42 | self.assertEqual(detect.detect(), 'fr') 43 | 44 | def test_detector3(self): 45 | detect = self.factory.create() 46 | detect.append('d e') 47 | self.assertEqual(detect.detect(), 'en') 48 | 49 | def test_detector4(self): 50 | detect = self.factory.create() 51 | detect.append(six.u('\u3042\u3042\u3042\u3042a')) 52 | self.assertEqual(detect.detect(), 'ja') 53 | 54 | def test_lang_list(self): 55 | langlist = self.factory.get_lang_list() 56 | self.assertEqual(len(langlist), 3) 57 | self.assertEqual(langlist[0], 'en') 58 | self.assertEqual(langlist[1], 'fr') 59 | self.assertEqual(langlist[2], 'ja') 60 | 61 | def test_factory_from_json_string(self): 62 | self.factory.clear() 63 | profiles = [self.JSON_LANG1, self.JSON_LANG2] 64 | self.factory.load_json_profile(profiles) 65 | langlist = self.factory.get_lang_list() 66 | self.assertEqual(len(langlist), 2) 67 | self.assertEqual(langlist[0], 'lang1') 68 | self.assertEqual(langlist[1], 'lang2') 69 | -------------------------------------------------------------------------------- /langdetect/tests/test_language.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from langdetect.language import Language 4 | 5 | 6 | class LanguageTest(unittest.TestCase): 7 | def test_language(self): 8 | lang = Language(None, 0) 9 | self.assertIsNone(lang.lang) 10 | self.assertEqual(lang.prob, 0.0, 0.0001) 11 | self.assertEqual(str(lang), '') 12 | 13 | lang2 = Language('en', 1.0) 14 | self.assertEqual(lang2.lang, 'en') 15 | self.assertEqual(lang2.prob, 1.0, 0.0001) 16 | self.assertEqual(str(lang2), 'en:1.0') 17 | 18 | def test_cmp(self): 19 | lang1 = Language('a', 0.1) 20 | lang2 = Language('b', 0.5) 21 | 22 | self.assertTrue(lang1 < lang2) 23 | self.assertFalse(lang1 == lang2) 24 | self.assertFalse(lang1 > lang1) 25 | 
-------------------------------------------------------------------------------- /langdetect/tests/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Mimino666/langdetect/5071871742170034557c0e6ec8d6e410f3d9652f/langdetect/tests/utils/__init__.py -------------------------------------------------------------------------------- /langdetect/tests/utils/test_lang_profile.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import six 4 | from six.moves import xrange 5 | 6 | from langdetect.utils.lang_profile import LangProfile 7 | 8 | 9 | class LangProfileText(unittest.TestCase): 10 | def test_lang_profile(self): 11 | profile = LangProfile() 12 | self.assertIsNone(profile.name) 13 | 14 | def test_lang_profile_string_int(self): 15 | profile = LangProfile('en') 16 | self.assertEqual(profile.name, 'en') 17 | 18 | def test_add(self): 19 | profile = LangProfile('en') 20 | profile.add('a') 21 | self.assertEqual(profile.freq.get('a'), 1) 22 | profile.add('a') 23 | self.assertEqual(profile.freq.get('a'), 2) 24 | profile.omit_less_freq() 25 | 26 | def test_add_illegally1(self): 27 | profile = LangProfile() 28 | profile.add('a') # ignore 29 | self.assertIsNone(profile.freq.get('a')) # ignored 30 | 31 | def test_add_illegally2(self): 32 | profile = LangProfile('en') 33 | profile.add('a') 34 | profile.add('') # Illegal (string's length of parameter must be between 1 and 3) but ignore 35 | profile.add('abcd') # as well 36 | self.assertEqual(profile.freq.get('a'), 1) 37 | self.assertIsNone(profile.freq.get('')) # ignored 38 | self.assertIsNone(profile.freq.get('abcd')) # ignored 39 | 40 | def test_omit_less_freq(self): 41 | profile = LangProfile('en') 42 | grams = six.u('a b c \u3042 \u3044 \u3046 \u3048 \u304a \u304b \u304c \u304d \u304e \u304f').split() 43 | for i in xrange(5): 44 | for g in grams: 45 | profile.add(g) 46 | 
profile.add(six.u('\u3050')) 47 | 48 | self.assertEqual(profile.freq.get('a'), 5) 49 | self.assertEqual(profile.freq.get(six.u('\u3042')), 5) 50 | self.assertEqual(profile.freq.get(six.u('\u3050')), 1) 51 | profile.omit_less_freq() 52 | self.assertIsNone(profile.freq.get('a')) # omitted 53 | self.assertEqual(profile.freq.get(six.u('\u3042')), 5) 54 | self.assertIsNone(profile.freq.get(six.u('\u3050'))) # omitted 55 | 56 | def test_omit_less_freq_illegally(self): 57 | profile = LangProfile() 58 | profile.omit_less_freq() # ignore 59 | -------------------------------------------------------------------------------- /langdetect/tests/utils/test_ngram.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import six 4 | 5 | from langdetect.utils.ngram import NGram 6 | 7 | 8 | class NGramTest(unittest.TestCase): 9 | def test_constants(self): 10 | self.assertEqual(NGram.N_GRAM, 3) 11 | 12 | def test_normalize_with_latin(self): 13 | self.assertEqual(NGram.normalize(six.u('\u0000')), ' ') 14 | self.assertEqual(NGram.normalize(six.u('\u0009')), ' ') 15 | self.assertEqual(NGram.normalize(six.u('\u0020')), ' ') 16 | self.assertEqual(NGram.normalize(six.u('\u0030')), ' ') 17 | self.assertEqual(NGram.normalize(six.u('\u0040')), ' ') 18 | self.assertEqual(NGram.normalize(six.u('\u0041')), six.u('\u0041')) 19 | self.assertEqual(NGram.normalize(six.u('\u005a')), six.u('\u005a')) 20 | self.assertEqual(NGram.normalize(six.u('\u005b')), ' ') 21 | self.assertEqual(NGram.normalize(six.u('\u0060')), ' ') 22 | self.assertEqual(NGram.normalize(six.u('\u0061')), six.u('\u0061')) 23 | self.assertEqual(NGram.normalize(six.u('\u007a')), six.u('\u007a')) 24 | self.assertEqual(NGram.normalize(six.u('\u007b')), ' ') 25 | self.assertEqual(NGram.normalize(six.u('\u007f')), ' ') 26 | self.assertEqual(NGram.normalize(six.u('\u0080')), six.u('\u0080')) 27 | self.assertEqual(NGram.normalize(six.u('\u00a0')), ' ') 28 | 
self.assertEqual(NGram.normalize(six.u('\u00a1')), six.u('\u00a1')) 29 | 30 | def test_normalize_with_cjk_kanji(self): 31 | self.assertEqual(NGram.normalize(six.u('\u4E00')), six.u('\u4E00')) 32 | self.assertEqual(NGram.normalize(six.u('\u4E01')), six.u('\u4E01')) 33 | self.assertEqual(NGram.normalize(six.u('\u4E02')), six.u('\u4E02')) 34 | self.assertEqual(NGram.normalize(six.u('\u4E03')), six.u('\u4E01')) 35 | self.assertEqual(NGram.normalize(six.u('\u4E04')), six.u('\u4E04')) 36 | self.assertEqual(NGram.normalize(six.u('\u4E05')), six.u('\u4E05')) 37 | self.assertEqual(NGram.normalize(six.u('\u4E06')), six.u('\u4E06')) 38 | self.assertEqual(NGram.normalize(six.u('\u4E07')), six.u('\u4E07')) 39 | self.assertEqual(NGram.normalize(six.u('\u4E08')), six.u('\u4E08')) 40 | self.assertEqual(NGram.normalize(six.u('\u4E09')), six.u('\u4E09')) 41 | self.assertEqual(NGram.normalize(six.u('\u4E10')), six.u('\u4E10')) 42 | self.assertEqual(NGram.normalize(six.u('\u4E11')), six.u('\u4E11')) 43 | self.assertEqual(NGram.normalize(six.u('\u4E12')), six.u('\u4E12')) 44 | self.assertEqual(NGram.normalize(six.u('\u4E13')), six.u('\u4E13')) 45 | self.assertEqual(NGram.normalize(six.u('\u4E14')), six.u('\u4E14')) 46 | self.assertEqual(NGram.normalize(six.u('\u4E15')), six.u('\u4E15')) 47 | self.assertEqual(NGram.normalize(six.u('\u4E1e')), six.u('\u4E1e')) 48 | self.assertEqual(NGram.normalize(six.u('\u4E1f')), six.u('\u4E1f')) 49 | self.assertEqual(NGram.normalize(six.u('\u4E20')), six.u('\u4E20')) 50 | self.assertEqual(NGram.normalize(six.u('\u4E21')), six.u('\u4E21')) 51 | self.assertEqual(NGram.normalize(six.u('\u4E22')), six.u('\u4E22')) 52 | self.assertEqual(NGram.normalize(six.u('\u4E23')), six.u('\u4E23')) 53 | self.assertEqual(NGram.normalize(six.u('\u4E24')), six.u('\u4E13')) 54 | self.assertEqual(NGram.normalize(six.u('\u4E25')), six.u('\u4E13')) 55 | self.assertEqual(NGram.normalize(six.u('\u4E30')), six.u('\u4E30')) 56 | 57 | def test_normalize_for_romanian(self): 58 | 
self.assertEqual(NGram.normalize(six.u('\u015f')), six.u('\u015f')) 59 | self.assertEqual(NGram.normalize(six.u('\u0163')), six.u('\u0163')) 60 | self.assertEqual(NGram.normalize(six.u('\u0219')), six.u('\u015f')) 61 | self.assertEqual(NGram.normalize(six.u('\u021b')), six.u('\u0163')) 62 | 63 | def test_ngram(self): 64 | ngram = NGram() 65 | self.assertIsNone(ngram.get(0)) 66 | self.assertIsNone(ngram.get(1)) 67 | self.assertIsNone(ngram.get(2)) 68 | self.assertIsNone(ngram.get(3)) 69 | self.assertIsNone(ngram.get(4)) 70 | ngram.add_char(' ') 71 | self.assertIsNone(ngram.get(1)) 72 | self.assertIsNone(ngram.get(2)) 73 | self.assertIsNone(ngram.get(3)) 74 | ngram.add_char('A') 75 | self.assertEqual(ngram.get(1), 'A') 76 | self.assertEqual(ngram.get(2), ' A') 77 | self.assertIsNone(ngram.get(3)) 78 | ngram.add_char(six.u('\u06cc')) 79 | self.assertEqual(ngram.get(1), six.u('\u064a')) 80 | self.assertEqual(ngram.get(2), six.u('A\u064a')) 81 | self.assertEqual(ngram.get(3), six.u(' A\u064a')) 82 | ngram.add_char(six.u('\u1ea0')) 83 | self.assertEqual(ngram.get(1), six.u('\u1ec3')) 84 | self.assertEqual(ngram.get(2), six.u('\u064a\u1ec3')) 85 | self.assertEqual(ngram.get(3), six.u('A\u064a\u1ec3')) 86 | ngram.add_char(six.u('\u3044')) 87 | self.assertEqual(ngram.get(1), six.u('\u3042')) 88 | self.assertEqual(ngram.get(2), six.u('\u1ec3\u3042')) 89 | self.assertEqual(ngram.get(3), six.u('\u064a\u1ec3\u3042')) 90 | 91 | ngram.add_char(six.u('\u30a4')) 92 | self.assertEqual(ngram.get(1), six.u('\u30a2')) 93 | self.assertEqual(ngram.get(2), six.u('\u3042\u30a2')) 94 | self.assertEqual(ngram.get(3), six.u('\u1ec3\u3042\u30a2')) 95 | ngram.add_char(six.u('\u3106')) 96 | self.assertEqual(ngram.get(1), six.u('\u3105')) 97 | self.assertEqual(ngram.get(2), six.u('\u30a2\u3105')) 98 | self.assertEqual(ngram.get(3), six.u('\u3042\u30a2\u3105')) 99 | ngram.add_char(six.u('\uac01')) 100 | self.assertEqual(ngram.get(1), six.u('\uac00')) 101 | self.assertEqual(ngram.get(2), 
six.u('\u3105\uac00')) 102 | self.assertEqual(ngram.get(3), six.u('\u30a2\u3105\uac00')) 103 | ngram.add_char(six.u('\u2010')) 104 | self.assertIsNone(ngram.get(1)) 105 | self.assertEqual(ngram.get(2), six.u('\uac00 ')) 106 | self.assertEqual(ngram.get(3), six.u('\u3105\uac00 ')) 107 | 108 | ngram.add_char('a') 109 | self.assertEqual(ngram.get(1), 'a') 110 | self.assertEqual(ngram.get(2), ' a') 111 | self.assertIsNone(ngram.get(3)) 112 | 113 | def test_ngram3(self): 114 | ngram = NGram() 115 | 116 | ngram.add_char('A') 117 | self.assertEqual(ngram.get(1), 'A') 118 | self.assertEqual(ngram.get(2), ' A') 119 | self.assertIsNone(ngram.get(3)) 120 | 121 | ngram.add_char('1') 122 | self.assertIsNone(ngram.get(1)) 123 | self.assertEqual(ngram.get(2), 'A ') 124 | self.assertEqual(ngram.get(3), ' A ') 125 | 126 | ngram.add_char('B') 127 | self.assertEqual(ngram.get(1), 'B') 128 | self.assertEqual(ngram.get(2), ' B') 129 | self.assertIsNone(ngram.get(3)) 130 | 131 | def test_normalize_vietnamese(self): 132 | self.assertEqual(NGram.normalize_vi(six.u('')), '') 133 | self.assertEqual(NGram.normalize_vi(six.u('ABC')), 'ABC') 134 | self.assertEqual(NGram.normalize_vi(six.u('012')), '012') 135 | self.assertEqual(NGram.normalize_vi(six.u('\u00c0')), six.u('\u00c0')) 136 | 137 | self.assertEqual(NGram.normalize_vi(six.u('\u0041\u0300')), six.u('\u00C0')) 138 | self.assertEqual(NGram.normalize_vi(six.u('\u0045\u0300')), six.u('\u00C8')) 139 | self.assertEqual(NGram.normalize_vi(six.u('\u0049\u0300')), six.u('\u00CC')) 140 | self.assertEqual(NGram.normalize_vi(six.u('\u004F\u0300')), six.u('\u00D2')) 141 | self.assertEqual(NGram.normalize_vi(six.u('\u0055\u0300')), six.u('\u00D9')) 142 | self.assertEqual(NGram.normalize_vi(six.u('\u0059\u0300')), six.u('\u1EF2')) 143 | self.assertEqual(NGram.normalize_vi(six.u('\u0061\u0300')), six.u('\u00E0')) 144 | self.assertEqual(NGram.normalize_vi(six.u('\u0065\u0300')), six.u('\u00E8')) 145 | 
self.assertEqual(NGram.normalize_vi(six.u('\u0069\u0300')), six.u('\u00EC')) 146 | self.assertEqual(NGram.normalize_vi(six.u('\u006F\u0300')), six.u('\u00F2')) 147 | self.assertEqual(NGram.normalize_vi(six.u('\u0075\u0300')), six.u('\u00F9')) 148 | self.assertEqual(NGram.normalize_vi(six.u('\u0079\u0300')), six.u('\u1EF3')) 149 | self.assertEqual(NGram.normalize_vi(six.u('\u00C2\u0300')), six.u('\u1EA6')) 150 | self.assertEqual(NGram.normalize_vi(six.u('\u00CA\u0300')), six.u('\u1EC0')) 151 | self.assertEqual(NGram.normalize_vi(six.u('\u00D4\u0300')), six.u('\u1ED2')) 152 | self.assertEqual(NGram.normalize_vi(six.u('\u00E2\u0300')), six.u('\u1EA7')) 153 | self.assertEqual(NGram.normalize_vi(six.u('\u00EA\u0300')), six.u('\u1EC1')) 154 | self.assertEqual(NGram.normalize_vi(six.u('\u00F4\u0300')), six.u('\u1ED3')) 155 | self.assertEqual(NGram.normalize_vi(six.u('\u0102\u0300')), six.u('\u1EB0')) 156 | self.assertEqual(NGram.normalize_vi(six.u('\u0103\u0300')), six.u('\u1EB1')) 157 | self.assertEqual(NGram.normalize_vi(six.u('\u01A0\u0300')), six.u('\u1EDC')) 158 | self.assertEqual(NGram.normalize_vi(six.u('\u01A1\u0300')), six.u('\u1EDD')) 159 | self.assertEqual(NGram.normalize_vi(six.u('\u01AF\u0300')), six.u('\u1EEA')) 160 | self.assertEqual(NGram.normalize_vi(six.u('\u01B0\u0300')), six.u('\u1EEB')) 161 | 162 | self.assertEqual(NGram.normalize_vi(six.u('\u0041\u0301')), six.u('\u00C1')) 163 | self.assertEqual(NGram.normalize_vi(six.u('\u0045\u0301')), six.u('\u00C9')) 164 | self.assertEqual(NGram.normalize_vi(six.u('\u0049\u0301')), six.u('\u00CD')) 165 | self.assertEqual(NGram.normalize_vi(six.u('\u004F\u0301')), six.u('\u00D3')) 166 | self.assertEqual(NGram.normalize_vi(six.u('\u0055\u0301')), six.u('\u00DA')) 167 | self.assertEqual(NGram.normalize_vi(six.u('\u0059\u0301')), six.u('\u00DD')) 168 | self.assertEqual(NGram.normalize_vi(six.u('\u0061\u0301')), six.u('\u00E1')) 169 | self.assertEqual(NGram.normalize_vi(six.u('\u0065\u0301')), six.u('\u00E9')) 170 | 
self.assertEqual(NGram.normalize_vi(six.u('\u0069\u0301')), six.u('\u00ED')) 171 | self.assertEqual(NGram.normalize_vi(six.u('\u006F\u0301')), six.u('\u00F3')) 172 | self.assertEqual(NGram.normalize_vi(six.u('\u0075\u0301')), six.u('\u00FA')) 173 | self.assertEqual(NGram.normalize_vi(six.u('\u0079\u0301')), six.u('\u00FD')) 174 | self.assertEqual(NGram.normalize_vi(six.u('\u00C2\u0301')), six.u('\u1EA4')) 175 | self.assertEqual(NGram.normalize_vi(six.u('\u00CA\u0301')), six.u('\u1EBE')) 176 | self.assertEqual(NGram.normalize_vi(six.u('\u00D4\u0301')), six.u('\u1ED0')) 177 | self.assertEqual(NGram.normalize_vi(six.u('\u00E2\u0301')), six.u('\u1EA5')) 178 | self.assertEqual(NGram.normalize_vi(six.u('\u00EA\u0301')), six.u('\u1EBF')) 179 | self.assertEqual(NGram.normalize_vi(six.u('\u00F4\u0301')), six.u('\u1ED1')) 180 | self.assertEqual(NGram.normalize_vi(six.u('\u0102\u0301')), six.u('\u1EAE')) 181 | self.assertEqual(NGram.normalize_vi(six.u('\u0103\u0301')), six.u('\u1EAF')) 182 | self.assertEqual(NGram.normalize_vi(six.u('\u01A0\u0301')), six.u('\u1EDA')) 183 | self.assertEqual(NGram.normalize_vi(six.u('\u01A1\u0301')), six.u('\u1EDB')) 184 | self.assertEqual(NGram.normalize_vi(six.u('\u01AF\u0301')), six.u('\u1EE8')) 185 | self.assertEqual(NGram.normalize_vi(six.u('\u01B0\u0301')), six.u('\u1EE9')) 186 | 187 | self.assertEqual(NGram.normalize_vi(six.u('\u0041\u0303')), six.u('\u00C3')) 188 | self.assertEqual(NGram.normalize_vi(six.u('\u0045\u0303')), six.u('\u1EBC')) 189 | self.assertEqual(NGram.normalize_vi(six.u('\u0049\u0303')), six.u('\u0128')) 190 | self.assertEqual(NGram.normalize_vi(six.u('\u004F\u0303')), six.u('\u00D5')) 191 | self.assertEqual(NGram.normalize_vi(six.u('\u0055\u0303')), six.u('\u0168')) 192 | self.assertEqual(NGram.normalize_vi(six.u('\u0059\u0303')), six.u('\u1EF8')) 193 | self.assertEqual(NGram.normalize_vi(six.u('\u0061\u0303')), six.u('\u00E3')) 194 | self.assertEqual(NGram.normalize_vi(six.u('\u0065\u0303')), six.u('\u1EBD')) 195 | 
self.assertEqual(NGram.normalize_vi(six.u('\u0069\u0303')), six.u('\u0129')) 196 | self.assertEqual(NGram.normalize_vi(six.u('\u006F\u0303')), six.u('\u00F5')) 197 | self.assertEqual(NGram.normalize_vi(six.u('\u0075\u0303')), six.u('\u0169')) 198 | self.assertEqual(NGram.normalize_vi(six.u('\u0079\u0303')), six.u('\u1EF9')) 199 | self.assertEqual(NGram.normalize_vi(six.u('\u00C2\u0303')), six.u('\u1EAA')) 200 | self.assertEqual(NGram.normalize_vi(six.u('\u00CA\u0303')), six.u('\u1EC4')) 201 | self.assertEqual(NGram.normalize_vi(six.u('\u00D4\u0303')), six.u('\u1ED6')) 202 | self.assertEqual(NGram.normalize_vi(six.u('\u00E2\u0303')), six.u('\u1EAB')) 203 | self.assertEqual(NGram.normalize_vi(six.u('\u00EA\u0303')), six.u('\u1EC5')) 204 | self.assertEqual(NGram.normalize_vi(six.u('\u00F4\u0303')), six.u('\u1ED7')) 205 | self.assertEqual(NGram.normalize_vi(six.u('\u0102\u0303')), six.u('\u1EB4')) 206 | self.assertEqual(NGram.normalize_vi(six.u('\u0103\u0303')), six.u('\u1EB5')) 207 | self.assertEqual(NGram.normalize_vi(six.u('\u01A0\u0303')), six.u('\u1EE0')) 208 | self.assertEqual(NGram.normalize_vi(six.u('\u01A1\u0303')), six.u('\u1EE1')) 209 | self.assertEqual(NGram.normalize_vi(six.u('\u01AF\u0303')), six.u('\u1EEE')) 210 | self.assertEqual(NGram.normalize_vi(six.u('\u01B0\u0303')), six.u('\u1EEF')) 211 | 212 | self.assertEqual(NGram.normalize_vi(six.u('\u0041\u0309')), six.u('\u1EA2')) 213 | self.assertEqual(NGram.normalize_vi(six.u('\u0045\u0309')), six.u('\u1EBA')) 214 | self.assertEqual(NGram.normalize_vi(six.u('\u0049\u0309')), six.u('\u1EC8')) 215 | self.assertEqual(NGram.normalize_vi(six.u('\u004F\u0309')), six.u('\u1ECE')) 216 | self.assertEqual(NGram.normalize_vi(six.u('\u0055\u0309')), six.u('\u1EE6')) 217 | self.assertEqual(NGram.normalize_vi(six.u('\u0059\u0309')), six.u('\u1EF6')) 218 | self.assertEqual(NGram.normalize_vi(six.u('\u0061\u0309')), six.u('\u1EA3')) 219 | self.assertEqual(NGram.normalize_vi(six.u('\u0065\u0309')), six.u('\u1EBB')) 220 | 
self.assertEqual(NGram.normalize_vi(six.u('\u0069\u0309')), six.u('\u1EC9')) 221 | self.assertEqual(NGram.normalize_vi(six.u('\u006F\u0309')), six.u('\u1ECF')) 222 | self.assertEqual(NGram.normalize_vi(six.u('\u0075\u0309')), six.u('\u1EE7')) 223 | self.assertEqual(NGram.normalize_vi(six.u('\u0079\u0309')), six.u('\u1EF7')) 224 | self.assertEqual(NGram.normalize_vi(six.u('\u00C2\u0309')), six.u('\u1EA8')) 225 | self.assertEqual(NGram.normalize_vi(six.u('\u00CA\u0309')), six.u('\u1EC2')) 226 | self.assertEqual(NGram.normalize_vi(six.u('\u00D4\u0309')), six.u('\u1ED4')) 227 | self.assertEqual(NGram.normalize_vi(six.u('\u00E2\u0309')), six.u('\u1EA9')) 228 | self.assertEqual(NGram.normalize_vi(six.u('\u00EA\u0309')), six.u('\u1EC3')) 229 | self.assertEqual(NGram.normalize_vi(six.u('\u00F4\u0309')), six.u('\u1ED5')) 230 | self.assertEqual(NGram.normalize_vi(six.u('\u0102\u0309')), six.u('\u1EB2')) 231 | self.assertEqual(NGram.normalize_vi(six.u('\u0103\u0309')), six.u('\u1EB3')) 232 | self.assertEqual(NGram.normalize_vi(six.u('\u01A0\u0309')), six.u('\u1EDE')) 233 | self.assertEqual(NGram.normalize_vi(six.u('\u01A1\u0309')), six.u('\u1EDF')) 234 | self.assertEqual(NGram.normalize_vi(six.u('\u01AF\u0309')), six.u('\u1EEC')) 235 | self.assertEqual(NGram.normalize_vi(six.u('\u01B0\u0309')), six.u('\u1EED')) 236 | 237 | self.assertEqual(NGram.normalize_vi(six.u('\u0041\u0323')), six.u('\u1EA0')) 238 | self.assertEqual(NGram.normalize_vi(six.u('\u0045\u0323')), six.u('\u1EB8')) 239 | self.assertEqual(NGram.normalize_vi(six.u('\u0049\u0323')), six.u('\u1ECA')) 240 | self.assertEqual(NGram.normalize_vi(six.u('\u004F\u0323')), six.u('\u1ECC')) 241 | self.assertEqual(NGram.normalize_vi(six.u('\u0055\u0323')), six.u('\u1EE4')) 242 | self.assertEqual(NGram.normalize_vi(six.u('\u0059\u0323')), six.u('\u1EF4')) 243 | self.assertEqual(NGram.normalize_vi(six.u('\u0061\u0323')), six.u('\u1EA1')) 244 | self.assertEqual(NGram.normalize_vi(six.u('\u0065\u0323')), six.u('\u1EB9')) 245 | 
self.assertEqual(NGram.normalize_vi(six.u('\u0069\u0323')), six.u('\u1ECB')) 246 | self.assertEqual(NGram.normalize_vi(six.u('\u006F\u0323')), six.u('\u1ECD')) 247 | self.assertEqual(NGram.normalize_vi(six.u('\u0075\u0323')), six.u('\u1EE5')) 248 | self.assertEqual(NGram.normalize_vi(six.u('\u0079\u0323')), six.u('\u1EF5')) 249 | self.assertEqual(NGram.normalize_vi(six.u('\u00C2\u0323')), six.u('\u1EAC')) 250 | self.assertEqual(NGram.normalize_vi(six.u('\u00CA\u0323')), six.u('\u1EC6')) 251 | self.assertEqual(NGram.normalize_vi(six.u('\u00D4\u0323')), six.u('\u1ED8')) 252 | self.assertEqual(NGram.normalize_vi(six.u('\u00E2\u0323')), six.u('\u1EAD')) 253 | self.assertEqual(NGram.normalize_vi(six.u('\u00EA\u0323')), six.u('\u1EC7')) 254 | self.assertEqual(NGram.normalize_vi(six.u('\u00F4\u0323')), six.u('\u1ED9')) 255 | self.assertEqual(NGram.normalize_vi(six.u('\u0102\u0323')), six.u('\u1EB6')) 256 | self.assertEqual(NGram.normalize_vi(six.u('\u0103\u0323')), six.u('\u1EB7')) 257 | self.assertEqual(NGram.normalize_vi(six.u('\u01A0\u0323')), six.u('\u1EE2')) 258 | self.assertEqual(NGram.normalize_vi(six.u('\u01A1\u0323')), six.u('\u1EE3')) 259 | self.assertEqual(NGram.normalize_vi(six.u('\u01AF\u0323')), six.u('\u1EF0')) 260 | self.assertEqual(NGram.normalize_vi(six.u('\u01B0\u0323')), six.u('\u1EF1')) 261 | -------------------------------------------------------------------------------- /langdetect/tests/utils/test_unicode_block.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | import six 4 | 5 | from langdetect.utils import unicode_block 6 | 7 | 8 | class UnicodeBlockTest(unittest.TestCase): 9 | def test_unicode_block(self): 10 | self.assertEqual(unicode_block.unicode_block(six.u('\u0065')), unicode_block.UNICODE_BASIC_LATIN) 11 | self.assertEqual(unicode_block.unicode_block(six.u('\u007F')), unicode_block.UNICODE_BASIC_LATIN) 12 | self.assertEqual(unicode_block.unicode_block(six.u('\u0080')), 
class LangProfile(object):
    '''N-gram frequency profile of one language.

    ``freq`` maps every n-gram to the number of times it was added, and
    ``n_words[k]`` is the running total of added n-grams of length ``k + 1``.
    '''
    MINIMUM_FREQ = 2
    LESS_FREQ_RATIO = 100000

    ROMAN_CHAR_RE = re.compile(r'^[A-Za-z]$')
    ROMAN_SUBSTR_RE = re.compile(r'.*[A-Za-z].*')

    def __init__(self, name=None, freq=None, n_words=None):
        '''Create a profile, optionally seeded with existing counts.

        A missing ``n_words`` becomes a zero-filled list with one slot per
        supported n-gram length; a supplied list is stored as given.
        '''
        self.name = name
        self.n_words = [0] * NGram.N_GRAM if n_words is None else n_words

        self.freq = defaultdict(int)
        if freq is not None:
            self.freq.update(freq)

    def add(self, gram):
        '''Count one occurrence of ``gram``.

        Nothing happens when the profile has no name, when ``gram`` is None
        or when its length is outside ``1..NGram.N_GRAM``.
        '''
        if self.name is None or gram is None:  # Illegal
            return
        size = len(gram)
        if not 1 <= size <= NGram.N_GRAM:  # Illegal
            return
        self.n_words[size - 1] += 1
        self.freq[gram] += 1

    def _discard(self, key, count):
        '''Forget ``key`` and take its ``count`` out of the length totals.'''
        self.n_words[len(key) - 1] -= count
        del self.freq[key]

    def omit_less_freq(self):
        '''Drop rare n-grams and, when Latin letters are scarce, Latin noise.

        The rarity threshold is derived from the 1-gram total before any
        entry is removed; the Latin check uses the total after that pruning.
        '''
        if self.name is None:  # Illegal
            return
        threshold = self.n_words[0] // self.LESS_FREQ_RATIO
        if threshold < self.MINIMUM_FREQ:
            threshold = self.MINIMUM_FREQ

        # Iterate over a snapshot because entries are deleted on the way.
        roman = 0
        for key, count in list(six.iteritems(self.freq)):
            if count <= threshold:
                self._discard(key, count)
            elif self.ROMAN_CHAR_RE.match(key):
                roman += count

        # roman check: keep Latin n-grams when single Latin letters make up
        # at least a third of the remaining 1-grams.
        if roman >= self.n_words[0] // 3:
            return
        for key, count in list(six.iteritems(self.freq)):
            if self.ROMAN_SUBSTR_RE.match(key):
                self._discard(key, count)

    def update(self, text):
        '''Feed (fragmented) text into the profile.

        The text is normalized for Vietnamese, pushed character by character
        through an ``NGram`` buffer, and every n-gram the buffer yields after
        each character is added to the counts. ``None`` is ignored.
        '''
        if text is None:
            return
        gram = NGram()
        for ch in NGram.normalize_vi(text):
            gram.add_char(ch)
            for n in xrange(1, NGram.N_GRAM + 1):
                self.add(gram.get(n))
class Messages(object):
    '''Key/value table loaded from the ``messages.properties`` file that
    sits next to this module.'''
    MESSAGES_FILENAME = path.join(path.dirname(__file__), 'messages.properties')

    def __init__(self):
        '''Read every ``key=value`` line of the properties file.

        Each line is stripped and split at its first ``=``; the value part
        has its backslash escapes decoded via ``unicode_escape``.
        '''
        self.messages = {}
        with open(self.MESSAGES_FILENAME, 'r') as props:
            for raw in props:
                name, _, text = raw.strip().partition('=')
                self.messages[name] = text.encode().decode('unicode_escape')

    def get_string(self, key):
        '''Return the message stored under ``key``, or ``!key!`` if absent.'''
        placeholder = '!%s!' % key
        return self.messages.get(key, placeholder)


# Shared table, created by the first call to get_string().
_messages = None
def get_string(key):
    '''Look ``key`` up in the shared table, loading the table on first use.'''
    global _messages
    if _messages is None:
        _messages = Messages()
    table = _messages
    return table.get_string(key)
class NGram(object):
    '''Rolling buffer of normalized characters that yields 1- to
    ``N_GRAM``-character grams.'''
    LATIN1_EXCLUDED = messages.get_string('NGram.LATIN1_EXCLUDE')
    N_GRAM = 3

    def __init__(self):
        # The buffer starts out as a single space (word boundary).
        self.grams = ' '
        self.capitalword = False

    def add_char(self, ch):
        '''Normalize ``ch`` and append it to the n-gram buffer.'''
        ch = self.normalize(ch)
        previous = self.grams[-1]
        if previous == ' ':
            # A new word begins: restart the buffer and the capital flag.
            self.grams = ' '
            self.capitalword = False
            if ch == ' ':
                return
        elif len(self.grams) >= self.N_GRAM:
            # Buffer is full: drop the oldest character.
            self.grams = self.grams[1:]
        self.grams = self.grams + ch

        # Two upper-case characters in a row mark the word as capitalized;
        # any character that is not upper-case clears the mark.
        if not ch.isupper():
            self.capitalword = False
        elif previous.isupper():
            self.capitalword = True

    def get(self, n):
        '''Return the last ``n`` buffered characters, or None.

        None is returned while the capital-word flag is set, when ``n`` is
        outside ``1..N_GRAM``, when fewer than ``n`` characters are buffered,
        and for a 1-gram that is just a space.
        '''
        if self.capitalword:
            return None
        if n < 1 or n > self.N_GRAM or len(self.grams) < n:
            return None
        tail = self.grams[-n:]
        if n == 1 and tail == ' ':
            return None
        return tail

    @classmethod
    def normalize(cls, ch):
        '''Map ``ch`` to its normalized form according to its Unicode block.'''
        block = unicode_block(ch)
        if block == UNICODE_BASIC_LATIN:
            # Only ASCII letters survive; everything else becomes a space.
            is_letter = 'A' <= ch <= 'Z' or 'a' <= ch <= 'z'
            return ch if is_letter else ' '
        if block == UNICODE_LATIN_1_SUPPLEMENT:
            return ' ' if ch in cls.LATIN1_EXCLUDED else ch
        if block == UNICODE_LATIN_EXTENDED_B:
            # normalization for Romanian
            if ch == six.u('\u0219'):  # Small S with comma below => with cedilla
                return six.u('\u015f')
            if ch == six.u('\u021b'):  # Small T with comma below => with cedilla
                return six.u('\u0163')
            return ch
        if block == UNICODE_GENERAL_PUNCTUATION:
            return ' '
        if block == UNICODE_ARABIC:
            # Farsi yeh => Arabic yeh
            return six.u('\u064a') if ch == six.u('\u06cc') else ch
        if block == UNICODE_LATIN_EXTENDED_ADDITIONAL:
            # Everything from U+1EA0 upwards collapses to one representative.
            return six.u('\u1ec3') if ch >= six.u('\u1ea0') else ch
        if block == UNICODE_HIRAGANA:
            return six.u('\u3042')
        if block == UNICODE_KATAKANA:
            return six.u('\u30a2')
        if block in (UNICODE_BOPOMOFO, UNICODE_BOPOMOFO_EXTENDED):
            return six.u('\u3105')
        if block == UNICODE_CJK_UNIFIED_IDEOGRAPHS:
            return cls.CJK_MAP.get(ch, ch)
        if block == UNICODE_HANGUL_SYLLABLES:
            return six.u('\uac00')
        return ch

    @classmethod
    def normalize_vi(cls, text):
        '''Normalizer for Vietnamese.
        Normalize Alphabet + Diacritical Mark(U+03xx) into U+1Exx.
        '''
        def compose(match):
            base, mark = match.group(1), match.group(2)
            column = cls.TO_NORMALIZE_VI_CHARS.find(base)
            row = cls.DMARK_CLASS.find(mark)  # Diacritical Mark
            return cls.NORMALIZED_VI_CHARS[row][column]
        return cls.ALPHABET_WITH_DMARK.sub(compose, text)

    # One string of precomposed characters per diacritical mark.
    NORMALIZED_VI_CHARS = list(map(messages.get_string, (
        'NORMALIZED_VI_CHARS_0300',
        'NORMALIZED_VI_CHARS_0301',
        'NORMALIZED_VI_CHARS_0303',
        'NORMALIZED_VI_CHARS_0309',
        'NORMALIZED_VI_CHARS_0323',
    )))
    TO_NORMALIZE_VI_CHARS = messages.get_string('TO_NORMALIZE_VI_CHARS')
    DMARK_CLASS = messages.get_string('DMARK_CLASS')
    ALPHABET_WITH_DMARK = re.compile(
        ''.join(['([', TO_NORMALIZE_VI_CHARS, '])([', DMARK_CLASS, '])']),
        re.UNICODE)

    # CJK Kanji Normalization Mapping
    CJK_CLASS = list(map(messages.get_string, (
        'NGram.KANJI_1_0', 'NGram.KANJI_1_2', 'NGram.KANJI_1_4',
        'NGram.KANJI_1_8', 'NGram.KANJI_1_11', 'NGram.KANJI_1_12',
        'NGram.KANJI_1_13', 'NGram.KANJI_1_14', 'NGram.KANJI_1_16',
        'NGram.KANJI_1_18', 'NGram.KANJI_1_22', 'NGram.KANJI_1_27',
        'NGram.KANJI_1_29', 'NGram.KANJI_1_31', 'NGram.KANJI_1_35',
        'NGram.KANJI_2_0', 'NGram.KANJI_2_1', 'NGram.KANJI_2_4',
        'NGram.KANJI_2_9', 'NGram.KANJI_2_10', 'NGram.KANJI_2_11',
        'NGram.KANJI_2_12', 'NGram.KANJI_2_13', 'NGram.KANJI_2_15',
        'NGram.KANJI_2_16', 'NGram.KANJI_2_18', 'NGram.KANJI_2_21',
        'NGram.KANJI_2_22', 'NGram.KANJI_2_23', 'NGram.KANJI_2_28',
        'NGram.KANJI_2_29', 'NGram.KANJI_2_30', 'NGram.KANJI_2_31',
        'NGram.KANJI_2_32', 'NGram.KANJI_2_35', 'NGram.KANJI_2_36',
        'NGram.KANJI_2_37', 'NGram.KANJI_2_38',
        'NGram.KANJI_3_1', 'NGram.KANJI_3_2', 'NGram.KANJI_3_3',
        'NGram.KANJI_3_4', 'NGram.KANJI_3_5', 'NGram.KANJI_3_8',
        'NGram.KANJI_3_9', 'NGram.KANJI_3_11', 'NGram.KANJI_3_12',
        'NGram.KANJI_3_13', 'NGram.KANJI_3_15', 'NGram.KANJI_3_16',
        'NGram.KANJI_3_18', 'NGram.KANJI_3_19', 'NGram.KANJI_3_22',
        'NGram.KANJI_3_23', 'NGram.KANJI_3_27', 'NGram.KANJI_3_29',
        'NGram.KANJI_3_30', 'NGram.KANJI_3_31', 'NGram.KANJI_3_32',
        'NGram.KANJI_3_35', 'NGram.KANJI_3_36', 'NGram.KANJI_3_37',
        'NGram.KANJI_3_38',
        'NGram.KANJI_4_0', 'NGram.KANJI_4_9', 'NGram.KANJI_4_10',
        'NGram.KANJI_4_16', 'NGram.KANJI_4_17', 'NGram.KANJI_4_18',
        'NGram.KANJI_4_22', 'NGram.KANJI_4_24', 'NGram.KANJI_4_28',
        'NGram.KANJI_4_34', 'NGram.KANJI_4_39',
        'NGram.KANJI_5_10', 'NGram.KANJI_5_11', 'NGram.KANJI_5_12',
        'NGram.KANJI_5_13', 'NGram.KANJI_5_14', 'NGram.KANJI_5_18',
        'NGram.KANJI_5_26', 'NGram.KANJI_5_29', 'NGram.KANJI_5_34',
        'NGram.KANJI_5_39',
        'NGram.KANJI_6_0', 'NGram.KANJI_6_3', 'NGram.KANJI_6_9',
        'NGram.KANJI_6_10', 'NGram.KANJI_6_11', 'NGram.KANJI_6_12',
        'NGram.KANJI_6_16', 'NGram.KANJI_6_18', 'NGram.KANJI_6_20',
        'NGram.KANJI_6_21', 'NGram.KANJI_6_22', 'NGram.KANJI_6_23',
        'NGram.KANJI_6_25', 'NGram.KANJI_6_28', 'NGram.KANJI_6_29',
        'NGram.KANJI_6_30', 'NGram.KANJI_6_32', 'NGram.KANJI_6_34',
        'NGram.KANJI_6_35', 'NGram.KANJI_6_37', 'NGram.KANJI_6_39',
        'NGram.KANJI_7_0', 'NGram.KANJI_7_3', 'NGram.KANJI_7_6',
        'NGram.KANJI_7_7', 'NGram.KANJI_7_9', 'NGram.KANJI_7_11',
        'NGram.KANJI_7_12', 'NGram.KANJI_7_13', 'NGram.KANJI_7_16',
        'NGram.KANJI_7_18', 'NGram.KANJI_7_19', 'NGram.KANJI_7_20',
        'NGram.KANJI_7_21', 'NGram.KANJI_7_23', 'NGram.KANJI_7_25',
        'NGram.KANJI_7_28', 'NGram.KANJI_7_29', 'NGram.KANJI_7_32',
        'NGram.KANJI_7_33', 'NGram.KANJI_7_35', 'NGram.KANJI_7_37',
    )))

    CJK_MAP = {}

    @classmethod
    def _init_cjk_map(cls):
        '''Fill ``CJK_MAP`` so that every character of a ``CJK_CLASS`` string
        maps to the first character of that string.'''
        for group in cls.CJK_CLASS:
            cls.CJK_MAP.update(dict.fromkeys(group, group[0]))

NGram._init_cjk_map()
-------------------------------------------------------------------------------- /langdetect/utils/unicode_block.py: -------------------------------------------------------------------------------- 1 | UNICODE_BASIC_LATIN = 1 2 | UNICODE_LATIN_1_SUPPLEMENT = 2 3 | UNICODE_LATIN_EXTENDED_A = 3 4 | UNICODE_LATIN_EXTENDED_B = 4 5 | UNICODE_IPA_EXTENSIONS = 5 6 | UNICODE_SPACING_MODIFIER_LETTERS = 6 7 | UNICODE_COMBINING_DIACRITICAL_MARKS = 7 8 | UNICODE_GREEK_AND_COPTIC = 8 9 | UNICODE_CYRILLIC = 9 10 | UNICODE_CYRILLIC_SUPPLEMENT = 10 11 | UNICODE_ARMENIAN = 11 12 | UNICODE_HEBREW = 12 13 | UNICODE_ARABIC = 13 14 | UNICODE_SYRIAC = 14 15 | UNICODE_ARABIC_SUPPLEMENT = 15 16 | UNICODE_THAANA = 16 17 | UNICODE_NKO = 17 18 | UNICODE_SAMARITAN = 18 19 | UNICODE_MANDAIC = 19 20 | UNICODE_ARABIC_EXTENDED_A = 20 21 | UNICODE_DEVANAGARI = 21 22 | UNICODE_BENGALI = 22 23 | UNICODE_GURMUKHI = 23 24 | UNICODE_GUJARATI = 24 25 | UNICODE_ORIYA = 25 26 | UNICODE_TAMIL = 26 27 | UNICODE_TELUGU = 27 28 | UNICODE_KANNADA = 28 29 | UNICODE_MALAYALAM = 29 30 | UNICODE_SINHALA = 30 31 | UNICODE_THAI = 31 32 | UNICODE_LAO = 32 33 | UNICODE_TIBETAN = 33 34 | UNICODE_MYANMAR = 34 35 | UNICODE_GEORGIAN = 35 36 | UNICODE_HANGUL_JAMO = 36 37 | UNICODE_ETHIOPIC = 37 38 | UNICODE_ETHIOPIC_SUPPLEMENT = 38 39 | UNICODE_CHEROKEE = 39 40 | UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = 40 41 | UNICODE_OGHAM = 41 42 | UNICODE_RUNIC = 42 43 | UNICODE_TAGALOG = 43 44 | UNICODE_HANUNOO = 44 45 | UNICODE_BUHID = 45 46 | UNICODE_TAGBANWA = 46 47 | UNICODE_KHMER = 47 48 | UNICODE_MONGOLIAN = 48 49 | UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 49 50 | UNICODE_LIMBU = 50 51 | UNICODE_TAI_LE = 51 52 | UNICODE_NEW_TAI_LUE = 52 53 | UNICODE_KHMER_SYMBOLS = 53 54 | UNICODE_BUGINESE = 54 55 | UNICODE_TAI_THAM = 55 56 | UNICODE_BALINESE = 56 57 | UNICODE_SUNDANESE = 57 58 | UNICODE_BATAK = 58 59 | UNICODE_LEPCHA = 59 60 | UNICODE_OL_CHIKI = 60 61 | UNICODE_SUNDANESE_SUPPLEMENT = 61 62 | 
UNICODE_VEDIC_EXTENSIONS = 62 63 | UNICODE_PHONETIC_EXTENSIONS = 63 64 | UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT = 64 65 | UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 65 66 | UNICODE_LATIN_EXTENDED_ADDITIONAL = 66 67 | UNICODE_GREEK_EXTENDED = 67 68 | UNICODE_GENERAL_PUNCTUATION = 68 69 | UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS = 69 70 | UNICODE_CURRENCY_SYMBOLS = 70 71 | UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS = 71 72 | UNICODE_LETTERLIKE_SYMBOLS = 72 73 | UNICODE_NUMBER_FORMS = 73 74 | UNICODE_ARROWS = 74 75 | UNICODE_MATHEMATICAL_OPERATORS = 75 76 | UNICODE_MISCELLANEOUS_TECHNICAL = 76 77 | UNICODE_CONTROL_PICTURES = 77 78 | UNICODE_OPTICAL_CHARACTER_RECOGNITION = 78 79 | UNICODE_ENCLOSED_ALPHANUMERICS = 79 80 | UNICODE_BOX_DRAWING = 80 81 | UNICODE_BLOCK_ELEMENTS = 81 82 | UNICODE_GEOMETRIC_SHAPES = 82 83 | UNICODE_MISCELLANEOUS_SYMBOLS = 83 84 | UNICODE_DINGBATS = 84 85 | UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 85 86 | UNICODE_SUPPLEMENTAL_ARROWS_A = 86 87 | UNICODE_BRAILLE_PATTERNS = 87 88 | UNICODE_SUPPLEMENTAL_ARROWS_B = 88 89 | UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 89 90 | UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 90 91 | UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 91 92 | UNICODE_GLAGOLITIC = 92 93 | UNICODE_LATIN_EXTENDED_C = 93 94 | UNICODE_COPTIC = 94 95 | UNICODE_GEORGIAN_SUPPLEMENT = 95 96 | UNICODE_TIFINAGH = 96 97 | UNICODE_ETHIOPIC_EXTENDED = 97 98 | UNICODE_CYRILLIC_EXTENDED_A = 98 99 | UNICODE_SUPPLEMENTAL_PUNCTUATION = 99 100 | UNICODE_CJK_RADICALS_SUPPLEMENT = 100 101 | UNICODE_KANGXI_RADICALS = 101 102 | UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS = 102 103 | UNICODE_CJK_SYMBOLS_AND_PUNCTUATION = 103 104 | UNICODE_HIRAGANA = 104 105 | UNICODE_KATAKANA = 105 106 | UNICODE_BOPOMOFO = 106 107 | UNICODE_HANGUL_COMPATIBILITY_JAMO = 107 108 | UNICODE_KANBUN = 108 109 | UNICODE_BOPOMOFO_EXTENDED = 109 110 | UNICODE_CJK_STROKES = 110 111 | UNICODE_KATAKANA_PHONETIC_EXTENSIONS = 111 112 | 
UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS = 112 113 | UNICODE_CJK_COMPATIBILITY = 113 114 | UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = 114 115 | UNICODE_YIJING_HEXAGRAM_SYMBOLS = 115 116 | UNICODE_CJK_UNIFIED_IDEOGRAPHS = 116 117 | UNICODE_YI_SYLLABLES = 117 118 | UNICODE_YI_RADICALS = 118 119 | UNICODE_LISU = 119 120 | UNICODE_VAI = 120 121 | UNICODE_CYRILLIC_EXTENDED_B = 121 122 | UNICODE_BAMUM = 122 123 | UNICODE_MODIFIER_TONE_LETTERS = 123 124 | UNICODE_LATIN_EXTENDED_D = 124 125 | UNICODE_SYLOTI_NAGRI = 125 126 | UNICODE_COMMON_INDIC_NUMBER_FORMS = 126 127 | UNICODE_PHAGS_PA = 127 128 | UNICODE_SAURASHTRA = 128 129 | UNICODE_DEVANAGARI_EXTENDED = 129 130 | UNICODE_KAYAH_LI = 130 131 | UNICODE_REJANG = 131 132 | UNICODE_HANGUL_JAMO_EXTENDED_A = 132 133 | UNICODE_JAVANESE = 133 134 | UNICODE_CHAM = 134 135 | UNICODE_MYANMAR_EXTENDED_A = 135 136 | UNICODE_TAI_VIET = 136 137 | UNICODE_MEETEI_MAYEK_EXTENSIONS = 137 138 | UNICODE_ETHIOPIC_EXTENDED_A = 138 139 | UNICODE_MEETEI_MAYEK = 139 140 | UNICODE_HANGUL_SYLLABLES = 140 141 | UNICODE_HANGUL_JAMO_EXTENDED_B = 141 142 | UNICODE_HIGH_SURROGATES = 142 143 | UNICODE_HIGH_PRIVATE_USE_SURROGATES = 143 144 | UNICODE_LOW_SURROGATES = 144 145 | UNICODE_PRIVATE_USE_AREA = 145 146 | UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS = 146 147 | UNICODE_ALPHABETIC_PRESENTATION_FORMS = 147 148 | UNICODE_ARABIC_PRESENTATION_FORMS_A = 148 149 | UNICODE_VARIATION_SELECTORS = 149 150 | UNICODE_VERTICAL_FORMS = 150 151 | UNICODE_COMBINING_HALF_MARKS = 151 152 | UNICODE_CJK_COMPATIBILITY_FORMS = 152 153 | UNICODE_SMALL_FORM_VARIANTS = 153 154 | UNICODE_ARABIC_PRESENTATION_FORMS_B = 154 155 | UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS = 155 156 | UNICODE_SPECIALS = 156 157 | UNICODE_LINEAR_B_SYLLABARY = 157 158 | UNICODE_LINEAR_B_IDEOGRAMS = 158 159 | UNICODE_AEGEAN_NUMBERS = 159 160 | UNICODE_ANCIENT_GREEK_NUMBERS = 160 161 | UNICODE_ANCIENT_SYMBOLS = 161 162 | UNICODE_PHAISTOS_DISC = 162 163 | UNICODE_LYCIAN = 163 164 | UNICODE_CARIAN = 164 165 | 
UNICODE_OLD_ITALIC = 165 166 | UNICODE_GOTHIC = 166 167 | UNICODE_UGARITIC = 167 168 | UNICODE_OLD_PERSIAN = 168 169 | UNICODE_DESERET = 169 170 | UNICODE_SHAVIAN = 170 171 | UNICODE_OSMANYA = 171 172 | UNICODE_CYPRIOT_SYLLABARY = 172 173 | UNICODE_IMPERIAL_ARAMAIC = 173 174 | UNICODE_PHOENICIAN = 174 175 | UNICODE_LYDIAN = 175 176 | UNICODE_MEROITIC_HIEROGLYPHS = 176 177 | UNICODE_MEROITIC_CURSIVE = 177 178 | UNICODE_KHAROSHTHI = 178 179 | UNICODE_OLD_SOUTH_ARABIAN = 179 180 | UNICODE_AVESTAN = 180 181 | UNICODE_INSCRIPTIONAL_PARTHIAN = 181 182 | UNICODE_INSCRIPTIONAL_PAHLAVI = 182 183 | UNICODE_OLD_TURKIC = 183 184 | UNICODE_RUMI_NUMERAL_SYMBOLS = 184 185 | UNICODE_BRAHMI = 185 186 | UNICODE_KAITHI = 186 187 | UNICODE_SORA_SOMPENG = 187 188 | UNICODE_CHAKMA = 188 189 | UNICODE_SHARADA = 189 190 | UNICODE_TAKRI = 190 191 | UNICODE_CUNEIFORM = 191 192 | UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 192 193 | UNICODE_EGYPTIAN_HIEROGLYPHS = 193 194 | UNICODE_BAMUM_SUPPLEMENT = 194 195 | UNICODE_MIAO = 195 196 | UNICODE_KANA_SUPPLEMENT = 196 197 | UNICODE_BYZANTINE_MUSICAL_SYMBOLS = 197 198 | UNICODE_MUSICAL_SYMBOLS = 198 199 | UNICODE_ANCIENT_GREEK_MUSICAL_NOTATION = 199 200 | UNICODE_TAI_XUAN_JING_SYMBOLS = 200 201 | UNICODE_COUNTING_ROD_NUMERALS = 201 202 | UNICODE_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 202 203 | UNICODE_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 203 204 | UNICODE_MAHJONG_TILES = 204 205 | UNICODE_DOMINO_TILES = 205 206 | UNICODE_PLAYING_CARDS = 206 207 | UNICODE_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 207 208 | UNICODE_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 208 209 | UNICODE_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 209 210 | UNICODE_EMOTICONS = 210 211 | UNICODE_TRANSPORT_AND_MAP_SYMBOLS = 211 212 | UNICODE_ALCHEMICAL_SYMBOLS = 212 213 | UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 213 214 | UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 214 215 | UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 215 216 | UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 216 217 | 
UNICODE_TAGS = 217 218 | UNICODE_VARIATION_SELECTORS_SUPPLEMENT = 218 219 | UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 219 220 | UNICODE_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 220 221 | 222 | 223 | _unicode_blocks = [ 224 | (UNICODE_BASIC_LATIN, 0x0000, 0x007F), 225 | (UNICODE_LATIN_1_SUPPLEMENT, 0x0080, 0x00FF), 226 | (UNICODE_LATIN_EXTENDED_A, 0x0100, 0x017F), 227 | (UNICODE_LATIN_EXTENDED_B, 0x0180, 0x024F), 228 | (UNICODE_IPA_EXTENSIONS, 0x0250, 0x02AF), 229 | (UNICODE_SPACING_MODIFIER_LETTERS, 0x02B0, 0x02FF), 230 | (UNICODE_COMBINING_DIACRITICAL_MARKS, 0x0300, 0x036F), 231 | (UNICODE_GREEK_AND_COPTIC, 0x0370, 0x03FF), 232 | (UNICODE_CYRILLIC, 0x0400, 0x04FF), 233 | (UNICODE_CYRILLIC_SUPPLEMENT, 0x0500, 0x052F), 234 | (UNICODE_ARMENIAN, 0x0530, 0x058F), 235 | (UNICODE_HEBREW, 0x0590, 0x05FF), 236 | (UNICODE_ARABIC, 0x0600, 0x06FF), 237 | (UNICODE_SYRIAC, 0x0700, 0x074F), 238 | (UNICODE_ARABIC_SUPPLEMENT, 0x0750, 0x077F), 239 | (UNICODE_THAANA, 0x0780, 0x07BF), 240 | (UNICODE_NKO, 0x07C0, 0x07FF), 241 | (UNICODE_SAMARITAN, 0x0800, 0x083F), 242 | (UNICODE_MANDAIC, 0x0840, 0x085F), 243 | (UNICODE_ARABIC_EXTENDED_A, 0x08A0, 0x08FF), 244 | (UNICODE_DEVANAGARI, 0x0900, 0x097F), 245 | (UNICODE_BENGALI, 0x0980, 0x09FF), 246 | (UNICODE_GURMUKHI, 0x0A00, 0x0A7F), 247 | (UNICODE_GUJARATI, 0x0A80, 0x0AFF), 248 | (UNICODE_ORIYA, 0x0B00, 0x0B7F), 249 | (UNICODE_TAMIL, 0x0B80, 0x0BFF), 250 | (UNICODE_TELUGU, 0x0C00, 0x0C7F), 251 | (UNICODE_KANNADA, 0x0C80, 0x0CFF), 252 | (UNICODE_MALAYALAM, 0x0D00, 0x0D7F), 253 | (UNICODE_SINHALA, 0x0D80, 0x0DFF), 254 | (UNICODE_THAI, 0x0E00, 0x0E7F), 255 | (UNICODE_LAO, 0x0E80, 0x0EFF), 256 | (UNICODE_TIBETAN, 0x0F00, 0x0FFF), 257 | (UNICODE_MYANMAR, 0x1000, 0x109F), 258 | (UNICODE_GEORGIAN, 0x10A0, 0x10FF), 259 | (UNICODE_HANGUL_JAMO, 0x1100, 0x11FF), 260 | (UNICODE_ETHIOPIC, 0x1200, 0x137F), 261 | (UNICODE_ETHIOPIC_SUPPLEMENT, 0x1380, 0x139F), 262 | (UNICODE_CHEROKEE, 0x13A0, 0x13FF), 263 | (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, 
0x1400, 0x167F), 264 | (UNICODE_OGHAM, 0x1680, 0x169F), 265 | (UNICODE_RUNIC, 0x16A0, 0x16FF), 266 | (UNICODE_TAGALOG, 0x1700, 0x171F), 267 | (UNICODE_HANUNOO, 0x1720, 0x173F), 268 | (UNICODE_BUHID, 0x1740, 0x175F), 269 | (UNICODE_TAGBANWA, 0x1760, 0x177F), 270 | (UNICODE_KHMER, 0x1780, 0x17FF), 271 | (UNICODE_MONGOLIAN, 0x1800, 0x18AF), 272 | (UNICODE_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, 0x18B0, 0x18FF), 273 | (UNICODE_LIMBU, 0x1900, 0x194F), 274 | (UNICODE_TAI_LE, 0x1950, 0x197F), 275 | (UNICODE_NEW_TAI_LUE, 0x1980, 0x19DF), 276 | (UNICODE_KHMER_SYMBOLS, 0x19E0, 0x19FF), 277 | (UNICODE_BUGINESE, 0x1A00, 0x1A1F), 278 | (UNICODE_TAI_THAM, 0x1A20, 0x1AAF), 279 | (UNICODE_BALINESE, 0x1B00, 0x1B7F), 280 | (UNICODE_SUNDANESE, 0x1B80, 0x1BBF), 281 | (UNICODE_BATAK, 0x1BC0, 0x1BFF), 282 | (UNICODE_LEPCHA, 0x1C00, 0x1C4F), 283 | (UNICODE_OL_CHIKI, 0x1C50, 0x1C7F), 284 | (UNICODE_SUNDANESE_SUPPLEMENT, 0x1CC0, 0x1CCF), 285 | (UNICODE_VEDIC_EXTENSIONS, 0x1CD0, 0x1CFF), 286 | (UNICODE_PHONETIC_EXTENSIONS, 0x1D00, 0x1D7F), 287 | (UNICODE_PHONETIC_EXTENSIONS_SUPPLEMENT, 0x1D80, 0x1DBF), 288 | (UNICODE_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, 0x1DC0, 0x1DFF), 289 | (UNICODE_LATIN_EXTENDED_ADDITIONAL, 0x1E00, 0x1EFF), 290 | (UNICODE_GREEK_EXTENDED, 0x1F00, 0x1FFF), 291 | (UNICODE_GENERAL_PUNCTUATION, 0x2000, 0x206F), 292 | (UNICODE_SUPERSCRIPTS_AND_SUBSCRIPTS, 0x2070, 0x209F), 293 | (UNICODE_CURRENCY_SYMBOLS, 0x20A0, 0x20CF), 294 | (UNICODE_COMBINING_DIACRITICAL_MARKS_FOR_SYMBOLS, 0x20D0, 0x20FF), 295 | (UNICODE_LETTERLIKE_SYMBOLS, 0x2100, 0x214F), 296 | (UNICODE_NUMBER_FORMS, 0x2150, 0x218F), 297 | (UNICODE_ARROWS, 0x2190, 0x21FF), 298 | (UNICODE_MATHEMATICAL_OPERATORS, 0x2200, 0x22FF), 299 | (UNICODE_MISCELLANEOUS_TECHNICAL, 0x2300, 0x23FF), 300 | (UNICODE_CONTROL_PICTURES, 0x2400, 0x243F), 301 | (UNICODE_OPTICAL_CHARACTER_RECOGNITION, 0x2440, 0x245F), 302 | (UNICODE_ENCLOSED_ALPHANUMERICS, 0x2460, 0x24FF), 303 | (UNICODE_BOX_DRAWING, 0x2500, 0x257F), 304 | 
(UNICODE_BLOCK_ELEMENTS, 0x2580, 0x259F), 305 | (UNICODE_GEOMETRIC_SHAPES, 0x25A0, 0x25FF), 306 | (UNICODE_MISCELLANEOUS_SYMBOLS, 0x2600, 0x26FF), 307 | (UNICODE_DINGBATS, 0x2700, 0x27BF), 308 | (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, 0x27C0, 0x27EF), 309 | (UNICODE_SUPPLEMENTAL_ARROWS_A, 0x27F0, 0x27FF), 310 | (UNICODE_BRAILLE_PATTERNS, 0x2800, 0x28FF), 311 | (UNICODE_SUPPLEMENTAL_ARROWS_B, 0x2900, 0x297F), 312 | (UNICODE_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, 0x2980, 0x29FF), 313 | (UNICODE_SUPPLEMENTAL_MATHEMATICAL_OPERATORS, 0x2A00, 0x2AFF), 314 | (UNICODE_MISCELLANEOUS_SYMBOLS_AND_ARROWS, 0x2B00, 0x2BFF), 315 | (UNICODE_GLAGOLITIC, 0x2C00, 0x2C5F), 316 | (UNICODE_LATIN_EXTENDED_C, 0x2C60, 0x2C7F), 317 | (UNICODE_COPTIC, 0x2C80, 0x2CFF), 318 | (UNICODE_GEORGIAN_SUPPLEMENT, 0x2D00, 0x2D2F), 319 | (UNICODE_TIFINAGH, 0x2D30, 0x2D7F), 320 | (UNICODE_ETHIOPIC_EXTENDED, 0x2D80, 0x2DDF), 321 | (UNICODE_CYRILLIC_EXTENDED_A, 0x2DE0, 0x2DFF), 322 | (UNICODE_SUPPLEMENTAL_PUNCTUATION, 0x2E00, 0x2E7F), 323 | (UNICODE_CJK_RADICALS_SUPPLEMENT, 0x2E80, 0x2EFF), 324 | (UNICODE_KANGXI_RADICALS, 0x2F00, 0x2FDF), 325 | (UNICODE_IDEOGRAPHIC_DESCRIPTION_CHARACTERS, 0x2FF0, 0x2FFF), 326 | (UNICODE_CJK_SYMBOLS_AND_PUNCTUATION, 0x3000, 0x303F), 327 | (UNICODE_HIRAGANA, 0x3040, 0x309F), 328 | (UNICODE_KATAKANA, 0x30A0, 0x30FF), 329 | (UNICODE_BOPOMOFO, 0x3100, 0x312F), 330 | (UNICODE_HANGUL_COMPATIBILITY_JAMO, 0x3130, 0x318F), 331 | (UNICODE_KANBUN, 0x3190, 0x319F), 332 | (UNICODE_BOPOMOFO_EXTENDED, 0x31A0, 0x31BF), 333 | (UNICODE_CJK_STROKES, 0x31C0, 0x31EF), 334 | (UNICODE_KATAKANA_PHONETIC_EXTENSIONS, 0x31F0, 0x31FF), 335 | (UNICODE_ENCLOSED_CJK_LETTERS_AND_MONTHS, 0x3200, 0x32FF), 336 | (UNICODE_CJK_COMPATIBILITY, 0x3300, 0x33FF), 337 | (UNICODE_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, 0x3400, 0x4DBF), 338 | (UNICODE_YIJING_HEXAGRAM_SYMBOLS, 0x4DC0, 0x4DFF), 339 | (UNICODE_CJK_UNIFIED_IDEOGRAPHS, 0x4E00, 0x9FFF), 340 | (UNICODE_YI_SYLLABLES, 0xA000, 0xA48F), 341 | 
(UNICODE_YI_RADICALS, 0xA490, 0xA4CF), 342 | (UNICODE_LISU, 0xA4D0, 0xA4FF), 343 | (UNICODE_VAI, 0xA500, 0xA63F), 344 | (UNICODE_CYRILLIC_EXTENDED_B, 0xA640, 0xA69F), 345 | (UNICODE_BAMUM, 0xA6A0, 0xA6FF), 346 | (UNICODE_MODIFIER_TONE_LETTERS, 0xA700, 0xA71F), 347 | (UNICODE_LATIN_EXTENDED_D, 0xA720, 0xA7FF), 348 | (UNICODE_SYLOTI_NAGRI, 0xA800, 0xA82F), 349 | (UNICODE_COMMON_INDIC_NUMBER_FORMS, 0xA830, 0xA83F), 350 | (UNICODE_PHAGS_PA, 0xA840, 0xA87F), 351 | (UNICODE_SAURASHTRA, 0xA880, 0xA8DF), 352 | (UNICODE_DEVANAGARI_EXTENDED, 0xA8E0, 0xA8FF), 353 | (UNICODE_KAYAH_LI, 0xA900, 0xA92F), 354 | (UNICODE_REJANG, 0xA930, 0xA95F), 355 | (UNICODE_HANGUL_JAMO_EXTENDED_A, 0xA960, 0xA97F), 356 | (UNICODE_JAVANESE, 0xA980, 0xA9DF), 357 | (UNICODE_CHAM, 0xAA00, 0xAA5F), 358 | (UNICODE_MYANMAR_EXTENDED_A, 0xAA60, 0xAA7F), 359 | (UNICODE_TAI_VIET, 0xAA80, 0xAADF), 360 | (UNICODE_MEETEI_MAYEK_EXTENSIONS, 0xAAE0, 0xAAFF), 361 | (UNICODE_ETHIOPIC_EXTENDED_A, 0xAB00, 0xAB2F), 362 | (UNICODE_MEETEI_MAYEK, 0xABC0, 0xABFF), 363 | (UNICODE_HANGUL_SYLLABLES, 0xAC00, 0xD7AF), 364 | (UNICODE_HANGUL_JAMO_EXTENDED_B, 0xD7B0, 0xD7FF), 365 | (UNICODE_HIGH_SURROGATES, 0xD800, 0xDB7F), 366 | (UNICODE_HIGH_PRIVATE_USE_SURROGATES, 0xDB80, 0xDBFF), 367 | (UNICODE_LOW_SURROGATES, 0xDC00, 0xDFFF), 368 | (UNICODE_PRIVATE_USE_AREA, 0xE000, 0xF8FF), 369 | (UNICODE_CJK_COMPATIBILITY_IDEOGRAPHS, 0xF900, 0xFAFF), 370 | (UNICODE_ALPHABETIC_PRESENTATION_FORMS, 0xFB00, 0xFB4F), 371 | (UNICODE_ARABIC_PRESENTATION_FORMS_A, 0xFB50, 0xFDFF), 372 | (UNICODE_VARIATION_SELECTORS, 0xFE00, 0xFE0F), 373 | (UNICODE_VERTICAL_FORMS, 0xFE10, 0xFE1F), 374 | (UNICODE_COMBINING_HALF_MARKS, 0xFE20, 0xFE2F), 375 | (UNICODE_CJK_COMPATIBILITY_FORMS, 0xFE30, 0xFE4F), 376 | (UNICODE_SMALL_FORM_VARIANTS, 0xFE50, 0xFE6F), 377 | (UNICODE_ARABIC_PRESENTATION_FORMS_B, 0xFE70, 0xFEFF), 378 | (UNICODE_HALFWIDTH_AND_FULLWIDTH_FORMS, 0xFF00, 0xFFEF), 379 | (UNICODE_SPECIALS, 0xFFF0, 0xFFFF), 380 | (UNICODE_LINEAR_B_SYLLABARY, 0x10000, 
0x1007F), 381 | (UNICODE_LINEAR_B_IDEOGRAMS, 0x10080, 0x100FF), 382 | (UNICODE_AEGEAN_NUMBERS, 0x10100, 0x1013F), 383 | (UNICODE_ANCIENT_GREEK_NUMBERS, 0x10140, 0x1018F), 384 | (UNICODE_ANCIENT_SYMBOLS, 0x10190, 0x101CF), 385 | (UNICODE_PHAISTOS_DISC, 0x101D0, 0x101FF), 386 | (UNICODE_LYCIAN, 0x10280, 0x1029F), 387 | (UNICODE_CARIAN, 0x102A0, 0x102DF), 388 | (UNICODE_OLD_ITALIC, 0x10300, 0x1032F), 389 | (UNICODE_GOTHIC, 0x10330, 0x1034F), 390 | (UNICODE_UGARITIC, 0x10380, 0x1039F), 391 | (UNICODE_OLD_PERSIAN, 0x103A0, 0x103DF), 392 | (UNICODE_DESERET, 0x10400, 0x1044F), 393 | (UNICODE_SHAVIAN, 0x10450, 0x1047F), 394 | (UNICODE_OSMANYA, 0x10480, 0x104AF), 395 | (UNICODE_CYPRIOT_SYLLABARY, 0x10800, 0x1083F), 396 | (UNICODE_IMPERIAL_ARAMAIC, 0x10840, 0x1085F), 397 | (UNICODE_PHOENICIAN, 0x10900, 0x1091F), 398 | (UNICODE_LYDIAN, 0x10920, 0x1093F), 399 | (UNICODE_MEROITIC_HIEROGLYPHS, 0x10980, 0x1099F), 400 | (UNICODE_MEROITIC_CURSIVE, 0x109A0, 0x109FF), 401 | (UNICODE_KHAROSHTHI, 0x10A00, 0x10A5F), 402 | (UNICODE_OLD_SOUTH_ARABIAN, 0x10A60, 0x10A7F), 403 | (UNICODE_AVESTAN, 0x10B00, 0x10B3F), 404 | (UNICODE_INSCRIPTIONAL_PARTHIAN, 0x10B40, 0x10B5F), 405 | (UNICODE_INSCRIPTIONAL_PAHLAVI, 0x10B60, 0x10B7F), 406 | (UNICODE_OLD_TURKIC, 0x10C00, 0x10C4F), 407 | (UNICODE_RUMI_NUMERAL_SYMBOLS, 0x10E60, 0x10E7F), 408 | (UNICODE_BRAHMI, 0x11000, 0x1107F), 409 | (UNICODE_KAITHI, 0x11080, 0x110CF), 410 | (UNICODE_SORA_SOMPENG, 0x110D0, 0x110FF), 411 | (UNICODE_CHAKMA, 0x11100, 0x1114F), 412 | (UNICODE_SHARADA, 0x11180, 0x111DF), 413 | (UNICODE_TAKRI, 0x11680, 0x116CF), 414 | (UNICODE_CUNEIFORM, 0x12000, 0x123FF), 415 | (UNICODE_CUNEIFORM_NUMBERS_AND_PUNCTUATION, 0x12400, 0x1247F), 416 | (UNICODE_EGYPTIAN_HIEROGLYPHS, 0x13000, 0x1342F), 417 | (UNICODE_BAMUM_SUPPLEMENT, 0x16800, 0x16A3F), 418 | (UNICODE_MIAO, 0x16F00, 0x16F9F), 419 | (UNICODE_KANA_SUPPLEMENT, 0x1B000, 0x1B0FF), 420 | (UNICODE_BYZANTINE_MUSICAL_SYMBOLS, 0x1D000, 0x1D0FF), 421 | (UNICODE_MUSICAL_SYMBOLS, 0x1D100, 
def unicode_block(ch):
    '''Look up the Unicode block that contains the character ch.

    Returns the block name constant of the matching (name, start, end)
    entry in _unicode_blocks, or None when the code point of ch falls
    outside every listed range (e.g. in a gap between two blocks).

    The lookup is a binary search, so it relies on _unicode_blocks being
    ordered by ascending, non-overlapping code point ranges.
    '''
    code_point = ord(ch)

    # Basic Latin is answered directly, without touching the table.
    if code_point < 0x80:
        return UNICODE_BASIC_LATIN

    # Narrow the [low, high] window of table indices until the range
    # holding code_point is found or the window becomes empty.
    low = 0
    high = NUM_BLOCKS - 1
    while low <= high:
        middle = (low + high) // 2
        block_name, first, last = _unicode_blocks[middle]
        if code_point < first:
            high = middle - 1
        elif code_point > last:
            low = middle + 1
        else:
            return block_name

    # No range matched: the code point is not assigned to any known block.
    return None
# Packaging script for the langdetect distribution.
import io

# Prefer setuptools; fall back to distutils on very old environments.
try:
    from setuptools import setup
except ImportError:
    from distutils.core import setup


# Read the README explicitly as UTF-8 so the long description does not depend
# on the build machine's locale. io.open accepts `encoding` on both Python 2.7
# and Python 3, which are the versions this package declares support for.
with io.open('README.md', encoding='utf-8') as f:
    readme = f.read()


setup(
    name='langdetect',
    version='1.0.9',
    description='Language detection library ported from Google\'s language-detection.',
    long_description=readme,
    long_description_content_type='text/markdown',
    author='Michal Mimino Danilak',
    author_email='michal.danilak@gmail.com',
    url='https://github.com/Mimino666/langdetect',
    keywords='language detection library',
    packages=['langdetect', 'langdetect.utils', 'langdetect.tests'],
    include_package_data=True,
    install_requires=['six'],
    # Must agree with the LICENSE file and the license classifier below,
    # both of which state the Apache License 2.0.
    license='Apache-2.0',
    classifiers=[
        'Development Status :: 5 - Production/Stable',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: 3.6',
        'Programming Language :: Python :: 3.7',
        'Programming Language :: Python :: 3.8',
    ],
)