├── .gitignore ├── LICENSE ├── README.md ├── benchmark ├── bench.py ├── big-test.txt ├── sentences.txt ├── single-words.txt ├── tweets.txt └── word-pairs.txt ├── demo.py ├── eld ├── __init__.py ├── languageData.py ├── languageDetector.py ├── languageResult.py ├── languageSubset.py ├── resources │ ├── avg_score.py │ └── ngrams │ │ ├── ngramsL60.py │ │ ├── ngramsM60.py │ │ └── subset │ │ ├── ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py │ │ └── ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd.py ├── subsetResult.py └── tests │ ├── data │ └── big-test.txt │ ├── test_detector.py │ └── test_subset.py ├── misc ├── sentences_avg_py.png ├── table_accuracy_py.svg └── table_time_py.svg └── pyproject.toml /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.pyc 3 | /.idea/ 4 | build/ 5 | dist/ 6 | *.egg-info/ 7 | *.egg 8 | .pytest_cache/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright 2023 Nito T.M. 191 | Author URL: https://github.com/nitotm 192 | 193 | Licensed under the Apache License, Version 2.0 (the "License"); 194 | you may not use this file except in compliance with the License. 195 | You may obtain a copy of the License at 196 | 197 | http://www.apache.org/licenses/LICENSE-2.0 198 | 199 | Unless required by applicable law or agreed to in writing, software 200 | distributed under the License is distributed on an "AS IS" BASIS, 201 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 202 | See the License for the specific language governing permissions and 203 | limitations under the License. 204 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Efficient Language Detector 2 | 3 |
4 | 5 | ![supported Python versions](https://img.shields.io/badge/Python-%3E%3D%203.7-blue) 6 | [![license](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0) 7 | [![supported languages](https://img.shields.io/badge/supported%20languages-60-brightgreen.svg)](#languages) 8 | 9 |
10 | 11 | Efficient language detector (*Nito-ELD* or *ELD*) is a fast and accurate language detector. It is one of the fastest non-compiled detectors, while its accuracy is within the range of the heaviest and slowest detectors. 12 | 13 | It's 100% Python, easy to install, and has no dependencies other than `regex`. 14 | ELD is also available in [Javascript](https://github.com/nitotm/efficient-language-detector-js) and [PHP](https://github.com/nitotm/efficient-language-detector). 15 | 16 | > This is the first version of a port made from the original version in PHP, the structure might not be definitive, the code can be optimized. My knowledge of Python is basic, feel free to suggest improvements. 17 | 18 | 1. [Installation](#installation) 19 | 2. [How to use](#how-to-use) 20 | 3. [Benchmarks](#benchmarks) 21 | 4. [Languages](#languages) 22 | 23 | ## Installation 24 | 25 | ```bash 26 | $ pip install eld 27 | ``` 28 | Alternatively, downloading / cloning the files can work too, by changing the import path. 29 | 30 | ## How to use? 31 | 32 | ```python 33 | from eld import LanguageDetector 34 | detector = LanguageDetector() 35 | ``` 36 | `detect()` expects a UTF-8 string, and returns an object, with a 'language' variable, which is either an *ISO 639-1 code* or `None` 37 | ```python 38 | print(detector.detect('Hola, cómo te llamas?')) 39 | # Object { language: "es", scores(): {"es": 0.53, "et": 0.21, ...}, is_reliable(): True } 40 | # Object { language: None|str, scores(): None|dict, is_reliable(): bool } 41 | 42 | print(detector.detect('Hola, cómo te llamas?').language) 43 | # "es" 44 | 45 | # if clean_text(True), detect() removes Urls, domains, emails, alphanumerical & numbers 46 | detector.clean_text(True) # Default is False 47 | ``` 48 | - To reduce the languages to be detected, there are 3 different options, they only need to be executed once. 
(Check available [languages](#languages) below) 49 | ```python 50 | lang_subset = ['en', 'es', 'fr', 'it', 'nl', 'de'] 51 | 52 | # Option 1 53 | # with dynamic_lang_subset(), detect() executes normally, and then filters excluded languages 54 | detector.dynamic_lang_subset(lang_subset) 55 | # Returns an object with a list named 'languages', with the validated languages or 'None' 56 | 57 | # Option 2. lang_subset() Will first remove the excluded languages, from the n-grams database 58 | # For a single detection is slower than dynamic_lang_subset(), but for several will be faster 59 | # If save option is true (default), the new Ngrams subset will be stored, and loaded next call 60 | detector.lang_subset(lang_subset) # lang_subset(langs, save=True) 61 | # Returns object {success: True, languages: ['de', 'en', ...], error: None, file: 'ngramsM60...'} 62 | 63 | # To remove either dynamic_lang_subset() or lang_subset(), call the methods with None as argument 64 | detector.lang_subset(None) 65 | 66 | # Finally the optimal way to regularly use a language subset: we create the instance with a file 67 | # The file in the argument can be a subset by lang_subset() or another database like 'ngramsL60' 68 | langSubsetDetect = LanguageDetector('ngramsL60') 69 | ``` 70 | 71 | ## Benchmarks 72 | 73 | I compared *ELD* with a different variety of detectors, since the interesting part is the algorithm. 
74 | 75 | | URL | Version | Language | 76 | |:----------------------------------------------------------|:-------------|:-----------| 77 | | https://github.com/nitotm/efficient-language-detector-py/ | 0.9.0 | Python | 78 | | https://github.com/nitotm/efficient-language-detector/ | 1.0.0 | PHP | 79 | | https://github.com/pemistahl/lingua-py | 1.3.2 | Python | 80 | | https://github.com/CLD2Owners/cld2 | Aug 21, 2015 | C++ | 81 | | https://github.com/google/cld3 | Aug 28, 2020 | C++ | 82 | | https://github.com/wooorm/franc | 6.1.0 | Javascript | 83 | 84 | Benchmarks: **Tweets**: *760KB*, short sentences of 140 chars max.; **Big test**: *10MB*, sentences in all 60 languages supported; **Sentences**: *8MB*, this is the *Lingua* sentences test, minus unsupported languages. 85 | Short sentences are what *ELD* and most detectors focus on, as very short text is unreliable, but I included the *Lingua* **Word pairs** *1.5MB*, and **Single words** *880KB* tests to see how they all compare beyond their reliable limits. 86 | 87 | These are the results, first, accuracy and then execution time. 88 | 89 | 100 | accuracy table 101 | 102 | 114 | time table 115 | 116 | 1. Lingua could have a small advantage as it participates with 54 languages, 6 fewer. 117 | 2. CLD2 and CLD3 return a list of languages; the ones not included in this test were discarded, but usually they return one language, so I believe they have a disadvantage. 118 | Also, I confirm the results of CLD2 for short text are correct, contrary to the test on the *Lingua* page, they did not use the parameter "bestEffort = True", their benchmark for CLD2 is unfair. 119 | 120 | *Lingua* is the average accuracy winner, but at what cost: the same test that takes less than 10 seconds in *ELD* or *CLD2* takes more than 5 hours in *Lingua*! It acts like a brute-force software. 121 | Also, its lead comes from single words and word pairs, which are unreliable regardless. 
122 | 123 | The Python version of *NITO-ELD* is not the fastest but is still considered fast, as it is faster than any other non compiled detector tested. 124 | 125 | I added *ELD-L* for comparison, which has a 2.3x bigger database, but only increases execution time marginally, a testament to the efficiency of the algorithm. *ELD-L* is not the main database as it does not improve language detection in sentences. 126 | 127 | Here is the average, per benchmark, of Tweets, Big test & Sentences. 128 | 129 | ![Sentences tests average](https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/main/misc/sentences_avg_py.png) 130 | 141 | 142 | ## Languages 143 | 144 | These are the *ISO 639-1 codes* of the 60 supported languages for *Nito-ELD* v1 145 | 146 | > 'am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gu', 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr', 'ms', 'nl', 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'yo', 'zh' 147 | 148 | Full name languages: 149 | 150 | > Amharic, Arabic, Azerbaijani (Latin), Belarusian, Bulgarian, Bengali, Catalan, Czech, Danish, German, Greek, English, Spanish, Estonian, Basque, Persian, Finnish, French, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Armenian, Icelandic, Italian, Japanese, Georgian, Kannada, Korean, Kurdish (Arabic), Lao, Lithuanian, Latvian, Malayalam, Marathi, Malay (Latin), Dutch, Norwegian, Oriya, Punjabi, Polish, Portuguese, Romanian, Russian, Slovak, Slovene, Albanian, Serbian (Cyrillic), Swedish, Tamil, Telugu, Thai, Tagalog, Turkish, Ukrainian, Urdu, Vietnamese, Yoruba, Chinese 151 | 152 | ## Future improvements 153 | 154 | - Train from bigger datasets, and more languages. 
# Benchmark script: measures ELD accuracy and detect() time over the corpora stored next to this file.
import os
import sys
import time

# Make sure local package is imported instead of pip package
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.insert(0, project_root)  # prioritize the local package

from eld.languageDetector import LanguageDetector

# The corpora sit next to this script; resolving them here lets the benchmark run from any
# working directory instead of only from inside benchmark/.
bench_dir = os.path.dirname(os.path.abspath(__file__))

langDetect = LanguageDetector()
print(f"ELD version: {langDetect.VERSION}\n")

files = ['tweets.txt', 'big-test.txt', 'sentences.txt', 'word-pairs.txt', 'single-words.txt']
durations = []

for file in files:
    # The context manager closes the handle as soon as the corpus has been read
    with open(os.path.join(bench_dir, file), encoding="utf-8") as corpus:
        lines = corpus.read().strip().split("\n")

    # Each line is "<expected language><TAB><text>"; stored as [text, expected language]
    texts = []
    for line in lines:
        values = line.split("\t")
        if len(values) < 2:
            continue  # blank or malformed line: nothing to benchmark
        texts.append([values[1], values[0]])

    total = len(texts)
    correct = 0
    duration = 0

    for text in texts:
        # perf_counter() is monotonic and high resolution; only the detect() call is timed
        start = time.perf_counter()
        language = langDetect.detect(text[0]).language
        duration += time.perf_counter() - start
        if language == text[1]:
            correct += 1
    durations.append(duration)
    # An empty corpus reports 0% instead of raising ZeroDivisionError
    ratio = round((correct / total) * 100, 2) if total else 0
    print(f"{file} - Correct ratio: {ratio}% Time: {duration}\n")

average = sum(durations) / len(durations) if durations else 1
print(f"Average duration: {average}\n")

# Reference results recorded by the author:
# tweets.txt - Correct ratio: 99.28% Time: 0.9556999206542969
# big-test.txt - Correct ratio: 99.41% Time: 7.8356194496154785
# sentences.txt - Correct ratio: 98.77% Time: 6.7327587604522705
# word-pairs.txt - Correct ratio: 87.55% Time: 2.636420488357544
# single-words.txt - Correct ratio: 73.31% Time: 2.12335205078125
With dynamic_lang_subset(), detect() executes normally, but at the end will filter the excluded languages. 30 | detector.dynamic_lang_subset(lang_subset) 31 | # Returns an object with a list named 'languages', with the validated languages or 'None' 32 | 33 | # to remove the subset 34 | detector.dynamic_lang_subset(None) 35 | 36 | # Option 2. lang_subset(langs,save=True) Will previously remove the excluded languages form the Ngrams database; for 37 | # a single detection might be slower than dynamic_lang_subset(), but for several strings will be faster. If 'save' 38 | # option is true (default), the new ngrams subset will be stored and cached for next time. 39 | detector.lang_subset(lang_subset) 40 | # Returns object {success: True, languages: ['de', 'en', ...], error: None, file: 'ngramsM60...'} 41 | 42 | # to remove the subset 43 | detector.lang_subset(None) 44 | 45 | print(detector.VERSION) 46 | 47 | # Finally the optimal way to regularly use the same language subset, will be to add as an argument the file stored 48 | # (and returned) by lang_subset(), when creating an instance of the class. In this case the subset Ngrams database will 49 | # be loaded directly, and not the default database. Also, you can use this option to load different ngram databases 50 | # stored at eld/resources/ngrams 51 | langSubsetDetect = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd') 52 | -------------------------------------------------------------------------------- /eld/__init__.py: -------------------------------------------------------------------------------- 1 | from eld.languageDetector import LanguageDetector 2 | -------------------------------------------------------------------------------- /eld/languageData.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Nito T.M. 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0 3 | # Author Nito T.M. 
class LanguageData:
    """Container for the n-grams database and the per-language metadata that goes with it.

    Attributes:
        avg_score: Value imported from ``resources/avg_score.py``.
        ngrams (dict): The loaded n-grams database (``ngrams_data['ngrams']``).
        lang_score (list): Zero-filled list with one slot per language index (highest index + 1).
        lang_codes (dict): Mapping taken from ``ngrams_data['languages']``.
        type (str): Database type label taken from ``ngrams_data['type']``.
        folder (str): Directory where the n-gram database files are looked up.
    """

    def __init__(self):
        from .resources.avg_score import avg_score
        self.avg_score = avg_score
        self.ngrams = {}
        self.lang_score = []
        self.lang_codes = {}
        self.type = ''
        self.folder = os.path.dirname(__file__) + '/resources/ngrams/'

    # ISO 639-1 codes, for the 60 languages set.
    # ['am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gu',
    # 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr', 'ms', 'nl',
    # 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur',
    # 'vi', 'yo', 'zh']

    # ['Amharic', 'Arabic', 'Azerbaijani (Latin)', 'Belarusian', 'Bulgarian', 'Bengali', 'Catalan', 'Czech', 'Danish',
    # 'German', 'Greek', 'English', 'Spanish', 'Estonian', 'Basque', 'Persian', 'Finnish', 'French', 'Gujarati',
    # 'Hebrew', 'Hindi', 'Croatian', 'Hungarian', 'Armenian', 'Icelandic', 'Italian', 'Japanese', 'Georgian', 'Kannada',
    # 'Korean', 'Kurdish (Arabic)', 'Lao', 'Lithuanian', 'Latvian', 'Malayalam', 'Marathi', 'Malay (Latin)', 'Dutch',
    # 'Norwegian', 'Oriya', 'Punjabi', 'Polish', 'Portuguese', 'Romanian', 'Russian', 'Slovak', 'Slovene', 'Albanian',
    # 'Serbian (Cyrillic)', 'Swedish', 'Tamil', 'Telugu', 'Thai', 'Tagalog', 'Turkish', 'Ukrainian', 'Urdu',
    # 'Vietnamese', 'Yoruba', 'Chinese']

    def load_ngrams(self, subset_file=''):
        """Load an n-grams database into this instance.

        Args:
            subset_file (str): Module name (without ``.py``) of the database to load. It is looked up
                in ``self.folder`` first and in ``self.folder + 'subset/'`` otherwise. An empty string
                loads the bundled ``ngramsM60`` database.
        """
        if subset_file != '':
            # Prefer a database stored directly in the ngrams folder, else fall back to subset/
            candidate = self.folder + subset_file + '.py'
            if not os.path.exists(candidate):
                candidate = self.folder + 'subset/' + subset_file + '.py'
            module_spec = importlib.util.spec_from_file_location(subset_file, candidate)
            loaded = importlib.util.module_from_spec(module_spec)
            module_spec.loader.exec_module(loaded)
            database = loaded.ngrams_data
        else:
            from .resources.ngrams.ngramsM60 import ngrams_data as database

        self.ngrams = database['ngrams']
        # One score slot per language index, so the list can be indexed by language id
        self.lang_score = [0] * (max(database['languages'].keys()) + 1)
        self.type = database['type']
        self.lang_codes = database['languages']
def get_clean_txt(txt):
    """Removes parts of a string, that may be considered as "noise" for language detection

    URLs, e-mail addresses, .com domains and alphanumerical/number codes are each replaced by a
    single space, in that order. The patterns use the Unicode letter class of the `regex` module,
    which this file imports as `re`.

    Args:
        txt (str): Text to clean

    Returns:
        str: The text with the matched fragments replaced by spaces
    """
    # Remove URLS
    txt = re.sub(r'[hw]((ttps?://(www\.)?)|ww\.)([^\s/?\.#-]+\.?)+(\/\S*)?', ' ', txt, flags=re.IGNORECASE)
    # Remove emails
    txt = re.sub(r'[a-zA-Z0-9.!$%&?+_`-]+@[A-Za-z0-9.-]+\.[A-Za-z0-9-]{2,64}', ' ', txt)
    # Remove .com domains. "com" must not be followed by a letter (so "x.comedy" is kept); the
    # end-of-string alternative also removes a bare domain that closes the text.
    txt = re.sub(r'([A-Za-z0-9-]+\.)+com(\/\S*|[^\pL]|$)', ' ', txt)
    # Remove alphanumerical/number codes
    txt = re.sub(r'[a-zA-Z]*[0-9]+[a-zA-Z0-9]*', ' ', txt)
    return txt
89 | if lang_count == 1: 90 | relevancy = 27 # Handpicked relevance multiplier, trial-error 91 | elif lang_count < 16: 92 | relevancy = (16 - lang_count) / 2 + 1 93 | else: 94 | relevancy = 1 95 | 96 | # Most time-consuming loop, do only the strictly necessary inside 97 | for lang, global_frequency in languageData.ngrams[bytes_].items(): 98 | lang_score[lang] += (global_frequency / frequency if frequency > global_frequency 99 | else frequency / global_frequency) * relevancy + 2 100 | # This divisor will produce a final score between 0 - ~1, score could be >1. Can be improved. 101 | result_divisor = num_ngrams * 3.2 102 | results = [] 103 | for lang in range(len(lang_score)): 104 | if lang_score[lang]: 105 | results.append([lang, lang_score[lang] / result_divisor]) # * languageData.scoreNormalizer[lang] 106 | return results 107 | 108 | 109 | def _get_byte_ngrams(txt): 110 | """Gets Ngrams from a given string""" 111 | byte_grams = {} 112 | count_ngrams = 0 113 | 114 | for word in _tokenizer(txt): 115 | length = len(word) 116 | 117 | if length > 70: 118 | length = 70 119 | x = 0 120 | for j in range(0, length - 4, 3): 121 | this_bytes = (b' ' if j == 0 else b'') + word[j:j + 4] 122 | byte_grams[this_bytes] = (1 + byte_grams[this_bytes] if this_bytes in byte_grams else 1) 123 | count_ngrams += 1 124 | x = 1 125 | 126 | this_bytes = (b' ' if x == 0 else b'') + word[length - 4 if length != 3 else 0:] + b' ' 127 | byte_grams[this_bytes] = (1 + byte_grams[this_bytes] if this_bytes in byte_grams else 1) 128 | count_ngrams += 1 129 | 130 | # Frequency is multiplied by 15000 at the ngrams database. A reduced number (13200) seems to work better. 131 | # Linear formulas were tried, decreasing the multiplier for fewer ngram strings, no meaningful improvement. 
class LanguageResult:
    """Outcome of a language detection: detected language, per-language scores and a reliability check."""

    def __init__(self, results=None, num_ngrams=None):
        """
        Args:
            results (list or None): [language_index, score] pairs, best score first. The first pair
                decides `language`. None or empty when nothing was detected.
            num_ngrams (int or None): Number of ngrams of the analysed text.
        """
        self.language = languageData.lang_codes[results[0][0]] if results else None
        self.__results = results
        self.__num_ngrams = num_ngrams

    def __str__(self):
        """Return a JSON dump of the language, the scores and the reliability flag."""
        return json.dumps({'': {
            'language': self.language,
            'scores()': self.scores(),
            'is_reliable()': self.is_reliable()
        }
        })

    def scores(self):
        """Return a dict mapping each language code to its score (empty when nothing was detected)."""
        return _get_scores(self.__results)

    def is_reliable(self):
        """Tell whether the detected language can be trusted.

        Returns:
            bool: False when there is no language, no results, no ngram count or fewer than 3
            ngrams, when the top score divided by the ngram count is below 24% of the language
            average score, or when the two best scores are less than 0.01 apart. True otherwise.
        """
        # `not self.__num_ngrams` covers the None default, which cannot be compared with `< 3`
        if not self.language or not self.__results or not self.__num_ngrams or self.__num_ngrams < 3:
            return False
        top_score = self.__results[0][1]
        next_score = self.__results[1][1] if len(self.__results) > 1 else 0
        # A minimum of a 24% from the average score
        if languageData.avg_score[self.language] * 0.24 > (top_score / self.__num_ngrams):
            return False
        # The winner has to stand out from the runner-up
        if 0.01 > abs(top_score - next_score):
            return False
        return True


def _get_scores(results):
    """Convert [language_index, score] pairs into a dict keyed by language code."""
    if not results:
        return {}
    return {languageData.lang_codes[value[0]]: value[1] for value in results}
class LanguageSubset:
    """Limits detection to a subset of the languages available in the ngrams database.

    dynamic_lang_subset() only records the allowed language indexes, so the scores can be filtered
    afterwards with _filter_lang_subset(); lang_subset() replaces `languageData.ngrams` with a
    reduced database.
    """

    def __init__(self):
        # Sorted language indexes kept by dynamic_lang_subset(), None when no dynamic subset is set
        self.subset = None
        # Copy of the full ngrams database, taken the first time lang_subset() reduces it
        self.default_ngrams = None
        # Hash identifying the subset currently set by lang_subset(), None when there is none
        self.loaded_subset = None

    def dynamic_lang_subset(self, languages):
        """
        Sets a subset, then detect() will filter the languages not included, from the scores with
        _filter_lang_subset()
        Call dynamic_lang_subset(None) to deactivate

        Args:
            languages (list or None): List of languages (ISO 639-1) to include in subset, or None to delete subset

        Returns:
            object SubsetResult: success (bool), languages (list or None), error (str or None)
        """
        self.subset = _make_subset(languages) if languages else None
        if languages and self.subset is None:
            return SubsetResult(False, None, 'No language matched this set')
        return SubsetResult(True, _iso_languages(self.subset) if self.subset else None)

    def lang_subset(self, languages, save=True):
        """
        Sets a subset and removes the excluded languages from the ngrams database
        If the save option is true, the new ngrams subset will be stored, and cached for next time

        Args:
            languages (list or None): List of languages (ISO 639-1) to include in subset, or None to delete subset
            save (bool): Store the reduced database in the subset folder, so it can be loaded next time

        Returns:
            object SubsetResult: success (bool), languages (list or None), error (str or None), file (str)
        """
        if not languages:
            if self.loaded_subset and self.default_ngrams:
                languageData.ngrams = copy.deepcopy(self.default_ngrams)
                self.loaded_subset = None
            return SubsetResult(True)  # if there was already no subset to disable, it also is successful

        lang_array = _make_subset(languages)
        if not lang_array:
            return SubsetResult(False, None, 'No language matched this set')

        if self.default_ngrams is None:
            # Keep the full database, so other subsets can be built from it and it can be restored
            self.default_ngrams = copy.deepcopy(languageData.ngrams)

        # The file name is derived from the language count and a hash of the sorted language indexes
        new_subset = base16_to_base36(
            hashlib.sha1(','.join(str(lang) for lang in lang_array).encode()).hexdigest()
        )
        file_name = 'ngrams' + languageData.type + '-' + str(len(lang_array)) + '_' + new_subset
        file_path = languageData.folder + 'subset/' + file_name + '.py'

        if self.loaded_subset != new_subset:
            self.loaded_subset = new_subset

            if os.path.exists(file_path):
                # Load the previously saved subset file as a module and take its database
                spec = importlib.util.spec_from_file_location(file_name, file_path)
                module = importlib.util.module_from_spec(spec)
                spec.loader.exec_module(module)
                languageData.ngrams = module.ngrams_data['ngrams']
                if languageData.ngrams:
                    return SubsetResult(True, _iso_languages(lang_array), None, file_path)

        # Build the reduced database in one pass over the full copy, keeping only the selected
        # languages; ngrams left with no language are dropped. default_ngrams is never modified.
        allowed = set(lang_array)
        reduced = {}
        for ngram, langs_id in self.default_ngrams.items():
            kept = {lid: value for lid, value in langs_id.items() if lid in allowed}
            if kept:
                reduced[ngram] = kept
        languageData.ngrams = reduced

        saved = False
        if save:
            saved = _save_ngrams(file_path, lang_array)

        # NOTE(review): the cached branch above reports the full `file_path`, this one only `file_name`
        return SubsetResult(True, _iso_languages(lang_array), None, (file_name if saved else None))

    def _filter_lang_subset(self, scores):
        """Filters languages not included in the subset, from the result scores

        Args:
            scores (list): Score entries whose first item is the language index.

        Returns:
            list: The entries whose language index is in `self.subset`, in their original order.
        """
        return [score for score in scores if score[0] in self.subset]
def _ngram_export(data):
    """Serialize nested dicts into compact Python source, without any whitespace.

    Args:
        data: A dict (possibly holding further dicts) or any other value.

    Returns:
        str: '{key:value,...}' for a dict, with keys rendered by repr() and values exported
        recursively; repr(data) for anything else.
    """
    if not isinstance(data, dict):
        return repr(data)
    return '{' + ','.join(repr(key) + ':' + _ngram_export(value) for key, value in data.items()) + '}'


def _save_ngrams(file_path, lang_array):
    """Store the current `languageData.ngrams` as a Python subset file.

    Args:
        file_path (str): Destination of the subset file.
        lang_array (list): Language indexes included in the subset.

    Returns:
        bool: True when the file already existed or was written, False when saving failed
        (the error is logged).
    """
    if os.path.exists(file_path):  # in case self.loaded_subset != new_subset, and was previously saved
        return True

    # The content is written to a temporary file that is moved into place once complete, so a
    # failure never leaves a truncated subset file at file_path.
    tmp_path = file_path + '.tmp'
    try:
        content = (
            '# Copyright 2023 Nito T.M. [ Apache 2.0 Licence https://www.apache.org/licenses/LICENSE-2.0 ]\n' +
            'ngrams_data = {\n' +
            ' "type": "' + str(languageData.type) + '",\n' +
            ' "languages": ' + str(_iso_languages(lang_array)) + ',\n' +
            ' "is_subset": True,\n' +
            ' "ngrams": ' + _ngram_export(languageData.ngrams) + '\n' +
            '}')
        with open(tmp_path, 'w', encoding='utf-8') as f:
            f.write(content)
        os.replace(tmp_path, file_path)
    except Exception as e:
        # Saving is best effort: report the problem and let the caller carry on without a cache file
        logging.exception(e)
        try:
            os.remove(tmp_path)
        except OSError:
            pass  # nothing was written, or it cannot be removed; the final path is untouched either way
        return False
    return True


def _make_subset(languages):
    """
    Validates an expected array of ISO 639-1 language code strings, given by the user, and creates a subset of the valid
    languages compared against the current database available languages

    Args:
        languages (list or None): Language codes requested by the user.

    Returns:
        list or None: Sorted language indexes of the codes found in `languageData.lang_codes`, each
        one only once, or None when no code matched.
    """
    if not languages:
        return None
    reverse_langs = {code: lang_id for lang_id, code in languageData.lang_codes.items()}
    # A set, so a code given twice does not change the size of the subset nor its file hash
    found = {reverse_langs[lang] for lang in languages if lang in reverse_langs}
    return sorted(found) or None


def _iso_languages(lang_set):
    """Converts ngram database language indexes (integer) to ISO 639-1 code

    Returns:
        dict: Maps each language index of lang_set to its code.
    """
    return {lang_id: languageData.lang_codes[lang_id] for lang_id in lang_set}


def base16_to_base36(hex_string):
    """Convert a hexadecimal string into its lowercase base-36 representation.

    Args:
        hex_string (str): Number written in base 16.

    Returns:
        str: The same number written with the digits 0-9a-z, '0' for zero.

    Raises:
        ValueError: If hex_string is not a valid hexadecimal number.
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    integer_value = int(hex_string, 16)
    if integer_value == 0:
        # The loop below emits nothing for zero
        return '0'
    digits = []
    while integer_value > 0:
        integer_value, remainder = divmod(integer_value, 36)
        digits.append(alphabet[remainder])
    # Digits were produced from the least significant one
    return ''.join(reversed(digits))


# From eld/resources/avg_score.py
# Average score of each language in a correct detection, done with an extended version of big-test benchmark.
avg_score = {'am': 0.0661, 'ar': 0.0237, 'az': 0.0269, 'be': 0.0227, 'bg': 0.0234, 'bn': 0.1373, 'ca': 0.0246,
             'cs': 0.0242, 'da': 0.0277, 'de': 0.0275, 'el': 0.0369, 'en': 0.0378, 'es': 0.0252, 'et': 0.0253,
             'eu': 0.0369, 'fa': 0.0213, 'fi': 0.026, 'fr': 0.0253, 'gu': 0.1197, 'he': 0.0402, 'hi': 0.0578,
             'hr': 0.0201, 'hu': 0.0208, 'hy': 0.0439, 'is': 0.032, 'it': 0.0251, 'ja': 0.0375, 'ka': 0.1383,
             'kn': 0.1305, 'ko': 0.0222, 'ku': 0.0256, 'lo': 0.3488, 'lt': 0.0246, 'lv': 0.0264, 'ml': 0.1322,
             'mr': 0.0571, 'ms': 0.0251, 'nl': 0.0342, 'no': 0.0266, 'or': 0.1269, 'pa': 0.1338, 'pl': 0.0275,
             'pt': 0.0252, 'ro': 0.0247, 'ru': 0.0184, 'sk': 0.024, 'sl': 0.0253, 'sq': 0.0353, 'sr': 0.0234,
             'sv': 0.033, 'ta': 0.1513, 'te': 0.1547, 'th': 0.0882, 'tl': 0.0368, 'tr': 0.0258, 'uk': 0.0206,
             'ur': 0.0282, 'vi': 0.0467, 'yo': 0.0329, 'zh': 0.0152}

# Deprecated for now: Some languages score higher with the same amount of text, this multiplier evens it out for
# multi-language strings
# self.scoreNormalizer = [0.7, 1, 1, 1, 1, 0.6, 0.98, 1, 1, 1, 0.9, 1, 1, 1, 1, 1, 1, 1, 0.6, 1, 0.7, 1, 1, 0.9, 1, 1,
# 0.8, 0.6, 0.6, 1, 1, 0.5, 1, 1, 0.6, 0.7, 1, 0.95, 1, 0.6, 0.6, 1, 1, 1, 1, 1, 1, 0.9, 1, 1, 0.6, 0.6, 0.7, 0.9, 1, 1,
# 1, 0.8, 1, 1.7]
-------------------------------------------------------------------------------- /eld/resources/ngrams/subset/ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py: -------------------------------------------------------------------------------- 1 | # Copyright 2023 Nito T.M. [ Apache 2.0 Licence https://www.apache.org/licenses/LICENSE-2.0 ] 2 | ngrams_data = { 3 | "type": "M60", 4 | "languages": {11: 'en'}, 5 | "is_subset": True, 6 | "ngrams": {b' p ':{11:5},b' the ':{11:576},b' a ':{11:197},b' of ':{11:262},b'tion ':{11:129},b'mber ':{11:16},b' in ':{11:193},b' ethi':{11:1},b' de ':{11:2},b' and ':{11:245},b' b ':{11:2},b'uary ':{11:3},b' f ':{11:1},b' c ':{11:2},b' marc':{11:3},b'anda ':{11:1},b' apar':{11:1},b' mill':{11:5},b' part':{11:17},b'ista':{11:2},b' prob':{11:6},b'stan ':{11:1},b' pros':{11:1},b'blem':{11:2},b' poli':{11:14},b'dent ':{11:9},b' mana':{11:6},b' tele':{11:2},b'llar ':{11:1},b' demo':{11:4},b' resp':{11:7},b' isla':{11:2},b' parl':{11:2},b' dedi':{11:1},b' indi':{11:8},b'imiz':{11:1},b' form':{11:7},b' plan':{11:7},b' inte':{11:27},b'enti':{11:10},b' sist':{11:1},b' doll':{11:1},b' regi':{11:8},b' yard':{11:1},b' amer':{11:5},b' depu':{11:1},b' hall':{11:1},b' isra':{11:1},b' s ':{11:7},b' i ':{11:33},b'isin':{11:1},b' fran':{11:3},b' ener':{11:5},b'nada ':{11:1},b'ular ':{11:6},b'orma':{11:7},b' medi':{11:7},b'land':{11:1},b' stan':{11:6},b'issi':{11:1},b' univ':{11:5},b' info':{11:7},b'vers':{11:7},b' dipl':{11:1},b' radi':{11:3},b' norm':{11:2},b' terr':{11:3},b'ment ':{11:45},b' bank':{11:2},b' m ':{11:4},b'pert':{11:3},b' real':{11:7},b' prin':{11:4},b' e ':{11:4},b' musi':{11:4},b'list ':{11:1},b' stat':{11:18},b'llin':{11:1},b' sala':{11:1},b' stra':{11:5},b' film':{11:1},b'blem ':{11:2},b' bala':{11:1},b' dire':{11:7},b'erne':{11:2},b' vers':{11:2},b' agen':{11:3},b' pull':{11:1},b' metr':{11:1},b'ular':{11:2},b'sion ':{11:21},b' inst':{11:10},b'entl':{11:2},b'demi':{11:1},b' gene':{11:9},b'pani':{11:3},b'mina':{11:3},b' mart':{11:1},b' 
aid ':{11:1},b' euro':{11:15},b'rmal ':{11:2},b' obam':{11:1},b' ukra':{11:1},b' r ':{11:1},b'edia ':{11:3},b'onal ':{11:23},b' alan ':{11:1},b'stit':{11:2},b'mpio':{11:1},b' faci':{11:3},b'titu':{11:3},b'ensi':{11:4},b' anal':{11:3},b'ndin':{11:3},b' real ':{11:3},b' foru':{11:1},b'imal ':{11:1},b' vari':{11:5},b' sent':{11:1},b'ndar':{11:4},b'eria':{11:3},b' on ':{11:70},b' mate':{11:3},b' prot':{11:6},b' stru':{11:3},b' tale':{11:1},b' inve':{11:6},b'eral ':{11:11},b'tand':{11:1},b' admi':{11:3},b'alis':{11:1},b' mark':{11:6},b'rnet ':{11:2},b'isti':{11:3},b'erna':{11:8},b' tran':{11:9},b' livi':{11:2},b'inis':{11:3},b' brit':{11:3},b'stra':{11:3},b' prof':{11:5},b'aniz':{11:3},b' post':{11:2},b' mode':{11:6},b'itin':{11:1},b' roma':{11:2},b'eren':{11:2},b'oses ':{11:1},b'orit':{11:4},b'itor':{11:2},b'rror ':{11:1},b'esti':{11:4},b' para':{11:2},b' h ':{11:1},b'bama ':{11:1},b' pote':{11:2},b' pers':{11:7},b' miss':{11:3},b' arti':{11:5},b'erin':{11:4},b' ad ':{11:1},b'ider ':{11:2},b'iona':{11:13},b' refe':{11:3},b' moni':{11:1},b' park':{11:1},b' ital':{11:2},b'andi':{11:1},b' bank ':{11:1},b'fess':{11:3},b'eral':{11:3},b' may ':{11:10},b' sess':{11:1},b' fede':{11:2},b' mini':{11:4},b' fest':{11:1},b'tiva':{11:1},b' anti':{11:1},b'ssor ':{11:1},b'tora':{11:1},b'pert ':{11:1},b'rban ':{11:1},b'muni':{11:8},b' vide':{11:2},b'rman ':{11:3},b'ilab':{11:4},b'inal ':{11:5},b' effe':{11:5},b'cula':{11:2},b' oper':{11:6},b' temp':{11:3},b' rest':{11:3},b'tist':{11:1},b' depa':{11:2},b' lond':{11:1},b' port':{11:3},b' dial':{11:1},b' mind':{11:1},b' assa':{11:1},b' t ':{11:1},b' hard':{11:1},b' huma':{11:4},b'edit ':{11:1},b'obal ':{11:2},b'logi':{11:3},b' men ':{11:2},b' seri':{11:4},b'rdin':{11:2},b'sion':{11:6},b' inci':{11:1},b' resu':{11:6},b' pres':{11:16},b'ress ':{11:7},b' can ':{11:22},b'tual ':{11:3},b' sena':{11:1},b' by ':{11:50},b'loud ':{11:1},b' soun':{11:2},b' per ':{11:3},b' no ':{11:13},b'ions ':{11:47},b'uest ':{11:1},b' esta':{11:3},b' 
cons':{11:19},b' comp':{11:28},b'ents ':{11:27},b' desp':{11:2},b' cont':{11:25},b' entr':{11:2},b' pass':{11:4},b' cata':{11:1},b' prim':{11:3},b' d ':{11:2},b' serv':{11:12},b' reco':{11:9},b' l ':{11:1},b' cent':{11:10},b'ants ':{11:6},b' prop':{11:8},b'ones ':{11:2},b' come':{11:2},b'rant ':{11:2},b'enta':{11:4},b'tant ':{11:6},b'side':{11:9},b' actu':{11:2},b' fina':{11:7},b'cies ':{11:4},b' soci':{11:7},b' gove':{11:8},b' expl':{11:4},b'lica':{11:6},b' conv':{11:6},b' sens':{11:3},b' havi':{11:2},b' conc':{11:8},b' espe':{11:2},b'cial ':{11:12},b' carr':{11:3},b' arri':{11:1},b' part ':{11:6},b'ense ':{11:4},b' asse':{11:3},b' dona':{11:1},b'ject':{11:4},b' dive':{11:1},b' expe':{11:12},b' qual':{11:3},b' fall':{11:1},b'ilit':{11:6},b' era ':{11:1},b'sent':{11:3},b' dema':{11:2},b' cost':{11:2},b'aris ':{11:1},b' impo':{11:7},b'vant ':{11:1},b'ctor ':{11:6},b' band':{11:1},b'iste':{11:4},b' conf':{11:7},b' muni':{11:1},b' util':{11:1},b'icip':{11:1},b'alit':{11:2},b'eria ':{11:1},b'ment':{11:9},b' mome':{11:1},b' dest':{11:2},b' situ':{11:2},b' proj':{11:9},b' rema':{11:4},b'ests ':{11:3},b' deci':{11:5},b' desc':{11:3},b' prov':{11:12},b' proc':{11:8},b'rior ':{11:2},b'able ':{11:19},b'tici':{11:5},b' dese':{11:1},b' assi':{11:3},b' web ':{11:1},b'ials ':{11:3},b'ries ':{11:14},b'aria ':{11:1},b' mili':{11:2},b' camp ':{11:1},b' prev':{11:4},b'port ':{11:11},b' prod':{11:10},b'ible ':{11:8},b'bles ':{11:2},b'istr':{11:2},b' nece':{11:2},b'nals ':{11:2},b' rela':{11:7},b'essi':{11:2},b' loca':{11:8},b' any ':{11:8},b' perm':{11:2},b' defe':{11:3},b' nove':{11:3},b' trac':{11:2},b'cers ':{11:1},b' disp':{11:3},b'uals ':{11:1},b'ures ':{11:7},b' cond':{11:5},b'orta':{11:6},b'rese':{11:4},b' majo':{11:4},b'sona':{11:3},b' poss':{11:5},b' cook':{11:1},b'eves ':{11:1},b' posi':{11:4},b'tors ':{11:8},b'anes ':{11:1},b' camp':{11:3},b' corr':{11:3},b'anta ':{11:1},b'otes ':{11:1},b' minu':{11:2},b' supe':{11:3},b'ells ':{11:2},b' ment':{11:2},b'oves ':{11:1},b' 
elec':{11:7},b' have':{11:1},b' trav':{11:2},b' seve':{11:7},b' visi':{11:5},b'ncip':{11:2},b' prog':{11:9},b' esti':{11:1},b'teri':{11:3},b' acti':{11:9},b' barr':{11:1},b' prem':{11:1},b'erio':{11:1},b' hist':{11:5},b' repr':{11:4},b' auto':{11:2},b' equi':{11:2},b'tinu':{11:6},b'pect':{11:2},b'ient ':{11:4},b'unts ':{11:1},b' peri':{11:3},b'ines ':{11:5},b' sant':{11:1},b' most':{11:1},b' inde':{11:3},b' cast':{11:1},b' pilo':{11:1},b'este':{11:1},b'ipal ':{11:1},b'ital ':{11:4},b' func':{11:2},b' crea':{11:7},b'nter':{11:1},b'arts ':{11:2},b' term':{11:3},b' tota':{11:3},b' diss':{11:1},b' cele':{11:2},b'anit':{11:1},b'cals ':{11:1},b'ecia':{11:2},b'cord ':{11:2},b' cult':{11:4},b'stru':{11:2},b' gran':{11:3},b'itat':{11:1},b'uest':{11:1},b'gram':{11:4},b'pora':{11:2},b'tics ':{11:4},b' comi':{11:2},b' pare':{11:2},b' subs':{11:3},b' plat':{11:2},b' decl':{11:2},b' mari':{11:2},b' dete':{11:3},b'duct':{11:6},b' labo':{11:2},b'rmat ':{11:1},b'ives ':{11:7},b' habi':{11:1},b'umen':{11:3},b'stic ':{11:4},b'epen':{11:2},b' econ':{11:6},b'trac':{11:2},b'sibl':{11:5},b' merc':{11:1},b'emes ':{11:1},b'ecta':{11:1},b' orga':{11:7},b'lars ':{11:1},b' disc':{11:7},b' caus':{11:3},b'rals ':{11:1},b'oral ':{11:1},b'nent ':{11:2},b'ebra':{11:1},b'ress':{11:4},b'mple ':{11:5},b'otal ':{11:2},b' prec':{11:2},b'blic ':{11:7},b' publ':{11:11},b' mani':{11:1},b'lant ':{11:1},b'ecto':{11:2},b' dime':{11:1},b'pons':{11:4},b'ides ':{11:4},b'imes ':{11:5},b' trip':{11:1},b'ivit':{11:4},b'rial ':{11:5},b'cles ':{11:2},b'orda':{11:1},b'iple ':{11:2},b'etes ':{11:1},b'ativ':{11:7},b' popu':{11:4},b'ural ':{11:5},b'posi':{11:1},b'sist':{11:2},b' quar':{11:2},b' mont':{11:5},b'vert':{11:1},b'icia':{11:4},b'rent ':{11:10},b' sect':{11:4},b' he ':{11:36},b' acce':{11:7},b' lega':{11:2},b' vehi':{11:2},b' incl':{11:10},b'ocia':{11:3},b'bert ':{11:1},b' volu':{11:2},b' prom':{11:4},b'stal ':{11:1},b' cris':{11:1},b'rans ':{11:1},b'ball ':{11:2},b'anti':{11:1},b'ecti':{11:8},b'ller 
':{11:2},b' dist':{11:6},b'icle ':{11:3},b'ules ':{11:2},b'enda ':{11:1},b' futu':{11:4},b'tura':{11:1},b' pp ':{11:1},b'ific':{11:1},b'unit':{11:3},b'rers ':{11:1},b' acci':{11:1},b'rses ':{11:1},b' quan':{11:1},b' cand':{11:2},b'ners ':{11:4},b' nego':{11:1},b'para':{11:1},b' obje':{11:3},b'nant ':{11:1},b'ares ':{11:1},b' club ':{11:1},b' jose':{11:1},b' inco':{11:2},b'alls ':{11:2},b' unit':{11:5},b' moti':{11:1},b'lans ':{11:1},b'ntal ':{11:5},b'rito':{11:1},b' sign':{11:5},b'erso':{11:1},b'enge ':{11:1},b' rece':{11:8},b'ocal ':{11:5},b' just':{11:1},b'rica ':{11:2},b' obli':{11:1},b' memb':{11:8},b'ical':{11:3},b'dida':{11:1},b'icle':{11:1},b' circ':{11:2},b' deli':{11:3},b' defi':{11:4},b' tren':{11:1},b' patr':{11:1},b' cong':{11:2},b'atur':{11:2},b'cant ':{11:2},b'ntit':{11:1},b'stem ':{11:7},b'posa':{11:2},b'reta':{11:1},b' refo':{11:1},b'orti':{11:1},b'iali':{11:1},b'tals ':{11:1},b'fica':{11:1},b'erac':{11:1},b' febr':{11:1},b'pera':{11:3},b'orts ':{11:5},b'sibi':{11:2},b' capi':{11:2},b' deba':{11:1},b'ival ':{11:2},b' capa':{11:2},b'erse':{11:1},b' cart':{11:1},b' mass':{11:1},b' resi':{11:4},b'nden':{11:2},b' redu':{11:3},b' favo':{11:2},b'eres':{11:5},b'enes ':{11:1},b' trad':{11:5},b' refu':{11:2},b'gues ':{11:1},b' colo':{11:3},b'omes ':{11:4},b'ersi':{11:1},b' civi':{11:2},b' docu':{11:2},b' trib':{11:1},b'iden':{11:4},b' bene':{11:3},b' rive':{11:2},b' targ':{11:2},b' enti':{11:3},b'mics ':{11:1},b' ente':{11:3},b'cent':{11:1},b' fill':{11:1},b' asso':{11:3},b' extr':{11:3},b' impl':{11:3},b'iver ':{11:3},b'ctiv':{11:2},b'tral ':{11:2},b' expo':{11:2},b'sult':{11:1},b' clar':{11:1},b' arre':{11:1},b' desi':{11:6},b' esca':{11:1},b'lers ':{11:1},b' reso':{11:4},b'oles ':{11:1},b'mane':{11:1},b' educ':{11:4},b' natu':{11:4},b'ilia':{11:1},b'ites ':{11:2},b' secr':{11:2},b' fund':{11:4},b' prep':{11:2},b'nche':{11:1},b'lies ':{11:3},b' figu':{11:2},b' pape':{11:2},b'erve':{11:2},b' bomb':{11:1},b'ting':{11:2},b' fron':{11:2},b' cert':{11:3},b'erts 
':{11:2},b'sses ':{11:5},b' home':{11:2},b' via ':{11:1},b'serv':{11:2},b' vall':{11:1},b' clas':{11:5},b'tent ':{11:4},b' perf':{11:6},b'erti':{11:2},b'ajor ':{11:3},b' cate':{11:1},b'olar ':{11:1},b' solu':{11:2},b'acit':{11:1},b'port':{11:4},b'ermi':{11:2},b'gent ':{11:1},b'cess':{11:4},b' exis':{11:3},b'iana ':{11:1},b'dent':{11:4},b'emen':{11:7},b'tori':{11:3},b' crit':{11:3},b'ania ':{11:1},b'firm':{11:1},b' pati':{11:2},b' vict':{11:2},b' repe':{11:1},b' soli':{11:1},b'udes ':{11:2},b'cret':{11:1},b'nifi':{11:2},b'peti':{11:2},b' legi':{11:2},b' priv':{11:3},b' set ':{11:4},b' ambi':{11:1},b' mobi':{11:2},b'stan':{11:3},b' germ':{11:3},b' urba':{11:1},b' comb':{11:3},b'mers ':{11:3},b' anim':{11:2},b' reti':{11:1},b'ctor':{11:1},b'aper ':{11:2},b'itar':{11:2},b' andr':{11:1},b' alte':{11:2},b' fami':{11:6},b'trib':{11:4},b'ivil ':{11:1},b'ames ':{11:4},b' repa':{11:1},b' domi':{11:1},b'tifi':{11:1},b' amon':{11:3},b'ecte':{11:4},b' incr':{11:6},b'cret ':{11:1},b' davi':{11:1},b'rles ':{11:1},b' refl':{11:1},b' sele':{11:2},b' expr':{11:2},b' fort':{11:1},b' abso':{11:1},b'init':{11:2},b'odel ':{11:2},b'taur':{11:1},b'egra':{11:2},b' exce':{11:3},b' viol':{11:2},b' me ':{11:5},b' deta':{11:2},b'ncie':{11:1},b'fici':{11:1},b' guar':{11:2},b'ases ':{11:4},b'cial':{11:2},b'nces ':{11:7},b'cans ':{11:1},b'alia ':{11:1},b' nega':{11:1},b' mort':{11:1},b' impr':{11:5},b'eran':{11:1},b'rica':{11:4},b'ilar ':{11:2},b'reme':{11:1},b' revi':{11:3},b'ient':{11:1},b' cana':{11:2},b'mans ':{11:1},b'ance':{11:2},b' assu':{11:1},b' idea ':{11:2},b'plet':{11:4},b'visi':{11:1},b'nics ':{11:1},b' home ':{11:5},b'onal':{11:2},b'tari':{11:1},b'lent ':{11:2},b' trum':{11:1},b' coll':{11:7},b'rump ':{11:1},b' mult':{11:3},b'rida ':{11:1},b'iant ':{11:1},b' retr':{11:1},b' envi':{11:4},b'resp':{11:1},b' requ':{11:7},b'trol ':{11:3},b'tenc':{11:1},b' sent ':{11:1},b'nits ':{11:1},b' insi':{11:2},b' pret':{11:1},b' iden':{11:3},b'gina':{11:3},b'ires ':{11:2},b'ster ':{11:7},b' 
fact':{11:3},b'essa':{11:2},b'ator':{11:1},b' muse':{11:1},b'avid ':{11:1},b' regu':{11:3},b'ogra':{11:1},b' reta':{11:1},b' movi':{11:2},b'sent ':{11:3},b' evid':{11:2},b'egor':{11:1},b'egal ':{11:2},b'erat':{11:5},b'abli':{11:3},b'evis':{11:1},b'elle':{11:2},b' usua':{11:2},b' marg':{11:1},b'cati':{11:4},b'aces ':{11:2},b' perc':{11:3},b'icul':{11:1},b' albe':{11:1},b'trol':{11:1},b'ical ':{11:21},b'ills ':{11:2},b' clie':{11:1},b' imme':{11:2},b' us ':{11:7},b' simp':{11:3},b' forc':{11:4},b'ncer ':{11:1},b'ales ':{11:2},b'ente':{11:2},b'cent ':{11:5},b'scri':{11:1},b' impe':{11:1},b' reve':{11:2},b' exec':{11:2},b' repu':{11:2},b'ntic ':{11:1},b'nies ':{11:3},b'nspo':{11:2},b'pita':{11:2},b'bers ':{11:6},b' exte':{11:4},b' conn':{11:4},b' exer':{11:1},b'abil':{11:2},b' coor':{11:1},b' base ':{11:1},b' ille':{11:1},b'sors ':{11:1},b' prio':{11:2},b'oper ':{11:1},b'test':{11:1},b' reno':{11:1},b'ican':{11:3},b' test':{11:2},b'orpo':{11:1},b' adve':{11:1},b'tion':{11:13},b' pref':{11:1},b'form':{11:4},b'gres':{11:2},b'olut':{11:2},b' elem':{11:2},b' arra':{11:1},b'pone':{11:1},b'itiv':{11:2},b' tria':{11:1},b'icie':{11:3},b' just ':{11:9},b' divi':{11:2},b' plac':{11:7},b'oria':{11:1},b'osen ':{11:1},b'mati':{11:2},b'temp':{11:1},b' insu':{11:1},b'ipan':{11:1},b' orig':{11:3},b'rant':{11:1},b'spec':{11:1},b' truc':{11:1},b' opin':{11:1},b' frui':{11:1},b'ture':{11:4},b' sati':{11:1},b' infl':{11:2},b' paci':{11:1},b'ties ':{11:17},b' aspe':{11:1},b' flor':{11:1},b'mals ':{11:1},b' grav':{11:1},b'ages ':{11:5},b'erpr':{11:2},b'ders ':{11:6},b'ates ':{11:13},b'ntif':{11:2},b'sump':{11:1},b' mand':{11:1},b' text ':{11:1},b'llen ':{11:1},b' dele':{11:1},b'gers ':{11:2},b'tene':{11:1},b' succ':{11:4},b' argu':{11:2},b'ubli':{11:2},b'tivi':{11:1},b' limi':{11:3},b'tica':{11:3},b'cept':{11:1},b'orme':{11:1},b'pers ':{11:2},b' agre':{11:3},b' lite':{11:1},b'ians ':{11:4},b'list':{11:1},b'tant':{11:1},b' ball':{11:1},b'plem':{11:1},b'ntil ':{11:4},b' 
depe':{11:2},b'lifi':{11:1},b' amen':{11:1},b' reac':{11:4},b'ticu':{11:3},b'tive':{11:1},b' angl':{11:1},b'ears ':{11:11},b'isla':{11:2},b'olor ':{11:1},b' moto':{11:1},b'ords ':{11:3},b' reca':{11:1},b'ital':{11:1},b' glob':{11:3},b' sovi':{11:1},b' cost ':{11:2},b'aria':{11:1},b'tral':{11:1},b' tend':{11:1},b'flic':{11:1},b'truc':{11:3},b' manu':{11:2},b' pate':{11:1},b'icat':{11:7},b' mora':{11:1},b'tter ':{11:8},b'anis':{11:3},b'ovel ':{11:1},b' hosp':{11:2},b'cuti':{11:1},b' edit':{11:2},b'edia':{11:2},b' susp':{11:1},b'lici':{11:1},b' data ':{11:5},b' vill':{11:1},b' modi':{11:1},b' past':{11:1},b'erns ':{11:1},b'leme':{11:3},b' obse':{11:2},b'pond':{11:2},b' infe':{11:1},b' reci':{11:1},b'ters ':{11:10},b'rodu':{11:2},b'anta':{11:1},b'cter ':{11:1},b'ting ':{11:42},b' digi':{11:2},b' inno':{11:3},b' revo':{11:1},b' rega':{11:2},b'rces ':{11:4},b' addi':{11:4},b' rese':{11:8},b'alle':{11:1},b'sics ':{11:1},b'igen':{11:1},b' infr':{11:1},b'ront ':{11:2},b'ustr':{11:4},b' sepa':{11:2},b'uctu':{11:2},b' port ':{11:1},b'rori':{11:1},b'igna':{11:1},b'onia ':{11:1},b'ssif':{11:1},b'\xe2\x80\x99s ':{11:31},b' elev':{11:1},b' impa':{11:2},b'utes ':{11:2},b'nect':{11:4},b' indu':{11:4},b'bili':{11:2},b' nota':{11:1},b'ssio':{11:1},b' simi':{11:2},b' decr':{11:1},b'pare':{11:2},b'entu':{11:1},b' alco':{11:1},b'veni':{11:1},b'gies ':{11:2},b'nsfo':{11:1},b' depo':{11:1},b' case':{11:2},b'erva':{11:1},b' has ':{11:28},b' cent ':{11:1},b'dame':{11:1},b'olog':{11:2},b' capt':{11:1},b' dani':{11:1},b'ivid':{11:3},b'ervi':{11:2},b' excl':{11:1},b' intr':{11:3},b' mile':{11:1},b'crip':{11:1},b' nucl':{11:1},b'uits ':{11:1},b'nari':{11:1},b'pens':{11:1},b'nten':{11:1},b' esse':{11:1},b' orde':{11:5},b' vent':{11:1},b' coop':{11:2},b' subm':{11:2},b'eces ':{11:1},b' insp':{11:2},b'otos ':{11:1},b'rees ':{11:1},b'lusi':{11:1},b'rovi':{11:1},b'nder ':{11:9},b'isio':{11:4},b' face':{11:2},b'tric':{11:3},b'tine':{11:1},b'nter ':{11:5},b' erro':{11:1},b'odes ':{11:1},b' 
comm':{11:25},b'arge ':{11:5},b' sexu':{11:1},b' adju':{11:1},b'ntat':{11:1},b'mula':{11:1},b'vent ':{11:4},b'ssia ':{11:1},b'ence':{11:3},b'rter ':{11:2},b'tabl':{11:1},b' magn':{11:1},b' adap':{11:1},b' sola':{11:1},b' calc':{11:1},b' base':{11:6},b' unit ':{11:1},b'inan':{11:1},b'uses ':{11:3},b' imag':{11:3},b' elim':{11:1},b' immi':{11:1},b'nomi':{11:4},b'nual ':{11:2},b'ctic':{11:4},b'lect':{11:4},b'ende':{11:4},b' hote':{11:1},b' arme':{11:1},b' barb':{11:1},b'orat':{11:3},b'enda':{11:2},b'rint ':{11:1},b' audi':{11:2},b' pale':{11:1},b'ples ':{11:2},b'ique ':{11:2},b' alex':{11:1},b'tent':{11:1},b'rati':{11:6},b' clim':{11:2},b'babl':{11:1},b'mits ':{11:1},b'gles ':{11:1},b'vers ':{11:2},b'icit':{11:1},b' acto':{11:1},b' reli':{11:3},b'secu':{11:1},b'ande':{11:1},b' robe':{11:1},b' prac':{11:4},b' ende':{11:1},b'tric ':{11:2},b'nfor':{11:1},b' rein':{11:1},b' advo':{11:1},b' cali':{11:1},b'rect':{11:1},b' deri':{11:1},b' mess':{11:2},b'book ':{11:1},b'inat':{11:2},b'eboo':{11:1},b'trat':{11:2},b' gall':{11:1},b'fere':{11:11},b'sten':{11:1},b' pred':{11:2},b'omin':{11:2},b'erim':{11:1},b'stin':{11:3},b' vita':{11:1},b' sing':{11:4},b' spor':{11:2},b'etic ':{11:2},b' orie':{11:1},b' tv ':{11:1},b'tall':{11:1},b'itor ':{11:1},b' corp':{11:2},b'clus':{11:1},b' enga':{11:1},b' card':{11:1},b' diag':{11:1},b' mone':{11:3},b'ipat':{11:2},b' doct':{11:1},b'pany ':{11:3},b' etc ':{11:1},b' ball ':{11:1},b' raci':{11:1},b'bina':{11:1},b' to ':{11:232},b' do ':{11:11},b' pro ':{11:1},b' k ':{11:1},b' co ':{11:2},b' u ':{11:2},b'oval ':{11:1},b' let ':{11:1},b' star':{11:8},b' stud':{11:11},b'itic':{11:5},b' ten ':{11:1},b'atel':{11:1},b' tech':{11:7},b'lice ':{11:3},b' sout':{11:5},b'ence ':{11:22},b'race ':{11:1},b' syst':{11:10},b'erst':{11:3},b' slav':{11:1},b'ance ':{11:19},b' stro':{11:3},b' spec':{11:9},b' eu ':{11:6},b' scho':{11:8},b'eman ':{11:1},b' list':{11:2},b' arch':{11:2},b'hoto ':{11:1},b'esto':{11:1},b'rove':{11:2},b'tice ':{11:3},b' 
brus':{11:1},b'onst':{11:2},b'tury ':{11:2},b'enty ':{11:1},b' mich':{11:1},b' host':{11:1},b'ince ':{11:6},b' my ':{11:9},b' char':{11:8},b'hnol':{11:4},b'ines':{11:6},b'vice ':{11:5},b'vide':{11:5},b' mist':{11:1},b'avel ':{11:1},b'ovat':{11:3},b'tory ':{11:9},b'hnic':{11:2},b' fire':{11:1},b'mise ':{11:1},b'hodo':{11:1},b'lity ':{11:14},b' obvi':{11:1},b'gram ':{11:3},b'lace ':{11:6},b'hite':{11:1},b' bran':{11:2},b' film ':{11:2},b'tner':{11:2},b'lati':{11:2},b' styl':{11:2},b' libe':{11:1},b' turn':{11:2},b' magi':{11:1},b'atic':{11:1},b'onic':{11:1},b' migr':{11:1},b' here':{11:1},b'rtin ':{11:1},b' psyc':{11:1},b'ivat':{11:1},b' text':{11:1},b' dram':{11:1},b'liza':{11:1},b' maxi':{11:1},b' logi':{11:1},b' spok':{11:1},b' stop':{11:1},b'aliz':{11:1},b'taly ':{11:1},b'omat':{11:1},b' top ':{11:3},b'rize ':{11:1},b' most ':{11:10},b'ateg':{11:3},b' seni':{11:1},b' symb':{11:1},b'ideo ':{11:2},b' atmo':{11:1},b' pane':{11:1},b' j ':{11:1},b'arty ':{11:3},b'vati':{11:1},b' link':{11:1},b'rice ':{11:2},b' repo':{11:7},b'line ':{11:5},b' prou':{11:1},b' spar':{11:1},b'ness ':{11:10},b' opti':{11:3},b' berl':{11:1},b'vity ':{11:2},b' farm':{11:1},b' stre':{11:5},b' chem':{11:1},b'oman ':{11:2},b'nese ':{11:2},b'rest ':{11:3},b'anic ':{11:1},b' semi':{11:1},b' afri':{11:2},b' nomi':{11:1},b' aust':{11:2},b'rity ':{11:8},b' onli':{11:2},b' new ':{11:16},b' line':{11:2},b'body ':{11:1},b' body ':{11:2},b' emai':{11:1},b'keti':{11:1},b' stab':{11:1},b'cept ':{11:3},b' busi':{11:6},b' fant':{11:1},b'dium ':{11:1},b'inar':{11:1},b' you ':{11:30},b'ered ':{11:11},b' plas':{11:1},b'mail ':{11:1},b' line ':{11:3},b' an ':{11:34},b' at ':{11:44},b' for ':{11:94},b' man ':{11:3},b'ning ':{11:23},b'fter ':{11:11},b'ange ':{11:8},b'nger ':{11:4},b'mmer ':{11:2},b' over ':{11:10},b'ring ':{11:26},b' over':{11:5},b'ille ':{11:2},b' unde':{11:11},b' her ':{11:11},b' stor':{11:7},b' bill':{11:2},b' have ':{11:34},b'ling ':{11:13},b'lder ':{11:2},b'ager ':{11:1},b' land':{11:2},b' 
find':{11:1},b' end ':{11:4},b' hold':{11:1},b'land ':{11:7},b'atio':{11:50},b' fore':{11:4},b'mark ':{11:1},b' side':{11:1},b'tore ':{11:1},b'amme ':{11:2},b' lang':{11:3},b' alle':{11:1},b'eter ':{11:1},b'gger ':{11:1},b' give':{11:4},b' hand':{11:3},b' dog ':{11:1},b'hold ':{11:1},b' god ':{11:1},b' chri':{11:3},b'rsda':{11:1},b'ogen ':{11:1},b'tere':{11:1},b'tand ':{11:2},b'mand ':{11:2},b'ften ':{11:4},b' stil':{11:5},b'tten ':{11:2},b' leve':{11:5},b' bedr':{11:1},b'ater ':{11:10},b'rker ':{11:1},b'ling':{11:1},b'ilie':{11:1},b'tive ':{11:21},b'pper ':{11:1},b' offe':{11:6},b'ader ':{11:2},b'deri':{11:1},b' afte':{11:12},b' pris':{11:1},b'ever ':{11:12},b' fire ':{11:2},b'over ':{11:3},b' pete':{11:1},b' give ':{11:3},b' side ':{11:2},b'tern':{11:1},b'itie':{11:3},b'dere':{11:1},b' sand':{11:1},b'ppen ':{11:1},b'ster':{11:1},b'vent':{11:2},b' hill':{11:1},b'lion':{11:1},b'rier ':{11:1},b' sund':{11:1},b' alli':{11:1},b'tage ':{11:4},b'embe':{11:5},b'berg ':{11:1},b'nner ':{11:2},b'ands ':{11:5},b' live':{11:3},b'vert ':{11:2},b' x ':{11:2},b'rmed ':{11:4},b'sted ':{11:8},b' time':{11:3},b' fast':{11:1},b'rket ':{11:3},b'ding ':{11:31},b' beha':{11:2},b'rger ':{11:1},b'dlin':{11:1},b'tian ':{11:1},b'ange':{11:1},b' dr ':{11:1},b' henr':{11:1},b' thom':{11:1},b' fast ':{11:1},b'arks ':{11:1},b'evel ':{11:4},b' far ':{11:2},b'uati':{11:2},b'deli':{11:1},b' nati':{11:9},b'uper ':{11:1},b' hold ':{11:1},b' anno':{11:2},b'ense':{11:1},b'erie':{11:4},b'olde':{11:1},b' janu':{11:2},b'iner ':{11:1},b'renc':{11:1},b'hael ':{11:1},b'rive ':{11:2},b'elin':{11:1},b' hund':{11:1},b'omas ':{11:1},b' roll':{11:1},b' bord':{11:1},b' form ':{11:3},b' g ':{11:2},b'mand':{11:1},b'ffer ':{11:2},b' traf':{11:1},b'tter':{11:1},b'sati':{11:3},b' skil':{11:1},b' befo':{11:6},b' job ':{11:2},b' week':{11:3},b' syri':{11:1},b'ming ':{11:7},b' scen':{11:2},b'diti':{11:5},b'raft ':{11:2},b'rmer ':{11:2},b' dece':{11:2},b'vide ':{11:3},b' land ':{11:2},b' 
budg':{11:2},b'temb':{11:2},b'rder ':{11:5},b' apri':{11:2},b'dred':{11:1},b' sept':{11:2},b'rand ':{11:1},b'nnin':{11:2},b'ings ':{11:12},b' stri':{11:3},b'pril ':{11:2},b'ding':{11:1},b'utio':{11:5},b' poin':{11:6},b'ober ':{11:2},b' jour':{11:2},b' chan':{11:10},b' augu':{11:2},b'rnal':{11:1},b'gust ':{11:1},b' even':{11:7},b'duce':{11:2},b'rson ':{11:3},b'mmin':{11:1},b' love':{11:1},b'vate ':{11:2},b' spon':{11:1},b'gdom ':{11:1},b' midd':{11:1},b' invi':{11:1},b'itio':{11:10},b' lett':{11:2},b'pute':{11:2},b'esse':{11:1},b'rked ':{11:3},b' john ':{11:2},b'rine ':{11:2},b'ivel':{11:1},b' scor':{11:2},b'tner ':{11:1},b' smar':{11:1},b'nsor':{11:1},b'side ':{11:4},b'reds ':{11:1},b'rner ':{11:1},b'rgen':{11:1},b'lled ':{11:9},b'lier ':{11:1},b' find ':{11:3},b'hone ':{11:2},b'ndle ':{11:1},b'liti':{11:2},b' loui':{11:1},b'over':{11:1},b' driv':{11:4},b'gion ':{11:2},b'form ':{11:3},b' mete':{11:1},b'oint ':{11:5},b' brin':{11:3},b'tart ':{11:3},b'omme':{11:1},b' russ':{11:2},b'rian ':{11:3},b' plan ':{11:2},b' rand':{11:1},b'igne':{11:2},b'mmes ':{11:1},b' spri':{11:1},b' init':{11:3},b'rver ':{11:1},b'tiat':{11:2},b'sive ':{11:6},b'teen ':{11:1},b'nges ':{11:4},b'rage ':{11:4},b'nald ':{11:1},b' offi':{11:8},b' fine ':{11:1},b'orte':{11:3},b'aste ':{11:1},b' mail ':{11:1},b'shed ':{11:7},b'teme':{11:1},b' holl':{11:1},b'ator ':{11:3},b' stee':{11:1},b' trus':{11:1},b'ight ':{11:20},b' jul ':{11:1},b'gest':{11:2},b'iver':{11:2},b' span':{11:1},b'epte':{11:1},b' litt':{11:3},b'ansi':{11:1},b' sex ':{11:1},b' adva':{11:3},b'rise ':{11:1},b'kend ':{11:1},b' begi':{11:3},b'uter ':{11:2},b'rnin':{11:1},b'igio':{11:2},b' will':{11:2},b'orge ':{11:1},b'tati':{11:3},b'etal ':{11:1},b' grad':{11:3},b'sign ':{11:2},b' time ':{11:14},b'nton ':{11:1},b'rete ':{11:1},b'vice':{11:4},b'miss':{11:5},b' engl':{11:3},b'tine ':{11:1},b'erse ':{11:1},b'mine':{11:1},b' sold':{11:1},b' affa':{11:1},b' is ':{11:109},b' red ':{11:1},b'test ':{11:2},b' meta':{11:1},b' 
supp':{11:11},b'view ':{11:3},b' math':{11:1},b'icer':{11:1},b'rist ':{11:1},b'ianc':{11:1},b'tfor':{11:1},b'gets ':{11:1},b' roya':{11:1},b' glas':{11:1},b'aves ':{11:1},b'orie':{11:1},b' pari':{11:1},b'otel ':{11:1},b' forg':{11:1},b'cher ':{11:2},b' it ':{11:59},b' rock':{11:1},b' led ':{11:2},b'oser ':{11:1},b'sing':{11:1},b'kers ':{11:3},b'pare ':{11:1},b'ping ':{11:5},b'lter ':{11:1},b' clin':{11:1},b'olin':{11:1},b'rter':{11:1},b'evan':{11:1},b'pire':{11:1},b' appe':{11:4},b'ther ':{11:38},b'dget ':{11:1},b'mart ':{11:1},b'sing ':{11:15},b'ards ':{11:7},b' smit':{11:1},b' spre':{11:1},b'lage ':{11:1},b'adio ':{11:1},b'lion ':{11:5},b' blac':{11:2},b' typi':{11:1},b' batt':{11:2},b'pete':{11:1},b'erre':{11:1},b' illu':{11:1},b'atus ':{11:1},b' geor':{11:1},b'male ':{11:1},b' post ':{11:2},b'sked ':{11:2},b' spir':{11:1},b' attr':{11:2},b'ndon ':{11:2},b' step':{11:1},b' am ':{11:3},b' so ':{11:13},b' war ':{11:3},b' ange':{11:1},b'urch ':{11:2},b' was ':{11:66},b' mach':{11:2},b'dern ':{11:2},b'iter ':{11:1},b'burg ':{11:1},b' euro ':{11:1},b'tern ':{11:3},b' rich':{11:1},b'ches ':{11:4},b' will ':{11:31},b' date':{11:1},b' welc':{11:1},b' kind':{11:1},b' also ':{11:23},b'sage ':{11:2},b' them':{11:3},b' sche':{11:2},b'orde':{11:1},b'lick ':{11:1},b'hter ':{11:2},b' word':{11:2},b'rade ':{11:3},b'iven ':{11:3},b'itte':{11:1},b' trai':{11:5},b'lose ':{11:3},b'ruck ':{11:1},b'cher':{11:2},b'mitt':{11:3},b'cker ':{11:1},b' fall ':{11:1},b'unte':{11:1},b' west':{11:2},b' webs':{11:2},b'orum ':{11:1},b' art ':{11:2},b'ssel':{11:1},b'ause ':{11:9},b'dest ':{11:1},b' chin':{11:3},b' wind':{11:2},b' team ':{11:4},b'fort ':{11:1},b'hern ':{11:2},b' name':{11:2},b'ntie':{11:1},b'ware ':{11:3},b'ders':{11:1},b' frie':{11:3},b' mann':{11:1},b'king ':{11:20},b' wide':{11:1},b' dama':{11:1},b'ison ':{11:2},b' hand ':{11:2},b' ster':{11:1},b' besi':{11:1},b' inse':{11:1},b'ache':{11:1},b' basi':{11:3},b'band ':{11:1},b'tein ':{11:1},b'chin':{11:3},b'ails ':{11:2},b' 
mast':{11:1},b'nion ':{11:3},b'here ':{11:26},b' appl':{11:7},b' beli':{11:4},b'nate ':{11:2},b' bein':{11:7},b' wint':{11:1},b'dows ':{11:1},b' soft':{11:2},b'udio ':{11:1},b' team':{11:1},b'agem':{11:3},b'dies ':{11:3},b' all ':{11:24},b'hina ':{11:1},b'hine ':{11:1},b' unio':{11:3},b'shop ':{11:1},b'rato':{11:1},b' late':{11:5},b'heri':{11:1},b' bad ':{11:1},b' shop ':{11:1},b' tour':{11:2},b'twar':{11:2},b'inin':{11:3},b' micr':{11:1},b'elli':{11:2},b' dres':{11:1},b'irls ':{11:1},b' wort':{11:1},b' japa':{11:1},b' live ':{11:2},b'ague ':{11:2},b' unab':{11:1},b'line':{11:1},b'rate':{11:2},b'inks ':{11:1},b' test ':{11:1},b'erwa':{11:1},b' st ':{11:3},b'dier':{11:1},b' news ':{11:3},b' ther':{11:22},b'eams ':{11:2},b'lied ':{11:2},b'date ':{11:2},b'site ':{11:3},b'aine ':{11:1},b' phil':{11:2},b'bile ':{11:1},b' warn':{11:1},b'erfa':{11:1},b' gold':{11:1},b' devo':{11:1},b' down':{11:3},b'endl':{11:1},b' tabl':{11:2},b'rate ':{11:6},b' upda':{11:1},b'lege ':{11:2},b'asis ':{11:2},b' idea':{11:2},b' drin':{11:1},b' girl':{11:1},b'tars ':{11:1},b' beac':{11:1},b' fans ':{11:1},b' brow':{11:1},b' bega':{11:2},b'hard ':{11:1},b'llen':{11:3},b' wild':{11:1},b' call':{11:6},b'aste':{11:1},b' kind ':{11:2},b'onds ':{11:1},b' main':{11:3},b'nloa':{11:1},b' play':{11:7},b'hase ':{11:2},b' leag':{11:1},b' bar ':{11:1},b' symp':{11:1},b'lass ':{11:3},b'rall ':{11:1},b'ayer ':{11:2},b'ends ':{11:3},b' game':{11:2},b' modu':{11:1},b' well':{11:1},b'erni':{11:1},b'atch ':{11:3},b'aren':{11:1},b'gnos':{11:1},b'nute ':{11:1},b'sequ':{11:2},b' powe':{11:6},b' rele':{11:4},b' star ':{11:1},b'dels ':{11:1},b' cham':{11:2},b'trum':{11:1},b'orld ':{11:7},b' theo':{11:2},b' meth':{11:3},b' spen':{11:2},b'ocus ':{11:2},b'hani':{11:2},b' date ':{11:2},b'load ':{11:1},b' matt':{11:3},b' tick':{11:1},b' main ':{11:4},b' effi':{11:2},b'icks ':{11:1},b' worl':{11:9},b'osit':{11:2},b' focu':{11:3},b' maga':{11:1},b'tend ':{11:1},b'omen ':{11:2},b'erwi':{11:1},b' name ':{11:3},b'ture 
':{11:17},b' amaz':{11:1},b'play ':{11:1},b' bett':{11:3},b'tock ':{11:1},b' bloc':{11:2},b'dard ':{11:2},b'tems ':{11:4},b'epti':{11:2},b'nity ':{11:7},b'otic ':{11:1},b' code ':{11:1},b'nkin':{11:1},b' note':{11:1},b' gold ':{11:1},b' flex':{11:1},b'osph':{11:1},b'hops ':{11:1},b' thea':{11:1},b' news':{11:1},b' virt':{11:1},b' butt':{11:1},b' york ':{11:2},b' that ':{11:77},b' with ':{11:64},b' as ':{11:57},b' are ':{11:54},b' be ':{11:50},b' this ':{11:50},b' from ':{11:42},b' not ':{11:34},b' they ':{11:32},b'ally ':{11:30},b' or ':{11:30},b'ould ':{11:29},b' we ':{11:28},b' his ':{11:27},b'ated ':{11:27},b' but ':{11:25},b' thei':{11:25},b'heir ':{11:25},b' were ':{11:24},b' whic':{11:21},b'hich ':{11:21},b' one ':{11:20},b' more ':{11:20},b' othe':{11:18},b' coun':{11:18},b' said ':{11:18},b' been ':{11:17},b' had ':{11:17},b'ough ':{11:17},b'bout ':{11:17},b' abou':{11:17},b' its ':{11:16},b'hing ':{11:15},b' woul':{11:15},b' who ':{11:15},b'ound ':{11:15},b' thes':{11:14},b' she ':{11:14},b'hese ':{11:13},b' when ':{11:13},b' up ':{11:13},b' some ':{11:13},b' firs':{11:13},b'irst ':{11:13},b' peop':{11:13},b' two ':{11:13},b' into ':{11:12},b' out ':{11:12},b' if ':{11:12},b' our ':{11:12},b'ople ':{11:12},b' your ':{11:12},b'ving ':{11:12},b' what ':{11:12},b'cted ':{11:11},b' only ':{11:11},b' than ':{11:11},b' them ':{11:11},b' thro':{11:11},b' year':{11:11},b' work':{11:11},b' deve':{11:10},b' diff':{11:10},b' many ':{11:10},b' thin':{11:10},b'nted ':{11:10},b' year ':{11:10},b' like ':{11:10},b' beca':{11:10},b'arch ':{11:9},b'ased ':{11:9},b'\xe2\x80\x99t ':{11:9},b'elop':{11:9},b'ined ':{11:9},b' such ':{11:9},b'ying ':{11:9},b'ctio':{11:9},b' ever':{11:9},b' used ':{11:9},b' well ':{11:9},b' now ':{11:9},b' thre':{11:9},b' with':{11:9},b' how ':{11:9},b' grou':{11:8},b' work ':{11:8},b'nded ':{11:8},b'opea':{11:8},b' very ':{11:8},b'ious ':{11:8},b' use ':{11:8},b'ited ':{11:8},b'ices ':{11:8},b' wher':{11:8},b' coul':{11:8},b'ject ':{11:8},b' 
shou':{11:8},b'tate ':{11:8},b'cess ':{11:8},b'pean ':{11:8},b'eing ':{11:8},b'hose ':{11:8},b'fore ':{11:8},b' acco':{11:8},b'ntly ':{11:8},b'ween ':{11:8},b' then ':{11:7},b' betw':{11:7},b'arly ':{11:7},b'hile ':{11:7},b' duri':{11:7},b' righ':{11:7},b' agai':{11:7},b'wing ':{11:7},b' it\xe2\x80':{11:7},b' both ':{11:7},b'ects ':{11:7},b' made ':{11:7},b' whil':{11:7},b' buil':{11:7},b' appr':{11:7},b' grea':{11:7},b' cour':{11:7},b' even ':{11:7},b'earc':{11:7},b' get ':{11:7},b'hree ':{11:7},b'ernm':{11:7},b' make ':{11:7},b'oing ':{11:7},b' howe':{11:7},b' larg':{11:7},b' foun':{11:7},b' numb':{11:7},b' thos':{11:6},b' high ':{11:6},b'ower ':{11:6},b' good ':{11:6},b' some':{11:6},b'ship ':{11:6},b' way ':{11:6},b'rted ':{11:6},b' each ':{11:6},b' him ':{11:6},b'ease ':{11:6},b' day ':{11:6},b' city ':{11:6},b' foll':{11:6},b' seco':{11:6},b' know':{11:6},b' see ':{11:6},b'very ':{11:6},b'ssed ':{11:6},b' last ':{11:6},b'ists ':{11:6},b' need ':{11:6},b'reas':{11:6},b'reat ':{11:6},b'tial ':{11:6},b'tely ':{11:6},b'ging ':{11:6},b' clos':{11:6},b' life ':{11:6},b' sinc':{11:6},b'hout ':{11:6},b' chil':{11:6},b'come ':{11:6},b' thou':{11:6},b' same ':{11:6},b'logy ':{11:6},b'ught ':{11:6},b' take ':{11:6},b' much ':{11:6},b'hers ':{11:5},b'sity ':{11:5},b'pmen':{11:5},b' back ':{11:5},b'hool ':{11:5},b' issu':{11:5},b'tain ':{11:5},b' did ':{11:5},b' th ':{11:5},b'ized ':{11:5},b' allo':{11:5},b' high':{11:5},b' wate':{11:5},b'till ':{11:5},b' hous':{11:5},b'ntry ':{11:5},b' anot':{11:5},b' lead':{11:5},b' long ':{11:5},b'ulat':{11:5},b'ired ':{11:5},b'cond ':{11:5},b' here ':{11:5},b' that':{11:5},b' curr':{11:5},b' exam':{11:5},b' heal':{11:5},b'ised ':{11:5},b'ttle ':{11:5},b' oppo':{11:5},b'tain':{11:5},b' atte':{11:5},b' own ':{11:5},b'ency ':{11:5},b' know ':{11:5},b' earl':{11:5},b' arou':{11:5},b'ided ':{11:5},b'ergy ':{11:5},b" it's ":{11:5},b'tary ':{11:5},b' scie':{11:5},b'ward ':{11:5},b' mean':{11:5},b'cing ':{11:5},b'rope ':{11:5},b' want 
':{11:5},b'ason ':{11:5},b' need':{11:5},b' must ':{11:5},b' off ':{11:4},b'uire':{11:4},b' beco':{11:4},b' area ':{11:4},b'ared ':{11:4},b' show':{11:4},b' writ':{11:4},b' smal':{11:4},b' game ':{11:4},b'roup ':{11:4},b' cita':{11:4},b'sure ':{11:4},b'inst ':{11:4},b'nced ':{11:4},b'oved ':{11:4},b' nort':{11:4},b'ived ':{11:4},b'ludi':{11:4},b'rned ':{11:4},b'ican ':{11:4},b' neve':{11:4},b' go ':{11:4},b' avai':{11:4},b'mily ':{11:4},b' help ':{11:4},b' gree':{11:4},b'self ':{11:4},b' shar':{11:4},b'outh ':{11:4},b'ghts ':{11:4},b' ofte':{11:4},b' down ':{11:4},b'ways ':{11:4},b'thin ':{11:4},b' auth':{11:4},b'ordi':{11:4},b'alth ':{11:4},b'osed ':{11:4},b' four ':{11:4},b'eady ':{11:4},b' goin':{11:4},b'ross ':{11:4},b'ract':{11:4},b'ield ':{11:4},b'llow ':{11:4},b'nown ':{11:4},b'ched ':{11:4},b' next ':{11:4},b'ouse ':{11:4},b'fied ':{11:4},b'ains ':{11:4},b'work ':{11:4},b'hink ':{11:4},b'owed ':{11:4},b'ldin':{11:4},b'ntri':{11:4},b'akes ':{11:4},b' too ':{11:4},b' happ':{11:4},b' free ':{11:4},b' ques':{11:4},b'nmen':{11:4},b' take':{11:4},b' does ':{11:4},b' old ':{11:4},b't\xe2\x80\x99':{11:4},b' citi':{11:4},b' best ':{11:4},b' clea':{11:4},b' fiel':{11:4},b' trea':{11:4},b'iron':{11:4},b'ific ':{11:4},b'stry ':{11:4},b'ried ':{11:4},b'stio':{11:4},b'ount ':{11:4},b' open ':{11:4},b' secu':{11:4},b' alon':{11:4},b' netw':{11:4},b' usin':{11:4},b' grow':{11:4},b' invo':{11:4},b'orth ':{11:4},b'iety ':{11:4},b'vely ':{11:4},b' alwa':{11:4},b' unti':{11:4},b'ther':{11:4},b' case ':{11:4},b'lish':{11:4},b'less ':{11:4},b'sday ':{11:4},b'omic ':{11:4},b' law ':{11:4},b' furt':{11:4},b'mall ':{11:3},b' shor':{11:3},b'ldre':{11:3},b' lear':{11:3},b'ured ':{11:3},b' valu':{11:3},b'\x80\x99re ':{11:3},b'licy ':{11:3},b' come ':{11:3},b' awar':{11:3},b' big ':{11:3},b' toda':{11:3},b'eeds ':{11:3},b'lude':{11:3},b'ints ':{11:3},b' hear':{11:3},b'atin':{11:3},b'oney ':{11:3},b' says ':{11:3},b' meet':{11:3},b'ucti':{11:3},b' show ':{11:3},b' few ':{11:3},b'dren 
':{11:3},b' seas':{11:3},b'igat':{11:3},b'n\xe2\x80\x99':{11:3},b'oard ':{11:3},b'orks ':{11:3},b'imat':{11:3},b'ethe':{11:3},b' look':{11:3},b'reas ':{11:3},b'lish ':{11:3},b' i\xe2\x80\x99':{11:3},b'cifi':{11:3},b'ults ':{11:3},b'used ':{11:3},b'unty ':{11:3},b' brea':{11:3},b'uced ':{11:3},b' youn':{11:3},b'ortu':{11:3},b'zati':{11:3},b' got ':{11:3},b' read':{11:3},b' ligh':{11:3},b' food ':{11:3},b'eans ':{11:3},b'atic ':{11:3},b' left ':{11:3},b' full ':{11:3},b'rong ':{11:3},b' they':{11:3},b' play ':{11:3},b' took ':{11:3},b' why ':{11:3},b' toge':{11:3},b'olve':{11:3},b' lot ':{11:3},b' retu':{11:3},b' move':{11:3},b' thir':{11:3},b' don\xe2':{11:3},b' cove':{11:3},b'urse ':{11:3},b'oday ':{11:3},b'ishe':{11:3},b'ened ':{11:3},b'gain ':{11:3},b' empl':{11:3},b'fect ':{11:3},b'long ':{11:3},b'guag':{11:3},b' reas':{11:3},b'usic ':{11:3},b' boar':{11:3},b' chal':{11:3},b'unds ':{11:3},b' meas':{11:3},b' say ':{11:3},b'mmon ':{11:3},b'pose':{11:3},b'ored ':{11:3},b' held ':{11:3},b'ctur':{11:3},b' five ':{11:3},b' maki':{11:3},b'ools ':{11:3},b' area':{11:3},b' spac':{11:3},b' put ':{11:3},b' alth':{11:3},b'houg':{11:3},b'ngle ':{11:3},b'pted ':{11:3},b' summ':{11:3},b' chur':{11:3},b'mong ':{11:3},b'gned ':{11:3},b' wome':{11:3},b' week ':{11:3},b' plea':{11:3},b' feat':{11:3},b'mate ':{11:3},b'y\xe2\x80\x99':{11:3},b' rang':{11:3},b'anci':{11:3},b'urit':{11:3},b'east ':{11:3},b' alre':{11:3},b' atta':{11:3},b'gest ':{11:3},b'oach ':{11:3},b'lowi':{11:3},b'yers ':{11:3},b'uman ':{11:3},b'tted ':{11:3},b'usly ':{11:3},b'lves ':{11:3},b'iled ':{11:3},b'ieve ':{11:3},b' nigh':{11:3},b'roac':{11:3},b' addr':{11:3},b' acro':{11:3},b' phot':{11:3},b'lude ':{11:3},b' look ':{11:3},b' able ':{11:3},b' days ':{11:3},b' due ':{11:3},b'ncil ':{11:3},b'aken ':{11:3},b'eded ':{11:3},b' less ':{11:3},b'eath ':{11:3},b'cked ':{11:3},b' non ':{11:3},b' call ':{11:3},b'lved ':{11:3},b'hori':{11:3},b' deat':{11:3},b'fice ':{11:3},b'ctly ':{11:3},b' migh':{11:3},b"at's 
":{11:3},b'erty ':{11:3},b' run ':{11:3},b' pric':{11:3},b'dded ':{11:3},b'pped ':{11:3},b' sour':{11:3},b'duce ':{11:3},b' open':{11:3},b' occu':{11:3},b' want':{11:3},b'ayed ':{11:3},b'iate ':{11:3},b'hird ':{11:3},b'rnal ':{11:3},b' came ':{11:3},b'oung ':{11:3},b' disa':{11:3},b'sues ':{11:3},b' love ':{11:3},b' term ':{11:3},b'pect ':{11:3},b'lear ':{11:3},b' teac':{11:2},b' surv':{11:2},b'ntai':{11:2},b"on't ":{11:2},b'ooks ':{11:2},b'oped ':{11:2},b'tudy ':{11:2},b'each ':{11:2},b'ieve':{11:2},b'uall':{11:2},b'ably ':{11:2},b' once ':{11:2},b'ully ':{11:2},b'nary ':{11:2},b' away ':{11:2},b'rary ':{11:2},b'ssue ':{11:2},b'aced ':{11:2},b'oned ':{11:2},b'sons ':{11:2},b'orms ':{11:2},b'ethi':{11:2},b'city ':{11:2},b'eive':{11:2},b' make':{11:2},b' spea':{11:2},b'pace ':{11:2},b'cuss':{11:2},b' engi':{11:2},b' whit':{11:2},b' cust':{11:2},b'came ':{11:2},b' leas':{11:2},b'yone ':{11:2},b' june ':{11:2},b' help':{11:2},b'sult ':{11:2},b'ourc':{11:2},b'ourt ':{11:2},b'aged ':{11:2},b' role ':{11:2},b' leav':{11:2},b'nday ':{11:2},b' outs':{11:2},b' cann':{11:2},b'most ':{11:2},b' air ':{11:2},b' enco':{11:2},b'ours ':{11:2},b'orce ':{11:2},b' key ':{11:2},b'uded ':{11:2},b' care':{11:2},b' whol':{11:2},b'late ':{11:2},b' didn':{11:2},b'tect':{11:2},b'riod ':{11:2},b' book ':{11:2},b' adde':{11:2},b' past ':{11:2},b'ease':{11:2},b'nish ':{11:2},b'reen ':{11:2},b' low ':{11:2},b' fact ':{11:2},b' we\xe2\x80':{11:2},b' spee':{11:2},b'manc':{11:2},b'rved ':{11:2},b'loye':{11:2},b'eted ':{11:2},b'uted ':{11:2},b' owne':{11:2},b'oted ':{11:2},b' care ':{11:2},b' belo':{11:2},b' read ':{11:2},b' keep ':{11:2},b' effo':{11:2},b'egan ':{11:2},b'artm':{11:2},b' repl':{11:2},b' almo':{11:2},b" don'":{11:2},b"er's ":{11:2},b' six ':{11:2},b' half ':{11:2},b' mr ':{11:2},b' done ':{11:2},b' road ':{11:2},b'fers ':{11:2},b'etim':{11:2},b'urce ':{11:2},b'ucts ':{11:2},b' july ':{11:2},b' towa':{11:2},b'rous ':{11:2},b' achi':{11:2},b' like':{11:2},b' died ':{11:2},b' yet 
':{11:2},b' marr':{11:2},b'anks ':{11:2},b'crib':{11:2},b'nged ':{11:2},b'rove ':{11:2},b' phys':{11:2},b' memo':{11:2},b' safe':{11:2},b' clai':{11:2},b'liam':{11:2},b' west ':{11:2},b'\x80\x99ve ':{11:2},b' join':{11:2},b'erve ':{11:2},b'uage ':{11:2},b'edge ':{11:2},b'hole ':{11:2},b' guid':{11:2},b'ocra':{11:2},b'onsh':{11:2},b'rred ':{11:2},b' age ':{11:2},b'ngly ':{11:2},b' went ':{11:2},b' back':{11:2},b'wers ':{11:2},b'zing ':{11:2},b'lows ':{11:2},b'nths ':{11:2},b'sure':{11:2},b'eate ':{11:2},b'hips ':{11:2},b'sers ':{11:2},b' sust':{11:2},b'osts ':{11:2},b' enjo':{11:2},b'oups ':{11:2},b' sett':{11:2},b' lead ':{11:2},b' rais':{11:2},b'rent':{11:2},b' remo':{11:2},b'lete ':{11:2},b' fini':{11:2},b' chai':{11:2},b' enou':{11:2},b'buti':{11:2},b' figh':{11:2},b'tanc':{11:2},b'hort ':{11:2},b'nnot ':{11:2},b'ficu':{11:2},b' devi':{11:2},b' long':{11:2},b' head ':{11:2},b'ngth ':{11:2},b' crim':{11:2},b' site ':{11:2},b' than':{11:2},b'rded ':{11:2},b'sary ':{11:2},b' free':{11:2},b' stag':{11:2},b'rect ':{11:2},b' eith':{11:2},b' doub':{11:2},b' told ':{11:2},b' lowe':{11:2},b'ghte':{11:2},b' hard ':{11:2},b'inue ':{11:2},b'nomy ':{11:2},b'pply ':{11:2},b'gher ':{11:2},b' view ':{11:2},b' fail':{11:2},b'ough':{11:2},b' thus ':{11:2},b' ensu':{11:2},b' town ':{11:2},b'viou':{11:2},b'kets ':{11:2},b' octo':{11:2},b' rath':{11:2},b'gree ':{11:2},b'ptio':{11:2},b'\xe2\x80\x99m ':{11:2},b'cern':{11:2},b'main ':{11:2},b'lems ':{11:2},b'alue ':{11:2},b' kill':{11:2},b'iday ':{11:2},b' book':{11:2},b' broa':{11:2},b' abov':{11:2},b' seen ':{11:2},b'aine':{11:2},b'ises ':{11:2},b'bove ':{11:2},b'eign ':{11:2},b'uilt ':{11:2},b' fram':{11:2},b'ogni':{11:2},b' aver':{11:2},b'ntin':{11:2},b'mage ':{11:2},b'luen':{11:2},b' sugg':{11:2},b'eeme':{11:2},b' sear':{11:2},b' whet':{11:2},b' cros':{11:2},b' late ':{11:2},b' chec':{11:2},b'izen':{11:2},b'tish ':{11:2},b'duct ':{11:2},b'rely ':{11:2},b'ysis ':{11:2},b'owth ':{11:2},b' subj':{11:2},b'paig':{11:2},b'hite 
':{11:2},b'ctri':{11:2},b' amou':{11:2},b'cate ':{11:2},b'ench ':{11:2},b'rict ':{11:2},b' list ':{11:2},b'refo':{11:2},b'reet ':{11:2},b'pact ':{11:2},b'erms ':{11:2},b' answ':{11:2},b'oyed ':{11:2},b'wled':{11:2},b' rule':{11:2},b'oice ':{11:2},b' floo':{11:2},b'dian ':{11:2},b' ever ':{11:2},b'lack ':{11:2},b' doin':{11:2},b'even ':{11:2},b' moun':{11:2},b'tead ':{11:2},b'acks ':{11:2},b' watc':{11:2},b'lysi':{11:2},b' suff':{11:2},b' taki':{11:2},b'ounc':{11:2},b' staf':{11:2},b'rain ':{11:2},b'denc':{11:2},b' east ':{11:2},b' win ':{11:2},b' annu':{11:2},b' fath':{11:2},b' degr':{11:2},b'merc':{11:2},b' seem':{11:2},b'iles ':{11:2},b' risk ':{11:2},b' head':{11:2},b'isit ':{11:2},b'amed ':{11:2},b' car ':{11:2},b'pose ':{11:2},b' purp':{11:2},b' park ':{11:2},b' cred':{11:2},b'dual ':{11:2},b'erta':{11:2},b'lian ':{11:2},b'ctro':{11:2},b'rown ':{11:2},b' laun':{11:2},b' equa':{11:2},b' move ':{11:2},b'arat':{11:2},b'inly ':{11:2},b' accu':{11:2},b' fren':{11:2},b"sn't ":{11:2},b'lowe':{11:2},b'rite ':{11:2},b'ract ':{11:2},b' emer':{11:2},b'ryth':{11:2},b' winn':{11:2},b' what':{11:2},b'ngin':{11:2},b' word ':{11:2},b' does':{11:2},b' face ':{11:2},b'iers ':{11:2},b' vote':{11:2},b'fety ':{11:2},b' type ':{11:2},b'rday ':{11:2},b'hare ':{11:2},b'acts ':{11:2},b'taff ':{11:2},b'tact ':{11:2},b' near ':{11:2},b' aske':{11:2},b' pay ':{11:2},b' talk':{11:2},b'zens ':{11:2},b'estm':{11:2},b'sian ':{11:2},b' lost ':{11:2},b' born ':{11:2},b' advi':{11:2},b' you\xe2':{11:2},b' quic':{11:2},b' eigh':{11:2},b' expa':{11:2},b'vels ':{11:2},b'thin':{11:2},b' satu':{11:2},b"on's ":{11:2},b' noth':{11:2},b' act ':{11:2},b' tax ':{11:2},b' goal ':{11:2},b'ount':{11:2},b' true ':{11:2},b'onth ':{11:2},b' scal':{11:2},b' feel ':{11:2},b'endi':{11:2},b'eved ':{11:2},b'onic ':{11:2},b' near':{11:2},b' moth':{11:2},b' runn':{11:2},b'oura':{11:2},b'rime ':{11:2},b' affe':{11:2},b'nior ':{11:2},b' adop':{11:2},b'oods ':{11:2},b'turn ':{11:2},b' pain':{11:2},b' room ':{11:2},b' 
rate ':{11:2},b'uite ':{11:2},b' vote ':{11:2},b' gett':{11:2},b' upon ':{11:2},b'inee':{11:2},b' foot':{11:2},b' view':{11:2},b'dual':{11:2},b'eful ':{11:2},b' heav':{11:2},b' frid':{11:2},b' self ':{11:2},b' leng':{11:2},b'hang':{11:2},b' itse':{11:2},b' behi':{11:2},b'hind ':{11:2},b' deal ':{11:2},b'mply ':{11:2},b'face ':{11:2},b' noti':{11:2},b'ryon':{11:2},b'tyle ':{11:2},b'ford ':{11:2},b'tire ':{11:2},b'oint':{11:2},b'pite ':{11:2},b'ddle ':{11:2},b'kely ':{11:2},b'eeks ':{11:2},b'idge ':{11:2},b' user':{11:2},b'elow ':{11:2},b' reme':{11:2},b' forw':{11:2},b'msel':{11:1},b' hour':{11:1},b'nned ':{11:1},b'time ':{11:1},b' eart':{11:1},b' acad':{11:1},b'rshi':{11:1},b'ghly ':{11:1},b'nued ':{11:1},b"dn't ":{11:1},b' squa':{11:1},b' pack':{11:1},b' peac':{11:1},b' stop ':{11:1},b' page ':{11:1},b' won ':{11:1},b'cult ':{11:1},b'racy ':{11:1},b'gton ':{11:1},b'ksho':{11:1},b' neig':{11:1},b'sful ':{11:1},b'ghbo':{11:1},b'ttee ':{11:1},b'iati':{11:1},b'tute ':{11:1},b' roun':{11:1},b'hows ':{11:1},b'mary ':{11:1},b'urda':{11:1},b' wide ':{11:1},b'ruar':{11:1},b'tome':{11:1},b'rful ':{11:1},b' ago ':{11:1},b' choi':{11:1},b' gas ':{11:1},b'atme':{11:1},b'inni':{11:1},b'quen':{11:1},b' army ':{11:1},b' libr':{11:1},b'rams ':{11:1},b' inju':{11:1},b'aign ':{11:1},b'ream ':{11:1},b' easi':{11:1},b' coac':{11:1},b'hild ':{11:1},b'stly ':{11:1},b' rout':{11:1},b' step ':{11:1},b' easy ':{11:1},b'rget ':{11:1},b'lect ':{11:1},b'arry ':{11:1},b' quit':{11:1},b' sure ':{11:1},b'rged ':{11:1},b'uble ':{11:1},b'lism ':{11:1},b' gave ':{11:1},b'ufac':{11:1},b' surf':{11:1},b' meet ':{11:1},b'nian ':{11:1},b' soon ':{11:1},b' freq':{11:1},b'ssur':{11:1},b" i'm ":{11:1},b'sica':{11:1},b' pict':{11:1},b'cove':{11:1},b' user ':{11:1},b'work':{11:1},b' hope ':{11:1},b'pris':{11:1},b'ypes ':{11:1},b' coup':{11:1},b' deca':{11:1},b' uk ':{11:1},b' enab':{11:1},b'aded ':{11:1},b'nabl':{11:1},b'many ':{11:1},b' topi':{11:1},b' matc':{11:1},b' size ':{11:1},b'more 
':{11:1},b'sume':{11:1},b'mous ':{11:1},b' mond':{11:1},b' judg':{11:1},b' king ':{11:1},b' piec':{11:1},b' turn ':{11:1},b' try ':{11:1},b' beau':{11:1},b' thur':{11:1},b' exch':{11:1},b'reed ':{11:1},b'elop ':{11:1},b'read ':{11:1},b' scre':{11:1},b'unch ':{11:1},b'lock ':{11:1},b' dise':{11:1},b'iews ':{11:1},b'ntia':{11:1},b'duat':{11:1},b' type':{11:1},b' oil ':{11:1},b' mech':{11:1},b'uate ':{11:1},b'ibed ':{11:1},b'reer ':{11:1},b'nson ':{11:1},b'ppin':{11:1},b'onse ':{11:1},b' titl':{11:1},b'uild ':{11:1},b'rily ':{11:1},b'eads ':{11:1},b' tool':{11:1},b'oked ':{11:1},b'eate':{11:1},b'rote ':{11:1},b' hono':{11:1},b' hims':{11:1},b'fits ':{11:1},b' ways ':{11:1},b' tues':{11:1},b' east':{11:1},b'erly ':{11:1},b' wrot':{11:1},b'inds ':{11:1},b" 's ":{11:1},b"re's ":{11:1},b'eart ':{11:1},b'sual ':{11:1},b'cale ':{11:1},b'eave ':{11:1},b' sale':{11:1},b'tegy ':{11:1},b' wast':{11:1},b'embl':{11:1},b' brot':{11:1},b' phon':{11:1},b' rest ':{11:1},b' brid':{11:1},b'rack ':{11:1},b'hniq':{11:1},b'hood ':{11:1},b' king':{11:1},b'eive ':{11:1},b'earn ':{11:1},b'ienc':{11:1},b'call':{11:1},b' surr':{11:1},b' mean ':{11:1},b'reak ':{11:1},b' brou':{11:1},b'eers ':{11:1},b'airs ':{11:1},b' bloo':{11:1},b'ectu':{11:1},b're\xe2\x80':{11:1},b' canc':{11:1},b'akin':{11:1},b'erno':{11:1},b'vinc':{11:1},b'mewo':{11:1},b'rced ':{11:1},b' surp':{11:1},b'tack ':{11:1},b'cket ':{11:1},b'nsfe':{11:1},b' anyt':{11:1},b'tipl':{11:1},b'lain':{11:1},b' band ':{11:1},b' net ':{11:1},b' rati':{11:1},b' sea ':{11:1},b' saw ':{11:1},b" you'":{11:1},b'engt':{11:1},b'igns ':{11:1},b' crow':{11:1},b' bill ':{11:1},b'efit':{11:1},b' loss ':{11:1},b'uenc':{11:1},b'labo':{11:1},b'chas':{11:1},b'plex ':{11:1},b' clou':{11:1},b' hit ':{11:1},b'ndat':{11:1},b'appy ':{11:1},b' gard':{11:1},b'aker ':{11:1},b' daug':{11:1},b'ectl':{11:1},b'imed ':{11:1},b'isis ':{11:1},b' cath':{11:1},b'room ':{11:1},b'elds ':{11:1},b'town ':{11:1},b'ston ':{11:1},b'lues ':{11:1},b'heck ':{11:1},b'rast':{11:1},b' 
uniq':{11:1},b'shes ':{11:1},b' purc':{11:1},b'\x80\x99ll ':{11:1},b'lood ':{11:1},b'etty ':{11:1},b' appo':{11:1},b'imum ':{11:1},b' wash':{11:1},b' walk':{11:1},b' deal':{11:1},b'lley ':{11:1},b' woma':{11:1},b' bit ':{11:1},b' shap':{11:1},b' wedn':{11:1},b'enue ':{11:1},b'nesd':{11:1},b' pm ':{11:1},b' tell ':{11:1},b'luat':{11:1},b' eval':{11:1},b'lize':{11:1},b'cedu':{11:1},b' scot':{11:1},b' ship':{11:1},b' touc':{11:1},b'edul':{11:1},b' good':{11:1},b'rtin':{11:1},b'head ':{11:1},b' stoc':{11:1},b' cut ':{11:1},b' agri':{11:1},b' four':{11:1},b'oose ':{11:1},b'tect ':{11:1},b'ocat':{11:1},b' choo':{11:1},b' colu':{11:1},b'nnel ':{11:1},b'eory ':{11:1},b'tist ':{11:1},b'ltur':{11:1},b' avoi':{11:1},b'ntua':{11:1},b'reek ':{11:1},b' weig':{11:1},b' olde':{11:1},b'avio':{11:1},b' coas':{11:1},b'nism ':{11:1},b'mote ':{11:1},b' beyo':{11:1},b'tery ':{11:1},b' ii ':{11:1},b' abil':{11:1},b' cycl':{11:1},b'roun':{11:1},b'lays ':{11:1},b' danc':{11:1},b'bute':{11:1},b'hest ':{11:1},b'eats ':{11:1},b' wond':{11:1},b' morn':{11:1},b' phas':{11:1},b'tbal':{11:1},b'yond ':{11:1},b'plai':{11:1},b' talk ':{11:1},b'efit ':{11:1},b' mind ':{11:1},b'mely ':{11:1},b'back ':{11:1},b'uire ':{11:1},b'edly ':{11:1},b' clic':{11:1},b'eone ':{11:1},b' kids ':{11:1},b' chie':{11:1},b' gmt ':{11:1},b' yout':{11:1},b'hall ':{11:1},b' full':{11:1},b' exac':{11:1},b'nsla':{11:1},b' met ':{11:1},b'ipme':{11:1},b'sely ':{11:1},b'swer ':{11:1},b'aini':{11:1},b'ocks ':{11:1},b'heme ':{11:1},b' patt':{11:1},b'raph':{11:1},b'deas ':{11:1},b'tone ':{11:1},b'ndia ':{11:1},b'ardi':{11:1},b' wron':{11:1},b' tryi':{11:1},b'ffic ':{11:1},b'asio':{11:1},b'arth ':{11:1},b'rene':{11:1},b'eems ':{11:1},b'peed ':{11:1},b' jobs ':{11:1},b'wned ':{11:1},b'eals ':{11:1},b'road ':{11:1},b'asic ':{11:1},b' exci':{11:1},b' can\xe2':{11:1},b' wors':{11:1},b'hods ':{11:1},b' nov ':{11:1},b' dail':{11:1},b'grap':{11:1},b'usan':{11:1},b' uses ':{11:1},b' draw':{11:1},b' voic':{11:1},b'aily 
':{11:1},b'vidi':{11:1},b' gues':{11:1},b' bott':{11:1},b' he\xe2\x80':{11:1},b'nues ':{11:1},b' ones ':{11:1},b'ckin':{11:1},b' lice':{11:1},b'text ':{11:1},b'olve ':{11:1},b'empt':{11:1},b' stak':{11:1},b' race ':{11:1},b' chap':{11:1},b'rney ':{11:1},b'erry ':{11:1},b' shal':{11:1},b'eace ':{11:1},b' feel':{11:1},b' poll':{11:1},b'eles':{11:1},b'wood ':{11:1},b'eets ':{11:1},b' ask ':{11:1},b' add ':{11:1},b'bate ':{11:1},b' acqu':{11:1},b'eard ':{11:1},b' goal':{11:1},b' shel':{11:1},b' boun':{11:1},b'lone ':{11:1},b'rust ':{11:1},b' stay ':{11:1},b'roxi':{11:1},b' exhi':{11:1},b' life':{11:1},b' nume':{11:1},b' john':{11:1},b'cide ':{11:1},b'thod ':{11:1},b' fres':{11:1},b'otia':{11:1},b'tuti':{11:1},b'erou':{11:1},b'lain ':{11:1},b'ixed ':{11:1},b'gine ':{11:1},b'ifie':{11:1},b'bled ':{11:1},b' whos':{11:1},b'emic ':{11:1},b'ogue ':{11:1},b'hief ':{11:1},b' wasn':{11:1},b' mino':{11:1},b'ckly ':{11:1},b' rule ':{11:1},b' san ':{11:1},b' wife ':{11:1},b'mari':{11:1},b'rism ':{11:1},b' suit':{11:1},b' lake ':{11:1},b' anyo':{11:1},b' cell':{11:1},b'iend ':{11:1},b'aphy ':{11:1},b' hall ':{11:1},b' shoo':{11:1},b'orse ':{11:1},b'hing':{11:1},b' shot ':{11:1},b' brok':{11:1},b' lack ':{11:1},b'osal ':{11:1},b'bute ':{11:1},b'urth ':{11:1},b'njoy ':{11:1},b'ewed ':{11:1},b' carb':{11:1},b'sn\xe2\x80':{11:1},b'nked ':{11:1},b' paid ':{11:1},b'lize ':{11:1},b'ifor':{11:1},b' birt':{11:1},b'rick ':{11:1},b'siti':{11:1},b' came':{11:1},b' goes ':{11:1},b'aint ':{11:1},b' felt ':{11:1},b' data':{11:1},b' trie':{11:1},b' albu':{11:1},b' rene':{11:1},b'tled ':{11:1},b' sign ':{11:1},b' adul':{11:1},b' mark ':{11:1},b' sayi':{11:1},b'agin':{11:1},b'tify ':{11:1},b"e're ":{11:1},b' buy ':{11:1},b'olds ':{11:1},b'pear ':{11:1},b' fish':{11:1},b'icin':{11:1},b'tude ':{11:1},b'urne':{11:1},b' evol':{11:1},b'redi':{11:1},b' stea':{11:1},b'ycle ':{11:1},b' note ':{11:1},b" we'r":{11:1},b'urat':{11:1},b' bigg':{11:1},b'essm':{11:1},b'ulti':{11:1},b' file':{11:1},b' huge 
':{11:1},b'tity ':{11:1},b'craf':{11:1},b'pent ':{11:1},b'icer ':{11:1},b'lace':{11:1},b' rate':{11:1},b'htin':{11:1},b' laws ':{11:1},b' mass ':{11:1},b'togr':{11:1},b' heat ':{11:1},b'ault ':{11:1},b'haps ':{11:1},b'uple ':{11:1},b' chic':{11:1},b"en's ":{11:1},b' blue ':{11:1},b'ropr':{11:1},b' hope':{11:1},b' givi':{11:1},b'uing ':{11:1},b' obta':{11:1},b' doma':{11:1},b'ceed':{11:1},b' dang':{11:1},b'mica':{11:1},b'uran':{11:1},b'\xe2\x80\x99r':{11:1},b'pari':{11:1},b' airc':{11:1},b' gath':{11:1},b'rves ':{11:1},b' door ':{11:1},b'hown ':{11:1},b' perh':{11:1},b' weat':{11:1},b' drug ':{11:1},b'iall':{11:1},b'iece ':{11:1},b'olic ':{11:1},b' dome':{11:1},b' mayb':{11:1},b'tial':{11:1},b' join ':{11:1},b' rock ':{11:1},b' poor ':{11:1},b'wide ':{11:1},b'aybe ':{11:1},b'rnia ':{11:1},b'aled ':{11:1},b' fema':{11:1},b'loor ':{11:1},b' enha':{11:1},b'plie':{11:1},b' fun ':{11:1},b' airp':{11:1},b'itle ':{11:1},b'lped ':{11:1},b'mith ':{11:1},b' aim ':{11:1},b'oals ':{11:1},b'wise ':{11:1},b'uled ':{11:1},b' dyna':{11:1},b'ribe ':{11:1},b'ibly ':{11:1},b'ibit':{11:1},b'uare ':{11:1},b' flow':{11:1},b'dict':{11:1},b'pend ':{11:1},b'ious':{11:1},b' chos':{11:1},b'nist ':{11:1},b' sold ':{11:1},b'loym':{11:1},b'turi':{11:1},b' flig':{11:1},b' anti ':{11:1},b'sily ':{11:1},b' dead ':{11:1},b'opic ':{11:1},b' grap':{11:1},b' rapi':{11:1},b' sell':{11:1},b' drea':{11:1},b' paym':{11:1},b' wood':{11:1},b' turk':{11:1},b'bine':{11:1},b'cene ':{11:1},b'gure ':{11:1},b'eare':{11:1},b' tool ':{11:1},b' mayo':{11:1},b' mid ':{11:1},b' occa':{11:1},b' safe ':{11:1},b' kept ':{11:1},b' chea':{11:1},b' hors':{11:1},b' cere':{11:1},b'pics ':{11:1},b' core ':{11:1},b'izes ':{11:1},b' link ':{11:1},b' wall':{11:1},b"an't ":{11:1},b' corn':{11:1},b"an's ":{11:1},b" can'":{11:1},b'moti':{11:1},b' else ':{11:1},b'bing ':{11:1},b'fide':{11:1},b' recr':{11:1},b' unli':{11:1},b'nage ':{11:1},b'utif':{11:1},b' appa':{11:1},b' samp':{11:1},b' nine ':{11:1},b'sist ':{11:1},b'gins 
':{11:1},b'sels ':{11:1},b'tinc':{11:1},b'iful ':{11:1},b'rges ':{11:1},b'pene':{11:1},b' seek':{11:1},b' pola':{11:1},b' fund ':{11:1},b'mony ':{11:1},b' hydr':{11:1},b'edom ':{11:1},b'lenc':{11:1},b' sale ':{11:1},b'empt ':{11:1},b'gate ':{11:1},b' trut':{11:1},b'phic ':{11:1},b'ongs ':{11:1},b' runs ':{11:1},b'oyal ':{11:1},b'ndly ':{11:1},b' brai':{11:1},b'lthy ':{11:1},b'itch ':{11:1},b' fift':{11:1},b'loso':{11:1},b'cise ':{11:1},b' pass ':{11:1},b'late':{11:1},b'nses ':{11:1},b'dary ':{11:1},b' site':{11:1},b' earn':{11:1},b'rrow ':{11:1},b'yees ':{11:1},b'amic ':{11:1},b' pick':{11:1},b'laim ':{11:1},b' pre ':{11:1},b'owin':{11:1},b' save ':{11:1},b' simu':{11:1},b'guis':{11:1},b' outl':{11:1},b' jack':{11:1},b'cine ':{11:1},b' item':{11:1},b'eavy ':{11:1},b' hour ':{11:1},b'care ':{11:1},b'mory ':{11:1},b' weap':{11:1},b' swit':{11:1},b' holi':{11:1},b'spap':{11:1},b'well ':{11:1},b'riat':{11:1},b' fair ':{11:1},b'lted ':{11:1},b' emph':{11:1},b'luti':{11:1},b' ulti':{11:1},b"y're ":{11:1},b' file ':{11:1},b' knew ':{11:1},b' emis':{11:1},b' keep':{11:1},b' heat':{11:1},b' deep ':{11:1},b'urre':{11:1},b'uide ':{11:1},b'erfu':{11:1},b' host ':{11:1},b'riag':{11:1},b'emed ':{11:1},b'dges ':{11:1},b' jame':{11:1},b'ancy ':{11:1},b' fait':{11:1},b'tail ':{11:1},b'oast ':{11:1},b' song ':{11:1},b' road':{11:1},b'sued ':{11:1},b'mple':{11:1},b'come':{11:1},b' song':{11:1},b'rvey ':{11:1},b' lati':{11:1},b' fish ':{11:1},b'keho':{11:1},b'oken ':{11:1},b'core ':{11:1},b'eech ':{11:1},b'uent ':{11:1},b'void ':{11:1},b'lary ':{11:1},b'olar':{11:1},b'west ':{11:1},b' seat':{11:1},b'ovie ':{11:1},b' depl':{11:1},b'cism ':{11:1},b'egin ':{11:1},b'aise ':{11:1},b' bull':{11:1},b' gain':{11:1},b' harm':{11:1},b'ooms ':{11:1},b' paul ':{11:1},b'iabl':{11:1},b' tell':{11:1},b'tles ':{11:1},b' stuf':{11:1},b'nica':{11:1},b'oads ':{11:1},b'eant ':{11:1},b'cape ':{11:1},b" he's ":{11:1},b'xist ':{11:1},b'lson ':{11:1},b'ssin':{11:1},b'hasi':{11:1},b'pate 
':{11:1},b'mitm':{11:1},b'cast ':{11:1},b'deed ':{11:1},b'rbon ':{11:1},b' arts ':{11:1},b' sun ':{11:1},b'lite ':{11:1},b' send ':{11:1},b'rcis':{11:1},b' task ':{11:1},b' texa':{11:1},b'egat':{11:1},b'bour ':{11:1},b'ompa':{11:1},b'sals ':{11:1},b' hot ':{11:1},b'ruth ':{11:1},b'nsmi':{11:1},b' burn':{11:1},b' enfo':{11:1},b' ston':{11:1},b' aims ':{11:1},b'en\xe2\x80':{11:1},b' ice ':{11:1},b'seum ':{11:1},b' hunt':{11:1},b' brig':{11:1},b'r\xe2\x80\x99':{11:1},b'lict ':{11:1},b' rise ':{11:1},b'toms ':{11:1},b'izat':{11:1},b' ocea':{11:1},b' rank':{11:1},b' wall ':{11:1},b' seem ':{11:1},b' cell ':{11:1},b' feed':{11:1},b' quot':{11:1},b'oors ':{11:1},b' shif':{11:1},b'kgro':{11:1},b' deco':{11:1},b' outp':{11:1},b' savi':{11:1},b' hear ':{11:1},b'opes ':{11:1},b' cars ':{11:1},b' fuel ':{11:1},b' epis':{11:1},b'e\xe2\x80\x99':{11:1},b'reme ':{11:1},b'lean ':{11:1},b'egic ':{11:1},b' solv':{11:1},b'iage ':{11:1},b'lton ':{11:1},b'pire ':{11:1},b' fair':{11:1},b' harr':{11:1},b' warr':{11:1},b'gica':{11:1},b'rish ':{11:1},b'anes':{11:1},b' card ':{11:1},b' outc':{11:1},b'emon':{11:1},b'urns ':{11:1},b'like ':{11:1},b' ahea':{11:1},b'adia':{11:1},b' fell':{11:1},b'hlig':{11:1},b' dela':{11:1},b' chro':{11:1},b' gets ':{11:1},b'nsit':{11:1},b'aile':{11:1},b'sfer ':{11:1},b'abas':{11:1},b' debt ':{11:1},b' unco':{11:1},b'xual ':{11:1},b'owns ':{11:1},b' less':{11:1},b'nati':{11:1},b' unif':{11:1},b'uggl':{11:1},b' visu':{11:1},b'ario ':{11:1},b'clud':{11:1},b' tour ':{11:1},b' yes ':{11:1},b'holi':{11:1},b' farm ':{11:1},b' brie':{11:1},b'itud':{11:1},b'pens ':{11:1},b' draf':{11:1},b'htly ':{11:1},b'hank ':{11:1},b' shop':{11:1},b'exas ':{11:1},b'sman ':{11:1},b' walk ':{11:1},b'teps ':{11:1},b'uous ':{11:1},b'esis ':{11:1},b'azin':{11:1},b'oric ':{11:1},b' swed':{11:1},b'erag':{11:1},b'tuff ':{11:1},b'adca':{11:1},b' grow ':{11:1},b' laye':{11:1},b'iced ':{11:1},b' purs':{11:1},b'reci':{11:1},b'asts ':{11:1},b'ight':{11:1},b'isms ':{11:1},b'eous ':{11:1},b' 
inva':{11:1},b'mies ':{11:1},b' anyw':{11:1},b'tlan':{11:1},b'peak ':{11:1},b' inpu':{11:1},b' gone ':{11:1},b'dely ':{11:1},b' sequ':{11:1},b' usef':{11:1},b'lbum ':{11:1},b' sets ':{11:1},b' husb':{11:1},b'nite ':{11:1},b'mpus ':{11:1},b'logu':{11:1},b'cern ':{11:1},b'omer ':{11:1},b'tood ':{11:1},b' drop':{11:1},b' box ':{11:1},b' town':{11:1},b'rtly ':{11:1},b' your':{11:1},b'apan ':{11:1},b'efer ':{11:1},b'alks ':{11:1},b'thor ':{11:1},b' pitc':{11:1},b'eara':{11:1},b'icts ':{11:1},b'aith ':{11:1},b' spai':{11:1},b' taxe':{11:1},b'tmen':{11:1},b'ofit ':{11:1},b' poly':{11:1},b' cold ':{11:1},b' reje':{11:1},b'tise ':{11:1},b' labe':{11:1},b' ordi':{11:1},b' fune':{11:1},b' catc':{11:1},b' affo':{11:1},b' nice ':{11:1},b'alty ':{11:1},b'senc':{11:1},b'\xe2\x80\x99d ':{11:1},b'wner ':{11:1},b'lked ':{11:1},b' trou':{11:1},b'gory ':{11:1},b' narr':{11:1},b' unfo':{11:1},b'orce':{11:1},b'dule ':{11:1},b' murd':{11:1},b' slee':{11:1},b'cuss ':{11:1},b' shee':{11:1},b'ucin':{11:1},b'ulti ':{11:1},b'mise':{11:1},b'take ':{11:1},b'xtra ':{11:1},b'asur':{11:1},b'lure ':{11:1},b'liam ':{11:1},b'mbly ':{11:1},b' clot':{11:1},b' hill ':{11:1},b' biol':{11:1},b'ayor ':{11:1},b'rnam':{11:1},b'ishm':{11:1},b'kage ':{11:1},b'cipl':{11:1},b'eled ':{11:1},b'ilms ':{11:1},b' feet ':{11:1},b'ulty ':{11:1},b' trip ':{11:1},b'igra':{11:1},b' scar':{11:1},b'pter ':{11:1},b'axes ':{11:1},b' hone':{11:1},b' wait':{11:1},b"u're ":{11:1},b'aigh':{11:1},b'udge ':{11:1},b' aggr':{11:1},b' priz':{11:1},b' gain ':{11:1},b' flow ':{11:1},b'lief ':{11:1},b'atin ':{11:1},b' toug':{11:1},b' miss ':{11:1},b'cure ':{11:1},b'mall':{11:1},b'iest ':{11:1},b' rura':{11:1},b' aren':{11:1},b'wish ':{11:1},b' cash ':{11:1},b'ompl':{11:1},b' hung':{11:1},b' anci':{11:1},b' soil ':{11:1},b' slow':{11:1},b'cian':{11:1},b' guy ':{11:1},b' bodi':{11:1},b'idat':{11:1},b'days ':{11:1},b'rael ':{11:1},b' mole':{11:1},b'aril':{11:1},b' oct ':{11:1},b'dden ':{11:1},b"nt's ":{11:1},b'icti':{11:1},b'down 
':{11:1},b' famo':{11:1},b' athl':{11:1},b' mexi':{11:1},b' risk':{11:1},b'pain ':{11:1},b' foot ':{11:1},b'anel ':{11:1},b'hema':{11:1},b'rupt':{11:1},b'lume ':{11:1},b' nor ':{11:1},b' fash':{11:1},b'nels ':{11:1},b'tras':{11:1},b'rsel':{11:1},b' chee':{11:1},b' jewi':{11:1},b' cutt':{11:1},b' male ':{11:1},b' sacr':{11:1},b'ttom ':{11:1},b'irit ':{11:1},b'only ':{11:1},b'ness':{11:1},b'base ':{11:1},b'oops ':{11:1},b'gged ':{11:1},b' caro':{11:1},b'imit ':{11:1},b' deni':{11:1},b'ldwi':{11:1},b'aths ':{11:1},b'eece ':{11:1},b' rich ':{11:1},b' worr':{11:1},b' more':{11:1},b'ilot ':{11:1},b' atla':{11:1},b'irth ':{11:1},b'eaks ':{11:1},b'eove':{11:1},b'gari':{11:1},b' swee':{11:1},b'chol':{11:1},b' empi':{11:1},b' cruc':{11:1},b' tree ':{11:1},b'cian ':{11:1},b'ewer ':{11:1},b'onen':{11:1},b'hape ':{11:1},b' vete':{11:1},b' bask':{11:1},b' atto':{11:1},b'bers':{11:1},b' deep':{11:1},b'nied ':{11:1},b'preh':{11:1},b' dark ':{11:1},b'ousl':{11:1},b' slig':{11:1},b'orne':{11:1},b'itim':{11:1},b' quee':{11:1},b'resh ':{11:1},b' rail':{11:1},b'ibes ':{11:1},b' phar':{11:1},b' asia ':{11:1},b'gage ':{11:1},b'urin':{11:1},b' path ':{11:1},b'oute ':{11:1},b' emot':{11:1},b'anet ':{11:1},b' mine':{11:1},b'gely ':{11:1},b'otin':{11:1},b'part ':{11:1},b' smok':{11:1},b"ne's ":{11:1},b'ecul':{11:1},b' latt':{11:1},b' remi':{11:1},b'cate':{11:1},b'monl':{11:1},b'mmit ':{11:1},b' ran ':{11:1},b'ophy ':{11:1},b' bear':{11:1},b' stev':{11:1},b'erab':{11:1},b'tlem':{11:1},b' whom ':{11:1},b'ommo':{11:1},b'uces ':{11:1},b'gard ':{11:1},b'vior ':{11:1},b' vice ':{11:1},b' sell ':{11:1},b' guys ':{11:1},b'eter':{11:1},b'aims ':{11:1},b' wish ':{11:1},b' towe':{11:1},b' wood ':{11:1},b'dure ':{11:1},b'ndow ':{11:1},b'ribu':{11:1},b'mond ':{11:1},b' comf':{11:1},b'ival':{11:1},b' bay ':{11:1},b'seng':{11:1},b'ikes ':{11:1},b'ntow':{11:1},b'gero':{11:1},b'rybo':{11:1},b' phen':{11:1},b'ride ':{11:1},b' boys ':{11:1},b'nium ':{11:1},b' nd ':{11:1},b'enth ':{11:1},b' neit':{11:1},b' map 
':{11:1},b' troo':{11:1},b' fixe':{11:1},b'ruly ':{11:1},b'aved ':{11:1},b' wind ':{11:1},b' bus ':{11:1},b'cade ':{11:1},b' tast':{11:1},b' stay':{11:1},b'ntry':{11:1},b' surg':{11:1},b' dead':{11:1},b' fear ':{11:1},b' page':{11:1},b' weal':{11:1},b'roke ':{11:1},b' aris':{11:1},b' trul':{11:1},b'uent':{11:1},b'hine':{11:1},b' igno':{11:1},b'hens':{11:1},b'aped ':{11:1},b'elps ':{11:1},b'rifi':{11:1},b'roud ':{11:1},b'erce ':{11:1},b'sier ':{11:1},b' onto ':{11:1},b'tton ':{11:1},b' abus':{11:1},b' gend':{11:1},b' voca':{11:1},b'uard ':{11:1},b'rugs ':{11:1},b' silv':{11:1},b'fted ':{11:1},b'ccur ':{11:1},b'raph ':{11:1},b' eye ':{11:1},b' mary ':{11:1},b' tree':{11:1},b'lery ':{11:1},b'zine ':{11:1},b' eyes ':{11:1},b'nput ':{11:1},b'umer ':{11:1},b'qual ':{11:1},b' taug':{11:1},b' nurs':{11:1},b' scri':{11:1},b' spli':{11:1},b' ceme':{11:1},b'dged ':{11:1},b' isn\xe2':{11:1},b'othe':{11:1},b'hain ':{11:1},b' wave':{11:1},b'sory ':{11:1},b'fiel':{11:1},b'nshi':{11:1},b'terd':{11:1},b' rain':{11:1},b'isks ':{11:1},b'olid ':{11:1},b'phic':{11:1},b'ewhe':{11:1},b' last':{11:1},b' ms ':{11:1},b' aven':{11:1},b'ught':{11:1},b'eale':{11:1},b'ainm':{11:1},b'epts ':{11:1},b'riti':{11:1},b"ry's ":{11:1},b' heri':{11:1},b' endo':{11:1},b' sher':{11:1},b'gmen':{11:1},b'orho':{11:1},b'istm':{11:1},b' enem':{11:1},b'oots ':{11:1},b'abor ':{11:1},b'tile ':{11:1},b'ouch ':{11:1},b'hair ':{11:1},b'eply ':{11:1},b'derf':{11:1},b'cean ':{11:1},b' ship ':{11:1},b' cras':{11:1},b' depr':{11:1},b' dism':{11:1},b' craf':{11:1},b'odat':{11:1},b' vary ':{11:1},b' caug':{11:1},b'ldn\xe2':{11:1},b' fell ':{11:1},b'nize ':{11:1},b'gnal ':{11:1},b'troy':{11:1},b' irel':{11:1},b'play':{11:1},b'icit ':{11:1},b' virg':{11:1},b'uick ':{11:1},b'teve':{11:1},b'cope ':{11:1},b'iams ':{11:1},b"ty's ":{11:1},b' wine ':{11:1},b' lose ':{11:1},b' cast ':{11:1},b'osin':{11:1},b'enly ':{11:1},b'rnor ':{11:1},b' scan':{11:1},b'toco':{11:1},b' girl ':{11:1},b'pons ':{11:1},b'ntum ':{11:1},b' yard 
':{11:1},b' grew ':{11:1},b'itag':{11:1},b' push':{11:1},b' sain':{11:1},b"ia's ":{11:1},b'nows ':{11:1},b' boy ':{11:1},b' voti':{11:1},b'viet ':{11:1},b'ncil':{11:1},b' soph':{11:1},b'rons ':{11:1},b' atti':{11:1},b'ound':{11:1},b' neut':{11:1},b'onor ':{11:1},b'pher ':{11:1},b'ifth ':{11:1},b'tmas ':{11:1},b' door':{11:1},b' harv':{11:1},b' fit ':{11:1},b' path':{11:1},b'tput ':{11:1},b'mble ':{11:1},b'etry ':{11:1},b' whee':{11:1},b'cott ':{11:1},b'tche':{11:1},b' affi':{11:1},b' pain ':{11:1},b'shin':{11:1},b' firm ':{11:1},b' twen':{11:1},b' liqu':{11:1},b'osis ':{11:1},b'died ':{11:1},b'rmor':{11:1},b'stle ':{11:1},b' unle':{11:1},b' hori':{11:1},b' acts ':{11:1},b' eat ':{11:1},b'hted ':{11:1},b'uabl':{11:1},b'reat':{11:1},b' city':{11:1},b'ranc':{11:1},b' pray':{11:1},b'gued ':{11:1},b'peni':{11:1},b'irds ':{11:1},b'umed ':{11:1},b' uppe':{11:1},b'eels ':{11:1},b' kitc':{11:1},b'sess':{11:1},b'ghtl':{11:1},b' code':{11:1},b'ceme':{11:1},b' rota':{11:1},b"le's ":{11:1},b'ript ':{11:1},b' drop ':{11:1},b' juni':{11:1},b' wire':{11:1},b' shak':{11:1},b'isor':{11:1},b' shoc':{11:1},b' emba':{11:1},b' anth':{11:1},b'rway ':{11:1},b'rsal ':{11:1},b' cree':{11:1},b'seho':{11:1},b'iary ':{11:1},b'nous ':{11:1},b'rast ':{11:1},b'ishi':{11:1},b' jesu':{11:1},b'rton ':{11:1},b'demy ':{11:1},b'uity ':{11:1},b' heig':{11:1},b' synt':{11:1},b'repr':{11:1},b'sets ':{11:1},b'ceiv':{11:1},b'kins ':{11:1},b' edge ':{11:1},b' beat ':{11:1},b' tack':{11:1},b'nome':{11:1},b'ssic':{11:1},b'eful':{11:1},b' neur':{11:1},b'rash ':{11:1},b'tees ':{11:1},b'lthc':{11:1},b'eric ':{11:1},b' mixe':{11:1},b'lane ':{11:1},b" i've ":{11:1},b' rd ':{11:1},b'ilar':{11:1},b' sixt':{11:1},b'sume ':{11:1},b'hion ':{11:1},b' nano':{11:1},b'rowd ':{11:1},b'deal ':{11:1},b' mars':{11:1},b'orry ':{11:1},b' crew ':{11:1},b' zero ':{11:1},b'hors ':{11:1},b'rmac':{11:1},b' wait ':{11:1}} 7 | } -------------------------------------------------------------------------------- /eld/subsetResult.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2023 Nito T.M. 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0 3 | # Author Nito T.M. (https://github.com/nitotm) 4 | # Package pypi.org/project/eld/ 5 | 6 | class SubsetResult: 7 | def __init__(self, success, languages=None, error=None, file=None): 8 | self.success = success 9 | self.languages = list(languages.values()) if languages else None 10 | self.error = error 11 | self.file = file 12 | -------------------------------------------------------------------------------- /eld/tests/test_detector.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import os 3 | import sys 4 | 5 | # Make sure, local package is imported instead of pip package 6 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) 7 | sys.path.insert(0, project_root) # prioritize the local package 8 | # sys.path.append('../..') 9 | 10 | from eld import LanguageDetector 11 | from eld.languageDetector import get_clean_txt 12 | 13 | 14 | # Mostly functional testing, when functions are more mature I will add some more unit tests 15 | 16 | def test_print_version(): 17 | detector = LanguageDetector() 18 | print('ELD ver. 
' + detector.VERSION) 19 | assert True 20 | 21 | 22 | def test_load_eld(): 23 | detector = LanguageDetector() 24 | assert isinstance(detector, LanguageDetector) 25 | 26 | 27 | def test_simple_detect(): 28 | detector = LanguageDetector() 29 | result = detector.detect('Hola, cómo te llamas?').language 30 | assert result == 'es' 31 | 32 | 33 | def test_get_multiple_scores(): 34 | detector = LanguageDetector() 35 | detector.return_scores = True 36 | result = len(detector.detect('Hola, cómo te llamas?').scores()) 37 | assert result > 1, 'Expected: >1 scores' 38 | 39 | 40 | def test_detect_error_empty_text(): 41 | detector = LanguageDetector() 42 | result = detector.detect('').language 43 | assert result is None 44 | 45 | 46 | def test_clean_text(): 47 | text = 'https://www.google.com/\n' \ 48 | 'mail@gmail.com\n' \ 49 | 'oogle.com/search?q=search&source=hp\n' \ 50 | '12345 A12345\n' 51 | result = get_clean_txt(text).strip() 52 | assert result == '' 53 | 54 | 55 | def test_check_confidence(): 56 | detector = LanguageDetector('ngramsM60') 57 | text = 'zxz zcz zvz zbz znz zmz zlz zsz zdz zkz zjz pelo' 58 | result = detector.detect(text).is_reliable() 59 | assert result is False 60 | 61 | 62 | def test_load_ngrams_detect(): 63 | detector = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd') 64 | result = detector.detect('Hola, cómo te llamas?').language 65 | assert result == 'es' 66 | 67 | 68 | def test_accuracy_m_bigtest(): 69 | # TODO use importlib or pathlib to open txt file as package eld.tests.data resource 70 | detector = LanguageDetector('ngramsM60') 71 | file = open( os.path.dirname(__file__) + '/data/big-test.txt', encoding='utf-8') 72 | # '../../benchmark/big-test.txt' 73 | content = file.read() 74 | file.close() 75 | lines = content.strip().split("\n") 76 | total = 0 77 | correct = 0 78 | for line in lines: 79 | total += 1 80 | values = line.split("\t") 81 | if detector.detect(values[1]).language == values[0]: 82 | correct += 1 83 | if total < 60000: 
84 | pytest.fail('big-test.txt was not load correctly, too few lines') 85 | result = correct / total * 100 86 | # a bit of margin, depending on tie scores order, avg. might change a bit 87 | assert result > 99.4 88 | 89 | # python -m pytest -v -s test_detector.py 90 | # if __name__ == '__main__': 91 | # pytest.main(["-v", "test_detector.py"]) # Gives errors 92 | -------------------------------------------------------------------------------- /eld/tests/test_subset.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import sys 3 | import os 4 | 5 | # Make sure, local package is imported instead of pip package 6 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..')) 7 | sys.path.insert(0, project_root) # prioritize the local package 8 | # sys.path.append('../..') 9 | 10 | from eld import LanguageDetector 11 | 12 | 13 | # Mostly functional testing, when functions are more mature I will add some more unit tests 14 | 15 | def test_load_eld(): 16 | detector = LanguageDetector() 17 | assert isinstance(detector, LanguageDetector) 18 | 19 | 20 | def test_dynamic_subset_detect(): 21 | detector = LanguageDetector() 22 | lang_subset = ['en'] 23 | detector.dynamic_lang_subset(lang_subset) 24 | result = len(detector.detect('How are you? Bien, gracias').scores()) 25 | assert result == 1, 'Expected: 1 score, subset of only one language' 26 | 27 | 28 | def test_remove_dynamic_subset(): 29 | detector = LanguageDetector() 30 | lang_subset = ['en'] 31 | detector.dynamic_lang_subset(lang_subset) 32 | detector.dynamic_lang_subset(None) 33 | result = len(detector.detect('How are you? Bien, gracias').scores()) 34 | assert result > 1 35 | 36 | 37 | def test_subset_detect(): 38 | detector = LanguageDetector() 39 | lang_subset = ['en'] 40 | detector.lang_subset(lang_subset) 41 | result = len(detector.detect('How are you? 
Bien, gracias').scores()) 42 | assert result == 1, 'Expected: 1 score, subset of only one language' 43 | 44 | 45 | def test_remove_subset(): 46 | detector = LanguageDetector() 47 | lang_subset = ['en'] 48 | detector.lang_subset(lang_subset) 49 | detector.lang_subset(None) 50 | result = len(detector.detect('How are you? Bien, gracias').scores()) 51 | assert result > 1 52 | 53 | 54 | def test_save_subset_file(): 55 | # TODO use importlib or pathlib to check subset file as package resource 56 | file = os.path.dirname(__file__) + '/../resources/ngrams/subset/ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py' 57 | if os.path.exists(file): 58 | os.remove(file) 59 | detector = LanguageDetector() 60 | lang_subset = ['en'] 61 | detector.lang_subset(lang_subset) 62 | result = os.path.exists(file) 63 | assert result is True, 'Subset languages file Not saved: ' + file 64 | 65 | 66 | def test_load_ngrams_detect(): 67 | detector = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd') 68 | result = detector.detect('Hola, cómo te llamas?').language 69 | assert result == 'es' 70 | -------------------------------------------------------------------------------- /misc/sentences_avg_py.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/ce666a0355d5ca972779e0777c534f5895b067e4/misc/sentences_avg_py.png -------------------------------------------------------------------------------- /misc/table_accuracy_py.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Accuracy 5 | 6 | Nito-ELD 7 | 8 | 99.3% 9 | 10 | 99.4% 11 | 12 | 98.8% 13 | 14 | 87.6% 15 | 16 | 73.3% 17 | 18 | Nito-ELD-L 19 | 20 | 99.4% 21 | 22 | 99.4% 23 | 24 | 98.7% 25 | 26 | 89.6% 27 | 28 | 76.4% 29 | 30 | Lingua1 31 | 32 | 98.8% 33 | 34 | 99.1% 35 | 36 | 98.6% 37 | 38 | 93.1% 39 | 40 | 80.0% 41 | 42 | CLD22 43 | 44 | 93.8% 45 | 46 | 97.2% 47 | 48 | 97.2% 49 | 50 | 
87.7% 51 | 52 | 69.6% 53 | 54 | Lingua low1 55 | 56 | 96.0% 57 | 58 | 97.2% 59 | 60 | 96.3% 61 | 62 | 83.7% 63 | 64 | 68.0% 65 | 66 | CLD32 67 | 68 | 92.2% 69 | 70 | 95.8% 71 | 72 | 94.7% 73 | 74 | 69.0% 75 | 76 | 51.5% 77 | 78 | franc 79 | 80 | 89.8% 81 | 82 | 92.0% 83 | 84 | 90.5% 85 | 86 | 65.9% 87 | 88 | 52.9% 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | Tweets 104 | 105 | 106 | Big test 107 | 108 | 109 | Sentences 110 | 111 | 112 | Word pairs 113 | 114 | 115 | Single words 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /misc/table_time_py.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Seconds 5 | 6 | Nito-ELD-py 7 | 8 | 0.96" 9 | 10 | 7.8" 11 | 12 | 6.7" 13 | 14 | 2.6" 15 | 16 | 2.1" 17 | 18 | Nito-ELD-L-py 19 | 20 | 1" 21 | 22 | 8" 23 | 24 | 6.9" 25 | 26 | 2.7" 27 | 28 | 2.1" 29 | 30 | Lingua 31 | 32 | 4790" 33 | 34 | 24000" 35 | 36 | 18700" 37 | 38 | 8450" 39 | 40 | 6700" 41 | 42 | CLD2 43 | 44 | 0.35" 45 | 46 | 2" 47 | 48 | 1.7" 49 | 50 | 0.98" 51 | 52 | 0.8" 53 | 54 | Lingua low 55 | 56 | 64" 57 | 58 | 370" 59 | 60 | 308" 61 | 62 | 108" 63 | 64 | 85" 65 | 66 | CLD3 67 | 68 | 3.9" 69 | 70 | 29" 71 | 72 | 26" 73 | 74 | 12" 75 | 76 | 11" 77 | 78 | franc 79 | 80 | 1.2" 81 | 82 | 8" 83 | 84 | 7.8" 85 | 86 | 2.8" 87 | 88 | 2" 89 | 90 | Nito-ELD-php 91 | 92 | 0.31" 93 | 94 | 2.5" 95 | 96 | 2.2" 97 | 98 | 0.66" 99 | 100 | 0.48" 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | Tweets 116 | 117 | 118 | Big test 119 | 120 | 121 | Sentences 122 | 123 | 124 | Word pairs 125 | 126 | 127 | Single words 128 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["poetry-core>=1.0.0"] 3 | build-backend = 
"poetry.core.masonry.api" 4 | 5 | [tool.poetry] 6 | name = "eld" 7 | version = "1.0.8" 8 | # Update VERSION at languageDetector.py too 9 | authors = ["Nito T.M."] 10 | description = "Fast and accurate natural language detection. Detector written in Python. Nito-ELD, ELD." 11 | keywords = ["nlp", "language", "natural-language-processing", "natural-language", "language-detection", "language-detector", "language-identification"] 12 | license = "Apache-2.0" 13 | readme = "README.md" 14 | homepage = "https://github.com/nitotm/efficient-language-detector-py/" 15 | repository = "https://github.com/nitotm/efficient-language-detector-py.git" 16 | classifiers = [ 17 | "Programming Language :: Python :: 3", 18 | "License :: OSI Approved :: Apache Software License", 19 | "Operating System :: OS Independent" 20 | ] 21 | packages = [ 22 | { include = "eld" }, 23 | { include = "eld/tests" }, 24 | { include = "eld/resources" }, 25 | { include = "eld/resources/ngrams" }, 26 | { include = "eld/resources/ngrams/subset" } 27 | ] 28 | include = ["eld/resources/test/data/*.txt"] 29 | 30 | [tool.poetry.dependencies] 31 | python = "^3.7" 32 | regex = "*" 33 | 34 | # [tool.poetry.scripts] 35 | --------------------------------------------------------------------------------