├── .gitignore
├── LICENSE
├── README.md
├── benchmark
    ├── bench.py
    ├── big-test.txt
    ├── sentences.txt
    ├── single-words.txt
    ├── tweets.txt
    └── word-pairs.txt
├── demo.py
├── eld
    ├── __init__.py
    ├── languageData.py
    ├── languageDetector.py
    ├── languageResult.py
    ├── languageSubset.py
    ├── resources
    │   ├── avg_score.py
    │   └── ngrams
    │   │   ├── ngramsL60.py
    │   │   ├── ngramsM60.py
    │   │   └── subset
    │   │       ├── ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py
    │   │       └── ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd.py
    ├── subsetResult.py
    └── tests
    │   ├── data
    │       └── big-test.txt
    │   ├── test_detector.py
    │   └── test_subset.py
├── misc
    ├── sentences_avg_py.png
    ├── table_accuracy_py.svg
    └── table_time_py.svg
└── pyproject.toml


/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | *.pyc
3 | /.idea/
4 | build/
5 | dist/
6 | *.egg-info/
7 | *.egg
8 | .pytest_cache/


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 |    END OF TERMS AND CONDITIONS
178 | 
179 |    APPENDIX: How to apply the Apache License to your work.
180 | 
181 |       To apply the Apache License to your work, attach the following
182 |       boilerplate notice, with the fields enclosed by brackets "[]"
183 |       replaced with your own identifying information. (Don't include
184 |       the brackets!)  The text should be enclosed in the appropriate
185 |       comment syntax for the file format. We also recommend that a
186 |       file or class name and description of purpose be included on the
187 |       same "printed page" as the copyright notice for easier
188 |       identification within third-party archives.
189 | 
190 |    Copyright 2023 Nito T.M.
191 | 	Author URL: https://github.com/nitotm
192 | 
193 |    Licensed under the Apache License, Version 2.0 (the "License");
194 |    you may not use this file except in compliance with the License.
195 |    You may obtain a copy of the License at
196 | 
197 |        http://www.apache.org/licenses/LICENSE-2.0
198 | 
199 |    Unless required by applicable law or agreed to in writing, software
200 |    distributed under the License is distributed on an "AS IS" BASIS,
201 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
202 |    See the License for the specific language governing permissions and
203 |    limitations under the License.
204 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Efficient Language Detector
  2 | 
  3 | <div align="center">
  4 | 	
  5 | ![supported Python versions](https://img.shields.io/badge/Python-%3E%3D%203.7-blue)
  6 | [![license](https://img.shields.io/badge/license-Apache%202.0-blue.svg)](https://www.apache.org/licenses/LICENSE-2.0)
  7 | [![supported languages](https://img.shields.io/badge/supported%20languages-60-brightgreen.svg)](#languages)
  8 | 	
  9 | </div>
 10 | 
 11 | Efficient Language Detector (*Nito-ELD* or *ELD*) is a fast and accurate language detector. It is one of the fastest non-compiled detectors, while its accuracy is within the range of the heaviest and slowest detectors.
 12 | 
 13 | It is 100% Python, easy to install, and has no dependencies other than `regex`.  
 14 | ELD is also available in [Javascript](https://github.com/nitotm/efficient-language-detector-js) and [PHP](https://github.com/nitotm/efficient-language-detector).
 15 | 
 16 | > This is the first version of a port of the original PHP version. The structure might not be definitive, and the code can be optimized. My knowledge of Python is basic, so feel free to suggest improvements.
 17 | 
 18 | 1. [Installation](#installation)
 19 | 2. [How to use](#how-to-use)
 20 | 3. [Benchmarks](#benchmarks)
 21 | 4. [Languages](#languages)
 22 | 
 23 | ## Installation
 24 | 
 25 | ```bash
 26 | $ pip install eld
 27 | ```
 28 | Alternatively, you can download or clone the files and use them directly by changing the import path.
 29 | 
 30 | ## How to use?
 31 | 
 32 | ```python
 33 | from eld import LanguageDetector
 34 | detector = LanguageDetector()
 35 | ```
 36 | `detect()` expects a UTF-8 string, and returns an object, with a 'language' variable, which is either an *ISO 639-1 code* or `None`
 37 | ```python
 38 | print(detector.detect('Hola, cómo te llamas?'))
 39 | # Object { language: "es", scores(): {"es": 0.53, "et": 0.21, ...}, is_reliable(): True }
 40 | # Object { language: None|str, scores(): None|dict, is_reliable(): bool }
 41 | 
 42 | print(detector.detect('Hola, cómo te llamas?').language)
 43 | # "es"
 44 | 
 45 | # if clean_text(True), detect() removes Urls, domains, emails, alphanumerical & numbers
 46 | detector.clean_text(True)  # Default is False
 47 | ```
 48 | - To reduce the languages to be detected, there are 3 different options; each only needs to be executed once. (Check the available [languages](#languages) below.)
 49 | ```python
 50 | lang_subset = ['en', 'es', 'fr', 'it', 'nl', 'de']
 51 | 
 52 | # Option 1
 53 | # with dynamic_lang_subset(), detect() executes normally, and then filters excluded languages
 54 | detector.dynamic_lang_subset(lang_subset)
 55 | # Returns an object with a list named 'languages', with the validated languages or 'None'
 56 | 
 57 | # Option 2. lang_subset() Will first remove the excluded languages, from the n-grams database
 58 | # For a single detection is slower than dynamic_lang_subset(), but for several will be faster
 59 | # If save option is true (default), the new Ngrams subset will be stored, and loaded next call
 60 | detector.lang_subset(lang_subset) # lang_subset(langs, save=True) 
 61 | # Returns object {success: True, languages: ['de', 'en', ...], error: None, file: 'ngramsM60...'}
 62 | 
 63 | # To remove either dynamic_lang_subset() or lang_subset(), call the methods with None as argument
 64 | detector.lang_subset(None)
 65 | 
 66 | # Finally the optimal way to regularly use a language subset: we create the instance with a file
 67 | # The file in the argument can be a subset by lang_subset() or another database like 'ngramsL60'
 68 | langSubsetDetect = LanguageDetector('ngramsL60')
 69 | ```
 70 | 
 71 | ## Benchmarks
 72 | 
 73 | I compared *ELD* with a variety of detectors, including ones written in other programming languages, since the interesting part is the algorithm.
 74 | 
 75 | | URL                                                       | Version      | Language   |
 76 | |:----------------------------------------------------------|:-------------|:-----------|
 77 | | https://github.com/nitotm/efficient-language-detector-py/ | 0.9.0        | Python     |
 78 | | https://github.com/nitotm/efficient-language-detector/    | 1.0.0        | PHP        |
 79 | | https://github.com/pemistahl/lingua-py                    | 1.3.2        | Python     |
 80 | | https://github.com/CLD2Owners/cld2                        | Aug 21, 2015 | C++        |
 81 | | https://github.com/google/cld3                            | Aug 28, 2020 | C++        |
 82 | | https://github.com/wooorm/franc                           | 6.1.0        | Javascript |
 83 | 
 84 | Benchmarks: **Tweets**: *760KB*, short sentences of 140 chars max.; **Big test**: *10MB*, sentences in all 60 languages supported; **Sentences**: *8MB*, this is the *Lingua* sentences test, minus unsupported languages.  
 85 | Short sentences are what *ELD* and most detectors focus on, as very short text is unreliable, but I included the *Lingua* **Word pairs** (*1.5MB*) and **Single words** (*880KB*) tests to see how they all compare beyond their reliable limits.
 86 | 
 87 | These are the results, first, accuracy and then execution time.
 88 | 
 89 | <!-- Accuracy table
 90 | |                     | Tweets       | Big test     | Sentences    | Word pairs   | Single words |
 91 | |:--------------------|:------------:|:------------:|:------------:|:------------:|:------------:|
 92 | | **Nito-ELD**        | 99.3%        | 99.4%        | 98.8%        | 87.6%        | 73.3%        |
 93 | | **Nito-ELD-L**      | 99.4%        | 99.4%        | 98.7%        | 89.4%        | 76.1%        |
 94 | | **Lingua**          | 98.8%        | 99.1%        | 98.6%        | 93.1%        | 80.0%        |
 95 | | **CLD2**            | 93.8%        | 97.2%        | 97.2%        | 87.7%        | 69.6%        |
 96 | | **Lingua low**      | 96.0%        | 97.2%        | 96.3%        | 83.7%        | 68.0%        |
 97 | | **CLD3**            | 92.2%        | 95.8%        | 94.7%        | 69.0%        | 51.5%        |
 98 | | **franc**           | 89.8%        | 92.0%        | 90.5%        | 65.9%        | 52.9%        |
 99 | -->
100 | <img alt="accuracy table" width="800" src="https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/main/misc/table_accuracy_py.svg">
101 | 
102 | <!--- Time table
103 | |                     | Tweets       | Big test     | Sentences    | Word pairs   | Single words |
104 | |:--------------------|:------------:|:------------:|:------------:|:------------:|:------------:|
105 | | **Nito-ELD-py**     |     0.96"    |      7.8"    |      6.7"    |     2.6"     |     2.1"     |
106 | | **Nito-ELD-L-py**   |     1"       |      8"      |      6.9"    |     2.7"     |     2.1"     |
107 | | **Lingua**          |  4790"       |  24000"      |  18700"      |  8450"       |  6700"       |
108 | | **CLD2**            |     0.35"    |      2"      |      1.7"    |     0.98"    |     0.8"     |
109 | | **Lingua low**      |    64"       |    370"      |    308"      |   108"       |    85"       |
110 | | **CLD3**            |     3.9"     |     29"      |     26"      |    12"       |    11"       |
111 | | **franc**           |     1.2"     |      8"      |      7.8"    |     2.8"     |     2"       |
112 | | **Nito-ELD-php**    |     0.31"    |      2.5"    |      2.2"    |     0.66"    |     0.48"    |
113 | -->
114 | <img alt="time table" width="800" src="https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/main/misc/table_time_py.svg">
115 | 
116 | <sup style="color:#08e">1.</sup> <sup style="color:#777">Lingua could have a small advantage as it participates with 54 languages, 6 fewer.</sup>  
117 | <sup style="color:#08e">2.</sup> <sup style="color:#777">CLD2 and CLD3 return a list of languages; the ones not included in this test were discarded, but they usually return one language. I believe they have a disadvantage. 
118 | Also, I confirm that the results of CLD2 for short text are correct, contrary to the test on the *Lingua* page: they did not use the parameter "bestEffort = True", so their benchmark for CLD2 is unfair.</sup>
119 | 
120 | *Lingua* is the average accuracy winner, but at what cost? The same test that takes less than 10 seconds in *ELD* or *CLD2* takes more than 5 hours in *Lingua*! It acts like brute-force software. 
121 | Also, its lead comes from single words and word pairs, which are unreliable regardless.
122 | 
123 | The Python version of *Nito-ELD* is not the fastest, but it is still considered fast, as it is faster than any other non-compiled detector tested.
124 | 
125 | I added *ELD-L* for comparison, which has a 2.3x bigger database, but only increases execution time marginally, a testament to the efficiency of the algorithm. *ELD-L* is not the main database as it does not improve language detection in sentences.
126 | 
127 | Here is the average, per benchmark, of Tweets, Big test & Sentences.
128 | 
129 | ![Sentences tests average](https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/main/misc/sentences_avg_py.png)
130 | <!--- Sentences average
131 | |                     | Time         | Accuracy     |
132 | |:--------------------|:------------:|:------------:|
133 | | **Nito-ELD-py**     |      5.17"   | 99.16%       |
134 | | **Nito-ELD-php**    |      1.65"   | 99.16%       |
135 | | **Lingua**          |  15800"      | 98.84%       |
136 | | **CLD2**            |      1.35"   | 96.08%       |
137 | | **Lingua low**      |    247"      | 96.51%       |
138 | | **CLD3**            |     19.6"    | 94.19%       |
139 | | **franc**           |      5.7"    | 90.79%       |
140 | -->
141 | 
142 | ## Languages
143 | 
144 | These are the *ISO 639-1 codes* of the 60 supported languages for *Nito-ELD* v1
145 | 
146 | > 'am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gu', 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr', 'ms', 'nl', 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur', 'vi', 'yo', 'zh'
147 | 
148 | Full name languages:
149 | 
150 | > Amharic, Arabic, Azerbaijani (Latin), Belarusian, Bulgarian, Bengali, Catalan, Czech, Danish, German, Greek, English, Spanish, Estonian, Basque, Persian, Finnish, French, Gujarati, Hebrew, Hindi, Croatian, Hungarian, Armenian, Icelandic, Italian, Japanese, Georgian, Kannada, Korean, Kurdish (Arabic), Lao, Lithuanian, Latvian, Malayalam, Marathi, Malay (Latin), Dutch, Norwegian, Oriya, Punjabi, Polish, Portuguese, Romanian, Russian, Slovak, Slovene, Albanian, Serbian (Cyrillic), Swedish, Tamil, Telugu, Thai, Tagalog, Turkish, Ukrainian, Urdu, Vietnamese, Yoruba, Chinese
151 | 
152 | ## Future improvements
153 | 
154 | - Train from bigger datasets, and more languages.
155 | - The tokenizer could separate characters from languages that have their own alphabet, potentially improving accuracy and reducing the N-grams database. Retraining and testing are needed.
156 | 
157 | **Donate / Hire**   
158 | If you wish to Donate for open source improvements, Hire me for private modifications / upgrades, or to Contact me, use the following link: https://linktr.ee/nitotm


--------------------------------------------------------------------------------
/benchmark/bench.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | import os
 3 | import sys
 4 | 
 5 | # Make sure local package is imported instead of pip package
 6 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
 7 | sys.path.insert(0, project_root)  # prioritize the local package
 8 | # sys.path.append('../..')
 9 | 
10 | from eld.languageDetector import LanguageDetector
11 | 
12 | langDetect = LanguageDetector()
13 | print(f"ELD version: {langDetect.VERSION}\n")
14 | 
15 | files = ['tweets.txt', 'big-test.txt', 'sentences.txt', 'word-pairs.txt', 'single-words.txt']
16 | durations = []
17 | 
18 | for file in files:
19 |     content = open(file, encoding="utf-8").read()
20 |     lines = content.strip().split("\n")
21 |     texts = []
22 | 
23 |     for line in lines:
24 |         values = line.split("\t")
25 |         texts.append([values[1], values[0]])
26 | 
27 |     total = len(texts)
28 |     correct = 0
29 |     duration = 0
30 | 
31 |     for text in texts:
32 |         start = time.time()
33 |         language = langDetect.detect(text[0]).language
34 |         duration += time.time() - start
35 |         if language == text[1]:
36 |             correct += 1
37 |     durations.append(duration)
38 |     print(f"{file} - Correct ratio: {round((correct / total) * 100, 2)}% Time: {duration}\n")
39 | 
40 | average = sum(durations) / len(durations) if len(durations) > 0 else 1
41 | print(f"Average duration: {average}\n")
42 | 
43 | # tweets.txt - Correct ratio: 99.28% Time: 0.9556999206542969
44 | # big-test.txt - Correct ratio: 99.41% Time: 7.8356194496154785
45 | # sentences.txt - Correct ratio: 98.77% Time: 6.7327587604522705
46 | # word-pairs.txt - Correct ratio: 87.55% Time: 2.636420488357544
47 | # single-words.txt - Correct ratio: 73.31% Time: 2.12335205078125
48 | 


--------------------------------------------------------------------------------
/demo.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Nito T.M.
 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
 3 | # Author Nito T.M. (https://github.com/nitotm)
 4 | # Package pypi.org/project/eld/
 5 | 
 6 | from eld import LanguageDetector
 7 | 
 8 | detector = LanguageDetector()
 9 | 
10 | # detect() expects a UTF-8 string and returns an object with a 'language' variable: an ISO 639-1 code or None
11 | print(detector.detect('Hola, cómo te llamas?'))
12 | # Object { language: "es", scores(): {"es": 0.53, "et": 0.21, ...}, is_reliable(): True }
13 | # Object { language: None|str, scores(): None|dict, is_reliable(): bool }
14 | print(detector.detect('Hola, cómo te llamas?').language)
15 | # "es"
16 | 
17 | # clean_text(True) removes URLs, domains, emails, alphanumerical & numbers
18 | detector.clean_text(True)  # Default is False
19 | 
20 | # To reduce the languages to be detected, there are 3 different options; each only needs to be executed once.
21 | # This is the complete list of languages for ELD v1, using ISO 639-1 codes:
22 | # ['am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gu',
23 | # 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr', 'ms', 'nl',
24 | # 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur',
25 | # 'vi', 'yo', 'zh']
26 | 
27 | lang_subset = ['en', 'es', 'fr', 'it', 'nl', 'de']
28 | 
29 | # Option 1. With dynamic_lang_subset(), detect() executes normally, but at the end will filter the excluded languages.
30 | detector.dynamic_lang_subset(lang_subset)
31 | # Returns an object with a list named 'languages', with the validated languages or 'None'
32 | 
33 | # To remove the subset
34 | detector.dynamic_lang_subset(None)
35 | 
36 | # Option 2. lang_subset(langs, save=True) first removes the excluded languages from the Ngrams database; for
37 | # a single detection it might be slower than dynamic_lang_subset(), but for several strings it will be faster. If the
38 | # 'save' option is true (default), the new ngrams subset will be stored and cached for next time.
39 | detector.lang_subset(lang_subset)
40 | # Returns object {success: True, languages: ['de', 'en', ...], error: None, file: 'ngramsM60...'}
41 | 
42 | # To remove the subset
43 | detector.lang_subset(None)
44 | 
45 | print(detector.VERSION)
46 | 
47 | # Finally, the optimal way to regularly use the same language subset is to pass the file stored (and returned)
48 | # by lang_subset() as an argument when creating an instance of the class. In this case the subset Ngrams database
49 | # will be loaded directly, and not the default database. Also, you can use this option to load different ngram
50 | # databases stored at eld/resources/ngrams
51 | langSubsetDetect = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd')
52 | 


--------------------------------------------------------------------------------
/eld/__init__.py:
--------------------------------------------------------------------------------
1 | from eld.languageDetector import LanguageDetector
2 | 


--------------------------------------------------------------------------------
/eld/languageData.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Nito T.M.
 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
 3 | # Author Nito T.M. (https://github.com/nitotm)
 4 | # Package pypi.org/project/eld/
 5 | 
 6 | import importlib.util
 7 | import os
 8 | 
 9 | 
class LanguageData:
    """Container for the Ngrams database in use and the metadata read alongside it."""

    def __init__(self):
        from .resources.avg_score import avg_score

        self.avg_score = avg_score
        # Filled in by load_ngrams()
        self.ngrams = {}
        self.lang_score = []
        self.lang_codes = {}
        self.type = ''
        # Folder where the Ngrams database modules are stored
        self.folder = os.path.dirname(__file__) + '/resources/ngrams/'

    # ISO 639-1 codes, for the 60 languages set.
    # ['am', 'ar', 'az', 'be', 'bg', 'bn', 'ca', 'cs', 'da', 'de', 'el', 'en', 'es', 'et', 'eu', 'fa', 'fi', 'fr', 'gu',
    # 'he', 'hi', 'hr', 'hu', 'hy', 'is', 'it', 'ja', 'ka', 'kn', 'ko', 'ku', 'lo', 'lt', 'lv', 'ml', 'mr', 'ms', 'nl',
    # 'no', 'or', 'pa', 'pl', 'pt', 'ro', 'ru', 'sk', 'sl', 'sq', 'sr', 'sv', 'ta', 'te', 'th', 'tl', 'tr', 'uk', 'ur',
    # 'vi', 'yo', 'zh']

    # ['Amharic', 'Arabic', 'Azerbaijani (Latin)', 'Belarusian', 'Bulgarian', 'Bengali', 'Catalan', 'Czech', 'Danish',
    # 'German', 'Greek', 'English', 'Spanish', 'Estonian', 'Basque', 'Persian', 'Finnish', 'French', 'Gujarati',
    # 'Hebrew', 'Hindi', 'Croatian', 'Hungarian', 'Armenian', 'Icelandic', 'Italian', 'Japanese', 'Georgian', 'Kannada',
    # 'Korean', 'Kurdish (Arabic)', 'Lao', 'Lithuanian', 'Latvian', 'Malayalam', 'Marathi', 'Malay (Latin)', 'Dutch',
    # 'Norwegian', 'Oriya', 'Punjabi', 'Polish', 'Portuguese', 'Romanian', 'Russian', 'Slovak', 'Slovene', 'Albanian',
    # 'Serbian (Cyrillic)', 'Swedish', 'Tamil', 'Telugu', 'Thai', 'Tagalog', 'Turkish', 'Ukrainian', 'Urdu',
    # 'Vietnamese', 'Yoruba', 'Chinese']

    def load_ngrams(self, subset_file=''):
        """
        Loads an Ngrams database into this object.

        Args:
            subset_file (str): Name (without '.py') of a database module stored in the ngrams folder, or in its
                'subset/' sub-folder. An empty string loads the default ngramsM60 database.
        """
        if subset_file != '':
            # Look in the ngrams folder first, otherwise assume the file lives in 'subset/'
            candidate = self.folder + subset_file + '.py'
            if os.path.exists(candidate):
                file_path = candidate
            else:
                file_path = self.folder + 'subset/' + subset_file + '.py'
            spec = importlib.util.spec_from_file_location(subset_file, file_path)
            loaded_module = importlib.util.module_from_spec(spec)
            spec.loader.exec_module(loaded_module)
            database = loaded_module.ngrams_data
        else:
            from .resources.ngrams.ngramsM60 import ngrams_data as database

        self.ngrams = database['ngrams']
        # One score slot per language index, from 0 up to the highest index present
        self.lang_score = [0] * (max(database['languages']) + 1)
        self.type = database['type']
        self.lang_codes = database['languages']
52 | 
# Shared module-level instance: languageDetector, languageResult and languageSubset all import this same object.
languageData = LanguageData()
54 | 


--------------------------------------------------------------------------------
/eld/languageDetector.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 Nito T.M.
  2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
  3 | # Author Nito T.M. (https://github.com/nitotm)
  4 | # Package pypi.org/project/eld/
  5 | 
  6 | import regex as re
  7 | 
  8 | from eld.languageData import languageData
  9 | from eld.languageSubset import LanguageSubset
 10 | from eld.languageResult import LanguageResult
 11 | 
 12 | 
 13 | class LanguageDetector(LanguageSubset):
 14 |     def __init__(self, subset_file=''):
 15 |         super().__init__()
 16 |         languageData.load_ngrams(subset_file)
 17 |         self.__do_clean_text = False
 18 |         self.VERSION = '1.0.8'  # Has to match pyproject.toml version
 19 | 
 20 |     def detect(self, text):
 21 |         """
 22 |         Returns the language detected for a given UTF-8 string, as an ISO 639-1 code
 23 |             LanguageResult object { language = 'es', scores() = {'es': 0.5, 'et': 0.2}, is_reliable() = True }
 24 | 
 25 |         Args:
 26 |             text (str): UTF-8 string
 27 | 
 28 |         Returns:
 29 |             object LanguageResult: language (str or None), scores() (dict or None), is_reliable() (bool)
 30 |         """
 31 |         if not isinstance(text, str):
 32 |             raise TypeError("Input 'text' must be a string.")
 33 |         if self.__do_clean_text:
 34 |             # Removes Urls, emails, alphanumerical & numbers
 35 |             text = get_clean_txt(text)
 36 |         text = _normalize_text(text)
 37 |         byte_ngrams = _get_byte_ngrams(text)
 38 |         num_ngrams = len(byte_ngrams)
 39 |         results = _calculate_scores(byte_ngrams, num_ngrams)
 40 | 
 41 |         if results:
 42 |             if self.subset:
 43 |                 results = LanguageSubset._filter_lang_subset(self, results)
 44 |             results.sort(key=lambda x: -x[1])
 45 |             return LanguageResult(results, num_ngrams)
 46 |         return LanguageResult()
 47 | 
 48 |     def clean_text(self, set_bool):
 49 |         self.__do_clean_text = (True if set_bool else False)
 50 | 
 51 | 
 52 | def _tokenizer(txt):
 53 |     return filter(None, re.split(b'\x20', txt))
 54 | 
 55 | 
 56 | def get_clean_txt(txt):
 57 |     """Removes parts of a string, that may be considered as "noise" for language detection"""
 58 |     # Remove URLS
 59 |     txt = re.sub(r'[hw]((ttps?://(www\.)?)|ww\.)([^\s/?\.#-]+\.?)+(\/\S*)?', ' ', txt, flags=re.IGNORECASE)
 60 |     # Remove emails
 61 |     txt = re.sub(r'[a-zA-Z0-9.!$%&?+_`-]+@[A-Za-z0-9.-]+\.[A-Za-z0-9-]{2,64}', ' ', txt)
 62 |     # Remove .com domains
 63 |     txt = re.sub(r'([A-Za-z0-9-]+\.)+com(\/\S*|[^\pL])', ' ', txt)
 64 |     # Remove alphanumerical/number codes
 65 |     txt = re.sub(r'[a-zA-Z]*[0-9]+[a-zA-Z0-9]*', ' ', txt)
 66 |     return txt
 67 | 
 68 | 
 69 | def _normalize_text(text):
 70 |     """Normalize special characters/word separators"""
 71 |     text = re.sub(r'[^\pL]+(?<![\x27\x60\x2019])', ' ', text[:1000], flags=re.UNICODE).strip()
 72 |     text = text.lower()
 73 |     text = bytes(text, 'utf-8')
 74 |     this_length = len(text)
 75 | 
 76 |     if this_length > 350:
 77 |         # Cut to first whitespace after 350 byte length offset
 78 |         text = text[0:min(380, (text.find(b'\x20', 350) or 350))]
 79 |     return text
 80 | 
 81 | 
 82 | def _calculate_scores(byte_ngrams, num_ngrams):
 83 |     """Calculate scores for each language from the given Ngrams"""
 84 |     lang_score = languageData.lang_score[:]
 85 |     for bytes_, frequency in byte_ngrams.items():
 86 |         if bytes_ in languageData.ngrams:
 87 |             lang_count = len(languageData.ngrams[bytes_])
 88 |             # Ngram score multiplier, the fewer languages found the more relevancy. Formula can be fine-tuned.
 89 |             if lang_count == 1:
 90 |                 relevancy = 27  # Handpicked relevance multiplier, trial-error
 91 |             elif lang_count < 16:
 92 |                 relevancy = (16 - lang_count) / 2 + 1
 93 |             else:
 94 |                 relevancy = 1
 95 | 
 96 |             # Most time-consuming loop, do only the strictly necessary inside
 97 |             for lang, global_frequency in languageData.ngrams[bytes_].items():
 98 |                 lang_score[lang] += (global_frequency / frequency if frequency > global_frequency
 99 |                                      else frequency / global_frequency) * relevancy + 2
100 |     # This divisor will produce a final score between 0 - ~1, score could be >1. Can be improved.
101 |     result_divisor = num_ngrams * 3.2
102 |     results = []
103 |     for lang in range(len(lang_score)):
104 |         if lang_score[lang]:
105 |             results.append([lang, lang_score[lang] / result_divisor])  # * languageData.scoreNormalizer[lang]
106 |     return results
107 | 
108 | 
def _get_byte_ngrams(txt):
    """
    Gets Ngrams from a given string

    Args:
        txt (bytes): Normalized text, words separated by spaces

    Returns:
        dict: Ngram (bytes) -> relative frequency, scaled by 13200
    """
    counts = {}
    total = 0

    for word in _tokenizer(txt):
        length = min(len(word), 70)

        # Inner ngrams: 4 bytes every 3 bytes; the one at the start of the word gets a leading space
        starts = range(0, length - 4, 3)
        for start in starts:
            lead = b' ' if start == 0 else b''
            ngram = lead + word[start:start + 4]
            counts[ngram] = counts.get(ngram, 0) + 1
            total += 1

        # Closing ngram: from 4 bytes before 'length' to the end of the word (from 0 for 3-byte words), plus a
        # trailing space. It also gets the leading space when the word produced no inner ngram.
        lead = b'' if len(starts) else b' '
        tail_from = 0 if length == 3 else length - 4
        ngram = lead + word[tail_from:] + b' '
        counts[ngram] = counts.get(ngram, 0) + 1
        total += 1

    # Frequency is multiplied by 15000 at the ngrams database. A reduced number (13200) seems to work better.
    # Linear formulas were tried, decreasing the multiplier for fewer ngram strings, no meaningful improvement.
    return {ngram: count / total * 13200 for ngram, count in counts.items()}
136 | 


--------------------------------------------------------------------------------
/eld/languageResult.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Nito T.M.
 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
 3 | # Author Nito T.M. (https://github.com/nitotm)
 4 | # Package pypi.org/project/eld/
 5 | 
 6 | import json
 7 | from eld.languageData import languageData
 8 | 
 9 | 
class LanguageResult:
    """Result of a detection: best language, scores per language and a reliability check."""

    def __init__(self, results=None, num_ngrams=None):
        # results: list of [language index, score] with the best match first, or None/empty when nothing matched
        if results:
            best_lang_id = results[0][0]
            self.language = languageData.lang_codes[best_lang_id]
        else:
            self.language = None
        self.__results = results
        self.__num_ngrams = num_ngrams

    def __str__(self):
        summary = {
            'language': self.language,
            'scores()': self.scores(),
            'is_reliable()': self.is_reliable()
        }
        return json.dumps({'<object>': summary})

    def scores(self):
        """Returns a dict of language code -> score for every language in the results."""
        return _get_scores(self.__results)

    def is_reliable(self):
        """Returns False when there is no language, fewer than 3 ngrams, a low top score or a too close second."""
        ranked = self.__results
        if not (self.language and self.__num_ngrams >= 3 and ranked):
            return False

        top_score = ranked[0][1]
        next_score = ranked[1][1] if len(ranked) > 1 else 0
        # A minimum of a 24% from the average score
        if languageData.avg_score[self.language] * 0.24 > (top_score / self.__num_ngrams):
            return False
        # The best two scores have to differ by at least 0.01
        return not (0.01 > abs(top_score - next_score))
36 | 
37 | 
38 | def _get_scores(results):
39 |     scores = {}
40 |     if results:
41 |         for value in results:
42 |             scores[languageData.lang_codes[value[0]]] = value[1]
43 |     return scores
44 | 


--------------------------------------------------------------------------------
/eld/languageSubset.py:
--------------------------------------------------------------------------------
  1 | # Copyright 2023 Nito T.M.
  2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
  3 | # Author Nito T.M. (https://github.com/nitotm)
  4 | # Package pypi.org/project/eld/
  5 | 
  6 | import hashlib
  7 | import os
  8 | import copy
  9 | import importlib.util
 10 | import logging
 11 | 
 12 | from eld.languageData import languageData
 13 | from eld.subsetResult import SubsetResult
 14 | 
 15 | 
 16 | class LanguageSubset:
 17 | 
 18 |     def __init__(self):
 19 |         self.subset = None
 20 |         self.default_ngrams = None
 21 |         self.loaded_subset = None
 22 | 
 23 |     def dynamic_lang_subset(self, languages):
 24 |         """
 25 |         Sets a subset, then detect() will filter the languages not included, from the scores with filterLangSubset()
 26 |         Call dynamic_lang_subset(None) to deactivate
 27 | 
 28 |         Args:
 29 |             languages (list or None): List of languages (ISO 639-1) to include in subset, or None to delete subset
 30 | 
 31 |         Returns:
 32 |             object SubsetResult: success (bool), languages (list or None), error (str or None)
 33 |         """
 34 |         self.subset = None
 35 |         if languages:
 36 |             self.subset = _make_subset(languages)
 37 |             if self.subset is None:
 38 |                 return SubsetResult(False, None, 'No language matched this set')
 39 |         return SubsetResult(True, _iso_languages(self.subset) if self.subset else None)
 40 | 
 41 |     def lang_subset(self, languages, save=True):
 42 |         """
 43 |          Sets a subset and removes the excluded languages form the ngrams database
 44 |          if $save option is true, the new ngrams subset will be stored, and cached for next time
 45 | 
 46 |         Args:
 47 |             languages (list or None): List of languages (ISO 639-1) to include in subset, or None to delete subset
 48 | 
 49 |         Returns:
 50 |             object SubsetResult: success (bool), languages (list or None), error (str or None), file (str)
 51 |         """
 52 |         if not languages:
 53 |             if self.loaded_subset and self.default_ngrams:
 54 |                 languageData.ngrams = copy.deepcopy(self.default_ngrams)
 55 |                 self.loaded_subset = None
 56 |             return SubsetResult(True)  # if there was already no subset to disable, it also is successful
 57 | 
 58 |         lang_array = _make_subset(languages)
 59 |         if not lang_array:
 60 |             return SubsetResult(False, None, 'No language matched this set')
 61 | 
 62 |         if self.default_ngrams is None:
 63 |             self.default_ngrams = copy.deepcopy(languageData.ngrams)
 64 | 
 65 |         langs_str = [str(lang) for lang in lang_array]
 66 |         new_subset = base16_to_base36(
 67 |             hashlib.sha1(','.join(langs_str).encode()).hexdigest()
 68 |         )
 69 |         file_name = 'ngrams' + languageData.type + '-' + str(len(lang_array)) + '_' + new_subset
 70 |         file_path = languageData.folder + 'subset/' + file_name + '.py'
 71 | 
 72 |         if self.loaded_subset != new_subset:
 73 |             self.loaded_subset = new_subset
 74 | 
 75 |             if os.path.exists(file_path):
 76 |                 # module = importlib.import_module('.ngrams.' + file_name, package=file_name)
 77 |                 spec = importlib.util.spec_from_file_location(file_name, file_path)
 78 |                 module = importlib.util.module_from_spec(spec)
 79 |                 spec.loader.exec_module(module)
 80 |                 languageData.ngrams = module.ngrams_data['ngrams']
 81 |                 if languageData.ngrams:
 82 |                     return SubsetResult(True, _iso_languages(lang_array), None, file_path)
 83 | 
 84 |             if self.default_ngrams != languageData.ngrams:
 85 |                 languageData.ngrams = copy.deepcopy(self.default_ngrams)
 86 | 
 87 |             for ngram, langsID in self.default_ngrams.items():
 88 |                 for lid, value in langsID.items():
 89 |                     if lid not in lang_array:
 90 |                         del languageData.ngrams[ngram][lid]
 91 |                 if not languageData.ngrams[ngram]:
 92 |                     del languageData.ngrams[ngram]
 93 | 
 94 |         saved = False
 95 |         if save:
 96 |             saved = _save_ngrams(file_path, lang_array)
 97 | 
 98 |         return SubsetResult(True, _iso_languages(lang_array), None, (file_name if saved else None))
 99 | 
100 |     def _filter_lang_subset(self, scores):
101 |         """Filters languages not included in the subset, from the result scores"""
102 |         sub_results = []
103 |         for score in scores:
104 |             if score[0] in self.subset:
105 |                 sub_results.append(score)
106 |         return sub_results
107 | 
108 | 
109 | def _ngram_export(data):
110 |     if isinstance(data, dict):
111 |         to_implode = []
112 |         for key, value in data.items():
113 |             to_implode.append(repr(key) + ':' + _ngram_export(value))
114 |         code = '{' + ','.join(to_implode) + '}'
115 |         return code
116 |     else:
117 |         return repr(data)
118 | 
119 | 
def _save_ngrams(file_path, lang_array):
    """
    Stores the current languageData.ngrams database as a Python module, unless the file already exists.

    Args:
        file_path (str): Full path of the subset file to write
        lang_array (list): Language indexes included in the subset

    Returns:
        bool: True if the file already existed or was written, False if it could not be written (error is logged)
    """
    if os.path.exists(file_path):  # in case self.loaded_subset != new_subset, and was previously saved
        return True

    try:
        # Serialize before opening the file, so a failure here cannot leave an empty file behind
        content = (
            '# Copyright 2023 Nito T.M. [ Apache 2.0 Licence https://www.apache.org/licenses/LICENSE-2.0 ]\n' +
            'ngrams_data = {\n' +
            '   "type": "' + str(languageData.type) + '",\n' +
            '   "languages": ' + str(_iso_languages(lang_array)) + ',\n' +
            '   "is_subset": True,\n' +
            '   "ngrams": ' + _ngram_export(languageData.ngrams) + '\n' +
            '}')
        folder = os.path.dirname(file_path)
        if folder:
            os.makedirs(folder, exist_ok=True)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(content)
    except Exception as e:
        logging.exception(e)
        # A partially written file would be taken for a valid cached subset next time, remove it (best effort)
        try:
            os.remove(file_path)
        except OSError:
            pass
        return False
    return True
136 | 
137 | 
138 | def _make_subset(languages):
139 |     """
140 |     Validates an expected array of ISO 639-1 language code strings, given by the user, and creates a subset of the valid
141 |     languages compared against the current database available languages
142 |     """
143 |     subset = []
144 |     reverse_langs = {v: k for k, v in languageData.lang_codes.items()}
145 |     if languages:
146 |         for lang in languages:
147 |             found_lang = reverse_langs.get(lang)
148 |             if found_lang is not None:
149 |                 subset.append(found_lang)
150 |         subset.sort()
151 |     return subset or None
152 | 
153 | 
154 | def _iso_languages(lang_set):
155 |     """Converts ngram database language indexes (integer) to ISO 639-1 code"""
156 |     lang_codes = {}
157 |     for lang_id in lang_set:
158 |         lang_codes[lang_id] = languageData.lang_codes[lang_id]
159 |     return lang_codes
160 | 
161 | 
def base16_to_base36(hex_string):
    """
    Converts a hexadecimal string to its base-36 representation (digits 0-9 then a-z, lowercase).

    Args:
        hex_string (str): Hexadecimal number, as accepted by int(hex_string, 16)

    Returns:
        str: Base-36 string, without leading zeros; '0' for a zero value
    """
    alphabet = '0123456789abcdefghijklmnopqrstuvwxyz'
    # Convert hex string to integer
    integer_value = int(hex_string, 16)
    if integer_value == 0:
        # The loop below produces no digit for zero, which used to return an empty string
        return '0'

    # Convert integer to base-36 string, least significant digit first
    digits = []
    while integer_value > 0:
        integer_value, remainder = divmod(integer_value, 36)
        digits.append(alphabet[remainder])

    return ''.join(reversed(digits))
173 | 


--------------------------------------------------------------------------------
/eld/resources/avg_score.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Nito T.M.
 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
 3 | # Author Nito T.M. (https://github.com/nitotm)
 4 | # Package pypi.org/project/eld/
 5 | 
# Average score of each language in a correct detection, done with an extended version of big-test benchmark.
# Keys are ISO 639-1 codes. LanguageResult.is_reliable() uses these values as a baseline: a detection whose top
# score, divided by the number of ngrams, is below 24% of the language average is reported as not reliable.
avg_score = {'am': 0.0661, 'ar': 0.0237, 'az': 0.0269, 'be': 0.0227, 'bg': 0.0234, 'bn': 0.1373, 'ca': 0.0246,
             'cs': 0.0242, 'da': 0.0277, 'de': 0.0275, 'el': 0.0369, 'en': 0.0378, 'es': 0.0252, 'et': 0.0253,
             'eu': 0.0369, 'fa': 0.0213, 'fi': 0.026, 'fr': 0.0253, 'gu': 0.1197, 'he': 0.0402, 'hi': 0.0578,
             'hr': 0.0201, 'hu': 0.0208, 'hy': 0.0439, 'is': 0.032, 'it': 0.0251, 'ja': 0.0375, 'ka': 0.1383,
             'kn': 0.1305, 'ko': 0.0222, 'ku': 0.0256, 'lo': 0.3488, 'lt': 0.0246, 'lv': 0.0264, 'ml': 0.1322,
             'mr': 0.0571, 'ms': 0.0251, 'nl': 0.0342, 'no': 0.0266, 'or': 0.1269, 'pa': 0.1338, 'pl': 0.0275,
             'pt': 0.0252, 'ro': 0.0247, 'ru': 0.0184, 'sk': 0.024, 'sl': 0.0253, 'sq': 0.0353, 'sr': 0.0234,
             'sv': 0.033, 'ta': 0.1513, 'te': 0.1547, 'th': 0.0882, 'tl': 0.0368, 'tr': 0.0258, 'uk': 0.0206,
             'ur': 0.0282, 'vi': 0.0467, 'yo': 0.0329, 'zh': 0.0152}
16 | 
17 | # Deprecated for now: Some languages score higher with the same amount of text, this multiplier evens it out for
18 | #  multi-language strings
19 | # self.scoreNormalizer = [0.7, 1, 1, 1, 1, 0.6, 0.98, 1, 1, 1, 0.9, 1, 1, 1, 1, 1, 1, 1, 0.6, 1, 0.7, 1, 1, 0.9, 1, 1,
20 | # 0.8, 0.6, 0.6, 1, 1, 0.5, 1, 1, 0.6, 0.7, 1, 0.95, 1, 0.6, 0.6, 1, 1, 1, 1, 1, 1, 0.9, 1, 1, 0.6, 0.6, 0.7, 0.9, 1, 1,
21 | # 1, 0.8, 1, 1.7]
22 | 


--------------------------------------------------------------------------------
/eld/resources/ngrams/subset/ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py:
--------------------------------------------------------------------------------
1 | # Copyright 2023 Nito T.M. [ Apache 2.0 Licence https://www.apache.org/licenses/LICENSE-2.0 ]
2 | ngrams_data = {
3 |    "type": "M60",
4 |    "languages": {11: 'en'},
5 |    "is_subset": True,
6 |    "ngrams": {b' p ':{11:5},b' the ':{11:576},b' a ':{11:197},b' of ':{11:262},b'tion ':{11:129},b'mber ':{11:16},b' in ':{11:193},b' ethi':{11:1},b' de ':{11:2},b' and ':{11:245},b' b ':{11:2},b'uary ':{11:3},b' f ':{11:1},b' c ':{11:2},b' marc':{11:3},b'anda ':{11:1},b' apar':{11:1},b' mill':{11:5},b' part':{11:17},b'ista':{11:2},b' prob':{11:6},b'stan ':{11:1},b' pros':{11:1},b'blem':{11:2},b' poli':{11:14},b'dent ':{11:9},b' mana':{11:6},b' tele':{11:2},b'llar ':{11:1},b' demo':{11:4},b' resp':{11:7},b' isla':{11:2},b' parl':{11:2},b' dedi':{11:1},b' indi':{11:8},b'imiz':{11:1},b' form':{11:7},b' plan':{11:7},b' inte':{11:27},b'enti':{11:10},b' sist':{11:1},b' doll':{11:1},b' regi':{11:8},b' yard':{11:1},b' amer':{11:5},b' depu':{11:1},b' hall':{11:1},b' isra':{11:1},b' s ':{11:7},b' i ':{11:33},b'isin':{11:1},b' fran':{11:3},b' ener':{11:5},b'nada ':{11:1},b'ular ':{11:6},b'orma':{11:7},b' medi':{11:7},b'land':{11:1},b' stan':{11:6},b'issi':{11:1},b' univ':{11:5},b' info':{11:7},b'vers':{11:7},b' dipl':{11:1},b' radi':{11:3},b' norm':{11:2},b' terr':{11:3},b'ment ':{11:45},b' bank':{11:2},b' m ':{11:4},b'pert':{11:3},b' real':{11:7},b' prin':{11:4},b' e ':{11:4},b' musi':{11:4},b'list ':{11:1},b' stat':{11:18},b'llin':{11:1},b' sala':{11:1},b' stra':{11:5},b' film':{11:1},b'blem ':{11:2},b' bala':{11:1},b' dire':{11:7},b'erne':{11:2},b' vers':{11:2},b' agen':{11:3},b' pull':{11:1},b' metr':{11:1},b'ular':{11:2},b'sion ':{11:21},b' inst':{11:10},b'entl':{11:2},b'demi':{11:1},b' gene':{11:9},b'pani':{11:3},b'mina':{11:3},b' mart':{11:1},b' aid ':{11:1},b' euro':{11:15},b'rmal ':{11:2},b' obam':{11:1},b' ukra':{11:1},b' r ':{11:1},b'edia ':{11:3},b'onal ':{11:23},b' alan ':{11:1},b'stit':{11:2},b'mpio':{11:1},b' faci':{11:3},b'titu':{11:3},b'ensi':{11:4},b' anal':{11:3},b'ndin':{11:3},b' real ':{11:3},b' foru':{11:1},b'imal ':{11:1},b' vari':{11:5},b' sent':{11:1},b'ndar':{11:4},b'eria':{11:3},b' on ':{11:70},b' mate':{11:3},b' prot':{11:6},b' stru':{11:3},b' 
tale':{11:1},b' inve':{11:6},b'eral ':{11:11},b'tand':{11:1},b' admi':{11:3},b'alis':{11:1},b' mark':{11:6},b'rnet ':{11:2},b'isti':{11:3},b'erna':{11:8},b' tran':{11:9},b' livi':{11:2},b'inis':{11:3},b' brit':{11:3},b'stra':{11:3},b' prof':{11:5},b'aniz':{11:3},b' post':{11:2},b' mode':{11:6},b'itin':{11:1},b' roma':{11:2},b'eren':{11:2},b'oses ':{11:1},b'orit':{11:4},b'itor':{11:2},b'rror ':{11:1},b'esti':{11:4},b' para':{11:2},b' h ':{11:1},b'bama ':{11:1},b' pote':{11:2},b' pers':{11:7},b' miss':{11:3},b' arti':{11:5},b'erin':{11:4},b' ad ':{11:1},b'ider ':{11:2},b'iona':{11:13},b' refe':{11:3},b' moni':{11:1},b' park':{11:1},b' ital':{11:2},b'andi':{11:1},b' bank ':{11:1},b'fess':{11:3},b'eral':{11:3},b' may ':{11:10},b' sess':{11:1},b' fede':{11:2},b' mini':{11:4},b' fest':{11:1},b'tiva':{11:1},b' anti':{11:1},b'ssor ':{11:1},b'tora':{11:1},b'pert ':{11:1},b'rban ':{11:1},b'muni':{11:8},b' vide':{11:2},b'rman ':{11:3},b'ilab':{11:4},b'inal ':{11:5},b' effe':{11:5},b'cula':{11:2},b' oper':{11:6},b' temp':{11:3},b' rest':{11:3},b'tist':{11:1},b' depa':{11:2},b' lond':{11:1},b' port':{11:3},b' dial':{11:1},b' mind':{11:1},b' assa':{11:1},b' t ':{11:1},b' hard':{11:1},b' huma':{11:4},b'edit ':{11:1},b'obal ':{11:2},b'logi':{11:3},b' men ':{11:2},b' seri':{11:4},b'rdin':{11:2},b'sion':{11:6},b' inci':{11:1},b' resu':{11:6},b' pres':{11:16},b'ress ':{11:7},b' can ':{11:22},b'tual ':{11:3},b' sena':{11:1},b' by ':{11:50},b'loud ':{11:1},b' soun':{11:2},b' per ':{11:3},b' no ':{11:13},b'ions ':{11:47},b'uest ':{11:1},b' esta':{11:3},b' cons':{11:19},b' comp':{11:28},b'ents ':{11:27},b' desp':{11:2},b' cont':{11:25},b' entr':{11:2},b' pass':{11:4},b' cata':{11:1},b' prim':{11:3},b' d ':{11:2},b' serv':{11:12},b' reco':{11:9},b' l ':{11:1},b' cent':{11:10},b'ants ':{11:6},b' prop':{11:8},b'ones ':{11:2},b' come':{11:2},b'rant ':{11:2},b'enta':{11:4},b'tant ':{11:6},b'side':{11:9},b' actu':{11:2},b' fina':{11:7},b'cies ':{11:4},b' soci':{11:7},b' gove':{11:8},b' 
expl':{11:4},b'lica':{11:6},b' conv':{11:6},b' sens':{11:3},b' havi':{11:2},b' conc':{11:8},b' espe':{11:2},b'cial ':{11:12},b' carr':{11:3},b' arri':{11:1},b' part ':{11:6},b'ense ':{11:4},b' asse':{11:3},b' dona':{11:1},b'ject':{11:4},b' dive':{11:1},b' expe':{11:12},b' qual':{11:3},b' fall':{11:1},b'ilit':{11:6},b' era ':{11:1},b'sent':{11:3},b' dema':{11:2},b' cost':{11:2},b'aris ':{11:1},b' impo':{11:7},b'vant ':{11:1},b'ctor ':{11:6},b' band':{11:1},b'iste':{11:4},b' conf':{11:7},b' muni':{11:1},b' util':{11:1},b'icip':{11:1},b'alit':{11:2},b'eria ':{11:1},b'ment':{11:9},b' mome':{11:1},b' dest':{11:2},b' situ':{11:2},b' proj':{11:9},b' rema':{11:4},b'ests ':{11:3},b' deci':{11:5},b' desc':{11:3},b' prov':{11:12},b' proc':{11:8},b'rior ':{11:2},b'able ':{11:19},b'tici':{11:5},b' dese':{11:1},b' assi':{11:3},b' web ':{11:1},b'ials ':{11:3},b'ries ':{11:14},b'aria ':{11:1},b' mili':{11:2},b' camp ':{11:1},b' prev':{11:4},b'port ':{11:11},b' prod':{11:10},b'ible ':{11:8},b'bles ':{11:2},b'istr':{11:2},b' nece':{11:2},b'nals ':{11:2},b' rela':{11:7},b'essi':{11:2},b' loca':{11:8},b' any ':{11:8},b' perm':{11:2},b' defe':{11:3},b' nove':{11:3},b' trac':{11:2},b'cers ':{11:1},b' disp':{11:3},b'uals ':{11:1},b'ures ':{11:7},b' cond':{11:5},b'orta':{11:6},b'rese':{11:4},b' majo':{11:4},b'sona':{11:3},b' poss':{11:5},b' cook':{11:1},b'eves ':{11:1},b' posi':{11:4},b'tors ':{11:8},b'anes ':{11:1},b' camp':{11:3},b' corr':{11:3},b'anta ':{11:1},b'otes ':{11:1},b' minu':{11:2},b' supe':{11:3},b'ells ':{11:2},b' ment':{11:2},b'oves ':{11:1},b' elec':{11:7},b' have':{11:1},b' trav':{11:2},b' seve':{11:7},b' visi':{11:5},b'ncip':{11:2},b' prog':{11:9},b' esti':{11:1},b'teri':{11:3},b' acti':{11:9},b' barr':{11:1},b' prem':{11:1},b'erio':{11:1},b' hist':{11:5},b' repr':{11:4},b' auto':{11:2},b' equi':{11:2},b'tinu':{11:6},b'pect':{11:2},b'ient ':{11:4},b'unts ':{11:1},b' peri':{11:3},b'ines ':{11:5},b' sant':{11:1},b' most':{11:1},b' inde':{11:3},b' cast':{11:1},b' 
pilo':{11:1},b'este':{11:1},b'ipal ':{11:1},b'ital ':{11:4},b' func':{11:2},b' crea':{11:7},b'nter':{11:1},b'arts ':{11:2},b' term':{11:3},b' tota':{11:3},b' diss':{11:1},b' cele':{11:2},b'anit':{11:1},b'cals ':{11:1},b'ecia':{11:2},b'cord ':{11:2},b' cult':{11:4},b'stru':{11:2},b' gran':{11:3},b'itat':{11:1},b'uest':{11:1},b'gram':{11:4},b'pora':{11:2},b'tics ':{11:4},b' comi':{11:2},b' pare':{11:2},b' subs':{11:3},b' plat':{11:2},b' decl':{11:2},b' mari':{11:2},b' dete':{11:3},b'duct':{11:6},b' labo':{11:2},b'rmat ':{11:1},b'ives ':{11:7},b' habi':{11:1},b'umen':{11:3},b'stic ':{11:4},b'epen':{11:2},b' econ':{11:6},b'trac':{11:2},b'sibl':{11:5},b' merc':{11:1},b'emes ':{11:1},b'ecta':{11:1},b' orga':{11:7},b'lars ':{11:1},b' disc':{11:7},b' caus':{11:3},b'rals ':{11:1},b'oral ':{11:1},b'nent ':{11:2},b'ebra':{11:1},b'ress':{11:4},b'mple ':{11:5},b'otal ':{11:2},b' prec':{11:2},b'blic ':{11:7},b' publ':{11:11},b' mani':{11:1},b'lant ':{11:1},b'ecto':{11:2},b' dime':{11:1},b'pons':{11:4},b'ides ':{11:4},b'imes ':{11:5},b' trip':{11:1},b'ivit':{11:4},b'rial ':{11:5},b'cles ':{11:2},b'orda':{11:1},b'iple ':{11:2},b'etes ':{11:1},b'ativ':{11:7},b' popu':{11:4},b'ural ':{11:5},b'posi':{11:1},b'sist':{11:2},b' quar':{11:2},b' mont':{11:5},b'vert':{11:1},b'icia':{11:4},b'rent ':{11:10},b' sect':{11:4},b' he ':{11:36},b' acce':{11:7},b' lega':{11:2},b' vehi':{11:2},b' incl':{11:10},b'ocia':{11:3},b'bert ':{11:1},b' volu':{11:2},b' prom':{11:4},b'stal ':{11:1},b' cris':{11:1},b'rans ':{11:1},b'ball ':{11:2},b'anti':{11:1},b'ecti':{11:8},b'ller ':{11:2},b' dist':{11:6},b'icle ':{11:3},b'ules ':{11:2},b'enda ':{11:1},b' futu':{11:4},b'tura':{11:1},b' pp ':{11:1},b'ific':{11:1},b'unit':{11:3},b'rers ':{11:1},b' acci':{11:1},b'rses ':{11:1},b' quan':{11:1},b' cand':{11:2},b'ners ':{11:4},b' nego':{11:1},b'para':{11:1},b' obje':{11:3},b'nant ':{11:1},b'ares ':{11:1},b' club ':{11:1},b' jose':{11:1},b' inco':{11:2},b'alls ':{11:2},b' unit':{11:5},b' moti':{11:1},b'lans 
':{11:1},b'ntal ':{11:5},b'rito':{11:1},b' sign':{11:5},b'erso':{11:1},b'enge ':{11:1},b' rece':{11:8},b'ocal ':{11:5},b' just':{11:1},b'rica ':{11:2},b' obli':{11:1},b' memb':{11:8},b'ical':{11:3},b'dida':{11:1},b'icle':{11:1},b' circ':{11:2},b' deli':{11:3},b' defi':{11:4},b' tren':{11:1},b' patr':{11:1},b' cong':{11:2},b'atur':{11:2},b'cant ':{11:2},b'ntit':{11:1},b'stem ':{11:7},b'posa':{11:2},b'reta':{11:1},b' refo':{11:1},b'orti':{11:1},b'iali':{11:1},b'tals ':{11:1},b'fica':{11:1},b'erac':{11:1},b' febr':{11:1},b'pera':{11:3},b'orts ':{11:5},b'sibi':{11:2},b' capi':{11:2},b' deba':{11:1},b'ival ':{11:2},b' capa':{11:2},b'erse':{11:1},b' cart':{11:1},b' mass':{11:1},b' resi':{11:4},b'nden':{11:2},b' redu':{11:3},b' favo':{11:2},b'eres':{11:5},b'enes ':{11:1},b' trad':{11:5},b' refu':{11:2},b'gues ':{11:1},b' colo':{11:3},b'omes ':{11:4},b'ersi':{11:1},b' civi':{11:2},b' docu':{11:2},b' trib':{11:1},b'iden':{11:4},b' bene':{11:3},b' rive':{11:2},b' targ':{11:2},b' enti':{11:3},b'mics ':{11:1},b' ente':{11:3},b'cent':{11:1},b' fill':{11:1},b' asso':{11:3},b' extr':{11:3},b' impl':{11:3},b'iver ':{11:3},b'ctiv':{11:2},b'tral ':{11:2},b' expo':{11:2},b'sult':{11:1},b' clar':{11:1},b' arre':{11:1},b' desi':{11:6},b' esca':{11:1},b'lers ':{11:1},b' reso':{11:4},b'oles ':{11:1},b'mane':{11:1},b' educ':{11:4},b' natu':{11:4},b'ilia':{11:1},b'ites ':{11:2},b' secr':{11:2},b' fund':{11:4},b' prep':{11:2},b'nche':{11:1},b'lies ':{11:3},b' figu':{11:2},b' pape':{11:2},b'erve':{11:2},b' bomb':{11:1},b'ting':{11:2},b' fron':{11:2},b' cert':{11:3},b'erts ':{11:2},b'sses ':{11:5},b' home':{11:2},b' via ':{11:1},b'serv':{11:2},b' vall':{11:1},b' clas':{11:5},b'tent ':{11:4},b' perf':{11:6},b'erti':{11:2},b'ajor ':{11:3},b' cate':{11:1},b'olar ':{11:1},b' solu':{11:2},b'acit':{11:1},b'port':{11:4},b'ermi':{11:2},b'gent ':{11:1},b'cess':{11:4},b' exis':{11:3},b'iana ':{11:1},b'dent':{11:4},b'emen':{11:7},b'tori':{11:3},b' crit':{11:3},b'ania ':{11:1},b'firm':{11:1},b' 
pati':{11:2},b' vict':{11:2},b' repe':{11:1},b' soli':{11:1},b'udes ':{11:2},b'cret':{11:1},b'nifi':{11:2},b'peti':{11:2},b' legi':{11:2},b' priv':{11:3},b' set ':{11:4},b' ambi':{11:1},b' mobi':{11:2},b'stan':{11:3},b' germ':{11:3},b' urba':{11:1},b' comb':{11:3},b'mers ':{11:3},b' anim':{11:2},b' reti':{11:1},b'ctor':{11:1},b'aper ':{11:2},b'itar':{11:2},b' andr':{11:1},b' alte':{11:2},b' fami':{11:6},b'trib':{11:4},b'ivil ':{11:1},b'ames ':{11:4},b' repa':{11:1},b' domi':{11:1},b'tifi':{11:1},b' amon':{11:3},b'ecte':{11:4},b' incr':{11:6},b'cret ':{11:1},b' davi':{11:1},b'rles ':{11:1},b' refl':{11:1},b' sele':{11:2},b' expr':{11:2},b' fort':{11:1},b' abso':{11:1},b'init':{11:2},b'odel ':{11:2},b'taur':{11:1},b'egra':{11:2},b' exce':{11:3},b' viol':{11:2},b' me ':{11:5},b' deta':{11:2},b'ncie':{11:1},b'fici':{11:1},b' guar':{11:2},b'ases ':{11:4},b'cial':{11:2},b'nces ':{11:7},b'cans ':{11:1},b'alia ':{11:1},b' nega':{11:1},b' mort':{11:1},b' impr':{11:5},b'eran':{11:1},b'rica':{11:4},b'ilar ':{11:2},b'reme':{11:1},b' revi':{11:3},b'ient':{11:1},b' cana':{11:2},b'mans ':{11:1},b'ance':{11:2},b' assu':{11:1},b' idea ':{11:2},b'plet':{11:4},b'visi':{11:1},b'nics ':{11:1},b' home ':{11:5},b'onal':{11:2},b'tari':{11:1},b'lent ':{11:2},b' trum':{11:1},b' coll':{11:7},b'rump ':{11:1},b' mult':{11:3},b'rida ':{11:1},b'iant ':{11:1},b' retr':{11:1},b' envi':{11:4},b'resp':{11:1},b' requ':{11:7},b'trol ':{11:3},b'tenc':{11:1},b' sent ':{11:1},b'nits ':{11:1},b' insi':{11:2},b' pret':{11:1},b' iden':{11:3},b'gina':{11:3},b'ires ':{11:2},b'ster ':{11:7},b' fact':{11:3},b'essa':{11:2},b'ator':{11:1},b' muse':{11:1},b'avid ':{11:1},b' regu':{11:3},b'ogra':{11:1},b' reta':{11:1},b' movi':{11:2},b'sent ':{11:3},b' evid':{11:2},b'egor':{11:1},b'egal ':{11:2},b'erat':{11:5},b'abli':{11:3},b'evis':{11:1},b'elle':{11:2},b' usua':{11:2},b' marg':{11:1},b'cati':{11:4},b'aces ':{11:2},b' perc':{11:3},b'icul':{11:1},b' albe':{11:1},b'trol':{11:1},b'ical ':{11:21},b'ills ':{11:2},b' 
clie':{11:1},b' imme':{11:2},b' us ':{11:7},b' simp':{11:3},b' forc':{11:4},b'ncer ':{11:1},b'ales ':{11:2},b'ente':{11:2},b'cent ':{11:5},b'scri':{11:1},b' impe':{11:1},b' reve':{11:2},b' exec':{11:2},b' repu':{11:2},b'ntic ':{11:1},b'nies ':{11:3},b'nspo':{11:2},b'pita':{11:2},b'bers ':{11:6},b' exte':{11:4},b' conn':{11:4},b' exer':{11:1},b'abil':{11:2},b' coor':{11:1},b' base ':{11:1},b' ille':{11:1},b'sors ':{11:1},b' prio':{11:2},b'oper ':{11:1},b'test':{11:1},b' reno':{11:1},b'ican':{11:3},b' test':{11:2},b'orpo':{11:1},b' adve':{11:1},b'tion':{11:13},b' pref':{11:1},b'form':{11:4},b'gres':{11:2},b'olut':{11:2},b' elem':{11:2},b' arra':{11:1},b'pone':{11:1},b'itiv':{11:2},b' tria':{11:1},b'icie':{11:3},b' just ':{11:9},b' divi':{11:2},b' plac':{11:7},b'oria':{11:1},b'osen ':{11:1},b'mati':{11:2},b'temp':{11:1},b' insu':{11:1},b'ipan':{11:1},b' orig':{11:3},b'rant':{11:1},b'spec':{11:1},b' truc':{11:1},b' opin':{11:1},b' frui':{11:1},b'ture':{11:4},b' sati':{11:1},b' infl':{11:2},b' paci':{11:1},b'ties ':{11:17},b' aspe':{11:1},b' flor':{11:1},b'mals ':{11:1},b' grav':{11:1},b'ages ':{11:5},b'erpr':{11:2},b'ders ':{11:6},b'ates ':{11:13},b'ntif':{11:2},b'sump':{11:1},b' mand':{11:1},b' text ':{11:1},b'llen ':{11:1},b' dele':{11:1},b'gers ':{11:2},b'tene':{11:1},b' succ':{11:4},b' argu':{11:2},b'ubli':{11:2},b'tivi':{11:1},b' limi':{11:3},b'tica':{11:3},b'cept':{11:1},b'orme':{11:1},b'pers ':{11:2},b' agre':{11:3},b' lite':{11:1},b'ians ':{11:4},b'list':{11:1},b'tant':{11:1},b' ball':{11:1},b'plem':{11:1},b'ntil ':{11:4},b' depe':{11:2},b'lifi':{11:1},b' amen':{11:1},b' reac':{11:4},b'ticu':{11:3},b'tive':{11:1},b' angl':{11:1},b'ears ':{11:11},b'isla':{11:2},b'olor ':{11:1},b' moto':{11:1},b'ords ':{11:3},b' reca':{11:1},b'ital':{11:1},b' glob':{11:3},b' sovi':{11:1},b' cost ':{11:2},b'aria':{11:1},b'tral':{11:1},b' tend':{11:1},b'flic':{11:1},b'truc':{11:3},b' manu':{11:2},b' pate':{11:1},b'icat':{11:7},b' mora':{11:1},b'tter ':{11:8},b'anis':{11:3},b'ovel 
':{11:1},b' hosp':{11:2},b'cuti':{11:1},b' edit':{11:2},b'edia':{11:2},b' susp':{11:1},b'lici':{11:1},b' data ':{11:5},b' vill':{11:1},b' modi':{11:1},b' past':{11:1},b'erns ':{11:1},b'leme':{11:3},b' obse':{11:2},b'pond':{11:2},b' infe':{11:1},b' reci':{11:1},b'ters ':{11:10},b'rodu':{11:2},b'anta':{11:1},b'cter ':{11:1},b'ting ':{11:42},b' digi':{11:2},b' inno':{11:3},b' revo':{11:1},b' rega':{11:2},b'rces ':{11:4},b' addi':{11:4},b' rese':{11:8},b'alle':{11:1},b'sics ':{11:1},b'igen':{11:1},b' infr':{11:1},b'ront ':{11:2},b'ustr':{11:4},b' sepa':{11:2},b'uctu':{11:2},b' port ':{11:1},b'rori':{11:1},b'igna':{11:1},b'onia ':{11:1},b'ssif':{11:1},b'\xe2\x80\x99s ':{11:31},b' elev':{11:1},b' impa':{11:2},b'utes ':{11:2},b'nect':{11:4},b' indu':{11:4},b'bili':{11:2},b' nota':{11:1},b'ssio':{11:1},b' simi':{11:2},b' decr':{11:1},b'pare':{11:2},b'entu':{11:1},b' alco':{11:1},b'veni':{11:1},b'gies ':{11:2},b'nsfo':{11:1},b' depo':{11:1},b' case':{11:2},b'erva':{11:1},b' has ':{11:28},b' cent ':{11:1},b'dame':{11:1},b'olog':{11:2},b' capt':{11:1},b' dani':{11:1},b'ivid':{11:3},b'ervi':{11:2},b' excl':{11:1},b' intr':{11:3},b' mile':{11:1},b'crip':{11:1},b' nucl':{11:1},b'uits ':{11:1},b'nari':{11:1},b'pens':{11:1},b'nten':{11:1},b' esse':{11:1},b' orde':{11:5},b' vent':{11:1},b' coop':{11:2},b' subm':{11:2},b'eces ':{11:1},b' insp':{11:2},b'otos ':{11:1},b'rees ':{11:1},b'lusi':{11:1},b'rovi':{11:1},b'nder ':{11:9},b'isio':{11:4},b' face':{11:2},b'tric':{11:3},b'tine':{11:1},b'nter ':{11:5},b' erro':{11:1},b'odes ':{11:1},b' comm':{11:25},b'arge ':{11:5},b' sexu':{11:1},b' adju':{11:1},b'ntat':{11:1},b'mula':{11:1},b'vent ':{11:4},b'ssia ':{11:1},b'ence':{11:3},b'rter ':{11:2},b'tabl':{11:1},b' magn':{11:1},b' adap':{11:1},b' sola':{11:1},b' calc':{11:1},b' base':{11:6},b' unit ':{11:1},b'inan':{11:1},b'uses ':{11:3},b' imag':{11:3},b' elim':{11:1},b' immi':{11:1},b'nomi':{11:4},b'nual ':{11:2},b'ctic':{11:4},b'lect':{11:4},b'ende':{11:4},b' hote':{11:1},b' 
arme':{11:1},b' barb':{11:1},b'orat':{11:3},b'enda':{11:2},b'rint ':{11:1},b' audi':{11:2},b' pale':{11:1},b'ples ':{11:2},b'ique ':{11:2},b' alex':{11:1},b'tent':{11:1},b'rati':{11:6},b' clim':{11:2},b'babl':{11:1},b'mits ':{11:1},b'gles ':{11:1},b'vers ':{11:2},b'icit':{11:1},b' acto':{11:1},b' reli':{11:3},b'secu':{11:1},b'ande':{11:1},b' robe':{11:1},b' prac':{11:4},b' ende':{11:1},b'tric ':{11:2},b'nfor':{11:1},b' rein':{11:1},b' advo':{11:1},b' cali':{11:1},b'rect':{11:1},b' deri':{11:1},b' mess':{11:2},b'book ':{11:1},b'inat':{11:2},b'eboo':{11:1},b'trat':{11:2},b' gall':{11:1},b'fere':{11:11},b'sten':{11:1},b' pred':{11:2},b'omin':{11:2},b'erim':{11:1},b'stin':{11:3},b' vita':{11:1},b' sing':{11:4},b' spor':{11:2},b'etic ':{11:2},b' orie':{11:1},b' tv ':{11:1},b'tall':{11:1},b'itor ':{11:1},b' corp':{11:2},b'clus':{11:1},b' enga':{11:1},b' card':{11:1},b' diag':{11:1},b' mone':{11:3},b'ipat':{11:2},b' doct':{11:1},b'pany ':{11:3},b' etc ':{11:1},b' ball ':{11:1},b' raci':{11:1},b'bina':{11:1},b' to ':{11:232},b' do ':{11:11},b' pro ':{11:1},b' k ':{11:1},b' co ':{11:2},b' u ':{11:2},b'oval ':{11:1},b' let ':{11:1},b' star':{11:8},b' stud':{11:11},b'itic':{11:5},b' ten ':{11:1},b'atel':{11:1},b' tech':{11:7},b'lice ':{11:3},b' sout':{11:5},b'ence ':{11:22},b'race ':{11:1},b' syst':{11:10},b'erst':{11:3},b' slav':{11:1},b'ance ':{11:19},b' stro':{11:3},b' spec':{11:9},b' eu ':{11:6},b' scho':{11:8},b'eman ':{11:1},b' list':{11:2},b' arch':{11:2},b'hoto ':{11:1},b'esto':{11:1},b'rove':{11:2},b'tice ':{11:3},b' brus':{11:1},b'onst':{11:2},b'tury ':{11:2},b'enty ':{11:1},b' mich':{11:1},b' host':{11:1},b'ince ':{11:6},b' my ':{11:9},b' char':{11:8},b'hnol':{11:4},b'ines':{11:6},b'vice ':{11:5},b'vide':{11:5},b' mist':{11:1},b'avel ':{11:1},b'ovat':{11:3},b'tory ':{11:9},b'hnic':{11:2},b' fire':{11:1},b'mise ':{11:1},b'hodo':{11:1},b'lity ':{11:14},b' obvi':{11:1},b'gram ':{11:3},b'lace ':{11:6},b'hite':{11:1},b' bran':{11:2},b' film 
':{11:2},b'tner':{11:2},b'lati':{11:2},b' styl':{11:2},b' libe':{11:1},b' turn':{11:2},b' magi':{11:1},b'atic':{11:1},b'onic':{11:1},b' migr':{11:1},b' here':{11:1},b'rtin ':{11:1},b' psyc':{11:1},b'ivat':{11:1},b' text':{11:1},b' dram':{11:1},b'liza':{11:1},b' maxi':{11:1},b' logi':{11:1},b' spok':{11:1},b' stop':{11:1},b'aliz':{11:1},b'taly ':{11:1},b'omat':{11:1},b' top ':{11:3},b'rize ':{11:1},b' most ':{11:10},b'ateg':{11:3},b' seni':{11:1},b' symb':{11:1},b'ideo ':{11:2},b' atmo':{11:1},b' pane':{11:1},b' j ':{11:1},b'arty ':{11:3},b'vati':{11:1},b' link':{11:1},b'rice ':{11:2},b' repo':{11:7},b'line ':{11:5},b' prou':{11:1},b' spar':{11:1},b'ness ':{11:10},b' opti':{11:3},b' berl':{11:1},b'vity ':{11:2},b' farm':{11:1},b' stre':{11:5},b' chem':{11:1},b'oman ':{11:2},b'nese ':{11:2},b'rest ':{11:3},b'anic ':{11:1},b' semi':{11:1},b' afri':{11:2},b' nomi':{11:1},b' aust':{11:2},b'rity ':{11:8},b' onli':{11:2},b' new ':{11:16},b' line':{11:2},b'body ':{11:1},b' body ':{11:2},b' emai':{11:1},b'keti':{11:1},b' stab':{11:1},b'cept ':{11:3},b' busi':{11:6},b' fant':{11:1},b'dium ':{11:1},b'inar':{11:1},b' you ':{11:30},b'ered ':{11:11},b' plas':{11:1},b'mail ':{11:1},b' line ':{11:3},b' an ':{11:34},b' at ':{11:44},b' for ':{11:94},b' man ':{11:3},b'ning ':{11:23},b'fter ':{11:11},b'ange ':{11:8},b'nger ':{11:4},b'mmer ':{11:2},b' over ':{11:10},b'ring ':{11:26},b' over':{11:5},b'ille ':{11:2},b' unde':{11:11},b' her ':{11:11},b' stor':{11:7},b' bill':{11:2},b' have ':{11:34},b'ling ':{11:13},b'lder ':{11:2},b'ager ':{11:1},b' land':{11:2},b' find':{11:1},b' end ':{11:4},b' hold':{11:1},b'land ':{11:7},b'atio':{11:50},b' fore':{11:4},b'mark ':{11:1},b' side':{11:1},b'tore ':{11:1},b'amme ':{11:2},b' lang':{11:3},b' alle':{11:1},b'eter ':{11:1},b'gger ':{11:1},b' give':{11:4},b' hand':{11:3},b' dog ':{11:1},b'hold ':{11:1},b' god ':{11:1},b' chri':{11:3},b'rsda':{11:1},b'ogen ':{11:1},b'tere':{11:1},b'tand ':{11:2},b'mand ':{11:2},b'ften ':{11:4},b' 
stil':{11:5},b'tten ':{11:2},b' leve':{11:5},b' bedr':{11:1},b'ater ':{11:10},b'rker ':{11:1},b'ling':{11:1},b'ilie':{11:1},b'tive ':{11:21},b'pper ':{11:1},b' offe':{11:6},b'ader ':{11:2},b'deri':{11:1},b' afte':{11:12},b' pris':{11:1},b'ever ':{11:12},b' fire ':{11:2},b'over ':{11:3},b' pete':{11:1},b' give ':{11:3},b' side ':{11:2},b'tern':{11:1},b'itie':{11:3},b'dere':{11:1},b' sand':{11:1},b'ppen ':{11:1},b'ster':{11:1},b'vent':{11:2},b' hill':{11:1},b'lion':{11:1},b'rier ':{11:1},b' sund':{11:1},b' alli':{11:1},b'tage ':{11:4},b'embe':{11:5},b'berg ':{11:1},b'nner ':{11:2},b'ands ':{11:5},b' live':{11:3},b'vert ':{11:2},b' x ':{11:2},b'rmed ':{11:4},b'sted ':{11:8},b' time':{11:3},b' fast':{11:1},b'rket ':{11:3},b'ding ':{11:31},b' beha':{11:2},b'rger ':{11:1},b'dlin':{11:1},b'tian ':{11:1},b'ange':{11:1},b' dr ':{11:1},b' henr':{11:1},b' thom':{11:1},b' fast ':{11:1},b'arks ':{11:1},b'evel ':{11:4},b' far ':{11:2},b'uati':{11:2},b'deli':{11:1},b' nati':{11:9},b'uper ':{11:1},b' hold ':{11:1},b' anno':{11:2},b'ense':{11:1},b'erie':{11:4},b'olde':{11:1},b' janu':{11:2},b'iner ':{11:1},b'renc':{11:1},b'hael ':{11:1},b'rive ':{11:2},b'elin':{11:1},b' hund':{11:1},b'omas ':{11:1},b' roll':{11:1},b' bord':{11:1},b' form ':{11:3},b' g ':{11:2},b'mand':{11:1},b'ffer ':{11:2},b' traf':{11:1},b'tter':{11:1},b'sati':{11:3},b' skil':{11:1},b' befo':{11:6},b' job ':{11:2},b' week':{11:3},b' syri':{11:1},b'ming ':{11:7},b' scen':{11:2},b'diti':{11:5},b'raft ':{11:2},b'rmer ':{11:2},b' dece':{11:2},b'vide ':{11:3},b' land ':{11:2},b' budg':{11:2},b'temb':{11:2},b'rder ':{11:5},b' apri':{11:2},b'dred':{11:1},b' sept':{11:2},b'rand ':{11:1},b'nnin':{11:2},b'ings ':{11:12},b' stri':{11:3},b'pril ':{11:2},b'ding':{11:1},b'utio':{11:5},b' poin':{11:6},b'ober ':{11:2},b' jour':{11:2},b' chan':{11:10},b' augu':{11:2},b'rnal':{11:1},b'gust ':{11:1},b' even':{11:7},b'duce':{11:2},b'rson ':{11:3},b'mmin':{11:1},b' love':{11:1},b'vate ':{11:2},b' spon':{11:1},b'gdom ':{11:1},b' 
midd':{11:1},b' invi':{11:1},b'itio':{11:10},b' lett':{11:2},b'pute':{11:2},b'esse':{11:1},b'rked ':{11:3},b' john ':{11:2},b'rine ':{11:2},b'ivel':{11:1},b' scor':{11:2},b'tner ':{11:1},b' smar':{11:1},b'nsor':{11:1},b'side ':{11:4},b'reds ':{11:1},b'rner ':{11:1},b'rgen':{11:1},b'lled ':{11:9},b'lier ':{11:1},b' find ':{11:3},b'hone ':{11:2},b'ndle ':{11:1},b'liti':{11:2},b' loui':{11:1},b'over':{11:1},b' driv':{11:4},b'gion ':{11:2},b'form ':{11:3},b' mete':{11:1},b'oint ':{11:5},b' brin':{11:3},b'tart ':{11:3},b'omme':{11:1},b' russ':{11:2},b'rian ':{11:3},b' plan ':{11:2},b' rand':{11:1},b'igne':{11:2},b'mmes ':{11:1},b' spri':{11:1},b' init':{11:3},b'rver ':{11:1},b'tiat':{11:2},b'sive ':{11:6},b'teen ':{11:1},b'nges ':{11:4},b'rage ':{11:4},b'nald ':{11:1},b' offi':{11:8},b' fine ':{11:1},b'orte':{11:3},b'aste ':{11:1},b' mail ':{11:1},b'shed ':{11:7},b'teme':{11:1},b' holl':{11:1},b'ator ':{11:3},b' stee':{11:1},b' trus':{11:1},b'ight ':{11:20},b' jul ':{11:1},b'gest':{11:2},b'iver':{11:2},b' span':{11:1},b'epte':{11:1},b' litt':{11:3},b'ansi':{11:1},b' sex ':{11:1},b' adva':{11:3},b'rise ':{11:1},b'kend ':{11:1},b' begi':{11:3},b'uter ':{11:2},b'rnin':{11:1},b'igio':{11:2},b' will':{11:2},b'orge ':{11:1},b'tati':{11:3},b'etal ':{11:1},b' grad':{11:3},b'sign ':{11:2},b' time ':{11:14},b'nton ':{11:1},b'rete ':{11:1},b'vice':{11:4},b'miss':{11:5},b' engl':{11:3},b'tine ':{11:1},b'erse ':{11:1},b'mine':{11:1},b' sold':{11:1},b' affa':{11:1},b' is ':{11:109},b' red ':{11:1},b'test ':{11:2},b' meta':{11:1},b' supp':{11:11},b'view ':{11:3},b' math':{11:1},b'icer':{11:1},b'rist ':{11:1},b'ianc':{11:1},b'tfor':{11:1},b'gets ':{11:1},b' roya':{11:1},b' glas':{11:1},b'aves ':{11:1},b'orie':{11:1},b' pari':{11:1},b'otel ':{11:1},b' forg':{11:1},b'cher ':{11:2},b' it ':{11:59},b' rock':{11:1},b' led ':{11:2},b'oser ':{11:1},b'sing':{11:1},b'kers ':{11:3},b'pare ':{11:1},b'ping ':{11:5},b'lter ':{11:1},b' 
clin':{11:1},b'olin':{11:1},b'rter':{11:1},b'evan':{11:1},b'pire':{11:1},b' appe':{11:4},b'ther ':{11:38},b'dget ':{11:1},b'mart ':{11:1},b'sing ':{11:15},b'ards ':{11:7},b' smit':{11:1},b' spre':{11:1},b'lage ':{11:1},b'adio ':{11:1},b'lion ':{11:5},b' blac':{11:2},b' typi':{11:1},b' batt':{11:2},b'pete':{11:1},b'erre':{11:1},b' illu':{11:1},b'atus ':{11:1},b' geor':{11:1},b'male ':{11:1},b' post ':{11:2},b'sked ':{11:2},b' spir':{11:1},b' attr':{11:2},b'ndon ':{11:2},b' step':{11:1},b' am ':{11:3},b' so ':{11:13},b' war ':{11:3},b' ange':{11:1},b'urch ':{11:2},b' was ':{11:66},b' mach':{11:2},b'dern ':{11:2},b'iter ':{11:1},b'burg ':{11:1},b' euro ':{11:1},b'tern ':{11:3},b' rich':{11:1},b'ches ':{11:4},b' will ':{11:31},b' date':{11:1},b' welc':{11:1},b' kind':{11:1},b' also ':{11:23},b'sage ':{11:2},b' them':{11:3},b' sche':{11:2},b'orde':{11:1},b'lick ':{11:1},b'hter ':{11:2},b' word':{11:2},b'rade ':{11:3},b'iven ':{11:3},b'itte':{11:1},b' trai':{11:5},b'lose ':{11:3},b'ruck ':{11:1},b'cher':{11:2},b'mitt':{11:3},b'cker ':{11:1},b' fall ':{11:1},b'unte':{11:1},b' west':{11:2},b' webs':{11:2},b'orum ':{11:1},b' art ':{11:2},b'ssel':{11:1},b'ause ':{11:9},b'dest ':{11:1},b' chin':{11:3},b' wind':{11:2},b' team ':{11:4},b'fort ':{11:1},b'hern ':{11:2},b' name':{11:2},b'ntie':{11:1},b'ware ':{11:3},b'ders':{11:1},b' frie':{11:3},b' mann':{11:1},b'king ':{11:20},b' wide':{11:1},b' dama':{11:1},b'ison ':{11:2},b' hand ':{11:2},b' ster':{11:1},b' besi':{11:1},b' inse':{11:1},b'ache':{11:1},b' basi':{11:3},b'band ':{11:1},b'tein ':{11:1},b'chin':{11:3},b'ails ':{11:2},b' mast':{11:1},b'nion ':{11:3},b'here ':{11:26},b' appl':{11:7},b' beli':{11:4},b'nate ':{11:2},b' bein':{11:7},b' wint':{11:1},b'dows ':{11:1},b' soft':{11:2},b'udio ':{11:1},b' team':{11:1},b'agem':{11:3},b'dies ':{11:3},b' all ':{11:24},b'hina ':{11:1},b'hine ':{11:1},b' unio':{11:3},b'shop ':{11:1},b'rato':{11:1},b' late':{11:5},b'heri':{11:1},b' bad ':{11:1},b' shop ':{11:1},b' 
tour':{11:2},b'twar':{11:2},b'inin':{11:3},b' micr':{11:1},b'elli':{11:2},b' dres':{11:1},b'irls ':{11:1},b' wort':{11:1},b' japa':{11:1},b' live ':{11:2},b'ague ':{11:2},b' unab':{11:1},b'line':{11:1},b'rate':{11:2},b'inks ':{11:1},b' test ':{11:1},b'erwa':{11:1},b' st ':{11:3},b'dier':{11:1},b' news ':{11:3},b' ther':{11:22},b'eams ':{11:2},b'lied ':{11:2},b'date ':{11:2},b'site ':{11:3},b'aine ':{11:1},b' phil':{11:2},b'bile ':{11:1},b' warn':{11:1},b'erfa':{11:1},b' gold':{11:1},b' devo':{11:1},b' down':{11:3},b'endl':{11:1},b' tabl':{11:2},b'rate ':{11:6},b' upda':{11:1},b'lege ':{11:2},b'asis ':{11:2},b' idea':{11:2},b' drin':{11:1},b' girl':{11:1},b'tars ':{11:1},b' beac':{11:1},b' fans ':{11:1},b' brow':{11:1},b' bega':{11:2},b'hard ':{11:1},b'llen':{11:3},b' wild':{11:1},b' call':{11:6},b'aste':{11:1},b' kind ':{11:2},b'onds ':{11:1},b' main':{11:3},b'nloa':{11:1},b' play':{11:7},b'hase ':{11:2},b' leag':{11:1},b' bar ':{11:1},b' symp':{11:1},b'lass ':{11:3},b'rall ':{11:1},b'ayer ':{11:2},b'ends ':{11:3},b' game':{11:2},b' modu':{11:1},b' well':{11:1},b'erni':{11:1},b'atch ':{11:3},b'aren':{11:1},b'gnos':{11:1},b'nute ':{11:1},b'sequ':{11:2},b' powe':{11:6},b' rele':{11:4},b' star ':{11:1},b'dels ':{11:1},b' cham':{11:2},b'trum':{11:1},b'orld ':{11:7},b' theo':{11:2},b' meth':{11:3},b' spen':{11:2},b'ocus ':{11:2},b'hani':{11:2},b' date ':{11:2},b'load ':{11:1},b' matt':{11:3},b' tick':{11:1},b' main ':{11:4},b' effi':{11:2},b'icks ':{11:1},b' worl':{11:9},b'osit':{11:2},b' focu':{11:3},b' maga':{11:1},b'tend ':{11:1},b'omen ':{11:2},b'erwi':{11:1},b' name ':{11:3},b'ture ':{11:17},b' amaz':{11:1},b'play ':{11:1},b' bett':{11:3},b'tock ':{11:1},b' bloc':{11:2},b'dard ':{11:2},b'tems ':{11:4},b'epti':{11:2},b'nity ':{11:7},b'otic ':{11:1},b' code ':{11:1},b'nkin':{11:1},b' note':{11:1},b' gold ':{11:1},b' flex':{11:1},b'osph':{11:1},b'hops ':{11:1},b' thea':{11:1},b' news':{11:1},b' virt':{11:1},b' butt':{11:1},b' york ':{11:2},b' that ':{11:77},b' with 
':{11:64},b' as ':{11:57},b' are ':{11:54},b' be ':{11:50},b' this ':{11:50},b' from ':{11:42},b' not ':{11:34},b' they ':{11:32},b'ally ':{11:30},b' or ':{11:30},b'ould ':{11:29},b' we ':{11:28},b' his ':{11:27},b'ated ':{11:27},b' but ':{11:25},b' thei':{11:25},b'heir ':{11:25},b' were ':{11:24},b' whic':{11:21},b'hich ':{11:21},b' one ':{11:20},b' more ':{11:20},b' othe':{11:18},b' coun':{11:18},b' said ':{11:18},b' been ':{11:17},b' had ':{11:17},b'ough ':{11:17},b'bout ':{11:17},b' abou':{11:17},b' its ':{11:16},b'hing ':{11:15},b' woul':{11:15},b' who ':{11:15},b'ound ':{11:15},b' thes':{11:14},b' she ':{11:14},b'hese ':{11:13},b' when ':{11:13},b' up ':{11:13},b' some ':{11:13},b' firs':{11:13},b'irst ':{11:13},b' peop':{11:13},b' two ':{11:13},b' into ':{11:12},b' out ':{11:12},b' if ':{11:12},b' our ':{11:12},b'ople ':{11:12},b' your ':{11:12},b'ving ':{11:12},b' what ':{11:12},b'cted ':{11:11},b' only ':{11:11},b' than ':{11:11},b' them ':{11:11},b' thro':{11:11},b' year':{11:11},b' work':{11:11},b' deve':{11:10},b' diff':{11:10},b' many ':{11:10},b' thin':{11:10},b'nted ':{11:10},b' year ':{11:10},b' like ':{11:10},b' beca':{11:10},b'arch ':{11:9},b'ased ':{11:9},b'\xe2\x80\x99t ':{11:9},b'elop':{11:9},b'ined ':{11:9},b' such ':{11:9},b'ying ':{11:9},b'ctio':{11:9},b' ever':{11:9},b' used ':{11:9},b' well ':{11:9},b' now ':{11:9},b' thre':{11:9},b' with':{11:9},b' how ':{11:9},b' grou':{11:8},b' work ':{11:8},b'nded ':{11:8},b'opea':{11:8},b' very ':{11:8},b'ious ':{11:8},b' use ':{11:8},b'ited ':{11:8},b'ices ':{11:8},b' wher':{11:8},b' coul':{11:8},b'ject ':{11:8},b' shou':{11:8},b'tate ':{11:8},b'cess ':{11:8},b'pean ':{11:8},b'eing ':{11:8},b'hose ':{11:8},b'fore ':{11:8},b' acco':{11:8},b'ntly ':{11:8},b'ween ':{11:8},b' then ':{11:7},b' betw':{11:7},b'arly ':{11:7},b'hile ':{11:7},b' duri':{11:7},b' righ':{11:7},b' agai':{11:7},b'wing ':{11:7},b' it\xe2\x80':{11:7},b' both ':{11:7},b'ects ':{11:7},b' made ':{11:7},b' whil':{11:7},b' buil':{11:7},b' 
appr':{11:7},b' grea':{11:7},b' cour':{11:7},b' even ':{11:7},b'earc':{11:7},b' get ':{11:7},b'hree ':{11:7},b'ernm':{11:7},b' make ':{11:7},b'oing ':{11:7},b' howe':{11:7},b' larg':{11:7},b' foun':{11:7},b' numb':{11:7},b' thos':{11:6},b' high ':{11:6},b'ower ':{11:6},b' good ':{11:6},b' some':{11:6},b'ship ':{11:6},b' way ':{11:6},b'rted ':{11:6},b' each ':{11:6},b' him ':{11:6},b'ease ':{11:6},b' day ':{11:6},b' city ':{11:6},b' foll':{11:6},b' seco':{11:6},b' know':{11:6},b' see ':{11:6},b'very ':{11:6},b'ssed ':{11:6},b' last ':{11:6},b'ists ':{11:6},b' need ':{11:6},b'reas':{11:6},b'reat ':{11:6},b'tial ':{11:6},b'tely ':{11:6},b'ging ':{11:6},b' clos':{11:6},b' life ':{11:6},b' sinc':{11:6},b'hout ':{11:6},b' chil':{11:6},b'come ':{11:6},b' thou':{11:6},b' same ':{11:6},b'logy ':{11:6},b'ught ':{11:6},b' take ':{11:6},b' much ':{11:6},b'hers ':{11:5},b'sity ':{11:5},b'pmen':{11:5},b' back ':{11:5},b'hool ':{11:5},b' issu':{11:5},b'tain ':{11:5},b' did ':{11:5},b' th ':{11:5},b'ized ':{11:5},b' allo':{11:5},b' high':{11:5},b' wate':{11:5},b'till ':{11:5},b' hous':{11:5},b'ntry ':{11:5},b' anot':{11:5},b' lead':{11:5},b' long ':{11:5},b'ulat':{11:5},b'ired ':{11:5},b'cond ':{11:5},b' here ':{11:5},b' that':{11:5},b' curr':{11:5},b' exam':{11:5},b' heal':{11:5},b'ised ':{11:5},b'ttle ':{11:5},b' oppo':{11:5},b'tain':{11:5},b' atte':{11:5},b' own ':{11:5},b'ency ':{11:5},b' know ':{11:5},b' earl':{11:5},b' arou':{11:5},b'ided ':{11:5},b'ergy ':{11:5},b" it's ":{11:5},b'tary ':{11:5},b' scie':{11:5},b'ward ':{11:5},b' mean':{11:5},b'cing ':{11:5},b'rope ':{11:5},b' want ':{11:5},b'ason ':{11:5},b' need':{11:5},b' must ':{11:5},b' off ':{11:4},b'uire':{11:4},b' beco':{11:4},b' area ':{11:4},b'ared ':{11:4},b' show':{11:4},b' writ':{11:4},b' smal':{11:4},b' game ':{11:4},b'roup ':{11:4},b' cita':{11:4},b'sure ':{11:4},b'inst ':{11:4},b'nced ':{11:4},b'oved ':{11:4},b' nort':{11:4},b'ived ':{11:4},b'ludi':{11:4},b'rned ':{11:4},b'ican ':{11:4},b' neve':{11:4},b' go 
':{11:4},b' avai':{11:4},b'mily ':{11:4},b' help ':{11:4},b' gree':{11:4},b'self ':{11:4},b' shar':{11:4},b'outh ':{11:4},b'ghts ':{11:4},b' ofte':{11:4},b' down ':{11:4},b'ways ':{11:4},b'thin ':{11:4},b' auth':{11:4},b'ordi':{11:4},b'alth ':{11:4},b'osed ':{11:4},b' four ':{11:4},b'eady ':{11:4},b' goin':{11:4},b'ross ':{11:4},b'ract':{11:4},b'ield ':{11:4},b'llow ':{11:4},b'nown ':{11:4},b'ched ':{11:4},b' next ':{11:4},b'ouse ':{11:4},b'fied ':{11:4},b'ains ':{11:4},b'work ':{11:4},b'hink ':{11:4},b'owed ':{11:4},b'ldin':{11:4},b'ntri':{11:4},b'akes ':{11:4},b' too ':{11:4},b' happ':{11:4},b' free ':{11:4},b' ques':{11:4},b'nmen':{11:4},b' take':{11:4},b' does ':{11:4},b' old ':{11:4},b't\xe2\x80\x99':{11:4},b' citi':{11:4},b' best ':{11:4},b' clea':{11:4},b' fiel':{11:4},b' trea':{11:4},b'iron':{11:4},b'ific ':{11:4},b'stry ':{11:4},b'ried ':{11:4},b'stio':{11:4},b'ount ':{11:4},b' open ':{11:4},b' secu':{11:4},b' alon':{11:4},b' netw':{11:4},b' usin':{11:4},b' grow':{11:4},b' invo':{11:4},b'orth ':{11:4},b'iety ':{11:4},b'vely ':{11:4},b' alwa':{11:4},b' unti':{11:4},b'ther':{11:4},b' case ':{11:4},b'lish':{11:4},b'less ':{11:4},b'sday ':{11:4},b'omic ':{11:4},b' law ':{11:4},b' furt':{11:4},b'mall ':{11:3},b' shor':{11:3},b'ldre':{11:3},b' lear':{11:3},b'ured ':{11:3},b' valu':{11:3},b'\x80\x99re ':{11:3},b'licy ':{11:3},b' come ':{11:3},b' awar':{11:3},b' big ':{11:3},b' toda':{11:3},b'eeds ':{11:3},b'lude':{11:3},b'ints ':{11:3},b' hear':{11:3},b'atin':{11:3},b'oney ':{11:3},b' says ':{11:3},b' meet':{11:3},b'ucti':{11:3},b' show ':{11:3},b' few ':{11:3},b'dren ':{11:3},b' seas':{11:3},b'igat':{11:3},b'n\xe2\x80\x99':{11:3},b'oard ':{11:3},b'orks ':{11:3},b'imat':{11:3},b'ethe':{11:3},b' look':{11:3},b'reas ':{11:3},b'lish ':{11:3},b' i\xe2\x80\x99':{11:3},b'cifi':{11:3},b'ults ':{11:3},b'used ':{11:3},b'unty ':{11:3},b' brea':{11:3},b'uced ':{11:3},b' youn':{11:3},b'ortu':{11:3},b'zati':{11:3},b' got ':{11:3},b' read':{11:3},b' ligh':{11:3},b' food 
':{11:3},b'eans ':{11:3},b'atic ':{11:3},b' left ':{11:3},b' full ':{11:3},b'rong ':{11:3},b' they':{11:3},b' play ':{11:3},b' took ':{11:3},b' why ':{11:3},b' toge':{11:3},b'olve':{11:3},b' lot ':{11:3},b' retu':{11:3},b' move':{11:3},b' thir':{11:3},b' don\xe2':{11:3},b' cove':{11:3},b'urse ':{11:3},b'oday ':{11:3},b'ishe':{11:3},b'ened ':{11:3},b'gain ':{11:3},b' empl':{11:3},b'fect ':{11:3},b'long ':{11:3},b'guag':{11:3},b' reas':{11:3},b'usic ':{11:3},b' boar':{11:3},b' chal':{11:3},b'unds ':{11:3},b' meas':{11:3},b' say ':{11:3},b'mmon ':{11:3},b'pose':{11:3},b'ored ':{11:3},b' held ':{11:3},b'ctur':{11:3},b' five ':{11:3},b' maki':{11:3},b'ools ':{11:3},b' area':{11:3},b' spac':{11:3},b' put ':{11:3},b' alth':{11:3},b'houg':{11:3},b'ngle ':{11:3},b'pted ':{11:3},b' summ':{11:3},b' chur':{11:3},b'mong ':{11:3},b'gned ':{11:3},b' wome':{11:3},b' week ':{11:3},b' plea':{11:3},b' feat':{11:3},b'mate ':{11:3},b'y\xe2\x80\x99':{11:3},b' rang':{11:3},b'anci':{11:3},b'urit':{11:3},b'east ':{11:3},b' alre':{11:3},b' atta':{11:3},b'gest ':{11:3},b'oach ':{11:3},b'lowi':{11:3},b'yers ':{11:3},b'uman ':{11:3},b'tted ':{11:3},b'usly ':{11:3},b'lves ':{11:3},b'iled ':{11:3},b'ieve ':{11:3},b' nigh':{11:3},b'roac':{11:3},b' addr':{11:3},b' acro':{11:3},b' phot':{11:3},b'lude ':{11:3},b' look ':{11:3},b' able ':{11:3},b' days ':{11:3},b' due ':{11:3},b'ncil ':{11:3},b'aken ':{11:3},b'eded ':{11:3},b' less ':{11:3},b'eath ':{11:3},b'cked ':{11:3},b' non ':{11:3},b' call ':{11:3},b'lved ':{11:3},b'hori':{11:3},b' deat':{11:3},b'fice ':{11:3},b'ctly ':{11:3},b' migh':{11:3},b"at's ":{11:3},b'erty ':{11:3},b' run ':{11:3},b' pric':{11:3},b'dded ':{11:3},b'pped ':{11:3},b' sour':{11:3},b'duce ':{11:3},b' open':{11:3},b' occu':{11:3},b' want':{11:3},b'ayed ':{11:3},b'iate ':{11:3},b'hird ':{11:3},b'rnal ':{11:3},b' came ':{11:3},b'oung ':{11:3},b' disa':{11:3},b'sues ':{11:3},b' love ':{11:3},b' term ':{11:3},b'pect ':{11:3},b'lear ':{11:3},b' teac':{11:2},b' 
surv':{11:2},b'ntai':{11:2},b"on't ":{11:2},b'ooks ':{11:2},b'oped ':{11:2},b'tudy ':{11:2},b'each ':{11:2},b'ieve':{11:2},b'uall':{11:2},b'ably ':{11:2},b' once ':{11:2},b'ully ':{11:2},b'nary ':{11:2},b' away ':{11:2},b'rary ':{11:2},b'ssue ':{11:2},b'aced ':{11:2},b'oned ':{11:2},b'sons ':{11:2},b'orms ':{11:2},b'ethi':{11:2},b'city ':{11:2},b'eive':{11:2},b' make':{11:2},b' spea':{11:2},b'pace ':{11:2},b'cuss':{11:2},b' engi':{11:2},b' whit':{11:2},b' cust':{11:2},b'came ':{11:2},b' leas':{11:2},b'yone ':{11:2},b' june ':{11:2},b' help':{11:2},b'sult ':{11:2},b'ourc':{11:2},b'ourt ':{11:2},b'aged ':{11:2},b' role ':{11:2},b' leav':{11:2},b'nday ':{11:2},b' outs':{11:2},b' cann':{11:2},b'most ':{11:2},b' air ':{11:2},b' enco':{11:2},b'ours ':{11:2},b'orce ':{11:2},b' key ':{11:2},b'uded ':{11:2},b' care':{11:2},b' whol':{11:2},b'late ':{11:2},b' didn':{11:2},b'tect':{11:2},b'riod ':{11:2},b' book ':{11:2},b' adde':{11:2},b' past ':{11:2},b'ease':{11:2},b'nish ':{11:2},b'reen ':{11:2},b' low ':{11:2},b' fact ':{11:2},b' we\xe2\x80':{11:2},b' spee':{11:2},b'manc':{11:2},b'rved ':{11:2},b'loye':{11:2},b'eted ':{11:2},b'uted ':{11:2},b' owne':{11:2},b'oted ':{11:2},b' care ':{11:2},b' belo':{11:2},b' read ':{11:2},b' keep ':{11:2},b' effo':{11:2},b'egan ':{11:2},b'artm':{11:2},b' repl':{11:2},b' almo':{11:2},b" don'":{11:2},b"er's ":{11:2},b' six ':{11:2},b' half ':{11:2},b' mr ':{11:2},b' done ':{11:2},b' road ':{11:2},b'fers ':{11:2},b'etim':{11:2},b'urce ':{11:2},b'ucts ':{11:2},b' july ':{11:2},b' towa':{11:2},b'rous ':{11:2},b' achi':{11:2},b' like':{11:2},b' died ':{11:2},b' yet ':{11:2},b' marr':{11:2},b'anks ':{11:2},b'crib':{11:2},b'nged ':{11:2},b'rove ':{11:2},b' phys':{11:2},b' memo':{11:2},b' safe':{11:2},b' clai':{11:2},b'liam':{11:2},b' west ':{11:2},b'\x80\x99ve ':{11:2},b' join':{11:2},b'erve ':{11:2},b'uage ':{11:2},b'edge ':{11:2},b'hole ':{11:2},b' guid':{11:2},b'ocra':{11:2},b'onsh':{11:2},b'rred ':{11:2},b' age ':{11:2},b'ngly ':{11:2},b' went 
':{11:2},b' back':{11:2},b'wers ':{11:2},b'zing ':{11:2},b'lows ':{11:2},b'nths ':{11:2},b'sure':{11:2},b'eate ':{11:2},b'hips ':{11:2},b'sers ':{11:2},b' sust':{11:2},b'osts ':{11:2},b' enjo':{11:2},b'oups ':{11:2},b' sett':{11:2},b' lead ':{11:2},b' rais':{11:2},b'rent':{11:2},b' remo':{11:2},b'lete ':{11:2},b' fini':{11:2},b' chai':{11:2},b' enou':{11:2},b'buti':{11:2},b' figh':{11:2},b'tanc':{11:2},b'hort ':{11:2},b'nnot ':{11:2},b'ficu':{11:2},b' devi':{11:2},b' long':{11:2},b' head ':{11:2},b'ngth ':{11:2},b' crim':{11:2},b' site ':{11:2},b' than':{11:2},b'rded ':{11:2},b'sary ':{11:2},b' free':{11:2},b' stag':{11:2},b'rect ':{11:2},b' eith':{11:2},b' doub':{11:2},b' told ':{11:2},b' lowe':{11:2},b'ghte':{11:2},b' hard ':{11:2},b'inue ':{11:2},b'nomy ':{11:2},b'pply ':{11:2},b'gher ':{11:2},b' view ':{11:2},b' fail':{11:2},b'ough':{11:2},b' thus ':{11:2},b' ensu':{11:2},b' town ':{11:2},b'viou':{11:2},b'kets ':{11:2},b' octo':{11:2},b' rath':{11:2},b'gree ':{11:2},b'ptio':{11:2},b'\xe2\x80\x99m ':{11:2},b'cern':{11:2},b'main ':{11:2},b'lems ':{11:2},b'alue ':{11:2},b' kill':{11:2},b'iday ':{11:2},b' book':{11:2},b' broa':{11:2},b' abov':{11:2},b' seen ':{11:2},b'aine':{11:2},b'ises ':{11:2},b'bove ':{11:2},b'eign ':{11:2},b'uilt ':{11:2},b' fram':{11:2},b'ogni':{11:2},b' aver':{11:2},b'ntin':{11:2},b'mage ':{11:2},b'luen':{11:2},b' sugg':{11:2},b'eeme':{11:2},b' sear':{11:2},b' whet':{11:2},b' cros':{11:2},b' late ':{11:2},b' chec':{11:2},b'izen':{11:2},b'tish ':{11:2},b'duct ':{11:2},b'rely ':{11:2},b'ysis ':{11:2},b'owth ':{11:2},b' subj':{11:2},b'paig':{11:2},b'hite ':{11:2},b'ctri':{11:2},b' amou':{11:2},b'cate ':{11:2},b'ench ':{11:2},b'rict ':{11:2},b' list ':{11:2},b'refo':{11:2},b'reet ':{11:2},b'pact ':{11:2},b'erms ':{11:2},b' answ':{11:2},b'oyed ':{11:2},b'wled':{11:2},b' rule':{11:2},b'oice ':{11:2},b' floo':{11:2},b'dian ':{11:2},b' ever ':{11:2},b'lack ':{11:2},b' doin':{11:2},b'even ':{11:2},b' moun':{11:2},b'tead ':{11:2},b'acks ':{11:2},b' 
watc':{11:2},b'lysi':{11:2},b' suff':{11:2},b' taki':{11:2},b'ounc':{11:2},b' staf':{11:2},b'rain ':{11:2},b'denc':{11:2},b' east ':{11:2},b' win ':{11:2},b' annu':{11:2},b' fath':{11:2},b' degr':{11:2},b'merc':{11:2},b' seem':{11:2},b'iles ':{11:2},b' risk ':{11:2},b' head':{11:2},b'isit ':{11:2},b'amed ':{11:2},b' car ':{11:2},b'pose ':{11:2},b' purp':{11:2},b' park ':{11:2},b' cred':{11:2},b'dual ':{11:2},b'erta':{11:2},b'lian ':{11:2},b'ctro':{11:2},b'rown ':{11:2},b' laun':{11:2},b' equa':{11:2},b' move ':{11:2},b'arat':{11:2},b'inly ':{11:2},b' accu':{11:2},b' fren':{11:2},b"sn't ":{11:2},b'lowe':{11:2},b'rite ':{11:2},b'ract ':{11:2},b' emer':{11:2},b'ryth':{11:2},b' winn':{11:2},b' what':{11:2},b'ngin':{11:2},b' word ':{11:2},b' does':{11:2},b' face ':{11:2},b'iers ':{11:2},b' vote':{11:2},b'fety ':{11:2},b' type ':{11:2},b'rday ':{11:2},b'hare ':{11:2},b'acts ':{11:2},b'taff ':{11:2},b'tact ':{11:2},b' near ':{11:2},b' aske':{11:2},b' pay ':{11:2},b' talk':{11:2},b'zens ':{11:2},b'estm':{11:2},b'sian ':{11:2},b' lost ':{11:2},b' born ':{11:2},b' advi':{11:2},b' you\xe2':{11:2},b' quic':{11:2},b' eigh':{11:2},b' expa':{11:2},b'vels ':{11:2},b'thin':{11:2},b' satu':{11:2},b"on's ":{11:2},b' noth':{11:2},b' act ':{11:2},b' tax ':{11:2},b' goal ':{11:2},b'ount':{11:2},b' true ':{11:2},b'onth ':{11:2},b' scal':{11:2},b' feel ':{11:2},b'endi':{11:2},b'eved ':{11:2},b'onic ':{11:2},b' near':{11:2},b' moth':{11:2},b' runn':{11:2},b'oura':{11:2},b'rime ':{11:2},b' affe':{11:2},b'nior ':{11:2},b' adop':{11:2},b'oods ':{11:2},b'turn ':{11:2},b' pain':{11:2},b' room ':{11:2},b' rate ':{11:2},b'uite ':{11:2},b' vote ':{11:2},b' gett':{11:2},b' upon ':{11:2},b'inee':{11:2},b' foot':{11:2},b' view':{11:2},b'dual':{11:2},b'eful ':{11:2},b' heav':{11:2},b' frid':{11:2},b' self ':{11:2},b' leng':{11:2},b'hang':{11:2},b' itse':{11:2},b' behi':{11:2},b'hind ':{11:2},b' deal ':{11:2},b'mply ':{11:2},b'face ':{11:2},b' noti':{11:2},b'ryon':{11:2},b'tyle ':{11:2},b'ford 
':{11:2},b'tire ':{11:2},b'oint':{11:2},b'pite ':{11:2},b'ddle ':{11:2},b'kely ':{11:2},b'eeks ':{11:2},b'idge ':{11:2},b' user':{11:2},b'elow ':{11:2},b' reme':{11:2},b' forw':{11:2},b'msel':{11:1},b' hour':{11:1},b'nned ':{11:1},b'time ':{11:1},b' eart':{11:1},b' acad':{11:1},b'rshi':{11:1},b'ghly ':{11:1},b'nued ':{11:1},b"dn't ":{11:1},b' squa':{11:1},b' pack':{11:1},b' peac':{11:1},b' stop ':{11:1},b' page ':{11:1},b' won ':{11:1},b'cult ':{11:1},b'racy ':{11:1},b'gton ':{11:1},b'ksho':{11:1},b' neig':{11:1},b'sful ':{11:1},b'ghbo':{11:1},b'ttee ':{11:1},b'iati':{11:1},b'tute ':{11:1},b' roun':{11:1},b'hows ':{11:1},b'mary ':{11:1},b'urda':{11:1},b' wide ':{11:1},b'ruar':{11:1},b'tome':{11:1},b'rful ':{11:1},b' ago ':{11:1},b' choi':{11:1},b' gas ':{11:1},b'atme':{11:1},b'inni':{11:1},b'quen':{11:1},b' army ':{11:1},b' libr':{11:1},b'rams ':{11:1},b' inju':{11:1},b'aign ':{11:1},b'ream ':{11:1},b' easi':{11:1},b' coac':{11:1},b'hild ':{11:1},b'stly ':{11:1},b' rout':{11:1},b' step ':{11:1},b' easy ':{11:1},b'rget ':{11:1},b'lect ':{11:1},b'arry ':{11:1},b' quit':{11:1},b' sure ':{11:1},b'rged ':{11:1},b'uble ':{11:1},b'lism ':{11:1},b' gave ':{11:1},b'ufac':{11:1},b' surf':{11:1},b' meet ':{11:1},b'nian ':{11:1},b' soon ':{11:1},b' freq':{11:1},b'ssur':{11:1},b" i'm ":{11:1},b'sica':{11:1},b' pict':{11:1},b'cove':{11:1},b' user ':{11:1},b'work':{11:1},b' hope ':{11:1},b'pris':{11:1},b'ypes ':{11:1},b' coup':{11:1},b' deca':{11:1},b' uk ':{11:1},b' enab':{11:1},b'aded ':{11:1},b'nabl':{11:1},b'many ':{11:1},b' topi':{11:1},b' matc':{11:1},b' size ':{11:1},b'more ':{11:1},b'sume':{11:1},b'mous ':{11:1},b' mond':{11:1},b' judg':{11:1},b' king ':{11:1},b' piec':{11:1},b' turn ':{11:1},b' try ':{11:1},b' beau':{11:1},b' thur':{11:1},b' exch':{11:1},b'reed ':{11:1},b'elop ':{11:1},b'read ':{11:1},b' scre':{11:1},b'unch ':{11:1},b'lock ':{11:1},b' dise':{11:1},b'iews ':{11:1},b'ntia':{11:1},b'duat':{11:1},b' type':{11:1},b' oil ':{11:1},b' mech':{11:1},b'uate 
':{11:1},b'ibed ':{11:1},b'reer ':{11:1},b'nson ':{11:1},b'ppin':{11:1},b'onse ':{11:1},b' titl':{11:1},b'uild ':{11:1},b'rily ':{11:1},b'eads ':{11:1},b' tool':{11:1},b'oked ':{11:1},b'eate':{11:1},b'rote ':{11:1},b' hono':{11:1},b' hims':{11:1},b'fits ':{11:1},b' ways ':{11:1},b' tues':{11:1},b' east':{11:1},b'erly ':{11:1},b' wrot':{11:1},b'inds ':{11:1},b" 's ":{11:1},b"re's ":{11:1},b'eart ':{11:1},b'sual ':{11:1},b'cale ':{11:1},b'eave ':{11:1},b' sale':{11:1},b'tegy ':{11:1},b' wast':{11:1},b'embl':{11:1},b' brot':{11:1},b' phon':{11:1},b' rest ':{11:1},b' brid':{11:1},b'rack ':{11:1},b'hniq':{11:1},b'hood ':{11:1},b' king':{11:1},b'eive ':{11:1},b'earn ':{11:1},b'ienc':{11:1},b'call':{11:1},b' surr':{11:1},b' mean ':{11:1},b'reak ':{11:1},b' brou':{11:1},b'eers ':{11:1},b'airs ':{11:1},b' bloo':{11:1},b'ectu':{11:1},b're\xe2\x80':{11:1},b' canc':{11:1},b'akin':{11:1},b'erno':{11:1},b'vinc':{11:1},b'mewo':{11:1},b'rced ':{11:1},b' surp':{11:1},b'tack ':{11:1},b'cket ':{11:1},b'nsfe':{11:1},b' anyt':{11:1},b'tipl':{11:1},b'lain':{11:1},b' band ':{11:1},b' net ':{11:1},b' rati':{11:1},b' sea ':{11:1},b' saw ':{11:1},b" you'":{11:1},b'engt':{11:1},b'igns ':{11:1},b' crow':{11:1},b' bill ':{11:1},b'efit':{11:1},b' loss ':{11:1},b'uenc':{11:1},b'labo':{11:1},b'chas':{11:1},b'plex ':{11:1},b' clou':{11:1},b' hit ':{11:1},b'ndat':{11:1},b'appy ':{11:1},b' gard':{11:1},b'aker ':{11:1},b' daug':{11:1},b'ectl':{11:1},b'imed ':{11:1},b'isis ':{11:1},b' cath':{11:1},b'room ':{11:1},b'elds ':{11:1},b'town ':{11:1},b'ston ':{11:1},b'lues ':{11:1},b'heck ':{11:1},b'rast':{11:1},b' uniq':{11:1},b'shes ':{11:1},b' purc':{11:1},b'\x80\x99ll ':{11:1},b'lood ':{11:1},b'etty ':{11:1},b' appo':{11:1},b'imum ':{11:1},b' wash':{11:1},b' walk':{11:1},b' deal':{11:1},b'lley ':{11:1},b' woma':{11:1},b' bit ':{11:1},b' shap':{11:1},b' wedn':{11:1},b'enue ':{11:1},b'nesd':{11:1},b' pm ':{11:1},b' tell ':{11:1},b'luat':{11:1},b' eval':{11:1},b'lize':{11:1},b'cedu':{11:1},b' 
scot':{11:1},b' ship':{11:1},b' touc':{11:1},b'edul':{11:1},b' good':{11:1},b'rtin':{11:1},b'head ':{11:1},b' stoc':{11:1},b' cut ':{11:1},b' agri':{11:1},b' four':{11:1},b'oose ':{11:1},b'tect ':{11:1},b'ocat':{11:1},b' choo':{11:1},b' colu':{11:1},b'nnel ':{11:1},b'eory ':{11:1},b'tist ':{11:1},b'ltur':{11:1},b' avoi':{11:1},b'ntua':{11:1},b'reek ':{11:1},b' weig':{11:1},b' olde':{11:1},b'avio':{11:1},b' coas':{11:1},b'nism ':{11:1},b'mote ':{11:1},b' beyo':{11:1},b'tery ':{11:1},b' ii ':{11:1},b' abil':{11:1},b' cycl':{11:1},b'roun':{11:1},b'lays ':{11:1},b' danc':{11:1},b'bute':{11:1},b'hest ':{11:1},b'eats ':{11:1},b' wond':{11:1},b' morn':{11:1},b' phas':{11:1},b'tbal':{11:1},b'yond ':{11:1},b'plai':{11:1},b' talk ':{11:1},b'efit ':{11:1},b' mind ':{11:1},b'mely ':{11:1},b'back ':{11:1},b'uire ':{11:1},b'edly ':{11:1},b' clic':{11:1},b'eone ':{11:1},b' kids ':{11:1},b' chie':{11:1},b' gmt ':{11:1},b' yout':{11:1},b'hall ':{11:1},b' full':{11:1},b' exac':{11:1},b'nsla':{11:1},b' met ':{11:1},b'ipme':{11:1},b'sely ':{11:1},b'swer ':{11:1},b'aini':{11:1},b'ocks ':{11:1},b'heme ':{11:1},b' patt':{11:1},b'raph':{11:1},b'deas ':{11:1},b'tone ':{11:1},b'ndia ':{11:1},b'ardi':{11:1},b' wron':{11:1},b' tryi':{11:1},b'ffic ':{11:1},b'asio':{11:1},b'arth ':{11:1},b'rene':{11:1},b'eems ':{11:1},b'peed ':{11:1},b' jobs ':{11:1},b'wned ':{11:1},b'eals ':{11:1},b'road ':{11:1},b'asic ':{11:1},b' exci':{11:1},b' can\xe2':{11:1},b' wors':{11:1},b'hods ':{11:1},b' nov ':{11:1},b' dail':{11:1},b'grap':{11:1},b'usan':{11:1},b' uses ':{11:1},b' draw':{11:1},b' voic':{11:1},b'aily ':{11:1},b'vidi':{11:1},b' gues':{11:1},b' bott':{11:1},b' he\xe2\x80':{11:1},b'nues ':{11:1},b' ones ':{11:1},b'ckin':{11:1},b' lice':{11:1},b'text ':{11:1},b'olve ':{11:1},b'empt':{11:1},b' stak':{11:1},b' race ':{11:1},b' chap':{11:1},b'rney ':{11:1},b'erry ':{11:1},b' shal':{11:1},b'eace ':{11:1},b' feel':{11:1},b' poll':{11:1},b'eles':{11:1},b'wood ':{11:1},b'eets ':{11:1},b' ask ':{11:1},b' add 
':{11:1},b'bate ':{11:1},b' acqu':{11:1},b'eard ':{11:1},b' goal':{11:1},b' shel':{11:1},b' boun':{11:1},b'lone ':{11:1},b'rust ':{11:1},b' stay ':{11:1},b'roxi':{11:1},b' exhi':{11:1},b' life':{11:1},b' nume':{11:1},b' john':{11:1},b'cide ':{11:1},b'thod ':{11:1},b' fres':{11:1},b'otia':{11:1},b'tuti':{11:1},b'erou':{11:1},b'lain ':{11:1},b'ixed ':{11:1},b'gine ':{11:1},b'ifie':{11:1},b'bled ':{11:1},b' whos':{11:1},b'emic ':{11:1},b'ogue ':{11:1},b'hief ':{11:1},b' wasn':{11:1},b' mino':{11:1},b'ckly ':{11:1},b' rule ':{11:1},b' san ':{11:1},b' wife ':{11:1},b'mari':{11:1},b'rism ':{11:1},b' suit':{11:1},b' lake ':{11:1},b' anyo':{11:1},b' cell':{11:1},b'iend ':{11:1},b'aphy ':{11:1},b' hall ':{11:1},b' shoo':{11:1},b'orse ':{11:1},b'hing':{11:1},b' shot ':{11:1},b' brok':{11:1},b' lack ':{11:1},b'osal ':{11:1},b'bute ':{11:1},b'urth ':{11:1},b'njoy ':{11:1},b'ewed ':{11:1},b' carb':{11:1},b'sn\xe2\x80':{11:1},b'nked ':{11:1},b' paid ':{11:1},b'lize ':{11:1},b'ifor':{11:1},b' birt':{11:1},b'rick ':{11:1},b'siti':{11:1},b' came':{11:1},b' goes ':{11:1},b'aint ':{11:1},b' felt ':{11:1},b' data':{11:1},b' trie':{11:1},b' albu':{11:1},b' rene':{11:1},b'tled ':{11:1},b' sign ':{11:1},b' adul':{11:1},b' mark ':{11:1},b' sayi':{11:1},b'agin':{11:1},b'tify ':{11:1},b"e're ":{11:1},b' buy ':{11:1},b'olds ':{11:1},b'pear ':{11:1},b' fish':{11:1},b'icin':{11:1},b'tude ':{11:1},b'urne':{11:1},b' evol':{11:1},b'redi':{11:1},b' stea':{11:1},b'ycle ':{11:1},b' note ':{11:1},b" we'r":{11:1},b'urat':{11:1},b' bigg':{11:1},b'essm':{11:1},b'ulti':{11:1},b' file':{11:1},b' huge ':{11:1},b'tity ':{11:1},b'craf':{11:1},b'pent ':{11:1},b'icer ':{11:1},b'lace':{11:1},b' rate':{11:1},b'htin':{11:1},b' laws ':{11:1},b' mass ':{11:1},b'togr':{11:1},b' heat ':{11:1},b'ault ':{11:1},b'haps ':{11:1},b'uple ':{11:1},b' chic':{11:1},b"en's ":{11:1},b' blue ':{11:1},b'ropr':{11:1},b' hope':{11:1},b' givi':{11:1},b'uing ':{11:1},b' obta':{11:1},b' doma':{11:1},b'ceed':{11:1},b' 
dang':{11:1},b'mica':{11:1},b'uran':{11:1},b'\xe2\x80\x99r':{11:1},b'pari':{11:1},b' airc':{11:1},b' gath':{11:1},b'rves ':{11:1},b' door ':{11:1},b'hown ':{11:1},b' perh':{11:1},b' weat':{11:1},b' drug ':{11:1},b'iall':{11:1},b'iece ':{11:1},b'olic ':{11:1},b' dome':{11:1},b' mayb':{11:1},b'tial':{11:1},b' join ':{11:1},b' rock ':{11:1},b' poor ':{11:1},b'wide ':{11:1},b'aybe ':{11:1},b'rnia ':{11:1},b'aled ':{11:1},b' fema':{11:1},b'loor ':{11:1},b' enha':{11:1},b'plie':{11:1},b' fun ':{11:1},b' airp':{11:1},b'itle ':{11:1},b'lped ':{11:1},b'mith ':{11:1},b' aim ':{11:1},b'oals ':{11:1},b'wise ':{11:1},b'uled ':{11:1},b' dyna':{11:1},b'ribe ':{11:1},b'ibly ':{11:1},b'ibit':{11:1},b'uare ':{11:1},b' flow':{11:1},b'dict':{11:1},b'pend ':{11:1},b'ious':{11:1},b' chos':{11:1},b'nist ':{11:1},b' sold ':{11:1},b'loym':{11:1},b'turi':{11:1},b' flig':{11:1},b' anti ':{11:1},b'sily ':{11:1},b' dead ':{11:1},b'opic ':{11:1},b' grap':{11:1},b' rapi':{11:1},b' sell':{11:1},b' drea':{11:1},b' paym':{11:1},b' wood':{11:1},b' turk':{11:1},b'bine':{11:1},b'cene ':{11:1},b'gure ':{11:1},b'eare':{11:1},b' tool ':{11:1},b' mayo':{11:1},b' mid ':{11:1},b' occa':{11:1},b' safe ':{11:1},b' kept ':{11:1},b' chea':{11:1},b' hors':{11:1},b' cere':{11:1},b'pics ':{11:1},b' core ':{11:1},b'izes ':{11:1},b' link ':{11:1},b' wall':{11:1},b"an't ":{11:1},b' corn':{11:1},b"an's ":{11:1},b" can'":{11:1},b'moti':{11:1},b' else ':{11:1},b'bing ':{11:1},b'fide':{11:1},b' recr':{11:1},b' unli':{11:1},b'nage ':{11:1},b'utif':{11:1},b' appa':{11:1},b' samp':{11:1},b' nine ':{11:1},b'sist ':{11:1},b'gins ':{11:1},b'sels ':{11:1},b'tinc':{11:1},b'iful ':{11:1},b'rges ':{11:1},b'pene':{11:1},b' seek':{11:1},b' pola':{11:1},b' fund ':{11:1},b'mony ':{11:1},b' hydr':{11:1},b'edom ':{11:1},b'lenc':{11:1},b' sale ':{11:1},b'empt ':{11:1},b'gate ':{11:1},b' trut':{11:1},b'phic ':{11:1},b'ongs ':{11:1},b' runs ':{11:1},b'oyal ':{11:1},b'ndly ':{11:1},b' brai':{11:1},b'lthy ':{11:1},b'itch ':{11:1},b' 
fift':{11:1},b'loso':{11:1},b'cise ':{11:1},b' pass ':{11:1},b'late':{11:1},b'nses ':{11:1},b'dary ':{11:1},b' site':{11:1},b' earn':{11:1},b'rrow ':{11:1},b'yees ':{11:1},b'amic ':{11:1},b' pick':{11:1},b'laim ':{11:1},b' pre ':{11:1},b'owin':{11:1},b' save ':{11:1},b' simu':{11:1},b'guis':{11:1},b' outl':{11:1},b' jack':{11:1},b'cine ':{11:1},b' item':{11:1},b'eavy ':{11:1},b' hour ':{11:1},b'care ':{11:1},b'mory ':{11:1},b' weap':{11:1},b' swit':{11:1},b' holi':{11:1},b'spap':{11:1},b'well ':{11:1},b'riat':{11:1},b' fair ':{11:1},b'lted ':{11:1},b' emph':{11:1},b'luti':{11:1},b' ulti':{11:1},b"y're ":{11:1},b' file ':{11:1},b' knew ':{11:1},b' emis':{11:1},b' keep':{11:1},b' heat':{11:1},b' deep ':{11:1},b'urre':{11:1},b'uide ':{11:1},b'erfu':{11:1},b' host ':{11:1},b'riag':{11:1},b'emed ':{11:1},b'dges ':{11:1},b' jame':{11:1},b'ancy ':{11:1},b' fait':{11:1},b'tail ':{11:1},b'oast ':{11:1},b' song ':{11:1},b' road':{11:1},b'sued ':{11:1},b'mple':{11:1},b'come':{11:1},b' song':{11:1},b'rvey ':{11:1},b' lati':{11:1},b' fish ':{11:1},b'keho':{11:1},b'oken ':{11:1},b'core ':{11:1},b'eech ':{11:1},b'uent ':{11:1},b'void ':{11:1},b'lary ':{11:1},b'olar':{11:1},b'west ':{11:1},b' seat':{11:1},b'ovie ':{11:1},b' depl':{11:1},b'cism ':{11:1},b'egin ':{11:1},b'aise ':{11:1},b' bull':{11:1},b' gain':{11:1},b' harm':{11:1},b'ooms ':{11:1},b' paul ':{11:1},b'iabl':{11:1},b' tell':{11:1},b'tles ':{11:1},b' stuf':{11:1},b'nica':{11:1},b'oads ':{11:1},b'eant ':{11:1},b'cape ':{11:1},b" he's ":{11:1},b'xist ':{11:1},b'lson ':{11:1},b'ssin':{11:1},b'hasi':{11:1},b'pate ':{11:1},b'mitm':{11:1},b'cast ':{11:1},b'deed ':{11:1},b'rbon ':{11:1},b' arts ':{11:1},b' sun ':{11:1},b'lite ':{11:1},b' send ':{11:1},b'rcis':{11:1},b' task ':{11:1},b' texa':{11:1},b'egat':{11:1},b'bour ':{11:1},b'ompa':{11:1},b'sals ':{11:1},b' hot ':{11:1},b'ruth ':{11:1},b'nsmi':{11:1},b' burn':{11:1},b' enfo':{11:1},b' ston':{11:1},b' aims ':{11:1},b'en\xe2\x80':{11:1},b' ice ':{11:1},b'seum ':{11:1},b' 
hunt':{11:1},b' brig':{11:1},b'r\xe2\x80\x99':{11:1},b'lict ':{11:1},b' rise ':{11:1},b'toms ':{11:1},b'izat':{11:1},b' ocea':{11:1},b' rank':{11:1},b' wall ':{11:1},b' seem ':{11:1},b' cell ':{11:1},b' feed':{11:1},b' quot':{11:1},b'oors ':{11:1},b' shif':{11:1},b'kgro':{11:1},b' deco':{11:1},b' outp':{11:1},b' savi':{11:1},b' hear ':{11:1},b'opes ':{11:1},b' cars ':{11:1},b' fuel ':{11:1},b' epis':{11:1},b'e\xe2\x80\x99':{11:1},b'reme ':{11:1},b'lean ':{11:1},b'egic ':{11:1},b' solv':{11:1},b'iage ':{11:1},b'lton ':{11:1},b'pire ':{11:1},b' fair':{11:1},b' harr':{11:1},b' warr':{11:1},b'gica':{11:1},b'rish ':{11:1},b'anes':{11:1},b' card ':{11:1},b' outc':{11:1},b'emon':{11:1},b'urns ':{11:1},b'like ':{11:1},b' ahea':{11:1},b'adia':{11:1},b' fell':{11:1},b'hlig':{11:1},b' dela':{11:1},b' chro':{11:1},b' gets ':{11:1},b'nsit':{11:1},b'aile':{11:1},b'sfer ':{11:1},b'abas':{11:1},b' debt ':{11:1},b' unco':{11:1},b'xual ':{11:1},b'owns ':{11:1},b' less':{11:1},b'nati':{11:1},b' unif':{11:1},b'uggl':{11:1},b' visu':{11:1},b'ario ':{11:1},b'clud':{11:1},b' tour ':{11:1},b' yes ':{11:1},b'holi':{11:1},b' farm ':{11:1},b' brie':{11:1},b'itud':{11:1},b'pens ':{11:1},b' draf':{11:1},b'htly ':{11:1},b'hank ':{11:1},b' shop':{11:1},b'exas ':{11:1},b'sman ':{11:1},b' walk ':{11:1},b'teps ':{11:1},b'uous ':{11:1},b'esis ':{11:1},b'azin':{11:1},b'oric ':{11:1},b' swed':{11:1},b'erag':{11:1},b'tuff ':{11:1},b'adca':{11:1},b' grow ':{11:1},b' laye':{11:1},b'iced ':{11:1},b' purs':{11:1},b'reci':{11:1},b'asts ':{11:1},b'ight':{11:1},b'isms ':{11:1},b'eous ':{11:1},b' inva':{11:1},b'mies ':{11:1},b' anyw':{11:1},b'tlan':{11:1},b'peak ':{11:1},b' inpu':{11:1},b' gone ':{11:1},b'dely ':{11:1},b' sequ':{11:1},b' usef':{11:1},b'lbum ':{11:1},b' sets ':{11:1},b' husb':{11:1},b'nite ':{11:1},b'mpus ':{11:1},b'logu':{11:1},b'cern ':{11:1},b'omer ':{11:1},b'tood ':{11:1},b' drop':{11:1},b' box ':{11:1},b' town':{11:1},b'rtly ':{11:1},b' your':{11:1},b'apan ':{11:1},b'efer ':{11:1},b'alks 
':{11:1},b'thor ':{11:1},b' pitc':{11:1},b'eara':{11:1},b'icts ':{11:1},b'aith ':{11:1},b' spai':{11:1},b' taxe':{11:1},b'tmen':{11:1},b'ofit ':{11:1},b' poly':{11:1},b' cold ':{11:1},b' reje':{11:1},b'tise ':{11:1},b' labe':{11:1},b' ordi':{11:1},b' fune':{11:1},b' catc':{11:1},b' affo':{11:1},b' nice ':{11:1},b'alty ':{11:1},b'senc':{11:1},b'\xe2\x80\x99d ':{11:1},b'wner ':{11:1},b'lked ':{11:1},b' trou':{11:1},b'gory ':{11:1},b' narr':{11:1},b' unfo':{11:1},b'orce':{11:1},b'dule ':{11:1},b' murd':{11:1},b' slee':{11:1},b'cuss ':{11:1},b' shee':{11:1},b'ucin':{11:1},b'ulti ':{11:1},b'mise':{11:1},b'take ':{11:1},b'xtra ':{11:1},b'asur':{11:1},b'lure ':{11:1},b'liam ':{11:1},b'mbly ':{11:1},b' clot':{11:1},b' hill ':{11:1},b' biol':{11:1},b'ayor ':{11:1},b'rnam':{11:1},b'ishm':{11:1},b'kage ':{11:1},b'cipl':{11:1},b'eled ':{11:1},b'ilms ':{11:1},b' feet ':{11:1},b'ulty ':{11:1},b' trip ':{11:1},b'igra':{11:1},b' scar':{11:1},b'pter ':{11:1},b'axes ':{11:1},b' hone':{11:1},b' wait':{11:1},b"u're ":{11:1},b'aigh':{11:1},b'udge ':{11:1},b' aggr':{11:1},b' priz':{11:1},b' gain ':{11:1},b' flow ':{11:1},b'lief ':{11:1},b'atin ':{11:1},b' toug':{11:1},b' miss ':{11:1},b'cure ':{11:1},b'mall':{11:1},b'iest ':{11:1},b' rura':{11:1},b' aren':{11:1},b'wish ':{11:1},b' cash ':{11:1},b'ompl':{11:1},b' hung':{11:1},b' anci':{11:1},b' soil ':{11:1},b' slow':{11:1},b'cian':{11:1},b' guy ':{11:1},b' bodi':{11:1},b'idat':{11:1},b'days ':{11:1},b'rael ':{11:1},b' mole':{11:1},b'aril':{11:1},b' oct ':{11:1},b'dden ':{11:1},b"nt's ":{11:1},b'icti':{11:1},b'down ':{11:1},b' famo':{11:1},b' athl':{11:1},b' mexi':{11:1},b' risk':{11:1},b'pain ':{11:1},b' foot ':{11:1},b'anel ':{11:1},b'hema':{11:1},b'rupt':{11:1},b'lume ':{11:1},b' nor ':{11:1},b' fash':{11:1},b'nels ':{11:1},b'tras':{11:1},b'rsel':{11:1},b' chee':{11:1},b' jewi':{11:1},b' cutt':{11:1},b' male ':{11:1},b' sacr':{11:1},b'ttom ':{11:1},b'irit ':{11:1},b'only ':{11:1},b'ness':{11:1},b'base ':{11:1},b'oops ':{11:1},b'gged 
':{11:1},b' caro':{11:1},b'imit ':{11:1},b' deni':{11:1},b'ldwi':{11:1},b'aths ':{11:1},b'eece ':{11:1},b' rich ':{11:1},b' worr':{11:1},b' more':{11:1},b'ilot ':{11:1},b' atla':{11:1},b'irth ':{11:1},b'eaks ':{11:1},b'eove':{11:1},b'gari':{11:1},b' swee':{11:1},b'chol':{11:1},b' empi':{11:1},b' cruc':{11:1},b' tree ':{11:1},b'cian ':{11:1},b'ewer ':{11:1},b'onen':{11:1},b'hape ':{11:1},b' vete':{11:1},b' bask':{11:1},b' atto':{11:1},b'bers':{11:1},b' deep':{11:1},b'nied ':{11:1},b'preh':{11:1},b' dark ':{11:1},b'ousl':{11:1},b' slig':{11:1},b'orne':{11:1},b'itim':{11:1},b' quee':{11:1},b'resh ':{11:1},b' rail':{11:1},b'ibes ':{11:1},b' phar':{11:1},b' asia ':{11:1},b'gage ':{11:1},b'urin':{11:1},b' path ':{11:1},b'oute ':{11:1},b' emot':{11:1},b'anet ':{11:1},b' mine':{11:1},b'gely ':{11:1},b'otin':{11:1},b'part ':{11:1},b' smok':{11:1},b"ne's ":{11:1},b'ecul':{11:1},b' latt':{11:1},b' remi':{11:1},b'cate':{11:1},b'monl':{11:1},b'mmit ':{11:1},b' ran ':{11:1},b'ophy ':{11:1},b' bear':{11:1},b' stev':{11:1},b'erab':{11:1},b'tlem':{11:1},b' whom ':{11:1},b'ommo':{11:1},b'uces ':{11:1},b'gard ':{11:1},b'vior ':{11:1},b' vice ':{11:1},b' sell ':{11:1},b' guys ':{11:1},b'eter':{11:1},b'aims ':{11:1},b' wish ':{11:1},b' towe':{11:1},b' wood ':{11:1},b'dure ':{11:1},b'ndow ':{11:1},b'ribu':{11:1},b'mond ':{11:1},b' comf':{11:1},b'ival':{11:1},b' bay ':{11:1},b'seng':{11:1},b'ikes ':{11:1},b'ntow':{11:1},b'gero':{11:1},b'rybo':{11:1},b' phen':{11:1},b'ride ':{11:1},b' boys ':{11:1},b'nium ':{11:1},b' nd ':{11:1},b'enth ':{11:1},b' neit':{11:1},b' map ':{11:1},b' troo':{11:1},b' fixe':{11:1},b'ruly ':{11:1},b'aved ':{11:1},b' wind ':{11:1},b' bus ':{11:1},b'cade ':{11:1},b' tast':{11:1},b' stay':{11:1},b'ntry':{11:1},b' surg':{11:1},b' dead':{11:1},b' fear ':{11:1},b' page':{11:1},b' weal':{11:1},b'roke ':{11:1},b' aris':{11:1},b' trul':{11:1},b'uent':{11:1},b'hine':{11:1},b' igno':{11:1},b'hens':{11:1},b'aped ':{11:1},b'elps ':{11:1},b'rifi':{11:1},b'roud ':{11:1},b'erce 
':{11:1},b'sier ':{11:1},b' onto ':{11:1},b'tton ':{11:1},b' abus':{11:1},b' gend':{11:1},b' voca':{11:1},b'uard ':{11:1},b'rugs ':{11:1},b' silv':{11:1},b'fted ':{11:1},b'ccur ':{11:1},b'raph ':{11:1},b' eye ':{11:1},b' mary ':{11:1},b' tree':{11:1},b'lery ':{11:1},b'zine ':{11:1},b' eyes ':{11:1},b'nput ':{11:1},b'umer ':{11:1},b'qual ':{11:1},b' taug':{11:1},b' nurs':{11:1},b' scri':{11:1},b' spli':{11:1},b' ceme':{11:1},b'dged ':{11:1},b' isn\xe2':{11:1},b'othe':{11:1},b'hain ':{11:1},b' wave':{11:1},b'sory ':{11:1},b'fiel':{11:1},b'nshi':{11:1},b'terd':{11:1},b' rain':{11:1},b'isks ':{11:1},b'olid ':{11:1},b'phic':{11:1},b'ewhe':{11:1},b' last':{11:1},b' ms ':{11:1},b' aven':{11:1},b'ught':{11:1},b'eale':{11:1},b'ainm':{11:1},b'epts ':{11:1},b'riti':{11:1},b"ry's ":{11:1},b' heri':{11:1},b' endo':{11:1},b' sher':{11:1},b'gmen':{11:1},b'orho':{11:1},b'istm':{11:1},b' enem':{11:1},b'oots ':{11:1},b'abor ':{11:1},b'tile ':{11:1},b'ouch ':{11:1},b'hair ':{11:1},b'eply ':{11:1},b'derf':{11:1},b'cean ':{11:1},b' ship ':{11:1},b' cras':{11:1},b' depr':{11:1},b' dism':{11:1},b' craf':{11:1},b'odat':{11:1},b' vary ':{11:1},b' caug':{11:1},b'ldn\xe2':{11:1},b' fell ':{11:1},b'nize ':{11:1},b'gnal ':{11:1},b'troy':{11:1},b' irel':{11:1},b'play':{11:1},b'icit ':{11:1},b' virg':{11:1},b'uick ':{11:1},b'teve':{11:1},b'cope ':{11:1},b'iams ':{11:1},b"ty's ":{11:1},b' wine ':{11:1},b' lose ':{11:1},b' cast ':{11:1},b'osin':{11:1},b'enly ':{11:1},b'rnor ':{11:1},b' scan':{11:1},b'toco':{11:1},b' girl ':{11:1},b'pons ':{11:1},b'ntum ':{11:1},b' yard ':{11:1},b' grew ':{11:1},b'itag':{11:1},b' push':{11:1},b' sain':{11:1},b"ia's ":{11:1},b'nows ':{11:1},b' boy ':{11:1},b' voti':{11:1},b'viet ':{11:1},b'ncil':{11:1},b' soph':{11:1},b'rons ':{11:1},b' atti':{11:1},b'ound':{11:1},b' neut':{11:1},b'onor ':{11:1},b'pher ':{11:1},b'ifth ':{11:1},b'tmas ':{11:1},b' door':{11:1},b' harv':{11:1},b' fit ':{11:1},b' path':{11:1},b'tput ':{11:1},b'mble ':{11:1},b'etry ':{11:1},b' 
whee':{11:1},b'cott ':{11:1},b'tche':{11:1},b' affi':{11:1},b' pain ':{11:1},b'shin':{11:1},b' firm ':{11:1},b' twen':{11:1},b' liqu':{11:1},b'osis ':{11:1},b'died ':{11:1},b'rmor':{11:1},b'stle ':{11:1},b' unle':{11:1},b' hori':{11:1},b' acts ':{11:1},b' eat ':{11:1},b'hted ':{11:1},b'uabl':{11:1},b'reat':{11:1},b' city':{11:1},b'ranc':{11:1},b' pray':{11:1},b'gued ':{11:1},b'peni':{11:1},b'irds ':{11:1},b'umed ':{11:1},b' uppe':{11:1},b'eels ':{11:1},b' kitc':{11:1},b'sess':{11:1},b'ghtl':{11:1},b' code':{11:1},b'ceme':{11:1},b' rota':{11:1},b"le's ":{11:1},b'ript ':{11:1},b' drop ':{11:1},b' juni':{11:1},b' wire':{11:1},b' shak':{11:1},b'isor':{11:1},b' shoc':{11:1},b' emba':{11:1},b' anth':{11:1},b'rway ':{11:1},b'rsal ':{11:1},b' cree':{11:1},b'seho':{11:1},b'iary ':{11:1},b'nous ':{11:1},b'rast ':{11:1},b'ishi':{11:1},b' jesu':{11:1},b'rton ':{11:1},b'demy ':{11:1},b'uity ':{11:1},b' heig':{11:1},b' synt':{11:1},b'repr':{11:1},b'sets ':{11:1},b'ceiv':{11:1},b'kins ':{11:1},b' edge ':{11:1},b' beat ':{11:1},b' tack':{11:1},b'nome':{11:1},b'ssic':{11:1},b'eful':{11:1},b' neur':{11:1},b'rash ':{11:1},b'tees ':{11:1},b'lthc':{11:1},b'eric ':{11:1},b' mixe':{11:1},b'lane ':{11:1},b" i've ":{11:1},b' rd ':{11:1},b'ilar':{11:1},b' sixt':{11:1},b'sume ':{11:1},b'hion ':{11:1},b' nano':{11:1},b'rowd ':{11:1},b'deal ':{11:1},b' mars':{11:1},b'orry ':{11:1},b' crew ':{11:1},b' zero ':{11:1},b'hors ':{11:1},b'rmac':{11:1},b' wait ':{11:1}}
7 | }


--------------------------------------------------------------------------------
/eld/subsetResult.py:
--------------------------------------------------------------------------------
 1 | # Copyright 2023 Nito T.M.
 2 | # License https://www.apache.org/licenses/LICENSE-2.0 Apache-2.0
 3 | # Author Nito T.M. (https://github.com/nitotm)
 4 | # Package pypi.org/project/eld/
 5 | 
 6 | class SubsetResult:
 7 |     def __init__(self, success, languages=None, error=None, file=None):
 8 |         self.success = success
 9 |         self.languages = list(languages.values()) if languages else None
10 |         self.error = error
11 |         self.file = file
12 | 


--------------------------------------------------------------------------------
/eld/tests/test_detector.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import os
 3 | import sys
 4 | 
 5 | # Make sure the local package is imported instead of the pip-installed package
 6 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 7 | sys.path.insert(0, project_root)  # prioritize the local package
 8 | # sys.path.append('../..')
 9 | 
10 | from eld import LanguageDetector
11 | from eld.languageDetector import get_clean_txt
12 | 
13 | 
14 | # Mostly functional testing, when functions are more mature I will add some more unit tests
15 | 
16 | def test_print_version():
17 |     detector = LanguageDetector()
18 |     print('ELD ver. ' + detector.VERSION)
19 |     assert True
20 | 
21 | 
22 | def test_load_eld():
23 |     detector = LanguageDetector()
24 |     assert isinstance(detector, LanguageDetector)
25 | 
26 | 
27 | def test_simple_detect():
28 |     detector = LanguageDetector()
29 |     result = detector.detect('Hola, cómo te llamas?').language
30 |     assert result == 'es'
31 | 
32 | 
33 | def test_get_multiple_scores():
34 |     detector = LanguageDetector()
35 |     detector.return_scores = True
36 |     result = len(detector.detect('Hola, cómo te llamas?').scores())
37 |     assert result > 1, 'Expected: >1 scores'
38 | 
39 | 
40 | def test_detect_error_empty_text():
41 |     detector = LanguageDetector()
42 |     result = detector.detect('').language
43 |     assert result is None
44 | 
45 | 
46 | def test_clean_text():
47 |     text = 'https://www.google.com/\n' \
48 |            'mail@gmail.com\n' \
49 |            'oogle.com/search?q=search&source=hp\n' \
50 |            '12345 A12345\n'
51 |     result = get_clean_txt(text).strip()
52 |     assert result == ''
53 | 
54 | 
55 | def test_check_confidence():
56 |     detector = LanguageDetector('ngramsM60')
57 |     text = 'zxz zcz zvz zbz znz zmz zlz zsz zdz zkz zjz pelo'
58 |     result = detector.detect(text).is_reliable()
59 |     assert result is False
60 | 
61 | 
62 | def test_load_ngrams_detect():
63 |     detector = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd')
64 |     result = detector.detect('Hola, cómo te llamas?').language
65 |     assert result == 'es'
66 | 
67 | 
68 | def test_accuracy_m_bigtest():
69 |     # TODO use importlib or pathlib to open txt file as package eld.tests.data resource
70 |     detector = LanguageDetector('ngramsM60')
71 |     file = open( os.path.dirname(__file__) + '/data/big-test.txt', encoding='utf-8')
72 |     # '../../benchmark/big-test.txt'
73 |     content = file.read()
74 |     file.close()
75 |     lines = content.strip().split("\n")
76 |     total = 0
77 |     correct = 0
78 |     for line in lines:
79 |         total += 1
80 |         values = line.split("\t")
81 |         if detector.detect(values[1]).language == values[0]:
82 |             correct += 1
83 |     if total < 60000:
84 |         pytest.fail('big-test.txt was not load correctly, too few lines')
85 |     result = correct / total * 100
86 |     # a bit of margin, depending on tie scores order, avg. might change a bit
87 |     assert result > 99.4
88 | 
89 | # python -m pytest -v -s test_detector.py
90 | # if __name__ == '__main__':
91 | #    pytest.main(["-v", "test_detector.py"])  # Gives errors
92 | 


--------------------------------------------------------------------------------
/eld/tests/test_subset.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | import sys
 3 | import os
 4 | 
 5 | # Make sure the local package is imported instead of the pip-installed package
 6 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
 7 | sys.path.insert(0, project_root)  # prioritize the local package
 8 | # sys.path.append('../..')
 9 | 
10 | from eld import LanguageDetector
11 | 
12 | 
13 | # Mostly functional testing, when functions are more mature I will add some more unit tests
14 | 
15 | def test_load_eld():
16 |     detector = LanguageDetector()
17 |     assert isinstance(detector, LanguageDetector)
18 | 
19 | 
20 | def test_dynamic_subset_detect():
21 |     detector = LanguageDetector()
22 |     lang_subset = ['en']
23 |     detector.dynamic_lang_subset(lang_subset)
24 |     result = len(detector.detect('How are you? Bien, gracias').scores())
25 |     assert result == 1, 'Expected: 1 score, subset of only one language'
26 | 
27 | 
28 | def test_remove_dynamic_subset():
29 |     detector = LanguageDetector()
30 |     lang_subset = ['en']
31 |     detector.dynamic_lang_subset(lang_subset)
32 |     detector.dynamic_lang_subset(None)
33 |     result = len(detector.detect('How are you? Bien, gracias').scores())
34 |     assert result > 1
35 | 
36 | 
37 | def test_subset_detect():
38 |     detector = LanguageDetector()
39 |     lang_subset = ['en']
40 |     detector.lang_subset(lang_subset)
41 |     result = len(detector.detect('How are you? Bien, gracias').scores())
42 |     assert result == 1, 'Expected: 1 score, subset of only one language'
43 | 
44 | 
45 | def test_remove_subset():
46 |     detector = LanguageDetector()
47 |     lang_subset = ['en']
48 |     detector.lang_subset(lang_subset)
49 |     detector.lang_subset(None)
50 |     result = len(detector.detect('How are you? Bien, gracias').scores())
51 |     assert result > 1
52 | 
53 | 
54 | def test_save_subset_file():
55 |     # TODO use importlib or pathlib to check subset file as package resource
56 |     file = os.path.dirname(__file__) + '/../resources/ngrams/subset/ngramsM60-1_2rrx014rx6ypsas6tplo1gtcnmiv5mz.py'
57 |     if os.path.exists(file):
58 |         os.remove(file)
59 |     detector = LanguageDetector()
60 |     lang_subset = ['en']
61 |     detector.lang_subset(lang_subset)
62 |     result = os.path.exists(file)
63 |     assert result is True, 'Subset languages file Not saved: ' + file
64 | 
65 | 
66 | def test_load_ngrams_detect():
67 |     detector = LanguageDetector('ngramsM60-6_5ijqhj4oecs310zqtm8u9pgmd9ox2yd')
68 |     result = detector.detect('Hola, cómo te llamas?').language
69 |     assert result == 'es'
70 | 


--------------------------------------------------------------------------------
/misc/sentences_avg_py.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/nitotm/efficient-language-detector-py/ce666a0355d5ca972779e0777c534f5895b067e4/misc/sentences_avg_py.png


--------------------------------------------------------------------------------
/misc/table_accuracy_py.svg:
--------------------------------------------------------------------------------
  1 | <svg xmlns='http://www.w3.org/2000/svg' font-family='Arial, Helvetica, sans-serif' font-size='17' viewBox='0 0 600 241'>
  2 |   <path d='M0 0h600v350H0z' fill='#fff'/>
  3 |   <text fill='#555' transform='translate(30 16)'>
  4 |     <tspan x='0' y='0'>Accuracy</tspan>
  5 |   </text>
  6 |   <text transform='translate(4 52)'>Nito-ELD</text>
  7 |   		<path d='M126 30h93v30H126z' fill='#dfd'/>
  8 | 	<text transform='translate(132 52)'>99.3%</text>
  9 | 		<path d='M219 30h93v30H219z' fill='#dfd'/>
 10 | 	<text transform='translate(225 52)'>99.4%</text>
 11 | 		<path d='M312 30h93v30H312z' fill='#dfd'/>
 12 | 	<text transform='translate(318 52)'>98.8%</text>
 13 | 		<path d='M405 30h93v30H405z' fill='#fee'/>
 14 | 	<text transform='translate(411 52)'>87.6%</text>
 15 | 		<path d='M498 30h102v30H498z' fill='#fee'/>
 16 | 	<text transform='translate(504 52)'>73.3%</text>
 17 | 	
 18 | 	 <text transform='translate(4 82)'>Nito-ELD-L</text>
 19 |   		<path d='M126 60h93v30H126z' fill='#dfd'/>
 20 | 	<text transform='translate(132 82)'>99.4%</text>
 21 | 		<path d='M219 60h93v30H219z' fill='#dfd'/>
 22 | 	<text transform='translate(225 82)'>99.4%</text>
 23 | 		<path d='M312 60h93v30H312z' fill='#dfd'/>
 24 | 	<text transform='translate(318 82)'>98.7%</text>
 25 | 		<path d='M405 60h93v30H405z' fill='#fee'/>
 26 | 	<text transform='translate(411 82)'>89.6%</text>
 27 | 		<path d='M498 60h102v30H498z' fill='#fee'/>
 28 | 	<text transform='translate(504 82)'>76.4%</text> 
 29 | 	
 30 |   <text transform='translate(4 112)'>Lingua</text><text fill='#08e' font-size='12' transform='translate(54 102)'>1</text>
 31 |     	<path d='M126 90h93v30H126z' fill='#dfd'/>
 32 | 	<text transform='translate(132 112)'>98.8%</text>
 33 | 		<path d='M219 90h93v30H219z' fill='#dfd'/>
 34 | 	<text transform='translate(225 112)'>99.1%</text>
 35 | 		<path d='M312 90h93v30H312z' fill='#dfd'/>
 36 | 	<text transform='translate(318 112)'>98.6%</text>
 37 | 		<path d='M405 90h93v30H405z' fill='#fec'/>
 38 | 	<text transform='translate(411 112)'>93.1%</text>
 39 | 		<path d='M498 90h102v30H498z' fill='#fee'/>
 40 | 	<text transform='translate(504 112)'>80.0%</text>
 41 | 	
 42 |   <text transform='translate(4 142)'>CLD2</text><text fill='#08e' font-size='12' transform='translate(49 132)'>2</text>
 43 | 		<path d='M126 120h93v30H126z' fill='#fec'/>
 44 | 	<text transform='translate(132 142)'>93.8%</text>
 45 | 		<path d='M219 120h93v30H219z' fill='#ffc'/>
 46 | 	<text transform='translate(225 142)'>97.2%</text>
 47 | 		<path d='M312 120h93v30H312z' fill='#ffc'/>
 48 | 	<text transform='translate(318 142)'>97.2%</text>
 49 | 		<path d='M405 120h93v30H405z' fill='#fee'/>
 50 | 	<text transform='translate(411 142)'>87.7%</text>
 51 | 		<path d='M498 120h102v30H498z' fill='#eee'/>
 52 | 	<text transform='translate(504 142)'>69.6%</text>
 53 | 	
 54 |   <text transform='translate(4 172)'>Lingua low</text><text fill='#08e' font-size='12' transform='translate(85 162)'>1</text>
 55 | 		<path d='M126 150h93v30H126z' fill='#ffc'/>
 56 | 	<text transform='translate(132 172)'>96.0%</text>
 57 | 		<path d='M219 150h93v30H219z' fill='#ffc'/>
 58 | 	<text transform='translate(225 172)'>97.2%</text>
 59 | 		<path d='M312 150h93v30H312z' fill='#ffc'/>
 60 | 	<text transform='translate(318 172)'>96.3%</text>
 61 | 		<path d='M405 150h93v30H405z' fill='#fee'/>
 62 | 	<text transform='translate(411 172)'>83.7%</text>
 63 | 		<path d='M498 150h102v30H498z' fill='#eee'/>
 64 | 	<text transform='translate(504 172)'>68.0%</text>  
 65 | 	
 66 |   <text transform='translate(4 202)'>CLD3</text><text fill='#08e' font-size='12' transform='translate(49 192)'>2</text>
 67 | 		<path d='M126 180h93v30H126z' fill='#fec'/>
 68 | 	<text transform='translate(132 202)'>92.2%</text>
 69 | 		<path d='M219 180h93v30H219z' fill='#ffc'/>
 70 | 	<text transform='translate(225 202)'>95.8%</text>
 71 | 		<path d='M312 180h93v30H312z' fill='#fec'/>
 72 | 	<text transform='translate(318 202)'>94.7%</text>
 73 | 		<path d='M405 180h93v30H405z' fill='#eee'/>
 74 | 	<text transform='translate(411 202)'>69.0%</text>
 75 | 		<path d='M498 180h102v30H498z' fill='#eee'/>
 76 | 	<text transform='translate(504 202)'>51.5%</text>    
 77 | 	
 78 |   <text transform='translate(4 232)'>franc</text>
 79 | 		<path d='M126 210h93v30H126z' fill='#fee'/>
 80 | 	<text transform='translate(132 232)'>89.8%</text>
 81 | 		<path d='M219 210h93v30H219z' fill='#fec'/>
 82 | 	<text transform='translate(225 232)'>92.0%</text>
 83 | 		<path d='M312 210h93v30H312z' fill='#fec'/>
 84 | 	<text transform='translate(318 232)'>90.5%</text>
 85 | 		<path d='M405 210h93v30H405z' fill='#eee'/>
 86 | 	<text transform='translate(411 232)'>65.9%</text>
 87 | 		<path d='M498 210h102v30H498z' fill='#eee'/>
 88 | 	<text transform='translate(504 232)'>52.9%</text>   
 89 | 	  
 90 | 	<g fill='#e0e0e0'>
 91 | 	  <path d='M0 0h600v1H0z'/>
 92 | 	  <path d='M0 30h600v1H0z'/>
 93 | 	  <path d='M0 60h600v1H0z'/>
 94 | 	  <path d='M0 90h600v1H0z'/>
 95 | 	  <path d='M0 120h600v1H0z'/>
 96 | 	  <path d='M0 150h600v1H0z'/>
 97 | 	  <path d='M0 180h600v1H0z'/>
 98 | 	  <path d='M0 210h600v1H0z'/>
 99 | 	  <path d='M0 240h600v1H0z'/>
100 | 	</g>
101 | 	<g x='0' y='0' font-size='16' font-weight='700'>
102 | 	  <text transform='translate(130 21)'>
103 | 		 <tspan>Tweets</tspan>
104 | 	  </text>
105 | 	  <text transform='translate(223 21)'>
106 | 		 <tspan>Big test</tspan>
107 | 	  </text>
108 | 	  <text transform='translate(316 21)'>
109 | 		 <tspan>Sentences</tspan>
110 | 	  </text>
111 | 	  <text transform='translate(409 21)'>
112 | 		 <tspan>Word pairs</tspan>
113 | 	  </text>
114 | 	  <text font-size='15.4' transform='translate(502 21)'>
115 | 		 <tspan>Single words</tspan>
116 | 	  </text>
117 | 	</g>
118 |   <path d='M0 0h1v240H0zm126 0h1v240h-1zm93 0h1v240h-1zm93 0h1v240h-1zm93 0h1v240h-1zm93 0h1v240h-1zm101 0h1v240h-1zM0 240h600v1H0z' fill='#e0e0e0'/>
119 | </svg>


--------------------------------------------------------------------------------
/misc/table_time_py.svg:
--------------------------------------------------------------------------------
  1 | <svg xmlns='http://www.w3.org/2000/svg' font-family='Arial, Helvetica, sans-serif' font-size='17' viewBox='0 0 600 271'>
  2 |   <path d='M0 0h600v350H0z' fill='#fff'/>
  3 |   <text fill='#555' transform='translate(30 16)'>
  4 |     <tspan x='0' y='0'>Seconds</tspan>
  5 |   </text>
  6 |   <text transform='translate(4 52)'>Nito-ELD-py</text>
  7 |   		<path d='M126 30h93v30H126z' fill='#ffc'/>
  8 | 	<text transform='translate(132 52)'>0.96"</text>
  9 | 		<path d='M219 30h93v30H219z' fill='#ffc'/>
 10 | 	<text transform='translate(225 52)'>7.8"</text>
 11 | 		<path d='M312 30h93v30H312z' fill='#ffc'/>
 12 | 	<text transform='translate(318 52)'>6.7"</text>
 13 | 		<path d='M405 30h93v30H405z' fill='#ffc'/>
 14 | 	<text transform='translate(411 52)'>2.6"</text>
 15 | 		<path d='M498 30h102v30H498z' fill='#ffc'/>
 16 | 	<text transform='translate(504 52)'>2.1"</text>
 17 | 	
 18 | 	 <text transform='translate(4 82)'>Nito-ELD-L-py</text>
 19 |   		<path d='M126 60h93v30H126z' fill='#ffc'/>
 20 | 	<text transform='translate(132 82)'>1"</text>
 21 | 		<path d='M219 60h93v30H219z' fill='#ffc'/>
 22 | 	<text transform='translate(225 82)'>8"</text>
 23 | 		<path d='M312 60h93v30H312z' fill='#ffc'/>
 24 | 	<text transform='translate(318 82)'>6.9"</text>
 25 | 		<path d='M405 60h93v30H405z' fill='#ffc'/>
 26 | 	<text transform='translate(411 82)'>2.7"</text>
 27 | 		<path d='M498 60h102v30H498z' fill='#ffc'/>
 28 | 	<text transform='translate(504 82)'>2.1"</text>   
 29 | 	
 30 |   <text transform='translate(4 112)'>Lingua</text>
 31 |     	<path d='M126 90h93v30H126z' fill='#eee'/>
 32 | 	<text transform='translate(132 112)'>4790"</text>
 33 | 		<path d='M219 90h93v30H219z' fill='#eee'/>
 34 | 	<text transform='translate(225 112)'>24000"</text>
 35 | 		<path d='M312 90h93v30H312z' fill='#eee'/>
 36 | 	<text transform='translate(318 112)'>18700"</text>
 37 | 		<path d='M405 90h93v30H405z' fill='#eee'/>
 38 | 	<text transform='translate(411 112)'>8450"</text>
 39 | 		<path d='M498 90h102v30H498z' fill='#eee'/>
 40 | 	<text transform='translate(504 112)'>6700"</text>
 41 | 	
 42 |   <text transform='translate(4 142)'>CLD2</text>
 43 | 		<path d='M126 120h93v30H126z' fill='#dfd'/>
 44 | 	<text transform='translate(132 142)'>0.35"</text>
 45 | 		<path d='M219 120h93v30H219z' fill='#dfd'/>
 46 | 	<text transform='translate(225 142)'>2"</text>
 47 | 		<path d='M312 120h93v30H312z' fill='#dfd'/>
 48 | 	<text transform='translate(318 142)'>1.7"</text>
 49 | 		<path d='M405 120h93v30H405z' fill='#dfd'/>
 50 | 	<text transform='translate(411 142)'>0.98"</text>
 51 | 		<path d='M498 120h102v30H498z' fill='#dfd'/>
 52 | 	<text transform='translate(504 142)'>0.8"</text>
 53 | 	
 54 |   <text transform='translate(4 172)'>Lingua low</text>
 55 | 		<path d='M126 150h93v30H126z' fill='#fee'/>
 56 | 	<text transform='translate(132 172)'>64"</text>
 57 | 		<path d='M219 150h93v30H219z' fill='#fee'/>
 58 | 	<text transform='translate(225 172)'>370"</text>
 59 | 		<path d='M312 150h93v30H312z' fill='#fee'/>
 60 | 	<text transform='translate(318 172)'>308"</text>
 61 | 		<path d='M405 150h93v30H405z' fill='#fee'/>
 62 | 	<text transform='translate(411 172)'>108"</text>
 63 | 		<path d='M498 150h102v30H498z' fill='#fee'/>
 64 | 	<text transform='translate(504 172)'>85"</text>  
 65 | 	
 66 |   <text transform='translate(4 202)'>CLD3</text>
 67 | 		<path d='M126 180h93v30H126z' fill='#ffc'/>
 68 | 	<text transform='translate(132 202)'>3.9"</text>
 69 | 		<path d='M219 180h93v30H219z' fill='#ffc'/>
 70 | 	<text transform='translate(225 202)'>29"</text>
 71 | 		<path d='M312 180h93v30H312z' fill='#ffc'/>
 72 | 	<text transform='translate(318 202)'>26"</text>
 73 | 		<path d='M405 180h93v30H405z' fill='#ffc'/>
 74 | 	<text transform='translate(411 202)'>12"</text>
 75 | 		<path d='M498 180h102v30H498z' fill='#ffc'/>
 76 | 	<text transform='translate(504 202)'>11"</text>    
 77 | 	
 78 |   <text transform='translate(4 232)'>franc</text>
 79 | 		<path d='M126 210h93v30H126z' fill='#ffc'/>
 80 | 	<text transform='translate(132 232)'>1.2"</text>
 81 | 		<path d='M219 210h93v30H219z' fill='#ffc'/>
 82 | 	<text transform='translate(225 232)'>8"</text>
 83 | 		<path d='M312 210h93v30H312z' fill='#ffc'/>
 84 | 	<text transform='translate(318 232)'>7.8"</text>
 85 | 		<path d='M405 210h93v30H405z' fill='#ffc'/>
 86 | 	<text transform='translate(411 232)'>2.8"</text>
 87 | 		<path d='M498 210h102v30H498z' fill='#ffc'/>
 88 | 	<text transform='translate(504 232)'>2"</text>   
 89 | 	
 90 | 	 <text transform='translate(4 262)'>Nito-ELD-php</text>
 91 |   		<path d='M126 240h93v30H126z' fill='#dfd'/>
 92 | 	<text transform='translate(132 262)'>0.31"</text>
 93 | 		<path d='M219 240h93v30H219z' fill='#dfd'/>
 94 | 	<text transform='translate(225 262)'>2.5"</text>
 95 | 		<path d='M312 240h93v30H312z' fill='#dfd'/>
 96 | 	<text transform='translate(318 262)'>2.2"</text>
 97 | 		<path d='M405 240h93v30H405z' fill='#dfd'/>
 98 | 	<text transform='translate(411 262)'>0.66"</text>
 99 | 		<path d='M498 240h102v30H498z' fill='#dfd'/>
100 | 	<text transform='translate(504 262)'>0.48"</text>
101 | 	
102 | 	<g fill='#e0e0e0'>
103 | 	  <path d='M0 0h600v1H0z'/>
104 | 	  <path d='M0 30h600v1H0z'/>
105 | 	  <path d='M0 60h600v1H0z'/>
106 | 	  <path d='M0 90h600v1H0z'/>
107 | 	  <path d='M0 120h600v1H0z'/>
108 | 	  <path d='M0 150h600v1H0z'/>
109 | 	  <path d='M0 180h600v1H0z'/>
110 | 	  <path d='M0 210h600v1H0z'/>
111 | 	  <path d='M0 240h600v1H0z'/>
112 | 	</g>
113 | 	<g x='0' y='0' font-size='16' font-weight='700'>
114 | 	  <text transform='translate(130 21)'>
115 | 		 <tspan>Tweets</tspan>
116 | 	  </text>
117 | 	  <text transform='translate(223 21)'>
118 | 		 <tspan>Big test</tspan>
119 | 	  </text>
120 | 	  <text transform='translate(316 21)'>
121 | 		 <tspan>Sentences</tspan>
122 | 	  </text>
123 | 	  <text transform='translate(409 21)'>
124 | 		 <tspan>Word pairs</tspan>
125 | 	  </text>
126 | 	  <text font-size='15.4' transform='translate(502 21)'>
127 | 		 <tspan>Single words</tspan>
128 | 	  </text>
129 | 	</g>
130 |   <path d='M0 0h1v270H0zm126 0h1v270h-1zm93 0h1v270h-1zm93 0h1v270h-1zm93 0h1v270h-1zm93 0h1v270h-1zm101 0h1v270h-1zM0 270h600v1H0z' fill='#e0e0e0'/>
131 | </svg>


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["poetry-core>=1.0.0"]
 3 | build-backend = "poetry.core.masonry.api"
 4 | 
 5 | [tool.poetry]
 6 | name = "eld"
 7 | version = "1.0.8"
 8 | # Keep this in sync with VERSION in eld/languageDetector.py
 9 | authors = ["Nito T.M."]
10 | description = "Fast and accurate natural language detection. Detector written in Python. Nito-ELD, ELD."
11 | keywords = ["nlp", "language", "natural-language-processing", "natural-language", "language-detection", "language-detector", "language-identification"]
12 | license = "Apache-2.0"
13 | readme = "README.md"
14 | homepage = "https://github.com/nitotm/efficient-language-detector-py/"
15 | repository = "https://github.com/nitotm/efficient-language-detector-py.git"
16 | classifiers = [
17 |     "Programming Language :: Python :: 3",
18 |     "License :: OSI Approved :: Apache Software License",
19 |     "Operating System :: OS Independent"
20 | ]
21 | packages = [
22 |     { include = "eld" },
23 |     { include = "eld/tests" },
24 |     { include = "eld/resources" },
25 |     { include = "eld/resources/ngrams" },
26 |     { include = "eld/resources/ngrams/subset" }
27 | ]
28 | include = ["eld/tests/data/*.txt"]
29 | 
30 | [tool.poetry.dependencies]
31 | python = "^3.7"
32 | regex = "*"
33 | 
34 | # [tool.poetry.scripts]
35 | 


--------------------------------------------------------------------------------