├── .gitignore
├── LICENSE
├── README.md
├── analogies
└── analogies.ipynb
├── bad-data-labels
├── bad-label-experiment.ipynb
└── outofscope-intent-classification-dataset.csv
├── bias
├── 3d-projection.ipynb
├── debias.ipynb
├── female-words.txt
└── male-words.txt
├── crf
└── config.yml
├── diet
├── config-mega-basic.yml
├── config-orig.yml
├── diet-heavy.yml
├── diet-light.yml
├── diet-replace-mask.yml
├── diet-replace.yml
├── readme.md
└── viewresults.py
├── glove
├── glove.py
└── glove_variant_embeddings.ipynb
├── images
├── benchmarks.jpeg
├── bias.jpg
├── bulk-labelling-ui.jpeg
├── byte.jpg
├── confidence.jpeg
├── countv.jpg
├── debias-hard.jpg
├── debias-proj.jpg
├── fallback-detection.jpeg
├── flashtext.jpeg
├── incremental-training.jpeg
├── iterate-data.jpeg
├── labse.jpeg
├── language.jpeg
├── leven.jpg
├── lexical-ambiguity.jpeg
├── logo.png
├── remain-careful.jpg
├── resp1.jpg
├── resp2.jpg
├── subw-imp.jpg
├── subw.jpg
├── ted1.jpg
├── ted2.jpg
├── toxic-lang.png
├── vid-1.jpg
├── vid-10.jpg
├── vid-11.jpg
├── vid-12.jpg
├── vid-13.jpg
├── vid-2.jpg
├── vid-3.jpg
├── vid-4.jpg
├── vid-5.jpg
├── vid-6.jpg
├── vid-7.jpg
├── vid-8.jpg
└── word-analogies.jpg
├── intent-benchmark
├── benchmark.py
├── intent-benchmark.ipynb
└── n-ft-use-train-experiment.jsonl
├── language
├── ar.tsv
├── de.tsv
├── en.tsv
├── eo.tsv
├── es.tsv
├── language-detection-models.ipynb
├── lid.176.ftz
├── lt.tsv
├── nl.tsv
├── pt.tsv
├── tr.tsv
└── vi.tsv
├── letter-embeddings
├── algo_whiteboard_letter_embeddings_v1.ipynb
└── algo_whiteboard_letter_embeddings_v2.ipynb
├── named-name-recognition
├── arabic-names.txt
├── french-names.txt
└── named-name-recognition.ipynb
├── toxic
├── toxicity.ipynb
└── toxicity.py
└── whatlies
└── whatlies.ipynb
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | pip-wheel-metadata/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | target/
76 |
77 | # Jupyter Notebook
78 | .ipynb_checkpoints
79 |
80 | # IPython
81 | profile_default/
82 | ipython_config.py
83 |
84 | # pyenv
85 | .python-version
86 |
87 | # pipenv
88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
91 | # install all needed dependencies.
92 | #Pipfile.lock
93 |
94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95 | __pypackages__/
96 |
97 | # Celery stuff
98 | celerybeat-schedule
99 | celerybeat.pid
100 |
101 | # SageMath parsed files
102 | *.sage.py
103 |
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 |
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 |
117 | # Rope project settings
118 | .ropeproject
119 |
120 | # mkdocs documentation
121 | /site
122 |
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 |
128 | # Pyre type checker
129 | .pyre/
130 | .DS_Store
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Algorithm Whiteboard Resources
2 |
3 |
4 |
5 |
6 |
7 | This is where we share notebooks and projects used in our [youtube channel](https://www.youtube.com/watch?v=wWNMST6t1TA&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb).
8 |
9 |
10 |
11 |
12 |
13 | ## Video 1: [DIET Architecture - How it Works](https://www.youtube.com/watch?v=vWStcJDuOUk&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb)
14 |
15 | This video explains the parts of the DIET architecture. It does not discuss any code.
16 |
17 |
18 |
19 |
20 |
21 | ## Video 2: [DIET Architecture - Design Decisions](https://www.youtube.com/watch?v=KUGGuJ0aTL8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb)
22 |
23 | This video explains the design decisions behind the DIET architecture. It does not discuss any code.
24 |
25 |
26 |
27 |
28 |
29 | ## Video 3: [DIET Architecture - Benchmarks](https://www.youtube.com/watch?v=oj5oPGDlep4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb)
30 |
31 | In this video we make changes to a configuration file. The configuration files, the streamlit application as well as an instructions manual can be found in the `diet` folder.
32 |
33 |
34 |
35 |
36 |
37 | ## Video 4: [Word Embeddings - Letter Embeddings](https://www.youtube.com/watch?v=mWvnlVw_LiY&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb)
38 |
39 | In this video we demonstrate how to train letter embeddings in order to gain intuition on what word embeddings are.
40 |
41 | The kaggle dataset that we use in this video can be found [here](https://www.kaggle.com/therohk/million-headlines).
42 |
43 | We've added the two notebooks in this repo in the `letter-embeddings` folder. But you can also run them yourself in google colab. The notebooks are mostly identical but the `v1` notebook only uses one token to predict the next one while `v2` uses two tokens to predict the next one.
44 |
45 | Notebook with one token input:
46 |
47 | Notebook with two token input:
48 |
49 |
50 |
51 |
52 |
53 |
54 | ## Video 5: [Word Embeddings - CBOW & SkipGram](https://www.youtube.com/watch?v=BWaHLmG1lak&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=6)
55 |
56 |
57 | This video explains two algorithms but it does not discuss any code.
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 | ## Video 6: [Word Embeddings - GloVe](https://www.youtube.com/watch?v=BWaHLmG1lak&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=7)
66 |
67 | This video discusses GloVe but also offers code to train a variant of your own. The keras model can be found in the `glove` folder.
68 |
69 | The `glove.py` file contains just the keras algorithm while the notebook
70 | contains the full code. You can also go online to colab
71 | and play with the full notebook from there.
72 |
73 | The full notebook:
74 |
75 |
76 |
77 |
78 |
79 | ## Video 7: [Word Embeddings - WhatLies](https://www.youtube.com/watch?v=FwkwC7IJWO0&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=9)
80 |
81 | This video discusses a small visualisation package we've open sourced. The documentation for it can be found [here](https://rasahq.github.io/whatlies/).
82 |
83 | The notebook that we made in this video can be found in the `whatlies` folder.
84 |
85 |
86 |
87 |
88 |
89 | ## Video 8: [Attention - Self Attention](https://www.youtube.com/watch?v=yGTUuEx3GkA&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=9)
90 |
91 | This video discusses the idea behind attention (you may notice some similarities
92 | with a convolution) but it does not discuss any code.
93 |
94 |
95 |
96 |
97 |
98 |
99 | ## Video 9: [Attention: Keys, Values, Queries](https://www.youtube.com/watch?v=tIvKXrEDMhk&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=11)
100 |
101 | This video discusses how you can add more context to the self attention mechanism by introducing layers. This video does not discuss any code though.
102 |
103 |
104 |
105 |
106 |
107 | ## Video 10: [Attention: Multi Head Attention](https://www.youtube.com/watch?v=23XUv0T9L5c&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=12)
108 |
109 | This video explains how you can increase the potential of attention by introducing multiple layers of keys, queries and values. The video does not discuss any code though.
110 |
111 |
112 |
113 |
114 |
115 | ## Video 11: [Attention: Transformers](https://www.youtube.com/watch?v=EXNBy8G43MM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=13)
116 |
117 | Given the lessons from the previous videos, this video wraps everything together by
118 | combining everything into a transformer block. There is no code for this video.
119 |
120 |
121 |
122 |
123 |
124 |
125 | ## Video 12: [StarSpace](https://www.youtube.com/watch?v=ZT3_9Kjx7oI&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=14)
126 |
127 | This video discusses the [StarSpace](https://arxiv.org/abs/1709.03856) algorithm. The
128 | video serves as an introduction to the TED policy. This video contains no code.
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 | ## Video 13: [TED Policy](https://www.youtube.com/watch?v=j90NvurJI4I&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=14)
137 |
138 | This video only discusses the theory behind the TED algorithm. The next video will
139 | show how TED works on a more practical level. This video contains no code.
140 |
141 |
142 |
143 |
144 |
145 | ## Video 14: [TED Policy in Practice](https://www.youtube.com/watch?v=d8JMJMvErSg&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=15)
146 |
147 | This video makes use of a rasa project that can be found [here](https://github.com/RasaHQ/rasa-ted-demo). By tuning the `history` hyperparameter we see how the chatbot is able
148 | to deal with context switches over a long period in the dialogue.
149 |
150 |
151 |
152 |
153 |
154 | ## Video 15: [Response Selection](https://www.youtube.com/watch?v=2jvyWngHEJM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=16)
155 |
156 | This video explains how a response selection model might make your model more accurate in a FAQ/Chitchat scenario. There is no code for this video.
157 |
158 |
159 |
160 |
161 |
162 | ## Video 16: [Response Selection](https://www.youtube.com/watch?v=0tXkFScW0hE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=17)
163 |
164 | This video explains how a response selection model is implemented internally. There is no code for this video.
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 | ## Video 17: [CountVectors](https://www.youtube.com/watch?v=Ju7l5ADg10U&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=18)
173 |
174 | This video explains why CountVectors are still the unsung hero of natural language processing. There is no code attachment for this video.
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 | ## Video 18: [Subword Embeddings](https://www.youtube.com/watch?v=kNw9dpzp5RU&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=19)
183 |
184 | This video tries to combine the ideas from word embeddings with the idea of countvectors. To reproduce, check out [whatlies](https://rasahq.github.io/whatlies/).
185 |
186 |
187 |
188 |
189 |
190 | ## Video 19: [Subword Implementation](https://www.youtube.com/watch?v=8D3Gamk1Jig&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=20)
191 |
192 | This video explains how you might implement subword embeddings from a neural network design perspective. There is no code for this video.
193 |
194 |
195 |
196 |
197 |
198 | ## Video 20: [BytePair Embeddings](https://www.youtube.com/watch?v=-0IjF-7OB3s&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=21)
199 |
200 | This video explains how BytePair embeddings work. If you want to use these embeddings in Rasa please check out [rasa-nlu-examples](https://rasahq.github.io/rasa-nlu-examples/).
201 |
202 |
203 |
204 |
205 |
206 | ## Video 21: [Levenshtein Vectors](https://www.youtube.com/watch?v=e9JdIKgf0QY&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=22)
207 |
208 | This video explains how count vectors might be turned from sparse into dense layers. While doing this, we also learn that these vectors encode Levenshtein distance.
209 |
210 |
211 |
212 |
213 |
214 | ## Video 22: [Bias in Word Embeddings](https://www.youtube.com/watch?v=UwAvyACOrWs&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=23)
215 |
216 | This video explains how you might measure gender bias in word embeddings. It's part of a larger series and the code for it can be found in the `bias` folder of this repository.
217 |
218 |
219 |
220 |
221 |
222 | ## Video 23: [De-Biasing Projections](https://www.youtube.com/watch?v=8xQbWlCEHRw&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=24)
223 |
224 | There's a lot of research on how we might remove bias from word-embeddings. In this video we'll discuss one such technique. For the code, check the `bias` folder of this repository.
225 |
226 |
227 |
228 |
229 |
230 | ## Video 24: [Remain Careful with Debiasing](https://www.youtube.com/watch?v=MHdAd48dANo&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=25)
231 |
232 | In this video we explain why de-biasing techniques have limits. For the code, check the `bias` folder of this repository.
233 |
234 |
235 |
236 |
237 |
238 | ## Video 25: [Why Debiasing is Hard](https://www.youtube.com/watch?v=2ROP1QFKsqc&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=26)
239 |
240 | In this video we explain why de-biasing techniques have limits. For the code, check the `bias` folder of this repository.
241 |
242 |
243 |
244 |
245 |
246 | ## Video 26: [Word Analogies](https://www.youtube.com/watch?v=u6EmngzBUEU&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=27)
247 |
248 | In this video we explain why "word analogies" don't really work by merely applying arithmetic on word-vectors. For the code, check the `analogies` folder of this repository.
249 |
250 |
251 |
252 |
253 |
254 | ## Video 27: [Toxic Language](https://www.youtube.com/watch?v=Czto6GzJah8&feature=youtu.be&ab_channel=Rasa)
255 |
256 | In this video we explain why detecting toxic language is harder than it might seem. Code for the video can be found in the `toxic` folder in this repository.
257 |
258 |
259 |
260 |
261 |
262 | ## Video 28: [Lexical Ambiguity](https://www.youtube.com/watch?v=byy19WPLPBQ&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=34)
263 |
264 | In this video we explain why, in general, NLP models fall short. Models don't *really* understand language, they merely model it.
265 |
266 |
267 |
268 |
269 |
270 | ## Video 29: [Fallback Detection](https://www.youtube.com/watch?v=VldHznqAYlE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=35)
271 |
272 | It's important to understand the limits of our models. They can sometimes tell us when they're uncertain about a prediction and this information should not be ignored.
273 |
274 |
275 |
276 |
277 |
278 | ## Video 30: [Language Detection](https://www.youtube.com/watch?v=Z0pnQcWHBZE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=36)
279 |
280 | What might an assistant do if it sees a text from a language that it isn't trained on? It might make assumptions because it's unlike anything it has seen before and a standard fallback mechanism might not be able to pick it up.
281 |
282 | For the code, check the `language` folder of this repository.
283 |
284 |
285 |
286 |
287 |
288 | ## Video 31: [Incremental Training](https://www.youtube.com/watch?v=FipRjQRaCz8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=37)
289 |
290 | Sometimes we don't need to completely retrain our algorithms. At times we can just finetune on new data. In this video we explain how that might be done with DIET.
291 |
292 |
293 |
294 |
295 |
296 | ## Video 31: [Bulk Labelling UI](https://www.youtube.com/watch?v=T0dDetqgra4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=38)
297 |
298 | This video demonstrates a new feature in our bulk labelling demo. The code can be found [here](https://github.com/RasaHQ/rasalit/tree/main/notebooks/bulk-labelling).
299 |
300 |
301 |
302 |
303 |
304 | ## Video 32: [Language Agnostic BERT (LaBSE)](https://www.youtube.com/watch?v=7tAWk_Coj-s&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=39)
305 |
306 | In this episode, I'll discuss how you might tweak the standard BERT model to accommodate multiple languages at the same time. We'll also demonstrate a pre-trained model that you can use right away! If you're interested in the paper, you can find it [here](https://arxiv.org/abs/2007.01852).
307 |
308 |
309 |
310 |
311 |
312 | ## Video 33: [Iterate on Data](https://www.youtube.com/watch?v=xpm17ibm0E8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=40)
313 |
314 | Instead of debugging a model, it might be *much* more effective to consider debugging your data. In this video, we'll discuss some techniques that you can start with while also demonstrating some new features in Rasa X.
315 |
316 |
317 |
318 |
319 |
320 | ## Video 34: [Meaningful Benchmarks](https://www.youtube.com/watch?v=GTClb8RQSGM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=41)
321 |
322 | It's easy to get distracted when you go down the rabbit hole of performance statistics. But! Not every impressive benchmark is meaningful and it's important to make the distinction. In this video, we're going to explore one benchmark to demonstrate what we mean by this.
323 |
324 | The code for this can be found in the `intent-benchmark` folder.
325 |
326 |
327 |
328 |
329 |
330 | ## Video 35: [Model Confidence](https://www.youtube.com/watch?v=ev1tNXPo3tE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=42)
331 |
332 | If we're going to apply a fallback, we better make sure that we have a good measure for confidence. In this video we explain an update that we've made to DIET that makes the confidence measure a more representative number.
333 |
334 |
335 |
336 |
337 |
338 | ## Video 36: [FlashText Entity Extraction](https://www.youtube.com/watch?v=IHHKrqgFgs4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=43)
339 |
340 | In this video we discuss how FlashText can be used for entity extraction.
341 |
--------------------------------------------------------------------------------
/bias/female-words.txt:
--------------------------------------------------------------------------------
1 | countrywoman
2 | sororal
3 | witches
4 | maidservant
5 | mothers
6 | diva
7 | actress
8 | spinster
9 | mama
10 | duchesses
11 | barwoman
12 | countrywomen
13 | dowry
14 | hostesses
15 | airwomen
16 | menopause
17 | clitoris
18 | princess
19 | governesses
20 | abbess
21 | women
22 | widow
23 | ladies
24 | sorceresses
25 | madam
26 | brides
27 | baroness
28 | housewives
29 | godesses
30 | niece
31 | widows
32 | lady
33 | sister
34 | brides
35 | nun
36 | adultresses
37 | obstetrics
38 | bellgirls
39 | her
40 | marchioness
41 | princesses
42 | empresses
43 | mare
44 | chairwoman
45 | convent
46 | priestesses
47 | girlhood
48 | ladies
49 | queen
50 | gals
51 | mommies
52 | maid
53 | female_ejaculation
54 | spokeswoman
55 | seamstress
56 | cowgirls
57 | chick
58 | spinsters
59 | hair_salon
60 | empress
61 | mommy
62 | feminism
63 | gals
64 | enchantress
65 | gal
66 | motherhood
67 | estrogen
68 | camerawomen
69 | godmother
70 | strongwoman
71 | goddess
72 | matriarch
73 | aunt
74 | chairwomen
75 | ma'am
76 | sisterhood
77 | hostess
78 | estradiol
79 | wife
80 | mom
81 | stewardess
82 | females
83 | viagra
84 | spokeswomen
85 | ma
86 | belle
87 | minx
88 | maiden
89 | witch
90 | miss
91 | nieces
92 | mothered
93 | cow
94 | belles
95 | councilwomen
96 | landladies
97 | granddaughter
98 | fiancees
99 | stepmothers
100 | horsewomen
101 | grandmothers
102 | adultress
103 | schoolgirl
104 | hen
105 | granddaughters
106 | bachelorette
107 | camerawoman
108 | moms
109 | her
110 | mistress
111 | lass
112 | policewoman
113 | nun
114 | actresses
115 | saleswomen
116 | girlfriend
117 | councilwoman
118 | lady
119 | stateswoman
120 | maternal
121 | lass
122 | landlady
123 | sistren
124 | ladies
125 | wenches
126 | sorority
127 | bellgirl
128 | duchess
129 | ballerina
130 | chicks
131 | fiancee
132 | fillies
133 | wives
134 | suitress
135 | maternity
136 | she
137 | businesswoman
138 | masseuses
139 | heroine
140 | doe
141 | busgirls
142 | girlfriends
143 | queens
144 | sisters
145 | mistresses
146 | stepmother
147 | brides
148 | daughter
149 | minxes
150 | cowgirl
151 | lady
152 | daughters
153 | mezzo
154 | saleswoman
155 | mistress
156 | hostess
157 | nuns
158 | maids
159 | mrs.
160 | headmistresses
161 | lasses
162 | congresswoman
163 | airwoman
164 | housewife
165 | priestess
166 | barwomen
167 | barnoesses
168 | abbesses
169 | handywoman
170 | toque
171 | sororities
172 | stewardesses
173 | filly
174 | czarina
175 | stepdaughters
176 | herself
177 | girls
178 | lionesses
179 | lady
180 | vagina
181 | hers
182 | masseuse
183 | cows
184 | aunts
185 | wench
186 | toques
187 | wife
188 | lioness
189 | sorceress
190 | effeminate
191 | mother
192 | lesbians
193 | female
194 | waitresses
195 | ovum
196 | skene_gland
197 | stepdaughter
198 | womb
199 | businesswomen
200 | heiress
201 | waitress
202 | headmistress
203 | woman
204 | governess
205 | godess
206 | bride
207 | grandma
208 | bride
209 | gal
210 | lesbian
211 | ladies
212 | girl
213 | grandmother
214 | mare
215 | maternity
216 | hens
217 | uterus
218 | nuns
219 | maidservants
220 | seamstress'
221 | busgirl
222 | heroines
--------------------------------------------------------------------------------
/bias/male-words.txt:
--------------------------------------------------------------------------------
1 | countryman
2 | fraternal
3 | wizards
4 | manservant
5 | fathers
6 | divo
7 | actor
8 | bachelor
9 | papa
10 | dukes
11 | barman
12 | countrymen
13 | brideprice
14 | hosts
15 | airmen
16 | andropause
17 | penis
18 | prince
19 | governors
20 | abbot
21 | men
22 | widower
23 | gentlemen
24 | sorcerers
25 | sir
26 | bridegrooms
27 | baron
28 | househusbands
29 | gods
30 | nephew
31 | widowers
32 | lord
33 | brother
34 | grooms
35 | priest
36 | adultors
37 | andrology
38 | bellboys
39 | his
40 | marquis
41 | princes
42 | emperors
43 | stallion
44 | chairman
45 | monastery
46 | priests
47 | boyhood
48 | fellas
49 | king
50 | dudes
51 | daddies
52 | manservant
53 | semen
54 | spokesman
55 | tailor
56 | cowboys
57 | dude
58 | bachelors
59 | barbershop
60 | emperor
61 | daddy
62 | masculism
63 | guys
64 | enchanter
65 | guy
66 | fatherhood
67 | androgen
68 | cameramen
69 | godfather
70 | strongman
71 | god
72 | patriarch
73 | uncle
74 | chairmen
75 | sir
76 | brotherhood
77 | host
78 | testosterone
79 | husband
80 | dad
81 | steward
82 | males
83 | cialis
84 | spokesmen
85 | pa
86 | beau
87 | stud
88 | bachelor
89 | wizard
90 | sir
91 | nephews
92 | fathered
93 | bull
94 | beaus
95 | councilmen
96 | landlords
97 | grandson
98 | fiances
99 | stepfathers
100 | horsemen
101 | grandfathers
102 | adultor
103 | schoolboy
104 | rooster
105 | grandsons
106 | bachelor
107 | cameraman
108 | dads
109 | him
110 | master
111 | lad
112 | policeman
113 | monk
114 | actors
115 | salesmen
116 | boyfriend
117 | councilman
118 | fella
119 | statesman
120 | paternal
121 | chap
122 | landlord
123 | brethren
124 | lords
125 | blokes
126 | fraternity
127 | bellboy
128 | duke
129 | ballet_dancer
130 | dudes
131 | fiance
132 | colts
133 | husbands
134 | suitor
135 | paternity
136 | he
137 | businessman
138 | masseurs
139 | hero
140 | deer
141 | busboys
142 | boyfriends
143 | kings
144 | brothers
145 | masters
146 | stepfather
147 | grooms
148 | son
149 | studs
150 | cowboy
151 | mentleman
152 | sons
153 | baritone
154 | salesman
155 | paramour
156 | male_host
157 | monks
158 | menservants
159 | mr.
160 | headmasters
161 | lads
162 | congressman
163 | airman
164 | househusband
165 | priest
166 | barmen
167 | barons
168 | abbots
169 | handyman
170 | beard
171 | fraternities
172 | stewards
173 | colt
174 | czar
175 | stepsons
176 | himself
177 | boys
178 | lions
179 | gentleman
180 | penis
181 | his
182 | masseur
183 | bulls
184 | uncles
185 | bloke
186 | beards
187 | hubby
188 | lion
189 | sorcerer
190 | macho
191 | father
192 | gays
193 | male
194 | waiters
195 | sperm
196 | prostate
197 | stepson
198 | prostatic_utricle
199 | businessmen
200 | heir
201 | waiter
202 | headmaster
203 | man
204 | governor
205 | god
206 | bridegroom
207 | grandpa
208 | groom
209 | dude
210 | gay
211 | gents
212 | boy
213 | grandfather
214 | gelding
215 | paternity
216 | roosters
217 | prostatic_utricle
218 | priests
219 | manservants
220 | stailor
221 | busboy
222 | heros
--------------------------------------------------------------------------------
/crf/config.yml:
--------------------------------------------------------------------------------
1 | language: en
2 |
3 | pipeline:
4 | - name: WhitespaceTokenizer
5 | - name: CountVectorsFeaturizer
6 | - name: LexicalSyntacticFeaturizer
7 | "features": [
8 | ["low", "title", "upper"],
9 | [
10 | "BOS",
11 | "EOS",
12 | "low",
13 | "upper",
14 | "title",
15 | "digit",
16 | ],
17 | ["low", "title", "upper"],
18 | ]
19 | - name: DIETClassifier
20 |
21 | policies:
22 | - name: MemoizationPolicy
23 | - name: KerasPolicy
24 | - name: MappingPolicy
25 |
--------------------------------------------------------------------------------
/diet/config-mega-basic.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: WhitespaceTokenizer
4 | - name: CountVectorsFeaturizer
5 | - name: EmbeddingIntentClassifier
6 | policies:
7 | - name: EmbeddingPolicy
8 | max_history: 10
9 | epochs: 20
10 | batch_size:
11 | - 32
12 | - 64
13 | - max_history: 6
14 | name: AugmentedMemoizationPolicy
15 | - core_threshold: 0.3
16 | name: TwoStageFallbackPolicy
17 | nlu_threshold: 0.8
18 | - name: FormPolicy
19 | - name: MappingPolicy
20 |
--------------------------------------------------------------------------------
/diet/config-orig.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: WhitespaceTokenizer
4 | - name: CRFEntityExtractor
5 | - name: CountVectorsFeaturizer
6 | OOV_token: oov
7 | token_pattern: (?u)\b\w+\b
8 | - name: CountVectorsFeaturizer
9 | analyzer: char_wb
10 | min_ngram: 1
11 | max_ngram: 4
12 | - name: EmbeddingIntentClassifier
13 | epochs: 50
14 | ranking_length: 5
15 | - name: DucklingHTTPExtractor
16 | url: http://localhost:8000
17 | dimensions:
18 | - email
19 | - number
20 | - amount-of-money
21 | - name: EntitySynonymMapper
22 | policies:
23 | - name: EmbeddingPolicy
24 | max_history: 10
25 | epochs: 20
26 | batch_size:
27 | - 32
28 | - 64
29 | - max_history: 6
30 | name: AugmentedMemoizationPolicy
31 | - core_threshold: 0.3
32 | name: TwoStageFallbackPolicy
33 | nlu_threshold: 0.8
34 | - name: FormPolicy
35 | - name: MappingPolicy
36 |
--------------------------------------------------------------------------------
/diet/diet-heavy.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: HFTransformersNLP
4 | model_weights: "bert-base-uncased"
5 | model_name: "bert"
6 | - name: LanguageModelTokenizer
7 | - name: LanguageModelFeaturizer
8 | - name: CountVectorsFeaturizer
9 | analyzer: char_wb
10 | min_ngram: 1
11 | max_ngram: 4
12 | - name: CountVectorsFeaturizer
13 | - name: DIETClassifier
14 | epochs: 30
15 | num_transformer_layers: 4
16 | transformer_size: 256
17 | use_masked_language_model: True
18 | drop_rate: 0.25
19 | weight_sparsity: 0.7
20 | batch_size: [64, 256]
21 | embedding_dimension: 30
22 | hidden_layer_sizes:
23 | text: [512, 128]
24 | policies:
25 | - name: EmbeddingPolicy
26 | max_history: 10
27 | epochs: 20
28 | batch_size:
29 | - 32
30 | - 64
31 | - max_history: 6
32 | name: AugmentedMemoizationPolicy
33 | - core_threshold: 0.3
34 | name: TwoStageFallbackPolicy
35 | nlu_threshold: 0.8
36 | - name: FormPolicy
37 | - name: MappingPolicy
38 |
--------------------------------------------------------------------------------
/diet/diet-light.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: WhitespaceTokenizer
4 | - name: CountVectorsFeaturizer
5 | - name: CountVectorsFeaturizer
6 | analyzer: char_wb
7 | min_ngram: 1
8 | max_ngram: 4
9 | - name: DIETClassifier
10 | epochs: 20
11 | learning_rate: 0.005
12 | num_transformer_layers: 0
13 | embedding_dimension: 10
14 | weight_sparsity: 0.90
15 | hidden_layer_sizes:
16 | text: [256, 128]
17 | policies:
18 | - name: EmbeddingPolicy
19 | max_history: 10
20 | epochs: 20
21 | batch_size:
22 | - 32
23 | - 64
24 | - max_history: 6
25 | name: AugmentedMemoizationPolicy
26 | - core_threshold: 0.3
27 | name: TwoStageFallbackPolicy
28 | nlu_threshold: 0.8
29 | - name: FormPolicy
30 | - name: MappingPolicy
31 |
--------------------------------------------------------------------------------
/diet/diet-replace-mask.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: WhitespaceTokenizer
4 | - name: LexicalSyntacticFeaturizer
5 | - name: CountVectorsFeaturizer
6 | OOV_token: oov
7 | token_pattern: (?u)\b\w+\b
8 | - name: CountVectorsFeaturizer
9 | analyzer: char_wb
10 | min_ngram: 1
11 | max_ngram: 4
12 | - name: DIETClassifier
13 | epochs: 50
14 | ranking_length: 5
15 | use_masked_language_model: True
16 | - name: DucklingHTTPExtractor
17 | url: http://localhost:8000
18 | dimensions:
19 | - email
20 | - number
21 | - amount-of-money
22 | - name: EntitySynonymMapper
23 | policies:
24 | - name: EmbeddingPolicy
25 | max_history: 10
26 | epochs: 20
27 | batch_size:
28 | - 32
29 | - 64
30 | - max_history: 6
31 | name: AugmentedMemoizationPolicy
32 | - core_threshold: 0.3
33 | name: TwoStageFallbackPolicy
34 | nlu_threshold: 0.8
35 | - name: FormPolicy
36 | - name: MappingPolicy
37 |
--------------------------------------------------------------------------------
/diet/diet-replace.yml:
--------------------------------------------------------------------------------
1 | language: en
2 | pipeline:
3 | - name: WhitespaceTokenizer
4 | - name: LexicalSyntacticFeaturizer
5 | - name: CountVectorsFeaturizer
6 | OOV_token: oov
7 | token_pattern: (?u)\b\w+\b
8 | - name: CountVectorsFeaturizer
9 | analyzer: char_wb
10 | min_ngram: 1
11 | max_ngram: 4
12 | - name: DIETClassifier
13 | epochs: 50
14 | ranking_length: 5
15 | - name: DucklingHTTPExtractor
16 | url: http://localhost:8000
17 | dimensions:
18 | - email
19 | - number
20 | - amount-of-money
21 | - name: EntitySynonymMapper
22 | policies:
23 | - name: EmbeddingPolicy
24 | max_history: 10
25 | epochs: 20
26 | batch_size:
27 | - 32
28 | - 64
29 | - max_history: 6
30 | name: AugmentedMemoizationPolicy
31 | - core_threshold: 0.3
32 | name: TwoStageFallbackPolicy
33 | nlu_threshold: 0.8
34 | - name: FormPolicy
35 | - name: MappingPolicy
36 |
--------------------------------------------------------------------------------
/diet/readme.md:
--------------------------------------------------------------------------------
1 | ## readme
2 |
3 | This gist contains the code to repeat the steps in the DIET benchmarking YouTube video. You can download all the files by cloning this gist:
4 |
5 | ```
6 | git clone git@gist.github.com:81fc9433182ccfb9dece4bb4dbde1f7a.git
7 | ```
8 |
9 | You'll also need to clone the repository over [here](https://github.com/RasaHQ/rasa-demo) to get the dataset you'll need. You can clone that repository via:
10 |
11 | ```
12 | git clone git@github.com:RasaHQ/rasa-demo.git
13 | ```
14 |
15 | You will also need to ensure that you've installed the bert dependencies if you
16 | want to run the heavy model.
17 |
18 | ```
19 | pip install "rasa[transformers]"
20 | ```
21 |
22 | Once that is done you can repeat everything we've done here by running;
23 |
24 | ```
25 | mkdir results
26 | rasa test nlu --config configs/config-orig.yml --cross-validation --runs 1 --folds 2 --out results/config-orig
27 | rasa test nlu --config configs/config-init.yml --cross-validation --runs 1 --folds 2 --out results/config-init
28 | rasa test nlu --config configs/diet-replace.yml --cross-validation --runs 1 --folds 2 --out results/diet-replace
29 | rasa test nlu --config configs/diet-minimum.yml --cross-validation --runs 1 --folds 2 --out results/diet-minimum
30 | rasa test nlu --config configs/diet-heavy.yml --cross-validation --runs 1 --folds 2 --out results/diet-heavy
31 | ```
32 |
33 | Once done you can use streamlit to see a dashboard of the results.
34 |
35 | ```
36 | pip install streamlit
37 | streamlit run viewresults.py
38 | ```
39 |
40 |
--------------------------------------------------------------------------------
/diet/viewresults.py:
--------------------------------------------------------------------------------
1 | # to run this please make sure you've got the dependencies
2 | # pip install streamlit altair pandas plotnine
3 |
4 | import json
5 | import pathlib
6 |
7 | import streamlit as st
8 | import altair as alt
9 | import pandas as pd
10 | import plotnine as p9
11 |
def read_intent_report(path):
    """Load the 'weighted avg' entries of an intent report as a DataFrame.

    ``path`` must offer ``read_text()`` returning JSON and a ``parts``
    sequence (e.g. a ``pathlib.Path``). Every top-level entry whose key
    contains ``'weighted avg'`` becomes one row; ``path.parts[1]`` is stored
    in a ``config`` column and the ``support`` column is dropped.
    """
    report = json.loads(path.read_text())
    rows = []
    for key, metrics in report.items():
        if 'weighted avg' not in key:
            continue
        # Tag the row with the second path component (the results sub-folder
        # when the path looks like results/<config>/<file>).
        rows.append({**metrics, 'config': path.parts[1]})
    frame = pd.DataFrame(rows)
    return frame.drop(columns=['support'])
16 |
def read_entity_report(path):
    """Load the 'weighted avg' entries of an entity report as a DataFrame.

    The JSON text returned by ``path.read_text()`` is filtered down to the
    top-level entries whose key contains ``'weighted avg'``. Each kept entry
    gains a ``config`` field set to ``path.parts[1]``; the ``support`` column
    is removed from the resulting frame.
    """
    parsed = json.loads(path.read_text())
    weighted_rows = (
        dict(scores, config=path.parts[1])
        for label, scores in parsed.items()
        if 'weighted avg' in label
    )
    return pd.DataFrame(list(weighted_rows)).drop(columns=['support'])
21 |
def add_zeros(dataf, all_configs):
    """Append an all-zero metrics row for every config missing from ``dataf``.

    For each name in ``all_configs`` that does not occur in the ``config``
    column, a row with ``precision``, ``recall`` and ``f1-score`` set to 0 is
    concatenated onto the frame. The (possibly extended) frame is returned;
    when nothing is missing the input object itself comes back.
    """
    for name in all_configs:
        already_present = name in list(dataf['config'])
        if already_present:
            continue
        filler = pd.DataFrame({
            'precision': [0],
            'recall': [0],
            'f1-score': [0],
            'config': name,
        })
        # The index is not reset, so appended rows keep their own index 0.
        dataf = pd.concat([dataf, filler])
    return dataf
30 |
# `st.cache` has to decorate the function; calling `st.cache()` as a bare
# statement builds a decorator and throws it away, so nothing was cached.
@st.cache
def read_pandas():
    """Collect intent and entity reports from the ``results`` folder.

    Looks for ``results/<config>/*_report.json`` files. Intent metrics come
    from the files whose path contains ``intent_report``; entity metrics come
    from ``CRFEntityExtractor_report.json`` and ``DIETClassifier_report.json``.
    Configs that have no entity report get an all-zero row via ``add_zeros``.

    Returns:
        A ``(intent_df, entity_df)`` tuple of DataFrames.

    Raises:
        ValueError: propagated from ``pd.concat`` when no matching report
            files exist.
    """
    results_dir = pathlib.Path("results")
    report_paths = list(results_dir.glob("*/*_report.json"))
    # The second path component is the sub-folder name under `results`.
    configurations = {p.parts[1] for p in report_paths}
    intent_df = pd.concat(
        [read_intent_report(p) for p in report_paths if 'intent_report' in str(p)]
    )
    entity_paths = list(results_dir.glob("*/CRFEntityExtractor_report.json"))
    entity_paths += list(results_dir.glob("*/DIETClassifier_report.json"))
    entity_df = pd.concat(
        [read_entity_report(p) for p in entity_paths]
    ).pipe(add_zeros, all_configs=configurations)
    return intent_df, entity_df
40 |
# Load the report tables and derive the list of selectable result folders
# from the `config` column of the intent table.
intent_df, entity_df = read_pandas()
possible_configs = list(intent_df['config'])

# Page header.
st.markdown("# Rasa GridResults Summary")
st.markdown("Quick Overview of Crossvalidated Runs")

# Sidebar controls: which result folders to show (all selected by default)
# and whether to print the underlying tables below each chart.
st.sidebar.markdown("### Configure Overview")
st.sidebar.markdown("Select what you care about.")
selected_config = st.sidebar.multiselect("Select Result Folders",
                                         possible_configs,
                                         default=possible_configs)
show_raw_data = st.sidebar.checkbox("Show Raw Data")

# Keep only the selected configs and reshape to long form
# (columns: config, variable, value) for plotting.
subset_df = intent_df.loc[lambda d: d['config'].isin(selected_config)].melt('config')


st.markdown("## Intent Summary Overview")

# One bar per config, one chart row per metric (`variable`).
c = alt.Chart(subset_df).mark_bar().encode(
    y='config:N',
    x='value:Q',
    color='config:N',
    row='variable:N'
)
st.altair_chart(c)

if show_raw_data:
    st.write(intent_df.loc[lambda d: d['config'].isin(selected_config)])


# Same filtering/reshaping and chart for the entity metrics.
subset_df = entity_df.loc[lambda d: d['config'].isin(selected_config)].melt('config')

st.markdown("## Entity Summary Overview")
c = alt.Chart(subset_df).mark_bar().encode(
    y='config:N',
    x='value:Q',
    color='config:N',
    row='variable:N'
)

st.altair_chart(c)

if show_raw_data:
    st.write(entity_df.loc[lambda d: d['config'].isin(selected_config)])
85 |
--------------------------------------------------------------------------------
/glove/glove.py:
--------------------------------------------------------------------------------
1 | from tensorflow.keras.layers import Embedding, Dense, Flatten, Input, Dot
2 | from tensorflow.keras.models import Sequential, Model
3 |
# Size of each word embedding vector.
dim_words = 5

# NOTE(review): `num_words` is used below but never defined in this file;
# it has to be set (presumably to the vocabulary size) before this runs.

# this one is so we might grab the embeddings
# A small standalone model: one word index in, its flattened embedding out.
model_emb = Sequential()
embedding = Embedding(num_words, dim_words, input_length=1)
model_emb.add(embedding)
model_emb.add(Flatten())

# Two single-index inputs, one per word of a pair.
word_one = Input(shape=(1,))
word_two = Input(shape=(1,))

# Both words go through the same `model_emb`, so they share one embedding
# table; the two embeddings are combined with a dot product.
cross_prod = Dot(axes=1)([model_emb(word_one), model_emb(word_two)])
# A single ReLU unit maps the dot product to the model output.
out = Dense(1, activation="relu")(cross_prod)

# Full model: (word_one, word_two) -> scalar output.
glovelike = Model(inputs=[word_one, word_two], outputs=out)
--------------------------------------------------------------------------------
/images/benchmarks.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/benchmarks.jpeg
--------------------------------------------------------------------------------
/images/bias.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/bias.jpg
--------------------------------------------------------------------------------
/images/bulk-labelling-ui.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/bulk-labelling-ui.jpeg
--------------------------------------------------------------------------------
/images/byte.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/byte.jpg
--------------------------------------------------------------------------------
/images/confidence.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/confidence.jpeg
--------------------------------------------------------------------------------
/images/countv.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/countv.jpg
--------------------------------------------------------------------------------
/images/debias-hard.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/debias-hard.jpg
--------------------------------------------------------------------------------
/images/debias-proj.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/debias-proj.jpg
--------------------------------------------------------------------------------
/images/fallback-detection.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/fallback-detection.jpeg
--------------------------------------------------------------------------------
/images/flashtext.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/flashtext.jpeg
--------------------------------------------------------------------------------
/images/incremental-training.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/incremental-training.jpeg
--------------------------------------------------------------------------------
/images/iterate-data.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/iterate-data.jpeg
--------------------------------------------------------------------------------
/images/labse.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/labse.jpeg
--------------------------------------------------------------------------------
/images/language.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/language.jpeg
--------------------------------------------------------------------------------
/images/leven.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/leven.jpg
--------------------------------------------------------------------------------
/images/lexical-ambiguity.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/lexical-ambiguity.jpeg
--------------------------------------------------------------------------------
/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/logo.png
--------------------------------------------------------------------------------
/images/remain-careful.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/remain-careful.jpg
--------------------------------------------------------------------------------
/images/resp1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/resp1.jpg
--------------------------------------------------------------------------------
/images/resp2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/resp2.jpg
--------------------------------------------------------------------------------
/images/subw-imp.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/subw-imp.jpg
--------------------------------------------------------------------------------
/images/subw.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/subw.jpg
--------------------------------------------------------------------------------
/images/ted1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/ted1.jpg
--------------------------------------------------------------------------------
/images/ted2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/ted2.jpg
--------------------------------------------------------------------------------
/images/toxic-lang.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/toxic-lang.png
--------------------------------------------------------------------------------
/images/vid-1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-1.jpg
--------------------------------------------------------------------------------
/images/vid-10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-10.jpg
--------------------------------------------------------------------------------
/images/vid-11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-11.jpg
--------------------------------------------------------------------------------
/images/vid-12.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-12.jpg
--------------------------------------------------------------------------------
/images/vid-13.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-13.jpg
--------------------------------------------------------------------------------
/images/vid-2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-2.jpg
--------------------------------------------------------------------------------
/images/vid-3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-3.jpg
--------------------------------------------------------------------------------
/images/vid-4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-4.jpg
--------------------------------------------------------------------------------
/images/vid-5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-5.jpg
--------------------------------------------------------------------------------
/images/vid-6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-6.jpg
--------------------------------------------------------------------------------
/images/vid-7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-7.jpg
--------------------------------------------------------------------------------
/images/vid-8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-8.jpg
--------------------------------------------------------------------------------
/images/word-analogies.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/word-analogies.jpg
--------------------------------------------------------------------------------
/intent-benchmark/benchmark.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pathlib
3 |
4 | import numpy as np
5 | import pandas as pd
6 |
7 | from sklearn.pipeline import Pipeline, make_pipeline, FeatureUnion
8 | from sklearn.feature_extraction.text import CountVectorizer
9 |
10 | from memo import memlist, memfunc, memfile, time_taken, grid
11 |
12 | from sklearn.model_selection import train_test_split
13 | from whatlies.language import FasttextLanguage, UniversalSentenceLanguage
14 |
# Embedding backends from `whatlies`, created once at import time and used
# as feature steps by `generate_model`.
# The fastText model is read from a path relative to the working directory.
ft_lang = FasttextLanguage("../whatlies/embeddings/cc.en.300.bin")
use_lang = UniversalSentenceLanguage()
17 |
18 |
def generate_model(emb, model='lr'):
    """Build a text-classification pipeline for the given feature set.

    Args:
        emb: Which features to use.
            ``"cv"``       - word counts only (the default ``emb`` that
                             ``experiment`` passes),
            ``"cv-ngram"`` - word counts plus character 2-3-gram counts,
            ``"ft"``       - ``"cv-ngram"`` plus the fastText embedding,
            ``"use"``      - ``"cv-ngram"`` plus the Universal Sentence
                             Encoder embedding.
        model: Key of the classifier to put at the end of the pipeline;
            only ``'lr'`` (logistic regression) is available.

    Returns:
        An unfitted scikit-learn pipeline: feature union followed by the
        classifier.

    Raises:
        ValueError: if ``emb`` is not one of the values listed above.
        KeyError: if ``model`` is not a known classifier key.
    """
    # The file's import block does not bring in LogisticRegression, so it is
    # imported here to keep this function self-sufficient.
    from sklearn.linear_model import LogisticRegression

    models = {
        'lr': LogisticRegression(solver='liblinear', class_weight="balanced"),
    }

    features = [('cv', CountVectorizer())]
    if emb in ("cv-ngram", "ft", "use"):
        features.append(
            ('cv-ngram', CountVectorizer(analyzer='char', ngram_range=(2, 3)))
        )
        if emb == "use":
            features.append(('use_lang', use_lang))
        elif emb == "ft":
            # The module-level fastText backend is called `ft_lang`.
            features.append(('ft', ft_lang))
    elif emb != "cv":
        raise ValueError(
            f"Unknown emb {emb!r}; expected one of 'cv', 'cv-ngram', 'ft', 'use'."
        )

    return make_pipeline(FeatureUnion(features), models[model])
44 |
45 |
@memfile('benchmark-logs.jsonl')
@time_taken()
def experiment(dataset, model, emb="cv", train_size=100, test_size=1000):
    """Train one model on one dataset and report accuracy and prediction time.

    The CSV registered under ``datasets[dataset]`` is read, rows with a
    missing ``text`` or ``label`` are removed, and a stratified split with a
    fixed seed holds out ``test_size`` rows. Only the first ``train_size``
    rows of the training part are used for fitting.

    Returns:
        A dict with ``accuracy_test``, ``accuracy_train`` and ``pred_time``
        (seconds spent predicting on the test set).
    """
    frame = pd.read_csv(datasets[dataset])
    frame = frame[frame['text'].notna()]
    frame = frame[frame['label'].notna()]

    split = train_test_split(frame['text'], frame['label'],
                             test_size=test_size,
                             stratify=frame['label'],
                             random_state=42)
    texts_train, texts_test, labels_train, labels_test = split

    # Text input has to be handed over as plain lists.
    texts_train = list(texts_train[:train_size])
    labels_train = list(labels_train[:train_size])
    texts_test = list(texts_test)
    labels_test = list(labels_test)

    # Build the pipeline and fit it on the truncated training set.
    pipeline = generate_model(emb=emb, model=model)
    pipeline.fit(texts_train, labels_train)

    # Only the test-set prediction is timed.
    train_pred = pipeline.predict(texts_train)
    started = time.time()
    test_pred = pipeline.predict(texts_test)
    elapsed = time.time() - started

    return {
        'accuracy_test': np.mean(labels_test == test_pred),
        'accuracy_train': np.mean(labels_train == train_pred),
        'pred_time': elapsed
    }
76 |
77 |
# Parameter settings for the benchmark run. `grid` comes from the `memo`
# package (presumably it yields one dict per combination of the listed
# values - confirm against the memo documentation).
settings = grid(
    dataset=["scope"],
    model=["lr"],
    emb=["ft", "use", "cv-ngram"],
    # Training-set sizes 500, 1000, ..., 9000.
    train_size=np.arange(500, 9500, 500),
    test_size=[4000]
)

# Run the experiment once per setting; each item is unpacked into the
# keyword arguments of `experiment`.
for s in settings:
    experiment(**s)
88 |
--------------------------------------------------------------------------------
/intent-benchmark/n-ft-use-train-experiment.jsonl:
--------------------------------------------------------------------------------
1 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":200,"test_size":1000,"accuracy_test":0.775,"accuracy_train":1.0,"pred_time":0.05856132507324219,"time_taken":0.19}
2 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":300,"test_size":1000,"accuracy_test":0.827,"accuracy_train":1.0,"pred_time":0.05572152137756348,"time_taken":0.26}
3 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":400,"test_size":1000,"accuracy_test":0.849,"accuracy_train":1.0,"pred_time":0.05605268478393555,"time_taken":0.34}
4 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":500,"test_size":1000,"accuracy_test":0.855,"accuracy_train":0.998,"pred_time":0.05651450157165527,"time_taken":0.42}
5 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":600,"test_size":1000,"accuracy_test":0.857,"accuracy_train":0.9983333333333333,"pred_time":0.056448936462402344,"time_taken":0.49}
6 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":700,"test_size":1000,"accuracy_test":0.864,"accuracy_train":0.9985714285714286,"pred_time":0.05691123008728027,"time_taken":0.57}
7 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":800,"test_size":1000,"accuracy_test":0.877,"accuracy_train":0.9975,"pred_time":0.05725860595703125,"time_taken":0.66}
8 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":900,"test_size":1000,"accuracy_test":0.882,"accuracy_train":0.9966666666666667,"pred_time":0.05646491050720215,"time_taken":0.74}
9 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":1000,"test_size":1000,"accuracy_test":0.886,"accuracy_train":0.996,"pred_time":0.056417226791381836,"time_taken":0.83}
10 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":1100,"test_size":1000,"accuracy_test":0.896,"accuracy_train":0.9963636363636363,"pred_time":0.056476593017578125,"time_taken":0.94}
11 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":200,"test_size":1000,"accuracy_test":0.783,"accuracy_train":1.0,"pred_time":0.9751472473144531,"time_taken":1.7}
12 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":300,"test_size":1000,"accuracy_test":0.836,"accuracy_train":1.0,"pred_time":0.9808781147003174,"time_taken":1.8}
13 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":400,"test_size":1000,"accuracy_test":0.858,"accuracy_train":1.0,"pred_time":0.9849858283996582,"time_taken":2.12}
14 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":500,"test_size":1000,"accuracy_test":0.864,"accuracy_train":0.998,"pred_time":0.9827330112457275,"time_taken":2.39}
15 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":600,"test_size":1000,"accuracy_test":0.866,"accuracy_train":0.9983333333333333,"pred_time":0.9716253280639648,"time_taken":2.73}
16 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":700,"test_size":1000,"accuracy_test":0.87,"accuracy_train":1.0,"pred_time":0.9795506000518799,"time_taken":3.0}
17 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":800,"test_size":1000,"accuracy_test":0.876,"accuracy_train":0.99625,"pred_time":0.9777274131774902,"time_taken":3.38}
18 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":900,"test_size":1000,"accuracy_test":0.886,"accuracy_train":0.9966666666666667,"pred_time":0.9741647243499756,"time_taken":3.74}
19 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":1000,"test_size":1000,"accuracy_test":0.892,"accuracy_train":0.997,"pred_time":0.9753170013427734,"time_taken":4.03}
20 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":1100,"test_size":1000,"accuracy_test":0.9,"accuracy_train":0.9963636363636363,"pred_time":0.9810361862182617,"time_taken":4.58}
21 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":200,"test_size":1000,"accuracy_test":0.776,"accuracy_train":1.0,"pred_time":0.026402711868286133,"time_taken":0.07}
22 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":300,"test_size":1000,"accuracy_test":0.829,"accuracy_train":1.0,"pred_time":0.0264432430267334,"time_taken":0.08}
23 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":400,"test_size":1000,"accuracy_test":0.849,"accuracy_train":1.0,"pred_time":0.026544809341430664,"time_taken":0.1}
24 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":500,"test_size":1000,"accuracy_test":0.853,"accuracy_train":0.998,"pred_time":0.02636265754699707,"time_taken":0.12}
25 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":600,"test_size":1000,"accuracy_test":0.86,"accuracy_train":0.9983333333333333,"pred_time":0.026688575744628906,"time_taken":0.13}
26 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":700,"test_size":1000,"accuracy_test":0.867,"accuracy_train":0.9971428571428571,"pred_time":0.026691913604736328,"time_taken":0.15}
27 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":800,"test_size":1000,"accuracy_test":0.877,"accuracy_train":0.99375,"pred_time":0.026842832565307617,"time_taken":0.17}
28 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":900,"test_size":1000,"accuracy_test":0.883,"accuracy_train":0.9944444444444445,"pred_time":0.026617050170898438,"time_taken":0.19}
29 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":1000,"test_size":1000,"accuracy_test":0.885,"accuracy_train":0.994,"pred_time":0.02674078941345215,"time_taken":0.21}
30 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":1100,"test_size":1000,"accuracy_test":0.896,"accuracy_train":0.9936363636363637,"pred_time":0.02686476707458496,"time_taken":0.23}
31 | {"dataset":"scope","model":"lr","emb":"ft","train_size":500,"test_size":4000,"accuracy_test":0.55625,"accuracy_train":1.0,"pred_time":0.37118983268737793,"time_taken":2.56}
32 | {"dataset":"scope","model":"lr","emb":"ft","train_size":500,"test_size":4000,"accuracy_test":0.55625,"accuracy_train":1.0,"pred_time":0.37976861000061035,"time_taken":2.74}
33 | {"dataset":"scope","model":"lr","emb":"ft","train_size":1000,"test_size":4000,"accuracy_test":0.70475,"accuracy_train":0.999,"pred_time":0.38712477684020996,"time_taken":5.97}
34 | {"dataset":"scope","model":"lr","emb":"ft","train_size":1500,"test_size":4000,"accuracy_test":0.774,"accuracy_train":0.9986666666666667,"pred_time":0.3793807029724121,"time_taken":12.37}
35 | {"dataset":"scope","model":"lr","emb":"ft","train_size":2000,"test_size":4000,"accuracy_test":0.81025,"accuracy_train":0.9995,"pred_time":0.3807692527770996,"time_taken":14.11}
36 | {"dataset":"scope","model":"lr","emb":"ft","train_size":2500,"test_size":4000,"accuracy_test":0.82875,"accuracy_train":0.9984,"pred_time":0.3881092071533203,"time_taken":18.72}
37 | {"dataset":"scope","model":"lr","emb":"ft","train_size":3000,"test_size":4000,"accuracy_test":0.84925,"accuracy_train":0.998,"pred_time":0.382002592086792,"time_taken":23.23}
38 | {"dataset":"scope","model":"lr","emb":"ft","train_size":3500,"test_size":4000,"accuracy_test":0.8685,"accuracy_train":0.9977142857142857,"pred_time":0.37944746017456055,"time_taken":30.12}
39 | {"dataset":"scope","model":"lr","emb":"ft","train_size":4000,"test_size":4000,"accuracy_test":0.87525,"accuracy_train":0.997,"pred_time":0.37783193588256836,"time_taken":36.35}
40 | {"dataset":"scope","model":"lr","emb":"ft","train_size":4500,"test_size":4000,"accuracy_test":0.8795,"accuracy_train":0.9968888888888889,"pred_time":0.37891101837158203,"time_taken":40.89}
41 | {"dataset":"scope","model":"lr","emb":"ft","train_size":5000,"test_size":4000,"accuracy_test":0.88875,"accuracy_train":0.996,"pred_time":0.3801717758178711,"time_taken":48.31}
42 | {"dataset":"scope","model":"lr","emb":"ft","train_size":5500,"test_size":4000,"accuracy_test":0.89425,"accuracy_train":0.9956363636363637,"pred_time":0.3786475658416748,"time_taken":52.95}
43 | {"dataset":"scope","model":"lr","emb":"ft","train_size":6000,"test_size":4000,"accuracy_test":0.8975,"accuracy_train":0.9956666666666667,"pred_time":0.37787723541259766,"time_taken":59.11}
44 | {"dataset":"scope","model":"lr","emb":"ft","train_size":6500,"test_size":4000,"accuracy_test":0.90075,"accuracy_train":0.9958461538461538,"pred_time":0.3805055618286133,"time_taken":82.49}
45 | {"dataset":"scope","model":"lr","emb":"ft","train_size":7000,"test_size":4000,"accuracy_test":0.9045,"accuracy_train":0.996,"pred_time":0.38358306884765625,"time_taken":90.78}
46 | {"dataset":"scope","model":"lr","emb":"ft","train_size":7500,"test_size":4000,"accuracy_test":0.90925,"accuracy_train":0.9961333333333333,"pred_time":0.3860592842102051,"time_taken":98.64}
47 | {"dataset":"scope","model":"lr","emb":"ft","train_size":8000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.995625,"pred_time":0.38474011421203613,"time_taken":106.34}
48 | {"dataset":"scope","model":"lr","emb":"ft","train_size":8500,"test_size":4000,"accuracy_test":0.91175,"accuracy_train":0.9955294117647059,"pred_time":0.3792722225189209,"time_taken":115.82}
49 | {"dataset":"scope","model":"lr","emb":"ft","train_size":9000,"test_size":4000,"accuracy_test":0.914,"accuracy_train":0.9954444444444445,"pred_time":0.382676362991333,"time_taken":124.8}
50 | {"dataset":"scope","model":"lr","emb":"ft","train_size":9500,"test_size":4000,"accuracy_test":0.9185,"accuracy_train":0.9945263157894737,"pred_time":0.38468122482299805,"time_taken":133.39}
51 | {"dataset":"scope","model":"lr","emb":"use","train_size":500,"test_size":4000,"accuracy_test":0.564,"accuracy_train":1.0,"pred_time":4.3004679679870605,"time_taken":8.74}
52 | {"dataset":"scope","model":"lr","emb":"use","train_size":1000,"test_size":4000,"accuracy_test":0.71325,"accuracy_train":1.0,"pred_time":4.3029162883758545,"time_taken":16.01}
53 | {"dataset":"scope","model":"lr","emb":"use","train_size":1500,"test_size":4000,"accuracy_test":0.7815,"accuracy_train":0.9986666666666667,"pred_time":4.322261095046997,"time_taken":22.46}
54 | {"dataset":"scope","model":"lr","emb":"use","train_size":2000,"test_size":4000,"accuracy_test":0.819,"accuracy_train":0.9995,"pred_time":4.327869653701782,"time_taken":29.61}
55 | {"dataset":"scope","model":"lr","emb":"use","train_size":2500,"test_size":4000,"accuracy_test":0.83775,"accuracy_train":0.9984,"pred_time":4.3121161460876465,"time_taken":38.75}
56 | {"dataset":"scope","model":"lr","emb":"use","train_size":3000,"test_size":4000,"accuracy_test":0.85925,"accuracy_train":0.998,"pred_time":4.291502475738525,"time_taken":45.35}
57 | {"dataset":"scope","model":"lr","emb":"use","train_size":3500,"test_size":4000,"accuracy_test":0.8755,"accuracy_train":0.998,"pred_time":4.301600456237793,"time_taken":57.41}
58 | {"dataset":"scope","model":"lr","emb":"use","train_size":4000,"test_size":4000,"accuracy_test":0.88175,"accuracy_train":0.99725,"pred_time":4.298020839691162,"time_taken":66.41}
59 | {"dataset":"scope","model":"lr","emb":"use","train_size":4500,"test_size":4000,"accuracy_test":0.8865,"accuracy_train":0.9973333333333333,"pred_time":4.29729437828064,"time_taken":70.1}
60 | {"dataset":"scope","model":"lr","emb":"use","train_size":5000,"test_size":4000,"accuracy_test":0.89675,"accuracy_train":0.9962,"pred_time":4.304081678390503,"time_taken":77.83}
61 | {"dataset":"scope","model":"lr","emb":"use","train_size":5500,"test_size":4000,"accuracy_test":0.90175,"accuracy_train":0.996,"pred_time":4.2856175899505615,"time_taken":86.89}
62 | {"dataset":"scope","model":"lr","emb":"use","train_size":6000,"test_size":4000,"accuracy_test":0.9065,"accuracy_train":0.9965,"pred_time":4.321293830871582,"time_taken":123.94}
63 | {"dataset":"scope","model":"lr","emb":"use","train_size":6500,"test_size":4000,"accuracy_test":0.90925,"accuracy_train":0.9964615384615385,"pred_time":4.301099061965942,"time_taken":136.0}
64 | {"dataset":"scope","model":"lr","emb":"use","train_size":7000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.9965714285714286,"pred_time":4.292471647262573,"time_taken":148.71}
65 | {"dataset":"scope","model":"lr","emb":"use","train_size":7500,"test_size":4000,"accuracy_test":0.915,"accuracy_train":0.9962666666666666,"pred_time":4.3266870975494385,"time_taken":178.36}
66 | {"dataset":"scope","model":"lr","emb":"use","train_size":8000,"test_size":4000,"accuracy_test":0.9165,"accuracy_train":0.996375,"pred_time":4.319508075714111,"time_taken":179.46}
67 | {"dataset":"scope","model":"lr","emb":"use","train_size":8500,"test_size":4000,"accuracy_test":0.919,"accuracy_train":0.9962352941176471,"pred_time":4.291066646575928,"time_taken":194.03}
68 | {"dataset":"scope","model":"lr","emb":"use","train_size":9000,"test_size":4000,"accuracy_test":0.92175,"accuracy_train":0.996,"pred_time":4.30959153175354,"time_taken":206.82}
69 | {"dataset":"scope","model":"lr","emb":"use","train_size":9500,"test_size":4000,"accuracy_test":0.92475,"accuracy_train":0.9956842105263158,"pred_time":4.291162014007568,"time_taken":220.63}
70 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":500,"test_size":4000,"accuracy_test":0.55675,"accuracy_train":1.0,"pred_time":0.17368674278259277,"time_taken":0.86}
71 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":1000,"test_size":4000,"accuracy_test":0.70475,"accuracy_train":0.999,"pred_time":0.1765146255493164,"time_taken":1.51}
72 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":1500,"test_size":4000,"accuracy_test":0.77475,"accuracy_train":0.9986666666666667,"pred_time":0.17641973495483398,"time_taken":2.18}
73 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":2000,"test_size":4000,"accuracy_test":0.8105,"accuracy_train":0.9995,"pred_time":0.17655467987060547,"time_taken":2.96}
74 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":2500,"test_size":4000,"accuracy_test":0.82925,"accuracy_train":0.9984,"pred_time":0.17617464065551758,"time_taken":3.82}
75 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":3000,"test_size":4000,"accuracy_test":0.85,"accuracy_train":0.998,"pred_time":0.17685651779174805,"time_taken":4.57}
76 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":3500,"test_size":4000,"accuracy_test":0.869,"accuracy_train":0.9977142857142857,"pred_time":0.1765122413635254,"time_taken":5.35}
77 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":4000,"test_size":4000,"accuracy_test":0.87525,"accuracy_train":0.997,"pred_time":0.17818188667297363,"time_taken":6.15}
78 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":4500,"test_size":4000,"accuracy_test":0.879,"accuracy_train":0.9968888888888889,"pred_time":0.1768357753753662,"time_taken":7.43}
79 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":5000,"test_size":4000,"accuracy_test":0.8885,"accuracy_train":0.996,"pred_time":0.17785978317260742,"time_taken":9.02}
80 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":5500,"test_size":4000,"accuracy_test":0.894,"accuracy_train":0.9956363636363637,"pred_time":0.17731571197509766,"time_taken":9.72}
81 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":6000,"test_size":4000,"accuracy_test":0.8975,"accuracy_train":0.9955,"pred_time":0.1766970157623291,"time_taken":12.23}
82 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":6500,"test_size":4000,"accuracy_test":0.90075,"accuracy_train":0.9958461538461538,"pred_time":0.17724108695983887,"time_taken":13.85}
83 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":7000,"test_size":4000,"accuracy_test":0.90375,"accuracy_train":0.996,"pred_time":0.1762101650238037,"time_taken":20.67}
84 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":7500,"test_size":4000,"accuracy_test":0.9085,"accuracy_train":0.9961333333333333,"pred_time":0.1766214370727539,"time_taken":24.64}
85 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":8000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.995625,"pred_time":0.1761927604675293,"time_taken":26.94}
86 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":8500,"test_size":4000,"accuracy_test":0.9115,"accuracy_train":0.9955294117647059,"pred_time":0.17797327041625977,"time_taken":33.41}
87 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":9000,"test_size":4000,"accuracy_test":0.91375,"accuracy_train":0.9953333333333333,"pred_time":0.17589163780212402,"time_taken":31.29}
88 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":9500,"test_size":4000,"accuracy_test":0.9185,"accuracy_train":0.994421052631579,"pred_time":0.17615866661071777,"time_taken":33.87}
89 |
--------------------------------------------------------------------------------
/language/ar.tsv:
--------------------------------------------------------------------------------
1 | 1 كما as
2 | 2 أنا I
3 | 3 له his
4 | 4 أن that
5 | 5 هو he
6 | 6 كان was
7 | 7 إلى for
8 | 8 في on
9 | 9 هي are
10 | 10 مع with
11 | 11 هم they
12 | 12 يكون be
13 | 13 في at
14 | 14 واحد one
15 | 15 ديك have
16 | 16 هذا this
17 | 17 من from
18 | 18 بواسطة by
19 | 19 حار hot
20 | 20 كلمة word
21 | 21 لكن but
22 | 22 ما what
23 | 23 بعض some
24 | 24 هو is
25 | 25 هو it
26 | 26 أنت you
27 | 27 أو or
28 | 28 كان had
29 | 29 و the
30 | 30 من of
31 | 31 إلى to
32 | 32 و and
33 | 33 و a
34 | 34 في in
35 | 35 نحن we
36 | 36 علبة can
37 | 37 خارج out
38 | 38 البعض other
39 | 39 و were
40 | 40 التي which
41 | 41 القيام do
42 | 42 من their
43 | 43 الوقت time
44 | 44 إذا if
45 | 45 سوف will
46 | 46 كيف how
47 | 47 قال said
48 | 48 و an
49 | 49 كل each
50 | 50 أقول tell
51 | 51 لا does
52 | 52 مجموعة set
53 | 53 ثلاثة three
54 | 54 تريد want
55 | 55 هواء air
56 | 56 جيد well
57 | 57 أيضا also
58 | 58 لعب play
59 | 59 صغير small
60 | 60 نهاية end
61 | 61 وضع put
62 | 62 المنزل home
63 | 63 قرأ read
64 | 64 يد hand
65 | 65 ميناء port
66 | 66 كبير large
67 | 67 تهجى spell
68 | 68 إضافة add
69 | 69 حتى even
70 | 70 الأرض land
71 | 71 هنا here
72 | 72 يجب must
73 | 73 كبير big
74 | 74 ارتفاع high
75 | 75 مثل such
76 | 76 تابع follow
77 | 77 فعل act
78 | 78 لماذا why
79 | 79 تطلب ask
80 | 80 الرجال men
81 | 81 تغيير change
82 | 82 ذهب went
83 | 83 ضوء light
84 | 84 نوع kind
85 | 85 بعيدا off
86 | 86 تحتاج need
87 | 87 منزل house
88 | 88 صور picture
89 | 89 محاولة try
90 | 90 لنا us
91 | 91 مرة أخرى again
92 | 92 الحيوان animal
93 | 93 نقطة point
94 | 94 أم mother
95 | 95 العالم world
96 | 96 قرب near
97 | 97 بناء build
98 | 98 النفس self
99 | 99 أرض earth
100 | 100 الأب father
101 | 101 أي any
102 | 102 جديدة new
103 | 103 العمل work
104 | 104 جزء part
105 | 105 أخذ take
106 | 106 الحصول على get
107 | 107 مكان place
108 | 108 مصنوع made
109 | 109 حي live
110 | 110 حيث where
111 | 111 بعد after
112 | 112 ظهر back
113 | 113 القليل little
114 | 114 فقط only
115 | 115 جولة round
116 | 116 رجل man
117 | 117 عام year
118 | 118 جاء came
119 | 119 المعرض show
120 | 120 كل every
121 | 121 جيد good
122 | 122 أنا me
123 | 123 منح give
124 | 124 لدينا our
125 | 125 تحت under
126 | 126 اسم name
127 | 127 جدا very
128 | 128 من خلال through
129 | 129 فقط just
130 | 130 شكل form
131 | 131 عقوبة sentence
132 | 132 عظيم great
133 | 133 اعتقد think
134 | 134 قول say
135 | 135 مساعدة help
136 | 136 منخفض low
137 | 137 خط line
138 | 138 اختلف differ
139 | 139 منعطف turn
140 | 140 السبب cause
141 | 141 كثيرا much
142 | 142 متوسط mean
143 | 143 قبل before
144 | 144 خطوة move
145 | 145 الحق right
146 | 146 صبي boy
147 | 147 قديم old
148 | 148 أيضا too
149 | 149 نفسه same
150 | 150 هي she
151 | 151 كل all
152 | 152 هناك there
153 | 153 عندما when
154 | 154 فوق up
155 | 155 استخدام use
156 | 156 ك your
157 | 157 طريق way
158 | 158 حول about
159 | 159 كثير many
160 | 160 ثم then
161 | 161 هم them
162 | 162 إرسال write
163 | 163 أراد would
164 | 164 مثل like
165 | 165 هكذا so
166 | 166 هؤلاء these
167 | 167 لها her
168 | 168 طويل long
169 | 169 جعل make
170 | 170 شيء thing
171 | 171 شاهد see
172 | 172 له him
173 | 173 اثنين two
174 | 174 لديه has
175 | 175 بحث look
176 | 176 أكثر more
177 | 177 يوم day
178 | 178 يمكن could
179 | 179 تذهب go
180 | 180 جاء come
181 | 181 لم did
182 | 182 عدد number
183 | 183 صوت sound
184 | 184 لا no
185 | 185 أكثر most
186 | 186 الناس people
187 | 187 لي my
188 | 188 على over
189 | 189 تعرف know
190 | 190 ماء water
191 | 191 من than
192 | 192 دعوة call
193 | 193 الأول first
194 | 194 الذي who
195 | 195 قد may
196 | 196 إلى down
197 | 197 الجانب side
198 | 198 كان been
199 | 199 الآن now
200 | 200 اكتشاف find
201 | 201 رئيس head
202 | 202 الوقوف stand
203 | 203 الخاصة own
204 | 204 الصفحة page
205 | 205 ينبغي should
206 | 206 بلد country
207 | 207 أسس found
208 | 208 الجواب answer
209 | 209 المدرسة school
210 | 210 تنمو grow
211 | 211 دراسة study
212 | 212 لا يزال still
213 | 213 تعلم learn
214 | 214 مصنع plant
215 | 215 غطاء cover
216 | 216 غذاء food
217 | 217 شمس sun
218 | 218 أربعة four
219 | 219 بين between
220 | 220 دولة state
221 | 221 احتفظ keep
222 | 222 العين eye
223 | 223 أبدا never
224 | 224 آخر last
225 | 225 سمح let
226 | 226 يعتقد thought
227 | 227 المدينة city
228 | 228 شجرة tree
229 | 229 عبور cross
230 | 230 مزرعة farm
231 | 231 شاق hard
232 | 232 بداية start
233 | 233 قد might
234 | 234 قصة story
235 | 235 منشار saw
236 | 236 الآن far
237 | 237 بحر sea
238 | 238 رسم draw
239 | 239 غادر left
240 | 240 متأخر late
241 | 241 تشغيل run
242 | 242 لا don’t
243 | 243 في حين while
244 | 244 الصحافة press
245 | 245 قريب close
246 | 246 الليل night
247 | 247 حقيقية real
248 | 248 حياة life
249 | 249 قليل few
250 | 250 شمال north
251 | 251 كتاب book
252 | 252 حمل carry
253 | 253 استغرق took
254 | 254 علم science
255 | 255 أكل eat
256 | 256 غرفة room
257 | 257 صديق friend
258 | 258 بدأ began
259 | 259 فكرة idea
260 | 260 سمك fish
261 | 261 الجبل mountain
262 | 262 توقف stop
263 | 263 مرة واحدة once
264 | 264 قاعدة base
265 | 265 سمع hear
266 | 266 الحصان horse
267 | 267 قطع cut
268 | 268 بالتأكيد sure
269 | 269 راقب watch
270 | 270 لون color
271 | 271 وجه face
272 | 272 الخشب wood
273 | 273 رئيسي main
274 | 274 مفتوحة open
275 | 275 يبدو seem
276 | 276 معا together
277 | 277 المقبل next
278 | 278 أبيض white
279 | 279 الأطفال children
280 | 280 بدأ begin
281 | 281 حصلت got
282 | 282 سير walk
283 | 283 مثال example
284 | 284 سهولة ease
285 | 285 ورق paper
286 | 286 مجموعة group
287 | 287 دائما always
288 | 288 الموسيقى music
289 | 289 تلك those
290 | 290 كلا both
291 | 291 علامة mark
292 | 292 غالبا often
293 | 293 الرسالة letter
294 | 294 حتى until
295 | 295 ميل mile
296 | 296 النهر river
297 | 297 سيارة car
298 | 298 قدم feet
299 | 299 الرعاية care
300 | 300 ثان second
301 | 301 بما فيه الكفاية enough
302 | 302 سهل plain
303 | 303 فتاة girl
304 | 304 معتاد usual
305 | 305 الشباب young
306 | 306 جاهزة ready
307 | 307 فوق above
308 | 308 أبدا ever
309 | 309 أحمر red
310 | 310 قائمة list
311 | 311 على الرغم من though
312 | 312 إحساس feel
313 | 313 الحديث talk
314 | 314 طائر bird
315 | 315 قريبا soon
316 | 316 هيئة body
317 | 317 الكلب dog
318 | 318 عائلة family
319 | 319 مباشر direct
320 | 320 أربك pose
321 | 321 ترك leave
322 | 322 أغنية song
323 | 323 قياس measure
324 | 324 باب door
325 | 325 نتاج product
326 | 326 أسود black
327 | 327 قصير short
328 | 328 الأرقام numeral
329 | 329 الفئة class
330 | 330 الرياح wind
331 | 331 السؤال question
332 | 332 يحدث happen
333 | 333 كاملة complete
334 | 334 سفينة ship
335 | 335 منطقة area
336 | 336 نصف half
337 | 337 صخرة rock
338 | 338 النظام order
339 | 339 النار fire
340 | 340 الجنوب south
341 | 341 مشكلة problem
342 | 342 قطعة piece
343 | 343 قال told
344 | 344 عرف knew
345 | 345 تمر pass
346 | 346 منذ since
347 | 347 أعلى top
348 | 348 ككل whole
349 | 349 ملك king
350 | 350 شارع street
351 | 351 بوصة inch
352 | 352 مضاعفة multiply
353 | 353 لا شيء nothing
354 | 354 بالطبع course
355 | 355 إقامة stay
356 | 356 عجلة wheel
357 | 357 كامل full
358 | 358 قوة force
359 | 359 الأزرق blue
360 | 360 موضوع object
361 | 361 تقرر decide
362 | 362 سطح surface
363 | 363 عميق deep
364 | 364 ضوء القمر moon
365 | 365 الجزيرة island
366 | 366 القدم foot
367 | 367 نظام system
368 | 368 مشغول busy
369 | 369 اختبار test
370 | 370 سجل record
371 | 371 قارب boat
372 | 372 مشترك common
373 | 373 الذهب gold
374 | 374 ممكن possible
375 | 375 طائرة plane
376 | 376 بدلا stead
377 | 377 جاف dry
378 | 378 عجب wonder
379 | 379 الضحك laugh
380 | 380 ألف thousand
381 | 381 منذ ago
382 | 382 ركض ran
383 | 383 تحقق check
384 | 384 لعبة game
385 | 385 شكل shape
386 | 386 مساواة equate
387 | 387 حار hot
388 | 388 ملكة جمال miss
389 | 389 جلبت brought
390 | 390 حرارة heat
391 | 391 ثلج snow
392 | 392 إطار العجلة tire
393 | 393 جلب bring
394 | 394 نعم yes
395 | 395 بعيد distant
396 | 396 شغل fill
397 | 397 الشرق east
398 | 398 الطلاء paint
399 | 399 اللغة language
400 | 400 بين among
401 | 401 وحدة unit
402 | 402 قوة power
403 | 403 مدينة town
404 | 404 غرامة fine
405 | 405 معين certain
406 | 406 يطير fly
407 | 407 سقط fall
408 | 408 قيادة lead
409 | 409 صرخة cry
410 | 410 مظلم dark
411 | 411 آلة machine
412 | 412 مذكرة note
413 | 413 انتظر wait
414 | 414 خطة plan
415 | 415 الرقم figure
416 | 416 نجمة star
417 | 417 صندوق box
418 | 418 إسم noun
419 | 419 الحقل field
420 | 420 بقية rest
421 | 421 صحيح correct
422 | 422 قادرة able
423 | 423 الجنيه pound
424 | 424 منجز done
425 | 425 الجمال beauty
426 | 426 محرك drive
427 | 427 وقفت stood
428 | 428 تحتوي على contain
429 | 429 الجبهة front
430 | 430 تعليم teach
431 | 431 أسبوع week
432 | 432 نهائي final
433 | 433 أعطى gave
434 | 434 الأخضر green
435 | 435 يا oh
436 | 436 سريع quick
437 | 437 تطوير develop
438 | 438 المحيط ocean
439 | 439 الحارة warm
440 | 440 حر free
441 | 441 دقيقة minute
442 | 442 قوي strong
443 | 443 خاص special
444 | 444 عقل mind
445 | 445 خلف behind
446 | 446 واضح clear
447 | 447 ذيل tail
448 | 448 إنتاج produce
449 | 449 حقيقة fact
450 | 450 الفضاء space
451 | 451 سمعت heard
452 | 452 أفضل best
453 | 453 ساعة hour
454 | 454 أفضل better
455 | 455 صحيح true
456 | 456 خلال during
457 | 457 مائة hundred
458 | 458 خمسة five
459 | 459 تذكر remember
460 | 460 خطوة step
461 | 461 في وقت مبكر early
462 | 462 عقد hold
463 | 463 غرب west
464 | 464 أرض ground
465 | 465 مصلحة interest
466 | 466 الوصول reach
467 | 467 بسرعة fast
468 | 468 الفعل verb
469 | 469 الغناء sing
470 | 470 استمع listen
471 | 471 ستة six
472 | 472 الجدول table
473 | 473 السفر travel
474 | 474 أقل less
475 | 475 الصباح morning
476 | 476 عشرة ten
477 | 477 بسيطة simple
478 | 478 عدة several
479 | 479 حرف لين vowel
480 | 480 نحو toward
481 | 481 حرب war
482 | 482 وضع lay
483 | 483 ضد against
484 | 484 نمط pattern
485 | 485 بطيئة slow
486 | 486 مركز center
487 | 487 الحب love
488 | 488 شخص person
489 | 489 المال money
490 | 490 خدمة serve
491 | 491 تظهر appear
492 | 492 طريق road
493 | 493 خريطة map
494 | 494 مطر rain
495 | 495 حكم rule
496 | 496 الحكم govern
497 | 497 سحب pull
498 | 498 بارد cold
499 | 499 إشعار notice
500 | 500 صوت voice
501 | 501 الطاقة energy
502 | 502 مطاردة hunt
503 | 503 محتمل probable
504 | 504 قاع bed
505 | 505 شقيق brother
506 | 506 البيض egg
507 | 507 ركوب ride
508 | 508 خلية cell
509 | 509 اعتقد believe
510 | 510 ربما perhaps
511 | 511 اختيار pick
512 | 512 مفاجئ sudden
513 | 513 عد count
514 | 514 مربع square
515 | 515 سبب reason
516 | 516 طول length
517 | 517 تمثيل represent
518 | 518 فن art
519 | 519 الموضوع subject
520 | 520 منطقة region
521 | 521 حجم size
522 | 522 تختلف vary
523 | 523 حل settle
524 | 524 الكلام speak
525 | 525 الوزن weight
526 | 526 عام general
527 | 527 الجليد ice
528 | 528 مسألة matter
529 | 529 دائرة circle
530 | 530 زوج pair
531 | 531 تضمن include
532 | 532 الفجوة divide
533 | 533 مقطع لفظي syllable
534 | 534 شعر felt
535 | 535 الكبرى grand
536 | 536 كرة ball
537 | 537 بعد yet
538 | 538 موجة wave
539 | 539 إسقاط drop
540 | 540 قلب heart
541 | 541 صباحا am
542 | 542 الحاضر present
543 | 543 ثقيلة heavy
544 | 544 رقص dance
545 | 545 محرك engine
546 | 546 موقف position
547 | 547 الذراع arm
548 | 548 واسع wide
549 | 549 الشراع sail
550 | 550 مادة material
551 | 551 جزء fraction
552 | 552 الغابات forest
553 | 553 الجلوس sit
554 | 554 سباق race
555 | 555 نافذة window
556 | 556 متجر store
557 | 557 الصيف summer
558 | 558 قطار train
559 | 559 نوم sleep
560 | 560 إثبات prove
561 | 561 وحيد lone
562 | 562 الساق leg
563 | 563 ممارسة exercise
564 | 564 جدار wall
565 | 565 مزلاج catch
566 | 566 جبل mount
567 | 567 رغب wish
568 | 568 سماء sky
569 | 569 مجلس الإدارة board
570 | 570 الفرح joy
571 | 571 شتاء winter
572 | 572 السبت sat
573 | 573 مكتوبة written
574 | 574 بري wild
575 | 575 أداة instrument
576 | 576 أبقى kept
577 | 577 زجاج glass
578 | 578 العشب grass
579 | 579 بقرة cow
580 | 580 العمل job
581 | 581 حافة edge
582 | 582 علامة sign
583 | 583 زيارة visit
584 | 584 الماضي past
585 | 585 لينة soft
586 | 586 مرح fun
587 | 587 مشرق bright
588 | 588 الغاز gas
589 | 589 الطقس weather
590 | 590 شهر month
591 | 591 مليون million
592 | 592 تحمل bear
593 | 593 نهاية finish
594 | 594 سعيدة happy
595 | 595 نأمل hope
596 | 596 زهرة flower
597 | 597 كسا clothe
598 | 598 غريب strange
599 | 599 ذهب gone
600 | 600 تجارة trade
601 | 601 لحن melody
602 | 602 رحلة trip
603 | 603 مكتب office
604 | 604 تلقي receive
605 | 605 الصف row
606 | 606 فم mouth
607 | 607 بالضبط exact
608 | 608 رمز symbol
609 | 609 مات die
610 | 610 الأقل least
611 | 611 مشكلة trouble
612 | 612 صيحة shout
613 | 613 إلا except
614 | 614 كتب wrote
615 | 615 بذرة seed
616 | 616 نغمة tone
617 | 617 انضمام join
618 | 618 اقترح suggest
619 | 619 نظيف clean
620 | 620 استراحة break
621 | 621 سيدة lady
622 | 622 ساحة yard
623 | 623 الارتفاع rise
624 | 624 سيئة bad
625 | 625 ضربة blow
626 | 626 زيت oil
627 | 627 الدم blood
628 | 628 لمس touch
629 | 629 نما grew
630 | 630 المائة cent
631 | 631 مزيج mix
632 | 632 فريق team
633 | 633 سلك wire
634 | 634 التكلفة cost
635 | 635 فقدت lost
636 | 636 أسمر brown
637 | 637 ارتداء wear
638 | 638 حديقة garden
639 | 639 متساو equal
640 | 640 أرسلت sent
641 | 641 اختيار choose
642 | 642 سقط fell
643 | 643 تناسب fit
644 | 644 تدفق flow
645 | 645 عادل fair
646 | 646 البنك bank
647 | 647 جمع collect
648 | 648 حفظ save
649 | 649 السيطرة control
650 | 650 العشري decimal
651 | 651 إذن ear
652 | 652 آخر else
653 | 653 تماما quite
654 | 654 حطم broke
655 | 655 حالة case
656 | 656 وسط middle
657 | 657 قتل kill
658 | 658 ابن son
659 | 659 بحيرة lake
660 | 660 لحظة moment
661 | 661 على نطاق و scale
662 | 662 بصوت عال loud
663 | 663 الربيع spring
664 | 664 رصد observe
665 | 665 طفل child
666 | 666 مباشرة straight
667 | 667 متوافق consonant
668 | 668 الأمة nation
669 | 669 قاموس dictionary
670 | 670 حليب milk
671 | 671 السرعة speed
672 | 672 طريقة method
673 | 673 عضو organ
674 | 674 دفع pay
675 | 675 العمر age
676 | 676 القسم section
677 | 677 فستان dress
678 | 678 سحابة cloud
679 | 679 مفاجأة surprise
680 | 680 هادئ quiet
681 | 681 حجر stone
682 | 682 صغير tiny
683 | 683 تسلق climb
684 | 684 بارد cool
685 | 685 تصميم design
686 | 686 فقير poor
687 | 687 الكثير lot
688 | 688 تجربة experiment
689 | 689 أسفل bottom
690 | 690 مفتاح key
691 | 691 حديد iron
692 | 692 احد single
693 | 693 عصا stick
694 | 694 شقة flat
695 | 695 عشرون twenty
696 | 696 الجلد skin
697 | 697 الابتسامة smile
698 | 698 تجعد crease
699 | 699 حفرة hole
700 | 700 القفزة jump
701 | 701 طفل baby
702 | 702 ثمانية eight
703 | 703 قرية village
704 | 704 تلبية meet
705 | 705 الجذر root
706 | 706 شراء buy
707 | 707 رفع raise
708 | 708 حل solve
709 | 709 المعادن metal
710 | 710 سواء whether
711 | 711 دفع push
712 | 712 سبعة seven
713 | 713 فقرة paragraph
714 | 714 الثالث third
715 | 715 يجب shall
716 | 716 عقدت held
717 | 717 الشعر hair
718 | 718 وصف describe
719 | 719 طبخ cook
720 | 720 الطابق floor
721 | 721 كل either
722 | 722 نتيجة result
723 | 723 حرق burn
724 | 724 التل hill
725 | 725 آمنة safe
726 | 726 القط cat
727 | 727 القرن century
728 | 728 النظر consider
729 | 729 نوع type
730 | 730 القانون law
731 | 731 لقمة bit
732 | 732 الساحل coast
733 | 733 نسخة copy
734 | 734 العبارة phrase
735 | 735 الصمت silent
736 | 736 طويل tall
737 | 737 رمل sand
738 | 738 التربة soil
739 | 739 لفة roll
740 | 740 درجة الحرارة temperature
741 | 741 إصبع finger
742 | 742 صناعة industry
743 | 743 قيمة value
744 | 744 المعركة fight
745 | 745 كذبة lie
746 | 746 فوز beat
747 | 747 أثار excite
748 | 748 طبيعي natural
749 | 749 رأي view
750 | 750 المعنى sense
751 | 751 العاصمة capital
752 | 752 سوف لا won’t
753 | 753 كرسي chair
754 | 754 خطر danger
755 | 755 الفاكهة fruit
756 | 756 غني rich
757 | 757 سميك thick
758 | 758 جندي soldier
759 | 759 عملية process
760 | 760 العمل operate
761 | 761 ممارسة practice
762 | 762 منفصلة separate
763 | 763 الصعب difficult
764 | 764 الطبيب doctor
765 | 765 الرجاء please
766 | 766 حماية protect
767 | 767 ظهرا noon
768 | 768 محصول crop
769 | 769 الحديث modern
770 | 770 عنصر element
771 | 771 ضرب hit
772 | 772 طالب student
773 | 773 ركن corner
774 | 774 حزب party
775 | 775 تزويد supply
776 | 776 الذي whose
777 | 777 حدد locate
778 | 778 عصابة ring
779 | 779 حرف character
780 | 780 الحشرة insect
781 | 781 اشتعلت caught
782 | 782 فترة period
783 | 783 تشير indicate
784 | 784 راديو radio
785 | 785 وتحدث spoke
786 | 786 ذرة atom
787 | 787 بشري human
788 | 788 التاريخ history
789 | 789 تأثير effect
790 | 790 كهربائي electric
791 | 791 توقع expect
792 | 792 العظام bone
793 | 793 سكة حديدية rail
794 | 794 تخيل imagine
795 | 795 توفير provide
796 | 796 توافق agree
797 | 797 وبالتالي thus
798 | 798 لطيف gentle
799 | 799 امرأة woman
800 | 800 نقيب captain
801 | 801 تخمين guess
802 | 802 الضرورة necessary
803 | 803 حاد sharp
804 | 804 جناح wing
805 | 805 خلق create
806 | 806 الجيران neighbor
807 | 807 غسل wash
808 | 808 خفاش bat
809 | 809 بالأحرى rather
810 | 810 الحشد crowd
811 | 811 الذرة corn
812 | 812 مقارنة compare
813 | 813 قصيدة poem
814 | 814 سلسلة string
815 | 815 جرس bell
816 | 816 تعتمد depend
817 | 817 اللحوم meat
818 | 818 فرك rub
819 | 819 أنبوب tube
820 | 820 الشهيرة famous
821 | 921 الدولار dollar
822 | 822 تيار stream
823 | 823 خوف fear
824 | 284 مشهد sight
825 | 825 رقيق thin
826 | 826 مثلث triangle
827 | 827 كوكب planet
828 | 828 عجل hurry
829 | 829 رئيس chief
830 | 830 مستعمرة colony
831 | 831 على مدار الساعة clock
832 | 832 منجم mine
833 | 833 ربطة عنق tie
834 | 834 دخول enter
835 | 835 الرئيسية major
836 | 836 جديدة fresh
837 | 837 البحث search
838 | 838 إرسال send
839 | 839 الأصفر yellow
840 | 840 بندقية gun
841 | 841 السماح allow
842 | 842 طباعة print
843 | 843 ميت dead
844 | 844 بقعة spot
845 | 845 صحراء desert
846 | 846 دعوى suit
847 | 847 التيار current
848 | 848 رفع lift
849 | 840 ارتفع rose
850 | 850 وصول arrive
851 | 851 سيد master
852 | 852 المسار track
853 | 853 الأم parent
854 | 854 الشاطئ shore
855 | 855 تقسيم division
856 | 856 ورقة sheet
857 | 857 مادة substance
858 | 858 صالح favor
859 | 859 اتصال connect
860 | 860 آخر post
861 | 861 أنفق spend
862 | 862 وتر chord
863 | 863 دهن fat
864 | 864 سعيد glad
865 | 865 الأصلي original
866 | 866 حصة share
867 | 867 محطة station
868 | 868 أب dad
869 | 869 الخبز bread
870 | 870 تهمة charge
871 | 871 السليم proper
872 | 872 شريط bar
873 | 873 العرض offer
874 | 874 قطاع segment
875 | 875 الرقيق slave
876 | 876 بط duck
877 | 877 لحظة instant
878 | 878 سوق market
879 | 879 درجة degree
880 | 880 أهل populate
881 | 881 كتكوت chick
882 | 882 عزيزتي dear
883 | 883 العدو enemy
884 | 884 الرد reply
885 | 885 شراب drink
886 | 886 تحدث occur
887 | 887 دعم support
888 | 888 خطاب speech
889 | 889 الطبيعة nature
890 | 890 مجموعة range
891 | 891 البخار steam
892 | 892 الحركة motion
893 | 893 مسار path
894 | 894 السائل liquid
895 | 895 تسجيل log
896 | 896 مقصود meant
897 | 897 حاصل quotient
898 | 898 أسنان teeth
899 | 899 قذيفة shell
900 | 900 الرقبة neck
901 | 901 الأكسجين oxygen
902 | 902 السكر sugar
903 | 903 الموت death
904 | 904 جميل pretty
905 | 905 مهارة skill
906 | 906 النساء women
907 | 907 الموسم season
908 | 908 حل solution
909 | 909 مغناطيس magnet
910 | 910 فضة silver
911 | 911 شكرا thank
912 | 912 فرع branch
913 | 913 مباراة match
914 | 914 لاحقة suffix
915 | 915 خاصة especially
916 | 916 تين fig
917 | 917 خائف afraid
918 | 918 ضخم huge
919 | 919 شقيقة sister
920 | 920 الصلب steel
921 | 921 بحث discuss
922 | 922 إلى الأمام forward
923 | 923 مماثل similar
924 | 924 توجيه guide
925 | 925 التجربة experience
926 | 926 النتيجة score
927 | 927 تفاحة apple
928 | 928 اشترى bought
929 | 929 أدى led
930 | 930 رمية pitch
931 | 931 معطف coat
932 | 932 كتلة mass
933 | 933 بطاقة card
934 | 934 فرقة band
935 | 935 حبل rope
936 | 936 انزلاق slip
937 | 937 الفوز win
938 | 938 حلم dream
939 | 939 مساء evening
940 | 940 حالة condition
941 | 941 علف feed
942 | 942 أداة tool
943 | 943 الكل total
944 | 944 الأساسية basic
945 | 945 رائحة smell
946 | 946 الوادي valley
947 | 947 ولا nor
948 | 948 ضعف double
949 | 949 مقعد seat
950 | 950 تواصل continue
951 | 951 كتلة block
952 | 952 جدول chart
953 | 953 قبعة hat
954 | 954 بيع sell
955 | 955 نجاح success
956 | 956 شركة company
957 | 957 طرح subtract
958 | 958 حدث event
959 | 959 خاصة particular
960 | 960 صفقة deal
961 | 961 السباحة swim
962 | 962 مصطلح term
963 | 963 العكس opposite
964 | 964 زوجة wife
965 | 965 حذاء shoe
966 | 966 الكتف shoulder
967 | 967 انتشار spread
968 | 968 رتب arrange
969 | 969 مخيم camp
970 | 970 اختراع invent
971 | 971 قطن cotton
972 | 972 الولادة born
973 | 973 حدد determine
974 | 974 رابعا quart
975 | 975 تسعة nine
976 | 976 شاحنة truck
977 | 977 الضوضاء noise
978 | 978 مستوى level
979 | 979 فرصة chance
980 | 980 جمع gather
981 | 981 متجر shop
982 | 982 امتداد stretch
983 | 983 رمي throw
984 | 984 تألق shine
985 | 985 الملكية property
986 | 986 عمود column
987 | 987 جزيء molecule
988 | 988 اختر select
989 | 989 خاطئ wrong
990 | 990 رمادي gray
991 | 991 كرر repeat
992 | 992 تتطلب require
993 | 993 واسع broad
994 | 994 إعداد prepare
995 | 995 ملح salt
996 | 996 الأنف nose
997 | 997 الجمع plural
998 | 998 غضب anger
999 | 999 مطالبة claim
--------------------------------------------------------------------------------
/language/en.tsv:
--------------------------------------------------------------------------------
1 | 1 as
2 | 2 I
3 | 3 his
4 | 4 that
5 | 5 he
6 | 6 was
7 | 7 for
8 | 8 on
9 | 9 are
10 | 10 with
11 | 11 they
12 | 12 be
13 | 13 at
14 | 14 one
15 | 15 have
16 | 16 this
17 | 17 from
18 | 18 by
19 | 19 hot
20 | 20 word
21 | 21 but
22 | 22 what
23 | 23 some
24 | 24 is
25 | 25 it
26 | 26 you
27 | 27 or
28 | 28 had
29 | 29 the
30 | 30 of
31 | 31 to
32 | 32 and
33 | 33 a
34 | 34 in
35 | 35 we
36 | 36 can
37 | 37 out
38 | 38 other
39 | 39 were
40 | 40 which
41 | 41 do
42 | 42 their
43 | 43 time
44 | 44 if
45 | 45 will
46 | 46 how
47 | 47 said
48 | 48 an
49 | 49 each
50 | 50 tell
51 | 51 does
52 | 52 set
53 | 53 three
54 | 54 want
55 | 55 air
56 | 56 well
57 | 57 also
58 | 58 play
59 | 59 small
60 | 60 end
61 | 61 put
62 | 62 home
63 | 63 read
64 | 64 hand
65 | 65 port
66 | 66 large
67 | 67 spell
68 | 68 add
69 | 69 even
70 | 70 land
71 | 71 here
72 | 72 must
73 | 73 big
74 | 74 high
75 | 75 such
76 | 76 follow
77 | 77 act
78 | 78 why
79 | 79 ask
80 | 80 men
81 | 81 change
82 | 82 went
83 | 83 light
84 | 84 kind
85 | 85 off
86 | 86 need
87 | 87 house
88 | 88 picture
89 | 89 try
90 | 90 us
91 | 91 again
92 | 92 animal
93 | 93 point
94 | 94 mother
95 | 95 world
96 | 96 near
97 | 97 build
98 | 98 self
99 | 99 earth
100 | 100 father
101 | 101 any
102 | 102 new
103 | 103 work
104 | 104 part
105 | 105 take
106 | 106 get
107 | 107 place
108 | 108 made
109 | 109 live
110 | 110 where
111 | 111 after
112 | 112 back
113 | 113 little
114 | 114 only
115 | 115 round
116 | 116 man
117 | 117 year
118 | 118 came
119 | 119 show
120 | 120 every
121 | 121 good
122 | 122 me
123 | 123 give
124 | 124 our
125 | 125 under
126 | 126 name
127 | 127 very
128 | 128 through
129 | 129 just
130 | 130 form
131 | 131 sentence
132 | 132 great
133 | 133 think
134 | 134 say
135 | 135 help
136 | 136 low
137 | 137 line
138 | 138 differ
139 | 139 turn
140 | 140 cause
141 | 141 much
142 | 142 mean
143 | 143 before
144 | 144 move
145 | 145 right
146 | 146 boy
147 | 147 old
148 | 148 too
149 | 149 same
150 | 150 she
151 | 151 all
152 | 152 there
153 | 153 when
154 | 154 up
155 | 155 use
156 | 156 your
157 | 157 way
158 | 158 about
159 | 159 many
160 | 160 then
161 | 161 them
162 | 162 write
163 | 163 would
164 | 164 like
165 | 165 so
166 | 166 these
167 | 167 her
168 | 168 long
169 | 169 make
170 | 170 thing
171 | 171 see
172 | 172 him
173 | 173 two
174 | 174 has
175 | 175 look
176 | 176 more
177 | 177 day
178 | 178 could
179 | 179 go
180 | 180 come
181 | 181 did
182 | 182 number
183 | 183 sound
184 | 184 no
185 | 185 most
186 | 186 people
187 | 187 my
188 | 188 over
189 | 189 know
190 | 190 water
191 | 191 than
192 | 192 call
193 | 193 first
194 | 194 who
195 | 195 may
196 | 196 down
197 | 197 side
198 | 198 been
199 | 199 now
200 | 200 find
201 | 201 head
202 | 202 stand
203 | 203 own
204 | 204 page
205 | 205 should
206 | 206 country
207 | 207 found
208 | 208 answer
209 | 209 school
210 | 210 grow
211 | 211 study
212 | 212 still
213 | 213 learn
214 | 214 plant
215 | 215 cover
216 | 216 food
217 | 217 sun
218 | 218 four
219 | 219 between
220 | 220 state
221 | 221 keep
222 | 222 eye
223 | 223 never
224 | 224 last
225 | 225 let
226 | 226 thought
227 | 227 city
228 | 228 tree
229 | 229 cross
230 | 230 farm
231 | 231 hard
232 | 232 start
233 | 233 might
234 | 234 story
235 | 235 saw
236 | 236 far
237 | 237 sea
238 | 238 draw
239 | 239 left
240 | 240 late
241 | 241 run
242 | 242 don’t
243 | 243 while
244 | 244 press
245 | 245 close
246 | 246 night
247 | 247 real
248 | 248 life
249 | 249 few
250 | 250 north
251 | 251 book
252 | 252 carry
253 | 253 took
254 | 254 science
255 | 255 eat
256 | 256 room
257 | 257 friend
258 | 258 began
259 | 259 idea
260 | 260 fish
261 | 261 mountain
262 | 262 stop
263 | 263 once
264 | 264 base
265 | 265 hear
266 | 266 horse
267 | 267 cut
268 | 268 sure
269 | 269 watch
270 | 270 color
271 | 271 face
272 | 272 wood
273 | 273 main
274 | 274 open
275 | 275 seem
276 | 276 together
277 | 277 next
278 | 278 white
279 | 279 children
280 | 280 begin
281 | 281 got
282 | 282 walk
283 | 283 example
284 | 284 ease
285 | 285 paper
286 | 286 group
287 | 287 always
288 | 288 music
289 | 289 those
290 | 290 both
291 | 291 mark
292 | 292 often
293 | 293 letter
294 | 294 until
295 | 295 mile
296 | 296 river
297 | 297 car
298 | 298 feet
299 | 299 care
300 | 300 second
301 | 301 enough
302 | 302 plain
303 | 303 girl
304 | 304 usual
305 | 305 young
306 | 306 ready
307 | 307 above
308 | 308 ever
309 | 309 red
310 | 310 list
311 | 311 though
312 | 312 feel
313 | 313 talk
314 | 314 bird
315 | 315 soon
316 | 316 body
317 | 317 dog
318 | 318 family
319 | 319 direct
320 | 320 pose
321 | 321 leave
322 | 322 song
323 | 323 measure
324 | 324 door
325 | 325 product
326 | 326 black
327 | 327 short
328 | 328 numeral
329 | 329 class
330 | 330 wind
331 | 331 question
332 | 332 happen
333 | 333 complete
334 | 334 ship
335 | 335 area
336 | 336 half
337 | 337 rock
338 | 338 order
339 | 339 fire
340 | 340 south
341 | 341 problem
342 | 342 piece
343 | 343 told
344 | 344 knew
345 | 345 pass
346 | 346 since
347 | 347 top
348 | 348 whole
349 | 349 king
350 | 350 street
351 | 351 inch
352 | 352 multiply
353 | 353 nothing
354 | 354 course
355 | 355 stay
356 | 356 wheel
357 | 357 full
358 | 358 force
359 | 359 blue
360 | 360 object
361 | 361 decide
362 | 362 surface
363 | 363 deep
364 | 364 moon
365 | 365 island
366 | 366 foot
367 | 367 system
368 | 368 busy
369 | 369 test
370 | 370 record
371 | 371 boat
372 | 372 common
373 | 373 gold
374 | 374 possible
375 | 375 plane
376 | 376 stead
377 | 377 dry
378 | 378 wonder
379 | 379 laugh
380 | 380 thousand
381 | 381 ago
382 | 382 ran
383 | 383 check
384 | 384 game
385 | 385 shape
386 | 386 equate
387 | 387 hot
388 | 388 miss
389 | 389 brought
390 | 390 heat
391 | 391 snow
392 | 392 tire
393 | 393 bring
394 | 394 yes
395 | 395 distant
396 | 396 fill
397 | 397 east
398 | 398 paint
399 | 399 language
400 | 400 among
401 | 401 unit
402 | 402 power
403 | 403 town
404 | 404 fine
405 | 405 certain
406 | 406 fly
407 | 407 fall
408 | 408 lead
409 | 409 cry
410 | 410 dark
411 | 411 machine
412 | 412 note
413 | 413 wait
414 | 414 plan
415 | 415 figure
416 | 416 star
417 | 417 box
418 | 418 noun
419 | 419 field
420 | 420 rest
421 | 421 correct
422 | 422 able
423 | 423 pound
424 | 424 done
425 | 425 beauty
426 | 426 drive
427 | 427 stood
428 | 428 contain
429 | 429 front
430 | 430 teach
431 | 431 week
432 | 432 final
433 | 433 gave
434 | 434 green
435 | 435 oh
436 | 436 quick
437 | 437 develop
438 | 438 ocean
439 | 439 warm
440 | 440 free
441 | 441 minute
442 | 442 strong
443 | 443 special
444 | 444 mind
445 | 445 behind
446 | 446 clear
447 | 447 tail
448 | 448 produce
449 | 449 fact
450 | 450 space
451 | 451 heard
452 | 452 best
453 | 453 hour
454 | 454 better
455 | 455 true
456 | 456 during
457 | 457 hundred
458 | 458 five
459 | 459 remember
460 | 460 step
461 | 461 early
462 | 462 hold
463 | 463 west
464 | 464 ground
465 | 465 interest
466 | 466 reach
467 | 467 fast
468 | 468 verb
469 | 469 sing
470 | 470 listen
471 | 471 six
472 | 472 table
473 | 473 travel
474 | 474 less
475 | 475 morning
476 | 476 ten
477 | 477 simple
478 | 478 several
479 | 479 vowel
480 | 480 toward
481 | 481 war
482 | 482 lay
483 | 483 against
484 | 484 pattern
485 | 485 slow
486 | 486 center
487 | 487 love
488 | 488 person
489 | 489 money
490 | 490 serve
491 | 491 appear
492 | 492 road
493 | 493 map
494 | 494 rain
495 | 495 rule
496 | 496 govern
497 | 497 pull
498 | 498 cold
499 | 499 notice
500 | 500 voice
501 | 501 energy
502 | 502 hunt
503 | 503 probable
504 | 504 bed
505 | 505 brother
506 | 506 egg
507 | 507 ride
508 | 508 cell
509 | 509 believe
510 | 510 perhaps
511 | 511 pick
512 | 512 sudden
513 | 513 count
514 | 514 square
515 | 515 reason
516 | 516 length
517 | 517 represent
518 | 518 art
519 | 519 subject
520 | 520 region
521 | 521 size
522 | 522 vary
523 | 523 settle
524 | 524 speak
525 | 525 weight
526 | 526 general
527 | 527 ice
528 | 528 matter
529 | 529 circle
530 | 530 pair
531 | 531 include
532 | 532 divide
533 | 533 syllable
534 | 534 felt
535 | 535 grand
536 | 536 ball
537 | 537 yet
538 | 538 wave
539 | 539 drop
540 | 540 heart
541 | 541 am
542 | 542 present
543 | 543 heavy
544 | 544 dance
545 | 545 engine
546 | 546 position
547 | 547 arm
548 | 548 wide
549 | 549 sail
550 | 550 material
551 | 551 fraction
552 | 552 forest
553 | 553 sit
554 | 554 race
555 | 555 window
556 | 556 store
557 | 557 summer
558 | 558 train
559 | 559 sleep
560 | 560 prove
561 | 561 lone
562 | 562 leg
563 | 563 exercise
564 | 564 wall
565 | 565 catch
566 | 566 mount
567 | 567 wish
568 | 568 sky
569 | 569 board
570 | 570 joy
571 | 571 winter
572 | 572 sat
573 | 573 written
574 | 574 wild
575 | 575 instrument
576 | 576 kept
577 | 577 glass
578 | 578 grass
579 | 579 cow
580 | 580 job
581 | 581 edge
582 | 582 sign
583 | 583 visit
584 | 584 past
585 | 585 soft
586 | 586 fun
587 | 587 bright
588 | 588 gas
589 | 589 weather
590 | 590 month
591 | 591 million
592 | 592 bear
593 | 593 finish
594 | 594 happy
595 | 595 hope
596 | 596 flower
597 | 597 clothe
598 | 598 strange
599 | 599 gone
600 | 600 trade
601 | 601 melody
602 | 602 trip
603 | 603 office
604 | 604 receive
605 | 605 row
606 | 606 mouth
607 | 607 exact
608 | 608 symbol
609 | 609 die
610 | 610 least
611 | 611 trouble
612 | 612 shout
613 | 613 except
614 | 614 wrote
615 | 615 seed
616 | 616 tone
617 | 617 join
618 | 618 suggest
619 | 619 clean
620 | 620 break
621 | 621 lady
622 | 622 yard
623 | 623 rise
624 | 624 bad
625 | 625 blow
626 | 626 oil
627 | 627 blood
628 | 628 touch
629 | 629 grew
630 | 630 cent
631 | 631 mix
632 | 632 team
633 | 633 wire
634 | 634 cost
635 | 635 lost
636 | 636 brown
637 | 637 wear
638 | 638 garden
639 | 639 equal
640 | 640 sent
641 | 641 choose
642 | 642 fell
643 | 643 fit
644 | 644 flow
645 | 645 fair
646 | 646 bank
647 | 647 collect
648 | 648 save
649 | 649 control
650 | 650 decimal
651 | 651 ear
652 | 652 else
653 | 653 quite
654 | 654 broke
655 | 655 case
656 | 656 middle
657 | 657 kill
658 | 658 son
659 | 659 lake
660 | 660 moment
661 | 661 scale
662 | 662 loud
663 | 663 spring
664 | 664 observe
665 | 665 child
666 | 666 straight
667 | 667 consonant
668 | 668 nation
669 | 669 dictionary
670 | 670 milk
671 | 671 speed
672 | 672 method
673 | 673 organ
674 | 674 pay
675 | 675 age
676 | 676 section
677 | 677 dress
678 | 678 cloud
679 | 679 surprise
680 | 680 quiet
681 | 681 stone
682 | 682 tiny
683 | 683 climb
684 | 684 cool
685 | 685 design
686 | 686 poor
687 | 687 lot
688 | 688 experiment
689 | 689 bottom
690 | 690 key
691 | 691 iron
692 | 692 single
693 | 693 stick
694 | 694 flat
695 | 695 twenty
696 | 696 skin
697 | 697 smile
698 | 698 crease
699 | 699 hole
700 | 700 jump
701 | 701 baby
702 | 702 eight
703 | 703 village
704 | 704 meet
705 | 705 root
706 | 706 buy
707 | 707 raise
708 | 708 solve
709 | 709 metal
710 | 710 whether
711 | 711 push
712 | 712 seven
713 | 713 paragraph
714 | 714 third
715 | 715 shall
716 | 716 held
717 | 717 hair
718 | 718 describe
719 | 719 cook
720 | 720 floor
721 | 721 either
722 | 722 result
723 | 723 burn
724 | 724 hill
725 | 725 safe
726 | 726 cat
727 | 727 century
728 | 728 consider
729 | 729 type
730 | 730 law
731 | 731 bit
732 | 732 coast
733 | 733 copy
734 | 734 phrase
735 | 735 silent
736 | 736 tall
737 | 737 sand
738 | 738 soil
739 | 739 roll
740 | 740 temperature
741 | 741 finger
742 | 742 industry
743 | 743 value
744 | 744 fight
745 | 745 lie
746 | 746 beat
747 | 747 excite
748 | 748 natural
749 | 749 view
750 | 750 sense
751 | 751 capital
752 | 752 won’t
753 | 753 chair
754 | 754 danger
755 | 755 fruit
756 | 756 rich
757 | 757 thick
758 | 758 soldier
759 | 759 process
760 | 760 operate
761 | 761 practice
762 | 762 separate
763 | 763 difficult
764 | 764 doctor
765 | 765 please
766 | 766 protect
767 | 767 noon
768 | 768 crop
769 | 769 modern
770 | 770 element
771 | 771 hit
772 | 772 student
773 | 773 corner
774 | 774 party
775 | 775 supply
776 | 776 whose
777 | 777 locate
778 | 778 ring
779 | 779 character
780 | 780 insect
781 | 781 caught
782 | 782 period
783 | 783 indicate
784 | 784 radio
785 | 785 spoke
786 | 786 atom
787 | 787 human
788 | 788 history
789 | 789 effect
790 | 790 electric
791 | 791 expect
792 | 792 bone
793 | 793 rail
794 | 794 imagine
795 | 795 provide
796 | 796 agree
797 | 797 thus
798 | 798 gentle
799 | 799 woman
800 | 800 captain
801 | 801 guess
802 | 802 necessary
803 | 803 sharp
804 | 804 wing
805 | 805 create
806 | 806 neighbor
807 | 807 wash
808 | 808 bat
809 | 809 rather
810 | 810 crowd
811 | 811 corn
812 | 812 compare
813 | 813 poem
814 | 814 string
815 | 815 bell
816 | 816 depend
817 | 817 meat
818 | 818 rub
819 | 819 tube
820 | 820 famous
821 | 921 dollar
822 | 822 stream
823 | 823 fear
824 | 284 sight
825 | 825 thin
826 | 826 triangle
827 | 827 planet
828 | 828 hurry
829 | 829 chief
830 | 830 colony
831 | 831 clock
832 | 832 mine
833 | 833 tie
834 | 834 enter
835 | 835 major
836 | 836 fresh
837 | 837 search
838 | 838 send
839 | 839 yellow
840 | 840 gun
841 | 841 allow
842 | 842 print
843 | 843 dead
844 | 844 spot
845 | 845 desert
846 | 846 suit
847 | 847 current
848 | 848 lift
849 | 840 rose
850 | 850 arrive
851 | 851 master
852 | 852 track
853 | 853 parent
854 | 854 shore
855 | 855 division
856 | 856 sheet
857 | 857 substance
858 | 858 favor
859 | 859 connect
860 | 860 post
861 | 861 spend
862 | 862 chord
863 | 863 fat
864 | 864 glad
865 | 865 original
866 | 866 share
867 | 867 station
868 | 868 dad
869 | 869 bread
870 | 870 charge
871 | 871 proper
872 | 872 bar
873 | 873 offer
874 | 874 segment
875 | 875 slave
876 | 876 duck
877 | 877 instant
878 | 878 market
879 | 879 degree
880 | 880 populate
881 | 881 chick
882 | 882 dear
883 | 883 enemy
884 | 884 reply
885 | 885 drink
886 | 886 occur
887 | 887 support
888 | 888 speech
889 | 889 nature
890 | 890 range
891 | 891 steam
892 | 892 motion
893 | 893 path
894 | 894 liquid
895 | 895 log
896 | 896 meant
897 | 897 quotient
898 | 898 teeth
899 | 899 shell
900 | 900 neck
901 | 901 oxygen
902 | 902 sugar
903 | 903 death
904 | 904 pretty
905 | 905 skill
906 | 906 women
907 | 907 season
908 | 908 solution
909 | 909 magnet
910 | 910 silver
911 | 911 thank
912 | 912 branch
913 | 913 match
914 | 914 suffix
915 | 915 especially
916 | 916 fig
917 | 917 afraid
918 | 918 huge
919 | 919 sister
920 | 920 steel
921 | 921 discuss
922 | 922 forward
923 | 923 similar
924 | 924 guide
925 | 925 experience
926 | 926 score
927 | 927 apple
928 | 928 bought
929 | 929 led
930 | 930 pitch
931 | 931 coat
932 | 932 mass
933 | 933 card
934 | 934 band
935 | 935 rope
936 | 936 slip
937 | 937 win
938 | 938 dream
939 | 939 evening
940 | 940 condition
941 | 941 feed
942 | 942 tool
943 | 943 total
944 | 944 basic
945 | 945 smell
946 | 946 valley
947 | 947 nor
948 | 948 double
949 | 949 seat
950 | 950 continue
951 | 951 block
952 | 952 chart
953 | 953 hat
954 | 954 sell
955 | 955 success
956 | 956 company
957 | 957 subtract
958 | 958 event
959 | 959 particular
960 | 960 deal
961 | 961 swim
962 | 962 term
963 | 963 opposite
964 | 964 wife
965 | 965 shoe
966 | 966 shoulder
967 | 967 spread
968 | 968 arrange
969 | 969 camp
970 | 970 invent
971 | 971 cotton
972 | 972 born
973 | 973 determine
974 | 974 quart
975 | 975 nine
976 | 976 truck
977 | 977 noise
978 | 978 level
979 | 979 chance
980 | 980 gather
981 | 981 shop
982 | 982 stretch
983 | 983 throw
984 | 984 shine
985 | 985 property
986 | 986 column
987 | 987 molecule
988 | 988 select
989 | 989 wrong
990 | 990 gray
991 | 991 repeat
992 | 992 require
993 | 993 broad
994 | 994 prepare
995 | 995 salt
996 | 996 nose
997 | 997 plural
998 | 998 anger
999 | 999 claim
1000 | 1000 continent
--------------------------------------------------------------------------------
/language/eo.tsv:
--------------------------------------------------------------------------------
1 | 1 kiel as
2 | 2 Mi I
3 | 3 sian his
4 | 4 ke that
5 | 5 li he
6 | 6 estis was
7 | 7 por for
8 | 8 sur on
9 | 9 estas are
10 | 10 kun with
11 | 11 ili they
12 | 12 esti be
13 | 13 ĉe at
14 | 14 unu one
15 | 15 havos have
16 | 16 tiu this
17 | 17 el from
18 | 18 per by
19 | 19 varmega hot
20 | 20 vorto word
21 | 21 sed but
22 | 22 kio what
23 | 23 iuj some
24 | 24 estas is
25 | 25 ĝi it
26 | 26 vi you
27 | 27 aŭ or
28 | 28 havis had
29 | 29 la the
30 | 30 de of
31 | 31 por to
32 | 32 kaj and
33 | 33 a a
34 | 34 en in
35 | 35 ni we
36 | 36 povas can
37 | 37 el out
38 | 38 aliaj other
39 | 39 estis were
40 | 40 kio which
41 | 41 fari do
42 | 42 iliaj their
43 | 43 tempo time
44 | 44 se if
45 | 45 volo will
46 | 46 kiom how
47 | 47 diris said
48 | 48 kiel an
49 | 49 ĉiu each
50 | 50 diru tell
51 | 51 faras does
52 | 52 aro set
53 | 53 tri three
54 | 54 volas want
55 | 55 aero air
56 | 56 bone well
57 | 57 ankaŭ also
58 | 58 ludos play
59 | 59 malgranda small
60 | 60 fino end
61 | 61 metita put
62 | 62 hejmo home
63 | 63 legi read
64 | 64 manoj hand
65 | 65 port port
66 | 66 granda large
67 | 67 literumi spell
68 | 68 aldoni add
69 | 69 eĉ even
70 | 70 lando land
71 | 71 tien here
72 | 72 devas must
73 | 73 granda big
74 | 74 alta high
75 | 75 tiaj such
76 | 76 sekvi follow
77 | 77 akto act
78 | 78 kial why
79 | 79 demandu ask
80 | 80 viroj men
81 | 81 ŝanĝo change
82 | 82 eniris went
83 | 83 malpeza light
84 | 84 afabla kind
85 | 85 ekstere off
86 | 86 bezonas need
87 | 87 domo house
88 | 88 foton picture
89 | 89 provu try
90 | 90 nin us
91 | 91 denove again
92 | 92 besto animal
93 | 93 punkto point
94 | 94 patrino mother
95 | 95 mondo world
96 | 96 proksime near
97 | 97 konstrui build
98 | 98 aŭto self
99 | 99 tero earth
100 | 100 patro father
101 | 101 ajna any
102 | 102 nova new
103 | 103 laboro work
104 | 104 parto part
105 | 105 preni take
106 | 106 akiri get
107 | 107 loko place
108 | 108 faris made
109 | 109 vivi live
110 | 110 kie where
111 | 111 post after
112 | 112 reen back
113 | 113 iom little
114 | 114 nur only
115 | 115 ronda round
116 | 116 viro man
117 | 117 jaro year
118 | 118 venis came
119 | 119 spektaklo show
120 | 120 ĉiu every
121 | 121 bonaj good
122 | 122 Min me
123 | 123 doni give
124 | 124 nian our
125 | 125 sub under
126 | 126 nomo name
127 | 127 tre very
128 | 128 per through
129 | 129 simple just
130 | 130 formo form
131 | 131 kondamno sentence
132 | 132 granda great
133 | 133 pensas think
134 | 134 diras say
135 | 135 helpi help
136 | 136 malalta low
137 | 137 linio line
138 | 138 diferenciĝas differ
139 | 139 Siavice turn
140 | 140 kaŭzo cause
141 | 141 multa much
142 | 142 signifus mean
143 | 143 antaŭ before
144 | 144 movado move
145 | 145 dekstra right
146 | 146 knabo boy
147 | 147 malnova old
148 | 148 tro too
149 | 149 sama same
150 | 150 ŝi she
151 | 151 ĉiuj all
152 | 152 tie there
153 | 153 kiam when
154 | 154 supren up
155 | 155 uzo use
156 | 156 via your
157 | 157 vojo way
158 | 158 proksimume about
159 | 159 multaj many
160 | 160 tiam then
161 | 161 ilin them
162 | 162 skribi write
163 | 163 farus would
164 | 164 kiel like
165 | 165 tiel so
166 | 166 tiuj these
167 | 167 ŝia her
168 | 168 longajn long
169 | 169 fari make
170 | 170 afero thing
171 | 171 vidi see
172 | 172 li him
173 | 173 du two
174 | 174 havas has
175 | 175 rigardi look
176 | 176 pli more
177 | 177 tagon day
178 | 178 povis could
179 | 179 iri go
180 | 180 venu come
181 | 181 faris did
182 | 182 nombro number
183 | 183 soni sound
184 | 184 neniu no
185 | 185 plej most
186 | 186 homoj people
187 | 187 Mian my
188 | 188 super over
189 | 189 scias know
190 | 190 akvo water
191 | 191 ol than
192 | 192 alvoko call
193 | 193 unua first
194 | 194 kiuj who
195 | 195 may may
196 | 196 malsupren down
197 | 197 flanko side
198 | 198 estintaj been
199 | 199 nun now
200 | 200 trovi find
201 | 201 kapo head
202 | 202 staras stand
203 | 203 propra own
204 | 204 paĝo page
205 | 205 devus should
206 | 206 lando country
207 | 207 trovita found
208 | 208 respondo answer
209 | 209 lernejo school
210 | 210 kreski grow
211 | 211 studo study
212 | 212 ankoraŭ still
213 | 213 lerni learn
214 | 214 planto plant
215 | 215 kovrilo cover
216 | 216 nutraĵo food
217 | 217 suno sun
218 | 218 kvar four
219 | 219 inter between
220 | 220 ŝtata state
221 | 221 konservi keep
222 | 222 okulo eye
223 | 223 neniam never
224 | 224 lasta last
225 | 225 lasu let
226 | 226 pensis thought
227 | 227 urbo city
228 | 228 arbo tree
229 | 229 transiri cross
230 | 230 farm farm
231 | 231 malfacila hard
232 | 232 komenco start
233 | 233 potenco might
234 | 234 rakonto story
235 | 235 segilo saw
236 | 236 malproksime far
237 | 237 maro sea
238 | 238 desegni draw
239 | 239 maldekstra left
240 | 240 malfrue late
241 | 241 run run
242 | 242 ne don’t
243 | 243 dum while
244 | 244 gazetaro press
245 | 245 Fermi close
246 | 246 nokto night
247 | 247 reala real
248 | 248 vivo life
249 | 249 malmultaj few
250 | 250 Norde north
251 | 251 libro book
252 | 252 porti carry
253 | 253 prenis took
254 | 254 scienco science
255 | 255 manĝi eat
256 | 256 ĉambro room
257 | 257 amiko friend
258 | 258 komencis began
259 | 259 ideon idea
260 | 260 fiŝo fish
261 | 261 monto mountain
262 | 262 ĉesi stop
263 | 263 unufoje once
264 | 264 bazo base
265 | 265 aŭdi hear
266 | 266 ĉevalo horse
267 | 267 tranĉo cut
268 | 268 certas sure
269 | 269 spekti watch
270 | 270 koloro color
271 | 271 vizaĝo face
272 | 272 ligno wood
273 | 273 ĉefa main
274 | 274 malfermita open
275 | 275 ŝajnas seem
276 | 276 kune together
277 | 277 sekva next
278 | 278 blanka white
279 | 279 infanoj children
280 | 280 komenci begin
281 | 281 akiris got
282 | 282 marŝi walk
283 | 283 ekzemplo example
284 | 284 faciligi ease
285 | 285 papero paper
286 | 286 grupo group
287 | 287 ĉiam always
288 | 288 muziko music
289 | 289 tiuj those
290 | 290 ambaŭ both
291 | 291 markon mark
292 | 292 ofte often
293 | 293 letero letter
294 | 294 ĝis until
295 | 295 mejlo mile
296 | 296 rivero river
297 | 297 aŭto car
298 | 298 piedojn feet
299 | 299 prizorgi care
300 | 300 dua second
301 | 301 sufiĉa enough
302 | 302 ebenaĵo plain
303 | 303 knabino girl
304 | 304 kutima usual
305 | 305 junulo young
306 | 306 preta ready
307 | 307 supre above
308 | 308 iam ever
309 | 309 ruĝa red
310 | 310 lerta list
311 | 311 kvankam though
312 | 312 sentas feel
313 | 313 Diskuto talk
314 | 314 birdo bird
315 | 315 baldaŭ soon
316 | 316 korpo body
317 | 317 hundo dog
318 | 318 familio family
319 | 319 rekta direct
320 | 320 supozi pose
321 | 321 forlasi leave
322 | 322 kanto song
323 | 323 mezuri measure
324 | 324 pordo door
325 | 325 produkto product
326 | 326 nigra black
327 | 327 mallonga short
328 | 328 numeralo numeral
329 | 329 klaso class
330 | 330 vento wind
331 | 331 demando question
332 | 332 okazi happen
333 | 333 kompleta complete
334 | 334 ŝipo ship
335 | 335 spaco area
336 | 336 duono half
337 | 337 roko rock
338 | 338 ordon order
339 | 339 fajro fire
340 | 340 sude south
341 | 341 problemo problem
342 | 342 peco piece
343 | 343 rakontis told
344 | 344 sciis knew
345 | 345 pasi pass
346 | 346 ekde since
347 | 347 supro top
348 | 348 aro whole
349 | 349 reĝo king
350 | 350 strato street
351 | 351 colo inch
352 | 352 multipliki multiply
353 | 353 nenio nothing
354 | 354 Kompreneble course
355 | 355 resti stay
356 | 356 radon wheel
357 | 357 plena full
358 | 358 forto force
359 | 359 blua blue
360 | 360 objekto object
361 | 361 decidi decide
362 | 362 surfaco surface
363 | 363 profunda deep
364 | 364 luno moon
365 | 365 insulo island
366 | 366 piedo foot
367 | 367 sistemo system
368 | 368 okupata busy
369 | 369 testo test
370 | 370 rekordo record
371 | 371 kruĉo boat
372 | 372 komunaj common
373 | 373 oro gold
374 | 374 ebla possible
375 | 375 ebeno plane
376 | 376 anstataux stead
377 | 377 seka dry
378 | 378 demandas wonder
379 | 379 ridon laugh
380 | 380 milo thousand
381 | 381 monato ago
382 | 382 kuris ran
383 | 383 kontroli check
384 | 384 ludo game
385 | 385 formo shape
386 | 386 egaligas equate
387 | 387 varmega hot
388 | 388 miss miss
389 | 389 alportis brought
390 | 390 varmo heat
391 | 391 neĝo snow
392 | 392 pneŭo tire
393 | 393 alporti bring
394 | 394 jes yes
395 | 395 malproksima distant
396 | 396 plenigi fill
397 | 397 Oriente east
398 | 398 pentri paint
399 | 399 lingvo language
400 | 400 inter among
401 | 401 unuo unit
402 | 402 potenco power
403 | 403 urbo town
404 | 404 fajna fine
405 | 405 certaj certain
406 | 406 muŝo fly
407 | 407 fali fall
408 | 408 konduki lead
409 | 409 krio cry
410 | 410 mallumo dark
411 | 411 maŝino machine
412 | 412 noto note
413 | 413 atendi wait
414 | 414 plano plan
415 | 415 cifero figure
416 | 416 stelo star
417 | 417 skatolo box
418 | 418 substantivo noun
419 | 419 kampo field
420 | 420 resto rest
421 | 421 ĝusta correct
422 | 422 povis able
423 | 423 funto pound
424 | 424 farita done
425 | 425 beleco beauty
426 | 426 disko drive
427 | 427 staris stood
428 | 428 enhavi contain
429 | 429 antaŭa front
430 | 430 instruos teach
431 | 431 semajno week
432 | 432 lasta final
433 | 433 donis gave
434 | 434 verda green
435 | 435 ¡oh oh
436 | 436 rapida quick
437 | 437 evoluigi develop
438 | 438 oceano ocean
439 | 439 varma warm
440 | 440 senpaga free
441 | 441 minuto minute
442 | 442 forta strong
443 | 443 speciala special
444 | 444 menso mind
445 | 445 malantaŭ behind
446 | 446 evidenta clear
447 | 447 vosto tail
448 | 448 produkti produce
449 | 449 fakto fact
450 | 450 spaco space
451 | 451 aŭdis heard
452 | 452 bona best
453 | 453 horo hour
454 | 454 pli better
455 | 455 vera true
456 | 456 dum during
457 | 457 cent hundred
458 | 458 kvin five
459 | 459 memori remember
460 | 460 paŝo step
461 | 461 frua early
462 | 462 teni hold
463 | 463 okcidenta west
464 | 464 tero ground
465 | 465 intereson interest
466 | 466 alveni reach
467 | 467 rapida fast
468 | 468 verbo verb
469 | 469 kanti sing
470 | 470 aŭskulti listen
471 | 471 ses six
472 | 472 tablo table
473 | 473 vojaĝado travel
474 | 474 malpli less
475 | 475 mateno morning
476 | 476 dek ten
477 | 477 simpla simple
478 | 478 pluraj several
479 | 479 vokalo vowel
480 | 480 rilate toward
481 | 481 milito war
482 | 482 kuŝis lay
483 | 483 kontraŭ against
484 | 484 ŝablono pattern
485 | 485 malrapida slow
486 | 486 centro center
487 | 487 love love
488 | 488 persono person
489 | 489 monon money
490 | 490 servas serve
491 | 491 aperi appear
492 | 492 vojo road
493 | 493 Mapo map
494 | 494 pluvo rain
495 | 495 regulo rule
496 | 496 regi govern
497 | 497 tiri pull
498 | 498 malvarma cold
499 | 499 anonco notice
500 | 500 voĉo voice
501 | 501 energion energy
502 | 502 ĉasi hunt
503 | 503 probabla probable
504 | 504 lito bed
505 | 505 fraton brother
506 | 506 ovo egg
507 | 507 ride ride
508 | 508 ĉelo cell
509 | 509 kredas believe
510 | 510 eble perhaps
511 | 511 pick pick
512 | 512 subita sudden
513 | 513 kalkuli count
514 | 514 kvadrata square
515 | 515 kialo reason
516 | 516 longo length
517 | 517 reprezentas represent
518 | 518 arto art
519 | 519 subjekto subject
520 | 520 regiono region
521 | 521 grandeco size
522 | 522 varios vary
523 | 523 starigi settle
524 | 524 paroli speak
525 | 525 pezo weight
526 | 526 Ĝenerale general
527 | 527 glacio ice
528 | 528 afero matter
529 | 529 rondo circle
530 | 530 paro pair
531 | 531 inkluzivi include
532 | 532 dividi divide
533 | 533 silabo syllable
534 | 534 sentis felt
535 | 535 grandioza grand
536 | 536 pilko ball
537 | 537 ankoraŭ yet
538 | 538 ondo wave
539 | 539 faligi drop
540 | 540 koro heart
541 | 541 estas am
542 | 542 ĉeestanta present
543 | 543 peza heavy
544 | 544 danco dance
545 | 545 motoro engine
546 | 546 pozicion position
547 | 547 brako arm
548 | 548 larĝa wide
549 | 549 velo sail
550 | 550 materialon material
551 | 551 frakcio fraction
552 | 552 arbaro forest
553 | 553 sidiĝi sit
554 | 554 raso race
555 | 555 fenestro window
556 | 556 vendejo store
557 | 557 someron summer
558 | 558 trajno train
559 | 559 dormo sleep
560 | 560 elprovi prove
561 | 561 Lone lone
562 | 562 kruro leg
563 | 563 ekzerco exercise
564 | 564 muro wall
565 | 565 catch catch
566 | 566 monto mount
567 | 567 deziri wish
568 | 568 ĉielo sky
569 | 569 surŝipe board
570 | 570 ĝojo joy
571 | 571 vintro winter
572 | 572 SAT sat
573 | 573 skribita written
574 | 574 sovaĝa wild
575 | 575 instrumento instrument
576 | 576 konservis kept
577 | 577 vitro glass
578 | 578 herbo grass
579 | 579 bovino cow
580 | 580 laboro job
581 | 581 eĝo edge
582 | 582 signo sign
583 | 583 vizito visit
584 | 584 estinteco past
585 | 585 mola soft
586 | 586 amuza fun
587 | 587 brila bright
588 | 588 gaso gas
589 | 589 vetero weather
590 | 590 monato month
591 | 591 miliono million
592 | 592 elporti bear
593 | 593 fini finish
594 | 594 feliĉa happy
595 | 595 atendi hope
596 | 596 floro flower
597 | 597 Vestu clothe
598 | 598 stranga strange
599 | 599 iritaj gone
600 | 600 komerco trade
601 | 601 melodio melody
602 | 602 vojaĝo trip
603 | 603 oficejo office
604 | 604 ricevos receive
605 | 605 vico row
606 | 606 buŝo mouth
607 | 607 ĝusta exact
608 | 608 simbolo symbol
609 | 609 morti die
610 | 610 almenaŭ least
611 | 611 malfelicxo trouble
612 | 612 krio shout
613 | 613 krom except
614 | 614 skribis wrote
615 | 615 semoj seed
616 | 616 tono tone
617 | 617 aliĝi join
618 | 618 sugesti suggest
619 | 619 purigi clean
620 | 620 paŭzo break
621 | 621 lady lady
622 | 622 jardo yard
623 | 623 supreniri rise
624 | 624 malbona bad
625 | 625 baton blow
626 | 626 petrolo oil
627 | 627 sango blood
628 | 628 tuŝi touch
629 | 629 kreskis grew
630 | 630 cendo cent
631 | 631 miksi mix
632 | 632 teamo team
633 | 633 drato wire
634 | 634 kosto cost
635 | 635 perdita lost
636 | 636 bruna brown
637 | 637 surhavi wear
638 | 638 ĝardeno garden
639 | 639 egalaj equal
640 | 640 sendis sent
641 | 641 elekti choose
642 | 642 falis fell
643 | 643 persvadis fit
644 | 644 fluas flow
645 | 645 bela fair
646 | 646 bordo bank
647 | 647 kolekti collect
648 | 648 ŝpari save
649 | 649 kontrolo control
650 | 650 dekuma decimal
651 | 651 orelo ear
652 | 652 alia else
653 | 653 tute quite
654 | 654 rompis broke
655 | 655 okazo case
656 | 656 mezo middle
657 | 657 mortigi kill
658 | 658 filo son
659 | 659 lago lake
660 | 660 momenton moment
661 | 661 skalo scale
662 | 662 laŭta loud
663 | 663 printempo spring
664 | 664 observi observe
665 | 665 infano child
666 | 666 rektaj straight
667 | 667 konsonanto consonant
668 | 668 nacio nation
669 | 669 vortaro dictionary
670 | 670 lakto milk
671 | 671 rapido speed
672 | 672 metodo method
673 | 673 organo organ
674 | 674 pagi pay
675 | 675 aĝo age
676 | 676 sekcio section
677 | 677 robo dress
678 | 678 nubo cloud
679 | 679 surprizo surprise
680 | 680 trankvila quiet
681 | 681 ŝtono stone
682 | 682 etajn tiny
683 | 683 grimpo climb
684 | 684 malvarmeta cool
685 | 685 dezajno design
686 | 686 malriĉa poor
687 | 687 multa lot
688 | 688 eksperimento experiment
689 | 689 malsupro bottom
690 | 690 ŝlosilo key
691 | 691 fero iron
692 | 692 sola single
693 | 693 bastono stick
694 | 694 plata flat
695 | 695 dudek twenty
696 | 696 haŭto skin
697 | 697 rideto smile
698 | 698 crease crease
699 | 699 truo hole
700 | 700 salti jump
701 | 701 bebo baby
702 | 702 ok eight
703 | 703 vilaĝo village
704 | 704 kunvenas meet
705 | 705 radiko root
706 | 706 aĉeti buy
707 | 707 enspezi raise
708 | 708 solvi solve
709 | 709 metalo metal
710 | 710 ĉu whether
711 | 711 puŝo push
712 | 712 sep seven
713 | 713 paragrafo paragraph
714 | 714 triono third
715 | 715 mortigu shall
716 | 716 tenis held
717 | 717 haroj hair
718 | 718 priskribi describe
719 | 719 kuiristino cook
720 | 720 planko floor
721 | 721 ĉu either
722 | 722 rezulto result
723 | 723 bruligi burn
724 | 724 monto hill
725 | 725 sekura safe
726 | 726 kato cat
727 | 727 jarcento century
728 | 728 konsideri consider
729 | 729 tipo type
730 | 730 leĝo law
731 | 731 iom bit
732 | 732 marbordo coast
733 | 733 kopio copy
734 | 734 frazon phrase
735 | 735 malbrua silent
736 | 736 altkreska tall
737 | 737 sablon sand
738 | 738 planko soil
739 | 739 rulo roll
740 | 740 temperaturo temperature
741 | 741 fingro finger
742 | 742 industrio industry
743 | 743 valoro value
744 | 744 lukto fight
745 | 745 mensogo lie
746 | 746 venki beat
747 | 747 eksciti excite
748 | 748 natura natural
749 | 749 vido view
750 | 750 sento sense
751 | 751 ĉefurbo capital
752 | 752 ne faros won’t
753 | 753 seĝo chair
754 | 754 danĝero danger
755 | 755 fruktoj fruit
756 | 756 riĉa rich
757 | 757 dikaj thick
758 | 758 soldato soldier
759 | 759 procezo process
760 | 760 funkcii operate
761 | 761 praktiko practice
762 | 762 apartaj separate
763 | 763 malfacila difficult
764 | 764 kuracisto doctor
765 | 765 bonvolu please
766 | 766 protekti protect
767 | 767 tagmezo noon
768 | 768 kropo crop
769 | 769 modernaj modern
770 | 770 elemento element
771 | 771 batita hit
772 | 772 lernantino student
773 | 773 angulo corner
774 | 774 festo party
775 | 775 provizado supply
776 | 776 kies whose
777 | 777 lokalizi locate
778 | 778 ringon ring
779 | 779 gravulo character
780 | 780 insekto insect
781 | 781 kaptita caught
782 | 782 periodo period
783 | 783 indiki indicate
784 | 784 radio radio
785 | 785 parolis spoke
786 | 786 atomo atom
787 | 787 homa human
788 | 788 historio history
789 | 789 efekto effect
790 | 790 elektra electric
791 | 791 atendi expect
792 | 792 osto bone
793 | 793 fervoja rail
794 | 794 imagu imagine
795 | 795 provizi provide
796 | 796 interkonsenti agree
797 | 797 tiele thus
798 | 798 afabla gentle
799 | 799 virino woman
800 | 800 kapitano captain
801 | 801 divenu guess
802 | 802 necesajn necessary
803 | 803 akra sharp
804 | 804 flugilo wing
805 | 805 Krei create
806 | 806 proksimulo neighbor
807 | 807 lavita wash
808 | 808 vesperto bat
809 | 809 anstataŭ rather
810 | 810 amaso crowd
811 | 811 grajnoj corn
812 | 812 kompari compare
813 | 813 poemo poem
814 | 814 kordo string
815 | 815 sonorilo bell
816 | 816 dependi depend
817 | 817 viando meat
818 | 818 rub rub
819 | 819 tubo tube
820 | 820 famaj famous
821 | 921 dolaro dollar
822 | 822 fluo stream
823 | 823 timo fear
824 | 284 vido sight
825 | 825 maldika thin
826 | 826 triangulo triangle
827 | 827 planedo planet
828 | 828 rapidi hurry
829 | 829 estro chief
830 | 830 kolonio colony
831 | 831 horloĝo clock
832 | 832 miaj mine
833 | 833 egaleco tie
834 | 834 eniri enter
835 | 835 granda major
836 | 836 freŝa fresh
837 | 837 serĉo search
838 | 838 sendu send
839 | 839 flava yellow
840 | 840 pafilo gun
841 | 841 permesi allow
842 | 842 print print
843 | 843 mortinto dead
844 | 844 punkto spot
845 | 845 dezerto desert
846 | 846 kostumo suit
847 | 847 aktuala current
848 | 848 telfero lift
849 | 840 leviĝis rose
850 | 850 alveni arrive
851 | 851 majstro master
852 | 852 aŭtoveturejo track
853 | 853 gepatro parent
854 | 854 bordo shore
855 | 855 divido division
856 | 856 sheet sheet
857 | 857 substanco substance
858 | 858 favorus favor
859 | 859 konekti connect
860 | 860 posteno post
861 | 861 elspezi spend
862 | 862 chord chord
863 | 863 graso fat
864 | 864 ĝojas glad
865 | 865 originala original
866 | 866 samkondiĉe share
867 | 867 stacidomo station
868 | 868 dad dad
869 | 869 pano bread
870 | 870 ŝarĝi charge
871 | 871 taŭga proper
872 | 872 trinkejo bar
873 | 873 oferto offer
874 | 874 segmento segment
875 | 875 sklavo slave
876 | 876 anaso duck
877 | 877 momenteto instant
878 | 878 merkato market
879 | 879 grado degree
880 | 880 popoli populate
881 | 881 ĉik chick
882 | 882 kara dear
883 | 883 malamiko enemy
884 | 884 respondi reply
885 | 885 trinkaĵon drink
886 | 886 okazi occur
887 | 887 subteno support
888 | 888 parolado speech
889 | 889 naturo nature
890 | 890 ventumilo range
891 | 891 vaporo steam
892 | 892 movado motion
893 | 893 vojo path
894 | 894 likva liquid
895 | 895 ensaluti log
896 | 896 signifis meant
897 | 897 kvociento quotient
898 | 898 dentoj teeth
899 | 899 konko shell
900 | 900 kolo neck
901 | 901 oksigeno oxygen
902 | 902 sukero sugar
903 | 903 morto death
904 | 904 bela pretty
905 | 905 lerto skill
906 | 906 virinoj women
907 | 907 sezono season
908 | 908 solvo solution
909 | 909 magneto magnet
910 | 910 arĝento silver
911 | 911 dankon thank
912 | 912 branĉo branch
913 | 913 match match
914 | 914 sufikso suffix
915 | 915 ĉefe especially
916 | 916 figo fig
917 | 917 timis afraid
918 | 918 grandega huge
919 | 919 fratino sister
920 | 920 ŝtalo steel
921 | 921 diskuti discuss
922 | 922 antaŭen forward
923 | 923 simila similar
924 | 924 gvidi guide
925 | 925 sperto experience
926 | 926 interpunkcio score
927 | 927 pomon apple
928 | 928 aĉetinta bought
929 | 929 kondukis led
930 | 930 tonalto pitch
931 | 931 ŝildo coat
932 | 932 maso mass
933 | 933 karton card
934 | 934 bando band
935 | 935 ŝnuro rope
936 | 936 slip slip
937 | 937 venko win
938 | 938 sonĝi dream
939 | 939 vespero evening
940 | 940 kondiĉo condition
941 | 941 feed feed
942 | 942 ilo tool
943 | 943 entute total
944 | 944 baza basic
945 | 945 bonodoro smell
946 | 946 valo valley
947 | 947 nek nor
948 | 948 duobla double
949 | 949 seĝo seat
950 | 950 daŭrigi continue
951 | 951 bloko block
952 | 952 grafikaĵo chart
953 | 953 ĉapelo hat
954 | 954 vendi sell
955 | 955 sukceso success
956 | 956 kompanio company
957 | 957 subtrahi subtract
958 | 958 okazaĵo event
959 | 959 aparta particular
960 | 960 interkonsento deal
961 | 961 naĝi swim
962 | 962 termino term
963 | 963 kontraŭa opposite
964 | 964 edzino wife
965 | 965 ŝuo shoe
966 | 966 ŝultro shoulder
967 | 967 disvastiĝo spread
968 | 968 aranĝi arrange
969 | 969 tendaro camp
970 | 970 elpensi invent
971 | 971 kotono cotton
972 | 972 Born born
973 | 973 determini determine
974 | 974 kvarto quart
975 | 975 naŭ nine
976 | 976 kamiono truck
977 | 977 bruo noise
978 | 978 nivelo level
979 | 979 ŝanco chance
980 | 980 kolekti gather
981 | 981 butiko shop
982 | 982 sekcio stretch
983 | 983 ĵeti throw
984 | 984 briligi shine
985 | 985 propraĵo property
986 | 986 kolono column
987 | 987 molekulo molecule
988 | 988 elekti select
989 | 989 malĝusta wrong
990 | 990 grizaj gray
991 | 991 ripeto repeat
992 | 992 postuli require
993 | 993 larĝa broad
994 | 994 pretigi prepare
995 | 995 Sala salt
996 | 996 nazon nose
997 | 997 pluralo plural
998 | 998 kolero anger
999 | 999 pretendo claim
1000 | 1000 kontinento continent
--------------------------------------------------------------------------------
/language/lid.176.ftz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/language/lid.176.ftz
--------------------------------------------------------------------------------
/language/tr.tsv:
--------------------------------------------------------------------------------
1 | 1 olarak as
2 | 2 ben I
3 | 3 onun his
4 | 4 bu that
5 | 5 diye he
6 | 6 oldu was
7 | 7 için for
8 | 8 üzerinde on
9 | 9 vardır are
10 | 10 ile with
11 | 11 onlar they
12 | 12 olmak be
13 | 13 at at
14 | 14 bir one
15 | 15 var have
16 | 16 Bu this
17 | 17 dan from
18 | 18 tarafından by
19 | 19 sıcak hot
20 | 20 kelime word
21 | 21 ancak but
22 | 22 ne what
23 | 23 bazı some
24 | 24 olduğunu is
25 | 25 o it
26 | 26 sen you
27 | 27 veya or
28 | 28 vardı had
29 | 29 the
30 | 30 arasında of
31 | 31 karşı to
32 | 32 ve and
33 | 33 bir a
34 | 34 içinde in
35 | 35 biz we
36 | 36 can can
37 | 37 üzerinden out
38 | 38 diğer other
39 | 39 vardı were
40 | 40 hangi which
41 | 41 do do
42 | 42 onların their
43 | 43 zaman time
44 | 44 eğer if
45 | 45 olacak will
46 | 46 nasıl how
47 | 47 dedi said
48 | 48 bir an
49 | 49 her each
50 | 50 söyle tell
51 | 51 yok does
52 | 52 set set
53 | 53 üç three
54 | 54 istiyorum want
55 | 55 hava air
56 | 56 iyi well
57 | 57 ayrıca also
58 | 58 oynamak play
59 | 59 küçük small
60 | 60 son end
61 | 61 koymak put
62 | 62 ev home
63 | 63 okumak read
64 | 64 el hand
65 | 65 liman port
66 | 66 büyük large
67 | 67 büyü spell
68 | 68 ekleyin add
69 | 69 hatta even
70 | 70 arazi land
71 | 71 burada here
72 | 72 gerekir must
73 | 73 büyük big
74 | 74 yüksek high
75 | 75 böyle such
76 | 76 izleyin follow
77 | 77 hareket act
78 | 78 neden why
79 | 79 sormak ask
80 | 80 erkekler men
81 | 81 değişim change
82 | 82 gitti went
83 | 83 ışık light
84 | 84 tür kind
85 | 85 kapalı off
86 | 86 gerek need
87 | 87 ev house
88 | 88 resim picture
89 | 89 denemek try
90 | 90 bizi us
91 | 91 tekrar again
92 | 92 hayvan animal
93 | 93 nokta point
94 | 94 anne mother
95 | 95 dünya world
96 | 96 yakın near
97 | 97 inşa etmek build
98 | 98 öz self
99 | 99 toprak earth
100 | 100 baba father
101 | 101 herhangi bir any
102 | 102 yeni new
103 | 103 iş work
104 | 104 bölüm part
105 | 105 almak take
106 | 106 olsun get
107 | 107 yer place
108 | 108 yapılan made
109 | 109 canlı live
110 | 110 nerede where
111 | 111 sonra after
112 | 112 geri back
113 | 113 küçük little
114 | 114 sadece only
115 | 115 yuvarlak round
116 | 116 adam man
117 | 117 yıl year
118 | 118 geldi came
119 | 119 gösterisi show
120 | 120 her every
121 | 121 iyi good
122 | 122 bana me
123 | 123 vermek give
124 | 124 bizim our
125 | 125 altında under
126 | 126 adı name
127 | 127 çok very
128 | 128 ile through
129 | 129 sadece just
130 | 130 formu form
131 | 131 cümle sentence
132 | 132 büyük great
133 | 133 düşünmek think
134 | 134 demek say
135 | 135 yardım help
136 | 136 düşük low
137 | 137 hat line
138 | 138 farklı differ
139 | 139 dönüş turn
140 | 140 neden cause
141 | 141 çok much
142 | 142 Yani mean
143 | 143 önce before
144 | 144 hareket move
145 | 145 sağ right
146 | 146 çocuk boy
147 | 147 eski old
148 | 148 çok too
149 | 149 aynı same
150 | 150 diye she
151 | 151 tüm all
152 | 152 orada there
153 | 153 zaman when
154 | 154 yukarı up
155 | 155 Kullanım use
156 | 156 senin your
157 | 157 yol way
158 | 158 hakkında about
159 | 159 birçok many
160 | 160 o zaman then
161 | 161 onları them
162 | 162 yazmak write
163 | 163 would would
164 | 164 gibi like
165 | 165 bu yüzden so
166 | 166 bu these
167 | 167 onu her
168 | 168 uzun long
169 | 169 yapmak make
170 | 170 şey thing
171 | 171 görmek see
172 | 172 onu him
173 | 173 iki two
174 | 174 var has
175 | 175 bak look
176 | 176 daha fazla more
177 | 177 gün day
178 | 178 olabilir could
179 | 179 gitmek go
180 | 180 gel come
181 | 181 yaptım did
182 | 182 numarası number
183 | 183 ses sound
184 | 184 hayır no
185 | 185 en most
186 | 186 insanlar people
187 | 187 benim my
188 | 188 üzerinde over
189 | 189 bilmek know
190 | 190 su water
191 | 191 daha than
192 | 192 çağrı call
193 | 193 ilk first
194 | 194 kim who
195 | 195 may may
196 | 196 aşağı down
197 | 197 yan side
198 | 198 oldu been
199 | 199 Şimdi now
200 | 200 bulmak find
201 | 201 kafa head
202 | 202 standı stand
203 | 203 kendi own
204 | 204 sayfa page
205 | 205 gerekir should
206 | 206 ülke country
207 | 207 bulundu found
208 | 208 cevap answer
209 | 209 okul school
210 | 210 büyümek grow
211 | 211 çalışma study
212 | 212 hala still
213 | 213 öğrenmek learn
214 | 214 bitki plant
215 | 215 kapak cover
216 | 216 gıda food
217 | 217 güneş sun
218 | 218 dört four
219 | 219 arasında between
220 | 220 devlet state
221 | 221 tutmak keep
222 | 222 göz eye
223 | 223 asla never
224 | 224 son last
225 | 225 let let
226 | 226 düşünce thought
227 | 227 Şehir city
228 | 228 ağaç tree
229 | 229 çapraz cross
230 | 230 çiftlik farm
231 | 231 sert hard
232 | 232 başlangıç start
233 | 233 olabilir might
234 | 234 hikaye story
235 | 235 testere saw
236 | 236 kadar far
237 | 237 deniz sea
238 | 238 çizmek draw
239 | 239 sol left
240 | 240 geç late
241 | 241 run run
242 | 242 yapamaz don’t
243 | 243 süre while
244 | 244 basın press
245 | 245 yakın close
246 | 246 gece night
247 | 247 gerçek real
248 | 248 hayat life
249 | 249 az few
250 | 250 kuzey north
251 | 251 kitap book
252 | 252 taşımak carry
253 | 253 aldı took
254 | 254 bilim science
255 | 255 yemek eat
256 | 256 oda room
257 | 257 arkadaşı friend
258 | 258 başladı began
259 | 259 fikir idea
260 | 260 balık fish
261 | 261 dağ mountain
262 | 262 dur stop
263 | 263 bir kez once
264 | 264 baz base
265 | 265 duymak hear
266 | 266 at horse
267 | 267 kesim cut
268 | 268 emin sure
269 | 269 izle watch
270 | 270 renk color
271 | 271 yüz face
272 | 272 ahşap wood
273 | 273 ana main
274 | 274 açık open
275 | 275 görünmek seem
276 | 276 birlikte together
277 | 277 sonraki next
278 | 278 beyaz white
279 | 279 çocuk children
280 | 280 başla begin
281 | 281 var got
282 | 282 yürümek walk
283 | 283 örnek example
284 | 284 kolaylığı ease
285 | 285 kâğıt paper
286 | 286 grup group
287 | 287 her zaman always
288 | 288 müzik music
289 | 289 bu those
290 | 290 her ikisi de both
291 | 291 işareti mark
292 | 292 sık sık often
293 | 293 mektup letter
294 | 294 kadar until
295 | 295 kilometre mile
296 | 296 nehir river
297 | 297 araba car
298 | 298 ayaklar feet
299 | 299 bakım care
300 | 300 ikinci second
301 | 301 yeterli enough
302 | 302 düz plain
303 | 303 kız girl
304 | 304 olağan usual
305 | 305 genç young
306 | 306 hazır ready
307 | 307 yukarıdaki above
308 | 308 hiç ever
309 | 309 kırmızı red
310 | 310 listesi list
311 | 311 olsa though
312 | 312 hissetmek feel
313 | 313 tartışma talk
314 | 314 kuş bird
315 | 315 yakında soon
316 | 316 vücut body
317 | 317 köpek dog
318 | 318 aile family
319 | 319 doğrudan direct
320 | 320 poz pose
321 | 321 bırakın leave
322 | 322 şarkı song
323 | 323 ölçmek measure
324 | 324 kapı door
325 | 325 ürün product
326 | 326 siyah black
327 | 327 kısa short
328 | 328 rakam numeral
329 | 329 sınıf class
330 | 330 rüzgar wind
331 | 331 soru question
332 | 332 olur happen
333 | 333 komple complete
334 | 334 gemi ship
335 | 335 alan area
336 | 336 yarım half
337 | 337 kaya rock
338 | 338 sipariş order
339 | 339 yangın fire
340 | 340 güney south
341 | 341 sorun problem
342 | 342 parça piece
343 | 343 söyledi told
344 | 344 biliyordu knew
345 | 345 geçmek pass
346 | 346 beri since
347 | 347 üst top
348 | 348 bütün whole
349 | 349 kral king
350 | 350 sokak street
351 | 351 inch inch
352 | 352 çarpmak multiply
353 | 353 hiçbir şey nothing
354 | 354 ders course
355 | 355 kalmak stay
356 | 356 tekerlek wheel
357 | 357 tam full
358 | 358 kuvvet force
359 | 359 mavi blue
360 | 360 nesne object
361 | 361 karar vermek decide
362 | 362 yüzey surface
363 | 363 derin deep
364 | 364 ay moon
365 | 365 ada island
366 | 366 ayak foot
367 | 367 sistem system
368 | 368 meşgul busy
369 | 369 Test test
370 | 370 kayıt record
371 | 371 tekne boat
372 | 372 ortak common
373 | 373 altın gold
374 | 374 mümkün possible
375 | 375 düzlem plane
376 | 376 başkasının yeri stead
377 | 377 kuru dry
378 | 378 acaba wonder
379 | 379 gülmek laugh
380 | 380 bin thousand
381 | 381 önce ago
382 | 382 koştu ran
383 | 383 kontrol check
384 | 384 oyunu game
385 | 385 şekil shape
386 | 386 eşit equate
387 | 387 sıcak hot
388 | 388 özledim miss
389 | 389 getirdi brought
390 | 390 ısı heat
391 | 391 kar snow
392 | 392 lastik tire
393 | 393 getirmek bring
394 | 394 evet yes
395 | 395 uzak distant
396 | 396 doldurun fill
397 | 397 doğu east
398 | 398 boya paint
399 | 399 dil language
400 | 400 arasında among
401 | 401 ünitesi unit
402 | 402 güç power
403 | 403 kasaba town
404 | 404 ince fine
405 | 405 belirli certain
406 | 406 uçmak fly
407 | 407 düşmek fall
408 | 408 kurşun lead
409 | 409 ağlamak cry
410 | 410 karanlık dark
411 | 411 makine machine
412 | 412 notu note
413 | 413 bekleyin wait
414 | 414 planı plan
415 | 415 rakam figure
416 | 416 yıldız star
417 | 417 kutu box
418 | 418 i noun
419 | 419 alan field
420 | 420 gerisi rest
421 | 421 doğru correct
422 | 422 yetenekli able
423 | 423 kiloluk pound
424 | 424 bitti done
425 | 425 güzellik beauty
426 | 426 sürücü drive
427 | 427 durdu stood
428 | 428 içeren contain
429 | 429 ön front
430 | 430 öğretmek teach
431 | 431 hafta week
432 | 432 nihai final
433 | 433 verdi gave
434 | 434 yeşil green
435 | 435 oh oh
436 | 436 hızlı quick
437 | 437 geliştirmek develop
438 | 438 okyanus ocean
439 | 439 sıcak warm
440 | 440 ücretsiz free
441 | 441 dakika minute
442 | 442 güçlü strong
443 | 443 özel special
444 | 444 zihin mind
445 | 445 arkasında behind
446 | 446 açık clear
447 | 447 kuyruk tail
448 | 448 üretmek produce
449 | 449 gerçek fact
450 | 450 uzay space
451 | 451 duydum heard
452 | 452 en iyi best
453 | 453 saat hour
454 | 454 daha iyi better
455 | 455 gerçek true
456 | 456 sırasında during
457 | 457 yüz hundred
458 | 458 beş five
459 | 459 hatırlıyorum remember
460 | 460 adım step
461 | 461 erken early
462 | 462 tutun hold
463 | 463 batı west
464 | 464 zemin ground
465 | 465 faiz interest
466 | 466 ulaşmak reach
467 | 467 hızlı fast
468 | 468 fiil verb
469 | 469 şarkı söylemek sing
470 | 470 dinle listen
471 | 471 altı six
472 | 472 tablo table
473 | 473 seyahat travel
474 | 474 daha az less
475 | 475 sabah morning
476 | 476 on ten
477 | 477 basit simple
478 | 478 birkaç several
479 | 479 ünlü vowel
480 | 480 doğru toward
481 | 481 savaş war
482 | 482 koymak lay
483 | 483 karşı against
484 | 484 desen pattern
485 | 485 yavaş slow
486 | 486 merkezi center
487 | 487 aşk love
488 | 488 kişi person
489 | 489 para money
490 | 490 hizmet serve
491 | 491 görünür appear
492 | 492 yol road
493 | 493 harita map
494 | 494 yağmur rain
495 | 495 kural rule
496 | 496 idare govern
497 | 497 Çek pull
498 | 498 soğuk cold
499 | 499 haber notice
500 | 500 ses voice
501 | 501 enerji energy
502 | 502 avı hunt
503 | 503 muhtemel probable
504 | 504 yatak bed
505 | 505 kardeş brother
506 | 506 yumurta egg
507 | 507 binmek ride
508 | 508 hücre cell
509 | 509 inanıyorum believe
510 | 510 belki perhaps
511 | 511 seçmek pick
512 | 512 ani sudden
513 | 513 saymak count
514 | 514 kare square
515 | 515 neden reason
516 | 516 uzunluk length
517 | 517 temsil represent
518 | 518 sanat art
519 | 519 konu subject
520 | 520 bölge region
521 | 521 boyut size
522 | 522 değiştirmek vary
523 | 523 yerleşmek settle
524 | 524 konuşmak speak
525 | 525 ağırlık weight
526 | 526 genel general
527 | 527 buz ice
528 | 528 madde matter
529 | 529 daire circle
530 | 530 çifti pair
531 | 531 dahil include
532 | 532 bölmek divide
533 | 533 hece syllable
534 | 534 keçe felt
535 | 535 büyük grand
536 | 536 top ball
537 | 537 henüz yet
538 | 538 dalga wave
539 | 539 bırakın drop
540 | 540 kalp heart
541 | 541 duyuyorum am
542 | 542 mevcut present
543 | 543 ağır heavy
544 | 544 dans dance
545 | 545 motor engine
546 | 546 pozisyon position
547 | 547 kol arm
548 | 548 geniş wide
549 | 549 yelken sail
550 | 550 materyal material
551 | 551 kesir fraction
552 | 552 orman forest
553 | 553 oturmak sit
554 | 554 yarış race
555 | 555 penceresi window
556 | 556 mağaza store
557 | 557 yaz summer
558 | 558 tren train
559 | 559 uyku sleep
560 | 560 kanıtlamak prove
561 | 561 yalnız lone
562 | 562 bacak leg
563 | 563 egzersiz exercise
564 | 564 duvar wall
565 | 565 catch catch
566 | 566 dağ mount
567 | 567 dilek wish
568 | 568 gökyüzü sky
569 | 569 kurulu board
570 | 570 sevinç joy
571 | 571 kış winter
572 | 572 Cts sat
573 | 573 yazılı written
574 | 574 vahşi wild
575 | 575 enstrüman instrument
576 | 576 tuttu kept
577 | 577 cam glass
578 | 578 çim grass
579 | 579 inek cow
580 | 580 iş job
581 | 581 kenar edge
582 | 582 işareti sign
583 | 583 ziyareti visit
584 | 584 geçmiş past
585 | 585 yumuşak soft
586 | 586 eğlenceli fun
587 | 587 parlak bright
588 | 588 gaz gas
589 | 589 hava weather
590 | 590 ay month
591 | 591 milyon million
592 | 592 ayı bear
593 | 593 bitirmek finish
594 | 594 mutlu happy
595 | 595 umut hope
596 | 596 çiçek flower
597 | 597 giydirmek clothe
598 | 598 garip strange
599 | 599 gitti gone
600 | 600 ticaret trade
601 | 601 melodi melody
602 | 602 gezi trip
603 | 603 ofis office
604 | 604 almak receive
605 | 605 satır row
606 | 606 ağız mouth
607 | 607 tam exact
608 | 608 sembol symbol
609 | 609 ölmek die
610 | 610 en az least
611 | 611 sorun trouble
612 | 612 bağırmak shout
613 | 613 hariç except
614 | 614 yazdı wrote
615 | 615 tohum seed
616 | 616 sesi tone
617 | 617 katılmak join
618 | 618 önermek suggest
619 | 619 temiz clean
620 | 620 molası break
621 | 621 bayan lady
622 | 622 yarda yard
623 | 623 yükselecek rise
624 | 624 kötü bad
625 | 625 darbe blow
626 | 626 yağ oil
627 | 627 kan blood
628 | 628 dokunma touch
629 | 629 büyüdü grew
630 | 630 yüzde cent
631 | 631 karıştırmak mix
632 | 632 takım team
633 | 633 tel wire
634 | 634 maliyet cost
635 | 635 kaybetti lost
636 | 636 kahverengi brown
637 | 637 giymek wear
638 | 638 bahçe garden
639 | 639 eşit equal
640 | 640 gönderildi sent
641 | 641 seçim choose
642 | 642 düştü fell
643 | 643 uygun fit
644 | 644 akış flow
645 | 645 adil fair
646 | 646 banka bank
647 | 647 toplamak collect
648 | 648 kaydetmek save
649 | 649 kontrolü control
650 | 650 ondalık decimal
651 | 651 kulak ear
652 | 652 başka else
653 | 653 oldukça quite
654 | 654 kırdı broke
655 | 655 dava case
656 | 656 orta middle
657 | 657 öldürmek kill
658 | 658 oğlu son
659 | 659 göl lake
660 | 660 an moment
661 | 661 ölçek scale
662 | 662 yüksek sesle loud
663 | 663 bahar spring
664 | 664 gözlemlemek observe
665 | 665 çocuk child
666 | 666 düz straight
667 | 667 ünsüz consonant
668 | 668 ulus nation
669 | 669 Sözlük dictionary
670 | 670 süt milk
671 | 671 hız speed
672 | 672 yöntem method
673 | 673 Organ organ
674 | 674 ödeme pay
675 | 675 yaş age
676 | 676 bölüm section
677 | 677 elbise dress
678 | 678 bulut cloud
679 | 679 sürpriz surprise
680 | 680 sessiz quiet
681 | 681 taş stone
682 | 682 küçücük tiny
683 | 683 tırmanış climb
684 | 684 serin cool
685 | 685 dizayn design
686 | 686 kötü poor
687 | 687 çok lot
688 | 688 deneme experiment
689 | 689 alt bottom
690 | 690 anahtar key
691 | 691 demir iron
692 | 692 tek single
693 | 693 sopa stick
694 | 694 düz flat
695 | 695 yirmi twenty
696 | 696 cilt skin
697 | 697 gülümseme smile
698 | 698 kırışık crease
699 | 699 delik hole
700 | 700 atlama jump
701 | 701 bebek baby
702 | 702 sekiz eight
703 | 703 köy village
704 | 704 karşılamak meet
705 | 705 kök root
706 | 706 satın buy
707 | 707 yükseltmek raise
708 | 708 çözmek solve
709 | 709 metal metal
710 | 710 olsun whether
711 | 711 it push
712 | 712 yedi seven
713 | 713 paragraf paragraph
714 | 714 üçüncü third
715 | 715 Shall shall
716 | 716 tutulan held
717 | 717 saç hair
718 | 718 tanımlamak describe
719 | 719 aşçı cook
720 | 720 zemin floor
721 | 721 ya da either
722 | 722 sonuç result
723 | 723 yanmak burn
724 | 724 tepe hill
725 | 725 güvenli safe
726 | 726 kedi cat
727 | 727 yüzyıl century
728 | 728 düşünmek consider
729 | 729 tipi type
730 | 730 hukuk law
731 | 731 bit bit
732 | 732 sahil coast
733 | 733 kopya copy
734 | 734 ifade phrase
735 | 735 sessiz silent
736 | 736 uzun boylu tall
737 | 737 kum sand
738 | 738 toprak soil
739 | 739 rulo roll
740 | 740 sıcaklık temperature
741 | 741 parmak finger
742 | 742 sanayi industry
743 | 743 değeri value
744 | 744 kavga fight
745 | 745 yalan lie
746 | 746 yendi beat
747 | 747 heyecanlandırmak excite
748 | 748 doğal natural
749 | 749 görünüm view
750 | 750 sense sense
751 | 751 sermaye capital
752 | 752 olmaz won’t
753 | 753 sandalye chair
754 | 754 tehlike danger
755 | 755 meyve fruit
756 | 756 zengin rich
757 | 757 kalın thick
758 | 758 asker soldier
759 | 759 süreç process
760 | 760 işletmek operate
761 | 761 uygulama practice
762 | 762 ayrı separate
763 | 763 zor difficult
764 | 764 doktor doctor
765 | 765 lütfen please
766 | 766 korumak protect
767 | 767 öğlen noon
768 | 768 kırpma crop
769 | 769 çağdaş modern
770 | 770 eleman element
771 | 771 vurmak hit
772 | 772 öğrenci student
773 | 773 köşe corner
774 | 774 parti party
775 | 775 besleme supply
776 | 776 kimin whose
777 | 777 yerleştirmek locate
778 | 778 halka ring
779 | 779 karakter character
780 | 780 böcek insect
781 | 781 yakalandı caught
782 | 782 dönemi period
783 | 783 göstermektedir indicate
784 | 784 radyo radio
785 | 785 konuştu spoke
786 | 786 atomudur atom
787 | 787 insan human
788 | 788 geçmişi history
789 | 789 etkisi effect
790 | 790 elektrik electric
791 | 791 bekliyoruz expect
792 | 792 kemik bone
793 | 793 demiryolu rail
794 | 794 hayal imagine
795 | 795 sağlamak provide
796 | 796 katılıyorum agree
797 | 797 böylece thus
798 | 798 nazik gentle
799 | 799 kadın woman
800 | 800 kaptan captain
801 | 801 sanırım guess
802 | 802 gerekli necessary
803 | 803 keskin sharp
804 | 804 kanat wing
805 | 805 oluşturmak create
806 | 806 komşu neighbor
807 | 807 yıkama wash
808 | 808 yarasa bat
809 | 809 yerine rather
810 | 810 kalabalık crowd
811 | 811 mısır corn
812 | 812 karşılaştırma compare
813 | 813 şiir poem
814 | 814 dize string
815 | 815 çan bell
816 | 816 bağlı depend
817 | 817 et meat
818 | 818 ovmak rub
819 | 819 tüp tube
820 | 820 ünlü famous
821 | 921 dolar dollar
822 | 822 akışı stream
823 | 823 korku fear
824 | 284 görüş sight
825 | 825 ince thin
826 | 826 üçgen triangle
827 | 827 gezegen planet
828 | 828 acele hurry
829 | 829 baş chief
830 | 830 koloni colony
831 | 831 saat clock
832 | 832 mayın mine
833 | 833 kravat tie
834 | 834 girin enter
835 | 835 büyük major
836 | 836 taze fresh
837 | 837 arama search
838 | 838 göndermek send
839 | 839 sarı yellow
840 | 840 gun gun
841 | 841 izin allow
842 | 842 baskı print
843 | 843 ölü dead
844 | 844 nokta spot
845 | 845 çöl desert
846 | 846 takım elbise suit
847 | 847 akım current
848 | 848 asansör lift
849 | 840 gül rose
850 | 850 varmak arrive
851 | 851 usta master
852 | 852 iz track
853 | 853 ebeveyn parent
854 | 854 kıyı shore
855 | 855 bölünme division
856 | 856 levha sheet
857 | 857 madde substance
858 | 858 iyilik favor
859 | 859 bağlamak connect
860 | 860 sonrası post
861 | 861 harcamak spend
862 | 862 akor chord
863 | 863 şişman fat
864 | 864 memnun glad
865 | 865 orijinal original
866 | 866 payı share
867 | 867 istasyon station
868 | 868 baba dad
869 | 869 ekmek bread
870 | 870 şarj charge
871 | 871 uygun proper
872 | 872 bar bar
873 | 873 teklif offer
874 | 874 segmenti segment
875 | 875 köle slave
876 | 876 ördek duck
877 | 877 anlık instant
878 | 878 pazar market
879 | 879 derecesi degree
880 | 880 doldurmak populate
881 | 881 civciv chick
882 | 882 sevgili dear
883 | 883 düşman enemy
884 | 884 cevap reply
885 | 885 içki drink
886 | 886 meydana occur
887 | 887 destek support
888 | 888 konuşma speech
889 | 889 doğa nature
890 | 890 aralığı range
891 | 891 buhar steam
892 | 892 hareket motion
893 | 893 yol path
894 | 894 sıvı liquid
895 | 895 log log
896 | 896 demek meant
897 | 897 bölüm quotient
898 | 898 dişler teeth
899 | 899 kabuk shell
900 | 900 boyun neck
901 | 901 oksijen oxygen
902 | 902 şeker sugar
903 | 903 ölüm death
904 | 904 güzel pretty
905 | 905 beceri skill
906 | 906 kadın women
907 | 907 sezon season
908 | 908 çözüm solution
909 | 909 mıknatıs magnet
910 | 910 gümüş silver
911 | 911 teşekkür ederim thank
912 | 912 şube branch
913 | 913 maç match
914 | 914 sonek suffix
915 | 915 özellikle especially
916 | 916 incir fig
917 | 917 korkuyor afraid
918 | 918 büyük huge
919 | 919 kardeş sister
920 | 920 çelik steel
921 | 921 tartışmak discuss
922 | 922 ileri forward
923 | 923 benzer similar
924 | 924 kılavuz guide
925 | 925 deneyim experience
926 | 926 puan score
927 | 927 elma apple
928 | 928 satın aldı bought
929 | 929 açtı led
930 | 930 zift pitch
931 | 931 ceket coat
932 | 932 kitle mass
933 | 933 kart card
934 | 934 bant band
935 | 935 halat rope
936 | 936 kayma slip
937 | 937 win win
938 | 938 rüya dream
939 | 939 akşam evening
940 | 940 durumu condition
941 | 941 beslemek feed
942 | 942 aracı tool
943 | 943 toplam total
944 | 944 temel basic
945 | 945 koku smell
946 | 946 vadi valley
947 | 947 ne nor
948 | 948 çift double
949 | 949 koltuk seat
950 | 950 devam etmek continue
951 | 951 bloğu block
952 | 952 grafiği chart
953 | 953 şapka hat
954 | 954 satmak sell
955 | 955 başarı success
956 | 956 şirket company
957 | 957 çıkarmak subtract
958 | 958 olay event
959 | 959 özellikle particular
960 | 960 anlaşma deal
961 | 961 yüzmek swim
962 | 962 vadeli term
963 | 963 karşısında opposite
964 | 964 karısı wife
965 | 965 ayakkabı shoe
966 | 966 omuz shoulder
967 | 967 yaymak spread
968 | 968 düzenlemek arrange
969 | 969 kamp camp
970 | 970 icat invent
971 | 971 pamuk cotton
972 | 972 doğmuş born
973 | 973 belirlemek determine
974 | 974 kuart quart
975 | 975 dokuz nine
976 | 976 kamyon truck
977 | 977 gürültü noise
978 | 978 seviyesi level
979 | 979 şans chance
980 | 980 toplamak gather
981 | 981 dükkan shop
982 | 982 streç stretch
983 | 983 atmak throw
984 | 984 parlaklık shine
985 | 985 mülkiyet property
986 | 986 sütun column
987 | 987 molekülü molecule
988 | 988 seçin select
989 | 989 yanlış wrong
990 | 990 gri gray
991 | 991 tekrar repeat
992 | 992 gerektirir require
993 | 993 geniş broad
994 | 994 hazırlamak prepare
995 | 995 tuz salt
996 | 996 burun nose
997 | 997 çoğul plural
998 | 998 öfke anger
999 | 999 iddia claim
1000 | 1000 kıta continent
--------------------------------------------------------------------------------
/language/vi.tsv:
--------------------------------------------------------------------------------
1 | 1 như as
2 | 2 tôi I
3 | 3 mình his
4 | 4 mà that
5 | 5 ông he
6 | 6 là was
7 | 7 cho for
8 | 8 trên on
9 | 9 là are
10 | 10 với with
11 | 11 họ they
12 | 12 được be
13 | 13 tại at
14 | 14 một one
15 | 15 có have
16 | 16 này this
17 | 17 từ from
18 | 18 bởi by
19 | 19 nóng hot
20 | 20 từ word
21 | 21 nhưng but
22 | 22 những gì what
23 | 23 một số some
24 | 24 là is
25 | 25 nó it
26 | 26 anh you
27 | 27 hoặc or
28 | 28 có had
29 | 29 các the
30 | 30 của of
31 | 31 để to
32 | 32 và and
33 | 33 một a
34 | 34 trong in
35 | 35 chúng tôi we
36 | 36 có thể can
37 | 37 ra out
38 | 38 khác other
39 | 39 là were
40 | 40 mà which
41 | 41 làm do
42 | 42 của họ their
43 | 43 thời gian time
44 | 44 nếu if
45 | 45 sẽ will
46 | 46 như thế nào how
47 | 47 nói said
48 | 48 một an
49 | 49 môi each
50 | 50 nói tell
51 | 51 không does
52 | 52 bộ set
53 | 53 ba three
54 | 54 muốn want
55 | 55 không khí air
56 | 56 cũng well
57 | 57 cũng also
58 | 58 chơi play
59 | 59 nhỏ small
60 | 60 cuối end
61 | 61 đặt put
62 | 62 nhà home
63 | 63 đọc read
64 | 64 tay hand
65 | 65 cổng port
66 | 66 lớn large
67 | 67 chính tả spell
68 | 68 thêm add
69 | 69 thậm chí even
70 | 70 đất land
71 | 71 ở đây here
72 | 72 phải must
73 | 73 lớn big
74 | 74 cao high
75 | 75 như vậy such
76 | 76 theo follow
77 | 77 hành động act
78 | 78 lý do tại sao why
79 | 79 xin ask
80 | 80 người đàn ông men
81 | 81 thay đổi change
82 | 82 đi went
83 | 83 ánh sáng light
84 | 84 loại kind
85 | 85 tắt off
86 | 86 cần need
87 | 87 nhà house
88 | 88 hình ảnh picture
89 | 89 thử try
90 | 90 chúng tôi us
91 | 91 một lần nữa again
92 | 92 động vật animal
93 | 93 điểm point
94 | 94 mẹ mother
95 | 95 thế giới world
96 | 96 gần near
97 | 97 xây dựng build
98 | 98 tự self
99 | 99 đất earth
100 | 100 cha father
101 | 101 bất kỳ any
102 | 102 mới new
103 | 103 công việc work
104 | 104 một phần part
105 | 105 có take
106 | 106 được get
107 | 107 nơi place
108 | 108 thực hiện made
109 | 109 sống live
110 | 110 nơi where
111 | 111 sau khi after
112 | 112 trở lại back
113 | 113 ít little
114 | 114 chỉ only
115 | 115 chung quanh round
116 | 116 người đàn ông man
117 | 117 năm year
118 | 118 đến came
119 | 119 chương trình show
120 | 120 mỗi every
121 | 121 tốt good
122 | 122 tôi me
123 | 123 cung cấp cho give
124 | 124 của chúng tôi our
125 | 125 dưới under
126 | 126 tên name
127 | 127 rất very
128 | 128 thông qua through
129 | 129 chỉ just
130 | 130 hình thức form
131 | 131 câu sentence
132 | 132 tuyệt vời great
133 | 133 nghi think
134 | 134 nói say
135 | 135 giúp help
136 | 136 thấp low
137 | 137 dòng line
138 | 138 khác nhau differ
139 | 139 lần lượt turn
140 | 140 nguyên nhân cause
141 | 141 nhiều much
142 | 142 có nghĩa là mean
143 | 143 trước before
144 | 144 di chuyển move
145 | 145 ngay right
146 | 146 cậu bé boy
147 | 147 cũ old
148 | 148 quá too
149 | 149 như nhau same
150 | 150 cô she
151 | 151 tất cả all
152 | 152 có there
153 | 153 khi when
154 | 154 lên up
155 | 155 sử dụng use
156 | 156 của bạn your
157 | 157 cách way
158 | 158 về about
159 | 159 nhiều many
160 | 160 sau đó then
161 | 161 họ them
162 | 162 viết write
163 | 163 sẽ would
164 | 164 như like
165 | 165 để so
166 | 166 các these
167 | 167 cô her
168 | 168 lâu long
169 | 169 làm make
170 | 170 điều thing
171 | 171 thấy see
172 | 172 anh him
173 | 173 hai two
174 | 174 có has
175 | 175 xem look
176 | 176 hơn more
177 | 177 ngày day
178 | 178 có thể could
179 | 179 đi go
180 | 180 đến come
181 | 181 đã làm did
182 | 182 số number
183 | 183 âm thanh sound
184 | 184 không có no
185 | 185 nhất most
186 | 186 nhân dân people
187 | 187 của tôi my
188 | 188 hơn over
189 | 189 biết know
190 | 190 nước water
191 | 191 hơn than
192 | 192 gọi call
193 | 193 đầu tiên first
194 | 194 người who
195 | 195 có thể may
196 | 196 xuống down
197 | 197 bên side
198 | 198 được been
199 | 199 bây giờ now
200 | 200 tìm find
201 | 201 đầu head
202 | 202 đứng stand
203 | 203 riêng own
204 | 204 trang page
205 | 205 nên should
206 | 206 nước country
207 | 207 tìm thấy found
208 | 208 câu trả lời answer
209 | 209 trường school
210 | 210 phát triển grow
211 | 211 nghiên cứu study
212 | 212 vẫn still
213 | 213 học learn
214 | 214 nhà máy plant
215 | 215 bìa cover
216 | 216 thực phẩm food
217 | 217 ánh nắng mặt trời sun
218 | 218 bốn four
219 | 219 giữa between
220 | 220 nhà nước state
221 | 221 giữ keep
222 | 222 mắt eye
223 | 223 không bao giờ never
224 | 224 cuối cùng last
225 | 225 cho phép let
226 | 226 nghĩ thought
227 | 227 thành phố city
228 | 228 cây tree
229 | 229 qua cross
230 | 230 trang trại farm
231 | 231 cứng hard
232 | 232 bắt đầu start
233 | 233 might might
234 | 234 câu chuyện story
235 | 235 cưa saw
236 | 236 đến nay far
237 | 237 biển sea
238 | 238 vẽ draw
239 | 239 còn lại left
240 | 240 cuối late
241 | 241 chạy run
242 | 242 không don’t
243 | 243 trong khi while
244 | 244 báo chí press
245 | 245 gần close
246 | 246 đêm night
247 | 247 thực real
248 | 248 cuộc sống life
249 | 249 số few
250 | 250 phía bắc north
251 | 251 cuốn sách book
252 | 252 thực hiện carry
253 | 253 mất took
254 | 254 khoa học science
255 | 255 ăn eat
256 | 256 phòng room
257 | 257 người bạn friend
258 | 258 bắt đầu began
259 | 259 ý tưởng idea
260 | 260 cá fish
261 | 261 núi mountain
262 | 262 ngăn chặn stop
263 | 263 một lần once
264 | 264 cơ sở base
265 | 265 nghe hear
266 | 266 ngựa horse
267 | 267 cắt cut
268 | 268 chắc chắn sure
269 | 269 xem watch
270 | 270 màu color
271 | 271 khuôn mặt face
272 | 272 gỗ wood
273 | 273 chính main
274 | 274 mở open
275 | 275 dường như seem
276 | 276 cùng together
277 | 277 tiếp theo next
278 | 278 trắng white
279 | 279 trẻ em children
280 | 280 bắt đầu begin
281 | 281 có got
282 | 282 đi bộ walk
283 | 283 Ví dụ example
284 | 284 giảm bớt ease
285 | 285 giấy paper
286 | 286 nhóm group
287 | 287 luôn luôn always
288 | 288 nhạc music
289 | 289 những those
290 | 290 cả hai both
291 | 291 đánh dấu mark
292 | 292 thường often
293 | 293 thư letter
294 | 294 cho đến khi until
295 | 295 dặm mile
296 | 296 sông river
297 | 297 xe car
298 | 298 chân feet
299 | 299 chăm sóc care
300 | 300 thứ hai second
301 | 301 đủ enough
302 | 302 đồng bằng plain
303 | 303 cô gái girl
304 | 304 thông thường usual
305 | 305 trẻ young
306 | 306 sẵn sàng ready
307 | 307 trên đây above
308 | 308 bao giờ ever
309 | 309 màu đỏ red
310 | 310 danh sách list
311 | 311 mặc dù though
312 | 312 cảm thấy feel
313 | 313 nói chuyện talk
314 | 314 chim bird
315 | 315 sớm soon
316 | 316 cơ thể body
317 | 317 con chó dog
318 | 318 gia đình family
319 | 319 trực tiếp direct
320 | 320 đặt ra pose
321 | 321 lại leave
322 | 322 bài hát song
323 | 323 đo lường measure
324 | 324 cửa door
325 | 325 sản phẩm product
326 | 326 đen black
327 | 327 ngắn short
328 | 328 chữ số numeral
329 | 329 lớp class
330 | 330 gió wind
331 | 331 câu hỏi question
332 | 332 xảy ra happen
333 | 333 hoàn thành complete
334 | 334 tàu ship
335 | 335 khu vực area
336 | 336 một nửa half
337 | 337 đá rock
338 | 338 để order
339 | 339 lửa fire
340 | 340 nam south
341 | 341 vấn đề problem
342 | 342 mảnh piece
343 | 343 nói told
344 | 344 biết knew
345 | 345 vượt qua pass
346 | 346 từ since
347 | 347 đầu top
348 | 348 toàn bộ whole
349 | 349 vua king
350 | 350 đường phố street
351 | 351 inch inch
352 | 352 nhân multiply
353 | 353 không có gì nothing
354 | 354 Tất nhiên course
355 | 355 ở lại stay
356 | 356 bánh xe wheel
357 | 357 đầy đủ full
358 | 358 lực force
359 | 359 màu xanh blue
360 | 360 đối tượng object
361 | 361 quyết định decide
362 | 362 bề mặt surface
363 | 363 sâu deep
364 | 364 mặt trăng moon
365 | 365 đảo island
366 | 366 chân foot
367 | 367 hệ thống system
368 | 368 bận rộn busy
369 | 369 kiểm tra test
370 | 370 ghi record
371 | 371 thuyền boat
372 | 372 phổ biến common
373 | 373 vàng gold
374 | 374 có thể possible
375 | 375 máy bay plane
376 | 376 thay stead
377 | 377 khô dry
378 | 378 tự hỏi wonder
379 | 379 cười laugh
380 | 380 ngàn thousand
381 | 381 trước ago
382 | 382 ran ran
383 | 383 kiểm tra check
384 | 384 trò chơi game
385 | 385 hình dạng shape
386 | 386 đánh đồng equate
387 | 387 nóng hot
388 | 388 bỏ lỡ miss
389 | 389 mang brought
390 | 390 nhiệt heat
391 | 391 tuyết snow
392 | 392 lốp xe tire
393 | 393 mang lại bring
394 | 394 vâng yes
395 | 395 xa distant
396 | 396 điền fill
397 | 397 đông east
398 | 398 sơn paint
399 | 399 ngôn ngữ language
400 | 400 trong among
401 | 401 đơn vị unit
402 | 402 điện power
403 | 403 thị trấn town
404 | 404 tốt fine
405 | 405 nhất định certain
406 | 406 bay fly
407 | 407 giảm fall
408 | 408 dẫn lead
409 | 409 kêu cry
410 | 410 tối dark
411 | 411 máy machine
412 | 412 ghi note
413 | 413 đợi wait
414 | 414 kế hoạch plan
415 | 415 con số figure
416 | 416 sao star
417 | 417 hộp box
418 | 418 danh từ noun
419 | 419 lĩnh vực field
420 | 420 phần còn lại rest
421 | 421 chính xác correct
422 | 422 thể able
423 | 423 bảng pound
424 | 424 Xong done
425 | 425 vẻ đẹp beauty
426 | 426 ổ đĩa drive
427 | 427 đứng stood
428 | 428 chứa contain
429 | 429 trước front
430 | 430 dạy teach
431 | 431 tuần week
432 | 432 thức final
433 | 433 đã gave
434 | 434 màu xanh lá cây green
435 | 435 oh oh
436 | 436 nhanh chóng quick
437 | 437 phát triển develop
438 | 438 đại dương ocean
439 | 439 ấm áp warm
440 | 440 miễn phí free
441 | 441 phút minute
442 | 442 mạnh mẽ strong
443 | 443 đặc biệt special
444 | 444 tâm mind
445 | 445 sau behind
446 | 446 trong clear
447 | 447 đuôi tail
448 | 448 sản xuất produce
449 | 449 thực tế fact
450 | 450 không gian space
451 | 451 nghe heard
452 | 452 tốt nhất best
453 | 453 giờ hour
454 | 454 tốt hơn better
455 | 455 đúng true
456 | 456 trong khi during
457 | 457 trăm hundred
458 | 458 năm five
459 | 459 nhớ remember
460 | 460 bước step
461 | 461 đầu early
462 | 462 giư hold
463 | 463 tây west
464 | 464 mặt đất ground
465 | 465 quan tâm interest
466 | 466 đạt reach
467 | 467 nhanh chóng fast
468 | 468 động từ verb
469 | 469 hát sing
470 | 470 lắng nghe listen
471 | 471 sáu six
472 | 472 bảng table
473 | 473 du lịch travel
474 | 474 ít less
475 | 475 buổi sáng morning
476 | 476 mười ten
477 | 477 đơn giản simple
478 | 478 nhiều several
479 | 479 nguyên âm vowel
480 | 480 hướng toward
481 | 481 chiến tranh war
482 | 482 đặt lay
483 | 483 chống lại against
484 | 484 mô hình pattern
485 | 485 chậm slow
486 | 486 trung tâm center
487 | 487 tình yêu love
488 | 488 người person
489 | 489 tiền money
490 | 490 phục vụ serve
491 | 491 xuất hiện appear
492 | 492 đường road
493 | 493 Bản đồ map
494 | 494 mưa rain
495 | 495 quy tắc rule
496 | 496 phối govern
497 | 497 kéo pull
498 | 498 lạnh cold
499 | 499 thông báo notice
500 | 500 giọng nói voice
501 | 501 năng lượng energy
502 | 502 săn hunt
503 | 503 có thể xảy ra probable
504 | 504 giường bed
505 | 505 anh trai brother
506 | 506 trứng egg
507 | 507 đi xe ride
508 | 508 pin cell
509 | 509 tin believe
510 | 510 có lẽ perhaps
511 | 511 chọn pick
512 | 512 đột ngột sudden
513 | 513 tính count
514 | 514 vuông square
515 | 515 lý do reason
516 | 516 chiều dài length
517 | 517 đại diện represent
518 | 518 nghệ thuật art
519 | 519 Tiêu đề subject
520 | 520 khu region
521 | 521 kích thước size
522 | 522 khác nhau vary
523 | 523 giải quyết settle
524 | 524 nói speak
525 | 525 trọng lượng weight
526 | 526 chung general
527 | 527 băng ice
528 | 528 vấn đề matter
529 | 529 vòng tròn circle
530 | 530 đôi pair
531 | 531 bao gồm include
532 | 532 chia divide
533 | 533 âm tiết syllable
534 | 534 cảm thấy felt
535 | 535 lớn grand
536 | 536 bóng ball
537 | 537 nhưng yet
538 | 538 sóng wave
539 | 539 rơi drop
540 | 540 tim heart
541 | 541 là am
542 | 542 hiện nay present
543 | 543 nặng heavy
544 | 544 khiêu vũ dance
545 | 545 động cơ engine
546 | 546 vị trí position
547 | 547 cánh tay arm
548 | 548 rộng wide
549 | 549 buồm sail
550 | 550 tài liệu material
551 | 551 phần fraction
552 | 552 rừng forest
553 | 553 ngồi sit
554 | 554 cuộc đua race
555 | 555 cửa sổ window
556 | 556 cửa hàng store
557 | 557 mùa hè summer
558 | 558 đào tạo train
559 | 559 ngủ sleep
560 | 560 chứng minh prove
561 | 561 đơn độc lone
562 | 562 chân leg
563 | 563 tập thể dục exercise
564 | 564 tường wall
565 | 565 bắt catch
566 | 566 mount mount
567 | 567 muốn wish
568 | 568 bầu trời sky
569 | 569 hội đồng quản trị board
570 | 570 niềm vui joy
571 | 571 mùa đông winter
572 | 572 ngồi sat
573 | 573 bằng văn bản written
574 | 574 hoang dã wild
575 | 575 cụ instrument
576 | 576 giữ kept
577 | 577 kính glass
578 | 578 cỏ grass
579 | 579 bò cow
580 | 580 công việc job
581 | 581 cạnh edge
582 | 582 dấu hiệu sign
583 | 583 lần visit
584 | 584 qua past
585 | 585 mềm soft
586 | 586 vui vẻ fun
587 | 587 sáng bright
588 | 588 khí gas
589 | 589 thời tiết weather
590 | 590 tháng month
591 | 591 triệu million
592 | 592 chịu bear
593 | 593 kết thúc finish
594 | 594 hạnh phúc happy
595 | 595 hy vọng hope
596 | 596 hoa flower
597 | 597 mặc clothe
598 | 598 lạ strange
599 | 599 ra đi gone
600 | 600 thương mại trade
601 | 601 giai điệu melody
602 | 602 chuyến đi trip
603 | 603 văn phòng office
604 | 604 nhận receive
605 | 605 hàng row
606 | 606 miệng mouth
607 | 607 chính xác exact
608 | 608 biểu tượng symbol
609 | 609 chết die
610 | 610 nhất least
611 | 611 rắc rối trouble
612 | 612 hét lên shout
613 | 613 trừ except
614 | 614 đã viết wrote
615 | 615 hạt giống seed
616 | 616 giai điệu tone
617 | 617 tham gia join
618 | 618 đề nghị suggest
619 | 619 sạch clean
620 | 620 nghỉ break
621 | 621 phụ nữ lady
622 | 622 sân yard
623 | 623 tăng rise
624 | 624 xấu bad
625 | 625 đòn blow
626 | 626 dầu oil
627 | 627 máu blood
628 | 628 chạm touch
629 | 629 tăng grew
630 | 630 phần trăm cent
631 | 631 trộn mix
632 | 632 đội team
633 | 633 dây wire
634 | 634 chi phí cost
635 | 635 thua lost
636 | 636 nâu brown
637 | 637 mặc wear
638 | 638 vườn garden
639 | 639 như nhau equal
640 | 640 gửi sent
641 | 641 chọn choose
642 | 642 giảm fell
643 | 643 phù hợp với fit
644 | 644 chảy flow
645 | 645 công bằng fair
646 | 646 ngân hàng bank
647 | 647 thu thập collect
648 | 648 lưu save
649 | 649 kiểm soát control
650 | 650 số thập phân decimal
651 | 651 tai ear
652 | 652 khác else
653 | 653 khá quite
654 | 654 đã phá vỡ broke
655 | 655 khi case
656 | 656 trung middle
657 | 657 giết kill
658 | 658 con trai son
659 | 659 hồ lake
660 | 660 thời điểm moment
661 | 661 quy mô scale
662 | 662 lớn loud
663 | 663 mùa xuân spring
664 | 664 quan sát observe
665 | 665 con child
666 | 666 thẳng straight
667 | 667 phụ âm consonant
668 | 668 quốc gia nation
669 | 669 từ điển dictionary
670 | 670 sưa milk
671 | 671 tốc độ speed
672 | 672 phương pháp method
673 | 673 cơ quan organ
674 | 674 trả pay
675 | 675 tuổi age
676 | 676 phần section
677 | 677 váy dress
678 | 678 điện toán đám mây cloud
679 | 679 bất ngờ surprise
680 | 680 yên tĩnh quiet
681 | 681 đá stone
682 | 682 nhỏ tiny
683 | 683 lên cao climb
684 | 684 mát mẻ cool
685 | 685 thiết kế design
686 | 686 người nghèo poor
687 | 687 rất nhiều lot
688 | 688 thí nghiệm experiment
689 | 689 dưới bottom
690 | 690 chính key
691 | 691 sắt iron
692 | 692 đơn single
693 | 693 thanh stick
694 | 694 phẳng flat
695 | 695 hai mươi twenty
696 | 696 da skin
697 | 697 nụ cười smile
698 | 698 nếp crease
699 | 699 lỗ hole
700 | 700 nhảy jump
701 | 701 bé baby
702 | 702 tám eight
703 | 703 làng village
704 | 704 đáp ứng meet
705 | 705 gốc root
706 | 706 mua buy
707 | 707 nâng cao raise
708 | 708 giải quyết solve
709 | 709 kim loại metal
710 | 710 liệu whether
711 | 711 đẩy push
712 | 712 bảy seven
713 | 713 đoạn paragraph
714 | 714 thứ ba third
715 | 715 có trách nhiệm shall
716 | 716 được tổ chức held
717 | 717 lông hair
718 | 718 mô tả describe
719 | 719 nấu ăn cook
720 | 720 sàn floor
721 | 721 hoặc either
722 | 722 kết quả result
723 | 723 ghi burn
724 | 724 đồi hill
725 | 725 an toàn safe
726 | 726 mèo cat
727 | 727 thế kỷ century
728 | 728 xem xét consider
729 | 729 loại type
730 | 730 pháp luật law
731 | 731 bit bit
732 | 732 bờ biển coast
733 | 733 bản sao copy
734 | 734 cụm từ phrase
735 | 735 im lặng silent
736 | 736 cao tall
737 | 737 cát sand
738 | 738 đất soil
739 | 739 cuộn roll
740 | 740 nhiệt độ temperature
741 | 741 ngón tay finger
742 | 742 ngành công nghiệp industry
743 | 743 giá trị value
744 | 744 cuộc chiến fight
745 | 745 lời nói dối lie
746 | 746 đánh bại beat
747 | 747 kích thích excite
748 | 748 tự nhiên natural
749 | 749 xem view
750 | 750 ý nghĩa sense
751 | 751 vốn capital
752 | 752 sẽ không won’t
753 | 753 ghế chair
754 | 754 nguy hiểm danger
755 | 755 trái cây fruit
756 | 756 giàu rich
757 | 757 dày thick
758 | 758 người lính soldier
759 | 759 quá trình process
760 | 760 hoạt động operate
761 | 761 thực hành practice
762 | 762 riêng biệt separate
763 | 763 khó khăn difficult
764 | 764 bác sĩ doctor
765 | 765 xin vui lòng please
766 | 766 bảo vệ protect
767 | 767 trưa noon
768 | 768 cây trồng crop
769 | 769 hiện đại modern
770 | 770 yếu tố element
771 | 771 nhấn hit
772 | 772 sinh viên student
773 | 773 góc corner
774 | 774 bên party
775 | 775 cung cấp supply
776 | 776 có whose
777 | 777 xác định vị trí locate
778 | 778 vòng ring
779 | 779 nhân vật character
780 | 780 côn trùng insect
781 | 781 bắt caught
782 | 782 thời gian period
783 | 783 chỉ ra indicate
784 | 784 radio radio
785 | 785 nói spoke
786 | 786 nguyên tử atom
787 | 787 con người human
788 | 788 lịch sử history
789 | 789 hiệu lực effect
790 | 790 điện electric
791 | 791 mong đợi expect
792 | 792 xương bone
793 | 793 đường sắt rail
794 | 794 tưởng tượng imagine
795 | 795 cho provide
796 | 796 đồng ý agree
797 | 797 do đó thus
798 | 798 nhẹ nhàng gentle
799 | 799 người phụ nữ woman
800 | 800 đội trưởng captain
801 | 801 đoán guess
802 | 802 cần thiết necessary
803 | 803 sắc nét sharp
804 | 804 cánh wing
805 | 805 tạo create
806 | 806 hàng xóm neighbor
807 | 807 rửa wash
808 | 808 bat bat
809 | 809 thay rather
810 | 810 đám đông crowd
811 | 811 ngô corn
812 | 812 so sánh compare
813 | 813 bài thơ poem
814 | 814 chuỗi string
815 | 815 chuông bell
816 | 816 phụ thuộc depend
817 | 817 thịt meat
818 | 818 chà rub
819 | 819 ống tube
820 | 820 nổi tiếng famous
821 | 921 đồng đô la dollar
822 | 822 sông stream
823 | 823 sợ hãi fear
824 | 284 cảnh sight
825 | 825 mỏng thin
826 | 826 tam giác triangle
827 | 827 hành tinh planet
828 | 828 nhanh hurry
829 | 829 trưởng chief
830 | 830 thuộc địa colony
831 | 831 đồng hồ clock
832 | 832 tôi mine
833 | 833 cà vạt tie
834 | 834 nhập enter
835 | 835 chính major
836 | 836 tươi fresh
837 | 837 tìm kiếm search
838 | 838 gửi send
839 | 839 vàng yellow
840 | 840 súng gun
841 | 841 cho phép allow
842 | 842 in print
843 | 843 chết dead
844 | 844 tại chỗ spot
845 | 845 sa mạc desert
846 | 846 phù hợp với suit
847 | 847 hiện tại current
848 | 848 thang máy lift
849 | 840 tăng rose
850 | 850 đến arrive
851 | 851 chủ master
852 | 852 theo dõi track
853 | 853 mẹ parent
854 | 854 bờ shore
855 | 855 phân chia division
856 | 856 tờ sheet
857 | 857 chất substance
858 | 858 ủng hộ favor
859 | 859 kết nối connect
860 | 860 bài post
861 | 861 chi tiêu spend
862 | 862 hợp âm chord
863 | 863 chất béo fat
864 | 864 vui glad
865 | 865 ban đầu original
866 | 866 chia sẻ share
867 | 867 trạm station
868 | 868 cha dad
869 | 869 bánh mì bread
870 | 870 phí charge
871 | 871 thích hợp proper
872 | 872 thanh bar
873 | 873 phục vụ offer
874 | 874 phân khúc segment
875 | 875 nô lệ slave
876 | 876 vịt duck
877 | 877 ngay lập tức instant
878 | 878 thị trường market
879 | 879 mức độ degree
880 | 880 cư populate
881 | 881 gà chick
882 | 882 thân yêu dear
883 | 883 kẻ thù enemy
884 | 884 trả lời reply
885 | 885 ly drink
886 | 886 xảy ra occur
887 | 887 hỗ trợ support
888 | 888 bài phát biểu speech
889 | 889 thiên nhiên nature
890 | 890 phạm vi range
891 | 891 hơi nước steam
892 | 892 chuyển động motion
893 | 893 con đường path
894 | 894 chất lỏng liquid
895 | 895 đăng nhập log
896 | 896 có nghĩa là meant
897 | 897 thương quotient
898 | 898 răng teeth
899 | 899 vỏ shell
900 | 900 cổ neck
901 | 901 oxy oxygen
902 | 902 đường sugar
903 | 903 chết death
904 | 904 khá pretty
905 | 905 kỹ năng skill
906 | 906 phụ nữ women
907 | 907 mùa season
908 | 908 giải pháp solution
909 | 909 nam châm magnet
910 | 910 bạc silver
911 | 911 cảm ơn thank
912 | 912 chi nhánh branch
913 | 913 trận đấu match
914 | 914 hậu tố suffix
915 | 915 đặc biệt là especially
916 | 916 sung fig
917 | 917 sợ afraid
918 | 918 to huge
919 | 919 em gái sister
920 | 920 thép steel
921 | 921 thảo luận discuss
922 | 922 về phía trước forward
923 | 923 tương tự similar
924 | 924 hướng dẫn guide
925 | 925 kinh nghiệm experience
926 | 926 điểm score
927 | 927 táo apple
928 | 928 mua bought
929 | 929 dẫn led
930 | 930 sân pitch
931 | 931 áo coat
932 | 932 khối lượng mass
933 | 933 thẻ card
934 | 934 ban nhạc band
935 | 935 dây rope
936 | 936 trượt slip
937 | 937 giành chiến thắng win
938 | 938 mơ dream
939 | 939 buổi tối evening
940 | 940 điều kiện condition
941 | 941 thức ăn chăn nuôi feed
942 | 942 công cụ tool
943 | 943 tổng số total
944 | 944 cơ bản basic
945 | 945 mùi smell
946 | 946 thung lũng valley
947 | 947 cũng không nor
948 | 948 đôi double
949 | 949 ghế seat
950 | 950 tiếp tục continue
951 | 951 khối block
952 | 952 biểu đồ chart
953 | 953 mũ hat
954 | 954 bán sell
955 | 955 thành công success
956 | 956 công ty company
957 | 957 trừ subtract
958 | 958 sự kiện event
959 | 959 riêng particular
960 | 960 thỏa thuận deal
961 | 961 bơi swim
962 | 962 hạn term
963 | 963 ngược lại opposite
964 | 964 vợ wife
965 | 965 giày shoe
966 | 966 vai shoulder
967 | 967 lây lan spread
968 | 968 sắp xếp arrange
969 | 969 trại camp
970 | 970 phát minh invent
971 | 971 bông cotton
972 | 972 Sinh born
973 | 973 xác định determine
974 | 974 lít quart
975 | 975 chín nine
976 | 976 xe tải truck
977 | 977 tiếng ồn noise
978 | 978 mức level
979 | 979 cơ hội chance
980 | 980 thu thập gather
981 | 981 cửa hàng shop
982 | 982 căng ra stretch
983 | 983 ném throw
984 | 984 tỏa sáng shine
985 | 985 tài sản property
986 | 986 cột column
987 | 987 phân tử molecule
988 | 988 chọn select
989 | 989 sai wrong
990 | 990 màu xám gray
991 | 991 lặp lại repeat
992 | 992 yêu cầu require
993 | 993 rộng broad
994 | 994 chuẩn bị prepare
995 | 995 muối salt
996 | 996 mui nose
997 | 997 số nhiều plural
998 | 998 tức giận anger
999 | 999 xin claim
1000 | 1000 lục continent
--------------------------------------------------------------------------------
/named-name-recognition/arabic-names.txt:
--------------------------------------------------------------------------------
1 | abbad
2 | abbas
3 | abd
4 | al-uzza
5 | abdus
6 | salam
7 | manaf
8 | rabbo
9 | abdel
10 | fattah
11 | nour
12 | abdi
13 | abdolreza
14 | abdu
15 | abdul
16 | ahad
17 | ali
18 | alim
19 | azim
20 | al-aziz
21 | baqi
22 | bari
23 | basir
24 | basit
25 | ghaffar
26 | ghani
27 | hadi
28 | hafiz
29 | hai
30 | hakim
31 | halim
32 | hamid
33 | haq
34 | hussein
35 | jabbar
36 | jalil
37 | jamil
38 | karim
39 | khaliq
40 | latif
41 | majid
42 | malik
43 | mannan
44 | monem
45 | muttalib
46 | qadir
47 | qayyum
48 | quddus
49 | rashid
50 | samad
51 | sattar
52 | wadud
53 | wahhab
54 | wahid
55 | zahir
56 | zahra
57 | abdullah
58 | abdur
59 | rab
60 | rahim
61 | al-rahman
62 | raqib
63 | rauf
64 | razzaq
65 | sabur
66 | shakur
67 | abid
68 | abidin
69 | abo
70 | ab
71 | abu
72 | al-qasim
73 | bakr
74 | hafs
75 | hamza
76 | nasir
77 | nasr
78 | abu'l-fadl
79 | adam
80 | adeel
81 | adeem
82 | adem
83 | aden
84 | adham
85 | adib
86 | adil
87 | adir
88 | adli
89 | adnan
90 | afif
91 | ahmad
92 | ahmed
93 | tijani
94 | ahsan
95 | akeem
96 | akif
97 | akram
98 | alaa
99 | aladdin
100 | naqi
101 | reza
102 | aman
103 | amanullah
104 | amer
105 | amin
106 | al-din
107 | aminullah
108 | amir
109 | amjad
110 | ammar
111 | amr
112 | anas
113 | anis
114 | anisur
115 | rahman
116 | anjem
117 | anwar
118 | anwaruddin
119 | aqeel
120 | ari
121 | arif
122 | asad
123 | asadullah
124 | asem
125 | asghar
126 | ashraf
127 | asif
128 | asil
129 | islam
130 | ataullah
131 | atif
132 | atiq
133 | atiqullah
134 | awad
135 | ayad
136 | ayman/aiman/aimen/aymen
137 | ayub
138 | azem
139 | azeem
140 | azhar
141 | azimullah
142 | aziz
143 | azizullah
144 | azizur
145 | azmi
146 | azus
147 | badi
148 | badr
149 | bagher
150 | baha
151 | baha'
152 | bahri
153 | baki
154 | bakir
155 | bara
156 | barkat
157 | barkatullah
158 | bashar
159 | bashir
160 | basri
161 | bilal
162 | bilel
163 | billah
164 | boualem
165 | boulos
166 | boutros
167 | brahim
168 | burhan
169 | caden
170 | chadli
171 | daniel/danyal
172 | dastgir
173 | daud
174 | dawoud
175 | dhikrullah
176 | ehsanullah
177 | ekram
178 | fadel
179 | fahd
180 | faheem
181 | fahmi
182 | fahri
183 | faisal
184 | faiz
185 | faizan
186 | faizullah
187 | fakhr
188 | fakhraddin
189 | fakhruddin
190 | faqir
191 | faraj
192 | farhat
193 | farid
194 | fariduddin
195 | faris
196 | farooq
197 | fasih
198 | fathallah
199 | fathi
200 | fatin
201 | fawaz
202 | fawzi
203 | fayez
204 | fazel
205 | fazl
206 | ur
207 | fazlallah
208 | fazli
209 | fazlul
210 | fikri
211 | fouzan
212 | fouad
213 | fuad
214 | furkan
215 | gaffar
216 | gamil
217 | ghanem
218 | ghassan
219 | ghiyath
220 | ghulam
221 | faruq
222 | mohiuddin
223 | gulzar
224 | habib
225 | habibullah
226 | hadem
227 | hadid
228 | hafeez
229 | hafizullah
230 | haitham
231 | hajj
232 | hajji
233 | hakam
234 | haldun
235 | hamdan
236 | hamdi
237 | hamidullah
238 | hani
239 | harbi
240 | hanif
241 | harun
242 | hashem
243 | hashim
244 | hasib
245 | hassan
246 | hassim
247 | hatem
248 | hayatullah
249 | haydar
250 | hazem
251 | hibat
252 | allah
253 | hichem
254 | hidayatullah
255 | hikmat
256 | hilmi
257 | hisham
258 | ud-din
259 | hossam
260 | hurairah
261 | husam
262 | ad-din
263 | ibrahim
264 | ibro
265 | idris
266 | ihab
267 | ihsan
268 | ikhtiyar
269 | ikramullah
270 | ikrimah
271 | ilyas
272 | imad
273 | imran
274 | imtiaz
275 | inaam
276 | inam-ul-haq
277 | inayatullah
278 | iqbal
279 | irfan
280 | isa
281 | ishak
282 | issam
283 | ishtiaq
284 | iskandar
285 | ismail
286 | ismat
287 | ismatullah
288 | izz
289 | izzat
290 | izzatullah
291 | ja'far
292 | jabal
293 | jaber
294 | jabir
295 | jabr
296 | jahid
297 | jalal
298 | jamal
299 | jameel
300 | jarrah
301 | jasem
302 | jawad
303 | jawdat
304 | jihad
305 | jubayr
306 | junayd
307 | jurj
308 | min
309 | ka'b
310 | kadeem
311 | kadir
312 | kadri
313 | kafeel
314 | kamal
315 | kamil
316 | karem
317 | kashif
318 | kazem
319 | khadem
320 | khair
321 | khakim
322 | khalfan
323 | khalid
324 | khalifah
325 | khalil
326 | khalil-ur-rehman
327 | khamis
328 | kulthum
329 | labib
330 | lalji
331 | luay
332 | lutfullah
333 | lutfur
334 | mahalati
335 | mahbubur
336 | mahdi
337 | mahfuz
338 | mahir
339 | mahmud
340 | mamdouh
341 | mansur
342 | manzur
343 | marwan
344 | mashallah
345 | masoud
346 | maytham
347 | mehdi
348 | melhem
349 | michel
350 | midhat
351 | mizanur
352 | moatassem
353 | moeen
354 | moemen
355 | mohammad
356 | taqi
357 | mohannad
358 | mohy
359 | moin
360 | moinuddin
361 | mojtaba
362 | moncef
363 | moneim
364 | mu'iz
365 | mua'dh
366 | muammer
367 | mubarak
368 | muhammad
369 | muharrem
370 | muhibullah
371 | muhsin
372 | mukhtar
373 | mumtaz
374 | munib
375 | munif
376 | munir
377 | murad
378 | murtaza
379 | musa
380 | muslim
381 | mustafa
382 | muzaffar
383 | nabih
384 | nabil
385 | nadeem
386 | nader
387 | nadir
388 | nadur
389 | naguib
390 | nahyan
391 | naif
392 | naim
393 | naji
394 | najib
395 | najibullah
396 | najim
397 | najm
398 | naqibullah
399 | naseeb
400 | naseer
401 | nasim
402 | nasrallah
403 | nasri
404 | nasser
405 | nassif
406 | nasuh
407 | nawaf
408 | nawaz
409 | nowfal
410 | nazif
411 | nazim
412 | nazimuddin
413 | nazmi
414 | nihad
415 | nimatullah
416 | nizam
417 | nu'man
418 | numan
419 | nuh
420 | nur
421 | nuri
422 | nurullah
423 | nusrat
424 | omar
425 | osama
426 | othman
427 | qaid
428 | qamar
429 | qasim
430 | qasymbek
431 | qudratullah
432 | qusay
433 | qutb
434 | rabih
435 | raed
436 | rafiq
437 | rahmatullah
438 | rahmi
439 | rajab
440 | rajaei
441 | raji
442 | ramiz
443 | ramzan
444 | rakibul
445 | rakib
446 | ramzi
447 | rashad
448 | rasul
449 | rayan
450 | redouane
451 | riad
452 | riaz
453 | ridwan
454 | rifat
455 | rizqallah
456 | ruhi
457 | ruhullah
458 | rukn
459 | rushdi
460 | sa'd
461 | seif
462 | ilislam
463 | sa‘id
464 | saud
465 | saad
466 | saadallah
467 | sabri
468 | sabah
469 | saddam
470 | sadik
471 | sadr
472 | safi
473 | safiullah
474 | sahir
475 | saif
476 | saifullah
477 | saifur
478 | sajid
479 | sajjad
480 | salah
481 | saleh
482 | salem
483 | salim
484 | salman
485 | samadu
486 | samee
487 | samer
488 | sami
489 | samir
490 | samirah
491 | samiullah
492 | sanaullah
493 | saqib
494 | sardar
495 | sarmad
496 | satam
497 | sayf
498 | sayyid
499 | shaban
500 | shad
501 | shafiq
502 | shafiqullah
503 | shahid
504 | shahrukh
505 | shakeel
506 | shakir
507 | shams
508 | shamsur
509 | sharaf
510 | sharifullah
511 | shawkat
512 | shawki
513 | shiraz
514 | shihab
515 | shujauddin
516 | shukri
517 | sidique
518 | sidqi
519 | sirajuddin
520 | suhail
521 | suleiman
522 | sultan
523 | shaheed
524 | taha
525 | taher
526 | tahmid
527 | tahsin
528 | talal
529 | talat
530 | talhah
531 | talib
532 | tarazi
533 | tariq
534 | tawfik
535 | tayeb
536 | tayfur
537 | tufail
538 | turki
539 | talin
540 | ubay
541 | ubayd
542 | uday
543 | uthman
544 | usama
545 | wadih
546 | wael
547 | wafi
548 | wahed
549 | wajdi
550 | wajid
551 | waleed
552 | waliullah
553 | wasim
554 | wazir
555 | wissem
556 | yacine
557 | yadollah
558 | yahya
559 | yakub
560 | yahir
561 | yasser
562 | yunus
563 | yusha
564 | yusuf
565 | zafar
566 | zafarullah
567 | zafer
568 | zahed
569 | zaid
570 | zaim
571 | zainal
572 | zakariya
573 | zaki
574 | zane
575 | zayn
576 | zeeshan
577 | ziad
578 | ziauddin
579 | ziaur
580 | zubayr
581 | abeer
582 | abiha
583 | adela
584 | afaf
585 | afreen
586 | aisha
587 | aliya
588 | alya
589 | amalia
590 | amina
591 | amira
592 | arwa
593 | ashraqat
594 | ashfa
595 | asma
596 | atikah
597 | aya
598 | azra
599 | aziza
600 | boutheina
601 | bushra
602 | besma
603 | chaima
604 | dalal
605 | dalia
606 | danielle
607 | dana
608 | dareen
609 | dina
610 | eliana
611 | esma
612 | eva
613 | farah
614 | farida
615 | fatima
616 | feyrouz
617 | fouz
618 | habiba
619 | hafsa
620 | hajra
621 | hala
622 | halima
623 | hamida
624 | hana
625 | hanifa
626 | havva
627 | hawa
628 | hayat
629 | hessa
630 | huda
631 | ibtisam
632 | ireen
633 | jamila
634 | jana
635 | jawahir
636 | jena
637 | jennifer
638 | jessica
639 | joelle
640 | joud
641 | jouri
642 | julia
643 | jumana
644 | kareena
645 | karima
646 | katya
647 | khadija
648 | khairunnisa
649 | khawlah
650 | lama
651 | lamia
652 | lana
653 | lara
654 | latifa
655 | layan
656 | leila
657 | lina
658 | lulwa
659 | madiha
660 | maha
661 | mahmuna
662 | mai
663 | malika
664 | maria
665 | marwa
666 | maryam
667 | maya
668 | maysoon
669 | melek
670 | melissa
671 | mila
672 | mira
673 | mirna
674 | mona
675 | munira
676 | nadia
677 | nadine
678 | nafisa
679 | nahla
680 | naila
681 | naima
682 | najat
683 | najma
684 | nalini
685 | nasrin
686 | natasha
687 | nathalie
688 | nawal
689 | nayla
690 | naziha
691 | nazira
692 | nehal
693 | nejla
694 | nermin
695 | nezha
696 | nina
697 | qistina
698 | rabia
699 | rahima
700 | rana
701 | rania
702 | rashida
703 | reem
704 | riffat
705 | rimas
706 | rita
707 | ruqayya
708 | saadia
709 | sabiha
710 | subiksha
711 | safaa
712 | safiya
713 | sahar
714 | saida
715 | saira
716 | sajida
717 | sakhra
718 | sakina
719 | salma
720 | samar
721 | samira
722 | samiya
723 | sandra
724 | sania
725 | sarah
726 | selma
727 | shahd
728 | shakira
729 | shatha
730 | sherine
731 | shumaila
732 | suha
733 | sumaya
734 | sonia
735 | soraida
736 | tahira
737 | tala
738 | tamara
739 | tanisha
740 | tanya
741 | tara
742 | taslima
743 | tina
744 | tuba
745 | umm
746 | wafaa
747 | waliya
748 | wajahat
749 | wajhat
750 | yamina
751 | yara
752 | yasmin
753 | zalayha
754 | zakiya
755 | zaynab
756 | zuhal
757 | zoraida
758 | amal
759 | chadi
760 | farrah
761 | laden
762 | nakia
763 | noor
764 | nair
765 | zain
766 | zia
767 | zial
--------------------------------------------------------------------------------
/named-name-recognition/french-names.txt:
--------------------------------------------------------------------------------
1 | Gabriel
2 | Léo
3 | Raphaël
4 | Arthur
5 | Louis
6 | Lucas
7 | Adam
8 | Jules
9 | Hugo
10 | Maël
11 | Liam
12 | Noah
13 | Paul
14 | Ethan
15 | Tiago
16 | Sacha
17 | Gabin
18 | Nathan
19 | Mohamed
20 | Aaron
21 | Tom
22 | Eden
23 | Théo
24 | Noé
25 | Léon
26 | Martin
27 | Mathis
28 | Nolan
29 | Victor
30 | Timéo
31 | Enzo
32 | Marius
33 | Axel
34 | Antoine
35 | Robin
36 | Isaac
37 | Naël
38 | Amir
39 | Valentin
40 | Rayan
41 | Augustin
42 | Ayden
43 | Clément
44 | Eliott
45 | Samuel
46 | Marceau
47 | Baptiste
48 | Gaspard
49 | Maxence
50 | Yanis
51 | Malo
52 | Ibrahim
53 | Sohan
54 | Maxime
55 | Evan
56 | Nino
57 | Mathéo
58 | Simon
59 | Lyam
60 | Alexandre
61 | Imran
62 | Naïm
63 | Kaïs
64 | Camille
65 | Thomas
66 | Ismaël
67 | Milo
68 | Côme
69 | Owen
70 | Lenny
71 | Soan
72 | Ilyan
73 | Kylian
74 | Noa
75 | Ilyes
76 | Oscar
77 | Léandre
78 | Pablo
79 | Diego
80 | Mathys
81 | Joseph
82 | Ayoub
83 | Youssef
84 | Wassim
85 | Noam
86 | Adem
87 | William
88 | Ali
89 | Basile
90 | Charles
91 | Antonin
92 | Thiago
93 | Logan
94 | Adrien
95 | Marin
96 | Jean
97 | Charly
98 | Esteban
99 | Noham
100 | Elio
101 | Charlie
102 | Auguste
103 | Timothée
104 | Alexis
105 | Milan
106 | David
107 | Issa
108 | Lorenzo
109 | Younes
110 | Amine
111 | Mahé
112 | Rafael
113 | Benjamin
114 | Kenzo
115 | Abel
116 | Anas
117 | Lucien
118 | Alessio
119 | Roméo
120 | Sandro
121 | Livio
122 | Mattéo
123 | Malone
124 | Rafaël
125 | Aymen
126 | Elias
127 | Louka
128 | Nathanaël
129 | Sasha
130 | Nahil
131 | Mathias
132 | Hamza
133 | Yassine
134 | Léonard
135 | Achille
136 | Pierre
137 | Ahmed
138 | Aylan
139 | Ewen
140 | Julian
141 | Ilan
142 | Ezio
143 | Thibault
144 | Timothé
145 | Zakaria
146 | Bastien
147 | Marcel
148 | Ruben
149 | Ulysse
150 | Maé
151 | Hayden
152 | Andrea
153 | Nassim
154 | Daniel
155 | Haroun
156 | Arsène
157 | Swan
158 | Mehdi
159 | Jayden
160 | Armand
161 | Alban
162 | Amaury
163 | Luka
164 | Matéo
165 | César
166 | Luca
167 | Eliot
168 | Omar
169 | Gianni
170 | Ilyas
171 | Moussa
172 | Idriss
173 | Dylan
174 | Mayron
175 | Joshua
176 | Kayden
177 | Nicolas
178 | Mohammed
179 | Félix
180 | Loan
181 | Corentin
182 | Erwan
183 | James
184 | Jonas
185 | Mylan
186 | Lino
187 | Gustave
188 | Swann
189 | Djibril
190 | Elliot
191 | Kassim
192 | Léandro
193 | Marcus
194 | Octave
195 | Iyad
196 | Ryan
197 | Quentin
198 | Bilal
199 | Ismaïl
200 | Anatole
201 | Hector
202 | Jad
203 | Loris
204 | Nahel
205 | Tyméo
206 | Léopold
207 | Milhan
208 | Samy
209 | Nael
210 | Tristan
211 | Matteo
212 | Kamil
213 | Emile
214 | Imrane
215 | Tao
216 | Marlon
217 | Zayn
218 | Elie
219 | Alex
220 | Elyo
221 | Henri
222 | Sofiane
223 | Sami
224 | Tim
225 | Loïs
226 | Marley
227 | Souleymane
228 | Souleyman
229 | Johan
230 | Titouan
231 | Anis
232 | Ismael
233 | Giulian
234 | Mathieu
235 | Lenzo
236 | Emmanuel
237 | Ange
238 | Gaël
239 | Issam
240 | Mamadou
241 | Ylan
242 | Emilio
243 | Eyden
244 | Cameron
245 | Edouard
246 | Ernest
247 | Lilian
248 | Julien
249 | Gauthier
250 | Romain
251 | Warren
252 | Ezra
253 | Iyed
254 | Nolhan
255 | Leandro
256 | Yann
257 | Célestin
258 | Gaston
259 | Solal
260 | Théodore
261 | Aubin
262 | Émile
263 | Luis
264 | Malik
265 | Andréa
266 | Elijah
267 | Justin
268 | Max
269 | Yacine
270 | Leo
271 | Tylio
272 | Morgan
273 | Aloïs
274 | Lisandro
275 | Éden
276 | Marwan
277 | Lukas
278 | Rayane
279 | Khalil
280 | Sam
281 | Élio
282 | Emir
283 | Tino
284 | Paolo
285 | Téo
286 | Joachim
287 | Aksel
288 | Georges
289 | Jassim
290 | Naïl
291 | Tony
292 | Adrian
293 | Wyatt
294 | Bryan
295 | Milann
296 | Nils
297 | Zayd
298 | Adel
299 | Raphael
300 | Louison
301 | Yusuf
302 | Jordan
303 | Elyas
304 | Lewis
305 | Qassim
306 | Jessim
307 | Lohan
308 | Aïden
309 | Soren
310 | Salim
311 | Youssouf
312 | Angelo
313 | Abdallah
314 | Marc
315 | Alix
316 | Fares
317 | Isaïah
318 | Edgar
319 | Youcef
320 | Dorian
321 | Ilian
322 | Roman
323 | Tilio
324 | Anton
325 | Ilhan
326 | Illan
327 | Tyler
328 | Ilyès
329 | Leny
330 | Mael
331 | Matthew
332 | Célian
333 | Lou
334 | Néo
335 | Fabio
336 | Aydan
337 | Bilel
338 | Lény
339 | Zack
340 | Marvin
341 | Zacharie
342 | Éthan
343 | Thibaut
344 | Aurèle
345 | Enaël
346 | Manoé
347 | Ziyad
348 | Nolann
349 | Harry
350 | Paco
351 | Matthieu
352 | Ousmane
353 | Yazid
354 | Aron
355 | Yaël
356 | Aurélien
357 | Ishaq
358 | Joud
359 | Loïc
360 | Eymen
361 | Tyago
362 | Ambroise
363 | Anthony
364 | Malonn
365 | Walid
366 | Chahine
367 | Ibrahima
368 | Loup
369 | Selim
370 | Ayan
371 | Dario
372 | Élie
373 | Ismail
374 | Farès
375 | Riyad
376 | Abdoulaye
377 | Tobias
378 | Lubin
379 | Nathaël
380 | Andy
381 | Jason
382 | Yahya
383 | Milàn
384 | Wael
385 | Thyméo
386 | Adil
387 | Vadim
388 | Wesley
389 | Idris
390 | Nohan
391 | Rémi
392 | Waël
393 | Muhammad
394 | Zakariya
395 | Mylann
396 | Siméon
397 | Evann
398 | Gaëtan
399 | Karim
400 | Miran
401 | Oumar
402 | Jaden
403 | Mateo
404 | Giovanni
405 | Ishak
406 | Jibril
407 | Loukas
408 | Mouhamed
409 | Melvin
410 | Soën
411 | Younès
412 | Alan
413 | Angel
414 | Dimitri
415 | Elouan
416 | Kevin
417 | Télio
418 | Amjad
419 | Stan
420 | Andrew
421 | Yasser
422 | Juliann
423 | Eli
424 | Hadrien
425 | Joan
426 | Ugo
427 | Yassin
428 | Tayron
429 | Théophile
430 | Alessandro
431 | Kelyan
432 | Clovis
433 | Kyllian
434 | Soulayman
435 | Aboubacar
436 | Albin
437 | Paulin
438 | Soann
439 | Etienne
440 | Pharell
441 | Jacques
442 | Nelson
443 | Thiméo
444 | Marco
445 | Imrân
446 | Kenan
447 | Haron
448 | Jacob
449 | Mohamed-Amine
450 | Zakarya
451 | Enes
452 | Florian
453 | Hassan
454 | Ayman
455 | Hippolyte
456 | Lilio
457 | Ylann
458 | Amadou
459 | Isaiah
460 | Ziad
461 | Adonis
462 | Colin
463 | Jonathan
464 | Lissandro
465 | Milhane
466 | Stanislas
467 | Louca
468 | Mickaël
469 | Mahdi
470 | Matis
471 | Ayaz
472 | Ewenn
473 | Islem
474 | Manoa
475 | Damian
476 | Joris
477 | Lysandre
478 | Miguel
479 | Olivier
480 | Damien
481 | Maëlan
482 | Matthias
483 | Balthazar
484 | Elliott
485 | Grégoire
486 | Guillaume
487 | Lyham
488 | Aymeric
489 | François
490 | Kyle
491 | Maximilien
492 | Pacôme
493 | Lowen
494 | Vincent
495 | Cayden
496 | Louay
497 | Safwan
498 | Ilann
499 | Luc
500 | Tommy
501 | Emma
502 | Jade
503 | Louise
504 | Alice
505 | Lina
506 | Chloé
507 | Rose
508 | Léa
509 | Mila
510 | Ambre
511 | Mia
512 | Anna
513 | Julia
514 | Inès
515 | Léna
516 | Juliette
517 | Zoé
518 | Manon
519 | Agathe
520 | Lou
521 | Lola
522 | Camille
523 | Nina
524 | Jeanne
525 | Inaya
526 | Romy
527 | Eva
528 | Romane
529 | Léonie
530 | Iris
531 | Lucie
532 | Luna
533 | Adèle
534 | Sarah
535 | Louna
536 | Charlotte
537 | Margaux
538 | Olivia
539 | Sofia
540 | Charlie
541 | Victoria
542 | Victoire
543 | Nour
544 | Margot
545 | Mya
546 | Giulia
547 | Clémence
548 | Alix
549 | Aya
550 | Clara
551 | Elena
552 | Capucine
553 | Lana
554 | Lya
555 | Lyna
556 | Lyana
557 | Théa
558 | Léana
559 | Anaïs
560 | Gabrielle
561 | Emy
562 | Yasmine
563 | Mathilde
564 | Maëlys
565 | Alicia
566 | Lilou
567 | Apolline
568 | Roxane
569 | Lise
570 | Assia
571 | Elise
572 | Lily
573 | Maria
574 | Maya
575 | Valentine
576 | Héloïse
577 | Marie
578 | Elsa
579 | Noémie
580 | Lisa
581 | Lila
582 | Alya
583 | Thaïs
584 | Ilyana
585 | Célia
586 | Candice
587 | Livia
588 | Zélie
589 | Salomé
590 | Constance
591 | Soline
592 | Emmy
593 | Maëlle
594 | Eléna
595 | Maryam
596 | Amélia
597 | Joy
598 | Océane
599 | Maïssa
600 | Arya
601 | Andréa
602 | Hanna
603 | Mélina
604 | Manel
605 | Joséphine
606 | Ella
607 | Céleste
608 | Myla
609 | Garance
610 | Faustine
611 | Sara
612 | Alma
613 | Aliyah
614 | Calie
615 | Ava
616 | Pauline
617 | Louisa
618 | Emna
619 | Diane
620 | Ines
621 | Elisa
622 | Nora
623 | Suzanne
624 | Amira
625 | Sophia
626 | Jana
627 | Aïcha
628 | Jenna
629 | Lison
630 | Louane
631 | Anaé
632 | Amina
633 | Elya
634 | Kenza
635 | Aria
636 | Ninon
637 | Eloïse
638 | Jennah
639 | Fatima
640 | Laura
641 | Nelya
642 | Albane
643 | Ana
644 | Charline
645 | Eden
646 | Elina
647 | Dina
648 | Justine
649 | Anaëlle
650 | Lara
651 | Élise
652 | Cléa
653 | Selma
654 | Liya
655 | Aliya
656 | Stella
657 | Lilia
658 | Coline
659 | Célestine
660 | Eléonore
661 | Jasmine
662 | Naya
663 | Camélia
664 | Neyla
665 | Leïla
666 | Khadija
667 | Mariam
668 | Léane
669 | Lilya
670 | Julie
671 | Éléonore
672 | Maëlya
673 | Cataleya
674 | Layana
675 | Maddy
676 | Fatoumata
677 | Cassandre
678 | Myriam
679 | Hana
680 | Sophie
681 | Ellie
682 | Augustine
683 | Éléna
684 | Méline
685 | Eline
686 | Alba
687 | Naomi
688 | Norah
689 | Naëlle
690 | Sirine
691 | Mélissa
692 | Judith
693 | Salma
694 | Leyna
695 | Louison
696 | Asma
697 | Kamila
698 | Marwa
699 | Carla
700 | Lia
701 | Aminata
702 | Anouk
703 | Alia
704 | Mayssa
705 | Assya
706 | Anastasia
707 | Lucy
708 | Maëva
709 | Raphaëlle
710 | Elia
711 | Safiya
712 | Elyna
713 | Emilie
714 | Tessa
715 | Chiara
716 | Noor
717 | Madeleine
718 | Axelle
719 | Liyah
720 | Sasha
721 | Tasnim
722 | Miya
723 | Alyssa
724 | Aurore
725 | Nélia
726 | Daphné
727 | Eléa
728 | Naïla
729 | Hortense
730 | Valentina
731 | Elyne
732 | Nola
733 | Lily-Rose
734 | Nayla
735 | Clémentine
736 | Lena
737 | Ayline
738 | Hafsa
739 | Lyanna
740 | Rachel
741 | Tess
742 | Kiara
743 | Nélya
744 | Line
745 | Soumaya
746 | Kayla
747 | Joyce
748 | Janna
749 | Jannah
750 | Maïwenn
751 | Mina
752 | Lylou
753 | Naomie
754 | Hannah
755 | Hanaé
756 | Clarisse
757 | Hawa
758 | Liana
759 | Noélie
760 | Chelsea
761 | Olympe
762 | Violette
763 | Sana
764 | Liv
765 | Maïna
766 | Talia
767 | Éline
768 | Lahna
769 | Mélia
770 | Cassie
771 | Aaliyah
772 | Aliénor
773 | Cloé
774 | Lucile
775 | Maïa
776 | Angèle
777 | Naïa
778 | Malak
779 | Enora
780 | Imane
781 | Maxine
782 | Yuna
783 | Esther
784 | Cléo
785 | Éva
786 | Amalia
787 | Ariane
788 | Bérénice
789 | Emilia
790 | Lyah
791 | Malia
792 | Izia
793 | Milla
794 | June
795 | Kélya
796 | Meryem
797 | Serena
798 | Blanche
799 | Paloma
800 | Lexie
801 | Lilas
802 | Brune
803 | Thelma
804 | Marion
805 | Paola
806 | Ashley
807 | Camila
808 | Wendy
809 | Elyana
810 | Mona
811 | Pia
812 | Alessia
813 | Diana
814 | Éloïse
815 | Abby
816 | Alizée
817 | Isaure
818 | Morgane
819 | Rosalie
820 | Amy
821 | Elif
822 | Shanna
823 | Flora
824 | Assiya
825 | Élina
826 | Safia
827 | Maëline
828 | Malya
829 | Marilou
830 | Maëlyne
831 | Thalia
832 | Héléna
833 | Ilona
834 | Solène
835 | Alexia
836 | Lyne
837 | Gabriella
838 | Anissa
839 | Claire
840 | Hidaya
841 | Talya
842 | Athénaïs
843 | Séléna
844 | Tasnime
845 | Amélie
846 | Mïa
847 | Aline
848 | Isra
849 | Amel
850 | Esma
851 | Safa
852 | Noa
853 | Awa
854 | Evy
855 | Ayla
856 | Dania
857 | Lucia
858 | Leïa
859 | Léonore
860 | Neïla
861 | Billie
862 | Eliana
863 | Lylia
864 | Anaya
865 | Éléa
866 | Luce
867 | Castille
868 | Laya
869 | Lili
870 | Philippine
871 | Tesnim
872 | Assil
873 | Farah
874 | Mira
875 | Amandine
876 | Estelle
877 | Johanna
878 | Mélya
879 | Shana
880 | Alyah
881 | Kelly
882 | Athéna
883 | Selena
884 | Bianca
885 | Lyla
886 | Séréna
887 | Alexandra
888 | Esmée
889 | Abigaëlle
890 | Juliana
891 | Sélène
892 | Yousra
893 | Gabriela
894 | Ema
895 | Amaya
896 | April
897 | Cassandra
898 | Elisabeth
899 | Halima
900 | Leya
901 | Mariama
902 | Rania
903 | Astrid
904 | Émilie
905 | Maddie
906 | Roxanne
907 | Leyla
908 | Hajar
909 | Hélèna
910 | Israa
911 | Sibylle
912 | Tiana
913 | Ania
914 | Gaïa
915 | Helena
916 | Romie
917 | Alycia
918 | Anaë
919 | Céline
920 | Maëlie
921 | Mélyna
922 | Maé
923 | Maëly
924 | Irina
925 | Nawel
926 | Leïna
927 | Kataleya
928 | Melissa
929 | Téa
930 | Alana
931 | Eléanore
932 | Meriem
933 | Amelia
934 | Fanny
935 | Kyara
936 | Annaëlle
937 | Azra
938 | Maëlia
939 | Melina
940 | Ela
941 | Eugénie
942 | Isis
943 | Lydia
944 | Maïmouna
945 | Zahra
946 | Kessy
947 | Mélyne
948 | Prune
949 | Hind
950 | Rita
951 | Tina
952 | Yaëlle
953 | Zeynep
954 | Aylin
955 | Rebecca
956 | Sidra
957 | Fanta
958 | Angelina
959 | Carmen
960 | Élisa
961 | Mellina
962 | Ariana
963 | Octavia
964 | Alina
965 | Enola
966 | Marylou
967 | Soraya
968 | Syrine
969 | Aicha
970 | Émy
971 | Anya
972 | Ayana
973 | Giuliana
974 | Gloria
975 | Ivy
976 | Kelya
977 | Mayline
978 | Adélaïde
979 | Aïna
980 | Imany
981 | Lilly
982 | Colette
983 | Juline
984 | Callie
985 | Andrea
986 | Ayna
987 | Catalina
988 | Élia
989 | Yara
990 | Sienna
991 | Oumou
992 | Aïsha
993 | Layna
994 | Rosie
995 | Sanaa
996 | Zaynab
997 | Fatou
998 | Leila
999 | Milana
1000 | Sixtine
--------------------------------------------------------------------------------
/named-name-recognition/named-name-recognition.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 1,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import spacy\n",
10 | "import pathlib\n",
11 | "import pandas as pd\n",
12 | "from memo import grid, memlist"
13 | ]
14 | },
15 | {
16 | "cell_type": "code",
17 | "execution_count": 2,
18 | "metadata": {},
19 | "outputs": [
20 | {
21 | "data": {
22 | "application/vnd.jupyter.widget-view+json": {
23 | "model_id": "d525cac2f1984c6095bac273b39b2dc1",
24 | "version_major": 2,
25 | "version_minor": 0
26 | },
27 | "text/plain": [
28 | "Output()"
29 | ]
30 | },
31 | "metadata": {},
32 | "output_type": "display_data"
33 | }
34 | ],
35 | "source": [
36 | "data = [] \n",
37 | "\n",
38 | "@memlist(data=data)\n",
39 | "def run_experiment(mod, datafile, cap=False):\n",
40 | " # load in spacy model \n",
41 | " nlp = spacy.load(mod)\n",
42 | " \n",
43 | " # load in names\n",
44 | " names = pathlib.Path(datafile).read_text().split(\"\\n\")\n",
45 | " \n",
46 | " # try to detect the names using simple \"je m'appelle\"\n",
47 | " ent_types = [d[-1].ent_type_ for d in nlp.pipe(f\"Bonjour je m'appelle {n.capitalize() if cap else n}\" for n in names)]\n",
48 | " results_df = pd.DataFrame({\"name\": names, \"ent_type\": ent_types}).groupby(\"ent_type\").count()\n",
49 | " return dict(results_df['name'])\n",
50 | "\n",
51 | "for setting in grid(datafile=['arabic-names.txt', 'french-names.txt'],\n",
52 | " mod=['fr_core_news_sm', 'fr_core_news_md'],\n",
53 | " cap=[True, False]):\n",
54 | " run_experiment(**setting)"
55 | ]
56 | },
57 | {
58 | "cell_type": "code",
59 | "execution_count": 26,
60 | "metadata": {},
61 | "outputs": [],
62 | "source": [
63 | "def clean_data(dataf):\n",
64 | " return (dataf\n",
65 | " .assign(NO_ENT=lambda d: d[''])\n",
66 | " .drop(columns=[''])\n",
67 | " .fillna(0)\n",
68 | " .assign(total=lambda d: d[['LOC', 'MISC', 'ORG', 'PER', 'NO_ENT']].sum(axis=1))\n",
69 | " .assign(p_person=lambda d: d['PER']/d['total'])\n",
70 | " .assign(p_none=lambda d: d['NO_ENT']/d['total']))\n",
71 | "\n",
72 | "# pd.DataFrame(data).assign(NO_ENT=lambda d: d['']).drop(columns=['']).fillna(0)"
73 | ]
74 | },
75 | {
76 | "cell_type": "code",
77 | "execution_count": 27,
78 | "metadata": {},
79 | "outputs": [
80 | {
81 | "data": {
82 | "text/html": [
83 | "
\n",
84 | "\n",
97 | "
\n",
98 | " \n",
99 | " \n",
100 | " | \n",
101 | " datafile | \n",
102 | " mod | \n",
103 | " cap | \n",
104 | " LOC | \n",
105 | " MISC | \n",
106 | " ORG | \n",
107 | " PER | \n",
108 | " NO_ENT | \n",
109 | " total | \n",
110 | " p_person | \n",
111 | " p_none | \n",
112 | "
\n",
113 | " \n",
114 | " \n",
115 | " \n",
116 | " 0 | \n",
117 | " arabic-names.txt | \n",
118 | " fr_core_news_sm | \n",
119 | " True | \n",
120 | " 327.0 | \n",
121 | " 37.0 | \n",
122 | " 33.0 | \n",
123 | " 355.0 | \n",
124 | " 15 | \n",
125 | " 767.0 | \n",
126 | " 0.462842 | \n",
127 | " 0.019557 | \n",
128 | "
\n",
129 | " \n",
130 | " 1 | \n",
131 | " arabic-names.txt | \n",
132 | " fr_core_news_sm | \n",
133 | " False | \n",
134 | " 0.0 | \n",
135 | " 0.0 | \n",
136 | " 0.0 | \n",
137 | " 0.0 | \n",
138 | " 767 | \n",
139 | " 767.0 | \n",
140 | " 0.000000 | \n",
141 | " 1.000000 | \n",
142 | "
\n",
143 | " \n",
144 | " 2 | \n",
145 | " arabic-names.txt | \n",
146 | " fr_core_news_md | \n",
147 | " True | \n",
148 | " 178.0 | \n",
149 | " 130.0 | \n",
150 | " 19.0 | \n",
151 | " 417.0 | \n",
152 | " 23 | \n",
153 | " 767.0 | \n",
154 | " 0.543677 | \n",
155 | " 0.029987 | \n",
156 | "
\n",
157 | " \n",
158 | " 3 | \n",
159 | " arabic-names.txt | \n",
160 | " fr_core_news_md | \n",
161 | " False | \n",
162 | " 0.0 | \n",
163 | " 0.0 | \n",
164 | " 0.0 | \n",
165 | " 0.0 | \n",
166 | " 767 | \n",
167 | " 767.0 | \n",
168 | " 0.000000 | \n",
169 | " 1.000000 | \n",
170 | "
\n",
171 | " \n",
172 | " 4 | \n",
173 | " french-names.txt | \n",
174 | " fr_core_news_sm | \n",
175 | " True | \n",
176 | " 423.0 | \n",
177 | " 51.0 | \n",
178 | " 16.0 | \n",
179 | " 507.0 | \n",
180 | " 3 | \n",
181 | " 1000.0 | \n",
182 | " 0.507000 | \n",
183 | " 0.003000 | \n",
184 | "
\n",
185 | " \n",
186 | " 5 | \n",
187 | " french-names.txt | \n",
188 | " fr_core_news_sm | \n",
189 | " False | \n",
190 | " 424.0 | \n",
191 | " 51.0 | \n",
192 | " 16.0 | \n",
193 | " 508.0 | \n",
194 | " 1 | \n",
195 | " 1000.0 | \n",
196 | " 0.508000 | \n",
197 | " 0.001000 | \n",
198 | "
\n",
199 | " \n",
200 | " 6 | \n",
201 | " french-names.txt | \n",
202 | " fr_core_news_md | \n",
203 | " True | \n",
204 | " 149.0 | \n",
205 | " 293.0 | \n",
206 | " 10.0 | \n",
207 | " 535.0 | \n",
208 | " 13 | \n",
209 | " 1000.0 | \n",
210 | " 0.535000 | \n",
211 | " 0.013000 | \n",
212 | "
\n",
213 | " \n",
214 | " 7 | \n",
215 | " french-names.txt | \n",
216 | " fr_core_news_md | \n",
217 | " False | \n",
218 | " 149.0 | \n",
219 | " 294.0 | \n",
220 | " 10.0 | \n",
221 | " 536.0 | \n",
222 | " 11 | \n",
223 | " 1000.0 | \n",
224 | " 0.536000 | \n",
225 | " 0.011000 | \n",
226 | "
\n",
227 | " \n",
228 | "
\n",
229 | "
"
230 | ],
231 | "text/plain": [
232 | " datafile mod cap LOC MISC ORG PER \\\n",
233 | "0 arabic-names.txt fr_core_news_sm True 327.0 37.0 33.0 355.0 \n",
234 | "1 arabic-names.txt fr_core_news_sm False 0.0 0.0 0.0 0.0 \n",
235 | "2 arabic-names.txt fr_core_news_md True 178.0 130.0 19.0 417.0 \n",
236 | "3 arabic-names.txt fr_core_news_md False 0.0 0.0 0.0 0.0 \n",
237 | "4 french-names.txt fr_core_news_sm True 423.0 51.0 16.0 507.0 \n",
238 | "5 french-names.txt fr_core_news_sm False 424.0 51.0 16.0 508.0 \n",
239 | "6 french-names.txt fr_core_news_md True 149.0 293.0 10.0 535.0 \n",
240 | "7 french-names.txt fr_core_news_md False 149.0 294.0 10.0 536.0 \n",
241 | "\n",
242 | " NO_ENT total p_person p_none \n",
243 | "0 15 767.0 0.462842 0.019557 \n",
244 | "1 767 767.0 0.000000 1.000000 \n",
245 | "2 23 767.0 0.543677 0.029987 \n",
246 | "3 767 767.0 0.000000 1.000000 \n",
247 | "4 3 1000.0 0.507000 0.003000 \n",
248 | "5 1 1000.0 0.508000 0.001000 \n",
249 | "6 13 1000.0 0.535000 0.013000 \n",
250 | "7 11 1000.0 0.536000 0.011000 "
251 | ]
252 | },
253 | "execution_count": 27,
254 | "metadata": {},
255 | "output_type": "execute_result"
256 | }
257 | ],
258 | "source": [
259 | "pd.DataFrame(data).pipe(clean_data)"
260 | ]
261 | }
262 | ],
263 | "metadata": {
264 | "kernelspec": {
265 | "display_name": "Python 3",
266 | "language": "python",
267 | "name": "python3"
268 | },
269 | "language_info": {
270 | "codemirror_mode": {
271 | "name": "ipython",
272 | "version": 3
273 | },
274 | "file_extension": ".py",
275 | "mimetype": "text/x-python",
276 | "name": "python",
277 | "nbconvert_exporter": "python",
278 | "pygments_lexer": "ipython3",
279 | "version": "3.7.7"
280 | }
281 | },
282 | "nbformat": 4,
283 | "nbformat_minor": 4
284 | }
285 |
--------------------------------------------------------------------------------
/toxic/toxicity.ipynb:
--------------------------------------------------------------------------------
1 | {
2 | "cells": [
3 | {
4 | "cell_type": "code",
5 | "execution_count": 12,
6 | "metadata": {},
7 | "outputs": [],
8 | "source": [
9 | "import time \n",
10 | "import pandas as pd\n",
11 | "from whatlies.language import BytePairLanguage, UniversalSentenceLanguage, SentenceTFMLanguage, CountVectorLanguage\n",
12 | "\n",
13 | "lang_use = UniversalSentenceLanguage(\"large\")\n",
14 | "lang_bp = BytePairLanguage(\"en\", dim=300, vs=200_000)\n",
15 | "lang_brt = SentenceTFMLanguage('distilbert-base-nli-stsb-mean-tokens')"
16 | ]
17 | },
18 | {
19 | "cell_type": "markdown",
20 | "metadata": {},
21 | "source": [
22 | "Let's load the dataset and make some utility functions to get it in the right format."
23 | ]
24 | },
25 | {
26 | "cell_type": "code",
27 | "execution_count": 13,
28 | "metadata": {},
29 | "outputs": [],
30 | "source": [
31 | "def clean_text(txt_col):\n",
32 | " return txt_col.str.replace(r'\\n', \" \")\n",
33 | "\n",
34 | "def to_train_df(dataf):\n",
35 | " dataf = dataf.copy() \n",
36 | " dataf['bad'] = dataf[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].sum(axis=1)\n",
37 | " dataf['label'] = ['toxic' if b else 'fine' for b in dataf['bad'] != 0]\n",
38 | " dataf['text'] = clean_text(dataf['comment_text'])\n",
39 | " return dataf[['text', 'label']]\n",
40 | "\n",
41 | "df = pd.read_csv(\"toxicity-train.csv.zip\").replace({\"\\n\", \"\"})"
42 | ]
43 | },
44 | {
45 | "cell_type": "markdown",
46 | "metadata": {},
47 | "source": [
48 | "Next, let's make a proper train/test split."
49 | ]
50 | },
51 | {
52 | "cell_type": "code",
53 | "execution_count": 17,
54 | "metadata": {},
55 | "outputs": [],
56 | "source": [
57 | "from sklearn.metrics import classification_report\n",
58 | "from sklearn.pipeline import Pipeline\n",
59 | "from sklearn.linear_model import LogisticRegression\n",
60 | "from sklearn.svm import SVC\n",
61 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n",
62 | "from sklearn.model_selection import train_test_split\n",
63 | "\n",
64 | "train_df = df.pipe(to_train_df)[:10000]\n",
65 | "\n",
66 | "x_train, x_test, y_train, y_test = train_test_split(list(train_df['text']), train_df['label'])"
67 | ]
68 | },
69 | {
70 | "cell_type": "code",
71 | "execution_count": 28,
72 | "metadata": {},
73 | "outputs": [
74 | {
75 | "data": {
76 | "text/html": [
77 | "\n",
78 | "\n",
91 | "
\n",
92 | " \n",
93 | " \n",
94 | " | \n",
95 | " text | \n",
96 | "
\n",
97 | " \n",
98 | " label | \n",
99 | " | \n",
100 | "
\n",
101 | " \n",
102 | " \n",
103 | " \n",
104 | " fine | \n",
105 | " 8970 | \n",
106 | "
\n",
107 | " \n",
108 | " toxic | \n",
109 | " 1030 | \n",
110 | "
\n",
111 | " \n",
112 | "
\n",
113 | "
"
114 | ],
115 | "text/plain": [
116 | " text\n",
117 | "label \n",
118 | "fine 8970\n",
119 | "toxic 1030"
120 | ]
121 | },
122 | "execution_count": 28,
123 | "metadata": {},
124 | "output_type": "execute_result"
125 | }
126 | ],
127 | "source": [
128 | "train_df.groupby(\"label\").count()"
129 | ]
130 | },
131 | {
132 | "cell_type": "code",
133 | "execution_count": 29,
134 | "metadata": {},
135 | "outputs": [
136 | {
137 | "name": "stdout",
138 | "output_type": "stream",
139 | "text": [
140 | "report for ('CountVectorizer()', \"SVC(class_weight='balanced')\")\n",
141 | "train time: 9.83952283859253\n",
142 | "pred time: 3.0086185932159424\n",
143 | " precision recall f1-score support\n",
144 | "\n",
145 | " fine 0.97 0.79 0.87 2251\n",
146 | " toxic 0.30 0.82 0.44 249\n",
147 | "\n",
148 | " accuracy 0.79 2500\n",
149 | " macro avg 0.64 0.80 0.65 2500\n",
150 | "weighted avg 0.91 0.79 0.83 2500\n",
151 | "\n",
152 | "report for ('CountVectorizer()', \"LogisticRegression(class_weight='balanced', solver='liblinear')\")\n",
153 | "train time: 0.7045333385467529\n",
154 | "pred time: 0.1499195098876953\n",
155 | " precision recall f1-score support\n",
156 | "\n",
157 | " fine 0.97 0.97 0.97 2251\n",
158 | " toxic 0.70 0.70 0.70 249\n",
159 | "\n",
160 | " accuracy 0.94 2500\n",
161 | " macro avg 0.84 0.83 0.83 2500\n",
162 | "weighted avg 0.94 0.94 0.94 2500\n",
163 | "\n"
164 | ]
165 | }
166 | ],
167 | "source": [
168 | "for mod in [SVC(class_weight=\"balanced\"), LogisticRegression(solver='liblinear', class_weight=\"balanced\")]:\n",
169 | " pipe = Pipeline([\n",
170 | " (\"feat\", CountVectorizer()),\n",
171 | " (\"mod\", mod)\n",
172 | " ])\n",
173 | " tic = time.time()\n",
174 | " pipe.fit(list(x_train), y_train)\n",
175 | " toc = time.time() \n",
176 | " print(f\"report for {str(lang), str(mod)}\")\n",
177 | " print(f\"train time: {toc - tic}\")\n",
178 | " tic = time.time()\n",
179 | " y_pred = pipe.predict(x_test)\n",
180 | " toc = time.time()\n",
181 | " print(f\"pred time: {toc - tic}\")\n",
182 | " print(classification_report(y_test, y_pred))"
183 | ]
184 | },
185 | {
186 | "cell_type": "code",
187 | "execution_count": 31,
188 | "metadata": {},
189 | "outputs": [],
190 | "source": [
191 | "import pandas as pd \n",
192 | "df = pd.read_csv(\"results.csv\")"
193 | ]
194 | },
195 | {
196 | "cell_type": "code",
197 | "execution_count": 34,
198 | "metadata": {},
199 | "outputs": [
200 | {
201 | "data": {
202 | "text/html": [
203 | "\n",
204 | "\n",
217 | "
\n",
218 | " \n",
219 | " \n",
220 | " | \n",
221 | " lang | \n",
222 | " mod | \n",
223 | " precision | \n",
224 | " recall | \n",
225 | " pred-time | \n",
226 | " train-time | \n",
227 | "
\n",
228 | " \n",
229 | " \n",
230 | " \n",
231 | " 1 | \n",
232 | " CountVectorizer() | \n",
233 | " LogisticRegression() | \n",
234 | " 0.807229 | \n",
235 | " 0.538153 | \n",
236 | " 0.085694 | \n",
237 | " 0.882420 | \n",
238 | "
\n",
239 | " \n",
240 | " 0 | \n",
241 | " CountVectorizer() | \n",
242 | " SVC() | \n",
243 | " 1.000000 | \n",
244 | " 0.032129 | \n",
245 | " 1.634564 | \n",
246 | " 5.422204 | \n",
247 | "
\n",
248 | " \n",
249 | "
\n",
250 | "
"
251 | ],
252 | "text/plain": [
253 | " lang mod precision recall pred-time \\\n",
254 | "1 CountVectorizer() LogisticRegression() 0.807229 0.538153 0.085694 \n",
255 | "0 CountVectorizer() SVC() 1.000000 0.032129 1.634564 \n",
256 | "\n",
257 | " train-time \n",
258 | "1 0.882420 \n",
259 | "0 5.422204 "
260 | ]
261 | },
262 | "execution_count": 34,
263 | "metadata": {},
264 | "output_type": "execute_result"
265 | }
266 | ],
267 | "source": [
268 | "df.sort_values('precision')"
269 | ]
270 | },
271 | {
272 | "cell_type": "code",
273 | "execution_count": 42,
274 | "metadata": {},
275 | "outputs": [],
276 | "source": [
277 | "pipe = Pipeline([\n",
278 | " (\"feat\", lang_use),\n",
279 | " (\"mod\", LogisticRegression(solver='liblinear', class_weight=\"balanced\"))\n",
280 | "])\n",
281 | "\n",
282 | "pipe.fit(list(x_train), y_train)"
283 | ]
284 | },
285 | {
286 | "cell_type": "code",
287 | "execution_count": null,
288 | "metadata": {},
289 | "outputs": [],
290 | "source": [
291 | "pipe.predict([\"you're a bad ass!\"])"
292 | ]
293 | },
294 | {
295 | "cell_type": "code",
296 | "execution_count": null,
297 | "metadata": {},
298 | "outputs": [],
299 | "source": []
300 | }
301 | ],
302 | "metadata": {
303 | "kernelspec": {
304 | "display_name": "Python 3",
305 | "language": "python",
306 | "name": "python3"
307 | },
308 | "language_info": {
309 | "codemirror_mode": {
310 | "name": "ipython",
311 | "version": 3
312 | },
313 | "file_extension": ".py",
314 | "mimetype": "text/x-python",
315 | "name": "python",
316 | "nbconvert_exporter": "python",
317 | "pygments_lexer": "ipython3",
318 | "version": "3.8.5"
319 | }
320 | },
321 | "nbformat": 4,
322 | "nbformat_minor": 4
323 | }
324 |
--------------------------------------------------------------------------------
/toxic/toxicity.py:
--------------------------------------------------------------------------------
1 | import time
2 | import pandas as pd
3 | from sklearn.model_selection import train_test_split
4 |
5 | from whatlies.language import BytePairLanguage, UniversalSentenceLanguage, SentenceTFMLanguage, CountVectorLanguage
6 | from sklearn.metrics import classification_report
7 | from sklearn.pipeline import Pipeline
8 | from sklearn.linear_model import LogisticRegression
9 | from sklearn.svm import SVC
10 | from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
11 |
12 |
# Load the labelled training comments from the zipped CSV.
# ``replace`` needs an old -> new mapping. The previous argument was the set
# literal {"\n", ""}; a set is not a mapping, so pandas cannot read it as
# old -> new pairs (how it is handled varies by pandas version).
# NOTE: without ``regex=True`` this only rewrites cells whose entire value is
# a newline; newlines inside the comment text are handled by ``clean_text``.
df = pd.read_csv("toxicity-train.csv.zip").replace({"\n": ""})
14 |
def clean_text(txt_col):
    """Replace every newline character in a string column with one space.

    Args:
        txt_col: pandas Series of strings (it is accessed through ``.str``).

    Returns:
        A new Series in which each newline character has become a space.
    """
    # Match the newline literally and pin ``regex=False``. The default of the
    # ``regex`` argument of ``Series.str.replace`` changed from True to False
    # in pandas 2.0, so the former raw pattern r'\n' would be searched for as
    # backslash + "n" and real newlines would silently be left in the text.
    return txt_col.str.replace("\n", " ", regex=False)
17 |
def to_train_df(dataf):
    """Turn the raw toxicity frame into a two-column ``text`` / ``label`` frame.

    A row is labelled 'toxic' when the sum of its six flag columns is
    non-zero and 'fine' otherwise. The text comes from ``comment_text``
    after it has been passed through ``clean_text``. The input frame is
    not modified; all work happens on a copy.
    """
    flag_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    frame = dataf.copy()
    # Row-wise total of the flag columns.
    frame['bad'] = frame[flag_columns].sum(axis=1)
    is_bad = frame['bad'] != 0
    frame['label'] = ['toxic' if flagged else 'fine' for flagged in is_bad]
    frame['text'] = clean_text(frame['comment_text'])
    return frame[['text', 'label']]
24 |
25 |
26 |
# Convert to text/label form and keep only the first 20,000 rows.
train_df = df.pipe(to_train_df)[:20000]
# No random_state is passed, so the split is not fixed between runs.
x_train, x_test, y_train, y_test = train_test_split(list(train_df['text']), train_df['label'])


# whatlies language backends; each one is used as the "feat" pipeline step
# in the benchmark loop further down.
lang_use = UniversalSentenceLanguage("large")
lang_bp = BytePairLanguage("en", dim=300, vs=200_000)
lang_brt = SentenceTFMLanguage('distilbert-base-nli-stsb-mean-tokens')


# models: fitted pipelines keyed by (featurizer class name, model class name).
models = {}
# results: one dict of metrics and timings per featurizer/model combination.
results = []
38 |
def get_name(o):
    """Return the class name of ``o`` as a string."""
    cls = o.__class__
    return cls.__name__
41 |
# Benchmark every featurizer / classifier combination: fit on the training
# split, time fit and predict separately, and record the precision and recall
# of the 'toxic' class.
for lang in [CountVectorizer(), lang_bp, lang_use, lang_brt]:
    # The model list is rebuilt on every outer iteration, so each featurizer
    # gets fresh classifier instances; the featurizer object itself is shared
    # by both pipelines built for it.
    for mod in [SVC(class_weight='balanced'), LogisticRegression(solver='liblinear', class_weight='balanced')]:
        pipe = Pipeline([
            ("feat", lang),
            ("mod", mod)
        ])
        # Keep the pipeline around, keyed by the two class names.
        models[get_name(lang), get_name(mod)] = pipe
        # Time the fit of the whole pipeline (featurizer + model).
        tic = time.time()
        pipe.fit(list(x_train), y_train)
        toc = time.time()
        print(f"report for {get_name(lang), get_name(mod)}")
        train_time = toc - tic
        print(f"train time: {train_time}")
        # Time prediction on the held-out split; tic/toc are reused.
        tic = time.time()
        y_pred = pipe.predict(x_test)
        toc = time.time()
        print(f"pred time: {toc - tic}")
        # Dict form of the report, used to pull out the 'toxic' class metrics.
        d = classification_report(y_test, y_pred, output_dict=True)
        data = {
            'lang': get_name(lang),
            'mod': get_name(mod),
            'precision': d['toxic']['precision'],
            'recall': d['toxic']['recall'],
            # tic/toc have not been reassigned since predict, so this is
            # still the prediction time.
            'pred-time': toc - tic,
            'train-time': train_time

        }
        results.append(data)
        # Text form of the same report, printed for the console log.
        print(classification_report(y_test, y_pred))

# Persist one row per combination.
pd.DataFrame(results).to_csv("results.csv", index=False)
73 |
--------------------------------------------------------------------------------