├── .gitignore ├── LICENSE ├── README.md ├── analogies └── analogies.ipynb ├── bad-data-labels ├── bad-label-experiment.ipynb └── outofscope-intent-classification-dataset.csv ├── bias ├── 3d-projection.ipynb ├── debias.ipynb ├── female-words.txt └── male-words.txt ├── crf └── config.yml ├── diet ├── config-mega-basic.yml ├── config-orig.yml ├── diet-heavy.yml ├── diet-light.yml ├── diet-replace-mask.yml ├── diet-replace.yml ├── readme.md └── viewresults.py ├── glove ├── glove.py └── glove_variant_embeddings.ipynb ├── images ├── benchmarks.jpeg ├── bias.jpg ├── bulk-labelling-ui.jpeg ├── byte.jpg ├── confidence.jpeg ├── countv.jpg ├── debias-hard.jpg ├── debias-proj.jpg ├── fallback-detection.jpeg ├── flashtext.jpeg ├── incremental-training.jpeg ├── iterate-data.jpeg ├── labse.jpeg ├── language.jpeg ├── leven.jpg ├── lexical-ambiguity.jpeg ├── logo.png ├── remain-careful.jpg ├── resp1.jpg ├── resp2.jpg ├── subw-imp.jpg ├── subw.jpg ├── ted1.jpg ├── ted2.jpg ├── toxic-lang.png ├── vid-1.jpg ├── vid-10.jpg ├── vid-11.jpg ├── vid-12.jpg ├── vid-13.jpg ├── vid-2.jpg ├── vid-3.jpg ├── vid-4.jpg ├── vid-5.jpg ├── vid-6.jpg ├── vid-7.jpg ├── vid-8.jpg └── word-analogies.jpg ├── intent-benchmark ├── benchmark.py ├── intent-benchmark.ipynb └── n-ft-use-train-experiment.jsonl ├── language ├── ar.tsv ├── de.tsv ├── en.tsv ├── eo.tsv ├── es.tsv ├── language-detection-models.ipynb ├── lid.176.ftz ├── lt.tsv ├── nl.tsv ├── pt.tsv ├── tr.tsv └── vi.tsv ├── letter-embeddings ├── algo_whiteboard_letter_embeddings_v1.ipynb └── algo_whiteboard_letter_embeddings_v2.ipynb ├── named-name-recognition ├── arabic-names.txt ├── french-names.txt └── named-name-recognition.ipynb ├── toxic ├── toxicity.ipynb └── toxicity.py └── whatlies └── whatlies.ipynb /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / 
packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .DS_Store -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Algorithm Whiteboard Resources 2 | 3 | 4 | 5 | 6 | 7 | This is where we share notebooks and projects used in our [youtube channel](https://www.youtube.com/watch?v=wWNMST6t1TA&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb). 8 | 9 | 10 | 11 | 12 | 13 | ## Video 1: [DIET Architecture - How it Works](https://www.youtube.com/watch?v=vWStcJDuOUk&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb) 14 | 15 | This video explains the parts of the DIET architecture. It does not discuss any code. 16 | 17 | 18 | 19 | 20 | 21 | ## Video 2: [DIET Architecture - Design Decisions](https://www.youtube.com/watch?v=KUGGuJ0aTL8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb) 22 | 23 | This video explains the parts of the DIET architecture. It does not discuss any code. 
24 | 25 | 26 | 27 | 28 | 29 | ## Video 3: [DIET Architecture - Benchmarks](https://www.youtube.com/watch?v=oj5oPGDlep4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb) 30 | 31 | In this video we make changes to a configuration file. The configuration files, the streamlit application as well as an instructions manual can be found in the `diet` folder. 32 | 33 | 34 | 35 | 36 | 37 | ## Video 4: [Word Embeddings - Letter Embeddings](https://www.youtube.com/watch?v=mWvnlVw_LiY&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb) 38 | 39 | In this video we demonstrate how to train letter embeddings in order to gain intuition on what word embeddings are. 40 | 41 | The kaggle dataset that we use in this video can be found [here](https://www.kaggle.com/therohk/million-headlines). 42 | 43 | We've added the two notebooks in this repo in the `letter-embeddings` folder. But you can also run them yourself in google colab. The notebooks are mostly identical but the `v1` notebook only uses one token to predict the next one while `v2` uses two tokens to predict the next one. 44 | 45 | Notebook with one token input: 46 | 47 | Notebook with two token input: 48 | 49 | 50 | 51 | 52 | 53 | 54 | ## Video 5: [Word Embeddings - CBOW & SkipGram](https://www.youtube.com/watch?v=BWaHLmG1lak&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=6) 55 | 56 | 57 | This video explains two algorithms but it does not discuss any code. 58 | 59 |
60 | 61 | 62 | 63 | 64 | 65 | ## Video 6: [Word Embeddings - GloVe](https://www.youtube.com/watch?v=BWaHLmG1lak&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=7) 66 | 67 | This video discusses GloVe but also offers code to train a variant of your own. The keras model can be found in the `glove` folder. 68 | 69 | The `glove.py` file contains just the keras algorithm while the notebook 70 | contains the full code. You can also go online to colab 71 | and play with the full notebook from there. 72 | 73 | The full notebook: 74 | 75 | 76 | 77 | 78 | 79 | ## Video 7: [Word Embeddings - WhatLies](https://www.youtube.com/watch?v=FwkwC7IJWO0&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=9) 80 | 81 | This video discusses a small visualisation package we've open sourced. The documentation for it can be found [here](https://rasahq.github.io/whatlies/). 82 | 83 | The notebook that we made in this video can be found in the `whatlies` folder. 84 | 85 | 86 | 87 | 88 | 89 | ## Video 8: [Attention - Self Attention](https://www.youtube.com/watch?v=yGTUuEx3GkA&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=9) 90 | 91 | This video discusses the idea behind attention (you may notice some similarities 92 | with a convolution) but it does not discuss any code. 93 | 94 | 95 | 96 | 97 | 98 | 99 | ## Video 9: [Attention: Keys, Values, Queries](https://www.youtube.com/watch?v=tIvKXrEDMhk&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=11) 100 | 101 | This video discusses how you can add more context to the self attention mechanism by introducing layers. This video does not discuss any code though. 102 | 103 | 104 | 105 | 106 | 107 | ## Video 10: [Attention: Multi Head Attention](https://www.youtube.com/watch?v=23XUv0T9L5c&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=12) 108 | 109 | This video explains how you can increase the potential of attention by introducing multiple layers of keys, queries and values. The video does not discuss any code though. 
110 | 111 | 112 | 113 | 114 | 115 | ## Video 11: [Attention: Transformers](https://www.youtube.com/watch?v=EXNBy8G43MM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=13) 116 | 117 | Given the lessons from the previous videos, this video wraps everything together by 118 | combining everything into a transformer block. There is no code for this video. 119 | 120 | 121 | 122 | 123 | 124 | 125 | ## Video 12: [StarSpace](https://www.youtube.com/watch?v=ZT3_9Kjx7oI&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=14) 126 | 127 | This video discusses the [StarSpace](https://arxiv.org/abs/1709.03856) algorithm. The 128 | video serves as an introduction to the TED policy. This video contains no code. 129 | 130 |
131 | 132 | 133 | 134 | 135 | 136 | ## Video 13: [TED Policy](https://www.youtube.com/watch?v=j90NvurJI4I&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=14) 137 | 138 | This video only discusses the theory behind the TED algorithm. The next video will 139 | show how TED works on a more practical level. This video contains no code. 140 | 141 | 142 | 143 | 144 | 145 | ## Video 14: [TED Policy in Practice](https://www.youtube.com/watch?v=d8JMJMvErSg&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=15) 146 | 147 | This video makes use of a rasa project that can be found [here](https://github.com/RasaHQ/rasa-ted-demo). By tuning the `history` hyperparameter we see how the chatbot is able 148 | to deal with context switches over a long period in the dialogue. 149 | 150 | 151 | 152 | 153 | 154 | ## Video 15: [Response Selection](https://www.youtube.com/watch?v=2jvyWngHEJM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=16) 155 | 156 | This video explains how a response selection model might make your model more accurate in a FAQ/Chitchat scenario. There is no code for this video. 157 | 158 | 159 | 160 | 161 | 162 | ## Video 16: [Response Selection](https://www.youtube.com/watch?v=0tXkFScW0hE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=17) 163 | 164 | This video explains how a response selection model is implemented internally. There is no code for this video. 165 | 166 |
167 | 168 | 169 | 170 | 171 | 172 | ## Video 17: [CountVectors](https://www.youtube.com/watch?v=Ju7l5ADg10U&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=18) 173 | 174 | This video explains why CountVectors are still the unsung hero of natural language processing. There is no code attachment for this video. 175 | 176 |
177 | 178 | 179 | 180 | 181 | 182 | ## Video 18: [Subword Embeddings](https://www.youtube.com/watch?v=kNw9dpzp5RU&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=19) 183 | 184 | This video tries to combine the ideas from word embeddings with the idea of countvectors. To reproduce, check out [whatlies](https://rasahq.github.io/whatlies/). 185 | 186 | 187 | 188 | 189 | 190 | ## Video 19: [Subword Implementation](https://www.youtube.com/watch?v=8D3Gamk1Jig&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=20) 191 | 192 | This video explains how you might implement subword embeddings from a neural network design perspective. There is no code for this video. 193 | 194 | 195 | 196 | 197 | 198 | ## Video 20: [BytePair Embeddings](https://www.youtube.com/watch?v=-0IjF-7OB3s&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=21) 199 | 200 | This video explains how BytePair embeddings work. If you want to use these embeddings in Rasa please check out [rasa-nlu-examples](https://rasahq.github.io/rasa-nlu-examples/). 201 | 202 | 203 | 204 | 205 | 206 | ## Video 21: [Levenshtein Vectors](https://www.youtube.com/watch?v=e9JdIKgf0QY&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=22) 207 | 208 | This video explains how count vectors might be turned from sparse into dense layers. While doing this, we also learn that these vectors encode Levenshtein distance. 209 | 210 | 211 | 212 | 213 | 214 | ## Video 22: [Bias in Word Embeddings](https://www.youtube.com/watch?v=UwAvyACOrWs&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=23) 215 | 216 | This video explains how you might measure gender bias in word embeddings. It's part of a larger series and the code for it can be found in the `bias` folder of this repository. 217 | 218 | 219 | 220 | 221 | 222 | ## Video 23: [De-Biasing Projections](https://www.youtube.com/watch?v=8xQbWlCEHRw&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=24) 223 | 224 | There's a lot of research on how we might remove bias from word-embeddings.
In this video we'll discuss one such technique. For the code, check the `bias` folder of this repository. 225 | 226 | 227 | 228 | 229 | 230 | ## Video 24: [Remain Careful with Debiasing](https://www.youtube.com/watch?v=MHdAd48dANo&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=25) 231 | 232 | In this video we explain why de-biasing techniques have limits. For the code, check the `bias` folder of this repository. 233 | 234 | 235 | 236 | 237 | 238 | ## Video 25: [Why Debiasing is Hard](https://www.youtube.com/watch?v=2ROP1QFKsqc&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=26) 239 | 240 | In this video we explain why de-biasing techniques have limits. For the code, check the `bias` folder of this repository. 241 | 242 | 243 | 244 | 245 | 246 | ## Video 26: [Word Analogies](https://www.youtube.com/watch?v=u6EmngzBUEU&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=27) 247 | 248 | In this video we explain why "word analogies" don't really work by merely applying arithmetic on word-vectors. For the code, check the `analogies` folder of this repository. 249 | 250 | 251 | 252 | 253 | 254 | ## Video 27: [Toxic Language](https://www.youtube.com/watch?v=Czto6GzJah8&feature=youtu.be&ab_channel=Rasa) 255 | 256 | In this video we explain why detecting toxic language is harder than it might seem. Code for the video can be found in the `toxic` folder in this repository. 257 | 258 | 259 | 260 | 261 | 262 | ## Video 28: [Lexical Ambiguity](https://www.youtube.com/watch?v=byy19WPLPBQ&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=34) 263 | 264 | In this video we explain why, in general, NLP models fall short. Models don't *really* understand language, they merely model it. 265 | 266 | 267 | 268 | 269 | 270 | ## Video 29: [Fallback Detection](https://www.youtube.com/watch?v=VldHznqAYlE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=35) 271 | 272 | It's important to understand the limits of our models.
They can sometimes tell us when they're uncertain about a prediction and this information should not be ignored. 273 | 274 | 275 | 276 | 277 | 278 | ## Video 30: [Language Detection](https://www.youtube.com/watch?v=Z0pnQcWHBZE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=36) 279 | 280 | What might an assistant do if it sees a text from a language that it isn't trained on? It might make assumptions because it's unlike anything it has seen before and a standard fallback mechanism might not be able to pick it up. 281 | 282 | For the code, check the `language` folder of this repository. 283 | 284 | 285 | 286 | 287 | 288 | ## Video 31: [Incremental Training](https://www.youtube.com/watch?v=FipRjQRaCz8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=37) 289 | 290 | Sometimes we don't need to completely retrain our algorithms. At times we can just finetune on new data. In this video we explain how that might be done with DIET. 291 | 292 | 293 | 294 | 295 | 296 | ## Video 31: [Bulk Labelling UI](https://www.youtube.com/watch?v=T0dDetqgra4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=38) 297 | 298 | This video demonstrates a new feature in our bulk labelling demo. The code can be found [here](https://github.com/RasaHQ/rasalit/tree/main/notebooks/bulk-labelling). 299 | 300 | 301 | 302 | 303 | 304 | ## Video 32: [Language Agnostic BERT (LaBSE)](https://www.youtube.com/watch?v=7tAWk_Coj-s&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=39) 305 | 306 | In this episode, I'll discuss how you might tweak the standard BERT model to accommodate multiple languages at the same time. We'll also demonstrate a pre-trained model that you can use right away! If you're interested in the paper, you can find it [here](https://arxiv.org/abs/2007.01852). 
307 | 308 | 309 | 310 | 311 | 312 | ## Video 33: [Iterate on Data](https://www.youtube.com/watch?v=xpm17ibm0E8&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=40) 313 | 314 | Instead of debugging a model, it might be *much* more effective to consider debugging your data. In this video, we'll discuss some techniques that you can start with while also demonstrating some new features in Rasa X. 315 | 316 | 317 | 318 | 319 | 320 | ## Video 34: [Meaningful Benchmarks](https://www.youtube.com/watch?v=GTClb8RQSGM&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=41) 321 | 322 | It's easy to get distracted when you go down the rabbit hole of performance statistics. But! Not every impressive benchmark is meaningful and it's important to make the distinction. In this video, we're going to explore one benchmark to demonstrate what we mean by this. 323 | 324 | The code for this can be found in the `intent-benchmark` folder. 325 | 326 | 327 | 328 | 329 | 330 | ## Video 35: [Model Confidence](https://www.youtube.com/watch?v=ev1tNXPo3tE&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=42) 331 | 332 | If we're going to apply a fallback, we better make sure that we have a good measure for confidence. In this video we explain an update that we've made to DIET that makes the confidence measure a more representative number. 333 | 334 | 335 | 336 | 337 | 338 | ## Video 36: [FlashText Entity Extraction](https://www.youtube.com/watch?v=IHHKrqgFgs4&list=PL75e0qA87dlG-za8eLI6t0_Pbxafk-cxb&index=43) 339 | 340 | If we're going to apply a fallback, we better make sure that we have a good measure for confidence. In this video we explain an update that we've made to DIET that makes the confidence measure a more representative number. 
341 | -------------------------------------------------------------------------------- /bias/female-words.txt: -------------------------------------------------------------------------------- 1 | countrywoman 2 | sororal 3 | witches 4 | maidservant 5 | mothers 6 | diva 7 | actress 8 | spinster 9 | mama 10 | duchesses 11 | barwoman 12 | countrywomen 13 | dowry 14 | hostesses 15 | airwomen 16 | menopause 17 | clitoris 18 | princess 19 | governesses 20 | abbess 21 | women 22 | widow 23 | ladies 24 | sorceresses 25 | madam 26 | brides 27 | baroness 28 | housewives 29 | godesses 30 | niece 31 | widows 32 | lady 33 | sister 34 | brides 35 | nun 36 | adultresses 37 | obstetrics 38 | bellgirls 39 | her 40 | marchioness 41 | princesses 42 | empresses 43 | mare 44 | chairwoman 45 | convent 46 | priestesses 47 | girlhood 48 | ladies 49 | queen 50 | gals 51 | mommies 52 | maid 53 | female_ejaculation 54 | spokeswoman 55 | seamstress 56 | cowgirls 57 | chick 58 | spinsters 59 | hair_salon 60 | empress 61 | mommy 62 | feminism 63 | gals 64 | enchantress 65 | gal 66 | motherhood 67 | estrogen 68 | camerawomen 69 | godmother 70 | strongwoman 71 | goddess 72 | matriarch 73 | aunt 74 | chairwomen 75 | ma'am 76 | sisterhood 77 | hostess 78 | estradiol 79 | wife 80 | mom 81 | stewardess 82 | females 83 | viagra 84 | spokeswomen 85 | ma 86 | belle 87 | minx 88 | maiden 89 | witch 90 | miss 91 | nieces 92 | mothered 93 | cow 94 | belles 95 | councilwomen 96 | landladies 97 | granddaughter 98 | fiancees 99 | stepmothers 100 | horsewomen 101 | grandmothers 102 | adultress 103 | schoolgirl 104 | hen 105 | granddaughters 106 | bachelorette 107 | camerawoman 108 | moms 109 | her 110 | mistress 111 | lass 112 | policewoman 113 | nun 114 | actresses 115 | saleswomen 116 | girlfriend 117 | councilwoman 118 | lady 119 | stateswoman 120 | maternal 121 | lass 122 | landlady 123 | sistren 124 | ladies 125 | wenches 126 | sorority 127 | bellgirl 128 | duchess 129 | ballerina 130 | chicks 131 | 
fiancee 132 | fillies 133 | wives 134 | suitress 135 | maternity 136 | she 137 | businesswoman 138 | masseuses 139 | heroine 140 | doe 141 | busgirls 142 | girlfriends 143 | queens 144 | sisters 145 | mistresses 146 | stepmother 147 | brides 148 | daughter 149 | minxes 150 | cowgirl 151 | lady 152 | daughters 153 | mezzo 154 | saleswoman 155 | mistress 156 | hostess 157 | nuns 158 | maids 159 | mrs. 160 | headmistresses 161 | lasses 162 | congresswoman 163 | airwoman 164 | housewife 165 | priestess 166 | barwomen 167 | barnoesses 168 | abbesses 169 | handywoman 170 | toque 171 | sororities 172 | stewardesses 173 | filly 174 | czarina 175 | stepdaughters 176 | herself 177 | girls 178 | lionesses 179 | lady 180 | vagina 181 | hers 182 | masseuse 183 | cows 184 | aunts 185 | wench 186 | toques 187 | wife 188 | lioness 189 | sorceress 190 | effeminate 191 | mother 192 | lesbians 193 | female 194 | waitresses 195 | ovum 196 | skene_gland 197 | stepdaughter 198 | womb 199 | businesswomen 200 | heiress 201 | waitress 202 | headmistress 203 | woman 204 | governess 205 | godess 206 | bride 207 | grandma 208 | bride 209 | gal 210 | lesbian 211 | ladies 212 | girl 213 | grandmother 214 | mare 215 | maternity 216 | hens 217 | uterus 218 | nuns 219 | maidservants 220 | seamstress' 221 | busgirl 222 | heroines -------------------------------------------------------------------------------- /bias/male-words.txt: -------------------------------------------------------------------------------- 1 | countryman 2 | fraternal 3 | wizards 4 | manservant 5 | fathers 6 | divo 7 | actor 8 | bachelor 9 | papa 10 | dukes 11 | barman 12 | countrymen 13 | brideprice 14 | hosts 15 | airmen 16 | andropause 17 | penis 18 | prince 19 | governors 20 | abbot 21 | men 22 | widower 23 | gentlemen 24 | sorcerers 25 | sir 26 | bridegrooms 27 | baron 28 | househusbands 29 | gods 30 | nephew 31 | widowers 32 | lord 33 | brother 34 | grooms 35 | priest 36 | adultors 37 | andrology 38 | bellboys 39 | his 40 
| marquis 41 | princes 42 | emperors 43 | stallion 44 | chairman 45 | monastery 46 | priests 47 | boyhood 48 | fellas 49 | king 50 | dudes 51 | daddies 52 | manservant 53 | semen 54 | spokesman 55 | tailor 56 | cowboys 57 | dude 58 | bachelors 59 | barbershop 60 | emperor 61 | daddy 62 | masculism 63 | guys 64 | enchanter 65 | guy 66 | fatherhood 67 | androgen 68 | cameramen 69 | godfather 70 | strongman 71 | god 72 | patriarch 73 | uncle 74 | chairmen 75 | sir 76 | brotherhood 77 | host 78 | testosterone 79 | husband 80 | dad 81 | steward 82 | males 83 | cialis 84 | spokesmen 85 | pa 86 | beau 87 | stud 88 | bachelor 89 | wizard 90 | sir 91 | nephews 92 | fathered 93 | bull 94 | beaus 95 | councilmen 96 | landlords 97 | grandson 98 | fiances 99 | stepfathers 100 | horsemen 101 | grandfathers 102 | adultor 103 | schoolboy 104 | rooster 105 | grandsons 106 | bachelor 107 | cameraman 108 | dads 109 | him 110 | master 111 | lad 112 | policeman 113 | monk 114 | actors 115 | salesmen 116 | boyfriend 117 | councilman 118 | fella 119 | statesman 120 | paternal 121 | chap 122 | landlord 123 | brethren 124 | lords 125 | blokes 126 | fraternity 127 | bellboy 128 | duke 129 | ballet_dancer 130 | dudes 131 | fiance 132 | colts 133 | husbands 134 | suitor 135 | paternity 136 | he 137 | businessman 138 | masseurs 139 | hero 140 | deer 141 | busboys 142 | boyfriends 143 | kings 144 | brothers 145 | masters 146 | stepfather 147 | grooms 148 | son 149 | studs 150 | cowboy 151 | mentleman 152 | sons 153 | baritone 154 | salesman 155 | paramour 156 | male_host 157 | monks 158 | menservants 159 | mr. 
160 | headmasters 161 | lads 162 | congressman 163 | airman 164 | househusband 165 | priest 166 | barmen 167 | barons 168 | abbots 169 | handyman 170 | beard 171 | fraternities 172 | stewards 173 | colt 174 | czar 175 | stepsons 176 | himself 177 | boys 178 | lions 179 | gentleman 180 | penis 181 | his 182 | masseur 183 | bulls 184 | uncles 185 | bloke 186 | beards 187 | hubby 188 | lion 189 | sorcerer 190 | macho 191 | father 192 | gays 193 | male 194 | waiters 195 | sperm 196 | prostate 197 | stepson 198 | prostatic_utricle 199 | businessmen 200 | heir 201 | waiter 202 | headmaster 203 | man 204 | governor 205 | god 206 | bridegroom 207 | grandpa 208 | groom 209 | dude 210 | gay 211 | gents 212 | boy 213 | grandfather 214 | gelding 215 | paternity 216 | roosters 217 | prostatic_utricle 218 | priests 219 | manservants 220 | stailor 221 | busboy 222 | heros -------------------------------------------------------------------------------- /crf/config.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | 3 | pipeline: 4 | - name: WhitespaceTokenizer 5 | - name: CountVectorsFeaturizer 6 | - name: LexicalSyntacticFeaturizer 7 | "features": [ 8 | ["low", "title", "upper"], 9 | [ 10 | "BOS", 11 | "EOS", 12 | "low", 13 | "upper", 14 | "title", 15 | "digit", 16 | ], 17 | ["low", "title", "upper"], 18 | ] 19 | - name: DIETClassifier 20 | 21 | policies: 22 | - name: MemoizationPolicy 23 | - name: KerasPolicy 24 | - name: MappingPolicy 25 | -------------------------------------------------------------------------------- /diet/config-mega-basic.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: WhitespaceTokenizer 4 | - name: CountVectorsFeaturizer 5 | - name: EmbeddingIntentClassifier 6 | policies: 7 | - name: EmbeddingPolicy 8 | max_history: 10 9 | epochs: 20 10 | batch_size: 11 | - 32 12 | - 64 13 | - max_history: 6 14 | name: 
AugmentedMemoizationPolicy 15 | - core_threshold: 0.3 16 | name: TwoStageFallbackPolicy 17 | nlu_threshold: 0.8 18 | - name: FormPolicy 19 | - name: MappingPolicy 20 | -------------------------------------------------------------------------------- /diet/config-orig.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: WhitespaceTokenizer 4 | - name: CRFEntityExtractor 5 | - name: CountVectorsFeaturizer 6 | OOV_token: oov 7 | token_pattern: (?u)\b\w+\b 8 | - name: CountVectorsFeaturizer 9 | analyzer: char_wb 10 | min_ngram: 1 11 | max_ngram: 4 12 | - name: EmbeddingIntentClassifier 13 | epochs: 50 14 | ranking_length: 5 15 | - name: DucklingHTTPExtractor 16 | url: http://localhost:8000 17 | dimensions: 18 | - email 19 | - number 20 | - amount-of-money 21 | - name: EntitySynonymMapper 22 | policies: 23 | - name: EmbeddingPolicy 24 | max_history: 10 25 | epochs: 20 26 | batch_size: 27 | - 32 28 | - 64 29 | - max_history: 6 30 | name: AugmentedMemoizationPolicy 31 | - core_threshold: 0.3 32 | name: TwoStageFallbackPolicy 33 | nlu_threshold: 0.8 34 | - name: FormPolicy 35 | - name: MappingPolicy 36 | -------------------------------------------------------------------------------- /diet/diet-heavy.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: HFTransformersNLP 4 | model_weights: "bert-base-uncased" 5 | model_name: "bert" 6 | - name: LanguageModelTokenizer 7 | - name: LanguageModelFeaturizer 8 | - name: CountVectorsFeaturizer 9 | analyzer: char_wb 10 | min_ngram: 1 11 | max_ngram: 4 12 | - name: CountVectorsFeaturizer 13 | - name: DIETClassifier 14 | epochs: 30 15 | num_transformer_layers: 4 16 | transformer_size: 256 17 | use_masked_language_model: True 18 | drop_rate: 0.25 19 | weight_sparsity: 0.7 20 | batch_size: [64, 256] 21 | embedding_dimension: 30 22 | hidden_layer_sizes: 23 | text: [512, 128] 24 | policies: 
25 | - name: EmbeddingPolicy 26 | max_history: 10 27 | epochs: 20 28 | batch_size: 29 | - 32 30 | - 64 31 | - max_history: 6 32 | name: AugmentedMemoizationPolicy 33 | - core_threshold: 0.3 34 | name: TwoStageFallbackPolicy 35 | nlu_threshold: 0.8 36 | - name: FormPolicy 37 | - name: MappingPolicy 38 | -------------------------------------------------------------------------------- /diet/diet-light.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: WhitespaceTokenizer 4 | - name: CountVectorsFeaturizer 5 | - name: CountVectorsFeaturizer 6 | analyzer: char_wb 7 | min_ngram: 1 8 | max_ngram: 4 9 | - name: DIETClassifier 10 | epochs: 20 11 | learning_rate: 0.005 12 | num_transformer_layers: 0 13 | embedding_dimension: 10 14 | weight_sparcity: 0.90 15 | hidden_layer_sizes: 16 | text: [256, 128] 17 | policies: 18 | - name: EmbeddingPolicy 19 | max_history: 10 20 | epochs: 20 21 | batch_size: 22 | - 32 23 | - 64 24 | - max_history: 6 25 | name: AugmentedMemoizationPolicy 26 | - core_threshold: 0.3 27 | name: TwoStageFallbackPolicy 28 | nlu_threshold: 0.8 29 | - name: FormPolicy 30 | - name: MappingPolicy 31 | -------------------------------------------------------------------------------- /diet/diet-replace-mask.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: WhitespaceTokenizer 4 | - name: LexicalSyntacticFeaturizer 5 | - name: CountVectorsFeaturizer 6 | OOV_token: oov 7 | token_pattern: (?u)\b\w+\b 8 | - name: CountVectorsFeaturizer 9 | analyzer: char_wb 10 | min_ngram: 1 11 | max_ngram: 4 12 | - name: DIETClassifier 13 | epochs: 50 14 | ranking_length: 5 15 | use_masked_language_model: True 16 | - name: DucklingHTTPExtractor 17 | url: http://localhost:8000 18 | dimensions: 19 | - email 20 | - number 21 | - amount-of-money 22 | - name: EntitySynonymMapper 23 | policies: 24 | - name: EmbeddingPolicy 25 | 
max_history: 10 26 | epochs: 20 27 | batch_size: 28 | - 32 29 | - 64 30 | - max_history: 6 31 | name: AugmentedMemoizationPolicy 32 | - core_threshold: 0.3 33 | name: TwoStageFallbackPolicy 34 | nlu_threshold: 0.8 35 | - name: FormPolicy 36 | - name: MappingPolicy 37 | -------------------------------------------------------------------------------- /diet/diet-replace.yml: -------------------------------------------------------------------------------- 1 | language: en 2 | pipeline: 3 | - name: WhitespaceTokenizer 4 | - name: LexicalSyntacticFeaturizer 5 | - name: CountVectorsFeaturizer 6 | OOV_token: oov 7 | token_pattern: (?u)\b\w+\b 8 | - name: CountVectorsFeaturizer 9 | analyzer: char_wb 10 | min_ngram: 1 11 | max_ngram: 4 12 | - name: DIETClassifier 13 | epochs: 50 14 | ranking_length: 5 15 | - name: DucklingHTTPExtractor 16 | url: http://localhost:8000 17 | dimensions: 18 | - email 19 | - number 20 | - amount-of-money 21 | - name: EntitySynonymMapper 22 | policies: 23 | - name: EmbeddingPolicy 24 | max_history: 10 25 | epochs: 20 26 | batch_size: 27 | - 32 28 | - 64 29 | - max_history: 6 30 | name: AugmentedMemoizationPolicy 31 | - core_threshold: 0.3 32 | name: TwoStageFallbackPolicy 33 | nlu_threshold: 0.8 34 | - name: FormPolicy 35 | - name: MappingPolicy 36 | -------------------------------------------------------------------------------- /diet/readme.md: -------------------------------------------------------------------------------- 1 | ## readme 2 | 3 | This gist contains the code to repeat the steps in the DIET benchmarking youtube video. You can download all the files by cloning this gist; 4 | 5 | ``` 6 | git clone git@gist.github.com:81fc9433182ccfb9dece4bb4dbde1f7a.git 7 | ``` 8 | 9 | You'll also need to clone the repository over [here](https://github.com/RasaHQ/rasa-demo) to get the dataset you'll need. 
You can clone that repository via; 10 | 11 | ``` 12 | git clone git@github.com:RasaHQ/rasa-demo.git 13 | ``` 14 | 15 | You will also need to ensure that you've installed the bert dependencies if you 16 | want to run the heavy model. 17 | 18 | ``` 19 | pip install "rasa[transformers]" 20 | ``` 21 | 22 | Once that is done you can repeat everything we've done here by running; 23 | 24 | ``` 25 | mkdir results 26 | rasa test nlu --config configs/config-orig.yml --cross-validation --runs 1 --folds 2 --out results/config-orig 27 | rasa test nlu --config configs/config-init.yml --cross-validation --runs 1 --folds 2 --out results/config-init 28 | rasa test nlu --config configs/diet-replace.yml --cross-validation --runs 1 --folds 2 --out results/diet-replace 29 | rasa test nlu --config configs/diet-minimum.yml --cross-validation --runs 1 --folds 2 --out results/diet-minimum 30 | rasa test nlu --config configs/diet-heavy.yml --cross-validation --runs 1 --folds 2 --out results/diet-heavy 31 | ``` 32 | 33 | Once done you can use streamlit to see a dashboard of the results. 
# Score columns kept for every report row once ``support`` has been dropped.
_METRIC_COLUMNS = ("precision", "recall", "f1-score")


def _read_weighted_avg(path):
    """Return the ``weighted avg`` rows of one JSON report as a DataFrame.

    Args:
        path: ``pathlib.Path`` of a ``*_report.json`` file. Its top-level
            keys are expected to map to score dictionaries.

    Returns:
        A ``pandas.DataFrame`` with one row per top-level key containing
        ``"weighted avg"``. The ``support`` column is removed and a
        ``config`` column holds the name of the folder the report lives in.
        The frame is empty when the report has no such key.
    """
    blob = json.loads(path.read_text())
    # The parent folder names the configuration. For ``results/<cfg>/x.json``
    # this equals ``path.parts[1]`` but it also works for absolute paths.
    config_name = path.parent.name
    rows = [
        {**scores, "config": config_name}
        for label, scores in blob.items()
        if "weighted avg" in label
    ]
    # errors="ignore": a report without rows has no ``support`` column.
    return pd.DataFrame(rows).drop(columns=["support"], errors="ignore")


def read_intent_report(path):
    """Read the weighted-average scores from an intent report file.

    See ``_read_weighted_avg`` for the accepted input and returned frame.
    """
    return _read_weighted_avg(path)


def read_entity_report(path):
    """Read the weighted-average scores from an entity report file.

    See ``_read_weighted_avg`` for the accepted input and returned frame.
    """
    return _read_weighted_avg(path)


def add_zeros(dataf, all_configs):
    """Append an all-zero score row for every config missing from ``dataf``.

    Args:
        dataf: DataFrame of report rows; configs are read from its
            ``config`` column (a frame without that column counts as empty).
        all_configs: iterable of config names that must be present.

    Returns:
        ``dataf`` itself when nothing is missing, otherwise a new frame with
        one extra row per missing config, in ``all_configs`` order, whose
        precision, recall and f1-score are ``0``.
    """
    present = set(dataf["config"]) if "config" in dataf.columns else set()
    # dict.fromkeys de-duplicates while keeping the caller's order.
    missing = [cfg for cfg in dict.fromkeys(all_configs) if cfg not in present]
    if not missing:
        return dataf
    zeros = pd.DataFrame(
        {**dict.fromkeys(_METRIC_COLUMNS, 0), "config": missing},
        # Every padded row gets index label 0, as appending one row at a
        # time would produce.
        index=[0] * len(missing),
    )
    if dataf.empty:
        return zeros
    return pd.concat([dataf, zeros])


def _concat_reports(frames):
    """Concatenate non-empty report frames, or return an empty typed frame."""
    frames = [frame for frame in frames if not frame.empty]
    if not frames:
        return pd.DataFrame(columns=[*_METRIC_COLUMNS, "config"])
    return pd.concat(frames)


# NOTE(review): the original script had a bare ``st.cache()`` call right above
# ``read_pandas``. As a stand-alone statement it decorated nothing, so it is
# omitted here. Put ``@st.cache`` directly on the function, where ``st`` is
# imported, if caching was intended.
def read_pandas():
    """Collect intent and entity scores from the ``results`` folder.

    Reads ``results/<config>/*_report.json`` relative to the current working
    directory. Files named ``*intent_report*`` feed the intent frame;
    ``CRFEntityExtractor_report.json`` and ``DIETClassifier_report.json``
    feed the entity frame.

    Returns:
        ``(intent_df, entity_df)``. ``entity_df`` has a zero-score row for
        every config folder that produced no entity scores. Both frames are
        empty, but carry the expected columns, when no report is found.
    """
    results_dir = pathlib.Path("results")
    # sorted(): glob order depends on the file system.
    report_paths = sorted(results_dir.glob("*/*_report.json"))
    configurations = sorted({p.parent.name for p in report_paths})

    # Match on the file name only, so a config folder whose name happens to
    # contain "intent_report" cannot pull entity reports into the intent frame.
    intent_df = _concat_reports(
        read_intent_report(p) for p in report_paths if "intent_report" in p.name
    )

    entity_paths = sorted(results_dir.glob("*/CRFEntityExtractor_report.json"))
    entity_paths += sorted(results_dir.glob("*/DIETClassifier_report.json"))
    entity_df = _concat_reports(read_entity_report(p) for p in entity_paths)
    return intent_df, add_zeros(entity_df, all_configs=configurations)
# NOTE(review): the original script used `num_words` without defining it, so
# it stopped with a NameError before any model was built. The value below is
# a placeholder only -- TODO replace it with the real vocabulary size
# (largest word index fed to the model + 1).
num_words = 10_000
# Length of the embedding vector learned for each word.
dim_words = 5

# this one is so we might grab the embeddings
# (a stand-alone sub-model: word index -> flat `dim_words`-long vector)
model_emb = Sequential()
embedding = Embedding(num_words, dim_words, input_length=1)
model_emb.add(embedding)
model_emb.add(Flatten())

# Each input carries a single word index.
word_one = Input(shape=(1,))
word_two = Input(shape=(1,))

# Both words go through the SAME embedding model, so they share weights.
# Despite its name, `cross_prod` is the dot product of the two word vectors.
cross_prod = Dot(axes=1)([model_emb(word_one), model_emb(word_two)])
out = Dense(1, activation="relu")(cross_prod)

# Two word indices in, one non-negative score out.
glovelike = Model(inputs=[word_one, word_two], outputs=out)
-------------------------------------------------------------------------------- /images/benchmarks.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/benchmarks.jpeg -------------------------------------------------------------------------------- /images/bias.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/bias.jpg -------------------------------------------------------------------------------- /images/bulk-labelling-ui.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/bulk-labelling-ui.jpeg -------------------------------------------------------------------------------- /images/byte.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/byte.jpg -------------------------------------------------------------------------------- /images/confidence.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/confidence.jpeg -------------------------------------------------------------------------------- /images/countv.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/countv.jpg -------------------------------------------------------------------------------- 
/images/debias-hard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/debias-hard.jpg -------------------------------------------------------------------------------- /images/debias-proj.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/debias-proj.jpg -------------------------------------------------------------------------------- /images/fallback-detection.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/fallback-detection.jpeg -------------------------------------------------------------------------------- /images/flashtext.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/flashtext.jpeg -------------------------------------------------------------------------------- /images/incremental-training.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/incremental-training.jpeg -------------------------------------------------------------------------------- /images/iterate-data.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/iterate-data.jpeg -------------------------------------------------------------------------------- /images/labse.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/labse.jpeg -------------------------------------------------------------------------------- /images/language.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/language.jpeg -------------------------------------------------------------------------------- /images/leven.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/leven.jpg -------------------------------------------------------------------------------- /images/lexical-ambiguity.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/lexical-ambiguity.jpeg -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/logo.png -------------------------------------------------------------------------------- /images/remain-careful.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/remain-careful.jpg -------------------------------------------------------------------------------- /images/resp1.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/resp1.jpg -------------------------------------------------------------------------------- /images/resp2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/resp2.jpg -------------------------------------------------------------------------------- /images/subw-imp.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/subw-imp.jpg -------------------------------------------------------------------------------- /images/subw.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/subw.jpg -------------------------------------------------------------------------------- /images/ted1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/ted1.jpg -------------------------------------------------------------------------------- /images/ted2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/ted2.jpg -------------------------------------------------------------------------------- /images/toxic-lang.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/toxic-lang.png -------------------------------------------------------------------------------- /images/vid-1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-1.jpg -------------------------------------------------------------------------------- /images/vid-10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-10.jpg -------------------------------------------------------------------------------- /images/vid-11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-11.jpg -------------------------------------------------------------------------------- /images/vid-12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-12.jpg -------------------------------------------------------------------------------- /images/vid-13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-13.jpg -------------------------------------------------------------------------------- /images/vid-2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-2.jpg -------------------------------------------------------------------------------- /images/vid-3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-3.jpg -------------------------------------------------------------------------------- /images/vid-4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-4.jpg -------------------------------------------------------------------------------- /images/vid-5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-5.jpg -------------------------------------------------------------------------------- /images/vid-6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-6.jpg -------------------------------------------------------------------------------- /images/vid-7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-7.jpg -------------------------------------------------------------------------------- /images/vid-8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/images/vid-8.jpg 
# Pre-trained embedding backends shared by every pipeline built below.
ft_lang = FasttextLanguage("../whatlies/embeddings/cc.en.300.bin")
use_lang = UniversalSentenceLanguage()


def generate_model(emb, model='lr'):
    """Build an untrained text-classification pipeline for one feature setup.

    Args:
        emb: name of the feature set.
            ``"cv"`` uses word counts only;
            ``"cv-ngram"`` adds character 2-3 gram counts;
            ``"ft"`` / ``"use"`` add the ``ft_lang`` / ``use_lang``
            embeddings on top of both count featurizers.
        model: key of the classifier placed at the end of the pipeline.
            Only ``'lr'`` (balanced liblinear logistic regression) exists.

    Returns:
        A scikit-learn pipeline: a ``FeatureUnion`` of the selected
        featurizers followed by the classifier.

    Raises:
        KeyError: if ``model`` is not a known classifier key.
        ValueError: if ``emb`` is not a known feature set name.
    """
    # Imported here because the file's import block never brought it in;
    # the original raised NameError on this name.
    from sklearn.linear_model import LogisticRegression

    models = {
        'lr': LogisticRegression(solver='liblinear', class_weight="balanced"),
    }
    classifier = models[model]

    # Step names are kept as in the original branches. The "ft" branch used
    # the undefined name `ft`; the module-level object is `ft_lang`.
    embedders = {
        "use": ('use_lang', use_lang),
        "ft": ('ft', ft_lang),
    }
    if emb not in ("cv", "cv-ngram") and emb not in embedders:
        raise ValueError(
            f"unknown emb {emb!r}; expected one of 'cv', 'cv-ngram', 'ft', 'use'"
        )

    features = [('cv', CountVectorizer())]
    if emb != "cv":
        # "cv" is `experiment`'s default but had no branch, which ended in
        # UnboundLocalError. It is treated as the word-count-only baseline.
        features.append(
            ('cv-ngram', CountVectorizer(analyzer='char', ngram_range=(2, 3)))
        )
    if emb in embedders:
        features.append(embedders[emb])
    return make_pipeline(FeatureUnion(features), classifier)


@memfile('benchmark-logs.jsonl')
@time_taken()
def experiment(dataset, model, emb="cv", train_size=100, test_size=1000):
    """Train one pipeline on a dataset slice and report its accuracy.

    Args:
        dataset: key into ``datasets`` selecting the CSV file to load.
        model: classifier key forwarded to ``generate_model``.
        emb: feature set name forwarded to ``generate_model``.
        train_size: number of training rows kept after the split.
        test_size: size of the held-out set, passed to ``train_test_split``.

    Returns:
        dict with ``accuracy_test``, ``accuracy_train`` and ``pred_time``
        (seconds spent predicting the test set).
    """
    # NOTE(review): `datasets` is not defined anywhere in this file. It has
    # to map a dataset name (the grid below uses "scope") to a CSV path with
    # `text` and `label` columns. TODO define it before running.
    df = pd.read_csv(datasets[dataset]).dropna(subset=['text', 'label'])

    X_train, X_test, y_train, y_test = train_test_split(
        df['text'], df['label'],
        test_size=test_size,
        stratify=df['label'],
        random_state=42,
    )

    # Everything must be a list when the input is text.
    X_train, y_train = list(X_train[:train_size]), list(y_train[:train_size])
    X_test, y_test = list(X_test), list(y_test)

    # Generate and train the model
    mod = generate_model(emb=emb, model=model)
    mod.fit(X_train, y_train)

    # Gather stats; only the test-set prediction is timed.
    y_train_pred = mod.predict(X_train)
    tic = time.perf_counter()
    y_test_pred = mod.predict(X_test)
    toc = time.perf_counter()
    return {
        # np.asarray makes the element-wise comparison explicit instead of
        # relying on `list == ndarray` falling back to NumPy's operator.
        'accuracy_test': np.mean(np.asarray(y_test) == np.asarray(y_test_pred)),
        'accuracy_train': np.mean(np.asarray(y_train) == np.asarray(y_train_pred)),
        'pred_time': toc - tic
    }


# One experiment per (embedding, train_size) combination on the "scope" data.
settings = grid(
    dataset=["scope"],
    model=["lr"],
    emb=["ft", "use", "cv-ngram"],
    train_size=np.arange(500, 9500, 500),
    test_size=[4000]
)

for s in settings:
    experiment(**s)
{"dataset":"rasademo","model":"lr","emb":"ft","train_size":500,"test_size":1000,"accuracy_test":0.855,"accuracy_train":0.998,"pred_time":0.05651450157165527,"time_taken":0.42} 5 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":600,"test_size":1000,"accuracy_test":0.857,"accuracy_train":0.9983333333333333,"pred_time":0.056448936462402344,"time_taken":0.49} 6 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":700,"test_size":1000,"accuracy_test":0.864,"accuracy_train":0.9985714285714286,"pred_time":0.05691123008728027,"time_taken":0.57} 7 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":800,"test_size":1000,"accuracy_test":0.877,"accuracy_train":0.9975,"pred_time":0.05725860595703125,"time_taken":0.66} 8 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":900,"test_size":1000,"accuracy_test":0.882,"accuracy_train":0.9966666666666667,"pred_time":0.05646491050720215,"time_taken":0.74} 9 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":1000,"test_size":1000,"accuracy_test":0.886,"accuracy_train":0.996,"pred_time":0.056417226791381836,"time_taken":0.83} 10 | {"dataset":"rasademo","model":"lr","emb":"ft","train_size":1100,"test_size":1000,"accuracy_test":0.896,"accuracy_train":0.9963636363636363,"pred_time":0.056476593017578125,"time_taken":0.94} 11 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":200,"test_size":1000,"accuracy_test":0.783,"accuracy_train":1.0,"pred_time":0.9751472473144531,"time_taken":1.7} 12 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":300,"test_size":1000,"accuracy_test":0.836,"accuracy_train":1.0,"pred_time":0.9808781147003174,"time_taken":1.8} 13 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":400,"test_size":1000,"accuracy_test":0.858,"accuracy_train":1.0,"pred_time":0.9849858283996582,"time_taken":2.12} 14 | 
{"dataset":"rasademo","model":"lr","emb":"use","train_size":500,"test_size":1000,"accuracy_test":0.864,"accuracy_train":0.998,"pred_time":0.9827330112457275,"time_taken":2.39} 15 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":600,"test_size":1000,"accuracy_test":0.866,"accuracy_train":0.9983333333333333,"pred_time":0.9716253280639648,"time_taken":2.73} 16 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":700,"test_size":1000,"accuracy_test":0.87,"accuracy_train":1.0,"pred_time":0.9795506000518799,"time_taken":3.0} 17 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":800,"test_size":1000,"accuracy_test":0.876,"accuracy_train":0.99625,"pred_time":0.9777274131774902,"time_taken":3.38} 18 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":900,"test_size":1000,"accuracy_test":0.886,"accuracy_train":0.9966666666666667,"pred_time":0.9741647243499756,"time_taken":3.74} 19 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":1000,"test_size":1000,"accuracy_test":0.892,"accuracy_train":0.997,"pred_time":0.9753170013427734,"time_taken":4.03} 20 | {"dataset":"rasademo","model":"lr","emb":"use","train_size":1100,"test_size":1000,"accuracy_test":0.9,"accuracy_train":0.9963636363636363,"pred_time":0.9810361862182617,"time_taken":4.58} 21 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":200,"test_size":1000,"accuracy_test":0.776,"accuracy_train":1.0,"pred_time":0.026402711868286133,"time_taken":0.07} 22 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":300,"test_size":1000,"accuracy_test":0.829,"accuracy_train":1.0,"pred_time":0.0264432430267334,"time_taken":0.08} 23 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":400,"test_size":1000,"accuracy_test":0.849,"accuracy_train":1.0,"pred_time":0.026544809341430664,"time_taken":0.1} 24 | 
{"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":500,"test_size":1000,"accuracy_test":0.853,"accuracy_train":0.998,"pred_time":0.02636265754699707,"time_taken":0.12} 25 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":600,"test_size":1000,"accuracy_test":0.86,"accuracy_train":0.9983333333333333,"pred_time":0.026688575744628906,"time_taken":0.13} 26 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":700,"test_size":1000,"accuracy_test":0.867,"accuracy_train":0.9971428571428571,"pred_time":0.026691913604736328,"time_taken":0.15} 27 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":800,"test_size":1000,"accuracy_test":0.877,"accuracy_train":0.99375,"pred_time":0.026842832565307617,"time_taken":0.17} 28 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":900,"test_size":1000,"accuracy_test":0.883,"accuracy_train":0.9944444444444445,"pred_time":0.026617050170898438,"time_taken":0.19} 29 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":1000,"test_size":1000,"accuracy_test":0.885,"accuracy_train":0.994,"pred_time":0.02674078941345215,"time_taken":0.21} 30 | {"dataset":"rasademo","model":"lr","emb":"cv-ngram","train_size":1100,"test_size":1000,"accuracy_test":0.896,"accuracy_train":0.9936363636363637,"pred_time":0.02686476707458496,"time_taken":0.23} 31 | {"dataset":"scope","model":"lr","emb":"ft","train_size":500,"test_size":4000,"accuracy_test":0.55625,"accuracy_train":1.0,"pred_time":0.37118983268737793,"time_taken":2.56} 32 | {"dataset":"scope","model":"lr","emb":"ft","train_size":500,"test_size":4000,"accuracy_test":0.55625,"accuracy_train":1.0,"pred_time":0.37976861000061035,"time_taken":2.74} 33 | {"dataset":"scope","model":"lr","emb":"ft","train_size":1000,"test_size":4000,"accuracy_test":0.70475,"accuracy_train":0.999,"pred_time":0.38712477684020996,"time_taken":5.97} 34 | 
{"dataset":"scope","model":"lr","emb":"ft","train_size":1500,"test_size":4000,"accuracy_test":0.774,"accuracy_train":0.9986666666666667,"pred_time":0.3793807029724121,"time_taken":12.37} 35 | {"dataset":"scope","model":"lr","emb":"ft","train_size":2000,"test_size":4000,"accuracy_test":0.81025,"accuracy_train":0.9995,"pred_time":0.3807692527770996,"time_taken":14.11} 36 | {"dataset":"scope","model":"lr","emb":"ft","train_size":2500,"test_size":4000,"accuracy_test":0.82875,"accuracy_train":0.9984,"pred_time":0.3881092071533203,"time_taken":18.72} 37 | {"dataset":"scope","model":"lr","emb":"ft","train_size":3000,"test_size":4000,"accuracy_test":0.84925,"accuracy_train":0.998,"pred_time":0.382002592086792,"time_taken":23.23} 38 | {"dataset":"scope","model":"lr","emb":"ft","train_size":3500,"test_size":4000,"accuracy_test":0.8685,"accuracy_train":0.9977142857142857,"pred_time":0.37944746017456055,"time_taken":30.12} 39 | {"dataset":"scope","model":"lr","emb":"ft","train_size":4000,"test_size":4000,"accuracy_test":0.87525,"accuracy_train":0.997,"pred_time":0.37783193588256836,"time_taken":36.35} 40 | {"dataset":"scope","model":"lr","emb":"ft","train_size":4500,"test_size":4000,"accuracy_test":0.8795,"accuracy_train":0.9968888888888889,"pred_time":0.37891101837158203,"time_taken":40.89} 41 | {"dataset":"scope","model":"lr","emb":"ft","train_size":5000,"test_size":4000,"accuracy_test":0.88875,"accuracy_train":0.996,"pred_time":0.3801717758178711,"time_taken":48.31} 42 | {"dataset":"scope","model":"lr","emb":"ft","train_size":5500,"test_size":4000,"accuracy_test":0.89425,"accuracy_train":0.9956363636363637,"pred_time":0.3786475658416748,"time_taken":52.95} 43 | {"dataset":"scope","model":"lr","emb":"ft","train_size":6000,"test_size":4000,"accuracy_test":0.8975,"accuracy_train":0.9956666666666667,"pred_time":0.37787723541259766,"time_taken":59.11} 44 | 
{"dataset":"scope","model":"lr","emb":"ft","train_size":6500,"test_size":4000,"accuracy_test":0.90075,"accuracy_train":0.9958461538461538,"pred_time":0.3805055618286133,"time_taken":82.49} 45 | {"dataset":"scope","model":"lr","emb":"ft","train_size":7000,"test_size":4000,"accuracy_test":0.9045,"accuracy_train":0.996,"pred_time":0.38358306884765625,"time_taken":90.78} 46 | {"dataset":"scope","model":"lr","emb":"ft","train_size":7500,"test_size":4000,"accuracy_test":0.90925,"accuracy_train":0.9961333333333333,"pred_time":0.3860592842102051,"time_taken":98.64} 47 | {"dataset":"scope","model":"lr","emb":"ft","train_size":8000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.995625,"pred_time":0.38474011421203613,"time_taken":106.34} 48 | {"dataset":"scope","model":"lr","emb":"ft","train_size":8500,"test_size":4000,"accuracy_test":0.91175,"accuracy_train":0.9955294117647059,"pred_time":0.3792722225189209,"time_taken":115.82} 49 | {"dataset":"scope","model":"lr","emb":"ft","train_size":9000,"test_size":4000,"accuracy_test":0.914,"accuracy_train":0.9954444444444445,"pred_time":0.382676362991333,"time_taken":124.8} 50 | {"dataset":"scope","model":"lr","emb":"ft","train_size":9500,"test_size":4000,"accuracy_test":0.9185,"accuracy_train":0.9945263157894737,"pred_time":0.38468122482299805,"time_taken":133.39} 51 | {"dataset":"scope","model":"lr","emb":"use","train_size":500,"test_size":4000,"accuracy_test":0.564,"accuracy_train":1.0,"pred_time":4.3004679679870605,"time_taken":8.74} 52 | {"dataset":"scope","model":"lr","emb":"use","train_size":1000,"test_size":4000,"accuracy_test":0.71325,"accuracy_train":1.0,"pred_time":4.3029162883758545,"time_taken":16.01} 53 | {"dataset":"scope","model":"lr","emb":"use","train_size":1500,"test_size":4000,"accuracy_test":0.7815,"accuracy_train":0.9986666666666667,"pred_time":4.322261095046997,"time_taken":22.46} 54 | 
{"dataset":"scope","model":"lr","emb":"use","train_size":2000,"test_size":4000,"accuracy_test":0.819,"accuracy_train":0.9995,"pred_time":4.327869653701782,"time_taken":29.61} 55 | {"dataset":"scope","model":"lr","emb":"use","train_size":2500,"test_size":4000,"accuracy_test":0.83775,"accuracy_train":0.9984,"pred_time":4.3121161460876465,"time_taken":38.75} 56 | {"dataset":"scope","model":"lr","emb":"use","train_size":3000,"test_size":4000,"accuracy_test":0.85925,"accuracy_train":0.998,"pred_time":4.291502475738525,"time_taken":45.35} 57 | {"dataset":"scope","model":"lr","emb":"use","train_size":3500,"test_size":4000,"accuracy_test":0.8755,"accuracy_train":0.998,"pred_time":4.301600456237793,"time_taken":57.41} 58 | {"dataset":"scope","model":"lr","emb":"use","train_size":4000,"test_size":4000,"accuracy_test":0.88175,"accuracy_train":0.99725,"pred_time":4.298020839691162,"time_taken":66.41} 59 | {"dataset":"scope","model":"lr","emb":"use","train_size":4500,"test_size":4000,"accuracy_test":0.8865,"accuracy_train":0.9973333333333333,"pred_time":4.29729437828064,"time_taken":70.1} 60 | {"dataset":"scope","model":"lr","emb":"use","train_size":5000,"test_size":4000,"accuracy_test":0.89675,"accuracy_train":0.9962,"pred_time":4.304081678390503,"time_taken":77.83} 61 | {"dataset":"scope","model":"lr","emb":"use","train_size":5500,"test_size":4000,"accuracy_test":0.90175,"accuracy_train":0.996,"pred_time":4.2856175899505615,"time_taken":86.89} 62 | {"dataset":"scope","model":"lr","emb":"use","train_size":6000,"test_size":4000,"accuracy_test":0.9065,"accuracy_train":0.9965,"pred_time":4.321293830871582,"time_taken":123.94} 63 | {"dataset":"scope","model":"lr","emb":"use","train_size":6500,"test_size":4000,"accuracy_test":0.90925,"accuracy_train":0.9964615384615385,"pred_time":4.301099061965942,"time_taken":136.0} 64 | 
{"dataset":"scope","model":"lr","emb":"use","train_size":7000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.9965714285714286,"pred_time":4.292471647262573,"time_taken":148.71} 65 | {"dataset":"scope","model":"lr","emb":"use","train_size":7500,"test_size":4000,"accuracy_test":0.915,"accuracy_train":0.9962666666666666,"pred_time":4.3266870975494385,"time_taken":178.36} 66 | {"dataset":"scope","model":"lr","emb":"use","train_size":8000,"test_size":4000,"accuracy_test":0.9165,"accuracy_train":0.996375,"pred_time":4.319508075714111,"time_taken":179.46} 67 | {"dataset":"scope","model":"lr","emb":"use","train_size":8500,"test_size":4000,"accuracy_test":0.919,"accuracy_train":0.9962352941176471,"pred_time":4.291066646575928,"time_taken":194.03} 68 | {"dataset":"scope","model":"lr","emb":"use","train_size":9000,"test_size":4000,"accuracy_test":0.92175,"accuracy_train":0.996,"pred_time":4.30959153175354,"time_taken":206.82} 69 | {"dataset":"scope","model":"lr","emb":"use","train_size":9500,"test_size":4000,"accuracy_test":0.92475,"accuracy_train":0.9956842105263158,"pred_time":4.291162014007568,"time_taken":220.63} 70 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":500,"test_size":4000,"accuracy_test":0.55675,"accuracy_train":1.0,"pred_time":0.17368674278259277,"time_taken":0.86} 71 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":1000,"test_size":4000,"accuracy_test":0.70475,"accuracy_train":0.999,"pred_time":0.1765146255493164,"time_taken":1.51} 72 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":1500,"test_size":4000,"accuracy_test":0.77475,"accuracy_train":0.9986666666666667,"pred_time":0.17641973495483398,"time_taken":2.18} 73 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":2000,"test_size":4000,"accuracy_test":0.8105,"accuracy_train":0.9995,"pred_time":0.17655467987060547,"time_taken":2.96} 74 | 
{"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":2500,"test_size":4000,"accuracy_test":0.82925,"accuracy_train":0.9984,"pred_time":0.17617464065551758,"time_taken":3.82} 75 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":3000,"test_size":4000,"accuracy_test":0.85,"accuracy_train":0.998,"pred_time":0.17685651779174805,"time_taken":4.57} 76 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":3500,"test_size":4000,"accuracy_test":0.869,"accuracy_train":0.9977142857142857,"pred_time":0.1765122413635254,"time_taken":5.35} 77 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":4000,"test_size":4000,"accuracy_test":0.87525,"accuracy_train":0.997,"pred_time":0.17818188667297363,"time_taken":6.15} 78 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":4500,"test_size":4000,"accuracy_test":0.879,"accuracy_train":0.9968888888888889,"pred_time":0.1768357753753662,"time_taken":7.43} 79 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":5000,"test_size":4000,"accuracy_test":0.8885,"accuracy_train":0.996,"pred_time":0.17785978317260742,"time_taken":9.02} 80 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":5500,"test_size":4000,"accuracy_test":0.894,"accuracy_train":0.9956363636363637,"pred_time":0.17731571197509766,"time_taken":9.72} 81 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":6000,"test_size":4000,"accuracy_test":0.8975,"accuracy_train":0.9955,"pred_time":0.1766970157623291,"time_taken":12.23} 82 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":6500,"test_size":4000,"accuracy_test":0.90075,"accuracy_train":0.9958461538461538,"pred_time":0.17724108695983887,"time_taken":13.85} 83 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":7000,"test_size":4000,"accuracy_test":0.90375,"accuracy_train":0.996,"pred_time":0.1762101650238037,"time_taken":20.67} 84 | 
{"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":7500,"test_size":4000,"accuracy_test":0.9085,"accuracy_train":0.9961333333333333,"pred_time":0.1766214370727539,"time_taken":24.64} 85 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":8000,"test_size":4000,"accuracy_test":0.912,"accuracy_train":0.995625,"pred_time":0.1761927604675293,"time_taken":26.94} 86 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":8500,"test_size":4000,"accuracy_test":0.9115,"accuracy_train":0.9955294117647059,"pred_time":0.17797327041625977,"time_taken":33.41} 87 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":9000,"test_size":4000,"accuracy_test":0.91375,"accuracy_train":0.9953333333333333,"pred_time":0.17589163780212402,"time_taken":31.29} 88 | {"dataset":"scope","model":"lr","emb":"cv-ngram","train_size":9500,"test_size":4000,"accuracy_test":0.9185,"accuracy_train":0.994421052631579,"pred_time":0.17615866661071777,"time_taken":33.87} 89 | -------------------------------------------------------------------------------- /language/ar.tsv: -------------------------------------------------------------------------------- 1 | 1 كما as 2 | 2 أنا I 3 | 3 له his 4 | 4 أن that 5 | 5 هو he 6 | 6 كان was 7 | 7 إلى for 8 | 8 في on 9 | 9 هي are 10 | 10 مع with 11 | 11 هم they 12 | 12 يكون be 13 | 13 في at 14 | 14 واحد one 15 | 15 ديك have 16 | 16 هذا this 17 | 17 من from 18 | 18 بواسطة by 19 | 19 حار hot 20 | 20 كلمة word 21 | 21 لكن but 22 | 22 ما what 23 | 23 بعض some 24 | 24 هو is 25 | 25 هو it 26 | 26 أنت you 27 | 27 أو or 28 | 28 كان had 29 | 29 و the 30 | 30 من of 31 | 31 إلى to 32 | 32 و and 33 | 33 و a 34 | 34 في in 35 | 35 نحن we 36 | 36 علبة can 37 | 37 خارج out 38 | 38 البعض other 39 | 39 و were 40 | 40 التي which 41 | 41 القيام do 42 | 42 من their 43 | 43 الوقت time 44 | 44 إذا if 45 | 45 سوف will 46 | 46 كيف how 47 | 47 قال said 48 | 48 و an 49 | 49 كل each 50 | 50 أقول tell 51 | 51 لا does 52 | 52 مجموعة set 53 | 53 ثلاثة three 54 | 
54 تريد want 55 | 55 هواء air 56 | 56 جيد well 57 | 57 أيضا also 58 | 58 لعب play 59 | 59 صغير small 60 | 60 نهاية end 61 | 61 وضع put 62 | 62 المنزل home 63 | 63 قرأ read 64 | 64 يد hand 65 | 65 ميناء port 66 | 66 كبير large 67 | 67 تهجى spell 68 | 68 إضافة add 69 | 69 حتى even 70 | 70 الأرض land 71 | 71 هنا here 72 | 72 يجب must 73 | 73 كبير big 74 | 74 ارتفاع high 75 | 75 مثل such 76 | 76 تابع follow 77 | 77 فعل act 78 | 78 لماذا why 79 | 79 تطلب ask 80 | 80 الرجال men 81 | 81 تغيير change 82 | 82 ذهب went 83 | 83 ضوء light 84 | 84 نوع kind 85 | 85 بعيدا off 86 | 86 تحتاج need 87 | 87 منزل house 88 | 88 صور picture 89 | 89 محاولة try 90 | 90 لنا us 91 | 91 مرة أخرى again 92 | 92 الحيوان animal 93 | 93 نقطة point 94 | 94 أم mother 95 | 95 العالم world 96 | 96 قرب near 97 | 97 بناء build 98 | 98 النفس self 99 | 99 أرض earth 100 | 100 الأب father 101 | 101 أي any 102 | 102 جديدة new 103 | 103 العمل work 104 | 104 جزء part 105 | 105 أخذ take 106 | 106 الحصول على get 107 | 107 مكان place 108 | 108 مصنوع made 109 | 109 حي live 110 | 110 حيث where 111 | 111 بعد after 112 | 112 ظهر back 113 | 113 القليل little 114 | 114 فقط only 115 | 115 جولة round 116 | 116 رجل man 117 | 117 عام year 118 | 118 جاء came 119 | 119 المعرض show 120 | 120 كل every 121 | 121 جيد good 122 | 122 أنا me 123 | 123 منح give 124 | 124 لدينا our 125 | 125 تحت under 126 | 126 اسم name 127 | 127 جدا very 128 | 128 من خلال through 129 | 129 فقط just 130 | 130 شكل form 131 | 131 عقوبة sentence 132 | 132 عظيم great 133 | 133 اعتقد think 134 | 134 قول say 135 | 135 مساعدة help 136 | 136 منخفض low 137 | 137 خط line 138 | 138 اختلف differ 139 | 139 منعطف turn 140 | 140 السبب cause 141 | 141 كثيرا much 142 | 142 متوسط mean 143 | 143 قبل before 144 | 144 خطوة move 145 | 145 الحق right 146 | 146 صبي boy 147 | 147 قديم old 148 | 148 أيضا too 149 | 149 نفسه same 150 | 150 هي she 151 | 151 كل all 152 | 152 هناك there 153 | 153 عندما when 154 | 154 فوق up 155 | 155 استخدام use 156 | 156 ك your 157 | 157 طريق way 
158 | 158 حول about 159 | 159 كثير many 160 | 160 ثم then 161 | 161 هم them 162 | 162 إرسال write 163 | 163 أراد would 164 | 164 مثل like 165 | 165 هكذا so 166 | 166 هؤلاء these 167 | 167 لها her 168 | 168 طويل long 169 | 169 جعل make 170 | 170 شيء thing 171 | 171 شاهد see 172 | 172 له him 173 | 173 اثنين two 174 | 174 لديه has 175 | 175 بحث look 176 | 176 أكثر more 177 | 177 يوم day 178 | 178 يمكن could 179 | 179 تذهب go 180 | 180 جاء come 181 | 181 لم did 182 | 182 عدد number 183 | 183 صوت sound 184 | 184 لا no 185 | 185 أكثر most 186 | 186 الناس people 187 | 187 لي my 188 | 188 على over 189 | 189 تعرف know 190 | 190 ماء water 191 | 191 من than 192 | 192 دعوة call 193 | 193 الأول first 194 | 194 الذي who 195 | 195 قد may 196 | 196 إلى down 197 | 197 الجانب side 198 | 198 كان been 199 | 199 الآن now 200 | 200 اكتشاف find 201 | 201 رئيس head 202 | 202 الوقوف stand 203 | 203 الخاصة own 204 | 204 الصفحة page 205 | 205 ينبغي should 206 | 206 بلد country 207 | 207 أسس found 208 | 208 الجواب answer 209 | 209 المدرسة school 210 | 210 تنمو grow 211 | 211 دراسة study 212 | 212 لا يزال still 213 | 213 تعلم learn 214 | 214 مصنع plant 215 | 215 غطاء cover 216 | 216 غذاء food 217 | 217 شمس sun 218 | 218 أربعة four 219 | 219 بين between 220 | 220 دولة state 221 | 221 احتفظ keep 222 | 222 العين eye 223 | 223 أبدا never 224 | 224 آخر last 225 | 225 سمح let 226 | 226 يعتقد thought 227 | 227 المدينة city 228 | 228 شجرة tree 229 | 229 عبور cross 230 | 230 مزرعة farm 231 | 231 شاق hard 232 | 232 بداية start 233 | 233 قد might 234 | 234 قصة story 235 | 235 منشار saw 236 | 236 الآن far 237 | 237 بحر sea 238 | 238 رسم draw 239 | 239 غادر left 240 | 240 متأخر late 241 | 241 تشغيل run 242 | 242 لا don’t 243 | 243 في حين while 244 | 244 الصحافة press 245 | 245 قريب close 246 | 246 الليل night 247 | 247 حقيقية real 248 | 248 حياة life 249 | 249 قليل few 250 | 250 شمال north 251 | 251 كتاب book 252 | 252 حمل carry 253 | 253 استغرق took 254 | 254 علم science 255 | 255 أكل eat 256 | 256 غرفة 
room 257 | 257 صديق friend 258 | 258 بدأ began 259 | 259 فكرة idea 260 | 260 سمك fish 261 | 261 الجبل mountain 262 | 262 توقف stop 263 | 263 مرة واحدة once 264 | 264 قاعدة base 265 | 265 سمع hear 266 | 266 الحصان horse 267 | 267 قطع cut 268 | 268 بالتأكيد sure 269 | 269 راقب watch 270 | 270 لون color 271 | 271 وجه face 272 | 272 الخشب wood 273 | 273 رئيسي main 274 | 274 مفتوحة open 275 | 275 يبدو seem 276 | 276 معا together 277 | 277 المقبل next 278 | 278 أبيض white 279 | 279 الأطفال children 280 | 280 بدأ begin 281 | 281 حصلت got 282 | 282 سير walk 283 | 283 مثال example 284 | 284 سهولة ease 285 | 285 ورق paper 286 | 286 مجموعة group 287 | 287 دائما always 288 | 288 الموسيقى music 289 | 289 تلك those 290 | 290 كلا both 291 | 291 علامة mark 292 | 292 غالبا often 293 | 293 الرسالة letter 294 | 294 حتى until 295 | 295 ميل mile 296 | 296 النهر river 297 | 297 سيارة car 298 | 298 قدم feet 299 | 299 الرعاية care 300 | 300 ثان second 301 | 301 بما فيه الكفاية enough 302 | 302 سهل plain 303 | 303 فتاة girl 304 | 304 معتاد usual 305 | 305 الشباب young 306 | 306 جاهزة ready 307 | 307 فوق above 308 | 308 أبدا ever 309 | 309 أحمر red 310 | 310 قائمة list 311 | 311 على الرغم من though 312 | 312 إحساس feel 313 | 313 الحديث talk 314 | 314 طائر bird 315 | 315 قريبا soon 316 | 316 هيئة body 317 | 317 الكلب dog 318 | 318 عائلة family 319 | 319 مباشر direct 320 | 320 أربك pose 321 | 321 ترك leave 322 | 322 أغنية song 323 | 323 قياس measure 324 | 324 باب door 325 | 325 نتاج product 326 | 326 أسود black 327 | 327 قصير short 328 | 328 الأرقام numeral 329 | 329 الفئة class 330 | 330 الرياح wind 331 | 331 السؤال question 332 | 332 يحدث happen 333 | 333 كاملة complete 334 | 334 سفينة ship 335 | 335 منطقة area 336 | 336 نصف half 337 | 337 صخرة rock 338 | 338 النظام order 339 | 339 النار fire 340 | 340 الجنوب south 341 | 341 مشكلة problem 342 | 342 قطعة piece 343 | 343 قال told 344 | 344 عرف knew 345 | 345 تمر pass 346 | 346 منذ since 347 | 347 أعلى top 348 | 348 ككل whole 349 | 349 ملك 
king 350 | 350 شارع street 351 | 351 بوصة inch 352 | 352 مضاعفة multiply 353 | 353 لا شيء nothing 354 | 354 بالطبع course 355 | 355 إقامة stay 356 | 356 عجلة wheel 357 | 357 كامل full 358 | 358 قوة force 359 | 359 الأزرق blue 360 | 360 موضوع object 361 | 361 تقرر decide 362 | 362 سطح surface 363 | 363 عميق deep 364 | 364 ضوء القمر moon 365 | 365 الجزيرة island 366 | 366 القدم foot 367 | 367 نظام system 368 | 368 مشغول busy 369 | 369 اختبار test 370 | 370 سجل record 371 | 371 قارب boat 372 | 372 مشترك common 373 | 373 الذهب gold 374 | 374 ممكن possible 375 | 375 طائرة plane 376 | 376 بدلا stead 377 | 377 جاف dry 378 | 378 عجب wonder 379 | 379 الضحك laugh 380 | 380 ألف thousand 381 | 381 منذ ago 382 | 382 ركض ran 383 | 383 تحقق check 384 | 384 لعبة game 385 | 385 شكل shape 386 | 386 مساواة equate 387 | 387 حار hot 388 | 388 ملكة جمال miss 389 | 389 جلبت brought 390 | 390 حرارة heat 391 | 391 ثلج snow 392 | 392 إطار العجلة tire 393 | 393 جلب bring 394 | 394 نعم yes 395 | 395 بعيد distant 396 | 396 شغل fill 397 | 397 الشرق east 398 | 398 الطلاء paint 399 | 399 اللغة language 400 | 400 بين among 401 | 401 وحدة unit 402 | 402 قوة power 403 | 403 مدينة town 404 | 404 غرامة fine 405 | 405 معين certain 406 | 406 يطير fly 407 | 407 سقط fall 408 | 408 قيادة lead 409 | 409 صرخة cry 410 | 410 مظلم dark 411 | 411 آلة machine 412 | 412 مذكرة note 413 | 413 انتظر wait 414 | 414 خطة plan 415 | 415 الرقم figure 416 | 416 نجمة star 417 | 417 صندوق box 418 | 418 إسم noun 419 | 419 الحقل field 420 | 420 بقية rest 421 | 421 صحيح correct 422 | 422 قادرة able 423 | 423 الجنيه pound 424 | 424 منجز done 425 | 425 الجمال beauty 426 | 426 محرك drive 427 | 427 وقفت stood 428 | 428 تحتوي على contain 429 | 429 الجبهة front 430 | 430 تعليم teach 431 | 431 أسبوع week 432 | 432 نهائي final 433 | 433 أعطى gave 434 | 434 الأخضر green 435 | 435 يا oh 436 | 436 سريع quick 437 | 437 تطوير develop 438 | 438 المحيط ocean 439 | 439 الحارة warm 440 | 440 حر free 441 | 441 دقيقة minute 442 | 442 قوي strong 
443 | 443 خاص special 444 | 444 عقل mind 445 | 445 خلف behind 446 | 446 واضح clear 447 | 447 ذيل tail 448 | 448 إنتاج produce 449 | 449 حقيقة fact 450 | 450 الفضاء space 451 | 451 سمعت heard 452 | 452 أفضل best 453 | 453 ساعة hour 454 | 454 أفضل better 455 | 455 صحيح true 456 | 456 خلال during 457 | 457 مائة hundred 458 | 458 خمسة five 459 | 459 تذكر remember 460 | 460 خطوة step 461 | 461 في وقت مبكر early 462 | 462 عقد hold 463 | 463 غرب west 464 | 464 أرض ground 465 | 465 مصلحة interest 466 | 466 الوصول reach 467 | 467 بسرعة fast 468 | 468 الفعل verb 469 | 469 الغناء sing 470 | 470 استمع listen 471 | 471 ستة six 472 | 472 الجدول table 473 | 473 السفر travel 474 | 474 أقل less 475 | 475 الصباح morning 476 | 476 عشرة ten 477 | 477 بسيطة simple 478 | 478 عدة several 479 | 479 حرف لين vowel 480 | 480 نحو toward 481 | 481 حرب war 482 | 482 وضع lay 483 | 483 ضد against 484 | 484 نمط pattern 485 | 485 بطيئة slow 486 | 486 مركز center 487 | 487 الحب love 488 | 488 شخص person 489 | 489 المال money 490 | 490 خدمة serve 491 | 491 تظهر appear 492 | 492 طريق road 493 | 493 خريطة map 494 | 494 مطر rain 495 | 495 حكم rule 496 | 496 الحكم govern 497 | 497 سحب pull 498 | 498 بارد cold 499 | 499 إشعار notice 500 | 500 صوت voice 501 | 501 الطاقة energy 502 | 502 مطاردة hunt 503 | 503 محتمل probable 504 | 504 قاع bed 505 | 505 شقيق brother 506 | 506 البيض egg 507 | 507 ركوب ride 508 | 508 خلية cell 509 | 509 اعتقد believe 510 | 510 ربما perhaps 511 | 511 اختيار pick 512 | 512 مفاجئ sudden 513 | 513 عد count 514 | 514 مربع square 515 | 515 سبب reason 516 | 516 طول length 517 | 517 تمثيل represent 518 | 518 فن art 519 | 519 الموضوع subject 520 | 520 منطقة region 521 | 521 حجم size 522 | 522 تختلف vary 523 | 523 حل settle 524 | 524 الكلام speak 525 | 525 الوزن weight 526 | 526 عام general 527 | 527 الجليد ice 528 | 528 مسألة matter 529 | 529 دائرة circle 530 | 530 زوج pair 531 | 531 تضمن include 532 | 532 الفجوة divide 533 | 533 مقطع لفظي syllable 534 | 534 شعر felt 535 | 535 الكبرى 
grand 536 | 536 كرة ball 537 | 537 بعد yet 538 | 538 موجة wave 539 | 539 إسقاط drop 540 | 540 قلب heart 541 | 541 صباحا am 542 | 542 الحاضر present 543 | 543 ثقيلة heavy 544 | 544 رقص dance 545 | 545 محرك engine 546 | 546 موقف position 547 | 547 الذراع arm 548 | 548 واسع wide 549 | 549 الشراع sail 550 | 550 مادة material 551 | 551 جزء fraction 552 | 552 الغابات forest 553 | 553 الجلوس sit 554 | 554 سباق race 555 | 555 نافذة window 556 | 556 متجر store 557 | 557 الصيف summer 558 | 558 قطار train 559 | 559 نوم sleep 560 | 560 إثبات prove 561 | 561 وحيد lone 562 | 562 الساق leg 563 | 563 ممارسة exercise 564 | 564 جدار wall 565 | 565 مزلاج catch 566 | 566 جبل mount 567 | 567 رغب wish 568 | 568 سماء sky 569 | 569 مجلس الإدارة board 570 | 570 الفرح joy 571 | 571 شتاء winter 572 | 572 السبت sat 573 | 573 مكتوبة written 574 | 574 بري wild 575 | 575 أداة instrument 576 | 576 أبقى kept 577 | 577 زجاج glass 578 | 578 العشب grass 579 | 579 بقرة cow 580 | 580 العمل job 581 | 581 حافة edge 582 | 582 علامة sign 583 | 583 زيارة visit 584 | 584 الماضي past 585 | 585 لينة soft 586 | 586 مرح fun 587 | 587 مشرق bright 588 | 588 الغاز gas 589 | 589 الطقس weather 590 | 590 شهر month 591 | 591 مليون million 592 | 592 تحمل bear 593 | 593 نهاية finish 594 | 594 سعيدة happy 595 | 595 نأمل hope 596 | 596 زهرة flower 597 | 597 كسا clothe 598 | 598 غريب strange 599 | 599 ذهب gone 600 | 600 تجارة trade 601 | 601 لحن melody 602 | 602 رحلة trip 603 | 603 مكتب office 604 | 604 تلقي receive 605 | 605 الصف row 606 | 606 فم mouth 607 | 607 بالضبط exact 608 | 608 رمز symbol 609 | 609 مات die 610 | 610 الأقل least 611 | 611 مشكلة trouble 612 | 612 صيحة shout 613 | 613 إلا except 614 | 614 كتب wrote 615 | 615 بذرة seed 616 | 616 نغمة tone 617 | 617 انضمام join 618 | 618 اقترح suggest 619 | 619 نظيف clean 620 | 620 استراحة break 621 | 621 سيدة lady 622 | 622 ساحة yard 623 | 623 الارتفاع rise 624 | 624 سيئة bad 625 | 625 ضربة blow 626 | 626 زيت oil 627 | 627 الدم blood 628 | 628 لمس touch 629 | 629 نما 
grew 630 | 630 المائة cent 631 | 631 مزيج mix 632 | 632 فريق team 633 | 633 سلك wire 634 | 634 التكلفة cost 635 | 635 فقدت lost 636 | 636 أسمر brown 637 | 637 ارتداء wear 638 | 638 حديقة garden 639 | 639 متساو equal 640 | 640 أرسلت sent 641 | 641 اختيار choose 642 | 642 سقط fell 643 | 643 تناسب fit 644 | 644 تدفق flow 645 | 645 عادل fair 646 | 646 البنك bank 647 | 647 جمع collect 648 | 648 حفظ save 649 | 649 السيطرة control 650 | 650 العشري decimal 651 | 651 إذن ear 652 | 652 آخر else 653 | 653 تماما quite 654 | 654 حطم broke 655 | 655 حالة case 656 | 656 وسط middle 657 | 657 قتل kill 658 | 658 ابن son 659 | 659 بحيرة lake 660 | 660 لحظة moment 661 | 661 على نطاق و scale 662 | 662 بصوت عال loud 663 | 663 الربيع spring 664 | 664 رصد observe 665 | 665 طفل child 666 | 666 مباشرة straight 667 | 667 متوافق consonant 668 | 668 الأمة nation 669 | 669 قاموس dictionary 670 | 670 حليب milk 671 | 671 السرعة speed 672 | 672 طريقة method 673 | 673 عضو organ 674 | 674 دفع pay 675 | 675 العمر age 676 | 676 القسم section 677 | 677 فستان dress 678 | 678 سحابة cloud 679 | 679 مفاجأة surprise 680 | 680 هادئ quiet 681 | 681 حجر stone 682 | 682 صغير tiny 683 | 683 تسلق climb 684 | 684 بارد cool 685 | 685 تصميم design 686 | 686 فقير poor 687 | 687 الكثير lot 688 | 688 تجربة experiment 689 | 689 أسفل bottom 690 | 690 مفتاح key 691 | 691 حديد iron 692 | 692 احد single 693 | 693 عصا stick 694 | 694 شقة flat 695 | 695 عشرون twenty 696 | 696 الجلد skin 697 | 697 الابتسامة smile 698 | 698 تجعد crease 699 | 699 حفرة hole 700 | 700 القفزة jump 701 | 701 طفل baby 702 | 702 ثمانية eight 703 | 703 قرية village 704 | 704 تلبية meet 705 | 705 الجذر root 706 | 706 شراء buy 707 | 707 رفع raise 708 | 708 حل solve 709 | 709 المعادن metal 710 | 710 سواء whether 711 | 711 دفع push 712 | 712 سبعة seven 713 | 713 فقرة paragraph 714 | 714 الثالث third 715 | 715 يجب shall 716 | 716 عقدت held 717 | 717 الشعر hair 718 | 718 وصف describe 719 | 719 طبخ cook 720 | 720 الطابق floor 721 | 721 كل either 722 | 722 
نتيجة result 723 | 723 حرق burn 724 | 724 التل hill 725 | 725 آمنة safe 726 | 726 القط cat 727 | 727 القرن century 728 | 728 النظر consider 729 | 729 نوع type 730 | 730 القانون law 731 | 731 لقمة bit 732 | 732 الساحل coast 733 | 733 نسخة copy 734 | 734 العبارة phrase 735 | 735 الصمت silent 736 | 736 طويل tall 737 | 737 رمل sand 738 | 738 التربة soil 739 | 739 لفة roll 740 | 740 درجة الحرارة temperature 741 | 741 إصبع finger 742 | 742 صناعة industry 743 | 743 قيمة value 744 | 744 المعركة fight 745 | 745 كذبة lie 746 | 746 فوز beat 747 | 747 أثار excite 748 | 748 طبيعي natural 749 | 749 رأي view 750 | 750 المعنى sense 751 | 751 العاصمة capital 752 | 752 سوف لا won’t 753 | 753 كرسي chair 754 | 754 خطر danger 755 | 755 الفاكهة fruit 756 | 756 غني rich 757 | 757 سميك thick 758 | 758 جندي soldier 759 | 759 عملية process 760 | 760 العمل operate 761 | 761 ممارسة practice 762 | 762 منفصلة separate 763 | 763 الصعب difficult 764 | 764 الطبيب doctor 765 | 765 الرجاء please 766 | 766 حماية protect 767 | 767 ظهرا noon 768 | 768 محصول crop 769 | 769 الحديث modern 770 | 770 عنصر element 771 | 771 ضرب hit 772 | 772 طالب student 773 | 773 ركن corner 774 | 774 حزب party 775 | 775 تزويد supply 776 | 776 الذي whose 777 | 777 حدد locate 778 | 778 عصابة ring 779 | 779 حرف character 780 | 780 الحشرة insect 781 | 781 اشتعلت caught 782 | 782 فترة period 783 | 783 تشير indicate 784 | 784 راديو radio 785 | 785 وتحدث spoke 786 | 786 ذرة atom 787 | 787 بشري human 788 | 788 التاريخ history 789 | 789 تأثير effect 790 | 790 كهربائي electric 791 | 791 توقع expect 792 | 792 العظام bone 793 | 793 سكة حديدية rail 794 | 794 تخيل imagine 795 | 795 توفير provide 796 | 796 توافق agree 797 | 797 وبالتالي thus 798 | 798 لطيف gentle 799 | 799 امرأة woman 800 | 800 نقيب captain 801 | 801 تخمين guess 802 | 802 الضرورة necessary 803 | 803 حاد sharp 804 | 804 جناح wing 805 | 805 خلق create 806 | 806 الجيران neighbor 807 | 807 غسل wash 808 | 808 خفاش bat 809 | 809 بالأحرى rather 810 | 810 الحشد crowd 811 | 811 
الذرة corn 812 | 812 مقارنة compare 813 | 813 قصيدة poem 814 | 814 سلسلة string 815 | 815 جرس bell 816 | 816 تعتمد depend 817 | 817 اللحوم meat 818 | 818 فرك rub 819 | 819 أنبوب tube 820 | 820 الشهيرة famous 821 | 821 الدولار dollar 822 | 822 تيار stream 823 | 823 خوف fear 824 | 824 مشهد sight 825 | 825 رقيق thin 826 | 826 مثلث triangle 827 | 827 كوكب planet 828 | 828 عجل hurry 829 | 829 رئيس chief 830 | 830 مستعمرة colony 831 | 831 على مدار الساعة clock 832 | 832 منجم mine 833 | 833 ربطة عنق tie 834 | 834 دخول enter 835 | 835 الرئيسية major 836 | 836 جديدة fresh 837 | 837 البحث search 838 | 838 إرسال send 839 | 839 الأصفر yellow 840 | 840 بندقية gun 841 | 841 السماح allow 842 | 842 طباعة print 843 | 843 ميت dead 844 | 844 بقعة spot 845 | 845 صحراء desert 846 | 846 دعوى suit 847 | 847 التيار current 848 | 848 رفع lift 849 | 849 ارتفع rose 850 | 850 وصول arrive 851 | 851 سيد master 852 | 852 المسار track 853 | 853 الأم parent 854 | 854 الشاطئ shore 855 | 855 تقسيم division 856 | 856 ورقة sheet 857 | 857 مادة substance 858 | 858 صالح favor 859 | 859 اتصال connect 860 | 860 آخر post 861 | 861 أنفق spend 862 | 862 وتر chord 863 | 863 دهن fat 864 | 864 سعيد glad 865 | 865 الأصلي original 866 | 866 حصة share 867 | 867 محطة station 868 | 868 أب dad 869 | 869 الخبز bread 870 | 870 تهمة charge 871 | 871 السليم proper 872 | 872 شريط bar 873 | 873 العرض offer 874 | 874 قطاع segment 875 | 875 الرقيق slave 876 | 876 بط duck 877 | 877 لحظة instant 878 | 878 سوق market 879 | 879 درجة degree 880 | 880 أهل populate 881 | 881 كتكوت chick 882 | 882 عزيزتي dear 883 | 883 العدو enemy 884 | 884 الرد reply 885 | 885 شراب drink 886 | 886 تحدث occur 887 | 887 دعم support 888 | 888 خطاب speech 889 | 889 الطبيعة nature 890 | 890 مجموعة range 891 | 891 البخار steam 892 | 892 الحركة motion 893 | 893 مسار path 894 | 894 السائل liquid 895 | 895 تسجيل log 896 | 896 مقصود meant 897 | 897 حاصل quotient 898 | 898 أسنان teeth 899 | 899 قذيفة shell 900 | 900 الرقبة neck 901 | 901 الأكسجين oxygen 902 | 
902 السكر sugar 903 | 903 الموت death 904 | 904 جميل pretty 905 | 905 مهارة skill 906 | 906 النساء women 907 | 907 الموسم season 908 | 908 حل solution 909 | 909 مغناطيس magnet 910 | 910 فضة silver 911 | 911 شكرا thank 912 | 912 فرع branch 913 | 913 مباراة match 914 | 914 لاحقة suffix 915 | 915 خاصة especially 916 | 916 تين fig 917 | 917 خائف afraid 918 | 918 ضخم huge 919 | 919 شقيقة sister 920 | 920 الصلب steel 921 | 921 بحث discuss 922 | 922 إلى الأمام forward 923 | 923 مماثل similar 924 | 924 توجيه guide 925 | 925 التجربة experience 926 | 926 النتيجة score 927 | 927 تفاحة apple 928 | 928 اشترى bought 929 | 929 أدى led 930 | 930 رمية pitch 931 | 931 معطف coat 932 | 932 كتلة mass 933 | 933 بطاقة card 934 | 934 فرقة band 935 | 935 حبل rope 936 | 936 انزلاق slip 937 | 937 الفوز win 938 | 938 حلم dream 939 | 939 مساء evening 940 | 940 حالة condition 941 | 941 علف feed 942 | 942 أداة tool 943 | 943 الكل total 944 | 944 الأساسية basic 945 | 945 رائحة smell 946 | 946 الوادي valley 947 | 947 ولا nor 948 | 948 ضعف double 949 | 949 مقعد seat 950 | 950 تواصل continue 951 | 951 كتلة block 952 | 952 جدول chart 953 | 953 قبعة hat 954 | 954 بيع sell 955 | 955 نجاح success 956 | 956 شركة company 957 | 957 طرح subtract 958 | 958 حدث event 959 | 959 خاصة particular 960 | 960 صفقة deal 961 | 961 السباحة swim 962 | 962 مصطلح term 963 | 963 العكس opposite 964 | 964 زوجة wife 965 | 965 حذاء shoe 966 | 966 الكتف shoulder 967 | 967 انتشار spread 968 | 968 رتب arrange 969 | 969 مخيم camp 970 | 970 اختراع invent 971 | 971 قطن cotton 972 | 972 الولادة born 973 | 973 حدد determine 974 | 974 رابعا quart 975 | 975 تسعة nine 976 | 976 شاحنة truck 977 | 977 الضوضاء noise 978 | 978 مستوى level 979 | 979 فرصة chance 980 | 980 جمع gather 981 | 981 متجر shop 982 | 982 امتداد stretch 983 | 983 رمي throw 984 | 984 تألق shine 985 | 985 الملكية property 986 | 986 عمود column 987 | 987 جزيء molecule 988 | 988 اختر select 989 | 989 خاطئ wrong 990 | 990 رمادي gray 991 | 991 كرر repeat 992 | 992 تتطلب 
require 993 | 993 واسع broad 994 | 994 إعداد prepare 995 | 995 ملح salt 996 | 996 الأنف nose 997 | 997 الجمع plural 998 | 998 غضب anger 999 | 999 مطالبة claim -------------------------------------------------------------------------------- /language/en.tsv: -------------------------------------------------------------------------------- 1 | 1 as 2 | 2 I 3 | 3 his 4 | 4 that 5 | 5 he 6 | 6 was 7 | 7 for 8 | 8 on 9 | 9 are 10 | 10 with 11 | 11 they 12 | 12 be 13 | 13 at 14 | 14 one 15 | 15 have 16 | 16 this 17 | 17 from 18 | 18 by 19 | 19 hot 20 | 20 word 21 | 21 but 22 | 22 what 23 | 23 some 24 | 24 is 25 | 25 it 26 | 26 you 27 | 27 or 28 | 28 had 29 | 29 the 30 | 30 of 31 | 31 to 32 | 32 and 33 | 33 a 34 | 34 in 35 | 35 we 36 | 36 can 37 | 37 out 38 | 38 other 39 | 39 were 40 | 40 which 41 | 41 do 42 | 42 their 43 | 43 time 44 | 44 if 45 | 45 will 46 | 46 how 47 | 47 said 48 | 48 an 49 | 49 each 50 | 50 tell 51 | 51 does 52 | 52 set 53 | 53 three 54 | 54 want 55 | 55 air 56 | 56 well 57 | 57 also 58 | 58 play 59 | 59 small 60 | 60 end 61 | 61 put 62 | 62 home 63 | 63 read 64 | 64 hand 65 | 65 port 66 | 66 large 67 | 67 spell 68 | 68 add 69 | 69 even 70 | 70 land 71 | 71 here 72 | 72 must 73 | 73 big 74 | 74 high 75 | 75 such 76 | 76 follow 77 | 77 act 78 | 78 why 79 | 79 ask 80 | 80 men 81 | 81 change 82 | 82 went 83 | 83 light 84 | 84 kind 85 | 85 off 86 | 86 need 87 | 87 house 88 | 88 picture 89 | 89 try 90 | 90 us 91 | 91 again 92 | 92 animal 93 | 93 point 94 | 94 mother 95 | 95 world 96 | 96 near 97 | 97 build 98 | 98 self 99 | 99 earth 100 | 100 father 101 | 101 any 102 | 102 new 103 | 103 work 104 | 104 part 105 | 105 take 106 | 106 get 107 | 107 place 108 | 108 made 109 | 109 live 110 | 110 where 111 | 111 after 112 | 112 back 113 | 113 little 114 | 114 only 115 | 115 round 116 | 116 man 117 | 117 year 118 | 118 came 119 | 119 show 120 | 120 every 121 | 121 good 122 | 122 me 123 | 123 give 124 | 124 our 125 | 125 under 126 | 126 name 127 | 127 very 128 | 128 
through 129 | 129 just 130 | 130 form 131 | 131 sentence 132 | 132 great 133 | 133 think 134 | 134 say 135 | 135 help 136 | 136 low 137 | 137 line 138 | 138 differ 139 | 139 turn 140 | 140 cause 141 | 141 much 142 | 142 mean 143 | 143 before 144 | 144 move 145 | 145 right 146 | 146 boy 147 | 147 old 148 | 148 too 149 | 149 same 150 | 150 she 151 | 151 all 152 | 152 there 153 | 153 when 154 | 154 up 155 | 155 use 156 | 156 your 157 | 157 way 158 | 158 about 159 | 159 many 160 | 160 then 161 | 161 them 162 | 162 write 163 | 163 would 164 | 164 like 165 | 165 so 166 | 166 these 167 | 167 her 168 | 168 long 169 | 169 make 170 | 170 thing 171 | 171 see 172 | 172 him 173 | 173 two 174 | 174 has 175 | 175 look 176 | 176 more 177 | 177 day 178 | 178 could 179 | 179 go 180 | 180 come 181 | 181 did 182 | 182 number 183 | 183 sound 184 | 184 no 185 | 185 most 186 | 186 people 187 | 187 my 188 | 188 over 189 | 189 know 190 | 190 water 191 | 191 than 192 | 192 call 193 | 193 first 194 | 194 who 195 | 195 may 196 | 196 down 197 | 197 side 198 | 198 been 199 | 199 now 200 | 200 find 201 | 201 head 202 | 202 stand 203 | 203 own 204 | 204 page 205 | 205 should 206 | 206 country 207 | 207 found 208 | 208 answer 209 | 209 school 210 | 210 grow 211 | 211 study 212 | 212 still 213 | 213 learn 214 | 214 plant 215 | 215 cover 216 | 216 food 217 | 217 sun 218 | 218 four 219 | 219 between 220 | 220 state 221 | 221 keep 222 | 222 eye 223 | 223 never 224 | 224 last 225 | 225 let 226 | 226 thought 227 | 227 city 228 | 228 tree 229 | 229 cross 230 | 230 farm 231 | 231 hard 232 | 232 start 233 | 233 might 234 | 234 story 235 | 235 saw 236 | 236 far 237 | 237 sea 238 | 238 draw 239 | 239 left 240 | 240 late 241 | 241 run 242 | 242 don’t 243 | 243 while 244 | 244 press 245 | 245 close 246 | 246 night 247 | 247 real 248 | 248 life 249 | 249 few 250 | 250 north 251 | 251 book 252 | 252 carry 253 | 253 took 254 | 254 science 255 | 255 eat 256 | 256 room 257 | 257 friend 258 | 258 began 259 | 259 
idea 260 | 260 fish 261 | 261 mountain 262 | 262 stop 263 | 263 once 264 | 264 base 265 | 265 hear 266 | 266 horse 267 | 267 cut 268 | 268 sure 269 | 269 watch 270 | 270 color 271 | 271 face 272 | 272 wood 273 | 273 main 274 | 274 open 275 | 275 seem 276 | 276 together 277 | 277 next 278 | 278 white 279 | 279 children 280 | 280 begin 281 | 281 got 282 | 282 walk 283 | 283 example 284 | 284 ease 285 | 285 paper 286 | 286 group 287 | 287 always 288 | 288 music 289 | 289 those 290 | 290 both 291 | 291 mark 292 | 292 often 293 | 293 letter 294 | 294 until 295 | 295 mile 296 | 296 river 297 | 297 car 298 | 298 feet 299 | 299 care 300 | 300 second 301 | 301 enough 302 | 302 plain 303 | 303 girl 304 | 304 usual 305 | 305 young 306 | 306 ready 307 | 307 above 308 | 308 ever 309 | 309 red 310 | 310 list 311 | 311 though 312 | 312 feel 313 | 313 talk 314 | 314 bird 315 | 315 soon 316 | 316 body 317 | 317 dog 318 | 318 family 319 | 319 direct 320 | 320 pose 321 | 321 leave 322 | 322 song 323 | 323 measure 324 | 324 door 325 | 325 product 326 | 326 black 327 | 327 short 328 | 328 numeral 329 | 329 class 330 | 330 wind 331 | 331 question 332 | 332 happen 333 | 333 complete 334 | 334 ship 335 | 335 area 336 | 336 half 337 | 337 rock 338 | 338 order 339 | 339 fire 340 | 340 south 341 | 341 problem 342 | 342 piece 343 | 343 told 344 | 344 knew 345 | 345 pass 346 | 346 since 347 | 347 top 348 | 348 whole 349 | 349 king 350 | 350 street 351 | 351 inch 352 | 352 multiply 353 | 353 nothing 354 | 354 course 355 | 355 stay 356 | 356 wheel 357 | 357 full 358 | 358 force 359 | 359 blue 360 | 360 object 361 | 361 decide 362 | 362 surface 363 | 363 deep 364 | 364 moon 365 | 365 island 366 | 366 foot 367 | 367 system 368 | 368 busy 369 | 369 test 370 | 370 record 371 | 371 boat 372 | 372 common 373 | 373 gold 374 | 374 possible 375 | 375 plane 376 | 376 stead 377 | 377 dry 378 | 378 wonder 379 | 379 laugh 380 | 380 thousand 381 | 381 ago 382 | 382 ran 383 | 383 check 384 | 384 game 385 | 385 
shape 386 | 386 equate 387 | 387 hot 388 | 388 miss 389 | 389 brought 390 | 390 heat 391 | 391 snow 392 | 392 tire 393 | 393 bring 394 | 394 yes 395 | 395 distant 396 | 396 fill 397 | 397 east 398 | 398 paint 399 | 399 language 400 | 400 among 401 | 401 unit 402 | 402 power 403 | 403 town 404 | 404 fine 405 | 405 certain 406 | 406 fly 407 | 407 fall 408 | 408 lead 409 | 409 cry 410 | 410 dark 411 | 411 machine 412 | 412 note 413 | 413 wait 414 | 414 plan 415 | 415 figure 416 | 416 star 417 | 417 box 418 | 418 noun 419 | 419 field 420 | 420 rest 421 | 421 correct 422 | 422 able 423 | 423 pound 424 | 424 done 425 | 425 beauty 426 | 426 drive 427 | 427 stood 428 | 428 contain 429 | 429 front 430 | 430 teach 431 | 431 week 432 | 432 final 433 | 433 gave 434 | 434 green 435 | 435 oh 436 | 436 quick 437 | 437 develop 438 | 438 ocean 439 | 439 warm 440 | 440 free 441 | 441 minute 442 | 442 strong 443 | 443 special 444 | 444 mind 445 | 445 behind 446 | 446 clear 447 | 447 tail 448 | 448 produce 449 | 449 fact 450 | 450 space 451 | 451 heard 452 | 452 best 453 | 453 hour 454 | 454 better 455 | 455 true 456 | 456 during 457 | 457 hundred 458 | 458 five 459 | 459 remember 460 | 460 step 461 | 461 early 462 | 462 hold 463 | 463 west 464 | 464 ground 465 | 465 interest 466 | 466 reach 467 | 467 fast 468 | 468 verb 469 | 469 sing 470 | 470 listen 471 | 471 six 472 | 472 table 473 | 473 travel 474 | 474 less 475 | 475 morning 476 | 476 ten 477 | 477 simple 478 | 478 several 479 | 479 vowel 480 | 480 toward 481 | 481 war 482 | 482 lay 483 | 483 against 484 | 484 pattern 485 | 485 slow 486 | 486 center 487 | 487 love 488 | 488 person 489 | 489 money 490 | 490 serve 491 | 491 appear 492 | 492 road 493 | 493 map 494 | 494 rain 495 | 495 rule 496 | 496 govern 497 | 497 pull 498 | 498 cold 499 | 499 notice 500 | 500 voice 501 | 501 energy 502 | 502 hunt 503 | 503 probable 504 | 504 bed 505 | 505 brother 506 | 506 egg 507 | 507 ride 508 | 508 cell 509 | 509 believe 510 | 510 perhaps 511 
| 511 pick 512 | 512 sudden 513 | 513 count 514 | 514 square 515 | 515 reason 516 | 516 length 517 | 517 represent 518 | 518 art 519 | 519 subject 520 | 520 region 521 | 521 size 522 | 522 vary 523 | 523 settle 524 | 524 speak 525 | 525 weight 526 | 526 general 527 | 527 ice 528 | 528 matter 529 | 529 circle 530 | 530 pair 531 | 531 include 532 | 532 divide 533 | 533 syllable 534 | 534 felt 535 | 535 grand 536 | 536 ball 537 | 537 yet 538 | 538 wave 539 | 539 drop 540 | 540 heart 541 | 541 am 542 | 542 present 543 | 543 heavy 544 | 544 dance 545 | 545 engine 546 | 546 position 547 | 547 arm 548 | 548 wide 549 | 549 sail 550 | 550 material 551 | 551 fraction 552 | 552 forest 553 | 553 sit 554 | 554 race 555 | 555 window 556 | 556 store 557 | 557 summer 558 | 558 train 559 | 559 sleep 560 | 560 prove 561 | 561 lone 562 | 562 leg 563 | 563 exercise 564 | 564 wall 565 | 565 catch 566 | 566 mount 567 | 567 wish 568 | 568 sky 569 | 569 board 570 | 570 joy 571 | 571 winter 572 | 572 sat 573 | 573 written 574 | 574 wild 575 | 575 instrument 576 | 576 kept 577 | 577 glass 578 | 578 grass 579 | 579 cow 580 | 580 job 581 | 581 edge 582 | 582 sign 583 | 583 visit 584 | 584 past 585 | 585 soft 586 | 586 fun 587 | 587 bright 588 | 588 gas 589 | 589 weather 590 | 590 month 591 | 591 million 592 | 592 bear 593 | 593 finish 594 | 594 happy 595 | 595 hope 596 | 596 flower 597 | 597 clothe 598 | 598 strange 599 | 599 gone 600 | 600 trade 601 | 601 melody 602 | 602 trip 603 | 603 office 604 | 604 receive 605 | 605 row 606 | 606 mouth 607 | 607 exact 608 | 608 symbol 609 | 609 die 610 | 610 least 611 | 611 trouble 612 | 612 shout 613 | 613 except 614 | 614 wrote 615 | 615 seed 616 | 616 tone 617 | 617 join 618 | 618 suggest 619 | 619 clean 620 | 620 break 621 | 621 lady 622 | 622 yard 623 | 623 rise 624 | 624 bad 625 | 625 blow 626 | 626 oil 627 | 627 blood 628 | 628 touch 629 | 629 grew 630 | 630 cent 631 | 631 mix 632 | 632 team 633 | 633 wire 634 | 634 cost 635 | 635 lost 636 | 636 
brown 637 | 637 wear 638 | 638 garden 639 | 639 equal 640 | 640 sent 641 | 641 choose 642 | 642 fell 643 | 643 fit 644 | 644 flow 645 | 645 fair 646 | 646 bank 647 | 647 collect 648 | 648 save 649 | 649 control 650 | 650 decimal 651 | 651 ear 652 | 652 else 653 | 653 quite 654 | 654 broke 655 | 655 case 656 | 656 middle 657 | 657 kill 658 | 658 son 659 | 659 lake 660 | 660 moment 661 | 661 scale 662 | 662 loud 663 | 663 spring 664 | 664 observe 665 | 665 child 666 | 666 straight 667 | 667 consonant 668 | 668 nation 669 | 669 dictionary 670 | 670 milk 671 | 671 speed 672 | 672 method 673 | 673 organ 674 | 674 pay 675 | 675 age 676 | 676 section 677 | 677 dress 678 | 678 cloud 679 | 679 surprise 680 | 680 quiet 681 | 681 stone 682 | 682 tiny 683 | 683 climb 684 | 684 cool 685 | 685 design 686 | 686 poor 687 | 687 lot 688 | 688 experiment 689 | 689 bottom 690 | 690 key 691 | 691 iron 692 | 692 single 693 | 693 stick 694 | 694 flat 695 | 695 twenty 696 | 696 skin 697 | 697 smile 698 | 698 crease 699 | 699 hole 700 | 700 jump 701 | 701 baby 702 | 702 eight 703 | 703 village 704 | 704 meet 705 | 705 root 706 | 706 buy 707 | 707 raise 708 | 708 solve 709 | 709 metal 710 | 710 whether 711 | 711 push 712 | 712 seven 713 | 713 paragraph 714 | 714 third 715 | 715 shall 716 | 716 held 717 | 717 hair 718 | 718 describe 719 | 719 cook 720 | 720 floor 721 | 721 either 722 | 722 result 723 | 723 burn 724 | 724 hill 725 | 725 safe 726 | 726 cat 727 | 727 century 728 | 728 consider 729 | 729 type 730 | 730 law 731 | 731 bit 732 | 732 coast 733 | 733 copy 734 | 734 phrase 735 | 735 silent 736 | 736 tall 737 | 737 sand 738 | 738 soil 739 | 739 roll 740 | 740 temperature 741 | 741 finger 742 | 742 industry 743 | 743 value 744 | 744 fight 745 | 745 lie 746 | 746 beat 747 | 747 excite 748 | 748 natural 749 | 749 view 750 | 750 sense 751 | 751 capital 752 | 752 won’t 753 | 753 chair 754 | 754 danger 755 | 755 fruit 756 | 756 rich 757 | 757 thick 758 | 758 soldier 759 | 759 process 760 | 
760 operate 761 | 761 practice 762 | 762 separate 763 | 763 difficult 764 | 764 doctor 765 | 765 please 766 | 766 protect 767 | 767 noon 768 | 768 crop 769 | 769 modern 770 | 770 element 771 | 771 hit 772 | 772 student 773 | 773 corner 774 | 774 party 775 | 775 supply 776 | 776 whose 777 | 777 locate 778 | 778 ring 779 | 779 character 780 | 780 insect 781 | 781 caught 782 | 782 period 783 | 783 indicate 784 | 784 radio 785 | 785 spoke 786 | 786 atom 787 | 787 human 788 | 788 history 789 | 789 effect 790 | 790 electric 791 | 791 expect 792 | 792 bone 793 | 793 rail 794 | 794 imagine 795 | 795 provide 796 | 796 agree 797 | 797 thus 798 | 798 gentle 799 | 799 woman 800 | 800 captain 801 | 801 guess 802 | 802 necessary 803 | 803 sharp 804 | 804 wing 805 | 805 create 806 | 806 neighbor 807 | 807 wash 808 | 808 bat 809 | 809 rather 810 | 810 crowd 811 | 811 corn 812 | 812 compare 813 | 813 poem 814 | 814 string 815 | 815 bell 816 | 816 depend 817 | 817 meat 818 | 818 rub 819 | 819 tube 820 | 820 famous 821 | 921 dollar 822 | 822 stream 823 | 823 fear 824 | 284 sight 825 | 825 thin 826 | 826 triangle 827 | 827 planet 828 | 828 hurry 829 | 829 chief 830 | 830 colony 831 | 831 clock 832 | 832 mine 833 | 833 tie 834 | 834 enter 835 | 835 major 836 | 836 fresh 837 | 837 search 838 | 838 send 839 | 839 yellow 840 | 840 gun 841 | 841 allow 842 | 842 print 843 | 843 dead 844 | 844 spot 845 | 845 desert 846 | 846 suit 847 | 847 current 848 | 848 lift 849 | 840 rose 850 | 850 arrive 851 | 851 master 852 | 852 track 853 | 853 parent 854 | 854 shore 855 | 855 division 856 | 856 sheet 857 | 857 substance 858 | 858 favor 859 | 859 connect 860 | 860 post 861 | 861 spend 862 | 862 chord 863 | 863 fat 864 | 864 glad 865 | 865 original 866 | 866 share 867 | 867 station 868 | 868 dad 869 | 869 bread 870 | 870 charge 871 | 871 proper 872 | 872 bar 873 | 873 offer 874 | 874 segment 875 | 875 slave 876 | 876 duck 877 | 877 instant 878 | 878 market 879 | 879 degree 880 | 880 populate 881 | 881 
chick 882 | 882 dear 883 | 883 enemy 884 | 884 reply 885 | 885 drink 886 | 886 occur 887 | 887 support 888 | 888 speech 889 | 889 nature 890 | 890 range 891 | 891 steam 892 | 892 motion 893 | 893 path 894 | 894 liquid 895 | 895 log 896 | 896 meant 897 | 897 quotient 898 | 898 teeth 899 | 899 shell 900 | 900 neck 901 | 901 oxygen 902 | 902 sugar 903 | 903 death 904 | 904 pretty 905 | 905 skill 906 | 906 women 907 | 907 season 908 | 908 solution 909 | 909 magnet 910 | 910 silver 911 | 911 thank 912 | 912 branch 913 | 913 match 914 | 914 suffix 915 | 915 especially 916 | 916 fig 917 | 917 afraid 918 | 918 huge 919 | 919 sister 920 | 920 steel 921 | 921 discuss 922 | 922 forward 923 | 923 similar 924 | 924 guide 925 | 925 experience 926 | 926 score 927 | 927 apple 928 | 928 bought 929 | 929 led 930 | 930 pitch 931 | 931 coat 932 | 932 mass 933 | 933 card 934 | 934 band 935 | 935 rope 936 | 936 slip 937 | 937 win 938 | 938 dream 939 | 939 evening 940 | 940 condition 941 | 941 feed 942 | 942 tool 943 | 943 total 944 | 944 basic 945 | 945 smell 946 | 946 valley 947 | 947 nor 948 | 948 double 949 | 949 seat 950 | 950 continue 951 | 951 block 952 | 952 chart 953 | 953 hat 954 | 954 sell 955 | 955 success 956 | 956 company 957 | 957 subtract 958 | 958 event 959 | 959 particular 960 | 960 deal 961 | 961 swim 962 | 962 term 963 | 963 opposite 964 | 964 wife 965 | 965 shoe 966 | 966 shoulder 967 | 967 spread 968 | 968 arrange 969 | 969 camp 970 | 970 invent 971 | 971 cotton 972 | 972 born 973 | 973 determine 974 | 974 quart 975 | 975 nine 976 | 976 truck 977 | 977 noise 978 | 978 level 979 | 979 chance 980 | 980 gather 981 | 981 shop 982 | 982 stretch 983 | 983 throw 984 | 984 shine 985 | 985 property 986 | 986 column 987 | 987 molecule 988 | 988 select 989 | 989 wrong 990 | 990 gray 991 | 991 repeat 992 | 992 require 993 | 993 broad 994 | 994 prepare 995 | 995 salt 996 | 996 nose 997 | 997 plural 998 | 998 anger 999 | 999 claim 1000 | 1000 continent 
-------------------------------------------------------------------------------- /language/eo.tsv: -------------------------------------------------------------------------------- 1 | 1 kiel as 2 | 2 Mi I 3 | 3 sian his 4 | 4 ke that 5 | 5 li he 6 | 6 estis was 7 | 7 por for 8 | 8 sur on 9 | 9 estas are 10 | 10 kun with 11 | 11 ili they 12 | 12 esti be 13 | 13 ĉe at 14 | 14 unu one 15 | 15 havos have 16 | 16 tiu this 17 | 17 el from 18 | 18 per by 19 | 19 varmega hot 20 | 20 vorto word 21 | 21 sed but 22 | 22 kio what 23 | 23 iuj some 24 | 24 estas is 25 | 25 ĝi it 26 | 26 vi you 27 | 27 aŭ or 28 | 28 havis had 29 | 29 la the 30 | 30 de of 31 | 31 por to 32 | 32 kaj and 33 | 33 a a 34 | 34 en in 35 | 35 ni we 36 | 36 povas can 37 | 37 el out 38 | 38 aliaj other 39 | 39 estis were 40 | 40 kio which 41 | 41 fari do 42 | 42 iliaj their 43 | 43 tempo time 44 | 44 se if 45 | 45 volo will 46 | 46 kiom how 47 | 47 diris said 48 | 48 kiel an 49 | 49 ĉiu each 50 | 50 diru tell 51 | 51 faras does 52 | 52 aro set 53 | 53 tri three 54 | 54 volas want 55 | 55 aero air 56 | 56 bone well 57 | 57 ankaŭ also 58 | 58 ludos play 59 | 59 malgranda small 60 | 60 fino end 61 | 61 metita put 62 | 62 hejmo home 63 | 63 legi read 64 | 64 manoj hand 65 | 65 port port 66 | 66 granda large 67 | 67 literumi spell 68 | 68 aldoni add 69 | 69 eĉ even 70 | 70 lando land 71 | 71 tien here 72 | 72 devas must 73 | 73 granda big 74 | 74 alta high 75 | 75 tiaj such 76 | 76 sekvi follow 77 | 77 akto act 78 | 78 kial why 79 | 79 demandu ask 80 | 80 viroj men 81 | 81 ŝanĝo change 82 | 82 eniris went 83 | 83 malpeza light 84 | 84 afabla kind 85 | 85 ekstere off 86 | 86 bezonas need 87 | 87 domo house 88 | 88 foton picture 89 | 89 provu try 90 | 90 nin us 91 | 91 denove again 92 | 92 besto animal 93 | 93 punkto point 94 | 94 patrino mother 95 | 95 mondo world 96 | 96 proksime near 97 | 97 konstrui build 98 | 98 aŭto self 99 | 99 tero earth 100 | 100 patro father 101 | 101 ajna any 102 | 102 nova new 103 | 
103 laboro work 104 | 104 parto part 105 | 105 preni take 106 | 106 akiri get 107 | 107 loko place 108 | 108 faris made 109 | 109 vivi live 110 | 110 kie where 111 | 111 post after 112 | 112 reen back 113 | 113 iom little 114 | 114 nur only 115 | 115 ronda round 116 | 116 viro man 117 | 117 jaro year 118 | 118 venis came 119 | 119 spektaklo show 120 | 120 ĉiu every 121 | 121 bonaj good 122 | 122 Min me 123 | 123 doni give 124 | 124 nian our 125 | 125 sub under 126 | 126 nomo name 127 | 127 tre very 128 | 128 per through 129 | 129 simple just 130 | 130 formo form 131 | 131 kondamno sentence 132 | 132 granda great 133 | 133 pensas think 134 | 134 diras say 135 | 135 helpi help 136 | 136 malalta low 137 | 137 linio line 138 | 138 diferenciĝas differ 139 | 139 Siavice turn 140 | 140 kaŭzo cause 141 | 141 multa much 142 | 142 signifus mean 143 | 143 antaŭ before 144 | 144 movado move 145 | 145 dekstra right 146 | 146 knabo boy 147 | 147 malnova old 148 | 148 tro too 149 | 149 sama same 150 | 150 ŝi she 151 | 151 ĉiuj all 152 | 152 tie there 153 | 153 kiam when 154 | 154 supren up 155 | 155 uzo use 156 | 156 via your 157 | 157 vojo way 158 | 158 proksimume about 159 | 159 multaj many 160 | 160 tiam then 161 | 161 ilin them 162 | 162 skribi write 163 | 163 farus would 164 | 164 kiel like 165 | 165 tiel so 166 | 166 tiuj these 167 | 167 ŝia her 168 | 168 longajn long 169 | 169 fari make 170 | 170 afero thing 171 | 171 vidi see 172 | 172 li him 173 | 173 du two 174 | 174 havas has 175 | 175 rigardi look 176 | 176 pli more 177 | 177 tagon day 178 | 178 povis could 179 | 179 iri go 180 | 180 venu come 181 | 181 faris did 182 | 182 nombro number 183 | 183 soni sound 184 | 184 neniu no 185 | 185 plej most 186 | 186 homoj people 187 | 187 Mian my 188 | 188 super over 189 | 189 scias know 190 | 190 akvo water 191 | 191 ol than 192 | 192 alvoko call 193 | 193 unua first 194 | 194 kiuj who 195 | 195 may may 196 | 196 malsupren down 197 | 197 flanko side 198 | 198 estintaj been 199 
| 199 nun now 200 | 200 trovi find 201 | 201 kapo head 202 | 202 staras stand 203 | 203 propra own 204 | 204 paĝo page 205 | 205 devus should 206 | 206 lando country 207 | 207 trovita found 208 | 208 respondo answer 209 | 209 lernejo school 210 | 210 kreski grow 211 | 211 studo study 212 | 212 ankoraŭ still 213 | 213 lerni learn 214 | 214 planto plant 215 | 215 kovrilo cover 216 | 216 nutraĵo food 217 | 217 suno sun 218 | 218 kvar four 219 | 219 inter between 220 | 220 ŝtata state 221 | 221 konservi keep 222 | 222 okulo eye 223 | 223 neniam never 224 | 224 lasta last 225 | 225 lasu let 226 | 226 pensis thought 227 | 227 urbo city 228 | 228 arbo tree 229 | 229 transiri cross 230 | 230 farm farm 231 | 231 malfacila hard 232 | 232 komenco start 233 | 233 potenco might 234 | 234 rakonto story 235 | 235 segilo saw 236 | 236 malproksime far 237 | 237 maro sea 238 | 238 desegni draw 239 | 239 maldekstra left 240 | 240 malfrue late 241 | 241 run run 242 | 242 ne don’t 243 | 243 dum while 244 | 244 gazetaro press 245 | 245 Fermi close 246 | 246 nokto night 247 | 247 reala real 248 | 248 vivo life 249 | 249 malmultaj few 250 | 250 Norde north 251 | 251 libro book 252 | 252 porti carry 253 | 253 prenis took 254 | 254 scienco science 255 | 255 manĝi eat 256 | 256 ĉambro room 257 | 257 amiko friend 258 | 258 komencis began 259 | 259 ideon idea 260 | 260 fiŝo fish 261 | 261 monto mountain 262 | 262 ĉesi stop 263 | 263 unufoje once 264 | 264 bazo base 265 | 265 aŭdi hear 266 | 266 ĉevalo horse 267 | 267 tranĉo cut 268 | 268 certas sure 269 | 269 spekti watch 270 | 270 koloro color 271 | 271 vizaĝo face 272 | 272 ligno wood 273 | 273 ĉefa main 274 | 274 malfermita open 275 | 275 ŝajnas seem 276 | 276 kune together 277 | 277 sekva next 278 | 278 blanka white 279 | 279 infanoj children 280 | 280 komenci begin 281 | 281 akiris got 282 | 282 marŝi walk 283 | 283 ekzemplo example 284 | 284 faciligi ease 285 | 285 papero paper 286 | 286 grupo group 287 | 287 ĉiam always 288 | 288 muziko 
music 289 | 289 tiuj those 290 | 290 ambaŭ both 291 | 291 markon mark 292 | 292 ofte often 293 | 293 letero letter 294 | 294 ĝis until 295 | 295 mejlo mile 296 | 296 rivero river 297 | 297 aŭto car 298 | 298 piedojn feet 299 | 299 prizorgi care 300 | 300 dua second 301 | 301 sufiĉa enough 302 | 302 ebenaĵo plain 303 | 303 knabino girl 304 | 304 kutima usual 305 | 305 junulo young 306 | 306 preta ready 307 | 307 supre above 308 | 308 iam ever 309 | 309 ruĝa red 310 | 310 lerta list 311 | 311 kvankam though 312 | 312 sentas feel 313 | 313 Diskuto talk 314 | 314 birdo bird 315 | 315 baldaŭ soon 316 | 316 korpo body 317 | 317 hundo dog 318 | 318 familio family 319 | 319 rekta direct 320 | 320 supozi pose 321 | 321 forlasi leave 322 | 322 kanto song 323 | 323 mezuri measure 324 | 324 pordo door 325 | 325 produkto product 326 | 326 nigra black 327 | 327 mallonga short 328 | 328 numeralo numeral 329 | 329 klaso class 330 | 330 vento wind 331 | 331 demando question 332 | 332 okazi happen 333 | 333 kompleta complete 334 | 334 ŝipo ship 335 | 335 spaco area 336 | 336 duono half 337 | 337 roko rock 338 | 338 ordon order 339 | 339 fajro fire 340 | 340 sude south 341 | 341 problemo problem 342 | 342 peco piece 343 | 343 rakontis told 344 | 344 sciis knew 345 | 345 pasi pass 346 | 346 ekde since 347 | 347 supro top 348 | 348 aro whole 349 | 349 reĝo king 350 | 350 strato street 351 | 351 colo inch 352 | 352 multipliki multiply 353 | 353 nenio nothing 354 | 354 Kompreneble course 355 | 355 resti stay 356 | 356 radon wheel 357 | 357 plena full 358 | 358 forto force 359 | 359 blua blue 360 | 360 objekto object 361 | 361 decidi decide 362 | 362 surfaco surface 363 | 363 profunda deep 364 | 364 luno moon 365 | 365 insulo island 366 | 366 piedo foot 367 | 367 sistemo system 368 | 368 okupata busy 369 | 369 testo test 370 | 370 rekordo record 371 | 371 kruĉo boat 372 | 372 komunaj common 373 | 373 oro gold 374 | 374 ebla possible 375 | 375 ebeno plane 376 | 376 anstataux stead 377 | 
377 seka dry 378 | 378 demandas wonder 379 | 379 ridon laugh 380 | 380 milo thousand 381 | 381 monato ago 382 | 382 kuris ran 383 | 383 kontroli check 384 | 384 ludo game 385 | 385 formo shape 386 | 386 egaligas equate 387 | 387 varmega hot 388 | 388 miss miss 389 | 389 alportis brought 390 | 390 varmo heat 391 | 391 neĝo snow 392 | 392 pneŭo tire 393 | 393 alporti bring 394 | 394 jes yes 395 | 395 malproksima distant 396 | 396 plenigi fill 397 | 397 Oriente east 398 | 398 pentri paint 399 | 399 lingvo language 400 | 400 inter among 401 | 401 unuo unit 402 | 402 potenco power 403 | 403 urbo town 404 | 404 fajna fine 405 | 405 certaj certain 406 | 406 muŝo fly 407 | 407 fali fall 408 | 408 konduki lead 409 | 409 krio cry 410 | 410 mallumo dark 411 | 411 maŝino machine 412 | 412 noto note 413 | 413 atendi wait 414 | 414 plano plan 415 | 415 cifero figure 416 | 416 stelo star 417 | 417 skatolo box 418 | 418 substantivo noun 419 | 419 kampo field 420 | 420 resto rest 421 | 421 ĝusta correct 422 | 422 povis able 423 | 423 funto pound 424 | 424 farita done 425 | 425 beleco beauty 426 | 426 disko drive 427 | 427 staris stood 428 | 428 enhavi contain 429 | 429 antaŭa front 430 | 430 instruos teach 431 | 431 semajno week 432 | 432 lasta final 433 | 433 donis gave 434 | 434 verda green 435 | 435 ¡oh oh 436 | 436 rapida quick 437 | 437 evoluigi develop 438 | 438 oceano ocean 439 | 439 varma warm 440 | 440 senpaga free 441 | 441 minuto minute 442 | 442 forta strong 443 | 443 speciala special 444 | 444 menso mind 445 | 445 malantaŭ behind 446 | 446 evidenta clear 447 | 447 vosto tail 448 | 448 produkti produce 449 | 449 fakto fact 450 | 450 spaco space 451 | 451 aŭdis heard 452 | 452 bona best 453 | 453 horo hour 454 | 454 pli better 455 | 455 vera true 456 | 456 dum during 457 | 457 cent hundred 458 | 458 kvin five 459 | 459 memori remember 460 | 460 paŝo step 461 | 461 frua early 462 | 462 teni hold 463 | 463 okcidenta west 464 | 464 tero ground 465 | 465 intereson interest 
466 | 466 alveni reach 467 | 467 rapida fast 468 | 468 verbo verb 469 | 469 kanti sing 470 | 470 aŭskulti listen 471 | 471 ses six 472 | 472 tablo table 473 | 473 vojaĝado travel 474 | 474 malpli less 475 | 475 mateno morning 476 | 476 dek ten 477 | 477 simpla simple 478 | 478 pluraj several 479 | 479 vokalo vowel 480 | 480 rilate toward 481 | 481 milito war 482 | 482 kuŝis lay 483 | 483 kontraŭ against 484 | 484 ŝablono pattern 485 | 485 malrapida slow 486 | 486 centro center 487 | 487 love love 488 | 488 persono person 489 | 489 monon money 490 | 490 servas serve 491 | 491 aperi appear 492 | 492 vojo road 493 | 493 Mapo map 494 | 494 pluvo rain 495 | 495 regulo rule 496 | 496 regi govern 497 | 497 tiri pull 498 | 498 malvarma cold 499 | 499 anonco notice 500 | 500 voĉo voice 501 | 501 energion energy 502 | 502 ĉasi hunt 503 | 503 probabla probable 504 | 504 lito bed 505 | 505 fraton brother 506 | 506 ovo egg 507 | 507 ride ride 508 | 508 ĉelo cell 509 | 509 kredas believe 510 | 510 eble perhaps 511 | 511 pick pick 512 | 512 subita sudden 513 | 513 kalkuli count 514 | 514 kvadrata square 515 | 515 kialo reason 516 | 516 longo length 517 | 517 reprezentas represent 518 | 518 arto art 519 | 519 subjekto subject 520 | 520 regiono region 521 | 521 grandeco size 522 | 522 varios vary 523 | 523 starigi settle 524 | 524 paroli speak 525 | 525 pezo weight 526 | 526 Ĝenerale general 527 | 527 glacio ice 528 | 528 afero matter 529 | 529 rondo circle 530 | 530 paro pair 531 | 531 inkluzivi include 532 | 532 dividi divide 533 | 533 silabo syllable 534 | 534 sentis felt 535 | 535 grandioza grand 536 | 536 pilko ball 537 | 537 ankoraŭ yet 538 | 538 ondo wave 539 | 539 faligi drop 540 | 540 koro heart 541 | 541 estas am 542 | 542 ĉeestanta present 543 | 543 peza heavy 544 | 544 danco dance 545 | 545 motoro engine 546 | 546 pozicion position 547 | 547 brako arm 548 | 548 larĝa wide 549 | 549 velo sail 550 | 550 materialon material 551 | 551 frakcio fraction 552 | 552 arbaro 
forest 553 | 553 sidiĝi sit 554 | 554 raso race 555 | 555 fenestro window 556 | 556 vendejo store 557 | 557 someron summer 558 | 558 trajno train 559 | 559 dormo sleep 560 | 560 elprovi prove 561 | 561 Lone lone 562 | 562 kruro leg 563 | 563 ekzerco exercise 564 | 564 muro wall 565 | 565 catch catch 566 | 566 monto mount 567 | 567 deziri wish 568 | 568 ĉielo sky 569 | 569 surŝipe board 570 | 570 ĝojo joy 571 | 571 vintro winter 572 | 572 SAT sat 573 | 573 skribita written 574 | 574 sovaĝa wild 575 | 575 instrumento instrument 576 | 576 konservis kept 577 | 577 vitro glass 578 | 578 herbo grass 579 | 579 bovino cow 580 | 580 laboro job 581 | 581 eĝo edge 582 | 582 signo sign 583 | 583 vizito visit 584 | 584 estinteco past 585 | 585 mola soft 586 | 586 amuza fun 587 | 587 brila bright 588 | 588 gaso gas 589 | 589 vetero weather 590 | 590 monato month 591 | 591 miliono million 592 | 592 elporti bear 593 | 593 fini finish 594 | 594 feliĉa happy 595 | 595 atendi hope 596 | 596 floro flower 597 | 597 Vestu clothe 598 | 598 stranga strange 599 | 599 iritaj gone 600 | 600 komerco trade 601 | 601 melodio melody 602 | 602 vojaĝo trip 603 | 603 oficejo office 604 | 604 ricevos receive 605 | 605 vico row 606 | 606 buŝo mouth 607 | 607 ĝusta exact 608 | 608 simbolo symbol 609 | 609 morti die 610 | 610 almenaŭ least 611 | 611 malfelicxo trouble 612 | 612 krio shout 613 | 613 krom except 614 | 614 skribis wrote 615 | 615 semoj seed 616 | 616 tono tone 617 | 617 aliĝi join 618 | 618 sugesti suggest 619 | 619 purigi clean 620 | 620 paŭzo break 621 | 621 lady lady 622 | 622 jardo yard 623 | 623 supreniri rise 624 | 624 malbona bad 625 | 625 baton blow 626 | 626 petrolo oil 627 | 627 sango blood 628 | 628 tuŝi touch 629 | 629 kreskis grew 630 | 630 cendo cent 631 | 631 miksi mix 632 | 632 teamo team 633 | 633 drato wire 634 | 634 kosto cost 635 | 635 perdita lost 636 | 636 bruna brown 637 | 637 surhavi wear 638 | 638 ĝardeno garden 639 | 639 egalaj equal 640 | 640 sendis sent 641 | 
641 elekti choose 642 | 642 falis fell 643 | 643 persvadis fit 644 | 644 fluas flow 645 | 645 bela fair 646 | 646 bordo bank 647 | 647 kolekti collect 648 | 648 ŝpari save 649 | 649 kontrolo control 650 | 650 dekuma decimal 651 | 651 orelo ear 652 | 652 alia else 653 | 653 tute quite 654 | 654 rompis broke 655 | 655 okazo case 656 | 656 mezo middle 657 | 657 mortigi kill 658 | 658 filo son 659 | 659 lago lake 660 | 660 momenton moment 661 | 661 skalo scale 662 | 662 laŭta loud 663 | 663 printempo spring 664 | 664 observi observe 665 | 665 infano child 666 | 666 rektaj straight 667 | 667 konsonanto consonant 668 | 668 nacio nation 669 | 669 vortaro dictionary 670 | 670 lakto milk 671 | 671 rapido speed 672 | 672 metodo method 673 | 673 organo organ 674 | 674 pagi pay 675 | 675 aĝo age 676 | 676 sekcio section 677 | 677 robo dress 678 | 678 nubo cloud 679 | 679 surprizo surprise 680 | 680 trankvila quiet 681 | 681 ŝtono stone 682 | 682 etajn tiny 683 | 683 grimpo climb 684 | 684 malvarmeta cool 685 | 685 dezajno design 686 | 686 malriĉa poor 687 | 687 multa lot 688 | 688 eksperimento experiment 689 | 689 malsupro bottom 690 | 690 ŝlosilo key 691 | 691 fero iron 692 | 692 sola single 693 | 693 bastono stick 694 | 694 plata flat 695 | 695 dudek twenty 696 | 696 haŭto skin 697 | 697 rideto smile 698 | 698 crease crease 699 | 699 truo hole 700 | 700 salti jump 701 | 701 bebo baby 702 | 702 ok eight 703 | 703 vilaĝo village 704 | 704 kunvenas meet 705 | 705 radiko root 706 | 706 aĉeti buy 707 | 707 enspezi raise 708 | 708 solvi solve 709 | 709 metalo metal 710 | 710 ĉu whether 711 | 711 puŝo push 712 | 712 sep seven 713 | 713 paragrafo paragraph 714 | 714 triono third 715 | 715 mortigu shall 716 | 716 tenis held 717 | 717 haroj hair 718 | 718 priskribi describe 719 | 719 kuiristino cook 720 | 720 planko floor 721 | 721 ĉu either 722 | 722 rezulto result 723 | 723 bruligi burn 724 | 724 monto hill 725 | 725 sekura safe 726 | 726 kato cat 727 | 727 jarcento century 728 | 
728 konsideri consider 729 | 729 tipo type 730 | 730 leĝo law 731 | 731 iom bit 732 | 732 marbordo coast 733 | 733 kopio copy 734 | 734 frazon phrase 735 | 735 malbrua silent 736 | 736 altkreska tall 737 | 737 sablon sand 738 | 738 planko soil 739 | 739 rulo roll 740 | 740 temperaturo temperature 741 | 741 fingro finger 742 | 742 industrio industry 743 | 743 valoro value 744 | 744 lukto fight 745 | 745 mensogo lie 746 | 746 venki beat 747 | 747 eksciti excite 748 | 748 natura natural 749 | 749 vido view 750 | 750 sento sense 751 | 751 ĉefurbo capital 752 | 752 ne faros won’t 753 | 753 seĝo chair 754 | 754 danĝero danger 755 | 755 fruktoj fruit 756 | 756 riĉa rich 757 | 757 dikaj thick 758 | 758 soldato soldier 759 | 759 procezo process 760 | 760 funkcii operate 761 | 761 praktiko practice 762 | 762 apartaj separate 763 | 763 malfacila difficult 764 | 764 kuracisto doctor 765 | 765 bonvolu please 766 | 766 protekti protect 767 | 767 tagmezo noon 768 | 768 kropo crop 769 | 769 modernaj modern 770 | 770 elemento element 771 | 771 batita hit 772 | 772 lernantino student 773 | 773 angulo corner 774 | 774 festo party 775 | 775 provizado supply 776 | 776 kies whose 777 | 777 lokalizi locate 778 | 778 ringon ring 779 | 779 gravulo character 780 | 780 insekto insect 781 | 781 kaptita caught 782 | 782 periodo period 783 | 783 indiki indicate 784 | 784 radio radio 785 | 785 parolis spoke 786 | 786 atomo atom 787 | 787 homa human 788 | 788 historio history 789 | 789 efekto effect 790 | 790 elektra electric 791 | 791 atendi expect 792 | 792 osto bone 793 | 793 fervoja rail 794 | 794 imagu imagine 795 | 795 provizi provide 796 | 796 interkonsenti agree 797 | 797 tiele thus 798 | 798 afabla gentle 799 | 799 virino woman 800 | 800 kapitano captain 801 | 801 divenu guess 802 | 802 necesajn necessary 803 | 803 akra sharp 804 | 804 flugilo wing 805 | 805 Krei create 806 | 806 proksimulo neighbor 807 | 807 lavita wash 808 | 808 vesperto bat 809 | 809 anstataŭ rather 810 | 810 amaso 
crowd 811 | 811 grajnoj corn 812 | 812 kompari compare 813 | 813 poemo poem 814 | 814 kordo string 815 | 815 sonorilo bell 816 | 816 dependi depend 817 | 817 viando meat 818 | 818 rub rub 819 | 819 tubo tube 820 | 820 famaj famous 821 | 921 dolaro dollar 822 | 822 fluo stream 823 | 823 timo fear 824 | 284 vido sight 825 | 825 maldika thin 826 | 826 triangulo triangle 827 | 827 planedo planet 828 | 828 rapidi hurry 829 | 829 estro chief 830 | 830 kolonio colony 831 | 831 horloĝo clock 832 | 832 miaj mine 833 | 833 egaleco tie 834 | 834 eniri enter 835 | 835 granda major 836 | 836 freŝa fresh 837 | 837 serĉo search 838 | 838 sendu send 839 | 839 flava yellow 840 | 840 pafilo gun 841 | 841 permesi allow 842 | 842 print print 843 | 843 mortinto dead 844 | 844 punkto spot 845 | 845 dezerto desert 846 | 846 kostumo suit 847 | 847 aktuala current 848 | 848 telfero lift 849 | 840 leviĝis rose 850 | 850 alveni arrive 851 | 851 majstro master 852 | 852 aŭtoveturejo track 853 | 853 gepatro parent 854 | 854 bordo shore 855 | 855 divido division 856 | 856 sheet sheet 857 | 857 substanco substance 858 | 858 favorus favor 859 | 859 konekti connect 860 | 860 posteno post 861 | 861 elspezi spend 862 | 862 chord chord 863 | 863 graso fat 864 | 864 ĝojas glad 865 | 865 originala original 866 | 866 samkondiĉe share 867 | 867 stacidomo station 868 | 868 dad dad 869 | 869 pano bread 870 | 870 ŝarĝi charge 871 | 871 taŭga proper 872 | 872 trinkejo bar 873 | 873 oferto offer 874 | 874 segmento segment 875 | 875 sklavo slave 876 | 876 anaso duck 877 | 877 momenteto instant 878 | 878 merkato market 879 | 879 grado degree 880 | 880 popoli populate 881 | 881 ĉik chick 882 | 882 kara dear 883 | 883 malamiko enemy 884 | 884 respondi reply 885 | 885 trinkaĵon drink 886 | 886 okazi occur 887 | 887 subteno support 888 | 888 parolado speech 889 | 889 naturo nature 890 | 890 ventumilo range 891 | 891 vaporo steam 892 | 892 movado motion 893 | 893 vojo path 894 | 894 likva liquid 895 | 895 ensaluti 
log 896 | 896 signifis meant 897 | 897 kvociento quotient 898 | 898 dentoj teeth 899 | 899 konko shell 900 | 900 kolo neck 901 | 901 oksigeno oxygen 902 | 902 sukero sugar 903 | 903 morto death 904 | 904 bela pretty 905 | 905 lerto skill 906 | 906 virinoj women 907 | 907 sezono season 908 | 908 solvo solution 909 | 909 magneto magnet 910 | 910 arĝento silver 911 | 911 dankon thank 912 | 912 branĉo branch 913 | 913 match match 914 | 914 sufikso suffix 915 | 915 ĉefe especially 916 | 916 figo fig 917 | 917 timis afraid 918 | 918 grandega huge 919 | 919 fratino sister 920 | 920 ŝtalo steel 921 | 921 diskuti discuss 922 | 922 antaŭen forward 923 | 923 simila similar 924 | 924 gvidi guide 925 | 925 sperto experience 926 | 926 interpunkcio score 927 | 927 pomon apple 928 | 928 aĉetinta bought 929 | 929 kondukis led 930 | 930 tonalto pitch 931 | 931 ŝildo coat 932 | 932 maso mass 933 | 933 karton card 934 | 934 bando band 935 | 935 ŝnuro rope 936 | 936 slip slip 937 | 937 venko win 938 | 938 sonĝi dream 939 | 939 vespero evening 940 | 940 kondiĉo condition 941 | 941 feed feed 942 | 942 ilo tool 943 | 943 entute total 944 | 944 baza basic 945 | 945 bonodoro smell 946 | 946 valo valley 947 | 947 nek nor 948 | 948 duobla double 949 | 949 seĝo seat 950 | 950 daŭrigi continue 951 | 951 bloko block 952 | 952 grafikaĵo chart 953 | 953 ĉapelo hat 954 | 954 vendi sell 955 | 955 sukceso success 956 | 956 kompanio company 957 | 957 subtrahi subtract 958 | 958 okazaĵo event 959 | 959 aparta particular 960 | 960 interkonsento deal 961 | 961 naĝi swim 962 | 962 termino term 963 | 963 kontraŭa opposite 964 | 964 edzino wife 965 | 965 ŝuo shoe 966 | 966 ŝultro shoulder 967 | 967 disvastiĝo spread 968 | 968 aranĝi arrange 969 | 969 tendaro camp 970 | 970 elpensi invent 971 | 971 kotono cotton 972 | 972 Born born 973 | 973 determini determine 974 | 974 kvarto quart 975 | 975 naŭ nine 976 | 976 kamiono truck 977 | 977 bruo noise 978 | 978 nivelo level 979 | 979 ŝanco chance 980 | 980 
kolekti gather 981 | 981 butiko shop 982 | 982 sekcio stretch 983 | 983 ĵeti throw 984 | 984 briligi shine 985 | 985 propraĵo property 986 | 986 kolono column 987 | 987 molekulo molecule 988 | 988 elekti select 989 | 989 malĝusta wrong 990 | 990 grizaj gray 991 | 991 ripeto repeat 992 | 992 postuli require 993 | 993 larĝa broad 994 | 994 pretigi prepare 995 | 995 Sala salt 996 | 996 nazon nose 997 | 997 pluralo plural 998 | 998 kolero anger 999 | 999 pretendo claim 1000 | 1000 kontinento continent -------------------------------------------------------------------------------- /language/lid.176.ftz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RasaHQ/algorithm-whiteboard-resources/0ffee41dd20ef3f61822884052de8b666e8c51c0/language/lid.176.ftz -------------------------------------------------------------------------------- /language/tr.tsv: -------------------------------------------------------------------------------- 1 | 1 olarak as 2 | 2 ben I 3 | 3 onun his 4 | 4 bu that 5 | 5 diye he 6 | 6 oldu was 7 | 7 için for 8 | 8 üzerinde on 9 | 9 vardır are 10 | 10 ile with 11 | 11 onlar they 12 | 12 olmak be 13 | 13 at at 14 | 14 bir one 15 | 15 var have 16 | 16 Bu this 17 | 17 dan from 18 | 18 tarafından by 19 | 19 sıcak hot 20 | 20 kelime word 21 | 21 ancak but 22 | 22 ne what 23 | 23 bazı some 24 | 24 olduğunu is 25 | 25 o it 26 | 26 sen you 27 | 27 veya or 28 | 28 vardı had 29 | 29 the 30 | 30 arasında of 31 | 31 karşı to 32 | 32 ve and 33 | 33 bir a 34 | 34 içinde in 35 | 35 biz we 36 | 36 can can 37 | 37 üzerinden out 38 | 38 diğer other 39 | 39 vardı were 40 | 40 hangi which 41 | 41 do do 42 | 42 onların their 43 | 43 zaman time 44 | 44 eğer if 45 | 45 olacak will 46 | 46 nasıl how 47 | 47 dedi said 48 | 48 bir an 49 | 49 her each 50 | 50 söyle tell 51 | 51 yok does 52 | 52 set set 53 | 53 üç three 54 | 54 istiyorum want 55 | 55 hava air 56 | 56 iyi well 57 | 57 ayrıca also 58 | 58 oynamak play 
59 | 59 küçük small 60 | 60 son end 61 | 61 koymak put 62 | 62 ev home 63 | 63 okumak read 64 | 64 el hand 65 | 65 liman port 66 | 66 büyük large 67 | 67 büyü spell 68 | 68 ekleyin add 69 | 69 hatta even 70 | 70 arazi land 71 | 71 burada here 72 | 72 gerekir must 73 | 73 büyük big 74 | 74 yüksek high 75 | 75 böyle such 76 | 76 izleyin follow 77 | 77 hareket act 78 | 78 neden why 79 | 79 sormak ask 80 | 80 erkekler men 81 | 81 değişim change 82 | 82 gitti went 83 | 83 ışık light 84 | 84 tür kind 85 | 85 kapalı off 86 | 86 gerek need 87 | 87 ev house 88 | 88 resim picture 89 | 89 denemek try 90 | 90 bizi us 91 | 91 tekrar again 92 | 92 hayvan animal 93 | 93 nokta point 94 | 94 anne mother 95 | 95 dünya world 96 | 96 yakın near 97 | 97 inşa etmek build 98 | 98 öz self 99 | 99 toprak earth 100 | 100 baba father 101 | 101 herhangi bir any 102 | 102 yeni new 103 | 103 iş work 104 | 104 bölüm part 105 | 105 almak take 106 | 106 olsun get 107 | 107 yer place 108 | 108 yapılan made 109 | 109 canlı live 110 | 110 nerede where 111 | 111 sonra after 112 | 112 geri back 113 | 113 küçük little 114 | 114 sadece only 115 | 115 yuvarlak round 116 | 116 adam man 117 | 117 yıl year 118 | 118 geldi came 119 | 119 gösterisi show 120 | 120 her every 121 | 121 iyi good 122 | 122 bana me 123 | 123 vermek give 124 | 124 bizim our 125 | 125 altında under 126 | 126 adı name 127 | 127 çok very 128 | 128 ile through 129 | 129 sadece just 130 | 130 formu form 131 | 131 cümle sentence 132 | 132 büyük great 133 | 133 düşünmek think 134 | 134 demek say 135 | 135 yardım help 136 | 136 düşük low 137 | 137 hat line 138 | 138 farklı differ 139 | 139 dönüş turn 140 | 140 neden cause 141 | 141 çok much 142 | 142 Yani mean 143 | 143 önce before 144 | 144 hareket move 145 | 145 sağ right 146 | 146 çocuk boy 147 | 147 eski old 148 | 148 çok too 149 | 149 aynı same 150 | 150 diye she 151 | 151 tüm all 152 | 152 orada there 153 | 153 zaman when 154 | 154 yukarı up 155 | 155 Kullanım use 156 | 156 senin your 
157 | 157 yol way 158 | 158 hakkında about 159 | 159 birçok many 160 | 160 o zaman then 161 | 161 onları them 162 | 162 yazmak write 163 | 163 would would 164 | 164 gibi like 165 | 165 bu yüzden so 166 | 166 bu these 167 | 167 onu her 168 | 168 uzun long 169 | 169 yapmak make 170 | 170 şey thing 171 | 171 görmek see 172 | 172 onu him 173 | 173 iki two 174 | 174 var has 175 | 175 bak look 176 | 176 daha fazla more 177 | 177 gün day 178 | 178 olabilir could 179 | 179 gitmek go 180 | 180 gel come 181 | 181 yaptım did 182 | 182 numarası number 183 | 183 ses sound 184 | 184 hayır no 185 | 185 en most 186 | 186 insanlar people 187 | 187 benim my 188 | 188 üzerinde over 189 | 189 bilmek know 190 | 190 su water 191 | 191 daha than 192 | 192 çağrı call 193 | 193 ilk first 194 | 194 kim who 195 | 195 may may 196 | 196 aşağı down 197 | 197 yan side 198 | 198 oldu been 199 | 199 Şimdi now 200 | 200 bulmak find 201 | 201 kafa head 202 | 202 standı stand 203 | 203 kendi own 204 | 204 sayfa page 205 | 205 gerekir should 206 | 206 ülke country 207 | 207 bulundu found 208 | 208 cevap answer 209 | 209 okul school 210 | 210 büyümek grow 211 | 211 çalışma study 212 | 212 hala still 213 | 213 öğrenmek learn 214 | 214 bitki plant 215 | 215 kapak cover 216 | 216 gıda food 217 | 217 güneş sun 218 | 218 dört four 219 | 219 arasında between 220 | 220 devlet state 221 | 221 tutmak keep 222 | 222 göz eye 223 | 223 asla never 224 | 224 son last 225 | 225 let let 226 | 226 düşünce thought 227 | 227 Şehir city 228 | 228 ağaç tree 229 | 229 çapraz cross 230 | 230 çiftlik farm 231 | 231 sert hard 232 | 232 başlangıç start 233 | 233 olabilir might 234 | 234 hikaye story 235 | 235 testere saw 236 | 236 kadar far 237 | 237 deniz sea 238 | 238 çizmek draw 239 | 239 sol left 240 | 240 geç late 241 | 241 run run 242 | 242 yapamaz don’t 243 | 243 süre while 244 | 244 basın press 245 | 245 yakın close 246 | 246 gece night 247 | 247 gerçek real 248 | 248 hayat life 249 | 249 az few 250 | 250 kuzey north 
251 | 251 kitap book 252 | 252 taşımak carry 253 | 253 aldı took 254 | 254 bilim science 255 | 255 yemek eat 256 | 256 oda room 257 | 257 arkadaşı friend 258 | 258 başladı began 259 | 259 fikir idea 260 | 260 balık fish 261 | 261 dağ mountain 262 | 262 dur stop 263 | 263 bir kez once 264 | 264 baz base 265 | 265 duymak hear 266 | 266 at horse 267 | 267 kesim cut 268 | 268 emin sure 269 | 269 izle watch 270 | 270 renk color 271 | 271 yüz face 272 | 272 ahşap wood 273 | 273 ana main 274 | 274 açık open 275 | 275 görünmek seem 276 | 276 birlikte together 277 | 277 sonraki next 278 | 278 beyaz white 279 | 279 çocuk children 280 | 280 başla begin 281 | 281 var got 282 | 282 yürümek walk 283 | 283 örnek example 284 | 284 kolaylığı ease 285 | 285 kâğıt paper 286 | 286 grup group 287 | 287 her zaman always 288 | 288 müzik music 289 | 289 bu those 290 | 290 her ikisi de both 291 | 291 işareti mark 292 | 292 sık sık often 293 | 293 mektup letter 294 | 294 kadar until 295 | 295 kilometre mile 296 | 296 nehir river 297 | 297 araba car 298 | 298 ayaklar feet 299 | 299 bakım care 300 | 300 ikinci second 301 | 301 yeterli enough 302 | 302 düz plain 303 | 303 kız girl 304 | 304 olağan usual 305 | 305 genç young 306 | 306 hazır ready 307 | 307 yukarıdaki above 308 | 308 hiç ever 309 | 309 kırmızı red 310 | 310 listesi list 311 | 311 olsa though 312 | 312 hissetmek feel 313 | 313 tartışma talk 314 | 314 kuş bird 315 | 315 yakında soon 316 | 316 vücut body 317 | 317 köpek dog 318 | 318 aile family 319 | 319 doğrudan direct 320 | 320 poz pose 321 | 321 bırakın leave 322 | 322 şarkı song 323 | 323 ölçmek measure 324 | 324 kapı door 325 | 325 ürün product 326 | 326 siyah black 327 | 327 kısa short 328 | 328 rakam numeral 329 | 329 sınıf class 330 | 330 rüzgar wind 331 | 331 soru question 332 | 332 olur happen 333 | 333 komple complete 334 | 334 gemi ship 335 | 335 alan area 336 | 336 yarım half 337 | 337 kaya rock 338 | 338 sipariş order 339 | 339 yangın fire 340 | 340 güney south 341 | 
341 sorun problem 342 | 342 parça piece 343 | 343 söyledi told 344 | 344 biliyordu knew 345 | 345 geçmek pass 346 | 346 beri since 347 | 347 üst top 348 | 348 bütün whole 349 | 349 kral king 350 | 350 sokak street 351 | 351 inch inch 352 | 352 çarpmak multiply 353 | 353 hiçbir şey nothing 354 | 354 ders course 355 | 355 kalmak stay 356 | 356 tekerlek wheel 357 | 357 tam full 358 | 358 kuvvet force 359 | 359 mavi blue 360 | 360 nesne object 361 | 361 karar vermek decide 362 | 362 yüzey surface 363 | 363 derin deep 364 | 364 ay moon 365 | 365 ada island 366 | 366 ayak foot 367 | 367 sistem system 368 | 368 meşgul busy 369 | 369 Test test 370 | 370 kayıt record 371 | 371 tekne boat 372 | 372 ortak common 373 | 373 altın gold 374 | 374 mümkün possible 375 | 375 düzlem plane 376 | 376 başkasının yeri stead 377 | 377 kuru dry 378 | 378 acaba wonder 379 | 379 gülmek laugh 380 | 380 bin thousand 381 | 381 önce ago 382 | 382 koştu ran 383 | 383 kontrol check 384 | 384 oyunu game 385 | 385 şekil shape 386 | 386 eşit equate 387 | 387 sıcak hot 388 | 388 özledim miss 389 | 389 getirdi brought 390 | 390 ısı heat 391 | 391 kar snow 392 | 392 lastik tire 393 | 393 getirmek bring 394 | 394 evet yes 395 | 395 uzak distant 396 | 396 doldurun fill 397 | 397 doğu east 398 | 398 boya paint 399 | 399 dil language 400 | 400 arasında among 401 | 401 ünitesi unit 402 | 402 güç power 403 | 403 kasaba town 404 | 404 ince fine 405 | 405 belirli certain 406 | 406 uçmak fly 407 | 407 düşmek fall 408 | 408 kurşun lead 409 | 409 ağlamak cry 410 | 410 karanlık dark 411 | 411 makine machine 412 | 412 notu note 413 | 413 bekleyin wait 414 | 414 planı plan 415 | 415 rakam figure 416 | 416 yıldız star 417 | 417 kutu box 418 | 418 i noun 419 | 419 alan field 420 | 420 gerisi rest 421 | 421 doğru correct 422 | 422 yetenekli able 423 | 423 kiloluk pound 424 | 424 bitti done 425 | 425 güzellik beauty 426 | 426 sürücü drive 427 | 427 durdu stood 428 | 428 içeren contain 429 | 429 ön front 430 | 430 
öğretmek teach 431 | 431 hafta week 432 | 432 nihai final 433 | 433 verdi gave 434 | 434 yeşil green 435 | 435 oh oh 436 | 436 hızlı quick 437 | 437 geliştirmek develop 438 | 438 okyanus ocean 439 | 439 sıcak warm 440 | 440 ücretsiz free 441 | 441 dakika minute 442 | 442 güçlü strong 443 | 443 özel special 444 | 444 zihin mind 445 | 445 arkasında behind 446 | 446 açık clear 447 | 447 kuyruk tail 448 | 448 üretmek produce 449 | 449 gerçek fact 450 | 450 uzay space 451 | 451 duydum heard 452 | 452 en iyi best 453 | 453 saat hour 454 | 454 daha iyi better 455 | 455 gerçek true 456 | 456 sırasında during 457 | 457 yüz hundred 458 | 458 beş five 459 | 459 hatırlıyorum remember 460 | 460 adım step 461 | 461 erken early 462 | 462 tutun hold 463 | 463 batı west 464 | 464 zemin ground 465 | 465 faiz interest 466 | 466 ulaşmak reach 467 | 467 hızlı fast 468 | 468 fiil verb 469 | 469 şarkı söylemek sing 470 | 470 dinle listen 471 | 471 altı six 472 | 472 tablo table 473 | 473 seyahat travel 474 | 474 daha az less 475 | 475 sabah morning 476 | 476 on ten 477 | 477 basit simple 478 | 478 birkaç several 479 | 479 ünlü vowel 480 | 480 doğru toward 481 | 481 savaş war 482 | 482 koymak lay 483 | 483 karşı against 484 | 484 desen pattern 485 | 485 yavaş slow 486 | 486 merkezi center 487 | 487 aşk love 488 | 488 kişi person 489 | 489 para money 490 | 490 hizmet serve 491 | 491 görünür appear 492 | 492 yol road 493 | 493 harita map 494 | 494 yağmur rain 495 | 495 kural rule 496 | 496 idare govern 497 | 497 Çek pull 498 | 498 soğuk cold 499 | 499 haber notice 500 | 500 ses voice 501 | 501 enerji energy 502 | 502 avı hunt 503 | 503 muhtemel probable 504 | 504 yatak bed 505 | 505 kardeş brother 506 | 506 yumurta egg 507 | 507 binmek ride 508 | 508 hücre cell 509 | 509 inanıyorum believe 510 | 510 belki perhaps 511 | 511 seçmek pick 512 | 512 ani sudden 513 | 513 saymak count 514 | 514 kare square 515 | 515 neden reason 516 | 516 uzunluk length 517 | 517 temsil represent 518 | 518 sanat 
art 519 | 519 konu subject 520 | 520 bölge region 521 | 521 boyut size 522 | 522 değiştirmek vary 523 | 523 yerleşmek settle 524 | 524 konuşmak speak 525 | 525 ağırlık weight 526 | 526 genel general 527 | 527 buz ice 528 | 528 madde matter 529 | 529 daire circle 530 | 530 çifti pair 531 | 531 dahil include 532 | 532 bölmek divide 533 | 533 hece syllable 534 | 534 keçe felt 535 | 535 büyük grand 536 | 536 top ball 537 | 537 henüz yet 538 | 538 dalga wave 539 | 539 bırakın drop 540 | 540 kalp heart 541 | 541 duyuyorum am 542 | 542 mevcut present 543 | 543 ağır heavy 544 | 544 dans dance 545 | 545 motor engine 546 | 546 pozisyon position 547 | 547 kol arm 548 | 548 geniş wide 549 | 549 yelken sail 550 | 550 materyal material 551 | 551 kesir fraction 552 | 552 orman forest 553 | 553 oturmak sit 554 | 554 yarış race 555 | 555 penceresi window 556 | 556 mağaza store 557 | 557 yaz summer 558 | 558 tren train 559 | 559 uyku sleep 560 | 560 kanıtlamak prove 561 | 561 yalnız lone 562 | 562 bacak leg 563 | 563 egzersiz exercise 564 | 564 duvar wall 565 | 565 catch catch 566 | 566 dağ mount 567 | 567 dilek wish 568 | 568 gökyüzü sky 569 | 569 kurulu board 570 | 570 sevinç joy 571 | 571 kış winter 572 | 572 Cts sat 573 | 573 yazılı written 574 | 574 vahşi wild 575 | 575 enstrüman instrument 576 | 576 tuttu kept 577 | 577 cam glass 578 | 578 çim grass 579 | 579 inek cow 580 | 580 iş job 581 | 581 kenar edge 582 | 582 işareti sign 583 | 583 ziyareti visit 584 | 584 geçmiş past 585 | 585 yumuşak soft 586 | 586 eğlenceli fun 587 | 587 parlak bright 588 | 588 gaz gas 589 | 589 hava weather 590 | 590 ay month 591 | 591 milyon million 592 | 592 ayı bear 593 | 593 bitirmek finish 594 | 594 mutlu happy 595 | 595 umut hope 596 | 596 çiçek flower 597 | 597 giydirmek clothe 598 | 598 garip strange 599 | 599 gitti gone 600 | 600 ticaret trade 601 | 601 melodi melody 602 | 602 gezi trip 603 | 603 ofis office 604 | 604 almak receive 605 | 605 satır row 606 | 606 ağız mouth 607 | 607 tam exact 
608 | 608 sembol symbol 609 | 609 ölmek die 610 | 610 en az least 611 | 611 sorun trouble 612 | 612 bağırmak shout 613 | 613 hariç except 614 | 614 yazdı wrote 615 | 615 tohum seed 616 | 616 sesi tone 617 | 617 katılmak join 618 | 618 önermek suggest 619 | 619 temiz clean 620 | 620 molası break 621 | 621 bayan lady 622 | 622 yarda yard 623 | 623 yükselecek rise 624 | 624 kötü bad 625 | 625 darbe blow 626 | 626 yağ oil 627 | 627 kan blood 628 | 628 dokunma touch 629 | 629 büyüdü grew 630 | 630 yüzde cent 631 | 631 karıştırmak mix 632 | 632 takım team 633 | 633 tel wire 634 | 634 maliyet cost 635 | 635 kaybetti lost 636 | 636 kahverengi brown 637 | 637 giymek wear 638 | 638 bahçe garden 639 | 639 eşit equal 640 | 640 gönderildi sent 641 | 641 seçim choose 642 | 642 düştü fell 643 | 643 uygun fit 644 | 644 akış flow 645 | 645 adil fair 646 | 646 banka bank 647 | 647 toplamak collect 648 | 648 kaydetmek save 649 | 649 kontrolü control 650 | 650 ondalık decimal 651 | 651 kulak ear 652 | 652 başka else 653 | 653 oldukça quite 654 | 654 kırdı broke 655 | 655 dava case 656 | 656 orta middle 657 | 657 öldürmek kill 658 | 658 oğlu son 659 | 659 göl lake 660 | 660 an moment 661 | 661 ölçek scale 662 | 662 yüksek sesle loud 663 | 663 bahar spring 664 | 664 gözlemlemek observe 665 | 665 çocuk child 666 | 666 düz straight 667 | 667 ünsüz consonant 668 | 668 ulus nation 669 | 669 Sözlük dictionary 670 | 670 süt milk 671 | 671 hız speed 672 | 672 yöntem method 673 | 673 Organ organ 674 | 674 ödeme pay 675 | 675 yaş age 676 | 676 bölüm section 677 | 677 elbise dress 678 | 678 bulut cloud 679 | 679 sürpriz surprise 680 | 680 sessiz quiet 681 | 681 taş stone 682 | 682 küçücük tiny 683 | 683 tırmanış climb 684 | 684 serin cool 685 | 685 dizayn design 686 | 686 kötü poor 687 | 687 çok lot 688 | 688 deneme experiment 689 | 689 alt bottom 690 | 690 anahtar key 691 | 691 demir iron 692 | 692 tek single 693 | 693 sopa stick 694 | 694 düz flat 695 | 695 yirmi twenty 696 | 696 cilt skin 697 
| 697 gülümseme smile 698 | 698 kırışık crease 699 | 699 delik hole 700 | 700 atlama jump 701 | 701 bebek baby 702 | 702 sekiz eight 703 | 703 köy village 704 | 704 karşılamak meet 705 | 705 kök root 706 | 706 satın buy 707 | 707 yükseltmek raise 708 | 708 çözmek solve 709 | 709 metal metal 710 | 710 olsun whether 711 | 711 it push 712 | 712 yedi seven 713 | 713 paragraf paragraph 714 | 714 üçüncü third 715 | 715 Shall shall 716 | 716 tutulan held 717 | 717 saç hair 718 | 718 tanımlamak describe 719 | 719 aşçı cook 720 | 720 zemin floor 721 | 721 ya da either 722 | 722 sonuç result 723 | 723 yanmak burn 724 | 724 tepe hill 725 | 725 güvenli safe 726 | 726 kedi cat 727 | 727 yüzyıl century 728 | 728 düşünmek consider 729 | 729 tipi type 730 | 730 hukuk law 731 | 731 bit bit 732 | 732 sahil coast 733 | 733 kopya copy 734 | 734 ifade phrase 735 | 735 sessiz silent 736 | 736 uzun boylu tall 737 | 737 kum sand 738 | 738 toprak soil 739 | 739 rulo roll 740 | 740 sıcaklık temperature 741 | 741 parmak finger 742 | 742 sanayi industry 743 | 743 değeri value 744 | 744 kavga fight 745 | 745 yalan lie 746 | 746 yendi beat 747 | 747 heyecanlandırmak excite 748 | 748 doğal natural 749 | 749 görünüm view 750 | 750 sense sense 751 | 751 sermaye capital 752 | 752 olmaz won’t 753 | 753 sandalye chair 754 | 754 tehlike danger 755 | 755 meyve fruit 756 | 756 zengin rich 757 | 757 kalın thick 758 | 758 asker soldier 759 | 759 süreç process 760 | 760 işletmek operate 761 | 761 uygulama practice 762 | 762 ayrı separate 763 | 763 zor difficult 764 | 764 doktor doctor 765 | 765 lütfen please 766 | 766 korumak protect 767 | 767 öğlen noon 768 | 768 kırpma crop 769 | 769 çağdaş modern 770 | 770 eleman element 771 | 771 vurmak hit 772 | 772 öğrenci student 773 | 773 köşe corner 774 | 774 parti party 775 | 775 besleme supply 776 | 776 kimin whose 777 | 777 yerleştirmek locate 778 | 778 halka ring 779 | 779 karakter character 780 | 780 böcek insect 781 | 781 yakalandı caught 782 | 782 dönemi 
period 783 | 783 göstermektedir indicate 784 | 784 radyo radio 785 | 785 konuştu spoke 786 | 786 atomudur atom 787 | 787 insan human 788 | 788 geçmişi history 789 | 789 etkisi effect 790 | 790 elektrik electric 791 | 791 bekliyoruz expect 792 | 792 kemik bone 793 | 793 demiryolu rail 794 | 794 hayal imagine 795 | 795 sağlamak provide 796 | 796 katılıyorum agree 797 | 797 böylece thus 798 | 798 nazik gentle 799 | 799 kadın woman 800 | 800 kaptan captain 801 | 801 sanırım guess 802 | 802 gerekli necessary 803 | 803 keskin sharp 804 | 804 kanat wing 805 | 805 oluşturmak create 806 | 806 komşu neighbor 807 | 807 yıkama wash 808 | 808 yarasa bat 809 | 809 yerine rather 810 | 810 kalabalık crowd 811 | 811 mısır corn 812 | 812 karşılaştırma compare 813 | 813 şiir poem 814 | 814 dize string 815 | 815 çan bell 816 | 816 bağlı depend 817 | 817 et meat 818 | 818 ovmak rub 819 | 819 tüp tube 820 | 820 ünlü famous 821 | 921 dolar dollar 822 | 822 akışı stream 823 | 823 korku fear 824 | 284 görüş sight 825 | 825 ince thin 826 | 826 üçgen triangle 827 | 827 gezegen planet 828 | 828 acele hurry 829 | 829 baş chief 830 | 830 koloni colony 831 | 831 saat clock 832 | 832 mayın mine 833 | 833 kravat tie 834 | 834 girin enter 835 | 835 büyük major 836 | 836 taze fresh 837 | 837 arama search 838 | 838 göndermek send 839 | 839 sarı yellow 840 | 840 gun gun 841 | 841 izin allow 842 | 842 baskı print 843 | 843 ölü dead 844 | 844 nokta spot 845 | 845 çöl desert 846 | 846 takım elbise suit 847 | 847 akım current 848 | 848 asansör lift 849 | 840 gül rose 850 | 850 varmak arrive 851 | 851 usta master 852 | 852 iz track 853 | 853 ebeveyn parent 854 | 854 kıyı shore 855 | 855 bölünme division 856 | 856 levha sheet 857 | 857 madde substance 858 | 858 iyilik favor 859 | 859 bağlamak connect 860 | 860 sonrası post 861 | 861 harcamak spend 862 | 862 akor chord 863 | 863 şişman fat 864 | 864 memnun glad 865 | 865 orijinal original 866 | 866 payı share 867 | 867 istasyon station 868 | 868 baba dad 869 
| 869 ekmek bread 870 | 870 şarj charge 871 | 871 uygun proper 872 | 872 bar bar 873 | 873 teklif offer 874 | 874 segmenti segment 875 | 875 köle slave 876 | 876 ördek duck 877 | 877 anlık instant 878 | 878 pazar market 879 | 879 derecesi degree 880 | 880 doldurmak populate 881 | 881 civciv chick 882 | 882 sevgili dear 883 | 883 düşman enemy 884 | 884 cevap reply 885 | 885 içki drink 886 | 886 meydana occur 887 | 887 destek support 888 | 888 konuşma speech 889 | 889 doğa nature 890 | 890 aralığı range 891 | 891 buhar steam 892 | 892 hareket motion 893 | 893 yol path 894 | 894 sıvı liquid 895 | 895 log log 896 | 896 demek meant 897 | 897 bölüm quotient 898 | 898 dişler teeth 899 | 899 kabuk shell 900 | 900 boyun neck 901 | 901 oksijen oxygen 902 | 902 şeker sugar 903 | 903 ölüm death 904 | 904 güzel pretty 905 | 905 beceri skill 906 | 906 kadın women 907 | 907 sezon season 908 | 908 çözüm solution 909 | 909 mıknatıs magnet 910 | 910 gümüş silver 911 | 911 teşekkür ederim thank 912 | 912 şube branch 913 | 913 maç match 914 | 914 sonek suffix 915 | 915 özellikle especially 916 | 916 incir fig 917 | 917 korkuyor afraid 918 | 918 büyük huge 919 | 919 kardeş sister 920 | 920 çelik steel 921 | 921 tartışmak discuss 922 | 922 ileri forward 923 | 923 benzer similar 924 | 924 kılavuz guide 925 | 925 deneyim experience 926 | 926 puan score 927 | 927 elma apple 928 | 928 satın aldı bought 929 | 929 açtı led 930 | 930 zift pitch 931 | 931 ceket coat 932 | 932 kitle mass 933 | 933 kart card 934 | 934 bant band 935 | 935 halat rope 936 | 936 kayma slip 937 | 937 win win 938 | 938 rüya dream 939 | 939 akşam evening 940 | 940 durumu condition 941 | 941 beslemek feed 942 | 942 aracı tool 943 | 943 toplam total 944 | 944 temel basic 945 | 945 koku smell 946 | 946 vadi valley 947 | 947 ne nor 948 | 948 çift double 949 | 949 koltuk seat 950 | 950 devam etmek continue 951 | 951 bloğu block 952 | 952 grafiği chart 953 | 953 şapka hat 954 | 954 satmak sell 955 | 955 başarı success 956 | 
956 şirket company 957 | 957 çıkarmak subtract 958 | 958 olay event 959 | 959 özellikle particular 960 | 960 anlaşma deal 961 | 961 yüzmek swim 962 | 962 vadeli term 963 | 963 karşısında opposite 964 | 964 karısı wife 965 | 965 ayakkabı shoe 966 | 966 omuz shoulder 967 | 967 yaymak spread 968 | 968 düzenlemek arrange 969 | 969 kamp camp 970 | 970 icat invent 971 | 971 pamuk cotton 972 | 972 doğmuş born 973 | 973 belirlemek determine 974 | 974 kuart quart 975 | 975 dokuz nine 976 | 976 kamyon truck 977 | 977 gürültü noise 978 | 978 seviyesi level 979 | 979 şans chance 980 | 980 toplamak gather 981 | 981 dükkan shop 982 | 982 streç stretch 983 | 983 atmak throw 984 | 984 parlaklık shine 985 | 985 mülkiyet property 986 | 986 sütun column 987 | 987 molekülü molecule 988 | 988 seçin select 989 | 989 yanlış wrong 990 | 990 gri gray 991 | 991 tekrar repeat 992 | 992 gerektirir require 993 | 993 geniş broad 994 | 994 hazırlamak prepare 995 | 995 tuz salt 996 | 996 burun nose 997 | 997 çoğul plural 998 | 998 öfke anger 999 | 999 iddia claim 1000 | 1000 kıta continent -------------------------------------------------------------------------------- /language/vi.tsv: -------------------------------------------------------------------------------- 1 | 1 như as 2 | 2 tôi I 3 | 3 mình his 4 | 4 mà that 5 | 5 ông he 6 | 6 là was 7 | 7 cho for 8 | 8 trên on 9 | 9 là are 10 | 10 với with 11 | 11 họ they 12 | 12 được be 13 | 13 tại at 14 | 14 một one 15 | 15 có have 16 | 16 này this 17 | 17 từ from 18 | 18 bởi by 19 | 19 nóng hot 20 | 20 từ word 21 | 21 nhưng but 22 | 22 những gì what 23 | 23 một số some 24 | 24 là is 25 | 25 nó it 26 | 26 anh you 27 | 27 hoặc or 28 | 28 có had 29 | 29 các the 30 | 30 của of 31 | 31 để to 32 | 32 và and 33 | 33 một a 34 | 34 trong in 35 | 35 chúng tôi we 36 | 36 có thể can 37 | 37 ra out 38 | 38 khác other 39 | 39 là were 40 | 40 mà which 41 | 41 làm do 42 | 42 của họ their 43 | 43 thời gian time 44 | 44 nếu if 45 | 45 sẽ will 46 | 46 như thế nào how 
47 | 47 nói said 48 | 48 một an 49 | 49 môi each 50 | 50 nói tell 51 | 51 không does 52 | 52 bộ set 53 | 53 ba three 54 | 54 muốn want 55 | 55 không khí air 56 | 56 cũng well 57 | 57 cũng also 58 | 58 chơi play 59 | 59 nhỏ small 60 | 60 cuối end 61 | 61 đặt put 62 | 62 nhà home 63 | 63 đọc read 64 | 64 tay hand 65 | 65 cổng port 66 | 66 lớn large 67 | 67 chính tả spell 68 | 68 thêm add 69 | 69 thậm chí even 70 | 70 đất land 71 | 71 ở đây here 72 | 72 phải must 73 | 73 lớn big 74 | 74 cao high 75 | 75 như vậy such 76 | 76 theo follow 77 | 77 hành động act 78 | 78 lý do tại sao why 79 | 79 xin ask 80 | 80 người đàn ông men 81 | 81 thay đổi change 82 | 82 đi went 83 | 83 ánh sáng light 84 | 84 loại kind 85 | 85 tắt off 86 | 86 cần need 87 | 87 nhà house 88 | 88 hình ảnh picture 89 | 89 thử try 90 | 90 chúng tôi us 91 | 91 một lần nữa again 92 | 92 động vật animal 93 | 93 điểm point 94 | 94 mẹ mother 95 | 95 thế giới world 96 | 96 gần near 97 | 97 xây dựng build 98 | 98 tự self 99 | 99 đất earth 100 | 100 cha father 101 | 101 bất kỳ any 102 | 102 mới new 103 | 103 công việc work 104 | 104 một phần part 105 | 105 có take 106 | 106 được get 107 | 107 nơi place 108 | 108 thực hiện made 109 | 109 sống live 110 | 110 nơi where 111 | 111 sau khi after 112 | 112 trở lại back 113 | 113 ít little 114 | 114 chỉ only 115 | 115 chung quanh round 116 | 116 người đàn ông man 117 | 117 năm year 118 | 118 đến came 119 | 119 chương trình show 120 | 120 mỗi every 121 | 121 tốt good 122 | 122 tôi me 123 | 123 cung cấp cho give 124 | 124 của chúng tôi our 125 | 125 dưới under 126 | 126 tên name 127 | 127 rất very 128 | 128 thông qua through 129 | 129 chỉ just 130 | 130 hình thức form 131 | 131 câu sentence 132 | 132 tuyệt vời great 133 | 133 nghi think 134 | 134 nói say 135 | 135 giúp help 136 | 136 thấp low 137 | 137 dòng line 138 | 138 khác nhau differ 139 | 139 lần lượt turn 140 | 140 nguyên nhân cause 141 | 141 nhiều much 142 | 142 có nghĩa là mean 143 | 143 trước before 144 | 144 di 
chuyển move 145 | 145 ngay right 146 | 146 cậu bé boy 147 | 147 cũ old 148 | 148 quá too 149 | 149 như nhau same 150 | 150 cô she 151 | 151 tất cả all 152 | 152 có there 153 | 153 khi when 154 | 154 lên up 155 | 155 sử dụng use 156 | 156 của bạn your 157 | 157 cách way 158 | 158 về about 159 | 159 nhiều many 160 | 160 sau đó then 161 | 161 họ them 162 | 162 viết write 163 | 163 sẽ would 164 | 164 như like 165 | 165 để so 166 | 166 các these 167 | 167 cô her 168 | 168 lâu long 169 | 169 làm make 170 | 170 điều thing 171 | 171 thấy see 172 | 172 anh him 173 | 173 hai two 174 | 174 có has 175 | 175 xem look 176 | 176 hơn more 177 | 177 ngày day 178 | 178 có thể could 179 | 179 đi go 180 | 180 đến come 181 | 181 đã làm did 182 | 182 số number 183 | 183 âm thanh sound 184 | 184 không có no 185 | 185 nhất most 186 | 186 nhân dân people 187 | 187 của tôi my 188 | 188 hơn over 189 | 189 biết know 190 | 190 nước water 191 | 191 hơn than 192 | 192 gọi call 193 | 193 đầu tiên first 194 | 194 người who 195 | 195 có thể may 196 | 196 xuống down 197 | 197 bên side 198 | 198 được been 199 | 199 bây giờ now 200 | 200 tìm find 201 | 201 đầu head 202 | 202 đứng stand 203 | 203 riêng own 204 | 204 trang page 205 | 205 nên should 206 | 206 nước country 207 | 207 tìm thấy found 208 | 208 câu trả lời answer 209 | 209 trường school 210 | 210 phát triển grow 211 | 211 nghiên cứu study 212 | 212 vẫn still 213 | 213 học learn 214 | 214 nhà máy plant 215 | 215 bìa cover 216 | 216 thực phẩm food 217 | 217 ánh nắng mặt trời sun 218 | 218 bốn four 219 | 219 giữa between 220 | 220 nhà nước state 221 | 221 giữ keep 222 | 222 mắt eye 223 | 223 không bao giờ never 224 | 224 cuối cùng last 225 | 225 cho phép let 226 | 226 nghĩ thought 227 | 227 thành phố city 228 | 228 cây tree 229 | 229 qua cross 230 | 230 trang trại farm 231 | 231 cứng hard 232 | 232 bắt đầu start 233 | 233 might might 234 | 234 câu chuyện story 235 | 235 cưa saw 236 | 236 đến nay far 237 | 237 biển sea 238 | 238 vẽ draw 239 | 239 
còn lại left 240 | 240 cuối late 241 | 241 chạy run 242 | 242 không don’t 243 | 243 trong khi while 244 | 244 báo chí press 245 | 245 gần close 246 | 246 đêm night 247 | 247 thực real 248 | 248 cuộc sống life 249 | 249 số few 250 | 250 phía bắc north 251 | 251 cuốn sách book 252 | 252 thực hiện carry 253 | 253 mất took 254 | 254 khoa học science 255 | 255 ăn eat 256 | 256 phòng room 257 | 257 người bạn friend 258 | 258 bắt đầu began 259 | 259 ý tưởng idea 260 | 260 cá fish 261 | 261 núi mountain 262 | 262 ngăn chặn stop 263 | 263 một lần once 264 | 264 cơ sở base 265 | 265 nghe hear 266 | 266 ngựa horse 267 | 267 cắt cut 268 | 268 chắc chắn sure 269 | 269 xem watch 270 | 270 màu color 271 | 271 khuôn mặt face 272 | 272 gỗ wood 273 | 273 chính main 274 | 274 mở open 275 | 275 dường như seem 276 | 276 cùng together 277 | 277 tiếp theo next 278 | 278 trắng white 279 | 279 trẻ em children 280 | 280 bắt đầu begin 281 | 281 có got 282 | 282 đi bộ walk 283 | 283 Ví dụ example 284 | 284 giảm bớt ease 285 | 285 giấy paper 286 | 286 nhóm group 287 | 287 luôn luôn always 288 | 288 nhạc music 289 | 289 những those 290 | 290 cả hai both 291 | 291 đánh dấu mark 292 | 292 thường often 293 | 293 thư letter 294 | 294 cho đến khi until 295 | 295 dặm mile 296 | 296 sông river 297 | 297 xe car 298 | 298 chân feet 299 | 299 chăm sóc care 300 | 300 thứ hai second 301 | 301 đủ enough 302 | 302 đồng bằng plain 303 | 303 cô gái girl 304 | 304 thông thường usual 305 | 305 trẻ young 306 | 306 sẵn sàng ready 307 | 307 trên đây above 308 | 308 bao giờ ever 309 | 309 màu đỏ red 310 | 310 danh sách list 311 | 311 mặc dù though 312 | 312 cảm thấy feel 313 | 313 nói chuyện talk 314 | 314 chim bird 315 | 315 sớm soon 316 | 316 cơ thể body 317 | 317 con chó dog 318 | 318 gia đình family 319 | 319 trực tiếp direct 320 | 320 đặt ra pose 321 | 321 lại leave 322 | 322 bài hát song 323 | 323 đo lường measure 324 | 324 cửa door 325 | 325 sản phẩm product 326 | 326 đen black 327 | 327 ngắn short 328 | 328 
chữ số numeral 329 | 329 lớp class 330 | 330 gió wind 331 | 331 câu hỏi question 332 | 332 xảy ra happen 333 | 333 hoàn thành complete 334 | 334 tàu ship 335 | 335 khu vực area 336 | 336 một nửa half 337 | 337 đá rock 338 | 338 để order 339 | 339 lửa fire 340 | 340 nam south 341 | 341 vấn đề problem 342 | 342 mảnh piece 343 | 343 nói told 344 | 344 biết knew 345 | 345 vượt qua pass 346 | 346 từ since 347 | 347 đầu top 348 | 348 toàn bộ whole 349 | 349 vua king 350 | 350 đường phố street 351 | 351 inch inch 352 | 352 nhân multiply 353 | 353 không có gì nothing 354 | 354 Tất nhiên course 355 | 355 ở lại stay 356 | 356 bánh xe wheel 357 | 357 đầy đủ full 358 | 358 lực force 359 | 359 màu xanh blue 360 | 360 đối tượng object 361 | 361 quyết định decide 362 | 362 bề mặt surface 363 | 363 sâu deep 364 | 364 mặt trăng moon 365 | 365 đảo island 366 | 366 chân foot 367 | 367 hệ thống system 368 | 368 bận rộn busy 369 | 369 kiểm tra test 370 | 370 ghi record 371 | 371 thuyền boat 372 | 372 phổ biến common 373 | 373 vàng gold 374 | 374 có thể possible 375 | 375 máy bay plane 376 | 376 thay stead 377 | 377 khô dry 378 | 378 tự hỏi wonder 379 | 379 cười laugh 380 | 380 ngàn thousand 381 | 381 trước ago 382 | 382 ran ran 383 | 383 kiểm tra check 384 | 384 trò chơi game 385 | 385 hình dạng shape 386 | 386 đánh đồng equate 387 | 387 nóng hot 388 | 388 bỏ lỡ miss 389 | 389 mang brought 390 | 390 nhiệt heat 391 | 391 tuyết snow 392 | 392 lốp xe tire 393 | 393 mang lại bring 394 | 394 vâng yes 395 | 395 xa distant 396 | 396 điền fill 397 | 397 đông east 398 | 398 sơn paint 399 | 399 ngôn ngữ language 400 | 400 trong among 401 | 401 đơn vị unit 402 | 402 điện power 403 | 403 thị trấn town 404 | 404 tốt fine 405 | 405 nhất định certain 406 | 406 bay fly 407 | 407 giảm fall 408 | 408 dẫn lead 409 | 409 kêu cry 410 | 410 tối dark 411 | 411 máy machine 412 | 412 ghi note 413 | 413 đợi wait 414 | 414 kế hoạch plan 415 | 415 con số figure 416 | 416 sao star 417 | 417 hộp box 418 | 418 danh 
từ noun 419 | 419 lĩnh vực field 420 | 420 phần còn lại rest 421 | 421 chính xác correct 422 | 422 thể able 423 | 423 bảng pound 424 | 424 Xong done 425 | 425 vẻ đẹp beauty 426 | 426 ổ đĩa drive 427 | 427 đứng stood 428 | 428 chứa contain 429 | 429 trước front 430 | 430 dạy teach 431 | 431 tuần week 432 | 432 thức final 433 | 433 đã gave 434 | 434 màu xanh lá cây green 435 | 435 oh oh 436 | 436 nhanh chóng quick 437 | 437 phát triển develop 438 | 438 đại dương ocean 439 | 439 ấm áp warm 440 | 440 miễn phí free 441 | 441 phút minute 442 | 442 mạnh mẽ strong 443 | 443 đặc biệt special 444 | 444 tâm mind 445 | 445 sau behind 446 | 446 trong clear 447 | 447 đuôi tail 448 | 448 sản xuất produce 449 | 449 thực tế fact 450 | 450 không gian space 451 | 451 nghe heard 452 | 452 tốt nhất best 453 | 453 giờ hour 454 | 454 tốt hơn better 455 | 455 đúng true 456 | 456 trong khi during 457 | 457 trăm hundred 458 | 458 năm five 459 | 459 nhớ remember 460 | 460 bước step 461 | 461 đầu early 462 | 462 giư hold 463 | 463 tây west 464 | 464 mặt đất ground 465 | 465 quan tâm interest 466 | 466 đạt reach 467 | 467 nhanh chóng fast 468 | 468 động từ verb 469 | 469 hát sing 470 | 470 lắng nghe listen 471 | 471 sáu six 472 | 472 bảng table 473 | 473 du lịch travel 474 | 474 ít less 475 | 475 buổi sáng morning 476 | 476 mười ten 477 | 477 đơn giản simple 478 | 478 nhiều several 479 | 479 nguyên âm vowel 480 | 480 hướng toward 481 | 481 chiến tranh war 482 | 482 đặt lay 483 | 483 chống lại against 484 | 484 mô hình pattern 485 | 485 chậm slow 486 | 486 trung tâm center 487 | 487 tình yêu love 488 | 488 người person 489 | 489 tiền money 490 | 490 phục vụ serve 491 | 491 xuất hiện appear 492 | 492 đường road 493 | 493 Bản đồ map 494 | 494 mưa rain 495 | 495 quy tắc rule 496 | 496 phối govern 497 | 497 kéo pull 498 | 498 lạnh cold 499 | 499 thông báo notice 500 | 500 giọng nói voice 501 | 501 năng lượng energy 502 | 502 săn hunt 503 | 503 có thể xảy ra probable 504 | 504 giường bed 505 | 505 
anh trai brother 506 | 506 trứng egg 507 | 507 đi xe ride 508 | 508 pin cell 509 | 509 tin believe 510 | 510 có lẽ perhaps 511 | 511 chọn pick 512 | 512 đột ngột sudden 513 | 513 tính count 514 | 514 vuông square 515 | 515 lý do reason 516 | 516 chiều dài length 517 | 517 đại diện represent 518 | 518 nghệ thuật art 519 | 519 Tiêu đề subject 520 | 520 khu region 521 | 521 kích thước size 522 | 522 khác nhau vary 523 | 523 giải quyết settle 524 | 524 nói speak 525 | 525 trọng lượng weight 526 | 526 chung general 527 | 527 băng ice 528 | 528 vấn đề matter 529 | 529 vòng tròn circle 530 | 530 đôi pair 531 | 531 bao gồm include 532 | 532 chia divide 533 | 533 âm tiết syllable 534 | 534 cảm thấy felt 535 | 535 lớn grand 536 | 536 bóng ball 537 | 537 nhưng yet 538 | 538 sóng wave 539 | 539 rơi drop 540 | 540 tim heart 541 | 541 là am 542 | 542 hiện nay present 543 | 543 nặng heavy 544 | 544 khiêu vũ dance 545 | 545 động cơ engine 546 | 546 vị trí position 547 | 547 cánh tay arm 548 | 548 rộng wide 549 | 549 buồm sail 550 | 550 tài liệu material 551 | 551 phần fraction 552 | 552 rừng forest 553 | 553 ngồi sit 554 | 554 cuộc đua race 555 | 555 cửa sổ window 556 | 556 cửa hàng store 557 | 557 mùa hè summer 558 | 558 đào tạo train 559 | 559 ngủ sleep 560 | 560 chứng minh prove 561 | 561 đơn độc lone 562 | 562 chân leg 563 | 563 tập thể dục exercise 564 | 564 tường wall 565 | 565 bắt catch 566 | 566 mount mount 567 | 567 muốn wish 568 | 568 bầu trời sky 569 | 569 hội đồng quản trị board 570 | 570 niềm vui joy 571 | 571 mùa đông winter 572 | 572 ngồi sat 573 | 573 bằng văn bản written 574 | 574 hoang dã wild 575 | 575 cụ instrument 576 | 576 giữ kept 577 | 577 kính glass 578 | 578 cỏ grass 579 | 579 bò cow 580 | 580 công việc job 581 | 581 cạnh edge 582 | 582 dấu hiệu sign 583 | 583 lần visit 584 | 584 qua past 585 | 585 mềm soft 586 | 586 vui vẻ fun 587 | 587 sáng bright 588 | 588 khí gas 589 | 589 thời tiết weather 590 | 590 tháng month 591 | 591 triệu million 592 | 592 chịu 
bear 593 | 593 kết thúc finish 594 | 594 hạnh phúc happy 595 | 595 hy vọng hope 596 | 596 hoa flower 597 | 597 mặc clothe 598 | 598 lạ strange 599 | 599 ra đi gone 600 | 600 thương mại trade 601 | 601 giai điệu melody 602 | 602 chuyến đi trip 603 | 603 văn phòng office 604 | 604 nhận receive 605 | 605 hàng row 606 | 606 miệng mouth 607 | 607 chính xác exact 608 | 608 biểu tượng symbol 609 | 609 chết die 610 | 610 nhất least 611 | 611 rắc rối trouble 612 | 612 hét lên shout 613 | 613 trừ except 614 | 614 đã viết wrote 615 | 615 hạt giống seed 616 | 616 giai điệu tone 617 | 617 tham gia join 618 | 618 đề nghị suggest 619 | 619 sạch clean 620 | 620 nghỉ break 621 | 621 phụ nữ lady 622 | 622 sân yard 623 | 623 tăng rise 624 | 624 xấu bad 625 | 625 đòn blow 626 | 626 dầu oil 627 | 627 máu blood 628 | 628 chạm touch 629 | 629 tăng grew 630 | 630 phần trăm cent 631 | 631 trộn mix 632 | 632 đội team 633 | 633 dây wire 634 | 634 chi phí cost 635 | 635 thua lost 636 | 636 nâu brown 637 | 637 mặc wear 638 | 638 vườn garden 639 | 639 như nhau equal 640 | 640 gửi sent 641 | 641 chọn choose 642 | 642 giảm fell 643 | 643 phù hợp với fit 644 | 644 chảy flow 645 | 645 công bằng fair 646 | 646 ngân hàng bank 647 | 647 thu thập collect 648 | 648 lưu save 649 | 649 kiểm soát control 650 | 650 số thập phân decimal 651 | 651 tai ear 652 | 652 khác else 653 | 653 khá quite 654 | 654 đã phá vỡ broke 655 | 655 khi case 656 | 656 trung middle 657 | 657 giết kill 658 | 658 con trai son 659 | 659 hồ lake 660 | 660 thời điểm moment 661 | 661 quy mô scale 662 | 662 lớn loud 663 | 663 mùa xuân spring 664 | 664 quan sát observe 665 | 665 con child 666 | 666 thẳng straight 667 | 667 phụ âm consonant 668 | 668 quốc gia nation 669 | 669 từ điển dictionary 670 | 670 sưa milk 671 | 671 tốc độ speed 672 | 672 phương pháp method 673 | 673 cơ quan organ 674 | 674 trả pay 675 | 675 tuổi age 676 | 676 phần section 677 | 677 váy dress 678 | 678 điện toán đám mây cloud 679 | 679 bất ngờ surprise 680 | 680 
yên tĩnh quiet 681 | 681 đá stone 682 | 682 nhỏ tiny 683 | 683 lên cao climb 684 | 684 mát mẻ cool 685 | 685 thiết kế design 686 | 686 người nghèo poor 687 | 687 rất nhiều lot 688 | 688 thí nghiệm experiment 689 | 689 dưới bottom 690 | 690 chính key 691 | 691 sắt iron 692 | 692 đơn single 693 | 693 thanh stick 694 | 694 phẳng flat 695 | 695 hai mươi twenty 696 | 696 da skin 697 | 697 nụ cười smile 698 | 698 nếp crease 699 | 699 lỗ hole 700 | 700 nhảy jump 701 | 701 bé baby 702 | 702 tám eight 703 | 703 làng village 704 | 704 đáp ứng meet 705 | 705 gốc root 706 | 706 mua buy 707 | 707 nâng cao raise 708 | 708 giải quyết solve 709 | 709 kim loại metal 710 | 710 liệu whether 711 | 711 đẩy push 712 | 712 bảy seven 713 | 713 đoạn paragraph 714 | 714 thứ ba third 715 | 715 có trách nhiệm shall 716 | 716 được tổ chức held 717 | 717 lông hair 718 | 718 mô tả describe 719 | 719 nấu ăn cook 720 | 720 sàn floor 721 | 721 hoặc either 722 | 722 kết quả result 723 | 723 ghi burn 724 | 724 đồi hill 725 | 725 an toàn safe 726 | 726 mèo cat 727 | 727 thế kỷ century 728 | 728 xem xét consider 729 | 729 loại type 730 | 730 pháp luật law 731 | 731 bit bit 732 | 732 bờ biển coast 733 | 733 bản sao copy 734 | 734 cụm từ phrase 735 | 735 im lặng silent 736 | 736 cao tall 737 | 737 cát sand 738 | 738 đất soil 739 | 739 cuộn roll 740 | 740 nhiệt độ temperature 741 | 741 ngón tay finger 742 | 742 ngành công nghiệp industry 743 | 743 giá trị value 744 | 744 cuộc chiến fight 745 | 745 lời nói dối lie 746 | 746 đánh bại beat 747 | 747 kích thích excite 748 | 748 tự nhiên natural 749 | 749 xem view 750 | 750 ý nghĩa sense 751 | 751 vốn capital 752 | 752 sẽ không won’t 753 | 753 ghế chair 754 | 754 nguy hiểm danger 755 | 755 trái cây fruit 756 | 756 giàu rich 757 | 757 dày thick 758 | 758 người lính soldier 759 | 759 quá trình process 760 | 760 hoạt động operate 761 | 761 thực hành practice 762 | 762 riêng biệt separate 763 | 763 khó khăn difficult 764 | 764 bác sĩ doctor 765 | 765 xin vui lòng 
please 766 | 766 bảo vệ protect 767 | 767 trưa noon 768 | 768 cây trồng crop 769 | 769 hiện đại modern 770 | 770 yếu tố element 771 | 771 nhấn hit 772 | 772 sinh viên student 773 | 773 góc corner 774 | 774 bên party 775 | 775 cung cấp supply 776 | 776 có whose 777 | 777 xác định vị trí locate 778 | 778 vòng ring 779 | 779 nhân vật character 780 | 780 côn trùng insect 781 | 781 bắt caught 782 | 782 thời gian period 783 | 783 chỉ ra indicate 784 | 784 radio radio 785 | 785 nói spoke 786 | 786 nguyên tử atom 787 | 787 con người human 788 | 788 lịch sử history 789 | 789 hiệu lực effect 790 | 790 điện electric 791 | 791 mong đợi expect 792 | 792 xương bone 793 | 793 đường sắt rail 794 | 794 tưởng tượng imagine 795 | 795 cho provide 796 | 796 đồng ý agree 797 | 797 do đó thus 798 | 798 nhẹ nhàng gentle 799 | 799 người phụ nữ woman 800 | 800 đội trưởng captain 801 | 801 đoán guess 802 | 802 cần thiết necessary 803 | 803 sắc nét sharp 804 | 804 cánh wing 805 | 805 tạo create 806 | 806 hàng xóm neighbor 807 | 807 rửa wash 808 | 808 bat bat 809 | 809 thay rather 810 | 810 đám đông crowd 811 | 811 ngô corn 812 | 812 so sánh compare 813 | 813 bài thơ poem 814 | 814 chuỗi string 815 | 815 chuông bell 816 | 816 phụ thuộc depend 817 | 817 thịt meat 818 | 818 chà rub 819 | 819 ống tube 820 | 820 nổi tiếng famous 821 | 921 đồng đô la dollar 822 | 822 sông stream 823 | 823 sợ hãi fear 824 | 284 cảnh sight 825 | 825 mỏng thin 826 | 826 tam giác triangle 827 | 827 hành tinh planet 828 | 828 nhanh hurry 829 | 829 trưởng chief 830 | 830 thuộc địa colony 831 | 831 đồng hồ clock 832 | 832 tôi mine 833 | 833 cà vạt tie 834 | 834 nhập enter 835 | 835 chính major 836 | 836 tươi fresh 837 | 837 tìm kiếm search 838 | 838 gửi send 839 | 839 vàng yellow 840 | 840 súng gun 841 | 841 cho phép allow 842 | 842 in print 843 | 843 chết dead 844 | 844 tại chỗ spot 845 | 845 sa mạc desert 846 | 846 phù hợp với suit 847 | 847 hiện tại current 848 | 848 thang máy lift 849 | 840 tăng rose 850 | 850 đến 
arrive 851 | 851 chủ master 852 | 852 theo dõi track 853 | 853 mẹ parent 854 | 854 bờ shore 855 | 855 phân chia division 856 | 856 tờ sheet 857 | 857 chất substance 858 | 858 ủng hộ favor 859 | 859 kết nối connect 860 | 860 bài post 861 | 861 chi tiêu spend 862 | 862 hợp âm chord 863 | 863 chất béo fat 864 | 864 vui glad 865 | 865 ban đầu original 866 | 866 chia sẻ share 867 | 867 trạm station 868 | 868 cha dad 869 | 869 bánh mì bread 870 | 870 phí charge 871 | 871 thích hợp proper 872 | 872 thanh bar 873 | 873 phục vụ offer 874 | 874 phân khúc segment 875 | 875 nô lệ slave 876 | 876 vịt duck 877 | 877 ngay lập tức instant 878 | 878 thị trường market 879 | 879 mức độ degree 880 | 880 cư populate 881 | 881 gà chick 882 | 882 thân yêu dear 883 | 883 kẻ thù enemy 884 | 884 trả lời reply 885 | 885 ly drink 886 | 886 xảy ra occur 887 | 887 hỗ trợ support 888 | 888 bài phát biểu speech 889 | 889 thiên nhiên nature 890 | 890 phạm vi range 891 | 891 hơi nước steam 892 | 892 chuyển động motion 893 | 893 con đường path 894 | 894 chất lỏng liquid 895 | 895 đăng nhập log 896 | 896 có nghĩa là meant 897 | 897 thương quotient 898 | 898 răng teeth 899 | 899 vỏ shell 900 | 900 cổ neck 901 | 901 oxy oxygen 902 | 902 đường sugar 903 | 903 chết death 904 | 904 khá pretty 905 | 905 kỹ năng skill 906 | 906 phụ nữ women 907 | 907 mùa season 908 | 908 giải pháp solution 909 | 909 nam châm magnet 910 | 910 bạc silver 911 | 911 cảm ơn thank 912 | 912 chi nhánh branch 913 | 913 trận đấu match 914 | 914 hậu tố suffix 915 | 915 đặc biệt là especially 916 | 916 sung fig 917 | 917 sợ afraid 918 | 918 to huge 919 | 919 em gái sister 920 | 920 thép steel 921 | 921 thảo luận discuss 922 | 922 về phía trước forward 923 | 923 tương tự similar 924 | 924 hướng dẫn guide 925 | 925 kinh nghiệm experience 926 | 926 điểm score 927 | 927 táo apple 928 | 928 mua bought 929 | 929 dẫn led 930 | 930 sân pitch 931 | 931 áo coat 932 | 932 khối lượng mass 933 | 933 thẻ card 934 | 934 ban nhạc band 935 | 935 dây 
rope 936 | 936 trượt slip 937 | 937 giành chiến thắng win 938 | 938 mơ dream 939 | 939 buổi tối evening 940 | 940 điều kiện condition 941 | 941 thức ăn chăn nuôi feed 942 | 942 công cụ tool 943 | 943 tổng số total 944 | 944 cơ bản basic 945 | 945 mùi smell 946 | 946 thung lũng valley 947 | 947 cũng không nor 948 | 948 đôi double 949 | 949 ghế seat 950 | 950 tiếp tục continue 951 | 951 khối block 952 | 952 biểu đồ chart 953 | 953 mũ hat 954 | 954 bán sell 955 | 955 thành công success 956 | 956 công ty company 957 | 957 trừ subtract 958 | 958 sự kiện event 959 | 959 riêng particular 960 | 960 thỏa thuận deal 961 | 961 bơi swim 962 | 962 hạn term 963 | 963 ngược lại opposite 964 | 964 vợ wife 965 | 965 giày shoe 966 | 966 vai shoulder 967 | 967 lây lan spread 968 | 968 sắp xếp arrange 969 | 969 trại camp 970 | 970 phát minh invent 971 | 971 bông cotton 972 | 972 Sinh born 973 | 973 xác định determine 974 | 974 lít quart 975 | 975 chín nine 976 | 976 xe tải truck 977 | 977 tiếng ồn noise 978 | 978 mức level 979 | 979 cơ hội chance 980 | 980 thu thập gather 981 | 981 cửa hàng shop 982 | 982 căng ra stretch 983 | 983 ném throw 984 | 984 tỏa sáng shine 985 | 985 tài sản property 986 | 986 cột column 987 | 987 phân tử molecule 988 | 988 chọn select 989 | 989 sai wrong 990 | 990 màu xám gray 991 | 991 lặp lại repeat 992 | 992 yêu cầu require 993 | 993 rộng broad 994 | 994 chuẩn bị prepare 995 | 995 muối salt 996 | 996 mui nose 997 | 997 số nhiều plural 998 | 998 tức giận anger 999 | 999 xin claim 1000 | 1000 lục continent -------------------------------------------------------------------------------- /named-name-recognition/arabic-names.txt: -------------------------------------------------------------------------------- 1 | abbad 2 | abbas 3 | abd 4 | al-uzza 5 | abdus 6 | salam 7 | manaf 8 | rabbo 9 | abdel 10 | fattah 11 | nour 12 | abdi 13 | abdolreza 14 | abdu 15 | abdul 16 | ahad 17 | ali 18 | alim 19 | azim 20 | al-aziz 21 | baqi 22 | bari 23 | basir 24 | basit 25 | 
ghaffar 26 | ghani 27 | hadi 28 | hafiz 29 | hai 30 | hakim 31 | halim 32 | hamid 33 | haq 34 | hussein 35 | jabbar 36 | jalil 37 | jamil 38 | karim 39 | khaliq 40 | latif 41 | majid 42 | malik 43 | mannan 44 | monem 45 | muttalib 46 | qadir 47 | qayyum 48 | quddus 49 | rashid 50 | samad 51 | sattar 52 | wadud 53 | wahhab 54 | wahid 55 | zahir 56 | zahra 57 | abdullah 58 | abdur 59 | rab 60 | rahim 61 | al-rahman 62 | raqib 63 | rauf 64 | razzaq 65 | sabur 66 | shakur 67 | abid 68 | abidin 69 | abo 70 | ab 71 | abu 72 | al-qasim 73 | bakr 74 | hafs 75 | hamza 76 | nasir 77 | nasr 78 | abu'l-fadl 79 | adam 80 | adeel 81 | adeem 82 | adem 83 | aden 84 | adham 85 | adib 86 | adil 87 | adir 88 | adli 89 | adnan 90 | afif 91 | ahmad 92 | ahmed 93 | tijani 94 | ahsan 95 | akeem 96 | akif 97 | akram 98 | alaa 99 | aladdin 100 | naqi 101 | reza 102 | aman 103 | amanullah 104 | amer 105 | amin 106 | al-din 107 | aminullah 108 | amir 109 | amjad 110 | ammar 111 | amr 112 | anas 113 | anis 114 | anisur 115 | rahman 116 | anjem 117 | anwar 118 | anwaruddin 119 | aqeel 120 | ari 121 | arif 122 | asad 123 | asadullah 124 | asem 125 | asghar 126 | ashraf 127 | asif 128 | asil 129 | islam 130 | ataullah 131 | atif 132 | atiq 133 | atiqullah 134 | awad 135 | ayad 136 | ayman/aiman/aimen/aymen 137 | ayub 138 | azem 139 | azeem 140 | azhar 141 | azimullah 142 | aziz 143 | azizullah 144 | azizur 145 | azmi 146 | azus 147 | badi 148 | badr 149 | bagher 150 | baha 151 | baha' 152 | bahri 153 | baki 154 | bakir 155 | bara 156 | barkat 157 | barkatullah 158 | bashar 159 | bashir 160 | basri 161 | bilal 162 | bilel 163 | billah 164 | boualem 165 | boulos 166 | boutros 167 | brahim 168 | burhan 169 | caden 170 | chadli 171 | daniel/danyal 172 | dastgir 173 | daud 174 | dawoud 175 | dhikrullah 176 | ehsanullah 177 | ekram 178 | fadel 179 | fahd 180 | faheem 181 | fahmi 182 | fahri 183 | faisal 184 | faiz 185 | faizan 186 | faizullah 187 | fakhr 188 | fakhraddin 189 | fakhruddin 190 | faqir 
191 | faraj 192 | farhat 193 | farid 194 | fariduddin 195 | faris 196 | farooq 197 | fasih 198 | fathallah 199 | fathi 200 | fatin 201 | fawaz 202 | fawzi 203 | fayez 204 | fazel 205 | fazl 206 | ur 207 | fazlallah 208 | fazli 209 | fazlul 210 | fikri 211 | fouzan 212 | fouad 213 | fuad 214 | furkan 215 | gaffar 216 | gamil 217 | ghanem 218 | ghassan 219 | ghiyath 220 | ghulam 221 | faruq 222 | mohiuddin 223 | gulzar 224 | habib 225 | habibullah 226 | hadem 227 | hadid 228 | hafeez 229 | hafizullah 230 | haitham 231 | hajj 232 | hajji 233 | hakam 234 | haldun 235 | hamdan 236 | hamdi 237 | hamidullah 238 | hani 239 | harbi 240 | hanif 241 | harun 242 | hashem 243 | hashim 244 | hasib 245 | hassan 246 | hassim 247 | hatem 248 | hayatullah 249 | haydar 250 | hazem 251 | hibat 252 | allah 253 | hichem 254 | hidayatullah 255 | hikmat 256 | hilmi 257 | hisham 258 | ud-din 259 | hossam 260 | hurairah 261 | husam 262 | ad-din 263 | ibrahim 264 | ibro 265 | idris 266 | ihab 267 | ihsan 268 | ikhtiyar 269 | ikramullah 270 | ikrimah 271 | ilyas 272 | imad 273 | imran 274 | imtiaz 275 | inaam 276 | inam-ul-haq 277 | inayatullah 278 | iqbal 279 | irfan 280 | isa 281 | ishak 282 | issam 283 | ishtiaq 284 | iskandar 285 | ismail 286 | ismat 287 | ismatullah 288 | izz 289 | izzat 290 | izzatullah 291 | ja'far 292 | jabal 293 | jaber 294 | jabir 295 | jabr 296 | jahid 297 | jalal 298 | jamal 299 | jameel 300 | jarrah 301 | jasem 302 | jawad 303 | jawdat 304 | jihad 305 | jubayr 306 | junayd 307 | jurj 308 | min 309 | ka'b 310 | kadeem 311 | kadir 312 | kadri 313 | kafeel 314 | kamal 315 | kamil 316 | karem 317 | kashif 318 | kazem 319 | khadem 320 | khair 321 | khakim 322 | khalfan 323 | khalid 324 | khalifah 325 | khalil 326 | khalil-ur-rehman 327 | khamis 328 | kulthum 329 | labib 330 | lalji 331 | luay 332 | lutfullah 333 | lutfur 334 | mahalati 335 | mahbubur 336 | mahdi 337 | mahfuz 338 | mahir 339 | mahmud 340 | mamdouh 341 | mansur 342 | manzur 343 | marwan 344 | mashallah 
345 | masoud 346 | maytham 347 | mehdi 348 | melhem 349 | michel 350 | midhat 351 | mizanur 352 | moatassem 353 | moeen 354 | moemen 355 | mohammad 356 | taqi 357 | mohannad 358 | mohy 359 | moin 360 | moinuddin 361 | mojtaba 362 | moncef 363 | moneim 364 | mu'iz 365 | mua'dh 366 | muammer 367 | mubarak 368 | muhammad 369 | muharrem 370 | muhibullah 371 | muhsin 372 | mukhtar 373 | mumtaz 374 | munib 375 | munif 376 | munir 377 | murad 378 | murtaza 379 | musa 380 | muslim 381 | mustafa 382 | muzaffar 383 | nabih 384 | nabil 385 | nadeem 386 | nader 387 | nadir 388 | nadur 389 | naguib 390 | nahyan 391 | naif 392 | naim 393 | naji 394 | najib 395 | najibullah 396 | najim 397 | najm 398 | naqibullah 399 | naseeb 400 | naseer 401 | nasim 402 | nasrallah 403 | nasri 404 | nasser 405 | nassif 406 | nasuh 407 | nawaf 408 | nawaz 409 | nowfal 410 | nazif 411 | nazim 412 | nazimuddin 413 | nazmi 414 | nihad 415 | nimatullah 416 | nizam 417 | nu'man 418 | numan 419 | nuh 420 | nur 421 | nuri 422 | nurullah 423 | nusrat 424 | omar 425 | osama 426 | othman 427 | qaid 428 | qamar 429 | qasim 430 | qasymbek 431 | qudratullah 432 | qusay 433 | qutb 434 | rabih 435 | raed 436 | rafiq 437 | rahmatullah 438 | rahmi 439 | rajab 440 | rajaei 441 | raji 442 | ramiz 443 | ramzan 444 | rakibul 445 | rakib 446 | ramzi 447 | rashad 448 | rasul 449 | rayan 450 | redouane 451 | riad 452 | riaz 453 | ridwan 454 | rifat 455 | rizqallah 456 | ruhi 457 | ruhullah 458 | rukn 459 | rushdi 460 | sa'd 461 | seif 462 | ilislam 463 | sa‘id 464 | saud 465 | saad 466 | saadallah 467 | sabri 468 | sabah 469 | saddam 470 | sadik 471 | sadr 472 | safi 473 | safiullah 474 | sahir 475 | saif 476 | saifullah 477 | saifur 478 | sajid 479 | sajjad 480 | salah 481 | saleh 482 | salem 483 | salim 484 | salman 485 | samadu 486 | samee 487 | samer 488 | sami 489 | samir 490 | samirah 491 | samiullah 492 | sanaullah 493 | saqib 494 | sardar 495 | sarmad 496 | satam 497 | sayf 498 | sayyid 499 | shaban 500 | shad 
501 | shafiq 502 | shafiqullah 503 | shahid 504 | shahrukh 505 | shakeel 506 | shakir 507 | shams 508 | shamsur 509 | sharaf 510 | sharifullah 511 | shawkat 512 | shawki 513 | shiraz 514 | shihab 515 | shujauddin 516 | shukri 517 | sidique 518 | sidqi 519 | sirajuddin 520 | suhail 521 | suleiman 522 | sultan 523 | shaheed 524 | taha 525 | taher 526 | tahmid 527 | tahsin 528 | talal 529 | talat 530 | talhah 531 | talib 532 | tarazi 533 | tariq 534 | tawfik 535 | tayeb 536 | tayfur 537 | tufail 538 | turki 539 | talin 540 | ubay 541 | ubayd 542 | uday 543 | uthman 544 | usama 545 | wadih 546 | wael 547 | wafi 548 | wahed 549 | wajdi 550 | wajid 551 | waleed 552 | waliullah 553 | wasim 554 | wazir 555 | wissem 556 | yacine 557 | yadollah 558 | yahya 559 | yakub 560 | yahir 561 | yasser 562 | yunus 563 | yusha 564 | yusuf 565 | zafar 566 | zafarullah 567 | zafer 568 | zahed 569 | zaid 570 | zaim 571 | zainal 572 | zakariya 573 | zaki 574 | zane 575 | zayn 576 | zeeshan 577 | ziad 578 | ziauddin 579 | ziaur 580 | zubayr 581 | abeer 582 | abiha 583 | adela 584 | afaf 585 | afreen 586 | aisha 587 | aliya 588 | alya 589 | amalia 590 | amina 591 | amira 592 | arwa 593 | ashraqat 594 | ashfa 595 | asma 596 | atikah 597 | aya 598 | azra 599 | aziza 600 | boutheina 601 | bushra 602 | besma 603 | chaima 604 | dalal 605 | dalia 606 | danielle 607 | dana 608 | dareen 609 | dina 610 | eliana 611 | esma 612 | eva 613 | farah 614 | farida 615 | fatima 616 | feyrouz 617 | fouz 618 | habiba 619 | hafsa 620 | hajra 621 | hala 622 | halima 623 | hamida 624 | hana 625 | hanifa 626 | havva 627 | hawa 628 | hayat 629 | hessa 630 | huda 631 | ibtisam 632 | ireen 633 | jamila 634 | jana 635 | jawahir 636 | jena 637 | jennifer 638 | jessica 639 | joelle 640 | joud 641 | jouri 642 | julia 643 | jumana 644 | kareena 645 | karima 646 | katya 647 | khadija 648 | khairunnisa 649 | khawlah 650 | lama 651 | lamia 652 | lana 653 | lara 654 | latifa 655 | layan 656 | leila 657 | lina 658 | lulwa 659 | 
madiha 660 | maha 661 | mahmuna 662 | mai 663 | malika 664 | maria 665 | marwa 666 | maryam 667 | maya 668 | maysoon 669 | melek 670 | melissa 671 | mila 672 | mira 673 | mirna 674 | mona 675 | munira 676 | nadia 677 | nadine 678 | nafisa 679 | nahla 680 | naila 681 | naima 682 | najat 683 | najma 684 | nalini 685 | nasrin 686 | natasha 687 | nathalie 688 | nawal 689 | nayla 690 | naziha 691 | nazira 692 | nehal 693 | nejla 694 | nermin 695 | nezha 696 | nina 697 | qistina 698 | rabia 699 | rahima 700 | rana 701 | rania 702 | rashida 703 | reem 704 | riffat 705 | rimas 706 | rita 707 | ruqayya 708 | saadia 709 | sabiha 710 | subiksha 711 | safaa 712 | safiya 713 | sahar 714 | saida 715 | saira 716 | sajida 717 | sakhra 718 | sakina 719 | salma 720 | samar 721 | samira 722 | samiya 723 | sandra 724 | sania 725 | sarah 726 | selma 727 | shahd 728 | shakira 729 | shatha 730 | sherine 731 | shumaila 732 | suha 733 | sumaya 734 | sonia 735 | soraida 736 | tahira 737 | tala 738 | tamara 739 | tanisha 740 | tanya 741 | tara 742 | taslima 743 | tina 744 | tuba 745 | umm 746 | wafaa 747 | waliya 748 | wajahat 749 | wajhat 750 | yamina 751 | yara 752 | yasmin 753 | zalayha 754 | zakiya 755 | zaynab 756 | zuhal 757 | zoraida 758 | amal 759 | chadi 760 | farrah 761 | laden 762 | nakia 763 | noor 764 | nair 765 | zain 766 | zia 767 | zial -------------------------------------------------------------------------------- /named-name-recognition/french-names.txt: -------------------------------------------------------------------------------- 1 | Gabriel 2 | Léo 3 | Raphaël 4 | Arthur 5 | Louis 6 | Lucas 7 | Adam 8 | Jules 9 | Hugo 10 | Maël 11 | Liam 12 | Noah 13 | Paul 14 | Ethan 15 | Tiago 16 | Sacha 17 | Gabin 18 | Nathan 19 | Mohamed 20 | Aaron 21 | Tom 22 | Eden 23 | Théo 24 | Noé 25 | Léon 26 | Martin 27 | Mathis 28 | Nolan 29 | Victor 30 | Timéo 31 | Enzo 32 | Marius 33 | Axel 34 | Antoine 35 | Robin 36 | Isaac 37 | Naël 38 | Amir 39 | Valentin 40 | Rayan 41 | Augustin 42 | 
Ayden 43 | Clément 44 | Eliott 45 | Samuel 46 | Marceau 47 | Baptiste 48 | Gaspard 49 | Maxence 50 | Yanis 51 | Malo 52 | Ibrahim 53 | Sohan 54 | Maxime 55 | Evan 56 | Nino 57 | Mathéo 58 | Simon 59 | Lyam 60 | Alexandre 61 | Imran 62 | Naïm 63 | Kaïs 64 | Camille 65 | Thomas 66 | Ismaël 67 | Milo 68 | Côme 69 | Owen 70 | Lenny 71 | Soan 72 | Ilyan 73 | Kylian 74 | Noa 75 | Ilyes 76 | Oscar 77 | Léandre 78 | Pablo 79 | Diego 80 | Mathys 81 | Joseph 82 | Ayoub 83 | Youssef 84 | Wassim 85 | Noam 86 | Adem 87 | William 88 | Ali 89 | Basile 90 | Charles 91 | Antonin 92 | Thiago 93 | Logan 94 | Adrien 95 | Marin 96 | Jean 97 | Charly 98 | Esteban 99 | Noham 100 | Elio 101 | Charlie 102 | Auguste 103 | Timothée 104 | Alexis 105 | Milan 106 | David 107 | Issa 108 | Lorenzo 109 | Younes 110 | Amine 111 | Mahé 112 | Rafael 113 | Benjamin 114 | Kenzo 115 | Abel 116 | Anas 117 | Lucien 118 | Alessio 119 | Roméo 120 | Sandro 121 | Livio 122 | Mattéo 123 | Malone 124 | Rafaël 125 | Aymen 126 | Elias 127 | Louka 128 | Nathanaël 129 | Sasha 130 | Nahil 131 | Mathias 132 | Hamza 133 | Yassine 134 | Léonard 135 | Achille 136 | Pierre 137 | Ahmed 138 | Aylan 139 | Ewen 140 | Julian 141 | Ilan 142 | Ezio 143 | Thibault 144 | Timothé 145 | Zakaria 146 | Bastien 147 | Marcel 148 | Ruben 149 | Ulysse 150 | Maé 151 | Hayden 152 | Andrea 153 | Nassim 154 | Daniel 155 | Haroun 156 | Arsène 157 | Swan 158 | Mehdi 159 | Jayden 160 | Armand 161 | Alban 162 | Amaury 163 | Luka 164 | Matéo 165 | César 166 | Luca 167 | Eliot 168 | Omar 169 | Gianni 170 | Ilyas 171 | Moussa 172 | Idriss 173 | Dylan 174 | Mayron 175 | Joshua 176 | Kayden 177 | Nicolas 178 | Mohammed 179 | Félix 180 | Loan 181 | Corentin 182 | Erwan 183 | James 184 | Jonas 185 | Mylan 186 | Lino 187 | Gustave 188 | Swann 189 | Djibril 190 | Elliot 191 | Kassim 192 | Léandro 193 | Marcus 194 | Octave 195 | Iyad 196 | Ryan 197 | Quentin 198 | Bilal 199 | Ismaïl 200 | Anatole 201 | Hector 202 | Jad 203 | Loris 204 | Nahel 205 | Tyméo 
206 | Léopold 207 | Milhan 208 | Samy 209 | Nael 210 | Tristan 211 | Matteo 212 | Kamil 213 | Emile 214 | Imrane 215 | Tao 216 | Marlon 217 | Zayn 218 | Elie 219 | Alex 220 | Elyo 221 | Henri 222 | Sofiane 223 | Sami 224 | Tim 225 | Loïs 226 | Marley 227 | Souleymane 228 | Souleyman 229 | Johan 230 | Titouan 231 | Anis 232 | Ismael 233 | Giulian 234 | Mathieu 235 | Lenzo 236 | Emmanuel 237 | Ange 238 | Gaël 239 | Issam 240 | Mamadou 241 | Ylan 242 | Emilio 243 | Eyden 244 | Cameron 245 | Edouard 246 | Ernest 247 | Lilian 248 | Julien 249 | Gauthier 250 | Romain 251 | Warren 252 | Ezra 253 | Iyed 254 | Nolhan 255 | Leandro 256 | Yann 257 | Célestin 258 | Gaston 259 | Solal 260 | Théodore 261 | Aubin 262 | Émile 263 | Luis 264 | Malik 265 | Andréa 266 | Elijah 267 | Justin 268 | Max 269 | Yacine 270 | Leo 271 | Tylio 272 | Morgan 273 | Aloïs 274 | Lisandro 275 | Éden 276 | Marwan 277 | Lukas 278 | Rayane 279 | Khalil 280 | Sam 281 | Élio 282 | Emir 283 | Tino 284 | Paolo 285 | Téo 286 | Joachim 287 | Aksel 288 | Georges 289 | Jassim 290 | Naïl 291 | Tony 292 | Adrian 293 | Wyatt 294 | Bryan 295 | Milann 296 | Nils 297 | Zayd 298 | Adel 299 | Raphael 300 | Louison 301 | Yusuf 302 | Jordan 303 | Elyas 304 | Lewis 305 | Qassim 306 | Jessim 307 | Lohan 308 | Aïden 309 | Soren 310 | Salim 311 | Youssouf 312 | Angelo 313 | Abdallah 314 | Marc 315 | Alix 316 | Fares 317 | Isaïah 318 | Edgar 319 | Youcef 320 | Dorian 321 | Ilian 322 | Roman 323 | Tilio 324 | Anton 325 | Ilhan 326 | Illan 327 | Tyler 328 | Ilyès 329 | Leny 330 | Mael 331 | Matthew 332 | Célian 333 | Lou 334 | Néo 335 | Fabio 336 | Aydan 337 | Bilel 338 | Lény 339 | Zack 340 | Marvin 341 | Zacharie 342 | Éthan 343 | Thibaut 344 | Aurèle 345 | Enaël 346 | Manoé 347 | Ziyad 348 | Nolann 349 | Harry 350 | Paco 351 | Matthieu 352 | Ousmane 353 | Yazid 354 | Aron 355 | Yaël 356 | Aurélien 357 | Ishaq 358 | Joud 359 | Loïc 360 | Eymen 361 | Tyago 362 | Ambroise 363 | Anthony 364 | Malonn 365 | Walid 366 | Chahine 
367 | Ibrahima 368 | Loup 369 | Selim 370 | Ayan 371 | Dario 372 | Élie 373 | Ismail 374 | Farès 375 | Riyad 376 | Abdoulaye 377 | Tobias 378 | Lubin 379 | Nathaël 380 | Andy 381 | Jason 382 | Yahya 383 | Milàn 384 | Wael 385 | Thyméo 386 | Adil 387 | Vadim 388 | Wesley 389 | Idris 390 | Nohan 391 | Rémi 392 | Waël 393 | Muhammad 394 | Zakariya 395 | Mylann 396 | Siméon 397 | Evann 398 | Gaëtan 399 | Karim 400 | Miran 401 | Oumar 402 | Jaden 403 | Mateo 404 | Giovanni 405 | Ishak 406 | Jibril 407 | Loukas 408 | Mouhamed 409 | Melvin 410 | Soën 411 | Younès 412 | Alan 413 | Angel 414 | Dimitri 415 | Elouan 416 | Kevin 417 | Télio 418 | Amjad 419 | Stan 420 | Andrew 421 | Yasser 422 | Juliann 423 | Eli 424 | Hadrien 425 | Joan 426 | Ugo 427 | Yassin 428 | Tayron 429 | Théophile 430 | Alessandro 431 | Kelyan 432 | Clovis 433 | Kyllian 434 | Soulayman 435 | Aboubacar 436 | Albin 437 | Paulin 438 | Soann 439 | Etienne 440 | Pharell 441 | Jacques 442 | Nelson 443 | Thiméo 444 | Marco 445 | Imrân 446 | Kenan 447 | Haron 448 | Jacob 449 | Mohamed-Amine 450 | Zakarya 451 | Enes 452 | Florian 453 | Hassan 454 | Ayman 455 | Hippolyte 456 | Lilio 457 | Ylann 458 | Amadou 459 | Isaiah 460 | Ziad 461 | Adonis 462 | Colin 463 | Jonathan 464 | Lissandro 465 | Milhane 466 | Stanislas 467 | Louca 468 | Mickaël 469 | Mahdi 470 | Matis 471 | Ayaz 472 | Ewenn 473 | Islem 474 | Manoa 475 | Damian 476 | Joris 477 | Lysandre 478 | Miguel 479 | Olivier 480 | Damien 481 | Maëlan 482 | Matthias 483 | Balthazar 484 | Elliott 485 | Grégoire 486 | Guillaume 487 | Lyham 488 | Aymeric 489 | François 490 | Kyle 491 | Maximilien 492 | Pacôme 493 | Lowen 494 | Vincent 495 | Cayden 496 | Louay 497 | Safwan 498 | Ilann 499 | Luc 500 | Tommy 501 | Emma 502 | Jade 503 | Louise 504 | Alice 505 | Lina 506 | Chloé 507 | Rose 508 | Léa 509 | Mila 510 | Ambre 511 | Mia 512 | Anna 513 | Julia 514 | Inès 515 | Léna 516 | Juliette 517 | Zoé 518 | Manon 519 | Agathe 520 | Lou 521 | Lola 522 | Camille 523 | Nina 
524 | Jeanne 525 | Inaya 526 | Romy 527 | Eva 528 | Romane 529 | Léonie 530 | Iris 531 | Lucie 532 | Luna 533 | Adèle 534 | Sarah 535 | Louna 536 | Charlotte 537 | Margaux 538 | Olivia 539 | Sofia 540 | Charlie 541 | Victoria 542 | Victoire 543 | Nour 544 | Margot 545 | Mya 546 | Giulia 547 | Clémence 548 | Alix 549 | Aya 550 | Clara 551 | Elena 552 | Capucine 553 | Lana 554 | Lya 555 | Lyna 556 | Lyana 557 | Théa 558 | Léana 559 | Anaïs 560 | Gabrielle 561 | Emy 562 | Yasmine 563 | Mathilde 564 | Maëlys 565 | Alicia 566 | Lilou 567 | Apolline 568 | Roxane 569 | Lise 570 | Assia 571 | Elise 572 | Lily 573 | Maria 574 | Maya 575 | Valentine 576 | Héloïse 577 | Marie 578 | Elsa 579 | Noémie 580 | Lisa 581 | Lila 582 | Alya 583 | Thaïs 584 | Ilyana 585 | Célia 586 | Candice 587 | Livia 588 | Zélie 589 | Salomé 590 | Constance 591 | Soline 592 | Emmy 593 | Maëlle 594 | Eléna 595 | Maryam 596 | Amélia 597 | Joy 598 | Océane 599 | Maïssa 600 | Arya 601 | Andréa 602 | Hanna 603 | Mélina 604 | Manel 605 | Joséphine 606 | Ella 607 | Céleste 608 | Myla 609 | Garance 610 | Faustine 611 | Sara 612 | Alma 613 | Aliyah 614 | Calie 615 | Ava 616 | Pauline 617 | Louisa 618 | Emna 619 | Diane 620 | Ines 621 | Elisa 622 | Nora 623 | Suzanne 624 | Amira 625 | Sophia 626 | Jana 627 | Aïcha 628 | Jenna 629 | Lison 630 | Louane 631 | Anaé 632 | Amina 633 | Elya 634 | Kenza 635 | Aria 636 | Ninon 637 | Eloïse 638 | Jennah 639 | Fatima 640 | Laura 641 | Nelya 642 | Albane 643 | Ana 644 | Charline 645 | Eden 646 | Elina 647 | Dina 648 | Justine 649 | Anaëlle 650 | Lara 651 | Élise 652 | Cléa 653 | Selma 654 | Liya 655 | Aliya 656 | Stella 657 | Lilia 658 | Coline 659 | Célestine 660 | Eléonore 661 | Jasmine 662 | Naya 663 | Camélia 664 | Neyla 665 | Leïla 666 | Khadija 667 | Mariam 668 | Léane 669 | Lilya 670 | Julie 671 | Éléonore 672 | Maëlya 673 | Cataleya 674 | Layana 675 | Maddy 676 | Fatoumata 677 | Cassandre 678 | Myriam 679 | Hana 680 | Sophie 681 | Ellie 682 | Augustine 683 | 
Éléna 684 | Méline 685 | Eline 686 | Alba 687 | Naomi 688 | Norah 689 | Naëlle 690 | Sirine 691 | Mélissa 692 | Judith 693 | Salma 694 | Leyna 695 | Louison 696 | Asma 697 | Kamila 698 | Marwa 699 | Carla 700 | Lia 701 | Aminata 702 | Anouk 703 | Alia 704 | Mayssa 705 | Assya 706 | Anastasia 707 | Lucy 708 | Maëva 709 | Raphaëlle 710 | Elia 711 | Safiya 712 | Elyna 713 | Emilie 714 | Tessa 715 | Chiara 716 | Noor 717 | Madeleine 718 | Axelle 719 | Liyah 720 | Sasha 721 | Tasnim 722 | Miya 723 | Alyssa 724 | Aurore 725 | Nélia 726 | Daphné 727 | Eléa 728 | Naïla 729 | Hortense 730 | Valentina 731 | Elyne 732 | Nola 733 | Lily-Rose 734 | Nayla 735 | Clémentine 736 | Lena 737 | Ayline 738 | Hafsa 739 | Lyanna 740 | Rachel 741 | Tess 742 | Kiara 743 | Nélya 744 | Line 745 | Soumaya 746 | Kayla 747 | Joyce 748 | Janna 749 | Jannah 750 | Maïwenn 751 | Mina 752 | Lylou 753 | Naomie 754 | Hannah 755 | Hanaé 756 | Clarisse 757 | Hawa 758 | Liana 759 | Noélie 760 | Chelsea 761 | Olympe 762 | Violette 763 | Sana 764 | Liv 765 | Maïna 766 | Talia 767 | Éline 768 | Lahna 769 | Mélia 770 | Cassie 771 | Aaliyah 772 | Aliénor 773 | Cloé 774 | Lucile 775 | Maïa 776 | Angèle 777 | Naïa 778 | Malak 779 | Enora 780 | Imane 781 | Maxine 782 | Yuna 783 | Esther 784 | Cléo 785 | Éva 786 | Amalia 787 | Ariane 788 | Bérénice 789 | Emilia 790 | Lyah 791 | Malia 792 | Izia 793 | Milla 794 | June 795 | Kélya 796 | Meryem 797 | Serena 798 | Blanche 799 | Paloma 800 | Lexie 801 | Lilas 802 | Brune 803 | Thelma 804 | Marion 805 | Paola 806 | Ashley 807 | Camila 808 | Wendy 809 | Elyana 810 | Mona 811 | Pia 812 | Alessia 813 | Diana 814 | Éloïse 815 | Abby 816 | Alizée 817 | Isaure 818 | Morgane 819 | Rosalie 820 | Amy 821 | Elif 822 | Shanna 823 | Flora 824 | Assiya 825 | Élina 826 | Safia 827 | Maëline 828 | Malya 829 | Marilou 830 | Maëlyne 831 | Thalia 832 | Héléna 833 | Ilona 834 | Solène 835 | Alexia 836 | Lyne 837 | Gabriella 838 | Anissa 839 | Claire 840 | Hidaya 841 | Talya 842 | 
Athénaïs 843 | Séléna 844 | Tasnime 845 | Amélie 846 | Mïa 847 | Aline 848 | Isra 849 | Amel 850 | Esma 851 | Safa 852 | Noa 853 | Awa 854 | Evy 855 | Ayla 856 | Dania 857 | Lucia 858 | Leïa 859 | Léonore 860 | Neïla 861 | Billie 862 | Eliana 863 | Lylia 864 | Anaya 865 | Éléa 866 | Luce 867 | Castille 868 | Laya 869 | Lili 870 | Philippine 871 | Tesnim 872 | Assil 873 | Farah 874 | Mira 875 | Amandine 876 | Estelle 877 | Johanna 878 | Mélya 879 | Shana 880 | Alyah 881 | Kelly 882 | Athéna 883 | Selena 884 | Bianca 885 | Lyla 886 | Séréna 887 | Alexandra 888 | Esmée 889 | Abigaëlle 890 | Juliana 891 | Sélène 892 | Yousra 893 | Gabriela 894 | Ema 895 | Amaya 896 | April 897 | Cassandra 898 | Elisabeth 899 | Halima 900 | Leya 901 | Mariama 902 | Rania 903 | Astrid 904 | Émilie 905 | Maddie 906 | Roxanne 907 | Leyla 908 | Hajar 909 | Hélèna 910 | Israa 911 | Sibylle 912 | Tiana 913 | Ania 914 | Gaïa 915 | Helena 916 | Romie 917 | Alycia 918 | Anaë 919 | Céline 920 | Maëlie 921 | Mélyna 922 | Maé 923 | Maëly 924 | Irina 925 | Nawel 926 | Leïna 927 | Kataleya 928 | Melissa 929 | Téa 930 | Alana 931 | Eléanore 932 | Meriem 933 | Amelia 934 | Fanny 935 | Kyara 936 | Annaëlle 937 | Azra 938 | Maëlia 939 | Melina 940 | Ela 941 | Eugénie 942 | Isis 943 | Lydia 944 | Maïmouna 945 | Zahra 946 | Kessy 947 | Mélyne 948 | Prune 949 | Hind 950 | Rita 951 | Tina 952 | Yaëlle 953 | Zeynep 954 | Aylin 955 | Rebecca 956 | Sidra 957 | Fanta 958 | Angelina 959 | Carmen 960 | Élisa 961 | Mellina 962 | Ariana 963 | Octavia 964 | Alina 965 | Enola 966 | Marylou 967 | Soraya 968 | Syrine 969 | Aicha 970 | Émy 971 | Anya 972 | Ayana 973 | Giuliana 974 | Gloria 975 | Ivy 976 | Kelya 977 | Mayline 978 | Adélaïde 979 | Aïna 980 | Imany 981 | Lilly 982 | Colette 983 | Juline 984 | Callie 985 | Andrea 986 | Ayna 987 | Catalina 988 | Élia 989 | Yara 990 | Sienna 991 | Oumou 992 | Aïsha 993 | Layna 994 | Rosie 995 | Sanaa 996 | Zaynab 997 | Fatou 998 | Leila 999 | Milana 1000 | Sixtine 
-------------------------------------------------------------------------------- /named-name-recognition/named-name-recognition.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import spacy\n", 10 | "import pathlib\n", 11 | "import pandas as pd\n", 12 | "from memo import grid, memlist" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 2, 18 | "metadata": {}, 19 | "outputs": [ 20 | { 21 | "data": { 22 | "application/vnd.jupyter.widget-view+json": { 23 | "model_id": "d525cac2f1984c6095bac273b39b2dc1", 24 | "version_major": 2, 25 | "version_minor": 0 26 | }, 27 | "text/plain": [ 28 | "Output()" 29 | ] 30 | }, 31 | "metadata": {}, 32 | "output_type": "display_data" 33 | } 34 | ], 35 | "source": [ 36 | "data = [] \n", 37 | "\n", 38 | "@memlist(data=data)\n", 39 | "def run_experiment(mod, datafile, cap=False):\n", 40 | " # load in spacy model \n", 41 | " nlp = spacy.load(mod)\n", 42 | " \n", 43 | " # load in names\n", 44 | " names = pathlib.Path(datafile).read_text().split(\"\\n\")\n", 45 | " \n", 46 | " # try to detect the names using simple \"je m'appelle\"\n", 47 | " ent_types = [d[-1].ent_type_ for d in nlp.pipe(f\"Bonjour je m'appelle {n.capitalize() if cap else n}\" for n in names)]\n", 48 | " results_df = pd.DataFrame({\"name\": names, \"ent_type\": ent_types}).groupby(\"ent_type\").count()\n", 49 | " return dict(results_df['name'])\n", 50 | "\n", 51 | "for setting in grid(datafile=['arabic-names.txt', 'french-names.txt'],\n", 52 | " mod=['fr_core_news_sm', 'fr_core_news_md'],\n", 53 | " cap=[True, False]):\n", 54 | " run_experiment(**setting)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": 26, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "def clean_data(dataf):\n", 64 | " return (dataf\n", 65 | " .assign(NO_ENT=lambda d: 
d[''])\n", 66 | " .drop(columns=[''])\n", 67 | " .fillna(0)\n", 68 | " .assign(total=lambda d: d[['LOC', 'MISC', 'ORG', 'PER', 'NO_ENT']].sum(axis=1))\n", 69 | " .assign(p_person=lambda d: d['PER']/d['total'])\n", 70 | " .assign(p_none=lambda d: d['NO_ENT']/d['total']))\n", 71 | "\n", 72 | "# pd.DataFrame(data).assign(NO_ENT=lambda d: d['']).drop(columns=['']).fillna(0)" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": 27, 78 | "metadata": {}, 79 | "outputs": [ 80 | { 81 | "data": { 82 | "text/html": [ 83 | "
\n", 84 | "\n", 97 | "\n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | " \n", 113 | " \n", 114 | " \n", 115 | " \n", 116 | " \n", 117 | " \n", 118 | " \n", 119 | " \n", 120 | " \n", 121 | " \n", 122 | " \n", 123 | " \n", 124 | " \n", 125 | " \n", 126 | " \n", 127 | " \n", 128 | " \n", 129 | " \n", 130 | " \n", 131 | " \n", 132 | " \n", 133 | " \n", 134 | " \n", 135 | " \n", 136 | " \n", 137 | " \n", 138 | " \n", 139 | " \n", 140 | " \n", 141 | " \n", 142 | " \n", 143 | " \n", 144 | " \n", 145 | " \n", 146 | " \n", 147 | " \n", 148 | " \n", 149 | " \n", 150 | " \n", 151 | " \n", 152 | " \n", 153 | " \n", 154 | " \n", 155 | " \n", 156 | " \n", 157 | " \n", 158 | " \n", 159 | " \n", 160 | " \n", 161 | " \n", 162 | " \n", 163 | " \n", 164 | " \n", 165 | " \n", 166 | " \n", 167 | " \n", 168 | " \n", 169 | " \n", 170 | " \n", 171 | " \n", 172 | " \n", 173 | " \n", 174 | " \n", 175 | " \n", 176 | " \n", 177 | " \n", 178 | " \n", 179 | " \n", 180 | " \n", 181 | " \n", 182 | " \n", 183 | " \n", 184 | " \n", 185 | " \n", 186 | " \n", 187 | " \n", 188 | " \n", 189 | " \n", 190 | " \n", 191 | " \n", 192 | " \n", 193 | " \n", 194 | " \n", 195 | " \n", 196 | " \n", 197 | " \n", 198 | " \n", 199 | " \n", 200 | " \n", 201 | " \n", 202 | " \n", 203 | " \n", 204 | " \n", 205 | " \n", 206 | " \n", 207 | " \n", 208 | " \n", 209 | " \n", 210 | " \n", 211 | " \n", 212 | " \n", 213 | " \n", 214 | " \n", 215 | " \n", 216 | " \n", 217 | " \n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | "
datafilemodcapLOCMISCORGPERNO_ENTtotalp_personp_none
0arabic-names.txtfr_core_news_smTrue327.037.033.0355.015767.00.4628420.019557
1arabic-names.txtfr_core_news_smFalse0.00.00.00.0767767.00.0000001.000000
2arabic-names.txtfr_core_news_mdTrue178.0130.019.0417.023767.00.5436770.029987
3arabic-names.txtfr_core_news_mdFalse0.00.00.00.0767767.00.0000001.000000
4french-names.txtfr_core_news_smTrue423.051.016.0507.031000.00.5070000.003000
5french-names.txtfr_core_news_smFalse424.051.016.0508.011000.00.5080000.001000
6french-names.txtfr_core_news_mdTrue149.0293.010.0535.0131000.00.5350000.013000
7french-names.txtfr_core_news_mdFalse149.0294.010.0536.0111000.00.5360000.011000
\n", 229 | "
" 230 | ], 231 | "text/plain": [ 232 | " datafile mod cap LOC MISC ORG PER \\\n", 233 | "0 arabic-names.txt fr_core_news_sm True 327.0 37.0 33.0 355.0 \n", 234 | "1 arabic-names.txt fr_core_news_sm False 0.0 0.0 0.0 0.0 \n", 235 | "2 arabic-names.txt fr_core_news_md True 178.0 130.0 19.0 417.0 \n", 236 | "3 arabic-names.txt fr_core_news_md False 0.0 0.0 0.0 0.0 \n", 237 | "4 french-names.txt fr_core_news_sm True 423.0 51.0 16.0 507.0 \n", 238 | "5 french-names.txt fr_core_news_sm False 424.0 51.0 16.0 508.0 \n", 239 | "6 french-names.txt fr_core_news_md True 149.0 293.0 10.0 535.0 \n", 240 | "7 french-names.txt fr_core_news_md False 149.0 294.0 10.0 536.0 \n", 241 | "\n", 242 | " NO_ENT total p_person p_none \n", 243 | "0 15 767.0 0.462842 0.019557 \n", 244 | "1 767 767.0 0.000000 1.000000 \n", 245 | "2 23 767.0 0.543677 0.029987 \n", 246 | "3 767 767.0 0.000000 1.000000 \n", 247 | "4 3 1000.0 0.507000 0.003000 \n", 248 | "5 1 1000.0 0.508000 0.001000 \n", 249 | "6 13 1000.0 0.535000 0.013000 \n", 250 | "7 11 1000.0 0.536000 0.011000 " 251 | ] 252 | }, 253 | "execution_count": 27, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "source": [ 259 | "pd.DataFrame(data).pipe(clean_data)" 260 | ] 261 | } 262 | ], 263 | "metadata": { 264 | "kernelspec": { 265 | "display_name": "Python 3", 266 | "language": "python", 267 | "name": "python3" 268 | }, 269 | "language_info": { 270 | "codemirror_mode": { 271 | "name": "ipython", 272 | "version": 3 273 | }, 274 | "file_extension": ".py", 275 | "mimetype": "text/x-python", 276 | "name": "python", 277 | "nbconvert_exporter": "python", 278 | "pygments_lexer": "ipython3", 279 | "version": "3.7.7" 280 | } 281 | }, 282 | "nbformat": 4, 283 | "nbformat_minor": 4 284 | } 285 | -------------------------------------------------------------------------------- /toxic/toxicity.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": 
"code", 5 | "execution_count": 12, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import time \n", 10 | "import pandas as pd\n", 11 | "from whatlies.language import BytePairLanguage, UniversalSentenceLanguage, SentenceTFMLanguage, CountVectorLanguage\n", 12 | "\n", 13 | "lang_use = UniversalSentenceLanguage(\"large\")\n", 14 | "lang_bp = BytePairLanguage(\"en\", dim=300, vs=200_000)\n", 15 | "lang_brt = SentenceTFMLanguage('distilbert-base-nli-stsb-mean-tokens')" 16 | ] 17 | }, 18 | { 19 | "cell_type": "markdown", 20 | "metadata": {}, 21 | "source": [ 22 | "Let's load the dataset and make some utility functions to get it in the right format." 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": 13, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "def clean_text(txt_col):\n", 32 | " return txt_col.str.replace(r'\\n', \" \")\n", 33 | "\n", 34 | "def to_train_df(dataf):\n", 35 | " dataf = dataf.copy() \n", 36 | " dataf['bad'] = dataf[['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']].sum(axis=1)\n", 37 | " dataf['label'] = ['toxic' if b else 'fine' for b in dataf['bad'] != 0]\n", 38 | " dataf['text'] = clean_text(dataf['comment_text'])\n", 39 | " return dataf[['text', 'label']]\n", 40 | "\n", 41 | "df = pd.read_csv(\"toxicity-train.csv.zip\").replace({\"\\n\", \"\"})" 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "Next, let's make a proper train/test split." 
49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 17, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "from sklearn.metrics import classification_report\n", 58 | "from sklearn.pipeline import Pipeline\n", 59 | "from sklearn.linear_model import LogisticRegression\n", 60 | "from sklearn.svm import SVC\n", 61 | "from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer\n", 62 | "from sklearn.model_selection import train_test_split\n", 63 | "\n", 64 | "train_df = df.pipe(to_train_df)[:10000]\n", 65 | "\n", 66 | "x_train, x_test, y_train, y_test = train_test_split(list(train_df['text']), train_df['label'])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 28, 72 | "metadata": {}, 73 | "outputs": [ 74 | { 75 | "data": { 76 | "text/html": [ 77 | "
\n", 78 | "\n", 91 | "\n", 92 | " \n", 93 | " \n", 94 | " \n", 95 | " \n", 96 | " \n", 97 | " \n", 98 | " \n", 99 | " \n", 100 | " \n", 101 | " \n", 102 | " \n", 103 | " \n", 104 | " \n", 105 | " \n", 106 | " \n", 107 | " \n", 108 | " \n", 109 | " \n", 110 | " \n", 111 | " \n", 112 | "
text
label
fine8970
toxic1030
\n", 113 | "
" 114 | ], 115 | "text/plain": [ 116 | " text\n", 117 | "label \n", 118 | "fine 8970\n", 119 | "toxic 1030" 120 | ] 121 | }, 122 | "execution_count": 28, 123 | "metadata": {}, 124 | "output_type": "execute_result" 125 | } 126 | ], 127 | "source": [ 128 | "train_df.groupby(\"label\").count()" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 29, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stdout", 138 | "output_type": "stream", 139 | "text": [ 140 | "report for ('CountVectorizer()', \"SVC(class_weight='balanced')\")\n", 141 | "train time: 9.83952283859253\n", 142 | "pred time: 3.0086185932159424\n", 143 | " precision recall f1-score support\n", 144 | "\n", 145 | " fine 0.97 0.79 0.87 2251\n", 146 | " toxic 0.30 0.82 0.44 249\n", 147 | "\n", 148 | " accuracy 0.79 2500\n", 149 | " macro avg 0.64 0.80 0.65 2500\n", 150 | "weighted avg 0.91 0.79 0.83 2500\n", 151 | "\n", 152 | "report for ('CountVectorizer()', \"LogisticRegression(class_weight='balanced', solver='liblinear')\")\n", 153 | "train time: 0.7045333385467529\n", 154 | "pred time: 0.1499195098876953\n", 155 | " precision recall f1-score support\n", 156 | "\n", 157 | " fine 0.97 0.97 0.97 2251\n", 158 | " toxic 0.70 0.70 0.70 249\n", 159 | "\n", 160 | " accuracy 0.94 2500\n", 161 | " macro avg 0.84 0.83 0.83 2500\n", 162 | "weighted avg 0.94 0.94 0.94 2500\n", 163 | "\n" 164 | ] 165 | } 166 | ], 167 | "source": [ 168 | "for mod in [SVC(class_weight=\"balanced\"), LogisticRegression(solver='liblinear', class_weight=\"balanced\")]:\n", 169 | " pipe = Pipeline([\n", 170 | " (\"feat\", CountVectorizer()),\n", 171 | " (\"mod\", mod)\n", 172 | " ])\n", 173 | " tic = time.time()\n", 174 | " pipe.fit(list(x_train), y_train)\n", 175 | " toc = time.time() \n", 176 | " print(f\"report for {str(lang), str(mod)}\")\n", 177 | " print(f\"train time: {toc - tic}\")\n", 178 | " tic = time.time()\n", 179 | " y_pred = pipe.predict(x_test)\n", 180 | " toc = time.time()\n", 181 | " 
print(f\"pred time: {toc - tic}\")\n", 182 | " print(classification_report(y_test, y_pred))" 183 | ] 184 | }, 185 | { 186 | "cell_type": "code", 187 | "execution_count": 31, 188 | "metadata": {}, 189 | "outputs": [], 190 | "source": [ 191 | "import pandas as pd \n", 192 | "df = pd.read_csv(\"results.csv\")" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": 34, 198 | "metadata": {}, 199 | "outputs": [ 200 | { 201 | "data": { 202 | "text/html": [ 203 | "
\n", 204 | "\n", 217 | "\n", 218 | " \n", 219 | " \n", 220 | " \n", 221 | " \n", 222 | " \n", 223 | " \n", 224 | " \n", 225 | " \n", 226 | " \n", 227 | " \n", 228 | " \n", 229 | " \n", 230 | " \n", 231 | " \n", 232 | " \n", 233 | " \n", 234 | " \n", 235 | " \n", 236 | " \n", 237 | " \n", 238 | " \n", 239 | " \n", 240 | " \n", 241 | " \n", 242 | " \n", 243 | " \n", 244 | " \n", 245 | " \n", 246 | " \n", 247 | " \n", 248 | " \n", 249 | "
langmodprecisionrecallpred-timetrain-time
1CountVectorizer()LogisticRegression()0.8072290.5381530.0856940.882420
0CountVectorizer()SVC()1.0000000.0321291.6345645.422204
\n", 250 | "
" 251 | ], 252 | "text/plain": [ 253 | " lang mod precision recall pred-time \\\n", 254 | "1 CountVectorizer() LogisticRegression() 0.807229 0.538153 0.085694 \n", 255 | "0 CountVectorizer() SVC() 1.000000 0.032129 1.634564 \n", 256 | "\n", 257 | " train-time \n", 258 | "1 0.882420 \n", 259 | "0 5.422204 " 260 | ] 261 | }, 262 | "execution_count": 34, 263 | "metadata": {}, 264 | "output_type": "execute_result" 265 | } 266 | ], 267 | "source": [ 268 | "df.sort_values('precision')" 269 | ] 270 | }, 271 | { 272 | "cell_type": "code", 273 | "execution_count": 42, 274 | "metadata": {}, 275 | "outputs": [], 276 | "source": [ 277 | "pipe = Pipeline([\n", 278 | " (\"feat\", lang_use),\n", 279 | " (\"mod\", LogisticRegression(solver='liblinear', class_weight=\"balanced\"))\n", 280 | "])\n", 281 | "\n", 282 | "pipe.fit(list(x_train), y_train)" 283 | ] 284 | }, 285 | { 286 | "cell_type": "code", 287 | "execution_count": null, 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [ 291 | "pipe.predict([\"you're a bad ass!\"])" 292 | ] 293 | }, 294 | { 295 | "cell_type": "code", 296 | "execution_count": null, 297 | "metadata": {}, 298 | "outputs": [], 299 | "source": [] 300 | } 301 | ], 302 | "metadata": { 303 | "kernelspec": { 304 | "display_name": "Python 3", 305 | "language": "python", 306 | "name": "python3" 307 | }, 308 | "language_info": { 309 | "codemirror_mode": { 310 | "name": "ipython", 311 | "version": 3 312 | }, 313 | "file_extension": ".py", 314 | "mimetype": "text/x-python", 315 | "name": "python", 316 | "nbconvert_exporter": "python", 317 | "pygments_lexer": "ipython3", 318 | "version": "3.8.5" 319 | } 320 | }, 321 | "nbformat": 4, 322 | "nbformat_minor": 4 323 | } 324 | -------------------------------------------------------------------------------- /toxic/toxicity.py: -------------------------------------------------------------------------------- 1 | import time 2 | import pandas as pd 3 | from sklearn.model_selection import train_test_split 4 | 5 | from 
from whatlies.language import BytePairLanguage, UniversalSentenceLanguage, SentenceTFMLanguage, CountVectorLanguage
from sklearn.metrics import classification_report
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer


# NOTE: this line used to chain `.replace({"\n", ""})`. That argument is a set
# literal rather than an {old: new} mapping, so it never specified what to
# replace with. Newlines are normalised in `clean_text` instead.
df = pd.read_csv("toxicity-train.csv.zip")


def clean_text(txt_col):
    """Return the string column `txt_col` with each newline replaced by a space.

    The pattern is a literal newline and `regex=False` is passed explicitly,
    because the default of the `regex` argument of `Series.str.replace`
    differs between pandas versions.
    """
    return txt_col.str.replace("\n", " ", regex=False)


def to_train_df(dataf):
    """Build the two-column (`text`, `label`) training frame from the raw data.

    A row is labelled 'toxic' when the sum of its six toxicity flag columns is
    non-zero and 'fine' otherwise. `text` is `comment_text` passed through
    `clean_text`. The work is done on a copy, so `dataf` itself is not changed.
    """
    dataf = dataf.copy()
    flag_columns = ['toxic', 'severe_toxic', 'obscene', 'threat', 'insult', 'identity_hate']
    dataf['bad'] = dataf[flag_columns].sum(axis=1)
    dataf['label'] = ['toxic' if b else 'fine' for b in dataf['bad'] != 0]
    dataf['text'] = clean_text(dataf['comment_text'])
    return dataf[['text', 'label']]


# Only the first 20000 rows are used for the benchmark.
train_df = df.pipe(to_train_df)[:20000]
x_train, x_test, y_train, y_test = train_test_split(list(train_df['text']), train_df['label'])


lang_use = UniversalSentenceLanguage("large")
lang_bp = BytePairLanguage("en", dim=300, vs=200_000)
lang_brt = SentenceTFMLanguage('distilbert-base-nli-stsb-mean-tokens')


models = {}   # (featurizer class name, model class name) -> fitted Pipeline
results = []  # one dict per (featurizer, model) pair; written to results.csv


def get_name(o):
    """Return the class name of `o`."""
    return o.__class__.__name__


for lang in [CountVectorizer(), lang_bp, lang_use, lang_brt]:
    for mod in [SVC(class_weight='balanced'), LogisticRegression(solver='liblinear', class_weight='balanced')]:
        pipe = Pipeline([
            ("feat", lang),
            ("mod", mod)
        ])
        models[get_name(lang), get_name(mod)] = pipe

        # perf_counter is the clock intended for measuring short durations.
        tic = time.perf_counter()
        pipe.fit(list(x_train), y_train)
        train_time = time.perf_counter() - tic
        print(f"report for {get_name(lang), get_name(mod)}")
        print(f"train time: {train_time}")

        tic = time.perf_counter()
        y_pred = pipe.predict(x_test)
        pred_time = time.perf_counter() - tic
        print(f"pred time: {pred_time}")

        # Only the scores for the 'toxic' class are stored in the results.
        d = classification_report(y_test, y_pred, output_dict=True)
        data = {
            'lang': get_name(lang),
            'mod': get_name(mod),
            'precision': d['toxic']['precision'],
            'recall': d['toxic']['recall'],
            'pred-time': pred_time,
            'train-time': train_time,
        }
        results.append(data)
        print(classification_report(y_test, y_pred))

pd.DataFrame(results).to_csv("results.csv", index=False)