', 6 | '
', 7 | ' ', 11 | '', 12 | '
'+y+"
"}function l(){var e,n=Docsify.dom.find("div.search"),t=Docsify.dom.find(n,"input");Docsify.dom.on(n,"click",function(e){return"A"!==e.target.tagName&&e.stopPropagation()}),Docsify.dom.on(t,"input",function(n){clearTimeout(e),e=setTimeout(function(e){return c(n.target.value.trim())},100)})}function f(e,n){var t=Docsify.dom.getNode('.search input[type="search"]');if(t)if("string"==typeof e)t.placeholder=e;else{var o=Object.keys(e).filter(function(e){return n.indexOf(e)>-1})[0];t.placeholder=e[o]}}function d(e,n){if("string"==typeof e)y=e;else{var t=Object.keys(e).filter(function(e){return n.indexOf(e)>-1})[0];y=e[t]}}function p(e,n){var t=n.router.parse().query.s;a(),s(e,t),l(),t&&setTimeout(function(e){return c(t)},500)}function u(e,n){f(e.placeholder,n.route.path),d(e.noData,n.route.path)}var h,g={},y="",m={placeholder:"Type to search",noData:"No Results!",paths:"auto",depth:2,maxAge:864e5},v=function(e,n){var t=Docsify.util,o=n.config.search||m;Array.isArray(o)?m.paths=o:"object"==typeof o&&(m.paths=Array.isArray(o.paths)?o.paths:"auto",m.maxAge=t.isPrimitive(o.maxAge)?o.maxAge:m.maxAge,m.placeholder=o.placeholder||m.placeholder,m.noData=o.noData||m.noData,m.depth=o.depth||m.depth);var i="auto"===m.paths;e.mounted(function(e){p(m,n),!i&&r(m,n)}),e.doneEach(function(e){u(m,n),i&&r(m,n)})};$docsify.plugins=[].concat(v,$docsify.plugins)}(); 2 | -------------------------------------------------------------------------------- /asset/style.css: -------------------------------------------------------------------------------- 1 | /*隐藏头部的目录*/ 2 | #main>ul:nth-child(1) { 3 | display: none; 4 | } 5 | 6 | #main>ul:nth-child(2) { 7 | display: none; 8 | } 9 | 10 | .markdown-section h1 { 11 | margin: 3rem 0 2rem 0; 12 | } 13 | 14 | .markdown-section h2 { 15 | margin: 2rem 0 1rem; 16 | } 17 | 18 | img, 19 | pre { 20 | border-radius: 8px; 21 | } 22 | 23 | .content, 24 | .sidebar, 25 | .markdown-section, 26 | body, 27 | .search input { 28 | background-color: rgba(243, 242, 238, 1) !important; 29 | } 30 | 31 | @media (min-width:600px) { 32 | .sidebar-toggle { 33 | background-color: #f3f2ee; 34 | } 35 | } 36 | 37 | .docsify-copy-code-button { 38 | background: #f8f8f8 !important; 39 | color: #7a7a7a !important; 40 | } 41 | 42 | body { 43 | /*font-family: Microsoft YaHei, Source Sans Pro, Helvetica Neue, Arial, sans-serif !important;*/ 44 | } 45 | 46 | .markdown-section>p { 47 | font-size: 16px !important; 48 | } 49 | 50 | .markdown-section pre>code { 51 | font-family: Consolas, Roboto Mono, Monaco, courier, monospace !important; 52 | font-size: .9rem !important; 53 | 54 | } 55 | 56 | /*.anchor span { 57 | color: rgb(66, 185, 131); 58 | }*/ 59 | 60 | section.cover h1 { 61 | margin: 0; 62 | } 63 | 64 | body>section>div.cover-main>ul>li>a { 65 | color: #42b983; 66 | } 67 | 68 | .markdown-section img { 69 | box-shadow: 7px 9px 10px #aaa !important; 70 | } 71 | 72 | 73 | pre { 74 | background-color: #f3f2ee !important; 75 | } 76 | 77 | @media (min-width:600px) { 78 | pre code { 79 | /*box-shadow: 2px 1px 20px 2px #aaa;*/ 80 | /*border-radius: 10px !important;*/ 81 | padding-left: 20px !important; 82 | } 83 | } 84 | 85 | @media (max-width:600px) { 86 | pre { 87 | padding-left: 0px !important; 88 | padding-right: 0px !important; 89 | } 90 | } 91 | 92 | .markdown-section pre { 93 | padding-left: 0 !important; 94 | padding-right: 0px !important; 95 | box-shadow: 2px 1px 20px 2px #aaa; 96 | } -------------------------------------------------------------------------------- /doc/en/api.md: 
-------------------------------------------------------------------------------- 1 | --- 2 | id: api 3 | title: API 4 | --- 5 | 6 | We automatically generate our [API documentation](/docs/en/html/index.html) with doxygen. 7 | -------------------------------------------------------------------------------- /doc/en/cheatsheet.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: cheatsheet 3 | title: Cheatsheet 4 | --- 5 | 6 | ## Word representation learning 7 | 8 | In order to learn word vectors, do: 9 | 10 | ```bash 11 | $ ./fasttext skipgram -input data.txt -output model 12 | ``` 13 | 14 | ## Obtaining word vectors 15 | 16 | Print word vectors for a text file `queries.txt` containing words: 17 | 18 | ```bash 19 | $ ./fasttext print-word-vectors model.bin < queries.txt 20 | ``` 21 | 22 | ## Text classification 23 | 24 | In order to train a text classifier, do: 25 | 26 | ```bash 27 | $ ./fasttext supervised -input train.txt -output model 28 | ``` 29 | 30 | Once the model is trained, you can evaluate it by computing the precision and recall at k (P@k and R@k) on a test set using: 31 | 32 | ```bash 33 | $ ./fasttext test model.bin test.txt 1 34 | ``` 35 | 36 | In order to obtain the k most likely labels for a piece of text, use: 37 | 38 | ```bash 39 | $ ./fasttext predict model.bin test.txt k 40 | ``` 41 | 42 | In order to obtain the k most likely labels and their associated probabilities for a piece of text, use: 43 | 44 | ```bash 45 | $ ./fasttext predict-prob model.bin test.txt k 46 | ``` 47 | 48 | If you want to compute vector representations of sentences or paragraphs, please use: 49 | 50 | ```bash 51 | $ ./fasttext print-sentence-vectors model.bin < text.txt 52 | ``` 53 | 54 | ## Quantization 55 | 56 | In order to create a `.ftz` file with a smaller memory footprint, do: 57 | 58 | ```bash 59 | $ ./fasttext quantize -output model 60 | ``` 61 | 62 | All other commands, such as `test`, also work with this model: 63 | 64 | ```bash 65 | $ ./fasttext test model.ftz test.txt 66 | ``` 67 | -------------------------------------------------------------------------------- /doc/en/crawl-vectors.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: crawl-vectors 3 | title: Word vectors for 157 languages 4 | --- 5 | 6 | We distribute pre-trained word vectors for 157 languages, trained on [*Common Crawl*](http://commoncrawl.org/) and [*Wikipedia*](https://www.wikipedia.org) using fastText. 7 | These models were trained using CBOW with position-weights, in dimension 300, with character n-grams of length 5, a window of size 5 and 10 negatives. 8 | We also distribute three new word analogy datasets, for French, Hindi and Polish. 9 | 10 | ### Format 11 | 12 | The word vectors are available in both binary and text formats. 13 | 14 | Using the binary models, vectors for out-of-vocabulary words can be obtained with 15 | ``` 16 | $ ./fasttext print-word-vectors wiki.it.300.bin < oov_words.txt 17 | ``` 18 | where the file oov_words.txt contains out-of-vocabulary words. 19 | 20 | In the text format, each line contains a word followed by its vector. 21 | Each value is space separated, and words are sorted by frequency in descending order. 
22 | These text models can easily be loaded in Python using the following code: 23 | ```python 24 | import io 25 | 26 | def load_vectors(fname): 27 | fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore') 28 | n, d = map(int, fin.readline().split())  # first line: vocabulary size and vector dimension 29 | data = {} 30 | for line in fin: 31 | tokens = line.rstrip().split(' ') 32 | data[tokens[0]] = list(map(float, tokens[1:]))  # store the vector as a list of floats 33 | return data 34 | ``` 35 | 36 | ### Tokenization 37 | 38 | We used the [*Stanford word segmenter*](https://nlp.stanford.edu/software/segmenter.html) for Chinese, [*Mecab*](http://taku910.github.io/mecab/) for Japanese and [*UETsegmenter*](https://github.com/phongnt570/UETsegmenter) for Vietnamese. 39 | For languages using the Latin, Cyrillic, Hebrew or Greek scripts, we used the tokenizer from the [*Europarl*](http://www.statmt.org/europarl/) preprocessing tools. 40 | For the remaining languages, we used the ICU tokenizer. 41 | 42 | More information about the training of these models can be found in the article [*Learning Word Vectors for 157 Languages*](https://arxiv.org/abs/1802.06893). 43 | 44 | ### License 45 | 46 | The word vectors are distributed under the [*Creative Commons Attribution-Share-Alike License 3.0*](https://creativecommons.org/licenses/by-sa/3.0/). 47 | 48 | ### References 49 | 50 | If you use these word vectors, please cite the following paper: 51 | 52 | E. Grave\*, P. Bojanowski\*, P. Gupta, A. Joulin, T. Mikolov, [*Learning Word Vectors for 157 Languages*](https://arxiv.org/abs/1802.06893) 53 | 54 | ```markup 55 | @inproceedings{grave2018learning, 56 | title={Learning Word Vectors for 157 Languages}, 57 | author={Grave, Edouard and Bojanowski, Piotr and Gupta, Prakhar and Joulin, Armand and Mikolov, Tomas}, 58 | booktitle={Proceedings of the International Conference on Language Resources and Evaluation (LREC 2018)}, 59 | year={2018} 60 | } 61 | ``` 62 | 63 | ### Evaluation datasets 64 | 65 | The analogy evaluation datasets described in the paper are available here: [French](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-analogies/questions-words-fr.txt), [Hindi](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-analogies/questions-words-hi.txt), [Polish](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-analogies/questions-words-pl.txt). 
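As a rough illustration of how such a file can be combined with the text vectors above (a minimal sketch, not the evaluation code used for the paper: it assumes the files follow the usual word-analogy layout with lines starting with `:` marking sections and four words A B C D per question, that `numpy` is installed, that the `load_vectors` helper from the Format section is in scope, and that enough memory is available to hold the full matrix — for a quick experiment you may want to keep only the most frequent words):

```python
import numpy as np

def analogy_accuracy(vectors, questions_path):
    """Score A:B::C:D questions with the standard 3CosAdd rule."""
    words = list(vectors.keys())
    matrix = np.array([vectors[w] for w in words], dtype=np.float32)
    matrix /= np.linalg.norm(matrix, axis=1, keepdims=True)   # unit-normalize the rows
    index = {w: i for i, w in enumerate(words)}

    correct = total = 0
    with open(questions_path, encoding='utf-8') as fin:
        for line in fin:
            if line.startswith(':'):      # section header, e.g. ": capital-common-countries"
                continue
            parts = line.split()
            if len(parts) != 4 or any(w not in index for w in parts):
                continue                  # skip malformed lines and out-of-vocabulary questions
            a, b, c, d = parts
            target = matrix[index[b]] - matrix[index[a]] + matrix[index[c]]
            scores = matrix @ target
            for w in (a, b, c):           # never predict one of the question words
                scores[index[w]] = -np.inf
            total += 1
            correct += int(words[int(scores.argmax())] == d)
    return correct / max(total, 1)
```

The returned value is the fraction of in-vocabulary questions whose top-ranked word matches the expected answer.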
66 | 67 | ### Models 68 | 69 | The models can be downloaded from: 70 | 71 | |||| 72 | |-|-|-| 73 | | Afrikaans: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.af.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.af.300.vec.gz) | Albanian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sq.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sq.300.vec.gz) | Alemannic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.als.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.als.300.vec.gz) | 74 | | Amharic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.am.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.am.300.vec.gz) | Arabic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ar.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ar.300.vec.gz) | Aragonese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.an.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.an.300.vec.gz) | 75 | | Armenian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hy.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hy.300.vec.gz) | Assamese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.as.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.as.300.vec.gz) | Asturian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ast.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ast.300.vec.gz) | 76 | | Azerbaijani: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.az.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.az.300.vec.gz) | Bashkir: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ba.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ba.300.vec.gz) | Basque: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eu.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eu.300.vec.gz) | 77 | | Bavarian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bar.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bar.300.vec.gz) | Belarusian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.be.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.be.300.vec.gz) | Bengali: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bn.300.vec.gz) | 78 | | Bihari: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bh.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bh.300.vec.gz) | Bishnupriya Manipuri: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bpy.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bpy.300.vec.gz) | Bosnian: 
[bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bs.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bs.300.vec.gz) | 79 | | Breton: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.br.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.br.300.vec.gz) | Bulgarian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bg.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bg.300.vec.gz) | Burmese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.my.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.my.300.vec.gz) | 80 | | Catalan: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ca.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ca.300.vec.gz) | Cebuano: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ceb.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ceb.300.vec.gz) | Central Bicolano: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bcl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bcl.300.vec.gz) | 81 | | Chechen: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ce.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ce.300.vec.gz) | Chinese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.zh.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.zh.300.vec.gz) | Chuvash: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cv.300.vec.gz) | 82 | | Corsican: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.co.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.co.300.vec.gz) | Croatian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hr.300.vec.gz) | Czech: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cs.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cs.300.vec.gz) | 83 | | Danish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.da.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.da.300.vec.gz) | Divehi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.dv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.dv.300.vec.gz) | Dutch: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nl.300.vec.gz) | 84 | | Eastern Punjabi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pa.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pa.300.vec.gz) | Egyptian Arabic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.arz.300.bin.gz), 
[text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.arz.300.vec.gz) | Emilian-Romagnol: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eml.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eml.300.vec.gz) | 85 | | Erzya: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.myv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.myv.300.vec.gz) | Esperanto: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.eo.300.vec.gz) | Estonian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.et.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.et.300.vec.gz) | 86 | | Fiji Hindi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hif.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hif.300.vec.gz) | Finnish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fi.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fi.300.vec.gz) | French: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fr.300.vec.gz) | 87 | | Galician: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gl.300.vec.gz) | Georgian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ka.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ka.300.vec.gz) | German: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.de.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.de.300.vec.gz) | 88 | | Goan Konkani: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gom.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gom.300.vec.gz) | Greek: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.el.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.el.300.vec.gz) | Gujarati: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gu.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gu.300.vec.gz) | 89 | | Haitian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ht.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ht.300.vec.gz) | Hebrew: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.he.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.he.300.vec.gz) | Hill Mari: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mrj.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mrj.300.vec.gz) | 90 | | Hindi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hi.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hi.300.vec.gz) | Hungarian: 
[bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hu.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hu.300.vec.gz) | Icelandic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.is.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.is.300.vec.gz) | 91 | | Ido: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.io.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.io.300.vec.gz) | Ilokano: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ilo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ilo.300.vec.gz) | Indonesian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.id.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.id.300.vec.gz) | 92 | | Interlingua: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ia.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ia.300.vec.gz) | Irish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ga.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ga.300.vec.gz) | Italian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.it.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.it.300.vec.gz) | 93 | | Japanese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ja.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ja.300.vec.gz) | Javanese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.jv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.jv.300.vec.gz) | Kannada: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.kn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.kn.300.vec.gz) | 94 | | Kapampangan: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pam.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pam.300.vec.gz) | Kazakh: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.kk.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.kk.300.vec.gz) | Khmer: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.km.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.km.300.vec.gz) | 95 | | Kirghiz: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ky.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ky.300.vec.gz) | Korean: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ko.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ko.300.vec.gz) | Kurdish (Kurmanji): [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ku.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ku.300.vec.gz) | 96 | | Kurdish (Sorani): [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ckb.300.bin.gz), 
[text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ckb.300.vec.gz) | Latin: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.la.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.la.300.vec.gz) | Latvian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lv.300.vec.gz) | 97 | | Limburgish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.li.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.li.300.vec.gz) | Lithuanian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lt.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lt.300.vec.gz) | Lombard: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lmo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lmo.300.vec.gz) | 98 | | Low Saxon: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nds.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nds.300.vec.gz) | Luxembourgish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lb.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.lb.300.vec.gz) | Macedonian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mk.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mk.300.vec.gz) | 99 | | Maithili: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mai.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mai.300.vec.gz) | Malagasy: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mg.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mg.300.vec.gz) | Malay: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ms.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ms.300.vec.gz) | 100 | | Malayalam: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ml.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ml.300.vec.gz) | Maltese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mt.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mt.300.vec.gz) | Manx: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gv.300.vec.gz) | 101 | | Marathi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mr.300.vec.gz) | Mazandarani: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mzn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mzn.300.vec.gz) | Meadow Mari: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mhr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mhr.300.vec.gz) | 102 | | Minangkabau: 
[bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.min.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.min.300.vec.gz) | Mingrelian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.xmf.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.xmf.300.vec.gz) | Mirandese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mwl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mwl.300.vec.gz) | 103 | | Mongolian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.mn.300.vec.gz) | Nahuatl: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nah.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nah.300.vec.gz) | Neapolitan: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nap.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nap.300.vec.gz) | 104 | | Nepali: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ne.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ne.300.vec.gz) | Newar: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.new.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.new.300.vec.gz) | North Frisian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.frr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.frr.300.vec.gz) | 105 | | Northern Sotho: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nso.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nso.300.vec.gz) | Norwegian (Bokmål): [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.no.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.no.300.vec.gz) | Norwegian (Nynorsk): [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.nn.300.vec.gz) | 106 | | Occitan: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.oc.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.oc.300.vec.gz) | Oriya: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.or.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.or.300.vec.gz) | Ossetian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.os.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.os.300.vec.gz) | 107 | | Palatinate German: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pfl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pfl.300.vec.gz) | Pashto: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ps.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ps.300.vec.gz) | Persian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fa.300.bin.gz), 
[text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fa.300.vec.gz) | 108 | | Piedmontese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pms.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pms.300.vec.gz) | Polish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pl.300.vec.gz) | Portuguese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pt.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pt.300.vec.gz) | 109 | | Quechua: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.qu.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.qu.300.vec.gz) | Romanian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ro.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ro.300.vec.gz) | Romansh: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.rm.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.rm.300.vec.gz) | 110 | | Russian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ru.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ru.300.vec.gz) | Sakha: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sah.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sah.300.vec.gz) | Sanskrit: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sa.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sa.300.vec.gz) | 111 | | Sardinian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sc.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sc.300.vec.gz) | Scots: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sco.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sco.300.vec.gz) | Scottish Gaelic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gd.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.gd.300.vec.gz) | 112 | | Serbian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sr.300.vec.gz) | Serbo-Croatian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sh.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sh.300.vec.gz) | Sicilian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.scn.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.scn.300.vec.gz) | 113 | | Sindhi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sd.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sd.300.vec.gz) | Sinhalese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.si.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.si.300.vec.gz) | Slovak: 
[bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sk.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sk.300.vec.gz) | 114 | | Slovenian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sl.300.vec.gz) | Somali: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.so.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.so.300.vec.gz) | Southern Azerbaijani: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.azb.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.azb.300.vec.gz) | 115 | | Spanish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.es.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.es.300.vec.gz) | Sundanese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.su.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.su.300.vec.gz) | Swahili: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sw.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sw.300.vec.gz) | 116 | | Swedish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sv.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.sv.300.vec.gz) | Tagalog: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tl.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tl.300.vec.gz) | Tajik: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tg.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tg.300.vec.gz) | 117 | | Tamil: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ta.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ta.300.vec.gz) | Tatar: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tt.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tt.300.vec.gz) | Telugu: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.te.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.te.300.vec.gz) | 118 | | Thai: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.th.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.th.300.vec.gz) | Tibetan: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.bo.300.vec.gz) | Turkish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tr.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tr.300.vec.gz) | 119 | | Turkmen: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tk.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.tk.300.vec.gz) | Ukrainian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.uk.300.bin.gz), 
[text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.uk.300.vec.gz) | Upper Sorbian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hsb.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.hsb.300.vec.gz) | 120 | | Urdu: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ur.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ur.300.vec.gz) | Uyghur: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ug.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.ug.300.vec.gz) | Uzbek: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.uz.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.uz.300.vec.gz) | 121 | | Venetian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vec.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vec.300.vec.gz) | Vietnamese: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vi.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vi.300.vec.gz) | Volapük: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vo.300.vec.gz) | 122 | | Walloon: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.wa.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.wa.300.vec.gz) | Waray: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.war.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.war.300.vec.gz) | Welsh: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cy.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.cy.300.vec.gz) | 123 | | West Flemish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vls.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.vls.300.vec.gz) | West Frisian: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fy.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.fy.300.vec.gz) | Western Punjabi: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pnb.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.pnb.300.vec.gz) | 124 | | Yiddish: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.yi.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.yi.300.vec.gz) | Yoruba: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.yo.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.yo.300.vec.gz) | Zazaki: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.diq.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.diq.300.vec.gz) | 125 | | Zeelandic: [bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.zea.300.bin.gz), [text](https://s3-us-west-1.amazonaws.com/fasttext-vectors/word-vectors-v2/cc.zea.300.vec.gz) | 126 | 
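Each download is a single gzipped file. As a small convenience sketch (not an official download script; the language code `it` and the file names are just examples, and the archives are several gigabytes, so make sure enough disk space is available), a model from the table above can be fetched and decompressed directly from Python:

```python
import gzip
import shutil
import urllib.request

def download_model(lang, kind='vec'):
    """Download and decompress cc.<lang>.300.<kind> ('bin' or 'vec') from the table above."""
    name = 'cc.{}.300.{}'.format(lang, kind)
    url = ('https://s3-us-west-1.amazonaws.com/fasttext-vectors/'
           'word-vectors-v2/{}.gz'.format(name))
    urllib.request.urlretrieve(url, name + '.gz')     # fetch the gzipped archive
    with gzip.open(name + '.gz', 'rb') as fin, open(name, 'wb') as fout:
        shutil.copyfileobj(fin, fout)                 # write the decompressed model next to it
    return name

# e.g. the Italian text vectors
path = download_model('it', 'vec')
```

The resulting `.vec` file can then be read with the `load_vectors` helper from the Format section, and a `.bin` file can be queried with the `print-word-vectors` command shown above.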
-------------------------------------------------------------------------------- /doc/en/dataset.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: dataset 3 | title: Datasets 4 | --- 5 | 6 | [Download YFCC100M Dataset](https://fb-public.box.com/s/htfdbrvycvroebv9ecaezaztocbcnsdn) 7 | -------------------------------------------------------------------------------- /doc/en/english-vectors.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: english-vectors 3 | title: English word vectors 4 | --- 5 | 6 | This page gathers several pre-trained word vectors trained using fastText. 7 | 8 | ### Download pre-trained word vectors 9 | 10 | Pre-trained word vectors learned on different sources can be downloaded below: 11 | 12 | 1. [wiki-news-300d-1M.vec.zip](https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki-news-300d-1M.vec.zip): 1 million word vectors trained on Wikipedia 2017, UMBC webbase corpus and statmt.org news dataset (16B tokens). 13 | 2. [wiki-news-300d-1M-subword.vec.zip](https://s3-us-west-1.amazonaws.com/fasttext-vectors/wiki-news-300d-1M-subword.vec.zip): 1 million word vectors trained with subword information on Wikipedia 2017, UMBC webbase corpus and statmt.org news dataset (16B tokens). 14 | 3. [crawl-300d-2M.vec.zip](https://s3-us-west-1.amazonaws.com/fasttext-vectors/crawl-300d-2M.vec.zip): 2 million word vectors trained on Common Crawl (600B tokens). 15 | 16 | ### Format 17 | 18 | The first line of the file contains the number of words in the vocabulary and the size of the vectors. 19 | Each line contains a word followed by its vector, like in the default fastText text format. 20 | Each value is space separated. Words are ordered by descending frequency. 21 | 22 | ### License 23 | 24 | These word vectors are distributed under the [*Creative Commons Attribution-Share-Alike License 3.0*](https://creativecommons.org/licenses/by-sa/3.0/). 25 | 26 | ### References 27 | 28 | If you use these word vectors, please cite the following paper: 29 | 30 | T. Mikolov, E. Grave, P. Bojanowski, C. Puhrsch, A. Joulin. [*Advances in Pre-Training Distributed Word Representations*](https://arxiv.org/abs/1712.09405) 31 | 32 | ```markup 33 | @inproceedings{mikolov2018advances, 34 | title={Advances in Pre-Training Distributed Word Representations}, 35 | author={Mikolov, Tomas and Grave, Edouard and Bojanowski, Piotr and Puhrsch, Christian and Joulin, Armand}, 36 | booktitle={Proceedings of the International Conference on Language Resources and Evaluation (LREC 2018)}, 37 | year={2018} 38 | } 39 | ``` 40 | -------------------------------------------------------------------------------- /doc/en/faqs.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: faqs 3 | title: FAQ 4 | --- 5 | 6 | ## What is fastText? Are there tutorials? 7 | 8 | FastText is a library for text classification and representation. It transforms text into continuous vectors that can later be used on any language-related task. A few tutorials are available. 9 | 10 | ## Why are my fastText models that big? 11 | 12 | fastText uses a hashtable for either word or character ngrams. The size of the hashtable directly impacts the size of a model. To reduce the size of the model, it is possible to reduce the size of this table with the option '-hash'. For example, a good value is 20000. Another option that greatly impacts the size of a model is the size of the vectors (-dim).
This dimension can be reduced to save space, but doing so can significantly impact performance. If that still produces a model that is too big, one can further reduce the size of a trained model with the quantization option. 13 | ```bash 14 | ./fasttext quantize -output model 15 | ``` 16 | 17 | ## What would be the best way to represent word phrases rather than words? 18 | 19 | Currently, the best approach to represent word phrases or sentences is to take a bag of words of the word vectors. Additionally, for phrases like “New York”, preprocessing the data so that it becomes a single token “New_York” can greatly help. 20 | 21 | ## Why does fastText produce vectors even for unknown words? 22 | 23 | One of the key features of fastText word representation is its ability to produce vectors for any word, even made-up ones. 24 | Indeed, fastText word vectors are built from the vectors of the character substrings contained in them. 25 | This makes it possible to build vectors even for misspelled words or concatenations of words. 26 | 27 | ## Why is the hierarchical softmax slightly worse in performance than the full softmax? 28 | 29 | The hierarchical softmax is an approximation of the full softmax loss that makes it possible to train on a large number of classes efficiently. This is often at the cost of a few percent of accuracy. 30 | Note also that this loss is intended for unbalanced classes, that is, when some classes are more frequent than others. If your dataset has a balanced number of examples per class, it is worth trying the negative sampling loss (-loss ns -neg 100). 31 | However, negative sampling will still be very slow at test time, since the full softmax will be computed. 32 | 33 | ## Can we run the fastText program on a GPU? 34 | 35 | FastText only works on CPU for accessibility. That being said, fastText has been implemented in the Caffe2 library, which can be run on GPU. 36 | 37 | ## Can I use fastText with Python? Or other languages? 38 | 39 | There are a few unofficial wrappers for Python or Lua available on GitHub. 40 | 41 | ## Can I use fastText with continuous data? 42 | 43 | FastText works on discrete tokens and thus cannot be directly used on continuous tokens. However, one can discretize continuous tokens to use fastText on them, for example by rounding values to a specific digit ("12.3" becomes "12"); a small sketch of this idea is given at the end of this page. 44 | 45 | ## There are misspellings in the dictionary. Should we improve text normalization? 46 | 47 | If the words are infrequent, there is no need to worry. 48 | 49 | ## I'm encountering a NaN, why could this be? 50 | 51 | You'll likely see this behavior because your learning rate is too high. Try reducing it until you don't see this error anymore. 52 | 53 | ## My compiler / architecture can't build fastText. What should I do? 54 | Try a newer version of your compiler. We try to maintain compatibility with older versions of gcc and many platforms; however, sometimes maintaining backwards compatibility becomes very hard. In general, compilers and tool chains that ship with LTS versions of major Linux distributions should be fair game. In any case, create an issue with your compiler version and architecture and we'll try to implement compatibility.
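As a small illustration of the discretization idea mentioned in the continuous-data question above (a rough sketch, not part of fastText itself; the bin width, the token format and the sample values are arbitrary choices for the example):

```python
# Sketch: turn continuous values into discrete tokens that fastText can consume.
def discretize(value, bin_width=0.5):
    bucket = round(value / bin_width) * bin_width   # snap the value to the nearest bin
    return 'v_{:g}'.format(bucket)                  # e.g. 12.3 -> 'v_12.5'

measurements = [12.3, 12.4, 0.71, 5.05]
tokens = [discretize(v) for v in measurements]
print(' '.join(tokens))  # prints: v_12.5 v_12.5 v_0.5 v_5
```

Each resulting token can then be written into a fastText training file like any ordinary word.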
55 | 56 | 57 | 58 | 59 | -------------------------------------------------------------------------------- /doc/en/language-identification.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: language-identification 3 | title: Language identification 4 | --- 5 | 6 | ### Description 7 | 8 | We distribute two models for language identification, which can recognize 176 languages (see the list of ISO codes below). These models were trained on data from [Wikipedia](https://www.wikipedia.org/), [Tatoeba](https://tatoeba.org/eng/) and [SETimes](http://nlp.ffzg.hr/resources/corpora/setimes/), used under [CC-BY-SA](http://creativecommons.org/licenses/by-sa/3.0/). 9 | 10 | We distribute two versions of the models: 11 | 12 | * [lid.176.bin](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/lid.176.bin), which is faster and slightly more accurate, but has a file size of 126MB ; 13 | * [lid.176.ftz](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/lid.176.ftz), which is the compressed version of the model, with a file size of 917kB. 14 | 15 | These models were trained on UTF-8 data, and therefore expect UTF-8 as input. 16 | 17 | ### License 18 | 19 | The models are distributed under the [*Creative Commons Attribution-Share-Alike License 3.0*](https://creativecommons.org/licenses/by-sa/3.0/). 20 | 21 | ### List of supported languages 22 | ``` 23 | af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh 24 | ``` 25 | 26 | ### References 27 | 28 | If you use these models, please cite the following papers: 29 | 30 | [1] A. Joulin, E. Grave, P. Bojanowski, T. Mikolov, [*Bag of Tricks for Efficient Text Classification*](https://arxiv.org/abs/1607.01759) 31 | ``` 32 | @article{joulin2016bag, 33 | title={Bag of Tricks for Efficient Text Classification}, 34 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas}, 35 | journal={arXiv preprint arXiv:1607.01759}, 36 | year={2016} 37 | } 38 | ``` 39 | [2] A. Joulin, E. Grave, P. Bojanowski, M. Douze, H. Jégou, T. Mikolov, [*FastText.zip: Compressing text classification models* ](https://arxiv.org/abs/1612.03651) 40 | ``` 41 | @article{joulin2016fasttext, 42 | title={FastText.zip: Compressing text classification models}, 43 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, H{\'e}rve and Mikolov, Tomas}, 44 | journal={arXiv preprint arXiv:1612.03651}, 45 | year={2016} 46 | } 47 | ``` 48 | -------------------------------------------------------------------------------- /doc/en/options.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: options 3 | title: List of options 4 | --- 5 | 6 | Invoke a command without arguments to list available arguments and their default values: 7 | 8 | ```bash 9 | $ ./fasttext supervised 10 | Empty input or output path. 
11 | 12 | The following arguments are mandatory: 13 | -input training file path 14 | -output output file path 15 | 16 | The following arguments are optional: 17 | -verbose verbosity level [2] 18 | 19 | The following arguments for the dictionary are optional: 20 | -minCount minimal number of word occurences [5] 21 | -minCountLabel minimal number of label occurences [0] 22 | -wordNgrams max length of word ngram [1] 23 | -bucket number of buckets [2000000] 24 | -minn min length of char ngram [3] 25 | -maxn max length of char ngram [6] 26 | -t sampling threshold [0.0001] 27 | -label labels prefix [__label__] 28 | 29 | The following arguments for training are optional: 30 | -lr learning rate [0.05] 31 | -lrUpdateRate change the rate of updates for the learning rate [100] 32 | -dim size of word vectors [100] 33 | -ws size of the context window [5] 34 | -epoch number of epochs [5] 35 | -neg number of negatives sampled [5] 36 | -loss loss function {ns, hs, softmax} [ns] 37 | -thread number of threads [12] 38 | -pretrainedVectors pretrained word vectors for supervised learning [] 39 | -saveOutput whether output params should be saved [0] 40 | 41 | The following arguments for quantization are optional: 42 | -cutoff number of words and ngrams to retain [0] 43 | -retrain finetune embeddings if a cutoff is applied [0] 44 | -qnorm quantizing the norm separately [0] 45 | -qout quantizing the classifier [0] 46 | -dsub size of each sub-vector [2] 47 | ``` 48 | 49 | Defaults may vary by mode. (Word-representation modes `skipgram` and `cbow` use a default `-minCount` of 5.) 50 | 51 | -------------------------------------------------------------------------------- /doc/en/references.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: references 3 | title: References 4 | --- 5 | 6 | Please cite [1](#enriching-word-vectors-with-subword-information) if using this code for learning word representations or [2](#bag-of-tricks-for-efficient-text-classification) if using for text classification. 7 | 8 | [1] P. Bojanowski\*, E. Grave\*, A. Joulin, T. Mikolov, [*Enriching Word Vectors with Subword Information*](https://arxiv.org/abs/1607.04606) 9 | 10 | ```markup 11 | @article{bojanowski2016enriching, 12 | title={Enriching Word Vectors with Subword Information}, 13 | author={Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas}, 14 | journal={arXiv preprint arXiv:1607.04606}, 15 | year={2016} 16 | } 17 | ``` 18 | 19 | [2] A. Joulin, E. Grave, P. Bojanowski, T. Mikolov, [*Bag of Tricks for Efficient Text Classification*](https://arxiv.org/abs/1607.01759) 20 | 21 | ```markup 22 | @article{joulin2016bag, 23 | title={Bag of Tricks for Efficient Text Classification}, 24 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas}, 25 | journal={arXiv preprint arXiv:1607.01759}, 26 | year={2016} 27 | } 28 | ``` 29 | 30 | [3] A. Joulin, E. Grave, P. Bojanowski, M. Douze, H. Jégou, T. Mikolov, [*FastText.zip: Compressing text classification models*](https://arxiv.org/abs/1612.03651) 31 | 32 | ```markup 33 | @article{joulin2016fasttext, 34 | title={FastText.zip: Compressing text classification models}, 35 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, H{\'e}rve and Mikolov, Tomas}, 36 | journal={arXiv preprint arXiv:1612.03651}, 37 | year={2016} 38 | } 39 | ``` 40 | 41 | (\* These authors contributed equally.) 
42 | -------------------------------------------------------------------------------- /doc/en/supervised-models.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: supervised-models 3 | title: Supervised models 4 | --- 5 | 6 | This page gathers several pre-trained supervised models on several datasets. 7 | 8 | ### Description 9 | 10 | The regular models are trained using the procedure described in [1]. They can be reproduced using the classification-results.sh script within our GitHub repository. The quantized models are built by using the respective supervised settings and adding the following flags to the quantize subcommand. 11 | 12 | ```bash 13 | -qnorm -retrain -cutoff 100000 14 | ``` 15 | 16 | ### Table of models 17 | 18 | Each entry describes the test accuracy and size of the model. You can click on a table cell to download the corresponding model. 19 | 20 | | dataset | ag news | amazon review full | amazon review polarity | dbpedia | 21 | |-----------|-----------------------|-----------------------|------------------------|------------------------| 22 | | regular | [0.924 / 387MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/ag_news.bin) | [0.603 / 462MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/amazon_review_full.bin) | [0.946 / 471MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/amazon_review_polarity.bin) | [0.986 / 427MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/dbpedia.bin) | 23 | | compressed | [0.92 / 1.6MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/ag_news.ftz) | [0.599 / 1.6MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/amazon_review_full.ftz) | [0.93 / 1.6MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/amazon_review_polarity.ftz) | [0.984 / 1.7MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/dbpedia.ftz) | 24 | 25 | | dataset | sogou news | yahoo answers | yelp review polarity | yelp review full | 26 | |-----------|----------------------|------------------------|----------------------|------------------------| 27 | | regular | [0.969 / 402MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/sogou_news.bin) | [0.724 / 494MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yahoo_answers.bin)| [0.957 / 409MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yelp_review_polarity.bin)| [0.639 / 412MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yelp_review_full.bin)| 28 | | compressed | [0.968 / 1.4MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/sogou_news.ftz) | [0.717 / 1.6MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yahoo_answers.ftz) | [0.957 / 1.5MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yelp_review_polarity.ftz) | [0.636 / 1.5MB](https://s3-us-west-1.amazonaws.com/fasttext-vectors/supervised_models/yelp_review_full.ftz) | 29 | 30 | ### References 31 | 32 | If you use these models, please cite the following paper: 33 | 34 | [1] A. Joulin, E. Grave, P. Bojanowski, T. 
Mikolov, [*Bag of Tricks for Efficient Text Classification*](https://arxiv.org/abs/1607.01759) 35 | 36 | ```markup 37 | @article{joulin2016bag, 38 | title={Bag of Tricks for Efficient Text Classification}, 39 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas}, 40 | journal={arXiv preprint arXiv:1607.01759}, 41 | year={2016} 42 | } 43 | ``` 44 | 45 | [2] A. Joulin, E. Grave, P. Bojanowski, M. Douze, H. Jégou, T. Mikolov, [*FastText.zip: Compressing text classification models*](https://arxiv.org/abs/1612.03651) 46 | 47 | ```markup 48 | @article{joulin2016fasttext, 49 | title={FastText.zip: Compressing text classification models}, 50 | author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, H{\'e}rve and Mikolov, Tomas}, 51 | journal={arXiv preprint arXiv:1612.03651}, 52 | year={2016} 53 | } 54 | ``` 55 | -------------------------------------------------------------------------------- /doc/en/supervised-tutorial.md: -------------------------------------------------------------------------------- 1 | --- 2 | id: supervised-tutorial 3 | title: Text classification 4 | --- 5 | 6 | Text classification is a core problem in many applications, like spam detection, sentiment analysis or smart replies. In this tutorial, we describe how to build a text classifier with the fastText tool. 7 | 8 | ## What is text classification? 9 | 10 | The goal of text classification is to assign documents (such as emails, posts, text messages, product reviews, etc.) to one or multiple categories. Such categories can be review scores, spam vs. non-spam, or the language in which the document was typed. Nowadays, the dominant approach to build such classifiers is machine learning, that is, learning classification rules from examples. In order to build such classifiers, we need labeled data, which consists of documents and their corresponding categories (or tags, or labels). 11 | 12 | As an example, we build a classifier which automatically classifies Stack Exchange questions about cooking into one of several possible tags, such as `pot`, `bowl` or `baking`. 13 | 14 | ## Installing fastText 15 | 16 | The first step of this tutorial is to install and build fastText. It only requires a C++ compiler with good support of C++11. 17 | 18 | Let us start by downloading the [most recent release](https://github.com/facebookresearch/fastText/releases): 19 | 20 | ```bash 21 | $ wget https://github.com/facebookresearch/fastText/archive/v0.1.0.zip 22 | $ unzip v0.1.0.zip 23 | ``` 24 | 25 | Move to the fastText directory and build it: 26 | 27 | ```bash 28 | $ cd fastText-0.1.0 29 | $ make 30 | ``` 31 | 32 | Running the binary without any argument will print the high-level documentation, showing the different use cases supported by fastText: 33 | 34 | ```bash 35 | >> ./fasttext 36 | usage: fasttext