├── .gitignore ├── .travis.yml ├── Dockerfile ├── Dockerfile.binary ├── LICENSE ├── Makefile ├── README.md ├── deployments ├── README.md ├── jelastic │ ├── README.md │ ├── images │ │ ├── checking-logs.png │ │ ├── choose-service-provider.png │ │ ├── installation-dialog.png │ │ ├── installation-progress.png │ │ └── signup-verification.png │ └── jelastic.jps └── kubernetes │ ├── README.md │ ├── deployment.yaml │ └── job.yaml └── examples ├── classification-data.sh └── classification-example.sh /.gitignore: -------------------------------------------------------------------------------- 1 | **data/* 2 | **result/* 3 | fasttext.bin 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | sudo: required 2 | 3 | services: 4 | - docker 5 | 6 | script: 7 | - make 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:xenial 2 | MAINTAINER Mark Kockerbeck 3 | RUN apt-get update && apt-get install -y \ 4 | build-essential \ 5 | wget \ 6 | git \ 7 | python-dev \ 8 | unzip \ 9 | python-numpy \ 10 | python-scipy \ 11 | && rm -rf /var/cache/apk/* 12 | 13 | RUN git clone https://github.com/facebookresearch/fastText.git /tmp/fastText && \ 14 | rm -rf /tmp/fastText/.git* && \ 15 | mv /tmp/fastText/* / && \ 16 | cd / && \ 17 | make 18 | 19 | WORKDIR / 20 | CMD ["./fasttext"] 21 | -------------------------------------------------------------------------------- /Dockerfile.binary: -------------------------------------------------------------------------------- 1 | # See Makefile for how to build with this Dockerfile 2 | FROM ubuntu:xenial 3 | MAINTAINER Mark Kockerbeck 4 | ADD fasttext.bin /fasttext 5 | WORKDIR / 6 | ENTRYPOINT ["./fasttext"] 7 | CMD ["help"] 8 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: all 2 | 3 | all: clean build binary 4 | 5 | DOCKER_IMAGE_PREFIX=xebxeb/fasttext-docker# must be lower case 6 | DOCKER_BUILD_CMD=docker build 7 | 8 | clean: 9 | rm -rf examples/data 10 | rm -rf examples/result 11 | 12 | build: 13 | $(DOCKER_BUILD_CMD) -t $(DOCKER_IMAGE_PREFIX):devel -f Dockerfile . 14 | $(DOCKER_BUILD_CMD) -t $(DOCKER_IMAGE_PREFIX):latest -f Dockerfile . 15 | 16 | binary: build 17 | docker run --rm $(DOCKER_IMAGE_PREFIX):devel cat ./fasttext > fasttext.bin 18 | chmod +x fasttext.bin 19 | $(DOCKER_BUILD_CMD) -t $(DOCKER_IMAGE_PREFIX):binary -f Dockerfile.binary . 
20 | rm fasttext.bin 21 | 22 | # ----- 23 | # Other make options below 24 | # ----- 25 | 26 | test: 27 | docker run --rm -it $(DOCKER_IMAGE_PREFIX):devel ./word-vector-example.sh 28 | docker run --rm -it $(DOCKER_IMAGE_PREFIX):devel ./classification-example.sh 29 | docker run --rm -it $(DOCKER_IMAGE_PREFIX):devel ./classification-results.sh 30 | 31 | run: 32 | docker run --rm -it $(DOCKER_IMAGE_PREFIX):devel /bin/bash 33 | 34 | example: 35 | cd examples && ./classification-example.sh 36 | 37 | publish: 38 | docker push $(DOCKER_IMAGE_PREFIX):devel 39 | docker push $(DOCKER_IMAGE_PREFIX):latest 40 | docker push $(DOCKER_IMAGE_PREFIX):binary 41 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # fastText Docker [![Build Status](https://travis-ci.org/xeb/fastText-docker.svg?branch=master)](https://travis-ci.org/xeb/fastText-docker) 2 | Dockerfile and example for Facebook Research's [fastText](https://github.com/facebookresearch/fastText). 3 | 4 | # Quick Deployments 5 | If you'd like to use a cluster manager to deploy fastText-docker, we have configurations for: 6 | - [Kubernetes](deployments/kubernetes/) 7 | 8 | See more in [deployments](deployments/) 9 | 10 | # Getting Started 11 | The quickest way to see the fastText classification tutorial with fastText-docker is: 12 | ``` 13 | docker pull xebxeb/fasttext-docker 14 | mkdir -p /tmp/data && mkdir -p /tmp/result 15 | docker run --rm -v /tmp/data:/data -v /tmp/result:/result -it xebxeb/fasttext-docker ./classification-example.sh 16 | ``` 17 | 18 | **NOTE**: if you ran the above on macOS, the data & results are going to be on your Docker Machine VM. Use a path in ```/Users/${USER}/``` if you want to map to your local system. Or do something like: ```docker-machine ssh `docker-machine active` ls /tmp/data``` to see the files in your VM. 
19 | 20 | # Types of Dockerfiles 21 | There are two Dockerfiles, including: 22 | - **Dockerfile** all-in-one, used for development purposes. Includes the fastText binary, the entire source repository and Python dependencies. 23 | - **Dockerfile.binary** just for executing the fastText binary. The fasttext binary is the container's entrypoint. 24 | 25 | # Pulling Prebuilt Images 26 | If you'd like to use the published prebuilt images, you can pull them from DockerHub. *NOTE*: the _latest_ will always be the *devel* tag. 27 | ``` 28 | docker pull xebxeb/fasttext-docker 29 | docker pull xebxeb/fasttext-docker:devel 30 | docker pull xebxeb/fasttext-docker:binary 31 | ``` 32 | 33 | # Development Container 34 | ## Building Devel 35 | To build the devel Docker image, simply do a ```make``` after cloning or: 36 | ``` 37 | docker build -t fasttext . 38 | ``` 39 | Building the docker image will create the fasttext binary as well as clone the repository -- all in the root of the container. 40 | There is no entrypoint for the *devel* container and any of the examples in the [fastText](https://github.com/facebookresearch/fastText) repository will work. 41 | 42 | ## Using Devel 43 | The development container is meant to be interactive, so the best way to use it is probably with a mounted volume and bash. 44 | ``` 45 | $ mkdir -p /tmp/data && mkdir -p /tmp/result 46 | $ docker run --rm -it -v /tmp/data:/data -v /tmp/result:/result fasttext /bin/bash 47 | # ./fasttext 48 | usage: fasttext 49 | 50 | The commands supported by fasttext are: 51 | 52 | supervised train a supervised classifier 53 | test evaluate a supervised classifier 54 | predict predict most likely label 55 | skipgram train a skipgram model 56 | cbow train a cbow model 57 | print-vectors print vectors given a trained model 58 | 59 | # ./classification-example.sh 60 | Resolving googledrive.com (googledrive.com)... 
216.58.194.33, 2607:f8b0:4000:802::2001 61 | Connecting to googledrive.com (googledrive.com)|216.58.194.33|:443... connected. 62 | HTTP request sent, awaiting response... 302 Moved Temporarily 63 | .... 64 | dbpedia_csv/ 65 | dbpedia_csv/classes.txt 66 | dbpedia_csv/test.csv 67 | dbpedia_csv/train.csv 68 | dbpedia_csv/readme.txt 69 | make: Nothing to be done for `opt'. 70 | Read 32M words 71 | Progress: 50.2% words/sec/thread: 1833592 lr: 0.049821 loss: 0.141374 eta: 0h0m 72 | 73 | ``` 74 | You get the idea... it's a full interactive shell with a mounted volume. 75 | 76 | **NOTE** be sure to use absolute paths in your local mount arguments! And if you are on macOS, be sure that your path is within /Users/ -- otherwise you will map to a path on your Docker Machine VM. See [classification-example.sh](examples/classification-example.sh) for an example. 77 | 78 | # Binary Container 79 | ## Building Binary 80 | If you'd just like a pre-built binary of fastText, you can build the *binary* simply by doing: 81 | ``` 82 | make binary 83 | ``` 84 | (and modifying the Makefile to your image name if you'd like) 85 | 86 | 87 | ## Using Binary 88 | You will likely want to mount a volume with Docker in order to use the binary container because it has an entrypoint of the fasttext binary. For example: 89 | 90 | ``` 91 | docker run --rm -v /var/path/to/data:/data -v /var/path/to/results:/result xebxeb/fasttext-docker:binary test "/result/dbpedia.bin" "/data/dbpedia.test" 92 | docker run --rm -v /var/path/to/data:/data -v /var/path/to/results:/result xebxeb/fasttext-docker:binary predict "/result/dbpedia.bin" "/data/dbpedia.test" > "data/dbpedia.test.predict" 93 | ``` 94 | See [classification-example.sh](examples/classification-example.sh) for an example using the **devel** tag. Simply replace that with **binary** and remove the _./fasttext_ argument to achieve the same result. 
95 | -------------------------------------------------------------------------------- /deployments/README.md: -------------------------------------------------------------------------------- 1 | # fastText-docker Deployments 2 | 3 | This folder contains examples of how to deploy the fastText-docker container in various environments. Specifically: 4 | 5 | - [Kubernetes](kubernetes/) 6 | 7 | - [Jelastic](jelastic/) 8 | -------------------------------------------------------------------------------- /deployments/jelastic/README.md: -------------------------------------------------------------------------------- 1 | # Deploying fastText-docker on Jelastic 2 | 3 | ### Initiate Deployment 4 | 5 | 6 | Press the deploy button above. 7 | 8 | ### Choose a service provider 9 | 10 | 11 | Press the install button. 12 | 13 | ### Signup Verification 14 | 15 | 16 | Please note: the verification process may vary between different Jelastic cloud service providers. 17 | 18 | ### Installation Dialog 19 | 20 | 21 | Press the install button. 22 | 23 | ### Installation Progress 24 | 25 | 26 | Wait until it's ready. 
27 | 28 | ### Checking Logs 29 | 30 | 31 | Check the logs https://docs.jelastic.com/view-log-files or jump to the SSH console https://docs.jelastic.com/ssh-access 32 | 33 | -------------------------------------------------------------------------------- /deployments/jelastic/images/checking-logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeb/fastText-docker/01d87416de8ebabe51b77da48a9aaf509370bcc8/deployments/jelastic/images/checking-logs.png -------------------------------------------------------------------------------- /deployments/jelastic/images/choose-service-provider.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeb/fastText-docker/01d87416de8ebabe51b77da48a9aaf509370bcc8/deployments/jelastic/images/choose-service-provider.png -------------------------------------------------------------------------------- /deployments/jelastic/images/installation-dialog.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeb/fastText-docker/01d87416de8ebabe51b77da48a9aaf509370bcc8/deployments/jelastic/images/installation-dialog.png -------------------------------------------------------------------------------- /deployments/jelastic/images/installation-progress.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeb/fastText-docker/01d87416de8ebabe51b77da48a9aaf509370bcc8/deployments/jelastic/images/installation-progress.png -------------------------------------------------------------------------------- /deployments/jelastic/images/signup-verification.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/xeb/fastText-docker/01d87416de8ebabe51b77da48a9aaf509370bcc8/deployments/jelastic/images/signup-verification.png 
-------------------------------------------------------------------------------- /deployments/jelastic/jelastic.jps: -------------------------------------------------------------------------------- 1 | { 2 | "jpsType": "install", 3 | "application": { 4 | "name": "fastText", 5 | "homepage": "https://research.facebook.com/blog/fasttext/", 6 | "description": "fastText is a library designed to help build scalable solutions for text representation and classification, open-sourced by Facebook AI Research (FAIR)", 7 | "env": { 8 | "topology": { 9 | "nodes": [ 10 | { 11 | "cloudlets": 16, 12 | "count": 1, 13 | "nodeGroup": "cp", 14 | "docker": { 15 | "image": "xebxeb/fasttext-docker", 16 | "env": {}, 17 | "volumes": [ 18 | "/result", 19 | "/data" 20 | ], 21 | "cmd": "/classification-example.sh" 22 | } 23 | } 24 | ] 25 | } 26 | } 27 | }, 28 | "success": "Your fastText has been installed successfully. Please check the logs or jump to SSH Gate" 29 | } 30 | -------------------------------------------------------------------------------- /deployments/kubernetes/README.md: -------------------------------------------------------------------------------- 1 | # Deploying fastText-docker on Kubernetes 2 | 3 | fastText is not currently a parallelizable process, so any use in Kubernetes (that I can foresee) would be either as a: 4 | - [Job](job.yaml) for running one-off analyses 5 | - [Deployment](deployment.yaml) for accessing inside a cluster 6 | 7 | # Getting Started 8 | If you are unfamiliar with Kubernetes, I'd recommend trying these examples out with [minikube](https://github.com/kubernetes/minikube). 9 | 10 | After you've `minikube start`'d your local cluster, you can either create the deployment or run the classification job. Example commands and expected output are below. 11 | 12 | # Job Example 13 | The job will just run the classification-example in the Kubernetes cluster (& schedule it) and leave the resulting logs. No model will be captured. 
14 | 15 | ``` 16 | $ kubectl apply -f job.yaml 17 | job "fasttext-class" created 18 | 19 | $ kubectl get jobs 20 | NAME DESIRED SUCCESSFUL AGE 21 | fasttext-class 1 0 4s 22 | Deck:kubernetes xeb$ kubectl describe job fasttext-class 23 | Name: fasttext-class 24 | Namespace: default 25 | Image(s): xebxeb/fasttext-docker:latest 26 | Selector: controller-uid=fd643bb1-70de-11e6-8192-42a11ce5e7a2 27 | Parallelism: 1 28 | Completions: 1 29 | Start Time: Fri, 02 Sep 2016 00:29:31 -0700 30 | Labels: controller-uid=fd643bb1-70de-11e6-8192-42a11ce5e7a2 31 | job-name=fasttext-class 32 | Pods Statuses: 1 Running / 0 Succeeded / 0 Failed 33 | No volumes. 34 | Events: 35 | FirstSeen LastSeen Count From SubobjectPath Type Reason Message 36 | --------- -------- ----- ---- ------------- -------- ------ ------- 37 | 14s 14s 1 {job-controller } Normal SuccessfulCreate Created pod: fasttext-class-wcn9q 38 | 39 | 40 | $ kubectl logs fasttext-class-wcn9q 41 | 42 | --2016-09-02 07:29:34-- https://googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k 43 | Resolving googledrive.com (googledrive.com)... 216.58.194.97, 2607:f8b0:4000:809::2001 44 | Connecting to googledrive.com (googledrive.com)|216.58.194.97|:443... connected. 45 | HTTP request sent, awaiting response... 302 Moved Temporarily 46 | Location: https://8c47f35de2544a202d525720f08188d254682381.googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k [following] 47 | --2016-09-02 07:29:44-- https://8c47f35de2544a202d525720f08188d254682381.googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k 48 | .... 49 | 64750K .......... .......... .......... .......... .......... 12.7M 50 | 64800K .......... .......... .......... .......... .......... 16.1M 51 | 64850K .......... .......... .......... .......... .......... 16.4M 52 | 64900K .......... .......... .......... .......... .......... 15.6M 53 | 64950K .......... .......... .......... .......... .......... 15.7M 54 | 65000K .......... .......... .......... .......... .......... 
14.5M 55 | 65050K .......... .......... .......... .......... .......... 12.9M 56 | 65100K .......... .......... .......... .......... .......... 12.3M 57 | 65150K .......... .......... .......... .......... .......... 16.9M 58 | 65200K .......... .......... .......... .......... .......... 16.3M 59 | 65250K .......... .......... .......... .......... .......... 16.3M 60 | 65300K .......... .......... .......... .......... .......... 15.3M 61 | 65350K .......... .......... .......... .......... .......... 12.4M 62 | 65400K .......... .......... .......... .......... .......... 15.1M 63 | 65450K .......... .......... .......... .......... .......... 11.5M 64 | 65500K .......... .......... .......... .......... .......... 16.5M 65 | 65550K .......... .......... .......... .......... .......... 16.8M 66 | 65600K .......... .......... .......... .......... .......... 16.3M 67 | 65650K .......... .......... .......... .......... .......... 15.6M 68 | 65700K .......... .......... .......... .......... .......... 16.3M 69 | 65750K .......... .......... .......... .......... .......... 11.7M 70 | 65800K .......... .......... .......... .......... .......... 15.4M 71 | 65850K .......... .......... .......... .......... .......... 11.9M 72 | 65900K .......... .......... .......... .......... .......... 17.6M 73 | 65950K .......... .......... .......... .......... .......... 15.0M 74 | 66000K .......... .......... .......... .......... .......... 16.5M 75 | 66050K .......... .......... .......... .......... .......... 15.4M 76 | 66100K .......... .......... .......... .......... .......... 16.4M 77 | 66150K .......... .......... .......... .......... .......... 11.7M 78 | 66200K .......... .......... .......... .......... .......... 15.7M 79 | 66250K .......... .......... .......... .......... .......... 11.5M 80 | 66300K .......... .......... .......... .......... .......... 17.7M 81 | 66350K .......... .......... .......... .......... .......... 15.5M 82 | 66400K .......... 
.......... .......... .......... .......... 15.0M 83 | 66450K .......... .......... .......... .......... .......... 17.8M 84 | 66500K .......... .......... .......... .......... .......... 10.9M 85 | 66550K .......... .......... .......... .......... .......... 16.7M 86 | 66600K .......... .......... .......... .......... .......... 17.6M 87 | 66650K .......... .......... .......... .......... .......... 11.0M 88 | 66700K .......... .......... .......... ......... 24.4M=4.8s 89 | 90 | 2016-09-02 07:29:59 (13.5 MB/s) - 'data/dbpedia_csv.tar.gz' saved [68341698] 91 | 92 | dbpedia_csv/ 93 | dbpedia_csv/classes.txt 94 | dbpedia_csv/test.csv 95 | dbpedia_csv/train.csv 96 | dbpedia_csv/readme.txt 97 | make: Nothing to be done for `opt'. 98 | Read 32M words 99 | Number of words: 803537 100 | Number of labels: 14 101 | 102 | 103 | $ 104 | ``` 105 | 106 | # Deployment Example 107 | The deployment example will just get a running dev container from the Kubernetes cluster. It won't do anything (unlike the Job) 108 | ``` 109 | $ kubectl create -f deployment.yaml 110 | deployment "fasttext" created 111 | 112 | $ kubectl get deployments 113 | NAME DESIRED CURRENT UP-TO-DATE AVAILABLE AGE 114 | fasttext 1 1 1 0 19s 115 | 116 | $ kubectl describe deployment fasttext 117 | Name: fasttext 118 | Namespace: default 119 | CreationTimestamp: Fri, 02 Sep 2016 00:23:36 -0700 120 | Labels: app=fasttext 121 | Selector: app=fasttext 122 | Replicas: 1 updated | 1 total | 1 available | 0 unavailable 123 | StrategyType: RollingUpdate 124 | MinReadySeconds: 0 125 | RollingUpdateStrategy: 1 max unavailable, 1 max surge 126 | OldReplicaSets: 127 | NewReplicaSet: fasttext-657236089 (1/1 replicas created) 128 | Events: 129 | FirstSeen LastSeen Count From SubobjectPath Type Reason Message 130 | --------- -------- ----- ---- ------------- -------- ------ ------- 131 | 11s 11s 1 {deployment-controller } Normal ScalingReplicaSet Scaled up replica set fasttext-657236089 to 1 132 | 133 | 134 | 135 | $ 
kubectl get pods 136 | NAME READY STATUS RESTARTS AGE 137 | fasttext-657236089-at1e4 1/1 Running 0 1m 138 | 139 | $ kubectl exec -it fasttext-657236089-at1e4 /bin/bash216.58.194.97, 2607:f8b0:4000:80d::2001 140 | Connecting to googledrive.com (googledrive.com)|216.58.194.97|:443... connected. 141 | HTTP request sent, awaiting response... 302 Moved Temporarily 142 | Location: https://8c47f35de2544a202d525720f08188d254682381.googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k [following] 143 | --2016-09-02 07:25:04-- https://8c47f35de2544a202d525720f08188d254682381.googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k 144 | Resolving 8c47f35de2544a202d525720f08188d254682381.googledrive.com (8c47f35de2544a202d525720f08188d254682381.googledrive.com)... 216.58.218.97, 2607:f8b0:4005:801::2001 145 | Connecting to 8c47f35de2544a202d525720f08188d254682381.googledrive.com (8c47f35de2544a202d525720f08188d254682381.googledrive.com)|216.58.218.97|:443... connected. 146 | HTTP request sent, awaiting response... 200 OK 147 | Length: unspecified [application/x-gzip] 148 | Saving to: 'data/dbpedia_csv.tar.gz' 149 | 150 | [ <=> ] 68,341,698 14.1MB/s in 4.8s 151 | 152 | 2016-09-02 07:25:19 (13.6 MB/s) - 'data/dbpedia_csv.tar.gz' saved [68341698] 153 | 154 | dbpedia_csv/ 155 | dbpedia_csv/classes.txt 156 | dbpedia_csv/test.csv 157 | dbpedia_csv/train.csv 158 | dbpedia_csv/readme.txt 159 | make: Nothing to be done for `opt'. 
160 | Read 32M words
161 | Number of words: 803537
162 | Number of labels: 14
163 | Progress: 1.1% words/sec/thread: 1250676 lr: 0.098876 loss: 1.953058 eta: 0h0m h-14m
164 | 
165 | $ kubectl delete deployment fasttext
166 | 
167 | ```
168 | 
--------------------------------------------------------------------------------
/deployments/kubernetes/deployment.yaml:
--------------------------------------------------------------------------------
1 | # Long-running dev container; reach it with `kubectl exec` to run fastText by hand.
2 | apiVersion: apps/v1
3 | kind: Deployment
4 | metadata:
5 |   name: fasttext
6 | spec:
7 |   replicas: 1
8 |   # apps/v1 requires an explicit selector that matches the pod template labels.
9 |   selector:
10 |     matchLabels:
11 |       app: fasttext
12 |   template:
13 |     metadata:
14 |       labels:
15 |         app: fasttext
16 |     spec:
17 |       containers:
18 |       - name: fasttext
19 |         image: xebxeb/fasttext-docker:latest
20 |         # Sleep for 100000 seconds so the container stays up instead of exiting.
21 |         command: ["/bin/sleep", "100000"]
22 |         # NOTE: this deployment is meant to be accessed elsewhere
23 | 
--------------------------------------------------------------------------------
/deployments/kubernetes/job.yaml:
--------------------------------------------------------------------------------
1 | # Job that runs ./classification-example.sh inside the fastText image.
2 | apiVersion: batch/v1
3 | kind: Job
4 | metadata:
5 |   name: fasttext-class
6 | spec:
7 |   template:
8 |     metadata:
9 |       name: fasttext-class
10 |     spec:
11 |       containers:
12 |       - name: fasttext-class
13 |         image: xebxeb/fasttext-docker:latest
14 |         command: ["./classification-example.sh"]
15 |       # Never restart the container once the script has exited.
16 |       restartPolicy: Never
17 | 
--------------------------------------------------------------------------------
/examples/classification-data.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | #
3 | # Fetch the DBpedia CSV corpus and convert it into fastText train/test files.
4 | # Sets RESULTDIR and DATADIR and creates both directories as a side effect.
5 | 
6 | # Print the lines of the given files (or stdin) in random order.
7 | myshuf() {
8 |   perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
9 | }
10 | 
11 | # Filter stdin -> stdout: lowercase, prefix every line with "__label__",
12 | # space-pad ' . , ( ) ! ?, delete double quotes, replace <br />, ; and :
13 | # with a space, squeeze runs of spaces, then shuffle the lines.
14 | normalize_text() {
15 |   tr '[:upper:]' '[:lower:]' | sed -e 's/^/__label__/g' | \
16 |     sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
17 |         -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
18 |         -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf
19 | }
20 | 
21 | RESULTDIR=result
22 | DATADIR=data
23 | 
24 | mkdir -p "${RESULTDIR}"
25 | mkdir -p "${DATADIR}"
26 | 
27 | # Download and convert only when the training file does not exist yet.
28 | if [ ! -f "${DATADIR}/dbpedia.train" ]
29 | then
30 |   wget -c "https://googledrive.com/host/0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k" -O "${DATADIR}/dbpedia_csv.tar.gz"
31 |   tar -xzvf "${DATADIR}/dbpedia_csv.tar.gz" -C "${DATADIR}"
32 |   cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "${DATADIR}/dbpedia.train"
33 |   cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "${DATADIR}/dbpedia.test"
34 |   # The archive was saved under DATADIR, so that is where it must be removed.
35 |   rm "${DATADIR}/dbpedia_csv.tar.gz"
36 | fi
37 | 
--------------------------------------------------------------------------------
/examples/classification-example.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 | 
3 | source classification-data.sh
4 | 
5 | # Get absolute paths for mounting in Docker
6 | LOCAL_RESULTDIR=$(python -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' $RESULTDIR)
7 | LOCAL_DATADIR=$(python -c 'import os,sys;print(os.path.realpath(sys.argv[1]))' $DATADIR)
8 | 
9 | # Note the only difference between the devel and binary tags in this context
10 | # will be the "./fasttext" command within DOCKER_CMD. The binary tag does not require it.
11 | DOCKER_IMAGE_PREFIX=xebxeb/fasttext-docker:devel 12 | DOCKER_CMD="docker run --rm -v ${LOCAL_RESULTDIR}:/${RESULTDIR} -v ${LOCAL_DATADIR}:/${DATADIR} ${DOCKER_IMAGE_PREFIX} ./fasttext" 13 | 14 | if [[ -z $(docker images | grep -E "^${DOCKER_IMAGE_PREFIX}\s") ]] 15 | then 16 | make 17 | fi 18 | 19 | echo Training model 20 | $DOCKER_CMD supervised -input "/${DATADIR}/dbpedia.train" -output "/${RESULTDIR}/dbpedia" -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 4 21 | 22 | echo Testing model 23 | $DOCKER_CMD test "/${RESULTDIR}/dbpedia.bin" "/${DATADIR}/dbpedia.test" 24 | 25 | echo Getting prediction 26 | $DOCKER_CMD predict "/${RESULTDIR}/dbpedia.bin" "/${DATADIR}/dbpedia.test" > "${LOCAL_RESULTDIR}/dbpedia.test.predict" 27 | --------------------------------------------------------------------------------