├── .gitignore ├── .gitmodules ├── .travis.yml ├── LICENSE ├── README.md ├── pom.xml ├── vw-webservice-common ├── README.md ├── pom.xml └── src │ ├── main │ └── java │ │ └── com │ │ └── eharmony │ │ └── matching │ │ └── vw │ │ └── webservice │ │ └── common │ │ ├── example │ │ ├── Example.java │ │ ├── ExampleFormatException.java │ │ ├── ExampleMediaTypes.java │ │ ├── StringExample.java │ │ └── StructuredExample.java │ │ └── prediction │ │ ├── Prediction.java │ │ ├── PredictionMediaTypes.java │ │ └── StringPrediction.java │ └── test │ ├── java │ └── com │ │ └── eharmony │ │ └── matching │ │ └── vw │ │ └── webservice │ │ └── example │ │ └── StructuredExampleTest.java │ └── resources │ ├── ner.train.gz │ └── vw_example_schema.json ├── vw-webservice-core ├── README.md ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── eharmony │ └── matching │ └── vw │ └── webservice │ └── core │ ├── ExampleReadException.java │ ├── ExamplesIterable.java │ ├── ExamplesIterableImpl.java │ └── exampleprocessor │ ├── ExampleProcessingEventHandler.java │ ├── ExampleProcessingManager.java │ ├── ExampleProcessor.java │ ├── ExampleProcessorFactory.java │ ├── ExampleProcessorFeatures.java │ ├── ExampleProcessorFeaturesImpl.java │ ├── ExampleSubmissionException.java │ ├── ExampleSubmissionState.java │ ├── PredictionFetchException.java │ ├── PredictionFetchState.java │ └── tcpip │ ├── AsyncFailFastTCPIPExampleProcessor.java │ ├── TCPIPExampleProcessingManager.java │ ├── TCPIPExampleProcessorFactory.java │ ├── TCPIPPredictionsIterator.java │ ├── TCPIPSocketFactory.java │ └── TCPIPSocketFactoryImpl.java └── vw-webservice-jersey ├── README.md ├── pom.xml └── src ├── main ├── java │ └── com │ │ └── eharmony │ │ └── matching │ │ └── vw │ │ └── webservice │ │ ├── PredictResource.java │ │ ├── RequestHandler.java │ │ ├── messagebodyreader │ │ ├── jsonexamplesmessagebodyreader │ │ │ ├── GsonJsonExamplesProvider.java │ │ │ ├── JsonExamplesProvider.java │ │ │ ├── 
SimpleJsonExamplesMessageBodyReader.java │ │ │ ├── StructuredJsonExamplesMessageBodyReader.java │ │ │ ├── StructuredJsonExamplesProvider.java │ │ │ ├── StructuredJsonPropertyNames.java │ │ │ └── TracingJsonReader.java │ │ └── plaintextexamplesmessagebodyreader │ │ │ ├── PlainTextExamplesMessageBodyReader.java │ │ │ └── StringExampleIterator.java │ │ └── util │ │ └── StringIterable.java ├── resources │ ├── logback.xml │ ├── logging.properties │ └── vw-webservice.properties └── webapp │ └── WEB-INF │ ├── applicationContext.xml │ └── web.xml └── test ├── java └── com │ └── eharmony │ └── matching │ └── vw │ └── webservice │ ├── client │ ├── AsyncHttpClientTest.java │ └── TestUtils.java │ ├── core │ └── exampleprocessor │ │ └── tcpip │ │ └── AsyncFailFastTCPIPExampleProcessorTest.java │ ├── messagebodyreader │ ├── jsonexamplesmessagebodyreader │ │ ├── GsonJsonExamplesProviderTest.java │ │ ├── JsonTestUtils.java │ │ ├── SimpleJsonExamplesMessageBodyReaderTest.java │ │ └── StructuredJsonExamplesMessageBodyReaderTest.java │ └── plaintextexamplesmessagebodyreader │ │ └── PlainTextExamplesMessageBodyReaderTest.java │ └── util │ └── StringIterableTest.java └── resources ├── logback-test.xml ├── logging.properties └── ner.train.gz /.gitignore: -------------------------------------------------------------------------------- 1 | .settings 2 | .classpath 3 | .project 4 | .springBeans 5 | .gitignore 6 | target/ 7 | META-INF/ 8 | vw-webservice.np.dc1.eharmony.com 9 | /vowpal_wabbit 10 | ner.train 11 | index.jsp 12 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "vowpal_wabbit"] 2 | path = vowpal_wabbit 3 | url = https://github.com/JohnLangford/vowpal_wabbit.git 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 
1 | language: java 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2013, eHarmony Inc 2 | Copyright (c) 2014, eHarmony Inc 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without modification, 6 | are permitted provided that the following conditions are met: 7 | 8 | * Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | * Redistributions in binary form must reproduce the above copyright notice, this 12 | list of conditions and the following disclaimer in the documentation and/or 13 | other materials provided with the distribution. 14 | 15 | * Neither the name of eHarmony nor the names of its 16 | contributors may be used to endorse or promote products derived from 17 | this software without specific prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR 23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://travis-ci.org/eHarmony/vw-webservice.png)](https://travis-ci.org/eHarmony/vw-webservice) 2 | 3 | # Vowpal Wabbit Webservice 4 | 5 | This is a simple web service that wraps the [vowpal wabbit](https://github.com/JohnLangford/vowpal_wabbit) daemon. 6 | 7 | ## Installation 8 | 9 | ### Dependencies 10 | 11 | * Java 1.7 12 | * Maven 2.2.1 or Maven 3 13 | * Jetty 9.1.0 14 | * Vowpal Wabbit (included as a submodule) 15 | 16 | The current web service was developed and tested on Jetty 9.1.0. You will need Maven (either version 2 or 3) to build the web service. Instructions for both versions have been included in this document, so pick the version of Maven you'd like to use and execute the provided instructions. 17 | 18 | But first, we need to get the right version of Java... 19 | 20 | #### Java 1.7 21 | 22 | You will need Java 7 in order to run Jetty 9.1.0. Furthermore, you need the JDK (instead of the JRE) in order to run Maven. 23 | 24 | From the Maven documentation: 25 | 26 | ``` 27 | Make sure that JAVA_HOME is set to the location of your JDK, e.g. export JAVA_HOME=/usr/java/jdk1.7.x and that $JAVA_HOME/bin is in your PATH environment variable. 28 | ``` 29 | 30 | Once the Java 7 JDK is ready to go (with JAVA_HOME/bin also properly set on your PATH), you can install the version of Maven you'd like to use (pick either 2.2.1 or 3.1.1 from below). 
31 | 32 | #### Maven 2.2.1 33 | 34 | ``` 35 | wget http://mirror.tcpdiag.net/apache/maven/maven-2/2.2.1/binaries/apache-maven-2.2.1-bin.tar.gz 36 | tar xzvf apache-maven-2.2.1-bin.tar.gz 37 | export M2_HOME=$PWD/apache-maven-2.2.1 38 | export PATH=$M2_HOME/bin:$PATH 39 | 40 | # check it worked 41 | mvn -version 42 | ``` 43 | 44 | #### Maven 3.1.1 45 | 46 | ``` 47 | wget http://mirror.tcpdiag.net/apache/maven/maven-3/3.1.1/binaries/apache-maven-3.1.1-bin.tar.gz 48 | tar xzvf apache-maven-3.1.1-bin.tar.gz 49 | export M2_HOME=$PWD/apache-maven-3.1.1 50 | export PATH=$M2_HOME/bin:$PATH 51 | 52 | # check it worked 53 | mvn -version 54 | ``` 55 | 56 | Now let's install Jetty which we'll use as our web container for the web service. Although technically you should be able to use the web container of your choice (Tomcat/Glassfish/etc) bear in mind that so far we've only 57 | tested the web service using Jetty. 58 | 59 | #### Jetty 9.1.0 60 | 61 | On the box where you plan on running the web service, install [Jetty 9.1.0](http://eclipse.org/downloads/download.php?file=/jetty/9.1.0.v20131115/dist/jetty-distribution-9.1.0.v20131115.tar.gz&r=1). 62 | 63 | On the command line you can use: 64 | 65 | ``` 66 | wget http://mirrors.ibiblio.org/eclipse/jetty/9.1.0.v20131115/dist/jetty-distribution-9.1.0.v20131115.tar.gz 67 | tar xzvf jetty-distribution-9.1.0.v20131115.tar.gz 68 | ``` 69 | 70 | That's it for the prerequisites. Now you can go ahead and set up the VW web service. 71 | 72 | ### Building and Deploying the VW Web Service 73 | 74 | This involves 3 steps: 75 | 76 | 1. Build vowpal wabbit from source, then launch it in daemon mode. 77 | 2. Specify the host and port where vowpal wabbit is running in the vw-webservice.properties file, and build+package the webservice to produce the .war (Web application ARchive) file. 78 | 3. Place the .war file into the /webapps folder of Jetty. 79 | 80 | Let's get started. 
81 | 82 | Clone this repo: 83 | 84 | ``` 85 | git clone --recursive git@github.com:eHarmony/vw-webservice.git 86 | cd vw-webservice 87 | ``` 88 | 89 | Note: for the --recursive option to work (it grabs the vowpal wabbit submodule for you), you will need git 1.6.5 or later. Otherwise you can pull the vowpal wabbit submodule in separately using ``git submodule``. 90 | 91 | You should now have a vw-webservice directory with some files and 4 directories inside of it: 92 | 93 | * vowpal_wabbit 94 | * vw-webservice-common 95 | * vw-webservice-core 96 | * vw-webservice-jersey 97 | 98 | #### Building Vowpal Wabbit 99 | 100 | Now that you have the webservice, under the vw-webservice/vowpal_wabbit folder, you should find the C++ source for Vowpal Wabbit. Before you can launch the daemon you will have to build it. 101 | 102 | If you're using Linux, then make sure you already have gcc and g++ installed on your system. Note also that Vowpal Wabbit depends on boost program options (on a Mac this can be installed via [homebrew](http://brew.sh): ``brew install boost`` and on Linux you can try ``sudo apt-get install -y -m libboost-program-options-dev``). 103 | 104 | ``` 105 | cd vowpal_wabbit 106 | make clean 107 | make 108 | 109 | #now launch it in daemon mode (from within the vw-webservice/vowpal_wabbit directory) 110 | ./vowpalwabbit/vw --daemon [other options you like] 111 | ``` 112 | 113 | #### Building VW Web Service 114 | 115 | Now that we have Vowpal Wabbit up and running, we just need to make sure that the web service knows the host and port where the daemon lives. 
Edit the config: 116 | 117 | ``` 118 | vim vw-webservice/vw-webservice-jersey/src/main/resources/vw-webservice.properties 119 | ``` 120 | 121 | and change if necessary: 122 | ``` 123 | vw.hostName=localhost 124 | vw.port=26542 125 | ``` 126 | 127 | Now let's build and package up the web service: 128 | 129 | ``` 130 | mvn package 131 | ``` 132 | 133 | In the output, you should see the location where the WAR (Web application ARchive) file has been created: 134 | 135 | ``` 136 | ... 137 | ... 138 | ... 139 | [INFO] Webapp assembled in[172 msecs] 140 | [INFO] Building war: vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war 141 | [INFO] ------------------------------------------------------------------------ 142 | [INFO] BUILD SUCCESSFUL 143 | [INFO] ------------------------------------------------------------------------ 144 | [INFO] Total time: 23 seconds 145 | ... 146 | ... 147 | ... 148 | ``` 149 | 150 | Now you can deploy the .war file: 151 | 152 | ``` 153 | # the maven build (assuming you're using the default directories) will have spit out the WAR file to the 'target' subdirectory 154 | # if you're running the Jetty instance on your local machine, copy the WAR over to the 'webapps' directory of Jetty 155 | cp vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war /path/to/jetty-9.1.0/webapps/ 156 | 157 | # alternatively, you can scp the war file to the box where you are running your jetty instance: 158 | # scp vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war box.running.jetty.com:/path/to/jetty-9.1.0/webapps/ 159 | 160 | # Restart the Jetty instance (wherever you have Jetty running). 161 | cd /path/to/jetty-9.1.0 162 | java -jar start.jar 163 | ``` 164 | 165 | The last command will start spitting out the Jetty logs to the console. You can keep an eye on this as you submit requests to the vw-webservice, which will log to the console. 
The web service 166 | uses logback for logging, and the logging configuration can be found under vw-webservice-jersey/src/main/resources/logback.xml. 167 | 168 | ## Using the Web Service 169 | 170 | You can hit the webservice from the command line using curl, or code up your own client (in any language) to communicate with the web service. Something to keep in mind is that the client you use should support chunked transfer encoding, as this will allow you to stream massive amounts of data to/from the webservice, without buffering it all in memory to calculate the value of the Content-Length request header. A Java client that supports this is the [AsyncHttpClient](http://sonatype.github.io/async-http-client/). You can find a test that uses this client in ``vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/AsyncHttpClientTest.java``. 171 | 172 | Examples should follow the VW format. For more information on the VW input format, refer to the documentation at: https://github.com/JohnLangford/vowpal_wabbit/wiki/Input-format 173 | 174 | However, when examples are submitted to the web service by a client, they can be either in plaintext format, or in a more structured format. In either case, once an example is received by the web service, it will convert the example to the proper VW format before submitting it to the VW daemon. 175 | 176 | #### Plaintext examples 177 | 178 | This means you will be submitting a stream of examples to the web service, with each example being a string in the accepted VW input format. 179 | 180 | For instance: 181 | 182 | ``` 183 | 1 first|user name=Adam gender=male age=34 |movie Snatch 184 | -1 second|user name=Adam gender=male age=34 |movie Titanic 185 | 1 third|user name=Adam gender=male age=34 |movie Hangover 186 | ``` 187 | 188 | You can submit such examples to the web service from the command line using curl. 
Assuming all your plaintext VW examples are sitting in some file called examples.txt, you can do the following: 189 | 190 | ``` 191 | curl -H "Content-Type:text/plain" -X POST \ 192 | -T examples.txt \ 193 | http://host.running.jetty.com:8080/vw-webservice-jersey/predict/main \ 194 | -v 195 | ``` 196 | 197 | If you happen to have a humongous gzipped file containing millions of plaintext examples (eg, ner.train.gz, included under vw-webservice-jersey/src/test/resources, which has ~272K examples), you can do the following: 198 | 199 | ``` 200 | # assume we are in the vw-webservice directory 201 | gzcat vw-webservice-jersey/src/test/resources/ner.train.gz \ 202 | | curl -H "Content-Type:text/plain" \ 203 | -X POST \ 204 | -T - \ 205 | http://host.running.jetty.com:8080/vw-webservice-jersey/predict/main \ 206 | -v 207 | ``` 208 | 209 | The curl '-T' switch performs a file transfer, without trying to buffer all the data in memory to compute the Content-Length HTTP request header. 210 | 211 | Of course, you can also use any HTTP client to submit such a stream of plaintext examples to the web service. Just make sure that each example appears on a line by itself. 212 | 213 | #### Structured examples 214 | 215 | This means you will build up each VW example in a structured way using some API, and this structure will be reflected in the format of the data being sent to the web service. 216 | 217 | Currently, there is a class called StructuredExample.java in the package com.eharmony.matching.vw.webservice.common.example in the vw-webservice-common project, that lets you use the Builder pattern to build up an example from its component parts (a label, a tag, and a set of namespaces, each of which has some number of features). 218 | 219 | To see code that demonstrates this, check out the "simpleExampleBuildingTest" and "simpleExampleBuildingTestWithTag" tests in StructuredExampleTest.java in that same project. 
These tests demonstrate how to use the API to build up an example piece by piece. 220 | 221 | Once you have an instance of a StructuredExample, you can write that out to some stream. Currently, the web service only supports the json format for submitting structured examples. In json, a stream of structured examples must have the schema described in "vw_example_schema.json" found in the same project (vw-webservice-common) under the src/test/resources folder. Note that this is the schema for the entire stream of structured json examples that will be submitted to the web service. 222 | 223 | The serialized stream of json-formatted VW examples would look like this: 224 | 225 | ```javascript 226 | [ 227 | { 228 | "label": "34", 229 | "tag": "someTag", 230 | "namespaces": [{ 231 | "name": "one", 232 | "features": [{ 233 | "name": "a", 234 | "value": 12.34 235 | }, { 236 | "name": "b", 237 | "value": 45.1 238 | }] 239 | }, { 240 | "name": "two", 241 | "scale": 34.3, 242 | "features": [{ 243 | "name": "bah", 244 | "value": 0.038293 245 | }, { 246 | "name": "another", 247 | "value": 3.4 248 | }, { 249 | "name": "andThis", 250 | "value": 2.0 251 | }] 252 | }] 253 | } 254 | , 255 | { 256 | //the next json example 257 | } 258 | , 259 | ``` 260 | 261 | The first json example in the above chunk would be converted by the web service to the following before submitting to the VW daemon: "34 someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2". 262 | 263 | To see code that shows how to write a single StructuredExample in json format, check out the "writeExample" method in JsonTestUtils.java, which can be found in the vw-webservice-jersey project under src/test/java in the com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader package. 
264 | 265 | To see code that writes an entire stream of StructuredExamples in json format, check out the 'getJsonInputStreamBodyGenerator' method of AsyncHttpClientTest.java in the com.eharmony.matching.vw.webservice.client package under src/test/java in the vw-webservice-jersey project. 266 | 267 | ## Benchmarks 268 | 269 | Some basic benchmarks seem to indicate that, as the number of examples increases and hardware memory improves, the web-service seems to perform comparably to netcat. Note that we did not do any performance tweaking of the web-service. VW was running in daemon mode as "vw -b 10 --daemon", and we performed 10 runs with each setup. 270 | 271 | | Setup | # examples | # of features | median time | slowdown | 272 | |:--------------------------------------|-----------:|--------------:|------------:|---------:| 273 | | netcat and vw --daemon on localhost | 27M |1.2B | 239.7s | baseline | 274 | | webservice and vw daemon on localhost | 27M |1.2B | 244.4s | 2% | 275 | 276 | The percentage hit in terms of median times was only about 2%, which seems acceptable. 277 | 278 | ## ToDo 279 | 280 | * Document application/x-vw-text. 281 | * More tests. 282 | * Pull out integration tests into a separate module and have Maven run them as part of the verify phase. Use the failsafe plugin for this. 283 | * Incorporate maven enforcer, findbugs and checkstyle plugin invocations 284 | * Incorporate suggestions made by others during code review 285 | * Protocol buffer support. 286 | * Java client. 287 | * Javascript client. 288 | * Add compression support. 289 | * Automate setup and installation. 290 | * Move all property configuration outside the .war file. Right now the configuration is packaged inside, effectively making the .war files hard-coded. 291 | * Add codahale metrics gathering. 292 | * Go through all the TODO comments in the source code and make changes where necessary. 293 | * CometD support. 294 | * Speed optimizations. 295 | * Document extension points. 
296 | * Re-factor tests to instantiate a web-service instance, perhaps using Grizzly http server? 297 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.eharmony.matching 4 | vw-webservice 5 | 0.1.0-SNAPSHOT 6 | pom 7 | VW Web Service. 8 | The parent project of all the other vw web service projects. 9 | 10 | 11 | 12 | 13 | 14 | vw-webservice-common 15 | vw-webservice-core 16 | vw-webservice-jersey 17 | 18 | 19 | 20 | 0.1.0-SNAPSHOT 21 | UTF-8 22 | 23 | 24 | 25 | 26 | 27 | 28 | junit 29 | junit 30 | 4.8.2 31 | test 32 | 33 | 34 | 35 | org.mockito 36 | mockito-all 37 | 1.9.5 38 | test 39 | 40 | 41 | 42 | 43 | 44 | 45 | org.springframework 46 | spring-core 47 | 3.1.3.RELEASE 48 | 49 | 50 | 51 | org.springframework 52 | spring-beans 53 | 3.1.3.RELEASE 54 | 55 | 56 | 57 | org.springframework 58 | spring-context 59 | 3.1.3.RELEASE 60 | 61 | 62 | 63 | org.springframework 64 | spring-context-support 65 | 3.1.3.RELEASE 66 | 67 | 68 | 69 | org.springframework 70 | spring-expression 71 | 3.1.3.RELEASE 72 | 73 | 74 | 75 | 76 | org.slf4j 77 | slf4j-api 78 | 1.7.5 79 | 80 | 81 | 82 | ch.qos.logback 83 | logback-classic 84 | 1.0.13 85 | runtime 86 | 87 | 88 | 89 | ch.qos.logback 90 | logback-core 91 | 1.0.13 92 | runtime 93 | 94 | 95 | 96 | 97 | com.google.guava 98 | guava 99 | 15.0 100 | 101 | 102 | 103 | 104 | com.google.code.gson 105 | gson 106 | 2.2.4 107 | 108 | 109 | 110 | 111 | com.fasterxml.jackson.core 112 | jackson-core 113 | 2.3.1 114 | 115 | 116 | 117 | 118 | commons-collections 119 | commons-collections 120 | 3.2.1 121 | 122 | 123 | 124 | 125 | org.apache.commons 126 | commons-lang3 127 | 3.2.1 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | org.apache.maven.plugins 140 | maven-compiler-plugin 141 | 2.5.1 142 | true 143 | 144 | 1.7 145 | 1.7 146 | true 147 | 1024m 148 | 2048m 149 | 150 | 151 | 152 | 
153 | org.apache.maven.plugins 154 | maven-surefire-plugin 155 | 2.13 156 | 157 | 158 | surefire-test 159 | test 160 | 161 | test 162 | 163 | 164 | 165 | 166 | false 167 | 168 | **/*Test.java 169 | 170 | once 171 | 172 | 173 | 174 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | -------------------------------------------------------------------------------- /vw-webservice-common/README.md: -------------------------------------------------------------------------------- 1 | vw-webservice-common 2 | ==================== 3 | 4 | Common components for client and server side use, by the VW web service. 5 | -------------------------------------------------------------------------------- /vw-webservice-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | 4 | 5 | com.eharmony.matching 6 | vw-webservice 7 | 0.1.0-SNAPSHOT 8 | ../../vw-webservice 9 | 10 | 11 | vw-webservice-common 12 | 13 | VW Web Service Common. 14 | Common components used by client and server side projects. 
15 | 16 | 17 | 18 | vw-webservice-common-${project.version} 19 | 20 | 21 | 22 | org.apache.maven.plugins 23 | maven-compiler-plugin 24 | 25 | 26 | 27 | org.apache.maven.plugins 28 | maven-surefire-plugin 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | com.google.guava 39 | guava 40 | 41 | 42 | 43 | 44 | org.apache.commons 45 | commons-lang3 46 | 47 | 48 | 49 | 50 | junit 51 | junit 52 | 53 | 54 | 55 | 56 | org.mockito 57 | mockito-all 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/Example.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.example; 5 | 6 | /** 7 | * @author vrahimtoola 8 | * 9 | * An example to be submitted to VW, in its proper input format. 10 | */ 11 | public interface Example { 12 | 13 | /* 14 | * Returns the example exactly as it will be submitted to VW, which expects 15 | * plain text examples. 16 | * 17 | * @throws ExampleFormatException to indicate that the format of the example 18 | * isn't valid. 19 | * 20 | * @returns The plain text VW representation of the example. 21 | */ 22 | String getVWStringRepresentation(); 23 | 24 | } 25 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/ExampleFormatException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.example; 5 | 6 | /** 7 | * @author vrahimtoola 8 | * 9 | * Exception indicating that the format of an example isn't valid. 10 | * Examples must be in the proper VW format, after all. 
I'm making this 11 | * an unchecked exception (a subclass of IllegalArgumentException) as it indicates a programmer error, 12 | * similar to NumberFormatException. 13 | */ 14 | public class ExampleFormatException extends IllegalArgumentException { 15 | 16 | /** 17 | * The serial version UID. 18 | */ 19 | private static final long serialVersionUID = -6238484930971388916L; 20 | 21 | private long exampleNumber = Long.MIN_VALUE; 22 | 23 | public ExampleFormatException() { 24 | super(); 25 | } 26 | 27 | public ExampleFormatException(String message) { 28 | super(message); 29 | } 30 | 31 | public ExampleFormatException(Throwable cause) { 32 | super(cause); 33 | } 34 | 35 | public ExampleFormatException(String message, Throwable cause) { 36 | super(message, cause); 37 | } 38 | 39 | public long getExampleNumber() { 40 | return exampleNumber; 41 | } 42 | 43 | public ExampleFormatException(long exampleNumber) { 44 | super(); 45 | this.exampleNumber = exampleNumber; 46 | } 47 | 48 | public ExampleFormatException(long exampleNumber, String message) { 49 | super(message); 50 | this.exampleNumber = exampleNumber; 51 | } 52 | 53 | public ExampleFormatException(long exampleNumber, Throwable cause) { 54 | super(cause); 55 | this.exampleNumber = exampleNumber; 56 | } 57 | 58 | public ExampleFormatException(long exampleNumber, String message, Throwable cause) { 59 | super(message, cause); 60 | this.exampleNumber = exampleNumber; 61 | } 62 | 63 | public void setExampleNumber(long exampleNumber) { 64 | this.exampleNumber = exampleNumber; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/ExampleMediaTypes.java: -------------------------------------------------------------------------------- 1 | package com.eharmony.matching.vw.webservice.common.example; 2 | 3 | /** 4 | * @author vrahimtoola 5 | * 6 | * String constants for acceptable media types for submitting vw 7 | * examples. 
8 | */ 9 | public class ExampleMediaTypes { 10 | 11 | public static final String PLAINTEXT_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+plaintext"; 12 | public static final String SIMPLE_PROTOBUF_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+simpleprotobuf"; 13 | public static final String SIMPLE_JSON_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+simplejson"; 14 | public static final String STRUCTURED_JSON_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+structuredjson"; 15 | 16 | } -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/StringExample.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.example; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | /** 9 | * @author vrahimtoola 10 | * 11 | * A VW example that's represented as a simple string, ie, without it 12 | * being possible to access its individual components separately. 13 | * 14 | */ 15 | public class StringExample implements Example { 16 | 17 | private final String vwExampleString; 18 | 19 | /* 20 | * Constructs a VW example using the exact String representation of it. 21 | * 22 | * @param theString The VW example. May be empty, but cannot be null. 
23 | */ 24 | public StringExample(String theString) { 25 | checkNotNull(theString, "Null string provided as example!"); 26 | vwExampleString = theString; 27 | } 28 | 29 | @Override 30 | public String getVWStringRepresentation() { 31 | return vwExampleString; 32 | } 33 | 34 | @Override 35 | public String toString() { 36 | return getVWStringRepresentation(); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/StructuredExample.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.example; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.util.ArrayList; 9 | import java.util.List; 10 | 11 | import org.apache.commons.lang3.StringUtils; 12 | 13 | import com.google.common.collect.ImmutableList; 14 | 15 | /** 16 | * @author vrahimtoola 17 | * 18 | * An example to be submitted to VW. A structured example can be queried 19 | * to get the various components that make up the example that will be 20 | * submitted to VW. 21 | */ 22 | public class StructuredExample implements Example { 23 | 24 | /* 25 | * The various kinds of examples. 26 | */ 27 | private enum ExampleType { 28 | 29 | /* 30 | * The empty example. This will be sent to VW as a single newline 31 | * character. 32 | */ 33 | EMPTY, 34 | 35 | /* 36 | * The pipe (|) example. This will be sent to VW as a single pipe 37 | * character, ie, '|'. 38 | */ 39 | PIPE, 40 | 41 | /* 42 | * A normal VW example. 43 | */ 44 | NORMAL 45 | } 46 | 47 | /* 48 | * Some pre-defined examples. 49 | */ 50 | 51 | /* 52 | * The empty example. This will be sent to VW as a single newline character. 
53 | */ 54 | public static final StructuredExample EMPTY_EXAMPLE = new StructuredExample(ExampleType.EMPTY, "", "", new ArrayList()); 55 | 56 | /* 57 | * The pipe (|) example. This will be sent to VW as a single character, '|'. 58 | */ 59 | public static final StructuredExample PIPE_EXAMPLE = new StructuredExample(ExampleType.PIPE, "", "", new ArrayList()); 60 | 61 | private final ExampleType exampleType; 62 | private final String label; 63 | private final Iterable namespaces; 64 | private final String tag; 65 | 66 | private StructuredExample(ExampleType exampleType, String label, String tag, Iterable namespaces) { 67 | this.exampleType = exampleType; 68 | this.label = label; 69 | this.namespaces = namespaces; 70 | this.tag = tag; 71 | } 72 | 73 | /* 74 | * Returns the label of this example. 75 | * 76 | * @returns The label of this example. 77 | */ 78 | public String getLabel() { 79 | return label; 80 | } 81 | 82 | /* 83 | * Returns the tag of this example. 84 | * 85 | * @returns The tag of this example. 86 | */ 87 | public String getTag() { 88 | return tag; 89 | } 90 | 91 | /* 92 | * Returns the namespaces in this example. 93 | * 94 | * @returns The namespaces in this example. The returned iterable is 95 | * unmodifiable. 
96 | */ 97 | public Iterable getNamespaces() { 98 | return namespaces; 99 | } 100 | 101 | @Override 102 | public String getVWStringRepresentation() { 103 | 104 | if (exampleType == ExampleType.EMPTY) 105 | return ""; 106 | else if (exampleType == ExampleType.PIPE) 107 | return " |"; //note the space before the pipe 108 | else { 109 | 110 | final String SPACE = " "; 111 | final String PIPE = "|"; 112 | final String COLON = ":"; 113 | 114 | StringBuilder builder = new StringBuilder(); 115 | 116 | if (label != null) { 117 | builder.append(label); 118 | builder.append(SPACE); 119 | } 120 | 121 | if (tag != null) { 122 | builder.append(tag); 123 | } 124 | 125 | boolean namespacesAdded = false; 126 | 127 | for (Namespace namespace : namespaces) { 128 | 129 | if (namespacesAdded) builder.append(SPACE); 130 | 131 | builder.append(PIPE); 132 | 133 | String namespaceName = namespace.getName(); 134 | 135 | if (StringUtils.isBlank(namespaceName) == false) builder.append(namespaceName); 136 | 137 | if (namespace.getScalingFactor() != null) { 138 | builder.append(COLON); 139 | 140 | Float scalingFactor = namespace.getScalingFactor(); 141 | 142 | //this will take care of getting rid of extraneous 0s, eg, 12.3400000 143 | if (scalingFactor.floatValue() == (int) scalingFactor.floatValue()) 144 | builder.append(String.format("%d", (int) scalingFactor.floatValue())); 145 | else { 146 | builder.append(String.format("%s", scalingFactor.floatValue())); 147 | } 148 | } 149 | 150 | for (StructuredExample.Namespace.Feature feature : namespace.getFeatures()) { 151 | 152 | builder.append(SPACE); 153 | 154 | String featureName = feature.getName(); 155 | Float featureValue = feature.getValue(); 156 | 157 | builder.append(featureName); 158 | 159 | if (featureValue != null) { 160 | builder.append(COLON); 161 | 162 | //this will take care of getting rid of extraneous 0s, eg, 12.3400000 163 | if (featureValue.floatValue() == (int) featureValue.floatValue()) 164 | builder.append(String.format("%d", 
(int) featureValue.floatValue())); 165 | else { 166 | builder.append(String.format("%s", featureValue.floatValue())); 167 | } 168 | 169 | } 170 | 171 | } 172 | 173 | namespacesAdded = true; 174 | } 175 | 176 | //if there's a label but no namespaces, add a SPACE and a PIPE after the label 177 | if (!namespacesAdded) { 178 | builder.append(PIPE); 179 | } 180 | 181 | return builder.toString(); 182 | } 183 | } 184 | 185 | /* 186 | * Represents a namespace containing 0 or more features. Instances of this 187 | * class are immutable. 188 | */ 189 | public static class Namespace { 190 | 191 | private final List features; 192 | private final String namespaceName; 193 | private final Float scalingFactor; 194 | 195 | private Namespace(String namespaceName, Float scalingFactor, List features) { 196 | this.namespaceName = namespaceName == null ? null : namespaceName.trim(); 197 | this.scalingFactor = scalingFactor; 198 | this.features = features; 199 | } 200 | 201 | /* 202 | * Returns the features of the map. 203 | * 204 | * @returns The list of features. The list is unmodifiable. 205 | */ 206 | public Iterable getFeatures() { 207 | return features; 208 | } 209 | 210 | /* 211 | * Returns the number of features in this namespace. 212 | * 213 | * @returns The number of features in this namespace. Always >= 0. 214 | */ 215 | private int getNumberOfFeatures() { 216 | return (features == null ? 0 : features.size()); 217 | } 218 | 219 | /* 220 | * Returns the name of this namespace. 221 | * 222 | * @returns The name of this namespace. Can be null/empty. 223 | */ 224 | public String getName() { 225 | return namespaceName; 226 | } 227 | 228 | /* 229 | * Returns the scaling factor of this namespace. Can be null, which is 230 | * the same as 1.0 (as per VW documentation). 231 | * 232 | * @returns The scaling factor for this namespace. Can be null. 
233 | */ 234 | public Float getScalingFactor() { 235 | return scalingFactor; 236 | } 237 | 238 | /* 239 | * Represents a single feature inside a namespace. 240 | */ 241 | public static class Feature { 242 | private final String name; 243 | private final Float value; 244 | 245 | private Feature(String name, Float value) { 246 | this.name = name.trim(); 247 | this.value = value; 248 | } 249 | 250 | /* 251 | * Returns the name of this feature. 252 | * 253 | * @returns The name of this feature. 254 | */ 255 | public String getName() { 256 | return name; 257 | } 258 | 259 | /* 260 | * Returns the value of this feature. The value can be null. 261 | * 262 | * @returns The value of this feature. Can be null. 263 | */ 264 | public Float getValue() { 265 | return value; 266 | } 267 | } 268 | 269 | /* 270 | * Builds a single namespace of an example. Instances of this class are 271 | * not thread safe. A NamespaceBuilder can be used repeatedly to build 272 | * namespace instances. Just make sure to call 'clear()' before starting 273 | * to build up the second (or subsequent) namespace. Note that invoking 274 | * 'build' does not implicitly invoke 'clear()' after a Namespace has 275 | * been built; 'clear()' must be invoked explicitly. 276 | */ 277 | public static class NamespaceBuilder { 278 | 279 | private List features = null; 280 | private String namespaceName; 281 | private Float scalingFactor; 282 | 283 | /* 284 | * Sets the name for the namespace being built. 285 | * 286 | * @param namespaceName The name of the namespace being built. Can 287 | * be null/empty. From the VW documentation: Currently, the only 288 | * characters that can't be used in feature or namespace names are 289 | * vertical bar, colon, space, and newline. 290 | * 291 | * @returns This builder. 
292 | */ 293 | public NamespaceBuilder setName(String namespaceName) { 294 | 295 | if (namespaceName != null) { 296 | if (namespaceName.contains("|") || namespaceName.contains(":") || StringUtils.containsWhitespace(namespaceName)) { 297 | throw new ExampleFormatException("The namespace name cannot contain whitespace, '|' or ':'! Namespace passed in was: " + namespaceName); 298 | } 299 | } 300 | 301 | this.namespaceName = namespaceName; 302 | return this; 303 | } 304 | 305 | /* 306 | * Sets the scaling factor for this namespace. 307 | * 308 | * @param scalingFactor The scaling factor. Can be null (which VW 309 | * will interpret as 1.0). 310 | * 311 | * @returns This builder. 312 | */ 313 | public NamespaceBuilder setScalingFactor(Float scalingFactor) { 314 | this.scalingFactor = scalingFactor; 315 | return this; 316 | } 317 | 318 | /* 319 | * Adds a feature to this namespace. The value of the feature will 320 | * default to 1.0, as per the VW documentation. 321 | * 322 | * @param feature The feature name to be added. 323 | */ 324 | public NamespaceBuilder addFeature(String feature) { 325 | return addFeature(feature, null); 326 | } 327 | 328 | /* 329 | * Adds a feature with the specified value to the namespace. 330 | * 331 | * @param feature The feature to add. Cannot be null/empty. From the 332 | * VW documentation: Currently, the only characters that can't be 333 | * used in feature or namespace names are vertical bar, colon, 334 | * space, and newline. 335 | * 336 | * @param value The float value of the feature. 337 | * 338 | * @returns This builder. 339 | */ 340 | public NamespaceBuilder addFeature(String feature, Float value) { 341 | if (StringUtils.isBlank(feature)) throw new ExampleFormatException("The feature name must be provided!"); 342 | 343 | if (feature.contains("|") || feature.contains(":") || StringUtils.containsWhitespace(feature)) 344 | throw new ExampleFormatException("The feature name cannot contain whitespace, '|' or ':'! 
Feature name passed in was: " + feature); 345 | 346 | if (features == null) features = new ArrayList(); 347 | features.add(new Feature(feature, value)); 348 | return this; 349 | } 350 | 351 | /* 352 | * Removes all features from the namespace. 353 | * 354 | * @returns This builder. 355 | */ 356 | public NamespaceBuilder clear() { 357 | features = null; 358 | scalingFactor = null; 359 | namespaceName = null; 360 | return this; 361 | } 362 | 363 | /* 364 | * Builds the namespace with a scaling factor of 1.0. 365 | * 366 | * @returns The newly built namespace. 367 | */ 368 | public Namespace build() { 369 | 370 | if (StringUtils.isBlank(namespaceName) && scalingFactor != null) throw new ExampleFormatException("A namespace with a scaling factor must be given a name!"); 371 | 372 | return new Namespace(namespaceName, scalingFactor, features == null ? new ArrayList() : ImmutableList. builder().addAll(features).build()); 373 | } 374 | } 375 | 376 | } 377 | 378 | public static class ExampleBuilder { 379 | 380 | private boolean atLeastOneNamespaceIsNonBlank = false; 381 | private String label = null; 382 | private List namespaces = null; 383 | private String tag = null; 384 | 385 | /* 386 | * Sets the label for the example. 387 | * 388 | * @param label The label for the example. Can be null/empty. Will be 389 | * trimmed (ie, trim() will be called on it) when the example is 390 | * created. 391 | * 392 | * @returns This ExampleBuilder. 393 | */ 394 | public ExampleBuilder setLabel(String label) { 395 | this.label = label; 396 | return this; 397 | } 398 | 399 | /* 400 | * Sets the tag for this example. 401 | * 402 | * @param tag The tag for the example. Can be null/empty. Will be 403 | * trimmed when the example is created. 404 | * 405 | * @returns This ExampleBuilder. 
406 | */ 407 | public ExampleBuilder setTag(String tag) { 408 | this.tag = tag; 409 | return this; 410 | } 411 | 412 | /* 413 | * Clears the builder, thus making it ready for use to create the next 414 | * example. 415 | * 416 | * @returns This ExampleBuilder. 417 | */ 418 | public ExampleBuilder clear() { 419 | label = null; 420 | namespaces = null; 421 | tag = null; 422 | return this; 423 | } 424 | 425 | /* 426 | * Adds a namespace to the example. 427 | * 428 | * @param namespace The namespace to add to the builder. Cannot be null. 429 | * 430 | * @returns This builder. 431 | */ 432 | public ExampleBuilder addNamespace(Namespace namespace) { 433 | checkNotNull(namespace); 434 | if (namespaces == null) namespaces = new ArrayList(); 435 | namespaces.add(namespace); 436 | atLeastOneNamespaceIsNonBlank = namespace.getNumberOfFeatures() > 0 || (StringUtils.isBlank(namespace.getName()) == false); 437 | return this; 438 | } 439 | 440 | /* 441 | * Builds and returns the example. 442 | */ 443 | public StructuredExample build() { 444 | if (label != null) label = label.trim(); 445 | if (tag != null) tag = tag.trim(); 446 | 447 | //If no label and no namespaces (or all namespaces are empty), treat it as the pipe example. 448 | if (StringUtils.isBlank(label) && StringUtils.isBlank(tag) && (namespaces == null || namespaces.size() == 0 || atLeastOneNamespaceIsNonBlank == false)) 449 | return StructuredExample.PIPE_EXAMPLE; 450 | 451 | return new StructuredExample(ExampleType.NORMAL, label, tag, namespaces == null ? new ArrayList() : ImmutableList. 
builder().addAll(namespaces).build()); 452 | 453 | } 454 | } 455 | 456 | } 457 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/Prediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.prediction; 5 | 6 | import java.io.IOException; 7 | import java.io.OutputStream; 8 | 9 | /** 10 | * @author vrahimtoola 11 | * 12 | * A prediction spit out by VW. 13 | */ 14 | public interface Prediction { 15 | 16 | /* 17 | * Returns the string representation of a Prediction. The returned 18 | * representation should be convertible back to a Prediction. Intead of 19 | * relying on the implementor providing a 'toString()' method that works 20 | * sensibly, I thought it would be a better idea to t a proper 21 | * implementation this way. 22 | * 23 | * @returns The prediction, exactly as returned by VW. 24 | */ 25 | String getVWStringRepresentation(); 26 | 27 | /* 28 | * Writes a prediction received from VW out to some output stream. 29 | * 30 | * The implementation will write a newline after writing the prediction to 31 | * the stream. 32 | * 33 | * @param outputStream The stream to write the prediction to. The caller 34 | * owns this stream. 35 | */ 36 | void write(OutputStream outputStream) throws IOException; 37 | } 38 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/PredictionMediaTypes.java: -------------------------------------------------------------------------------- 1 | package com.eharmony.matching.vw.webservice.common.prediction; 2 | 3 | /** 4 | * @author vrahimtoola 5 | * 6 | * String constants for prediction media types (when sending back vw 7 | * predictions). 
8 | */ 9 | public class PredictionMediaTypes { 10 | 11 | public static final String PLAINTEXT_0_1_0 = "application/vowpal-wabbit-prediction-v0.1.0+plaintext"; 12 | 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/StringPrediction.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.common.prediction; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.io.IOException; 9 | import java.io.OutputStream; 10 | 11 | import com.google.common.base.Charsets; 12 | 13 | /** 14 | * @author vrahimtoola 15 | * 16 | * A VW prediction represented as a simple string (UTF8 encoded). 17 | */ 18 | public class StringPrediction implements Prediction { 19 | 20 | private final String vwPrediction; 21 | 22 | private static final byte[] newlineBytes = System.getProperty("line.separator").getBytes(Charsets.UTF_8); 23 | 24 | public StringPrediction(String theString) { 25 | checkNotNull(theString, "Null prediction provided!"); 26 | vwPrediction = theString; 27 | } 28 | 29 | /* 30 | * (non-Javadoc) 31 | * 32 | * @see com.eharmony.matching.vw.webservice.core.VWPrediction#write(java.io. 
33 | * OutputStream) 34 | */ 35 | @Override 36 | public void write(OutputStream outputStream) throws IOException { 37 | outputStream.write(vwPrediction.getBytes(Charsets.UTF_8)); 38 | outputStream.write(newlineBytes); 39 | } 40 | 41 | @Override 42 | public String getVWStringRepresentation() { 43 | return vwPrediction; 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /vw-webservice-common/src/test/java/com/eharmony/matching/vw/webservice/example/StructuredExampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.example; 5 | 6 | import junit.framework.Assert; 7 | 8 | import org.junit.Test; 9 | 10 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 11 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.ExampleBuilder; 12 | 13 | /** 14 | * @author vrahimtoola 15 | * 16 | * Tests the StructuredExample class. 17 | */ 18 | public class StructuredExampleTest { 19 | 20 | /* 21 | * Tests that a pipe example gets built properly. 
22 | */ 23 | @Test 24 | public void testPipeExampleCreation() { 25 | 26 | ExampleBuilder exampleBuilder = new ExampleBuilder(); 27 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 28 | 29 | Assert.assertTrue(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 30 | 31 | exampleBuilder.clear(); 32 | namespaceBuilder.clear(); 33 | 34 | exampleBuilder.setLabel("some label"); 35 | 36 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE); 37 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 38 | 39 | exampleBuilder.clear(); 40 | namespaceBuilder.clear(); 41 | 42 | StructuredExample.Namespace namespace = namespaceBuilder.build(); 43 | 44 | exampleBuilder.addNamespace(namespace); 45 | 46 | Assert.assertTrue(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 47 | 48 | exampleBuilder.clear(); 49 | namespaceBuilder.clear(); 50 | 51 | //TODO: consider verifying that labels cannot contain spaces...? 
52 | exampleBuilder.setLabel("some label"); 53 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE); 54 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 55 | 56 | exampleBuilder.clear(); 57 | namespaceBuilder.clear(); 58 | 59 | namespaceBuilder.setName("some-namespace-name"); 60 | namespaceBuilder.setScalingFactor(1.0f); 61 | exampleBuilder.addNamespace(namespaceBuilder.build()); 62 | 63 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE); 64 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 65 | 66 | exampleBuilder.clear(); 67 | namespaceBuilder.clear(); 68 | 69 | namespaceBuilder.addFeature("someFeature", null); 70 | 71 | exampleBuilder.addNamespace(namespaceBuilder.build()); 72 | 73 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE); 74 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE); 75 | 76 | } 77 | 78 | /* 79 | * Tests that an empty example returns the empty string when it's supposed 80 | * to. 81 | */ 82 | @Test 83 | public void testEmptyExampleReturnsEmptyString() { 84 | Assert.assertEquals("", StructuredExample.EMPTY_EXAMPLE.getVWStringRepresentation()); 85 | } 86 | 87 | /* 88 | * Tests that a PIPE example returns the pipe character when it's supposed 89 | * to. 90 | */ 91 | @Test 92 | public void testPipeExampleReturnsPipeString() { 93 | Assert.assertEquals(" |", StructuredExample.PIPE_EXAMPLE.getVWStringRepresentation()); 94 | } 95 | 96 | /* 97 | * Simple test to verify that basic example building works as expected. 
98 | */ 99 | @Test 100 | public void simpleExampleBuildingTest() { 101 | 102 | final String expectedOutput = "34 |one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2"; 103 | 104 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder(); 105 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 106 | 107 | exampleBuilder.setLabel("34"); 108 | 109 | namespaceBuilder.setName("one"); 110 | namespaceBuilder.addFeature("a", 12.34f); 111 | namespaceBuilder.addFeature("b", 45.1f); 112 | 113 | StructuredExample.Namespace firstNamespace = namespaceBuilder.build(); 114 | 115 | namespaceBuilder.clear(); 116 | 117 | namespaceBuilder.setName("two"); 118 | namespaceBuilder.setScalingFactor(34.3f); 119 | namespaceBuilder.addFeature("bah", 0.038293f); 120 | namespaceBuilder.addFeature("another", 3.4000f); 121 | namespaceBuilder.addFeature("andThis", 2.0f); 122 | 123 | StructuredExample.Namespace secondNamespace = namespaceBuilder.build(); 124 | 125 | exampleBuilder.addNamespace(firstNamespace); 126 | exampleBuilder.addNamespace(secondNamespace); 127 | 128 | //System.out.println(exampleBuilder.build().getVWStringRepresentation()); 129 | 130 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation()); 131 | } 132 | 133 | /* 134 | * Like the above, but tests the Tag feature as well (since it was added 135 | * later). 
136 | */ 137 | @Test 138 | public void simpleExampleBuildingTestWithTag() { 139 | 140 | String expectedOutput = "34 someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2"; 141 | 142 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder(); 143 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 144 | 145 | exampleBuilder.setLabel("34"); 146 | exampleBuilder.setTag("someTag"); 147 | 148 | namespaceBuilder.setName("one"); 149 | namespaceBuilder.addFeature("a", 12.34f); 150 | namespaceBuilder.addFeature("b", 45.1f); 151 | 152 | StructuredExample.Namespace firstNamespace = namespaceBuilder.build(); 153 | 154 | namespaceBuilder.clear(); 155 | 156 | namespaceBuilder.setName("two"); 157 | namespaceBuilder.setScalingFactor(34.3f); 158 | namespaceBuilder.addFeature("bah", 0.038293f); 159 | namespaceBuilder.addFeature("another", 3.4000f); 160 | namespaceBuilder.addFeature("andThis", 2.0f); 161 | 162 | StructuredExample.Namespace secondNamespace = namespaceBuilder.build(); 163 | 164 | exampleBuilder.addNamespace(firstNamespace); 165 | exampleBuilder.addNamespace(secondNamespace); 166 | 167 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation()); 168 | 169 | //----- 170 | exampleBuilder.setLabel(null); //clear out just the label, leaving everything else as is 171 | expectedOutput = "someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2"; 172 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation()); 173 | //----- 174 | 175 | //----- 176 | exampleBuilder.setTag(null); //clear out the tag as well, leaving just the namespace bit 177 | expectedOutput = "|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2"; 178 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation()); 179 | //----- 180 | 181 | //----- 182 | 
exampleBuilder.setLabel("theLabel"); //set just the label 183 | expectedOutput = "theLabel |one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2"; 184 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation()); 185 | //----- 186 | } 187 | 188 | } 189 | -------------------------------------------------------------------------------- /vw-webservice-common/src/test/resources/ner.train.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eHarmony/vw-webservice/53b4cfa9d8872058a50fcaa9e11926a6bd73306c/vw-webservice-common/src/test/resources/ner.train.gz -------------------------------------------------------------------------------- /vw-webservice-common/src/test/resources/vw_example_schema.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-04/schema#", 3 | "title": "VW Example", 4 | "type": "array", 5 | "description": "A stream of examples sent to the VW web service, which will submit them to VW", 6 | "items": { 7 | "type" : "object", 8 | "description": "An individual example in the stream", 9 | "properties": { 10 | "label": { "type": "string", 11 | "description": "The label for the example. Can be null/empty, but can contain anything", 12 | "minLength": 0 13 | }, 14 | "tag": { "type": "string", 15 | "description": "The tag for the example. Can be null/empty, but can contain anything", 16 | "minLength": 0 17 | }, 18 | "namespaces": { "type": "array", 19 | "description": "The list of namespaces in the example", 20 | "items": { 21 | "type": "object", 22 | "description": "An individual namespace inside an example", 23 | "properties": { 24 | "name": { "type": "string", 25 | "description": "The name of the namespace. 
Can be null/empty, but must not contain whitespace, | or :", 26 | "minLength": 0 27 | }, 28 | "scale": { "type": "number", 29 | "description": "The scaling factor for the namespace, optional - defaults to 1 if not present - name must be present for this to be accepted" 30 | }, 31 | "features": { "type": "array", 32 | "description": "The list of features in this namespace", 33 | "items": { 34 | "type": "object", 35 | "description": "An individual feature inside a namespace", 36 | "properties": { 37 | "name": { "type": "string", 38 | "description": "The name of the feature. Must be present", 39 | "minLength": 1 40 | }, 41 | "value": { "type": "number", 42 | "description": "The value of the feature. Defaults to 1 if not present" 43 | } 44 | }, 45 | "additionalProperties": false, 46 | "required": ["name"] 47 | }, 48 | "minItems": 0, 49 | "uniqueItems": false, 50 | "additionalItems": false 51 | } 52 | }, 53 | "minProperties": 0, 54 | "additionalProperties": false 55 | 56 | } 57 | 58 | }, 59 | "minProperties": 0, 60 | "additionalProperties": false 61 | } 62 | }, 63 | "minItems": 0, 64 | "uniqueItems": false, 65 | "additionalItems": false 66 | 67 | } -------------------------------------------------------------------------------- /vw-webservice-core/README.md: -------------------------------------------------------------------------------- 1 | vw-webservice-core 2 | ================== 3 | 4 | The core components of the web service on the server side. This project doesn't refer to any specific web service framework such as Jersey or Restlet or whatever. It only has the code you would need if you were building a web service yourself using the framework of your choice. 
/**
 * @author vrahimtoola
 * 
 *         Signals a problem while reading the examples submitted to the web
 *         service.
 * 
 *         This is deliberately unchecked: it can be raised from inside an
 *         iterator, and Iterator.next() cannot declare checked exceptions.
 */
public class ExampleReadException extends RuntimeException {

    /**
     * The serial version UID.
     */
    private static final long serialVersionUID = -1744390625692646099L;

    /*
     * Creates an exception with both a detail message and a cause.
     */
    public ExampleReadException(String message, Throwable cause) {
        super(message, cause);
    }

    /*
     * Creates an exception wrapping the given cause.
     */
    public ExampleReadException(Throwable cause) {
        super(cause);
    }

    /*
     * Creates an exception with the given detail message.
     */
    public ExampleReadException(String message) {
        super(message);
    }

    /*
     * Creates an exception with neither a message nor a cause.
     */
    public ExampleReadException() {
        super();
    }
}
35 | */ 36 | String getAttribute(String attributeKey); 37 | } 38 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/ExamplesIterableImpl.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.util.HashMap; 9 | import java.util.Iterator; 10 | import java.util.Map; 11 | 12 | import com.eharmony.matching.vw.webservice.common.example.Example; 13 | 14 | /** 15 | * @author vrahimtoola 16 | * 17 | * An implementation of ExamplesIterable. 18 | */ 19 | public class ExamplesIterableImpl implements ExamplesIterable { 20 | 21 | private final Map attributesMap; 22 | private final Iterator exampleIterator; 23 | private final int numberOfExamples; 24 | 25 | public ExamplesIterableImpl(int numberOfExamples, 26 | Map theMapOfAttributes, 27 | Iterator exampleIterator) { 28 | 29 | checkNotNull(exampleIterator); 30 | 31 | this.numberOfExamples = numberOfExamples; 32 | 33 | if (theMapOfAttributes == null) 34 | attributesMap = new HashMap(); 35 | else { 36 | attributesMap = theMapOfAttributes; 37 | } 38 | 39 | this.exampleIterator = exampleIterator; 40 | } 41 | 42 | public Iterator iterator() { 43 | return exampleIterator; 44 | } 45 | 46 | public String getAttribute(String attributeKey) { 47 | return attributesMap.get(attributeKey); 48 | } 49 | 50 | public int getNumberOfExamples() { 51 | return this.numberOfExamples; 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessingEventHandler.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package 
/**
 * @author vrahimtoola
 * 
 *         Callbacks to be fired when the status of example processing
 *         (submission/prediction fetching) changes. Every callback receives
 *         the ExampleProcessingManager of the run it refers to, so the
 *         handler can query it for counts and current state.
 */
public interface ExampleProcessingEventHandler {

	/*
	 * Fired whenever there's an exception reading examples.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 * 
	 * @param theException The exception that occurred while reading.
	 */
	void onExampleReadException(ExampleProcessingManager exampleProcessingManager, ExampleReadException theException);

	/*
	 * Fired whenever an invalid (badly formatted) example is detected.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 * 
	 * @param theException The exception describing the format problem.
	 */
	void onExampleFormatException(ExampleProcessingManager exampleProcessingManager, ExampleFormatException theException);

	/*
	 * Fired whenever there's an exception submitting examples.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 * 
	 * @param theException The exception that occurred during submission.
	 */
	void onExampleSubmissionException(ExampleProcessingManager exampleProcessingManager, ExampleSubmissionException theException);

	/*
	 * Fired when all examples have been submitted.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 */
	void onExampleSubmissionComplete(ExampleProcessingManager exampleProcessingManager);

	/*
	 * Fired whenever there's an exception fetching predictions.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 * 
	 * @param theException The exception that occurred while fetching.
	 */
	void onPredictionFetchException(ExampleProcessingManager exampleProcessingManager, PredictionFetchException theException);

	/*
	 * Fired when all predictions have been fetched.
	 * 
	 * @param exampleProcessingManager The example processing manager that can
	 * be queried to find out more info about the example processing.
	 */
	void onPredictionFetchComplete(ExampleProcessingManager exampleProcessingManager);
}
/**
 * Submits examples to VW.
 *
 * @author vrahimtoola
 */
public interface ExampleProcessor {

    /**
     * Kicks off the example submission process.
     *
     * The manner in which examples are submitted to VW determines the manner
     * in which the predictions get fetched, i.e. if examples are submitted
     * over TCP-IP, the predictions come back over the same socket connection.
     *
     * @param callback A callback handler to handle various status changes as
     *            examples are being processed.
     * @return An example processing manager that can be used to stop the
     *         example submission process.
     * @throws ExampleSubmissionException If the example submission process
     *             could not be kicked off.
     */
    ExampleProcessingManager submitExamples(ExampleProcessingEventHandler callback) throws ExampleSubmissionException;

    /**
     * Returns features describing this example processor. The returned object
     * should never be null.
     *
     * @return The example processor features provided by the example
     *         processor.
     */
    ExampleProcessorFeatures getExampleProcessorFeatures();

}
/**
 * Features and other information describing an example processor. Feature
 * keys cannot be null, but feature values may be null.
 *
 * @author vrahimtoola
 */
public interface ExampleProcessorFeatures {

    /**
     * Whether or not the example processor's submitExamples() method
     * executes asynchronously.
     *
     * @return True if the example processor submits examples asynchronously,
     *         false otherwise.
     */
    boolean isAsync();

    /**
     * Returns all the features applicable to this example processor.
     *
     * @return All the features that this processor provides. None of the keys
     *         can be null, but values may be. The types of the values should
     *         be documented by the example processor. The returned map should
     *         never be null, but can be empty.
     */
    Map getAllFeatures();

}
13 | */ 14 | public class ExampleProcessorFeaturesImpl implements ExampleProcessorFeatures { 15 | 16 | private final Map featuresMap; 17 | private final boolean isAsync; 18 | 19 | public ExampleProcessorFeaturesImpl(boolean isAsync, 20 | Map featuresMap) { 21 | 22 | this.isAsync = isAsync; 23 | this.featuresMap = getShallowCopyOfMap(featuresMap); 24 | } 25 | 26 | /* (non-Javadoc) 27 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeatures#getAllFeatures() 28 | */ 29 | public Map getAllFeatures() { 30 | return getShallowCopyOfMap(featuresMap); 31 | } 32 | 33 | 34 | private Map getShallowCopyOfMap( 35 | Map mapToCopy) { 36 | Map copy = new HashMap(); 37 | 38 | if (mapToCopy != null) 39 | for (Map.Entry entry : mapToCopy.entrySet()) { 40 | if (entry.getKey() != null) // skip over null keys. 41 | copy.put(entry.getKey(), entry.getValue()); 42 | } 43 | 44 | return copy; 45 | } 46 | 47 | public boolean isAsync() { 48 | return isAsync; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleSubmissionException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor; 5 | 6 | /** 7 | * @author vrahimtoola 8 | * 9 | * Exception thrown when something bad happens while submitting examples 10 | * to VW, or before any examples have been submitted to VW. 11 | */ 12 | public class ExampleSubmissionException extends Exception { 13 | 14 | /** 15 | * The serial version UID. 
/**
 * The various states in which the example submission process can be.
 *
 * Only the most recent state is observable. It is possible, for instance,
 * that there was an ExampleFormatException followed by an
 * ExampleSubmissionException, in which case the client only gets to observe
 * the example submission fault. Whether submission stops on an
 * ExampleFormatException is up to the implementation of the example
 * submitter.
 *
 * @author vrahimtoola
 */
public enum ExampleSubmissionState {

    /**
     * All examples have been submitted, with no exceptions.
     */
    Complete,

    /**
     * Some exception occurred making it impossible to read more examples. No
     * more examples will be submitted.
     */
    ExampleReadFault,

    /**
     * Some exception occurred making it impossible to submit more examples to
     * VW. No more examples will be submitted.
     */
    ExampleSubmissionFault,

    /**
     * One or more examples were invalid. No more examples will be submitted.
     */
    ExampleFormatFault,

    /**
     * Examples are currently being submitted.
     */
    OnGoing,

    /**
     * Example submission was stopped for some reason.
     */
    Stopped

}
No more 20 | * predictions will be read from VW. 21 | */ 22 | PredictionFetchFault, 23 | 24 | /* 25 | * An exception occurred when sending predictions back to the client. No 26 | * more predictions will be sent. Typically example submission will also be 27 | * stopped. 28 | */ 29 | PredictionWriteFault, 30 | 31 | /* 32 | * Predictions are currently being fetched. 33 | */ 34 | OnGoing 35 | } 36 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/AsyncFailFastTCPIPExampleProcessor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import java.io.BufferedWriter; 7 | import java.io.IOException; 8 | import java.io.OutputStream; 9 | import java.io.OutputStreamWriter; 10 | import java.net.Socket; 11 | import java.util.concurrent.Callable; 12 | import java.util.concurrent.ExecutorService; 13 | 14 | import org.slf4j.LoggerFactory; 15 | 16 | import com.eharmony.matching.vw.webservice.common.example.Example; 17 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException; 18 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler; 20 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager; 21 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor; 22 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeatures; 23 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeaturesImpl; 24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException; 25 | import 
com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState; 26 | import com.google.common.base.Charsets; 27 | 28 | /** 29 | * @author vrahimtoola 30 | * 31 | * An asynchronous, fail fast example processor to submit examples to VW 32 | * over a TCP IP socket. 33 | * 34 | * Making this package-private for now. 35 | */ 36 | class AsyncFailFastTCPIPExampleProcessor implements ExampleProcessor { 37 | 38 | private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(AsyncFailFastTCPIPExampleProcessor.class); 39 | 40 | private final ExecutorService executorService; 41 | private final TCPIPSocketFactory socketFactory; 42 | private final Iterable examples; 43 | 44 | public AsyncFailFastTCPIPExampleProcessor(TCPIPSocketFactory socketFactory, ExecutorService executorService, Iterable examples) { 45 | 46 | this.executorService = executorService; 47 | this.socketFactory = socketFactory; 48 | this.examples = examples; 49 | } 50 | 51 | public ExampleProcessingManager submitExamples(final ExampleProcessingEventHandler callback) throws ExampleSubmissionException { 52 | 53 | try { 54 | final Socket socket = socketFactory.getSocket(); 55 | 56 | final TCPIPExampleProcessingManager exampleProcessingManager = new TCPIPExampleProcessingManager(socket, callback); 57 | 58 | executorService.submit(new Callable() { 59 | 60 | public Void call() { 61 | 62 | OutputStream outputStream; 63 | 64 | boolean faulted = false; 65 | 66 | boolean stoppedPrematurely = false; 67 | 68 | BufferedWriter writer = null; 69 | 70 | long numExamplesSent = 0; 71 | 72 | try { 73 | 74 | outputStream = socket.getOutputStream(); 75 | 76 | LOGGER.info("Starting to submit examples to VW..."); 77 | 78 | writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8)); 79 | 80 | for (Example example : examples) { 81 | 82 | String toWrite = null; 83 | 84 | try { 85 | toWrite = example.getVWStringRepresentation(); 86 | writer.write(toWrite); 87 | writer.newLine(); 88 | 89 | 
numExamplesSent++; 90 | 91 | if (numExamplesSent == 1) LOGGER.debug("First example: {}", toWrite); 92 | 93 | exampleProcessingManager.incrementNumberOfExamplesSubmitted(); 94 | 95 | LOGGER.trace("Submitted example #{}: {}", numExamplesSent, toWrite); 96 | } 97 | catch (ExampleFormatException e) { 98 | 99 | exampleProcessingManager.incrementNumberOfExamplesSkipped(); 100 | if (callback != null) callback.onExampleFormatException(exampleProcessingManager, e); 101 | 102 | } 103 | 104 | if (exampleProcessingManager.isStopped()) { 105 | LOGGER.warn("Example submission process was stopped for some reason!"); 106 | stoppedPrematurely = true; 107 | break; 108 | } 109 | } 110 | 111 | if (!stoppedPrematurely) LOGGER.info("All examples submitted to VW!"); 112 | 113 | LOGGER.info("Sent a total of {} examples to VW", numExamplesSent); 114 | 115 | } 116 | catch (ExampleReadException e) { 117 | 118 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleReadFault); 119 | 120 | if (callback != null) callback.onExampleReadException(exampleProcessingManager, e); 121 | 122 | LOGGER.error("ExampleReadException in ExampleSubmitter: {}", e.getMessage(), e); 123 | 124 | faulted = true; 125 | } 126 | catch (Exception e) { 127 | 128 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault); 129 | 130 | if (callback != null) callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e)); 131 | 132 | LOGGER.error("Other Exception in ExampleSubmitter: {}", e.getMessage(), e); 133 | 134 | faulted = true; 135 | } 136 | finally { 137 | 138 | if (writer != null) try { 139 | writer.flush(); //make sure that anything buffered by the bufferedwriter is flushed to the underlying stream 140 | } 141 | catch (IOException e) { 142 | 143 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault); 144 | 145 | if (callback != null) 
callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e)); 146 | 147 | LOGGER.error("IOException when closing example writer in ExampleProcessor: {}", e.getMessage(), e); 148 | 149 | faulted = true; 150 | } 151 | 152 | if (socket != null) try { 153 | 154 | socket.shutdownOutput(); 155 | } 156 | catch (IOException e2) { 157 | 158 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault); 159 | 160 | if (callback != null) callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e2)); 161 | 162 | LOGGER.error("IOException when shutting down socket output in ExampleProcessor: {}", e2.getMessage(), e2); 163 | 164 | faulted = true; 165 | } 166 | 167 | if (faulted == false) { 168 | if (stoppedPrematurely == false) 169 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.Complete); 170 | else { 171 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.Stopped); 172 | } 173 | } 174 | 175 | if (callback != null) callback.onExampleSubmissionComplete(exampleProcessingManager); 176 | 177 | } 178 | 179 | return null; 180 | } 181 | 182 | }); 183 | 184 | return exampleProcessingManager; 185 | } 186 | catch (Exception e1) { 187 | 188 | LOGGER.error("Exception in submitExamples(): {}", e1.getMessage()); 189 | 190 | throw new ExampleSubmissionException(e1); 191 | } 192 | 193 | } 194 | 195 | public ExampleProcessorFeatures getExampleProcessorFeatures() { 196 | 197 | return new ExampleProcessorFeaturesImpl(true, null); 198 | } 199 | 200 | } 201 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPExampleProcessingManager.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package 
com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import java.io.IOException; 7 | import java.net.Socket; 8 | import java.util.Iterator; 9 | 10 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction; 11 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler; 12 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager; 13 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState; 14 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState; 15 | 16 | /** 17 | * @author vrahimtoola An implementation of ExampleProcessingManager for use by 18 | * the AsyncFailFastTCPIPExampleProcessor. 19 | */ 20 | class TCPIPExampleProcessingManager implements ExampleProcessingManager { 21 | 22 | private long numExamplesSubmitted, numExamplesSkipped, numPredictionsFetched; 23 | private ExampleSubmissionState exampleSubmissionState = ExampleSubmissionState.OnGoing; 24 | private final TCPIPPredictionsIterator predictionsIterator; 25 | 26 | private boolean isStopped = false; 27 | 28 | public TCPIPExampleProcessingManager(Socket socket, ExampleProcessingEventHandler callback) throws IOException { 29 | this.predictionsIterator = new TCPIPPredictionsIterator(socket, callback, this); 30 | } 31 | 32 | /* 33 | * (non-Javadoc) 34 | * 35 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 36 | * ExampleProcessingManager#getPredictionsIterable() 37 | */ 38 | public Iterable getPredictionsIterable() { 39 | 40 | return new Iterable() { 41 | 42 | public Iterator iterator() { 43 | return predictionsIterator; 44 | } 45 | }; 46 | } 47 | 48 | /* 49 | * (non-Javadoc) 50 | * 51 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 
52 | * ExampleProcessingManager#stopAll() 53 | */ 54 | public synchronized void stopAll() { 55 | 56 | isStopped = true; 57 | } 58 | 59 | /* 60 | * (non-Javadoc) 61 | * 62 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 63 | * ExampleProcessingManager#getTotalNumberOfExamplesSubmitted() 64 | */ 65 | public synchronized long getTotalNumberOfExamplesSubmitted() { 66 | return numExamplesSubmitted; 67 | } 68 | 69 | /* 70 | * (non-Javadoc) 71 | * 72 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 73 | * ExampleProcessingManager#getTotalNumberOfExamplesSkipped() 74 | */ 75 | public synchronized long getTotalNumberOfExamplesSkipped() { 76 | return numExamplesSkipped; 77 | } 78 | 79 | /* 80 | * (non-Javadoc) 81 | * 82 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 83 | * ExampleProcessingManager#getExampleSubmissionState() 84 | */ 85 | public synchronized ExampleSubmissionState getExampleSubmissionState() { 86 | return exampleSubmissionState; 87 | } 88 | 89 | /* 90 | * (non-Javadoc) 91 | * 92 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 
93 | * ExampleProcessingManager#getPredictionFetchState() 94 | */ 95 | public PredictionFetchState getPredictionFetchState() { 96 | 97 | return predictionsIterator.getPredictionFetchState(); 98 | } 99 | 100 | public synchronized void incrementNumberOfExamplesSubmitted() { 101 | numExamplesSubmitted++; 102 | } 103 | 104 | public synchronized void incrementNumberOfExamplesSkipped() { 105 | numExamplesSkipped++; 106 | } 107 | 108 | public synchronized void incrementNumberOfPredictionsFetched() { 109 | numPredictionsFetched++; 110 | } 111 | 112 | public synchronized void setExampleSubmissionState(ExampleSubmissionState newState) { 113 | exampleSubmissionState = newState; 114 | } 115 | 116 | public synchronized boolean isStopped() { 117 | return isStopped; 118 | } 119 | 120 | public synchronized long getTotalNumberOfPredictionsFetched() { 121 | return numPredictionsFetched; 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPExampleProcessorFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.util.concurrent.ExecutorService; 9 | 10 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 11 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor; 12 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory; 13 | 14 | /** 15 | * @author vrahimtoola 16 | * 17 | * A factory that returns example processors that submit examples to VW 18 | * over a TCP-IP socket and read predictions back the same way. 
19 | */ 20 | public class TCPIPExampleProcessorFactory implements ExampleProcessorFactory { 21 | 22 | private final TCPIPSocketFactory socketFactory; 23 | 24 | /* 25 | * An application wide thread pool service. 26 | */ 27 | private final ExecutorService executorService; 28 | 29 | public TCPIPExampleProcessorFactory(TCPIPSocketFactory socketFactory, 30 | ExecutorService executorService) { 31 | 32 | checkNotNull(socketFactory, "A null socket factory cannot be provided!"); 33 | checkNotNull(executorService, "A null executor service cannot be provided!"); 34 | 35 | this.socketFactory = socketFactory; 36 | this.executorService = executorService; 37 | } 38 | 39 | /* 40 | * (non-Javadoc) 41 | * 42 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor. 43 | * ExampleProcessorFactory#getExampleSubmitter(java.lang.Iterable, 44 | * java.util.EnumSet) 45 | */ 46 | public ExampleProcessor getExampleProcessor(ExamplesIterable theExamples) { 47 | 48 | // TODO: return a proper example submitter based on the provided 49 | // examples iterable by examining its attributes. 50 | 51 | // returning the TCP IP async submitter for now. 
52 | return new AsyncFailFastTCPIPExampleProcessor(socketFactory, executorService, theExamples); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPPredictionsIterator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.net.Socket; 10 | import java.util.Iterator; 11 | 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction; 16 | import com.eharmony.matching.vw.webservice.common.prediction.StringPrediction; 17 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler; 18 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException; 19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState; 20 | 21 | /** 22 | * @author vrahimtoola 23 | * 24 | * Reads predictions from VW over a TCP-IP socket. 25 | * 26 | * TODO: make it so that the example submitter doesn't start submitting 27 | * examples until someone actually starts to read predictions. TODO: 28 | * test what happens if the prediction fetcher closes the socket before 29 | * all examples have been submitted. 
30 | */ 31 | class TCPIPPredictionsIterator implements Iterator { 32 | 33 | private static final Logger LOGGER = LoggerFactory.getLogger(TCPIPPredictionsIterator.class); 34 | 35 | private final Socket socket; 36 | private final BufferedReader reader; 37 | private final ExampleProcessingEventHandler callback; 38 | private final TCPIPExampleProcessingManager exampleProcessingManager; 39 | 40 | private String nextLineToReturn = null; 41 | private PredictionFetchState predictionFetchState = PredictionFetchState.OnGoing; 42 | 43 | private boolean firstCallToHasNext = true; 44 | 45 | public TCPIPPredictionsIterator(Socket socket, ExampleProcessingEventHandler callback, TCPIPExampleProcessingManager exampleProcessingManager) throws IOException { 46 | 47 | this.reader = new BufferedReader(new InputStreamReader(socket.getInputStream())); 48 | this.callback = callback; 49 | this.socket = socket; 50 | this.exampleProcessingManager = exampleProcessingManager; 51 | } 52 | 53 | public boolean hasNext() { 54 | 55 | if (firstCallToHasNext) { 56 | 57 | LOGGER.debug("First call to advance in TCP IP iterator!"); 58 | 59 | advance(); // don't want to call this in the constructor because 60 | // that could block. 
61 | 62 | firstCallToHasNext = false; 63 | 64 | } 65 | 66 | return nextLineToReturn != null; 67 | } 68 | 69 | public Prediction next() { 70 | String toReturn = nextLineToReturn; 71 | 72 | advance(); 73 | 74 | return new StringPrediction(toReturn); 75 | } 76 | 77 | public void remove() { 78 | throw new UnsupportedOperationException("The 'remove' operation is not supported!"); 79 | } 80 | 81 | private void advance() { 82 | 83 | boolean closeReader = false; 84 | boolean faulted = false; 85 | try { 86 | 87 | nextLineToReturn = reader.readLine(); 88 | 89 | LOGGER.trace("Read prediction: {}", nextLineToReturn); 90 | 91 | closeReader = nextLineToReturn == null; 92 | 93 | if (nextLineToReturn != null) exampleProcessingManager.incrementNumberOfPredictionsFetched(); 94 | 95 | } 96 | catch (Exception e) { 97 | 98 | LOGGER.error("Error in TCPIPPredictionIterator: {}", e.getMessage(), e); 99 | 100 | faulted = true; 101 | 102 | closeReader = true; 103 | 104 | setPredictionFetchState(PredictionFetchState.PredictionFetchFault); 105 | 106 | if (callback != null) callback.onPredictionFetchException(exampleProcessingManager, new PredictionFetchException(e)); 107 | 108 | } 109 | finally { 110 | 111 | if (closeReader) { 112 | try { 113 | if (socket.isClosed() == false) reader.close(); 114 | } 115 | catch (Exception e2) { 116 | LOGGER.warn("Failed to close the reader in predictions iterator: {}", e2.getMessage(), e2); 117 | } 118 | 119 | if (socket.isClosed() == false) try { 120 | socket.close(); 121 | } 122 | catch (Exception e2) { 123 | LOGGER.warn("Failed to close the socket in predictions iterator: {}", e2.getMessage(), e2); 124 | } 125 | 126 | nextLineToReturn = null; // need to set this explicitly, since 127 | // an exception may have 128 | // occurred 129 | // necessitating the closing of the 130 | // reader. 
131 | 132 | if (!faulted) 133 | setPredictionFetchState(PredictionFetchState.Complete); 134 | else { 135 | //faulted, so halt the example submission process 136 | LOGGER.warn("Stopping example submission from within the TCP IP predictions iterator..."); 137 | exampleProcessingManager.stopAll(); 138 | 139 | //if faulted, the prediction fetch state will already have been set in the exception handling code. 140 | } 141 | 142 | if (callback != null) callback.onPredictionFetchComplete(exampleProcessingManager); 143 | } 144 | } 145 | } 146 | 147 | private synchronized void setPredictionFetchState(PredictionFetchState predictionFetchState) { 148 | this.predictionFetchState = predictionFetchState; 149 | } 150 | 151 | public synchronized PredictionFetchState getPredictionFetchState() { 152 | return predictionFetchState; 153 | } 154 | 155 | } 156 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPSocketFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import java.io.IOException; 7 | import java.net.Socket; 8 | import java.net.UnknownHostException; 9 | 10 | /** 11 | * @author vrahimtoola 12 | * 13 | * Returns a TCP IP socket that can be used for communicating with a VW 14 | * daemon. This abstraction has been added to facilitate testing of the 15 | * TCP IP example submitters and prediction fetchers. 16 | */ 17 | public interface TCPIPSocketFactory { 18 | 19 | /* 20 | * Returns a socket connection to a running VW daemon. 21 | * 22 | * @returns A TCP IP socket that can be used for communicating with a VW 23 | * daemon. Note that the caller owns this socket and is responsible for any 24 | * cleanup (ie, shutting it down when done). 
25 | */ 26 | Socket getSocket() throws UnknownHostException, IOException; 27 | } 28 | -------------------------------------------------------------------------------- /vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPSocketFactoryImpl.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import static com.google.common.base.Preconditions.checkArgument; 7 | 8 | import java.io.IOException; 9 | import java.net.Socket; 10 | import java.net.UnknownHostException; 11 | 12 | import org.apache.commons.lang3.StringUtils; 13 | import org.slf4j.Logger; 14 | import org.slf4j.LoggerFactory; 15 | 16 | /** 17 | * @author vrahimtoola 18 | * 19 | * A basic implementation of the TCPIPSocketFactory interface. 20 | */ 21 | public class TCPIPSocketFactoryImpl implements TCPIPSocketFactory { 22 | 23 | private static final Logger LOGGER = LoggerFactory.getLogger(TCPIPSocketFactoryImpl.class); 24 | 25 | private final String vwHost; 26 | private final int vwPort; 27 | 28 | public TCPIPSocketFactoryImpl(String vwHost, int vwPort) { 29 | 30 | checkArgument(StringUtils.isBlank(vwHost) == false, "The hostname for VW must be provided!"); 31 | checkArgument(vwPort > 0, "Invalid port specified for VW!"); 32 | 33 | this.vwHost = vwHost; 34 | this.vwPort = vwPort; 35 | } 36 | 37 | public Socket getSocket() throws UnknownHostException, IOException { 38 | 39 | LOGGER.debug("Returning socket for host: {} and port: {}", vwHost, vwPort); 40 | 41 | return new Socket(vwHost, vwPort); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /vw-webservice-jersey/README.md: -------------------------------------------------------------------------------- 1 | vw-webservice-jersey 2 | ================== 3 | 4 | An implementation of the VW web service that uses Jersey to 
create a RESTful web service. 5 | -------------------------------------------------------------------------------- /vw-webservice-jersey/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | 6 | 7 | com.eharmony.matching 8 | vw-webservice 9 | 0.1.0-SNAPSHOT 10 | ../../vw-webservice 11 | 12 | 13 | vw-webservice-jersey 14 | war 15 | VW Web Service (Jersey). 16 | Jersey based web service. 17 | 18 | 19 | 2.5.1 20 | 21 | 22 | 23 | 29 | 30 | 31 | vw-webservice-jersey-${project.version} 32 | 33 | 34 | 35 | org.apache.maven.plugins 36 | maven-compiler-plugin 37 | 38 | 39 | 40 | org.apache.maven.plugins 41 | maven-surefire-plugin 42 | 43 | 44 | 45 | org.apache.maven.plugins 46 | maven-war-plugin 47 | 2.1.1 48 | 49 | src/main/webapp/WEB-INF/web.xml 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | org.glassfish.jersey 60 | jersey-bom 61 | ${jersey.version} 62 | pom 63 | import 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | com.eharmony.matching 72 | vw-webservice-core 73 | ${project.version} 74 | 75 | 76 | 77 | com.eharmony.matching 78 | vw-webservice-common 79 | ${project.version} 80 | 81 | 82 | 83 | org.glassfish.jersey.containers 84 | 86 | 87 | jersey-container-servlet 88 | 89 | 90 | 94 | 95 | 96 | 97 | junit 98 | junit 99 | 100 | 101 | 102 | org.mockito 103 | mockito-all 104 | 105 | 106 | 107 | 108 | 109 | 114 | 115 | 116 | 117 | 118 | org.apache.commons 119 | commons-lang3 120 | 121 | 122 | 123 | 124 | org.springframework 125 | spring-core 126 | 127 | 128 | org.springframework 129 | spring-beans 130 | 131 | 132 | org.springframework 133 | spring-context 134 | 135 | 136 | org.springframework 137 | spring-context-support 138 | 139 | 140 | org.springframework 141 | spring-expression 142 | 143 | 144 | 145 | 146 | org.glassfish.jersey.ext 147 | jersey-spring3 148 | ${jersey.version} 149 | 150 | 151 | 152 | 153 | org.slf4j 154 | slf4j-api 155 | 156 | 157 | 158 | ch.qos.logback 159 | logback-classic 160 | 161 | 162 | 163 | 
ch.qos.logback 164 | logback-core 165 | 166 | 167 | 168 | 169 | com.google.guava 170 | guava 171 | 172 | 173 | 174 | 175 | com.google.code.gson 176 | gson 177 | 178 | 179 | 180 | 181 | com.fasterxml.jackson.core 182 | jackson-core 183 | 184 | 185 | 186 | 187 | commons-collections 188 | commons-collections 189 | 190 | 191 | 192 | 193 | com.ning 194 | async-http-client 195 | 1.7.22 196 | test 197 | 198 | 199 | 200 | 201 | 202 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/PredictResource.java: -------------------------------------------------------------------------------- 1 | package com.eharmony.matching.vw.webservice; 2 | 3 | import static com.google.common.base.Preconditions.checkNotNull; 4 | 5 | import java.io.IOException; 6 | import java.util.concurrent.ExecutorService; 7 | 8 | import javax.ws.rs.Consumes; 9 | import javax.ws.rs.GET; 10 | import javax.ws.rs.POST; 11 | import javax.ws.rs.Path; 12 | import javax.ws.rs.Produces; 13 | import javax.ws.rs.core.MediaType; 14 | 15 | import org.glassfish.jersey.server.ChunkedOutput; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | import org.springframework.beans.factory.annotation.Autowired; 19 | 20 | import com.eharmony.matching.vw.webservice.common.example.Example; 21 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 22 | import com.eharmony.matching.vw.webservice.common.prediction.PredictionMediaTypes; 23 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory; 25 | 26 | /** 27 | * Root resource (exposed at "predict" path) 28 | */ 29 | @Path("/predict") 30 | public class PredictResource { 31 | 32 | private final ExampleProcessorFactory exampleProcessorFactory; 33 | 34 | private final ExecutorService executorService; 35 | 36 | private static final Logger 
LOGGER = LoggerFactory.getLogger(PredictResource.class); 37 | 38 | @Autowired 39 | public PredictResource(ExecutorService executorService, ExampleProcessorFactory exampleProcessorFactory) { 40 | 41 | checkNotNull(exampleProcessorFactory, "An example processor factory must be provided!"); 42 | 43 | this.exampleProcessorFactory = exampleProcessorFactory; 44 | 45 | this.executorService = executorService; 46 | 47 | } 48 | 49 | @POST 50 | @Consumes({ ExampleMediaTypes.PLAINTEXT_0_1_0, MediaType.TEXT_PLAIN, ExampleMediaTypes.SIMPLE_PROTOBUF_0_1_0, ExampleMediaTypes.SIMPLE_JSON_0_1_0, ExampleMediaTypes.STRUCTURED_JSON_0_1_0 }) 51 | @Produces({ PredictionMediaTypes.PLAINTEXT_0_1_0 }) 52 | @Path("/main") 53 | public ChunkedOutput doPredict(ExamplesIterable examplesIterable) throws IOException { 54 | 55 | return new RequestHandler(executorService, exampleProcessorFactory).handleRequest(examplesIterable); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/RequestHandler.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice; 5 | 6 | import java.io.IOException; 7 | import java.util.concurrent.ExecutorService; 8 | 9 | import javax.ws.rs.WebApplicationException; 10 | 11 | import org.glassfish.jersey.server.ChunkedOutput; 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException; 16 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction; 17 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 18 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler; 20 | import 
com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager; 21 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor; 22 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory; 23 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException; 24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException; 25 | 26 | /** 27 | * @author vrahimtoola 28 | * 29 | * Handles an individual request to submit examples to VW and read back 30 | * the predictions. 31 | */ 32 | class RequestHandler implements ExampleProcessingEventHandler { 33 | 34 | private final ExampleProcessorFactory exampleProcessorFactory; 35 | 36 | private final Logger LOGGER = LoggerFactory.getLogger(RequestHandler.class); 37 | 38 | private final ExecutorService executorService; 39 | 40 | public RequestHandler(ExecutorService executorService, ExampleProcessorFactory exampleProcessorFactory) { 41 | 42 | this.exampleProcessorFactory = exampleProcessorFactory; 43 | this.executorService = executorService; 44 | } 45 | 46 | public ChunkedOutput handleRequest(ExamplesIterable examplesIterable) { 47 | 48 | ChunkedOutput chunkedOutput = new ChunkedOutput(String.class); 49 | 50 | // get the example processor. 
51 | ExampleProcessor exampleProcessor = exampleProcessorFactory.getExampleProcessor(examplesIterable); 52 | 53 | if (exampleProcessor.getExampleProcessorFeatures().isAsync() == false) 54 | submitSynchronously(exampleProcessor, chunkedOutput); 55 | else { 56 | submitAsynchronously(exampleProcessor, chunkedOutput); 57 | } 58 | 59 | return chunkedOutput; 60 | } 61 | 62 | private void submitSynchronously(final ExampleProcessor exampleProcessor, ChunkedOutput chunkedOutput) { 63 | 64 | final ExampleProcessingEventHandler eventHandler = this; 65 | 66 | long numPredictionsWritten = 0; 67 | 68 | Iterable predictions = null; 69 | 70 | ExampleProcessingManager exampleProcessingManager = null; 71 | 72 | try { 73 | 74 | LOGGER.info("About to submit examples..."); 75 | 76 | // note: depending on the example submitter in use, 77 | // the call to submitExamples could spawn off a separate 78 | // thread to submit examples to VW. 79 | exampleProcessingManager = exampleProcessor.submitExamples(eventHandler); 80 | 81 | predictions = exampleProcessingManager.getPredictionsIterable(); 82 | 83 | for (Prediction p : predictions) { 84 | 85 | try { 86 | String toWrite = p.getVWStringRepresentation() + "\n"; 87 | 88 | LOGGER.trace("Writing prediction: {}", toWrite); 89 | 90 | chunkedOutput.write(toWrite); 91 | 92 | numPredictionsWritten++; 93 | } 94 | catch (IOException e) { 95 | LOGGER.error("IOException when writing out prediction! Message: {}", e.getMessage(), e); 96 | throw new WebApplicationException(e); //nothing we can do if we can't send any data back to the client! 
97 | } 98 | } 99 | 100 | LOGGER.info("Submitted a total of {} examples", exampleProcessingManager.getTotalNumberOfExamplesSubmitted()); 101 | LOGGER.info("Skipped a total of {} examples", exampleProcessingManager.getTotalNumberOfExamplesSkipped()); 102 | LOGGER.info("Read a total of {} predictions from VW", exampleProcessingManager.getTotalNumberOfPredictionsFetched()); 103 | LOGGER.info("Wrote a total of {} predictions", numPredictionsWritten); 104 | LOGGER.info("Final example submission state: {}", exampleProcessingManager.getExampleSubmissionState()); 105 | LOGGER.info("Final prediction fetch state: {}", exampleProcessingManager.getPredictionFetchState()); 106 | 107 | } 108 | catch (ExampleSubmissionException e) { 109 | 110 | LOGGER.error("Exception when submitting examples! Message: {}", e.getMessage(), e); 111 | 112 | //output.write(("Exception when submitting examples! Message: " + e.getMessage()).getBytes()); 113 | } 114 | catch (Exception e) { 115 | //if any other exception occurs, stop the example submission process. 116 | LOGGER.error("Other exception when reading predictions: {}", e.getMessage(), e); 117 | 118 | if (exampleProcessingManager != null) { 119 | LOGGER.info("Stopping example submission..."); 120 | exampleProcessingManager.stopAll(); 121 | LOGGER.info("Example submission stopped."); 122 | } 123 | else { 124 | LOGGER.warn("Example processing manager was null!"); 125 | } 126 | } 127 | finally { 128 | try { 129 | chunkedOutput.close(); 130 | } 131 | catch (Exception e2) { 132 | 133 | LOGGER.error("Exception when flushing output stream of predictions! 
Message: {}", e2.getMessage(), e2); 134 | } 135 | 136 | } 137 | 138 | } 139 | 140 | private void submitAsynchronously(final ExampleProcessor exampleSubmitter, final ChunkedOutput chunkedOutput) { 141 | 142 | executorService.submit(new Runnable() { 143 | 144 | @Override 145 | public void run() { 146 | 147 | submitSynchronously(exampleSubmitter, chunkedOutput); 148 | 149 | } 150 | 151 | }); 152 | 153 | } 154 | 155 | @Override 156 | public void onExampleReadException(ExampleProcessingManager exampleProcessingManager, ExampleReadException theException) { 157 | LOGGER.error("Example read exception: {}", theException.getMessage(), theException); 158 | } 159 | 160 | @Override 161 | public void onExampleFormatException(ExampleProcessingManager exampleProcessingManager, ExampleFormatException theException) { 162 | LOGGER.warn("Example format exception: {}", theException.getMessage(), theException); 163 | 164 | } 165 | 166 | @Override 167 | public void onExampleSubmissionException(ExampleProcessingManager exampleProcessingManager, ExampleSubmissionException theException) { 168 | LOGGER.error("Example submission exception: {}", theException.getMessage(), theException); 169 | 170 | } 171 | 172 | @Override 173 | public void onExampleSubmissionComplete(ExampleProcessingManager exampleProcessingManager) { 174 | LOGGER.info("Example submission complete!"); 175 | 176 | } 177 | 178 | @Override 179 | public void onPredictionFetchException(ExampleProcessingManager exampleProcessingManager, PredictionFetchException theException) { 180 | LOGGER.error("Prediction fetch exception: {}", theException.getMessage(), theException); 181 | 182 | } 183 | 184 | @Override 185 | public void onPredictionFetchComplete(ExampleProcessingManager exampleProcessingManager) { 186 | LOGGER.info("Prediction fetch complete!"); 187 | 188 | } 189 | } 190 | -------------------------------------------------------------------------------- 
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/GsonJsonExamplesProvider.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.io.InputStreamReader; 9 | import java.util.Iterator; 10 | 11 | import com.eharmony.matching.vw.webservice.common.example.Example; 12 | import com.eharmony.matching.vw.webservice.common.example.StringExample; 13 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 14 | import com.google.common.collect.AbstractIterator; 15 | import com.google.gson.stream.JsonReader; 16 | 17 | /** 18 | * @author vrahimtoola 19 | * 20 | * Uses Google's GSON to provide json examples. 21 | */ 22 | public class GsonJsonExamplesProvider implements JsonExamplesProvider { 23 | 24 | @Override 25 | public Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException { 26 | 27 | final JsonReader jsonReader = new JsonReader(new InputStreamReader(inputStream)); 28 | 29 | AbstractIterator theIterator = new AbstractIterator() { 30 | 31 | private boolean readStartOfArray = false; 32 | 33 | @Override 34 | public Example computeNext() { 35 | 36 | try { 37 | 38 | if (!readStartOfArray) { 39 | jsonReader.beginArray(); 40 | readStartOfArray = true; 41 | } 42 | 43 | if (jsonReader.hasNext()) { 44 | 45 | return readIndividualJsonExample(jsonReader); 46 | 47 | } 48 | else { 49 | jsonReader.endArray(); 50 | return endOfData(); 51 | } 52 | } 53 | catch (Exception e) { 54 | throw new ExampleReadException(e); 55 | } 56 | 57 | } 58 | }; 59 | 60 | return theIterator; 61 | } 62 | 63 | private Example readIndividualJsonExample(JsonReader reader) throws IOException { 64 | reader.beginObject(); 65 | 66 | String exampleString = null; 67 | 68 | while 
(reader.hasNext()) { 69 | String propertyName = reader.nextName(); 70 | 71 | if (propertyName.equalsIgnoreCase("example")) { 72 | 73 | if (exampleString != null) throw new ExampleReadException("The property 'example' was found more than once in a single JSON example!"); 74 | 75 | exampleString = reader.nextString(); 76 | 77 | } 78 | else { 79 | throw new ExampleReadException("Unexpected property name found in JSON example: " + propertyName); 80 | } 81 | } 82 | 83 | reader.endObject(); 84 | 85 | if (exampleString == null) throw new ExampleReadException("Empty JSON example found!"); 86 | 87 | return new StringExample(exampleString); 88 | } 89 | } 90 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/JsonExamplesProvider.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.InputStream; 7 | import java.util.Iterator; 8 | 9 | import com.eharmony.matching.vw.webservice.common.example.Example; 10 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 11 | 12 | /** 13 | * @author vrahimtoola 14 | * 15 | * Provides VW examples represented as JSON. 16 | */ 17 | public interface JsonExamplesProvider { 18 | 19 | /* 20 | * Allows the caller to consume JSON examples from an input stream. 21 | * 22 | * @param inputStream The input stream to consume JSON examples from. 23 | * 24 | * @returns An iterator that allows the caller to iterate over the examples. 
25 | */ 26 | Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException; 27 | } 28 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/SimpleJsonExamplesMessageBodyReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.lang.annotation.Annotation; 9 | import java.lang.reflect.Type; 10 | import java.util.List; 11 | import java.util.Map.Entry; 12 | 13 | import javax.ws.rs.Consumes; 14 | import javax.ws.rs.WebApplicationException; 15 | import javax.ws.rs.core.MediaType; 16 | import javax.ws.rs.core.MultivaluedMap; 17 | import javax.ws.rs.ext.MessageBodyReader; 18 | import javax.ws.rs.ext.Provider; 19 | 20 | import org.apache.commons.lang3.StringUtils; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 25 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 26 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl; 27 | 28 | /** 29 | * @author vrahimtoola 30 | * 31 | */ 32 | @Consumes({ ExampleMediaTypes.SIMPLE_JSON_0_1_0 }) 33 | @Provider 34 | public class SimpleJsonExamplesMessageBodyReader implements MessageBodyReader { 35 | 36 | private static final Logger LOGGER = LoggerFactory.getLogger(SimpleJsonExamplesMessageBodyReader.class); 37 | 38 | @Override 39 | public boolean isReadable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { 40 | 41 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type); 42 | 43 | boolean willReturn = 
mediaType.toString().equals(ExampleMediaTypes.SIMPLE_JSON_0_1_0) && type == ExamplesIterable.class; 44 | 45 | LOGGER.debug("Returning: {}", willReturn); 46 | 47 | return willReturn; 48 | } 49 | 50 | @Override 51 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException { 52 | 53 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) { 54 | LOGGER.debug("Rec'd HTTP headers: "); 55 | 56 | for (Entry> entry : httpHeaders.entrySet()) { 57 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ',')); 58 | } 59 | } 60 | 61 | //TODO: hard-coding to GsonJsonExamplesProvider for now 62 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, new GsonJsonExamplesProvider().getExamplesFromStream(entityStream)); 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonExamplesMessageBodyReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.lang.annotation.Annotation; 9 | import java.lang.reflect.Type; 10 | import java.util.List; 11 | import java.util.Map.Entry; 12 | 13 | import javax.ws.rs.Consumes; 14 | import javax.ws.rs.WebApplicationException; 15 | import javax.ws.rs.core.MediaType; 16 | import javax.ws.rs.core.MultivaluedMap; 17 | import javax.ws.rs.ext.MessageBodyReader; 18 | import javax.ws.rs.ext.Provider; 19 | 20 | import org.apache.commons.lang3.StringUtils; 21 | import org.slf4j.Logger; 22 | import org.slf4j.LoggerFactory; 23 | 24 | import 
com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 25 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 26 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl; 27 | 28 | /** 29 | * @author vrahimtoola 30 | * Reads structured json examples from a stream. 31 | */ 32 | @Consumes({ ExampleMediaTypes.STRUCTURED_JSON_0_1_0 }) 33 | @Provider 34 | public class StructuredJsonExamplesMessageBodyReader implements MessageBodyReader { 35 | 36 | private static final Logger LOGGER = LoggerFactory.getLogger(StructuredJsonExamplesMessageBodyReader.class); 37 | 38 | @Override 39 | public boolean isReadable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { 40 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type); 41 | 42 | boolean willReturn = mediaType.toString().equals(ExampleMediaTypes.STRUCTURED_JSON_0_1_0) && type == ExamplesIterable.class; 43 | 44 | LOGGER.debug("Returning: {}", willReturn); 45 | 46 | return willReturn; 47 | } 48 | 49 | @Override 50 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException { 51 | 52 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) { 53 | LOGGER.debug("Rec'd HTTP headers: "); 54 | 55 | for (Entry> entry : httpHeaders.entrySet()) { 56 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ',')); 57 | } 58 | } 59 | 60 | //TODO: hard-coding to GsonJsonExamplesProvider for now 61 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, new StructuredJsonExamplesProvider(-1, -1).getExamplesFromStream(entityStream)); 62 | 63 | } 64 | } 65 | -------------------------------------------------------------------------------- 
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonExamplesProvider.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.io.IOException; 9 | import java.io.InputStream; 10 | import java.io.InputStreamReader; 11 | import java.util.Iterator; 12 | 13 | import org.apache.commons.lang3.StringUtils; 14 | import org.slf4j.Logger; 15 | import org.slf4j.LoggerFactory; 16 | 17 | import com.eharmony.matching.vw.webservice.common.example.Example; 18 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException; 19 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 20 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace; 21 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace.NamespaceBuilder; 22 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 23 | import com.google.common.collect.AbstractIterator; 24 | import com.google.gson.stream.JsonReader; 25 | import com.google.gson.stream.JsonToken; 26 | 27 | /** 28 | * @author vrahimtoola 29 | * 30 | * An example reader writer for the Json format. The Json 31 | * format is expected to adhere to the format specified in 32 | * vw_example_schema.json, placed under src/test/resources. 33 | * The reason it's been placed under src/test/resources as opposed to 34 | * src/main/resources is that right now, this schema 35 | * file is only being used to document the schema, but not being in a 36 | * programmatic way to verify schema adherence (this is 37 | * being done via hand-coded logic). 
38 | */ 39 | public class StructuredJsonExamplesProvider implements JsonExamplesProvider { 40 | 41 | private static final Logger LOGGER = LoggerFactory.getLogger(StructuredJsonExamplesProvider.class); 42 | 43 | /* 44 | * The maximum number of features to read into a given namespace. 45 | */ 46 | private final int maxNumberOfFeaturesPerNamespace; 47 | 48 | /* 49 | * The maximum number of namespaces to read into a given example. 50 | */ 51 | private final int maxNumberOfNamespacesPerExample; 52 | 53 | /* 54 | * Constructor. 55 | * 56 | * @param maxNumberOfFeaturesPerNamespace <= 0 or Integer.MAX_VALUE mean 57 | * there's no limit. 58 | * 59 | * @param maxNumberOfNamespacesPerExample <= 0 or Integer.MAX_VALUE mean 60 | * there's no limit. 61 | */ 62 | public StructuredJsonExamplesProvider(int maxNumberOfFeaturesPerNamespace, int maxNumberOfNamespacesPerExample) { 63 | this.maxNumberOfFeaturesPerNamespace = maxNumberOfFeaturesPerNamespace; 64 | this.maxNumberOfNamespacesPerExample = maxNumberOfNamespacesPerExample; 65 | } 66 | 67 | private StructuredExample readExample(long exampleNumber, JsonReader jsonReader) throws IOException { 68 | 69 | jsonReader.beginObject(); 70 | 71 | boolean labelRead = false; 72 | boolean namespacesRead = false; 73 | boolean tagRead = false; 74 | 75 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder(); 76 | 77 | boolean atLeastOnePropertyRead = false; 78 | 79 | while (jsonReader.hasNext()) { 80 | 81 | String propertyNameOriginal = jsonReader.nextName(); 82 | 83 | String propertyName = propertyNameOriginal.trim().toLowerCase(); 84 | 85 | if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_LABEL_PROPERTY)) { 86 | 87 | if (labelRead) { 88 | 89 | throw new ExampleFormatException(exampleNumber, "The 'label' property must only appear once in an example!"); 90 | 91 | } 92 | 93 | if (jsonReader.peek() != JsonToken.NULL) 94 | exampleBuilder.setLabel(jsonReader.nextString()); 95 | else { 96 | 
jsonReader.nextNull(); 97 | } 98 | 99 | labelRead = true; 100 | 101 | atLeastOnePropertyRead = true; 102 | 103 | } 104 | else if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_TAG_PROPERTY)) { 105 | 106 | if (tagRead) throw new ExampleFormatException(exampleNumber, "The 'tag' property must only appear once in an example!"); 107 | 108 | if (jsonReader.peek() != JsonToken.NULL) 109 | exampleBuilder.setTag(jsonReader.nextString()); 110 | else { 111 | jsonReader.nextNull(); 112 | } 113 | } 114 | else if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_NAMESPACES_PROPERTY)) { 115 | 116 | if (namespacesRead) { 117 | 118 | throw new ExampleFormatException(exampleNumber, "The 'namespaces' property must only appear once in an example!"); 119 | } 120 | 121 | if (jsonReader.peek() != JsonToken.NULL) { 122 | 123 | jsonReader.beginArray(); 124 | 125 | int numNamespacesRead = 0; 126 | 127 | while (jsonReader.hasNext()) { 128 | 129 | Namespace namespace = readNamespace(exampleNumber, jsonReader); 130 | 131 | numNamespacesRead++; 132 | 133 | if (maxNumberOfNamespacesPerExample > 0 && maxNumberOfNamespacesPerExample < Integer.MAX_VALUE && numNamespacesRead > maxNumberOfNamespacesPerExample) { 134 | throw new ExampleFormatException(exampleNumber, "The maximum number of namespaces per example, " + maxNumberOfNamespacesPerExample + " was exceeded!"); 135 | } 136 | 137 | exampleBuilder.addNamespace(namespace); 138 | } 139 | 140 | jsonReader.endArray(); 141 | 142 | } 143 | else { 144 | jsonReader.nextNull(); 145 | } 146 | 147 | namespacesRead = true; 148 | 149 | atLeastOnePropertyRead = true; 150 | 151 | } 152 | else { 153 | 154 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading example!"); 155 | } 156 | 157 | } 158 | 159 | jsonReader.endObject(); 160 | 161 | if (atLeastOnePropertyRead == false) 162 | return StructuredExample.EMPTY_EXAMPLE; 163 | else 164 | return exampleBuilder.build(); //this might 
return a normal example or a PIPE example. 165 | } 166 | 167 | private Namespace readNamespace(long exampleNumber, JsonReader jsonReader) throws IOException { 168 | jsonReader.beginObject(); 169 | 170 | StructuredExample.Namespace.NamespaceBuilder nsBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 171 | 172 | boolean nameRead = false, scalingFactorRead = false, featuresRead = false; 173 | 174 | while (jsonReader.hasNext()) { 175 | 176 | String propertyNameOriginal = jsonReader.nextName(); 177 | String propertyName = propertyNameOriginal.trim().toLowerCase(); 178 | 179 | if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_NAME_PROPERTY)) { 180 | 181 | if (nameRead) { 182 | 183 | throw new ExampleFormatException(exampleNumber, "The 'name' property must only appear once in a namespace!"); 184 | } 185 | 186 | if (jsonReader.peek() == JsonToken.NULL) 187 | jsonReader.nextNull(); 188 | else { 189 | String namespace = jsonReader.nextString(); 190 | nsBuilder.setName(namespace); 191 | } 192 | nameRead = true; 193 | } 194 | else if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_SCALING_FACTOR_PROPERTY)) { 195 | 196 | if (scalingFactorRead) { 197 | 198 | throw new ExampleFormatException(exampleNumber, "The 'value' property must only appear once in a namespace!"); 199 | } 200 | 201 | if (jsonReader.peek() == JsonToken.NULL) 202 | jsonReader.nextNull(); 203 | else { 204 | double scalingFactor = jsonReader.nextDouble(); 205 | nsBuilder.setScalingFactor(Float.valueOf((float) scalingFactor)); 206 | } 207 | scalingFactorRead = true; 208 | 209 | } 210 | else if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_FEATURES_PROPERTY)) { 211 | 212 | if (featuresRead) { 213 | 214 | throw new ExampleFormatException(exampleNumber, "The 'features' property must only appear once in a namespace!"); 215 | } 216 | 217 | if (jsonReader.peek() == JsonToken.NULL) { 218 | jsonReader.nextNull(); 219 | } 220 | else { 221 | 222 | jsonReader.beginArray(); 223 
| 224 | int numFeaturesAdded = 0; 225 | 226 | while (jsonReader.hasNext()) { 227 | readFeatureIntoNamespace(exampleNumber, nsBuilder, jsonReader); 228 | 229 | numFeaturesAdded++; 230 | 231 | if (maxNumberOfFeaturesPerNamespace > 0 && maxNumberOfFeaturesPerNamespace < Integer.MAX_VALUE && numFeaturesAdded > maxNumberOfFeaturesPerNamespace) { 232 | throw new ExampleFormatException(exampleNumber, "The maximum number of features per namespace, " + maxNumberOfFeaturesPerNamespace + " was exceeded!"); 233 | } 234 | } 235 | 236 | jsonReader.endArray(); 237 | 238 | } 239 | featuresRead = true; 240 | 241 | } 242 | else { 243 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading namespace!"); 244 | } 245 | } 246 | 247 | jsonReader.endObject(); 248 | 249 | return nsBuilder.build(); 250 | } 251 | 252 | private void readFeatureIntoNamespace(long exampleNumber, NamespaceBuilder nsBuilder, JsonReader jsonReader) throws IOException { 253 | jsonReader.beginObject(); 254 | 255 | String name = null; 256 | Float value = null; 257 | 258 | boolean nameRead = false, valueRead = false; 259 | 260 | while (jsonReader.hasNext()) { 261 | 262 | String propertyNameOriginal = jsonReader.nextName(); 263 | 264 | String propertyName = propertyNameOriginal.toLowerCase(); 265 | 266 | if (propertyName.equals(StructuredJsonPropertyNames.FEATURE_NAME_PROPERTY)) { 267 | 268 | if (nameRead) { 269 | 270 | throw new ExampleFormatException(exampleNumber, "The 'name' property can only appear once in a feature!"); 271 | } 272 | 273 | name = jsonReader.nextString(); //feature name should never be null, so not doing the null check here. if it's null, let the exception 274 | //be propagated. 
275 | 276 | nameRead = true; 277 | 278 | } 279 | else if (propertyName.equals(StructuredJsonPropertyNames.FEATURE_VALUE_PROPERTY)) { 280 | 281 | if (valueRead) { 282 | 283 | throw new ExampleFormatException(exampleNumber, "The 'value' property can only appear once in a feature!"); 284 | } 285 | 286 | if (jsonReader.peek() == JsonToken.NULL) 287 | jsonReader.nextNull(); 288 | else 289 | value = Float.valueOf((float) jsonReader.nextDouble()); 290 | 291 | valueRead = true; 292 | 293 | } 294 | else { 295 | 296 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading feature!"); 297 | } 298 | 299 | } 300 | 301 | jsonReader.endObject(); 302 | 303 | if (StringUtils.isBlank(name) == false) //add feature only if the name exists. 304 | nsBuilder.addFeature(name, value); 305 | } 306 | 307 | @Override 308 | public Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException { 309 | 310 | checkNotNull(inputStream); 311 | 312 | final InputStream theInputStream = inputStream; 313 | 314 | return new AbstractIterator() { 315 | 316 | private boolean didBeginArray = false; 317 | 318 | private long currentExampleNumber = 1; 319 | 320 | private TracingJsonReader jsonReader; 321 | 322 | private boolean closeReader = false; 323 | 324 | @Override 325 | protected StructuredExample computeNext() { 326 | 327 | try { 328 | if (!didBeginArray) { 329 | 330 | jsonReader = new TracingJsonReader(new InputStreamReader(theInputStream), LOGGER.isTraceEnabled()); 331 | 332 | jsonReader.beginArray(); 333 | didBeginArray = true; 334 | } 335 | 336 | if (jsonReader.hasNext()) { 337 | 338 | StructuredExample toReturn = readExample(currentExampleNumber++, jsonReader); 339 | 340 | jsonReader.reset(); //prepare for next example 341 | 342 | return toReturn; 343 | } 344 | else { 345 | 346 | jsonReader.endArray(); 347 | 348 | closeReader = true; 349 | 350 | return (StructuredExample) endOfData(); 351 | } 352 | } 353 | catch 
(ExampleFormatException ee) { 354 | 355 | if (LOGGER.isTraceEnabled()) { 356 | try { 357 | LOGGER.error("Erroneous JSON example: {}", jsonReader.getAllJsonReadSoFar()); 358 | } 359 | catch (IOException e) { 360 | LOGGER.error("Failed to spit out erroneous JSON example!", e); 361 | } 362 | } 363 | 364 | closeReader = true; 365 | 366 | //make sure that we've set the example number, useful for debugging 367 | ee.setExampleNumber(currentExampleNumber - 1); 368 | 369 | throw ee; 370 | } 371 | catch (Exception e) { 372 | 373 | closeReader = true; 374 | 375 | if (jsonReader != null) try { 376 | 377 | LOGGER.error("Example read exception when attempting to read example number {} - tracing json output: {}", currentExampleNumber - 1, jsonReader.getAllJsonReadSoFar()); 378 | } 379 | catch (IOException e1) { 380 | LOGGER.error("Error: {}", e1); 381 | } 382 | 383 | throw new ExampleReadException(e); 384 | } 385 | finally { 386 | 387 | if (closeReader) try { 388 | if (jsonReader != null) { 389 | jsonReader.close(); 390 | } 391 | } 392 | catch (Exception e2) { 393 | LOGGER.warn("Error closing JSON reader! Message: {}", e2.getMessage(), e2); 394 | } 395 | } 396 | 397 | } 398 | }; 399 | 400 | } 401 | 402 | } 403 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonPropertyNames.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | /** 7 | * @author vrahimtoola 8 | * Names of properties in structured json. 
9 | */ 10 | public class StructuredJsonPropertyNames { 11 | 12 | public static final String EXAMPLE_LABEL_PROPERTY = "label"; 13 | public static final String EXAMPLE_TAG_PROPERTY = "tag"; 14 | public static final String EXAMPLE_NAMESPACES_PROPERTY = "namespaces"; 15 | 16 | public static final String NAMESPACE_NAME_PROPERTY = "name"; 17 | public static final String NAMESPACE_SCALING_FACTOR_PROPERTY = "scale"; 18 | public static final String NAMESPACE_FEATURES_PROPERTY = "features"; 19 | 20 | public static final String FEATURE_NAME_PROPERTY = "name"; 21 | public static final String FEATURE_VALUE_PROPERTY = "value"; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/TracingJsonReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.io.IOException; 9 | import java.io.Reader; 10 | import java.io.StringWriter; 11 | 12 | import org.slf4j.Logger; 13 | import org.slf4j.LoggerFactory; 14 | 15 | import com.fasterxml.jackson.core.JsonFactory; 16 | import com.fasterxml.jackson.core.JsonGenerator; 17 | import com.google.gson.stream.JsonReader; 18 | import com.google.gson.stream.JsonToken; 19 | 20 | /** 21 | * @author vrahimtoola 22 | * 23 | * A Json reader that writes everything it reads about an example, into 24 | * a json writer. The only time it will not do this is when skipValue() 25 | * is called, in which case the string "(SKIPPED VALUE)" is written out 26 | * instead. 27 | * 28 | * The JsonReader uses Gson because I like it's API a lot better than 29 | * Jackson's, which I find to be lower level than Gson's. 
However, with 30 | * Gson the writer will not write out just the fieldname, you have to 31 | * give it the value of the field as well. Even if you call flush() on 32 | * it, it still won't write out just a fieldname because that's not 33 | * valid Json (you need the value to go along with it). 34 | * 35 | * For the purpose of debugging, I want ALL the Json exactly as it's 36 | * been read, and the Jackson writer lets you spit out just the 37 | * fieldnames if need be, without forcing you to supply a null value. 38 | * 39 | * It's not ideal since it requires the project to pull in 2 separate 40 | * libraries for the same aspect (JSON processing), but I'm not going to 41 | * fuss over it for the moment... 42 | */ 43 | public class TracingJsonReader extends JsonReader { 44 | 45 | private static final Logger LOGGER = LoggerFactory.getLogger(TracingJsonReader.class); 46 | 47 | private final String SKIPPED_VALUE_STRING = "(SKIPPED VALUE)"; 48 | 49 | private StringWriter exampleJsonWriter; 50 | private JsonGenerator debugWriter; 51 | 52 | public TracingJsonReader(Reader in, boolean isTracingEnabled) throws IOException { 53 | super(in); 54 | 55 | checkNotNull(in); 56 | 57 | if (isTracingEnabled) { 58 | this.exampleJsonWriter = new StringWriter(); 59 | this.debugWriter = new JsonFactory().createGenerator(exampleJsonWriter); 60 | } 61 | 62 | } 63 | 64 | /* 65 | * Returns all the Json that has been read in thus far. 66 | */ 67 | public String getAllJsonReadSoFar() throws IOException { 68 | if (debugWriter != null) { 69 | debugWriter.flush(); 70 | return exampleJsonWriter.toString(); 71 | } 72 | 73 | return ""; 74 | } 75 | 76 | /* 77 | * Resets the string writer, making this instance ready for the next 78 | * example. 79 | */ 80 | public void reset() throws IOException { 81 | 82 | if (debugWriter != null) { //if the debugwriter is null, then tracing is not enabled, so don't do anything. 
83 | exampleJsonWriter.getBuffer().setLength(0); 84 | } 85 | } 86 | 87 | @Override 88 | public void beginArray() throws IOException { 89 | super.beginArray(); 90 | 91 | if (debugWriter != null) debugWriter.writeStartArray(); 92 | 93 | } 94 | 95 | @Override 96 | public void beginObject() throws IOException { 97 | super.beginObject(); 98 | 99 | if (debugWriter != null) debugWriter.writeStartObject(); 100 | } 101 | 102 | @Override 103 | public void close() throws IOException { 104 | super.close(); 105 | 106 | if (debugWriter != null) debugWriter.close(); 107 | } 108 | 109 | @Override 110 | public void endArray() throws IOException { 111 | super.endArray(); 112 | 113 | if (debugWriter != null) debugWriter.writeEndArray(); 114 | } 115 | 116 | @Override 117 | public void endObject() throws IOException { 118 | super.endObject(); 119 | 120 | if (debugWriter != null) debugWriter.writeEndObject(); 121 | } 122 | 123 | @Override 124 | public boolean hasNext() throws IOException { 125 | return super.hasNext(); 126 | } 127 | 128 | @Override 129 | public boolean nextBoolean() throws IOException { 130 | boolean toReturn = super.nextBoolean(); 131 | 132 | if (debugWriter != null) debugWriter.writeBoolean(toReturn); 133 | 134 | return toReturn; 135 | } 136 | 137 | @Override 138 | public double nextDouble() throws IOException { 139 | double toReturn = super.nextDouble(); 140 | 141 | if (debugWriter != null) debugWriter.writeNumber(toReturn); 142 | 143 | return toReturn; 144 | } 145 | 146 | @Override 147 | public int nextInt() throws IOException { 148 | int toReturn = super.nextInt(); 149 | 150 | if (debugWriter != null) debugWriter.writeNumber(toReturn); 151 | 152 | return toReturn; 153 | } 154 | 155 | @Override 156 | public long nextLong() throws IOException { 157 | long toReturn = super.nextLong(); 158 | 159 | if (debugWriter != null) debugWriter.writeNumber(toReturn); 160 | 161 | return toReturn; 162 | } 163 | 164 | @Override 165 | public String nextName() throws IOException { 166 
| String toReturn = super.nextName(); 167 | 168 | if (debugWriter != null) debugWriter.writeFieldName(toReturn); 169 | 170 | return toReturn; 171 | } 172 | 173 | @Override 174 | public void nextNull() throws IOException { 175 | super.nextNull(); 176 | 177 | if (debugWriter != null) debugWriter.writeNull(); //write out a null, so we stay in sync with the reader. 178 | } 179 | 180 | @Override 181 | public String nextString() throws IOException { 182 | String toReturn = super.nextString(); 183 | 184 | if (debugWriter != null) debugWriter.writeString(toReturn); 185 | 186 | return toReturn; 187 | } 188 | 189 | @Override 190 | public JsonToken peek() throws IOException { 191 | return super.peek(); 192 | } 193 | 194 | @Override 195 | public void skipValue() throws IOException { 196 | super.skipValue(); 197 | 198 | //write out a null, so we stay in sync with the reader 199 | if (debugWriter != null) debugWriter.writeString(SKIPPED_VALUE_STRING); 200 | } 201 | 202 | @Override 203 | public String toString() { 204 | return super.toString(); 205 | } 206 | 207 | } 208 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/plaintextexamplesmessagebodyreader/PlainTextExamplesMessageBodyReader.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.plaintextexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | import java.io.InputStream; 8 | import java.lang.annotation.Annotation; 9 | import java.lang.reflect.Type; 10 | import java.nio.charset.Charset; 11 | import java.util.List; 12 | import java.util.Map.Entry; 13 | 14 | import javax.ws.rs.Consumes; 15 | import javax.ws.rs.WebApplicationException; 16 | import javax.ws.rs.core.MediaType; 17 | import javax.ws.rs.core.MultivaluedMap; 18 | import javax.ws.rs.ext.MessageBodyReader; 19 | 
import javax.ws.rs.ext.Provider; 20 | 21 | import org.apache.commons.lang3.StringUtils; 22 | import org.glassfish.jersey.message.internal.ReaderWriter; 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 27 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 28 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl; 29 | 30 | /** 31 | * @author vrahimtoola 32 | * 33 | * A message body reader that can read an Iterable from the 34 | * message body of an HTTP request. 35 | */ 36 | @Consumes({ MediaType.TEXT_PLAIN, ExampleMediaTypes.PLAINTEXT_0_1_0 }) 37 | @Provider 38 | public class PlainTextExamplesMessageBodyReader implements MessageBodyReader { 39 | 40 | public PlainTextExamplesMessageBodyReader() { 41 | 42 | } 43 | 44 | private static final Logger LOGGER = LoggerFactory.getLogger(PlainTextExamplesMessageBodyReader.class); 45 | 46 | @Override 47 | public boolean isReadable(Class type, Type genericType, Annotation[] annotations, MediaType mediaType) { 48 | 49 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type); 50 | 51 | boolean willReturn = (mediaType.isCompatible(MediaType.TEXT_PLAIN_TYPE) || mediaType.toString().equals(ExampleMediaTypes.PLAINTEXT_0_1_0)) && type == ExamplesIterable.class; 52 | 53 | LOGGER.debug("Returning: {}", willReturn); 54 | 55 | return willReturn; 56 | } 57 | 58 | /* 59 | * (non-Javadoc) 60 | * 61 | * @see javax.ws.rs.ext.MessageBodyReader#readFrom(java.lang.Class, 62 | * java.lang.reflect.Type, java.lang.annotation.Annotation[], 63 | * javax.ws.rs.core.MediaType, javax.ws.rs.core.MultivaluedMap, 64 | * java.io.InputStream) 65 | * 66 | * Expects data to arrive as url-encoded strings. 
67 | * 68 | * TODO: look at specific mediatypes eg text/vw 69 | */ 70 | @Override 71 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException { 72 | 73 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) { 74 | LOGGER.debug("Rec'd HTTP headers: "); 75 | 76 | for (Entry> entry : httpHeaders.entrySet()) { 77 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ',')); 78 | } 79 | } 80 | 81 | // TODO: 82 | // if a content-length has been provided, then use that to read entire 83 | // string in one go. 84 | 85 | Charset charset = ReaderWriter.getCharset(mediaType); 86 | 87 | LOGGER.debug("Reading examples using charset: {}", charset.displayName()); 88 | 89 | StringExampleIterator theIterator = new StringExampleIterator(entityStream, charset); 90 | 91 | // TODO: provide the proper number of examples here 92 | // setting this to Integer.MAX_VALUE for now to force streaming 93 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, theIterator); 94 | } 95 | 96 | } 97 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/plaintextexamplesmessagebodyreader/StringExampleIterator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.plaintextexamplesmessagebodyreader; 5 | 6 | import static com.google.common.base.Preconditions.checkNotNull; 7 | 8 | import java.io.BufferedReader; 9 | import java.io.IOException; 10 | import java.io.InputStream; 11 | import java.io.InputStreamReader; 12 | import java.nio.charset.Charset; 13 | import java.util.Iterator; 14 | import java.util.NoSuchElementException; 15 | 16 | import org.slf4j.Logger; 17 | 
import org.slf4j.LoggerFactory; 18 | 19 | import com.eharmony.matching.vw.webservice.common.example.Example; 20 | import com.eharmony.matching.vw.webservice.common.example.StringExample; 21 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 22 | 23 | /** 24 | * @author vrahimtoola 25 | * 26 | * Reads 1 string at a time from some input stream. 27 | * 28 | * TODO look at guava's abstract iterator and the test that comes with 29 | * guava 30 | */ 31 | public class StringExampleIterator implements Iterator { 32 | 33 | private static final Logger LOGGER = LoggerFactory.getLogger(StringExampleIterator.class); 34 | 35 | /* 36 | * The reader. 37 | */ 38 | private final BufferedReader reader; 39 | 40 | /* 41 | * The example to be returned, when 'next()' is called. 42 | */ 43 | private String nextExampleToReturn = null; 44 | 45 | private long numTotalExamples = 0; 46 | 47 | public StringExampleIterator(InputStream inputStream, Charset charset) throws IOException { 48 | 49 | checkNotNull(inputStream, "A null input stream was provided!"); 50 | reader = new BufferedReader(new InputStreamReader(inputStream, charset)); 51 | 52 | advance(); 53 | } 54 | 55 | @Override 56 | public boolean hasNext() { 57 | return nextExampleToReturn != null; 58 | } 59 | 60 | @Override 61 | public Example next() { 62 | 63 | String toReturn = nextExampleToReturn; 64 | 65 | if (toReturn == null) throw new NoSuchElementException("No element to return! Make sure to call 'hasNext()' and that it returns true before invoking this method!"); 66 | 67 | try { 68 | advance(); 69 | } 70 | catch (IOException e) { 71 | 72 | throw new ExampleReadException("Exception reading examples! 
/**
 * @author vrahimtoola
 * An implementation of Iterable that returns an iterator to iterate over the lines of a given chunk of text.
 * Every call to iterator() starts again from the first line.
 * FYI the iterator returned is not thread safe.
 */
public class StringIterable implements Iterable<String> {

    private final String theText;

    /*
     * Constructor.
     * @param chunkOfText The text to iterate over. Cannot be null (but can be empty).
     * @throws IllegalArgumentException if chunkOfText is null.
     */
    public StringIterable(String chunkOfText)
    {
        if (chunkOfText == null)
            throw new IllegalArgumentException("'chunkOfText' cannot be null!");

        theText = chunkOfText;
    }

    /* (non-Javadoc)
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<String> iterator() {

        return new StringBufferedReaderIterator(theText);
    }

    /*
     * The iterator. Uses a BufferedReader to read lines one at a time from the chunk of text.
     * The next line is always read ahead, so hasNext() is a simple null check.
     * Note that this iterator is not thread-safe.
     */
    private static class StringBufferedReaderIterator implements Iterator<String>
    {
        private final BufferedReader bufferedReader;

        //the line that the next call to 'next()' returns; null once the text is exhausted
        private String nextLineToReturn = null;

        private boolean faultedOrClosed = false;

        public StringBufferedReaderIterator(String theText)
        {
            bufferedReader = new BufferedReader(new StringReader(theText));
            advance();
        }

        @Override
        public boolean hasNext() {

            return nextLineToReturn != null;
        }

        @Override
        public String next() {

            if (nextLineToReturn == null)
                throw new NoSuchElementException("No element to return! Make sure 'hasNext()' has been called and it returned 'true' before invoking this method.");

            String toReturnString = nextLineToReturn; //save reference, since the call to 'advance' below updates 'nextLineToReturn'.

            advance();

            return toReturnString;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("The 'remove' operation is not supported.");
        }

        /*
         * Reads the next line ahead. Closes the reader once the last line has been consumed.
         */
        private void advance()
        {
            if (faultedOrClosed)
                return;

            try {
                nextLineToReturn = bufferedReader.readLine();

                if (nextLineToReturn == null) //close the bufferedReader if no more lines to return
                {
                    bufferedReader.close();
                    faultedOrClosed = true; //so that we don't try again to read from the bufferedReader
                }

            } catch (IOException e) {
                //Deliberately not rethrown: a read failure simply ends the iteration.
                faultedOrClosed = true; //so that we don't try again to read from the bufferedReader
                nextLineToReturn = null; //so that an exception is thrown if someone calls 'next()'.
            }

        }
    }

}
11 | java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter 12 | #java.util.logging.FileHandler.encoding the name of the character set encoding to use (defaults to the default platform encoding). 13 | #java.util.logging.FileHandler.limit specifies an approximate maximum amount to write (in bytes) to any one file. If this is zero, then there is no limit. (Defaults to no limit). 14 | #java.util.logging.FileHandler.count specifies how many output files to cycle through (defaults to 1). 15 | java.util.logging.FileHandler.pattern = /Users/vrahimtoola/Desktop/vw-webservice.log 16 | #java.util.logging.FileHandler.append specifies whether the FileHandler should append onto any existing files (defaults to false). 17 | 18 | org.apache.http.level = FINEST -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/resources/vw-webservice.properties: -------------------------------------------------------------------------------- 1 | vw.hostName=localhost 2 | vw.port=26542 -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/webapp/WEB-INF/applicationContext.xml: -------------------------------------------------------------------------------- 1 | 2 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/main/webapp/WEB-INF/web.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 5 | 6 | contextConfigLocation 7 | /WEB-INF/applicationContext.xml 8 | 9 | 10 | org.springframework.web.context.ContextLoaderListener 11 | 12 | 13 | vw-webservice 14 | org.glassfish.jersey.servlet.ServletContainer 15 | 16 | jersey.config.server.provider.packages 17 | com.eharmony.matching.vw.webservice;com.eharmony.matching.vw.webservice.messagebodyreader 18 | 19 | 1 20 | true 21 
| 22 | 23 | vw-webservice 24 | /* 25 | 26 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/AsyncHttpClientTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.client; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.io.OutputStreamWriter; 10 | import java.io.PipedInputStream; 11 | import java.io.PipedOutputStream; 12 | import java.util.concurrent.Callable; 13 | import java.util.concurrent.ExecutionException; 14 | import java.util.concurrent.Executors; 15 | import java.util.concurrent.Future; 16 | 17 | import junit.framework.Assert; 18 | 19 | import org.junit.Before; 20 | import org.junit.Ignore; 21 | import org.junit.Test; 22 | import org.slf4j.Logger; 23 | import org.slf4j.LoggerFactory; 24 | 25 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 26 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 27 | import com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader.JsonTestUtils; 28 | import com.google.gson.stream.JsonWriter; 29 | import com.ning.http.client.AsyncHandler; 30 | import com.ning.http.client.AsyncHttpClient; 31 | import com.ning.http.client.AsyncHttpClientConfig; 32 | import com.ning.http.client.AsyncHttpClientConfig.Builder; 33 | import com.ning.http.client.BodyGenerator; 34 | import com.ning.http.client.HttpResponseBodyPart; 35 | import com.ning.http.client.HttpResponseHeaders; 36 | import com.ning.http.client.HttpResponseStatus; 37 | import com.ning.http.client.Request; 38 | import com.ning.http.client.RequestBuilder; 39 | import com.ning.http.client.Response; 40 | import com.ning.http.client.generators.InputStreamBodyGenerator; 41 | 42 | /** 43 | * @author vrahimtoola 44 | 
* Uses the Async Http Client to hit the web service. This is the only 45 | * java client that I've been able to get to work! 46 | */ 47 | public class AsyncHttpClientTest { 48 | 49 | private static final Logger LOGGER = LoggerFactory.getLogger(AsyncHttpClientTest.class); 50 | 51 | private int roundsOfDataToSubmit = 1; 52 | 53 | private boolean testFailed = false; 54 | 55 | @Before 56 | public void setUp() { 57 | roundsOfDataToSubmit = 3; //this means 3 * (number of examples in ner.train) examples will be submitted to the web service. 58 | testFailed = false; 59 | } 60 | 61 | private synchronized void onTestFailed() { 62 | testFailed = true; 63 | } 64 | 65 | private synchronized boolean getTestFailed() { 66 | return testFailed; 67 | } 68 | 69 | /* 70 | * The ignore annotation is to keep the travis-ci build from failing. 71 | */ 72 | @Ignore 73 | @Test 74 | public void plainTextExamplesTest() throws IOException, InterruptedException, ExecutionException { 75 | 76 | RequestBuilder builder = new RequestBuilder("POST"); 77 | 78 | //note: assumes that a vw-webservice is running on localhost at 8080. 79 | //modify the address accordingly if it's running on a different host/port. 80 | 81 | Request request = builder.setUrl("http://localhost:8080/vw-webservice-jersey/predict/main").addHeader("Content-Type", ExampleMediaTypes.PLAINTEXT_0_1_0).setBody(getPlainTextInputStreamBodyGenerator()).build(); 82 | 83 | doTest(request); 84 | } 85 | 86 | /* 87 | * The ignore annotation is to keep the travis-ci build from failing. 88 | */ 89 | @Ignore 90 | @Test 91 | public void structuredJsonExamplesTest() throws IOException, InterruptedException, ExecutionException { 92 | 93 | RequestBuilder builder = new RequestBuilder("POST"); 94 | 95 | //note: assumes that a vw-webservice is running on localhost at 8080. 96 | //modify the address accordingly if it's running on a different host/port. 
97 | 98 | Request request = builder.setUrl("http://localhost:8080/vw-webservice-jersey/predict/main").addHeader("Content-Type", ExampleMediaTypes.STRUCTURED_JSON_0_1_0).setBody(getJsonInputStreamBodyGenerator()).build(); 99 | 100 | doTest(request); 101 | } 102 | 103 | /* 104 | * The main method that carries out the test agains the web service and 105 | * verifies the results. 106 | */ 107 | private void doTest(Request request) throws InterruptedException, ExecutionException, IOException { 108 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(); 109 | final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream); 110 | 111 | AsyncHandler asyncHandler = new AsyncHandler() { 112 | private final Response.ResponseBuilder builder = new Response.ResponseBuilder(); 113 | 114 | @Override 115 | public STATE onBodyPartReceived(final HttpResponseBodyPart content) throws Exception { 116 | content.writeTo(pipedOutputStream); 117 | return STATE.CONTINUE; 118 | } 119 | 120 | @Override 121 | public STATE onStatusReceived(final HttpResponseStatus status) throws Exception { 122 | builder.accumulate(status); 123 | return STATE.CONTINUE; 124 | } 125 | 126 | @Override 127 | public STATE onHeadersReceived(final HttpResponseHeaders headers) throws Exception { 128 | builder.accumulate(headers); 129 | return STATE.CONTINUE; 130 | } 131 | 132 | @Override 133 | public Response onCompleted() throws Exception { 134 | 135 | LOGGER.info("On complete called!"); 136 | 137 | pipedOutputStream.flush(); 138 | pipedOutputStream.close(); 139 | 140 | return builder.build(); 141 | 142 | } 143 | 144 | @Override 145 | public void onThrowable(Throwable arg0) { 146 | // TODO Auto-generated method stub 147 | LOGGER.error("Error: {}", arg0); 148 | onTestFailed(); 149 | } 150 | 151 | }; 152 | 153 | Future readingThreadFuture = Executors.newCachedThreadPool().submit(new Callable() { 154 | 155 | @Override 156 | public Void call() throws Exception { 157 | BufferedReader reader 
= new BufferedReader(new InputStreamReader(pipedInputStream)); 158 | 159 | String readPrediction; 160 | 161 | int numPredictionsRead = 0; 162 | 163 | while ((readPrediction = reader.readLine()) != null) { 164 | //LOGGER.info("Got prediction: {}", readPrediction); 165 | numPredictionsRead++; 166 | } 167 | 168 | LOGGER.info("Read a total of {} predictions", numPredictionsRead); 169 | Assert.assertEquals(roundsOfDataToSubmit * 272274, numPredictionsRead); 170 | 171 | return null; 172 | } 173 | }); 174 | 175 | Builder config = new AsyncHttpClientConfig.Builder(); 176 | 177 | config.setRequestTimeoutInMs(-1); //need to set this to -1, to indicate wait forever. setting to 0 actually means a 0 ms timeout! 178 | 179 | AsyncHttpClient client = new AsyncHttpClient(config.build()); 180 | 181 | client.executeRequest(request, asyncHandler).get(); 182 | 183 | readingThreadFuture.get(); //verify no exceptions occurred when reading predictions 184 | 185 | client.close(); 186 | 187 | Assert.assertFalse(getTestFailed()); 188 | } 189 | 190 | /* 191 | * Returns a body generator that places plain text examples into the request 192 | * body. 
193 | */ 194 | private BodyGenerator getPlainTextInputStreamBodyGenerator() throws IOException { 195 | 196 | //the examples 197 | //final GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz")); 198 | 199 | PipedInputStream pipedInputStream = new PipedInputStream(); 200 | 201 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(pipedInputStream); 202 | 203 | Executors.newCachedThreadPool().submit(new Runnable() { 204 | 205 | @Override 206 | public void run() { 207 | 208 | try { 209 | 210 | for (int x = 0; x < roundsOfDataToSubmit; x++) { 211 | 212 | Iterable structuredExamplesIterable = TestUtils.getStructuredExamplesFromNerTrain(); 213 | 214 | for (StructuredExample structuredExample : structuredExamplesIterable) { 215 | pipedOutputStream.write((structuredExample.getVWStringRepresentation() + "\n").getBytes()); 216 | pipedOutputStream.flush(); 217 | } 218 | 219 | LOGGER.info("Submitted round {} of examples...", (x + 1)); 220 | } 221 | 222 | } 223 | catch (Exception e) { 224 | LOGGER.error("Error in submitting examples to piped output stream!", e); 225 | onTestFailed(); 226 | } 227 | finally { 228 | try { 229 | pipedOutputStream.close(); 230 | } 231 | catch (IOException e) { 232 | LOGGER.error("Failed to close piped outputstream!", e); 233 | onTestFailed(); 234 | } 235 | } 236 | 237 | } 238 | }); 239 | 240 | return new InputStreamBodyGenerator(pipedInputStream); 241 | } 242 | 243 | /* 244 | * Returns a body generator that places JSON formatted examples into the 245 | * request body. 
246 | */ 247 | private BodyGenerator getJsonInputStreamBodyGenerator() throws IOException { 248 | 249 | //the examples 250 | //final GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz")); 251 | 252 | PipedInputStream pipedInputStream = new PipedInputStream(); 253 | 254 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(pipedInputStream); 255 | 256 | Executors.newCachedThreadPool().submit(new Runnable() { 257 | 258 | @Override 259 | public void run() { 260 | 261 | OutputStreamWriter outputStreamWriter = new OutputStreamWriter(pipedOutputStream); 262 | JsonWriter jsonWriter = new JsonWriter(outputStreamWriter); 263 | 264 | try { 265 | 266 | int submitRound = 0; 267 | 268 | jsonWriter.beginArray(); 269 | 270 | for (int x = 0; x < roundsOfDataToSubmit; x++) { 271 | 272 | Iterable structuredExamplesIterable = TestUtils.getStructuredExamplesFromNerTrain(); 273 | 274 | for (StructuredExample structuredExample : structuredExamplesIterable) { 275 | JsonTestUtils.writeExample(jsonWriter, structuredExample); 276 | outputStreamWriter.flush(); 277 | pipedOutputStream.flush(); 278 | } 279 | 280 | LOGGER.info("Submitted round {} of examples...", ++submitRound); 281 | } 282 | 283 | jsonWriter.endArray(); 284 | 285 | } 286 | catch (Exception e) { 287 | LOGGER.error("Error in submitting examples to piped output stream!", e); 288 | onTestFailed(); 289 | } 290 | finally { 291 | try { 292 | jsonWriter.flush(); 293 | } 294 | catch (IOException e) { 295 | LOGGER.error("Error flushing json writer!", e); 296 | onTestFailed(); 297 | } 298 | 299 | try { 300 | jsonWriter.close(); 301 | } 302 | catch (IOException e) { 303 | LOGGER.error("Error closing json writer!", e); 304 | onTestFailed(); 305 | } 306 | 307 | try { 308 | pipedOutputStream.close(); 309 | } 310 | catch (IOException e) { 311 | LOGGER.error("Error closing piped outputstream!", e); 312 | onTestFailed(); 313 | } 314 | } 315 | 316 | } 317 | }); 
318 | 319 | return new InputStreamBodyGenerator(pipedInputStream); 320 | } 321 | 322 | } 323 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/TestUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.client; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.util.Iterator; 10 | import java.util.zip.GZIPInputStream; 11 | 12 | import junit.framework.Assert; 13 | 14 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 15 | import com.google.common.collect.AbstractIterator; 16 | 17 | /** 18 | * @author vrahimtoola 19 | * General utility code for tests. 20 | */ 21 | public class TestUtils { 22 | 23 | /* 24 | * Returns the examples from ner.train.gz as structured examples. 25 | */ 26 | public static Iterable getStructuredExamplesFromNerTrain() { 27 | 28 | return new Iterable() { 29 | 30 | @Override 31 | public Iterator iterator() { 32 | try { 33 | return getStructuredExampleIteratorFromNerTrain(); 34 | } 35 | catch (IOException e) { 36 | throw new RuntimeException(e); 37 | } 38 | } 39 | }; 40 | 41 | } 42 | 43 | private static Iterator getStructuredExampleIteratorFromNerTrain() throws IOException { 44 | 45 | final GZIPInputStream gzipInputStream = new GZIPInputStream(TestUtils.class.getClassLoader().getResourceAsStream("ner.train.gz")); 46 | final BufferedReader exampleReader = new BufferedReader(new InputStreamReader(gzipInputStream)); 47 | final StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder(); 48 | final StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 49 | 50 | return new AbstractIterator() { 51 | 52 | @Override 53 | protected StructuredExample 
computeNext() { 54 | 55 | try { 56 | String readExample = exampleReader.readLine(); 57 | 58 | exampleBuilder.clear(); 59 | namespaceBuilder.clear(); 60 | 61 | if (readExample != null) { 62 | if (readExample.trim().length() == 0) { 63 | //just a line - empty example 64 | return StructuredExample.EMPTY_EXAMPLE; 65 | } 66 | else { 67 | //locate the " | " 68 | int indexOfSpacePipeSpace = readExample.indexOf(" | "); 69 | 70 | Assert.assertTrue(indexOfSpacePipeSpace > 0); 71 | 72 | String[] labelAndAllFeatures = readExample.split(" \\| "); 73 | 74 | Assert.assertEquals(2, labelAndAllFeatures.length); 75 | 76 | exampleBuilder.setLabel(labelAndAllFeatures[0]); 77 | 78 | String allFeaturesString = labelAndAllFeatures[1]; 79 | 80 | String[] individualFeatures = allFeaturesString.split(" "); 81 | 82 | for (String individualFeature : individualFeatures) { 83 | namespaceBuilder.addFeature(individualFeature); 84 | } 85 | 86 | exampleBuilder.addNamespace(namespaceBuilder.build()); 87 | 88 | return exampleBuilder.build(); 89 | } 90 | } 91 | else 92 | return endOfData(); 93 | } 94 | catch (Exception e) { 95 | throw new RuntimeException(e); 96 | } 97 | 98 | } 99 | }; 100 | 101 | } 102 | } 103 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/AsyncFailFastTCPIPExampleProcessorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip; 5 | 6 | import static org.mockito.Mockito.mock; 7 | import static org.mockito.Mockito.times; 8 | import static org.mockito.Mockito.verify; 9 | import static org.mockito.Mockito.when; 10 | 11 | import java.io.BufferedReader; 12 | import java.io.ByteArrayInputStream; 13 | import java.io.ByteArrayOutputStream; 14 | import java.io.IOException; 15 | import java.io.InputStream; 16 | import 
java.io.StringReader; 17 | import java.net.Socket; 18 | import java.net.UnknownHostException; 19 | import java.util.ArrayList; 20 | import java.util.Iterator; 21 | import java.util.List; 22 | import java.util.concurrent.CountDownLatch; 23 | import java.util.concurrent.Executors; 24 | import java.util.concurrent.TimeUnit; 25 | 26 | import org.junit.Assert; 27 | import org.junit.Before; 28 | import org.junit.Test; 29 | import org.mockito.invocation.InvocationOnMock; 30 | import org.mockito.stubbing.Answer; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import com.eharmony.matching.vw.webservice.common.example.Example; 35 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException; 36 | import com.eharmony.matching.vw.webservice.common.example.StringExample; 37 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction; 38 | import com.eharmony.matching.vw.webservice.core.ExampleReadException; 39 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler; 40 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager; 41 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException; 42 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState; 43 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException; 44 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState; 45 | 46 | /** 47 | * @author vrahimtoola 48 | * 49 | * Tests the AsyncFailFastTCPIPExampleProcessor. 
50 | */ 51 | public class AsyncFailFastTCPIPExampleProcessorTest implements ExampleProcessingEventHandler { 52 | 53 | private static final Logger LOGGER = LoggerFactory.getLogger(AsyncFailFastTCPIPExampleProcessorTest.class); 54 | 55 | /* 56 | * These variables could get written to by the example submitting thread, so 57 | * we need to make them volatile. 58 | */ 59 | private volatile boolean exampleReadExceptionThrown, exampleFormatExceptionThrown, exampleSubmissionExceptionThrown, exampleSubmissionCompleteCalled; 60 | private volatile boolean predictionFetchExceptionThrown, predictionFetchCompleteCalled; 61 | 62 | private ExampleSubmissionState expectedStateOnExampleSubmissionComplete; 63 | private PredictionFetchState expectedStateOnPredictionFetchComplete; 64 | private long expectedNumberOfSkippedExamples, expectedNumberOfSubmittedExamples; 65 | 66 | private CountDownLatch countDownLatch; 67 | 68 | /** 69 | * @throws java.lang.Exception 70 | */ 71 | @Before 72 | public void setUp() throws Exception { 73 | 74 | exampleReadExceptionThrown = false; 75 | exampleFormatExceptionThrown = false; 76 | exampleSubmissionExceptionThrown = false; 77 | exampleSubmissionCompleteCalled = false; 78 | 79 | predictionFetchCompleteCalled = false; 80 | predictionFetchExceptionThrown = false; 81 | 82 | expectedNumberOfSkippedExamples = -1; 83 | expectedNumberOfSubmittedExamples = -1; 84 | 85 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete; 86 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete; 87 | 88 | countDownLatch = new CountDownLatch(2); 89 | 90 | } 91 | 92 | /* 93 | * Just a simple test to verify that examples can be submitted and read as 94 | * expected. 
95 | */ 96 | @Test(timeout = 10000) 97 | public void simpleTest() throws IOException, ExampleSubmissionException, InterruptedException { 98 | 99 | Iterable examples = getExamples("One", "Two", "Three"); 100 | 101 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3"); 102 | 103 | Socket socket = mock(Socket.class); 104 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 105 | when(socket.getOutputStream()).thenReturn(outputStream); 106 | when(socket.getInputStream()).thenReturn(predictionInputStream); 107 | 108 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class); 109 | when(socketFactory.getSocket()).thenReturn(socket); 110 | 111 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples); 112 | 113 | expectedNumberOfSkippedExamples = 0; 114 | expectedNumberOfSubmittedExamples = 3; 115 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete; 116 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete; 117 | 118 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable(); 119 | 120 | int x = 0; 121 | 122 | for (Prediction p : predictions) { 123 | 124 | switch (x++) { 125 | 126 | case 0: 127 | Assert.assertEquals("1", p.getVWStringRepresentation()); 128 | break; 129 | 130 | case 1: 131 | Assert.assertEquals("2", p.getVWStringRepresentation()); 132 | break; 133 | 134 | case 2: 135 | Assert.assertEquals("3", p.getVWStringRepresentation()); 136 | break; 137 | 138 | default: 139 | Assert.fail("Too many predictions!"); 140 | } 141 | } 142 | 143 | Assert.assertEquals(3, x); 144 | 145 | boolean succeeded = countDownLatch.await(9, TimeUnit.SECONDS); //wait till the example thread is done as well. 
146 | 147 | Assert.assertTrue("Waited for longer than 9 seconds!!", succeeded); 148 | 149 | //check that all examples got there 150 | BufferedReader bReader = new BufferedReader(new StringReader(new String(outputStream.toByteArray()))); 151 | 152 | x = 0; 153 | String line = null; 154 | while ((line = bReader.readLine()) != null) { 155 | 156 | switch (x++) { 157 | 158 | case 0: 159 | Assert.assertEquals("One", line); 160 | break; 161 | 162 | case 1: 163 | Assert.assertEquals("Two", line); 164 | break; 165 | 166 | case 2: 167 | Assert.assertEquals("Three", line); 168 | break; 169 | 170 | default: 171 | Assert.fail("Too many examples!"); 172 | } 173 | 174 | } 175 | 176 | Assert.assertEquals(3, x); 177 | 178 | verify(socketFactory, times(1)).getSocket(); 179 | verify(socket, times(1)).getInputStream(); 180 | verify(socket, times(1)).getOutputStream(); 181 | verify(socket, times(1)).shutdownOutput(); 182 | verify(socket, times(1)).close(); 183 | 184 | //no exceptions should have been thrown 185 | Assert.assertFalse(exampleReadExceptionThrown); 186 | Assert.assertFalse(exampleFormatExceptionThrown); 187 | Assert.assertFalse(exampleSubmissionExceptionThrown); 188 | Assert.assertFalse(predictionFetchExceptionThrown); 189 | 190 | //the completion call backs should have been fired 191 | Assert.assertTrue(exampleSubmissionCompleteCalled); 192 | Assert.assertTrue(predictionFetchCompleteCalled); 193 | 194 | } 195 | 196 | /* 197 | * Tests that an ExampleSubmissionException is thrown when the socket cannot 198 | * be retrieved from the socket factory. 
199 | */ 200 | @Test(expected = ExampleSubmissionException.class) 201 | public void throwsExampleSubmissionException() throws IOException, ExampleSubmissionException { 202 | 203 | Iterable examples = getExamples("One", "Two", "Three"); 204 | 205 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class); 206 | when(socketFactory.getSocket()).thenThrow(UnknownHostException.class); 207 | 208 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples); 209 | 210 | toTest.submitExamples(this); 211 | 212 | } 213 | 214 | /* 215 | * Tests that an ExampleReadException is handled as expected. 216 | */ 217 | @Test(timeout = 5000) 218 | public void handlesExampleReadException() throws IOException, ExampleSubmissionException, InterruptedException { 219 | 220 | Iterator iterator = mock(Iterator.class); 221 | when(iterator.hasNext()).thenReturn(true); 222 | when(iterator.next()).thenThrow(ExampleReadException.class); 223 | 224 | Iterable examples = mock(Iterable.class); 225 | when(examples.iterator()).thenReturn(iterator); 226 | 227 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3"); 228 | 229 | Socket socket = mock(Socket.class); 230 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 231 | when(socket.getOutputStream()).thenReturn(outputStream); 232 | when(socket.getInputStream()).thenReturn(predictionInputStream); 233 | 234 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class); 235 | when(socketFactory.getSocket()).thenReturn(socket); 236 | 237 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples); 238 | 239 | expectedNumberOfSkippedExamples = 0; 240 | expectedNumberOfSubmittedExamples = 0; 241 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.ExampleReadFault; 242 | expectedStateOnPredictionFetchComplete = 
PredictionFetchState.Complete; 243 | 244 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable(); 245 | 246 | int x = 0; 247 | 248 | for (Prediction p : predictions) { 249 | 250 | switch (x++) { 251 | 252 | case 0: 253 | Assert.assertEquals("1", p.getVWStringRepresentation()); 254 | break; 255 | 256 | case 1: 257 | Assert.assertEquals("2", p.getVWStringRepresentation()); 258 | break; 259 | 260 | case 2: 261 | Assert.assertEquals("3", p.getVWStringRepresentation()); 262 | break; 263 | 264 | default: 265 | Assert.fail("Too many predictions!"); 266 | } 267 | } 268 | 269 | Assert.assertEquals(3, x); 270 | 271 | countDownLatch.await(); //wait till example submission and prediction fetch are both done. 272 | 273 | verify(socketFactory, times(1)).getSocket(); 274 | verify(socket, times(1)).getInputStream(); 275 | verify(socket, times(1)).getOutputStream(); 276 | verify(socket, times(1)).shutdownOutput(); 277 | verify(socket, times(1)).close(); 278 | 279 | Assert.assertTrue(exampleReadExceptionThrown); 280 | Assert.assertFalse(exampleFormatExceptionThrown); 281 | Assert.assertFalse(exampleSubmissionExceptionThrown); 282 | Assert.assertFalse(predictionFetchExceptionThrown); 283 | 284 | //the completion call backs should have been fired 285 | Assert.assertTrue(exampleSubmissionCompleteCalled); 286 | Assert.assertTrue(predictionFetchCompleteCalled); 287 | 288 | } 289 | 290 | /* 291 | * Tests that example format exceptions are handled as expected. 
292 | */ 293 | @Test(timeout = 5000) 294 | public void handlesExampleFormatException() throws IOException, ExampleSubmissionException, InterruptedException { 295 | 296 | StringExample errorExample = mock(StringExample.class); 297 | when(errorExample.getVWStringRepresentation()).thenThrow(ExampleFormatException.class); 298 | 299 | Iterator iterator = mock(Iterator.class); 300 | when(iterator.hasNext()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false); 301 | when(iterator.next()).thenReturn(new StringExample("One")).thenReturn(errorExample).thenReturn(new StringExample("Two")); 302 | 303 | Iterable examples = mock(Iterable.class); 304 | when(examples.iterator()).thenReturn(iterator); 305 | 306 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3"); 307 | 308 | Socket socket = mock(Socket.class); 309 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 310 | when(socket.getOutputStream()).thenReturn(outputStream); 311 | when(socket.getInputStream()).thenReturn(predictionInputStream); 312 | 313 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class); 314 | when(socketFactory.getSocket()).thenReturn(socket); 315 | 316 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples); 317 | 318 | expectedNumberOfSkippedExamples = 1; 319 | expectedNumberOfSubmittedExamples = 2; 320 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete; 321 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete; 322 | 323 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable(); 324 | 325 | int x = 0; 326 | 327 | for (Prediction p : predictions) { 328 | 329 | switch (x++) { 330 | 331 | case 0: 332 | Assert.assertEquals("1", p.getVWStringRepresentation()); 333 | break; 334 | 335 | case 1: 336 | Assert.assertEquals("2", p.getVWStringRepresentation()); 337 | break; 338 | 339 | case 2: 
340 | Assert.assertEquals("3", p.getVWStringRepresentation()); 341 | break; 342 | 343 | default: 344 | Assert.fail("Too many predictions!"); 345 | } 346 | } 347 | 348 | Assert.assertEquals(3, x); 349 | 350 | countDownLatch.await(); //wait till example submission and prediction fetch are both done. 351 | 352 | //check that all examples got there 353 | BufferedReader bReader = new BufferedReader(new StringReader(new String(outputStream.toByteArray()))); 354 | 355 | x = 0; 356 | String line = null; 357 | while ((line = bReader.readLine()) != null) { 358 | 359 | switch (x++) { 360 | 361 | case 0: 362 | Assert.assertEquals("One", line); 363 | break; 364 | 365 | case 1: 366 | Assert.assertEquals("Two", line); 367 | break; 368 | 369 | default: 370 | Assert.fail("Too many examples!"); 371 | } 372 | 373 | } 374 | 375 | Assert.assertEquals(2, x); 376 | 377 | verify(socketFactory, times(1)).getSocket(); 378 | verify(socket, times(1)).getInputStream(); 379 | verify(socket, times(1)).getOutputStream(); 380 | verify(socket, times(1)).shutdownOutput(); 381 | verify(socket, times(1)).close(); 382 | 383 | Assert.assertFalse(exampleReadExceptionThrown); 384 | Assert.assertTrue(exampleFormatExceptionThrown); 385 | Assert.assertFalse(exampleSubmissionExceptionThrown); 386 | Assert.assertFalse(predictionFetchExceptionThrown); 387 | 388 | //the completion call backs should have been fired 389 | Assert.assertTrue(exampleSubmissionCompleteCalled); 390 | Assert.assertTrue(predictionFetchCompleteCalled); 391 | 392 | } 393 | 394 | /* 395 | * Tests that a prediction fetch exception is handled correctly. 
396 | */ 397 | @Test(timeout = 5000) 398 | public void handlePredictionFetchException() throws IOException, ExampleSubmissionException, InterruptedException { 399 | 400 | Iterable examples = getExamples("One", "Two"); 401 | 402 | Socket socket = mock(Socket.class); 403 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream(); 404 | when(socket.getOutputStream()).thenReturn(outputStream); 405 | 406 | InputStream inputStream = mock(InputStream.class, new Answer() { 407 | 408 | @Override 409 | public Object answer(InvocationOnMock invocation) throws Throwable { 410 | throw new IOException(); 411 | } 412 | 413 | }); 414 | 415 | when(socket.getInputStream()).thenReturn(inputStream); 416 | 417 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class); 418 | when(socketFactory.getSocket()).thenReturn(socket); 419 | 420 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples); 421 | 422 | expectedNumberOfSkippedExamples = 0; 423 | expectedNumberOfSubmittedExamples = 2; 424 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete; 425 | expectedStateOnPredictionFetchComplete = PredictionFetchState.PredictionFetchFault; 426 | 427 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable(); 428 | 429 | int x = 0; 430 | 431 | for (Prediction p : predictions) { 432 | 433 | x++; 434 | } 435 | 436 | Assert.assertEquals(0, x); 437 | 438 | countDownLatch.await(); //wait till example submission and prediction fetch are both done. 
439 | 440 | //check that all examples got there 441 | BufferedReader bReader = new BufferedReader(new StringReader(new String(outputStream.toByteArray()))); 442 | 443 | x = 0; 444 | String line = null; 445 | while ((line = bReader.readLine()) != null) { 446 | 447 | switch (x++) { 448 | 449 | case 0: 450 | Assert.assertEquals("One", line); 451 | break; 452 | 453 | case 1: 454 | Assert.assertEquals("Two", line); 455 | break; 456 | 457 | default: 458 | Assert.fail("Too many examples!"); 459 | } 460 | 461 | } 462 | 463 | Assert.assertEquals(2, x); 464 | 465 | verify(socketFactory, times(1)).getSocket(); 466 | verify(socket, times(1)).getInputStream(); 467 | verify(socket, times(1)).getOutputStream(); 468 | verify(socket, times(1)).shutdownOutput(); 469 | verify(socket, times(1)).close(); 470 | 471 | Assert.assertFalse(exampleReadExceptionThrown); 472 | Assert.assertFalse(exampleFormatExceptionThrown); 473 | Assert.assertFalse(exampleSubmissionExceptionThrown); 474 | Assert.assertTrue(predictionFetchExceptionThrown); 475 | 476 | //the completion call backs should have been fired 477 | Assert.assertTrue(exampleSubmissionCompleteCalled); 478 | Assert.assertTrue(predictionFetchCompleteCalled); 479 | 480 | } 481 | 482 | private Iterable getExamples(String... examples) { 483 | 484 | List toReturn = new ArrayList(); 485 | 486 | for (String s : examples) { 487 | 488 | toReturn.add(new StringExample(s)); 489 | } 490 | 491 | return toReturn; 492 | } 493 | 494 | private InputStream getPredictionInputStream(String... 
predictions) { 495 | 496 | String newLine = System.getProperty("line.separator"); 497 | 498 | StringBuilder sbr = new StringBuilder(); 499 | 500 | for (String s : predictions) { 501 | sbr.append(s); 502 | sbr.append(newLine); 503 | } 504 | 505 | return new ByteArrayInputStream(sbr.toString().getBytes()); 506 | } 507 | 508 | @Override 509 | public void onExampleReadException(ExampleProcessingManager exampleProcessingManager, ExampleReadException theException) { 510 | 511 | Assert.assertTrue(exampleProcessingManager.getExampleSubmissionState() == ExampleSubmissionState.ExampleReadFault); 512 | 513 | exampleReadExceptionThrown = true; 514 | } 515 | 516 | @Override 517 | public void onExampleFormatException(ExampleProcessingManager exampleProcessingManager, ExampleFormatException theException) { 518 | 519 | //the async tcp ip example processor carries on when it encounters an example format exception, 520 | //and doesn't consider it to be a fault. 521 | Assert.assertTrue(exampleProcessingManager.getExampleSubmissionState() == ExampleSubmissionState.OnGoing); 522 | 523 | exampleFormatExceptionThrown = true; 524 | 525 | } 526 | 527 | @Override 528 | public void onExampleSubmissionException(ExampleProcessingManager exampleProcessingManager, ExampleSubmissionException theException) { 529 | 530 | Assert.assertTrue(exampleProcessingManager.getExampleSubmissionState() == ExampleSubmissionState.ExampleSubmissionFault); 531 | 532 | exampleSubmissionExceptionThrown = true; 533 | } 534 | 535 | @Override 536 | public void onExampleSubmissionComplete(ExampleProcessingManager exampleProcessingManager) { 537 | 538 | LOGGER.info("Example submission complete called!"); 539 | 540 | Assert.assertTrue(exampleProcessingManager.getExampleSubmissionState() == expectedStateOnExampleSubmissionComplete); 541 | Assert.assertEquals(exampleProcessingManager.getTotalNumberOfExamplesSkipped(), expectedNumberOfSkippedExamples); 542 | 
Assert.assertEquals(exampleProcessingManager.getTotalNumberOfExamplesSubmitted(), expectedNumberOfSubmittedExamples); 543 | 544 | exampleSubmissionCompleteCalled = true; 545 | 546 | countDownLatch.countDown(); 547 | 548 | } 549 | 550 | @Override 551 | public void onPredictionFetchException(ExampleProcessingManager exampleProcessingManager, PredictionFetchException theException) { 552 | 553 | Assert.assertTrue(exampleProcessingManager.getPredictionFetchState() == PredictionFetchState.PredictionFetchFault); 554 | 555 | predictionFetchExceptionThrown = true; 556 | } 557 | 558 | @Override 559 | public void onPredictionFetchComplete(ExampleProcessingManager exampleProcessingManager) { 560 | 561 | LOGGER.info("Prediction fetch complete called!"); 562 | 563 | Assert.assertTrue(exampleProcessingManager.getPredictionFetchState() == expectedStateOnPredictionFetchComplete); 564 | 565 | predictionFetchCompleteCalled = true; 566 | 567 | countDownLatch.countDown(); 568 | 569 | } 570 | 571 | } 572 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/GsonJsonExamplesProviderTest.java: -------------------------------------------------------------------------------- 1 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStreamReader; 6 | import java.io.OutputStreamWriter; 7 | import java.io.PipedInputStream; 8 | import java.io.PipedOutputStream; 9 | import java.util.Iterator; 10 | import java.util.concurrent.Callable; 11 | import java.util.concurrent.CountDownLatch; 12 | import java.util.concurrent.Exchanger; 13 | import java.util.concurrent.ExecutionException; 14 | import java.util.concurrent.Executors; 15 | import java.util.concurrent.Future; 16 | import java.util.concurrent.TimeUnit; 17 | import 
java.util.concurrent.TimeoutException; 18 | import java.util.zip.GZIPInputStream; 19 | 20 | import junit.framework.Assert; 21 | 22 | import org.junit.Test; 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | import com.eharmony.matching.vw.webservice.common.example.Example; 27 | import com.google.common.base.Charsets; 28 | import com.google.gson.stream.JsonWriter; 29 | 30 | public class GsonJsonExamplesProviderTest { 31 | 32 | private static final Logger LOGGER = LoggerFactory.getLogger(GsonJsonExamplesProviderTest.class); 33 | 34 | /* 35 | * Tests that a whole bunch of examples can be written and read in JSON 36 | * format. 37 | */ 38 | @Test 39 | public void test() throws IOException, InterruptedException, ExecutionException, TimeoutException { 40 | 41 | //the examples 42 | GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz")); 43 | final BufferedReader testReader = new BufferedReader(new InputStreamReader(gzipInputStream, Charsets.UTF_8)); 44 | 45 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(); 46 | PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream); 47 | 48 | GsonJsonExamplesProvider toTest = new GsonJsonExamplesProvider(); 49 | 50 | final CountDownLatch startWritingExamplesLatch = new CountDownLatch(1); 51 | 52 | final Exchanger exampleExchanger = new Exchanger(); 53 | 54 | final JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(pipedOutputStream)); 55 | 56 | Future successFuture = Executors.newCachedThreadPool().submit(new Callable() { 57 | 58 | @Override 59 | public Void call() throws Exception { 60 | 61 | startWritingExamplesLatch.await(); 62 | 63 | String readExample = null; 64 | 65 | jsonWriter.beginArray(); 66 | 67 | while ((readExample = testReader.readLine()) != null) { 68 | 69 | jsonWriter.beginObject(); 70 | jsonWriter.name("example"); 71 | jsonWriter.value(readExample); 72 | 
jsonWriter.endObject(); 73 | 74 | jsonWriter.flush(); 75 | pipedOutputStream.flush(); //need to flush right after writing, otherwise the reading thread won't get it 76 | 77 | exampleExchanger.exchange(readExample, 1, TimeUnit.SECONDS); //don't care about what's being given to us in exchange 78 | } 79 | 80 | jsonWriter.endArray(); 81 | jsonWriter.flush(); 82 | jsonWriter.close(); 83 | pipedOutputStream.flush(); 84 | pipedOutputStream.close(); 85 | 86 | return null; 87 | } 88 | 89 | }); 90 | 91 | Iterator examplesIterable = toTest.getExamplesFromStream(pipedInputStream); 92 | 93 | startWritingExamplesLatch.countDown(); //tell the example writing thread to start writing examples 94 | 95 | long numExamplesProcessed = 0; 96 | 97 | while (examplesIterable.hasNext()) { //start reading the examples 98 | 99 | Example theExample = examplesIterable.next(); 100 | 101 | Assert.assertEquals(exampleExchanger.exchange("", 1, TimeUnit.SECONDS), theExample.getVWStringRepresentation()); 102 | 103 | if (numExamplesProcessed % 20000 == 0) LOGGER.debug("Read example: {}", theExample.getVWStringRepresentation()); 104 | 105 | numExamplesProcessed++; 106 | } 107 | 108 | successFuture.get(); //verify no exceptions were thrown. 109 | 110 | Assert.assertEquals(272274, numExamplesProcessed); //verify all examples in ner.train.gz were processed. 
111 | 112 | } 113 | 114 | } 115 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/JsonTestUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | 8 | import org.apache.commons.lang3.StringUtils; 9 | 10 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 11 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace; 12 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace.Feature; 13 | import com.google.gson.stream.JsonWriter; 14 | 15 | /** 16 | * @author vrahimtoola 17 | * Writes out a StructuredExample to JSON format. 18 | */ 19 | public class JsonTestUtils { 20 | 21 | public static void writeExample(JsonWriter jsonWriter, StructuredExample structuredExample) throws IOException { 22 | 23 | jsonWriter.beginObject(); 24 | 25 | String label = structuredExample.getLabel(); 26 | 27 | //always write the label out, this is how a pipe example is distinguished from an empty example. 
28 | jsonWriter.name(StructuredJsonPropertyNames.EXAMPLE_LABEL_PROPERTY); 29 | 30 | if (StringUtils.isBlank(label)) { 31 | jsonWriter.nullValue(); 32 | } 33 | else { 34 | jsonWriter.value(label); 35 | } 36 | 37 | //for the tag and namespaces properties, only write them if they're non-null 38 | String tag = structuredExample.getTag(); 39 | 40 | if (StringUtils.isBlank(tag) == false) jsonWriter.name(StructuredJsonPropertyNames.EXAMPLE_TAG_PROPERTY).value(tag); 41 | 42 | Iterable namespaces = structuredExample.getNamespaces(); 43 | 44 | if (namespaces != null) { 45 | 46 | jsonWriter.name(StructuredJsonPropertyNames.EXAMPLE_NAMESPACES_PROPERTY); 47 | 48 | jsonWriter.beginArray(); 49 | 50 | for (Namespace ns : namespaces) { 51 | writeNamespace(ns, jsonWriter); 52 | } 53 | 54 | jsonWriter.endArray(); 55 | 56 | } 57 | 58 | jsonWriter.endObject(); //for the empty example, just write the "{}". 59 | 60 | } 61 | 62 | private static void writeNamespace(Namespace namespace, JsonWriter jsonWriter) throws IOException { 63 | jsonWriter.beginObject(); 64 | 65 | String name = namespace.getName(); 66 | Float scale = namespace.getScalingFactor(); 67 | 68 | if (StringUtils.isBlank(name) == false) { 69 | jsonWriter.name(StructuredJsonPropertyNames.NAMESPACE_NAME_PROPERTY).value(name); 70 | } 71 | 72 | if (scale != null) { 73 | jsonWriter.name(StructuredJsonPropertyNames.NAMESPACE_SCALING_FACTOR_PROPERTY).value(scale); 74 | } 75 | 76 | Iterable features = namespace.getFeatures(); 77 | 78 | if (features != null) { 79 | jsonWriter.name(StructuredJsonPropertyNames.NAMESPACE_FEATURES_PROPERTY); 80 | 81 | jsonWriter.beginArray(); 82 | 83 | for (Feature feature : features) { 84 | writeFeature(feature, jsonWriter); 85 | } 86 | 87 | jsonWriter.endArray(); 88 | } 89 | 90 | jsonWriter.endObject(); 91 | } 92 | 93 | private static void writeFeature(Feature feature, JsonWriter jsonWriter) throws IOException { 94 | jsonWriter.beginObject(); 95 | 96 | String name = feature.getName(); 97 | Float value = 
feature.getValue(); 98 | 99 | if (StringUtils.isBlank(name) == false) { 100 | jsonWriter.name(StructuredJsonPropertyNames.FEATURE_NAME_PROPERTY).value(name); 101 | } 102 | 103 | if (value != null) { 104 | jsonWriter.name(StructuredJsonPropertyNames.FEATURE_VALUE_PROPERTY).value(value); 105 | } 106 | 107 | jsonWriter.endObject(); 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/SimpleJsonExamplesMessageBodyReaderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.IOException; 8 | import java.io.InputStreamReader; 9 | import java.io.OutputStreamWriter; 10 | import java.io.PipedInputStream; 11 | import java.io.PipedOutputStream; 12 | import java.util.concurrent.Callable; 13 | import java.util.concurrent.CountDownLatch; 14 | import java.util.concurrent.Exchanger; 15 | import java.util.concurrent.ExecutionException; 16 | import java.util.concurrent.Executors; 17 | import java.util.concurrent.Future; 18 | import java.util.concurrent.TimeUnit; 19 | import java.util.concurrent.TimeoutException; 20 | import java.util.zip.GZIPInputStream; 21 | 22 | import javax.ws.rs.core.MediaType; 23 | 24 | import junit.framework.Assert; 25 | 26 | import org.junit.Before; 27 | import org.junit.Test; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | import com.eharmony.matching.vw.webservice.common.example.Example; 32 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 33 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 34 | import com.google.common.base.Charsets; 35 | import com.google.gson.stream.JsonWriter; 36 | 37 | /** 38 | * 
@author vrahimtoola 39 | * 40 | * Tests the SimpleJsonExamplesMessageBodyReader. 41 | */ 42 | public class SimpleJsonExamplesMessageBodyReaderTest { 43 | 44 | private static final Logger LOGGER = LoggerFactory.getLogger(SimpleJsonExamplesMessageBodyReaderTest.class); 45 | 46 | private SimpleJsonExamplesMessageBodyReader toTest; 47 | 48 | @Before 49 | public void setUp() { 50 | toTest = new SimpleJsonExamplesMessageBodyReader(); 51 | } 52 | 53 | /* 54 | * Tests that the isReadable method accepts the simple JSON media type and rejects plain text. 55 | */ 56 | @Test 57 | public void isReadableTest() { 58 | 59 | Assert.assertTrue(toTest.isReadable(ExamplesIterable.class, null, null, new MediaType() { 60 | @Override 61 | public String toString() { 62 | return ExampleMediaTypes.SIMPLE_JSON_0_1_0; 63 | } 64 | })); 65 | 66 | Assert.assertFalse(toTest.isReadable(ExamplesIterable.class, null, null, MediaType.TEXT_PLAIN_TYPE)); 67 | } 68 | 69 | /* 70 | * Tests that the readFrom method works as expected. 71 | */ 72 | @Test 73 | public void readFromTest() throws IOException, InterruptedException, TimeoutException, ExecutionException { 74 | //the examples 75 | GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz")); 76 | final BufferedReader testReader = new BufferedReader(new InputStreamReader(gzipInputStream, Charsets.UTF_8)); 77 | 78 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(); 79 | PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream); 80 | 81 | final CountDownLatch startWritingExamplesLatch = new CountDownLatch(1); 82 | 83 | final Exchanger<String> exampleExchanger = new Exchanger<String>(); 84 | 85 | final JsonWriter jsonWriter = new JsonWriter(new OutputStreamWriter(pipedOutputStream, Charsets.UTF_8)); //explicit charset: the expected text above is decoded as UTF-8, so don't encode with the platform default 86 | final java.util.concurrent.ExecutorService executorService = Executors.newCachedThreadPool(); //fully qualified because ExecutorService is not among this file's imports 87 | Future<Void> successFuture = executorService.submit(new Callable<Void>() { 88 | 89 | @Override 90 | public Void call() throws Exception { 91 | 92 | startWritingExamplesLatch.await(); 93 | 94 | String readExample =
null; 95 | 96 | jsonWriter.beginArray(); 97 | 98 | while ((readExample = testReader.readLine()) != null) { 99 | jsonWriter.beginObject(); 100 | jsonWriter.name("example"); 101 | jsonWriter.value(readExample); 102 | jsonWriter.endObject(); 103 | 104 | jsonWriter.flush(); 105 | pipedOutputStream.flush(); //need to flush right after writing, otherwise the reading thread won't get it 106 | 107 | exampleExchanger.exchange(readExample, 1, TimeUnit.SECONDS); //don't care about what's being given to us in exchange 108 | } 109 | 110 | jsonWriter.endArray(); 111 | jsonWriter.flush(); 112 | jsonWriter.close(); 113 | pipedOutputStream.flush(); 114 | pipedOutputStream.close(); 115 | 116 | return null; 117 | } 118 | 119 | }); 120 | executorService.shutdown(); //no further tasks will be submitted; the already-submitted writer still runs to completion and the pool thread can then exit 121 | ExamplesIterable examplesIterable = toTest.readFrom(ExamplesIterable.class, null, null, null, null, pipedInputStream); 122 | 123 | startWritingExamplesLatch.countDown(); //tell the example writing thread to start writing examples 124 | 125 | long numExamplesProcessed = 0; 126 | 127 | for (Example example : examplesIterable) { //start reading the examples 128 | 129 | Assert.assertEquals(exampleExchanger.exchange("", 1, TimeUnit.SECONDS), example.getVWStringRepresentation()); 130 | 131 | if (numExamplesProcessed % 20000 == 0) LOGGER.debug("Read example: {}", example.getVWStringRepresentation()); 132 | 133 | numExamplesProcessed++; 134 | } 135 | 136 | successFuture.get(); //verify no exceptions were thrown. 137 | 138 | Assert.assertEquals(272274, numExamplesProcessed); //verify all examples in ner.train.gz were processed.
139 | } 140 | 141 | } 142 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonExamplesMessageBodyReaderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader; 5 | 6 | import java.io.IOException; 7 | import java.io.OutputStreamWriter; 8 | import java.io.PipedInputStream; 9 | import java.io.PipedOutputStream; 10 | import java.io.StringWriter; 11 | import java.util.concurrent.Callable; 12 | import java.util.concurrent.CountDownLatch; 13 | import java.util.concurrent.Exchanger; 14 | import java.util.concurrent.ExecutionException; 15 | import java.util.concurrent.ExecutorService; 16 | import java.util.concurrent.Executors; 17 | import java.util.concurrent.Future; 18 | import java.util.concurrent.TimeUnit; 19 | import java.util.concurrent.TimeoutException; 20 | 21 | import javax.ws.rs.core.MediaType; 22 | 23 | import junit.framework.Assert; 24 | 25 | import org.junit.Before; 26 | import org.junit.Test; 27 | import org.slf4j.Logger; 28 | import org.slf4j.LoggerFactory; 29 | 30 | import com.eharmony.matching.vw.webservice.client.TestUtils; 31 | import com.eharmony.matching.vw.webservice.common.example.Example; 32 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes; 33 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample; 34 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable; 35 | import com.google.common.base.Charsets; 36 | import com.google.gson.stream.JsonWriter; 37 | 38 | /** 39 | * @author vrahimtoola 40 | * 41 | */ 42 | public class StructuredJsonExamplesMessageBodyReaderTest { 43 | 44 | private static final Logger LOGGER = 
LoggerFactory.getLogger(StructuredJsonExamplesMessageBodyReaderTest.class); 45 | 46 | private StructuredJsonExamplesMessageBodyReader toTest; 47 | 48 | @Before 49 | public void setUp() { 50 | toTest = new StructuredJsonExamplesMessageBodyReader(); 51 | } 52 | 53 | /* 54 | * Tests that the isReadable method accepts the structured JSON media type and rejects plain text. 55 | */ 56 | @Test 57 | public void isReadableTest() { 58 | 59 | Assert.assertTrue(toTest.isReadable(ExamplesIterable.class, null, null, new MediaType() { 60 | @Override 61 | public String toString() { 62 | return ExampleMediaTypes.STRUCTURED_JSON_0_1_0; 63 | } 64 | })); 65 | 66 | Assert.assertFalse(toTest.isReadable(ExamplesIterable.class, null, null, MediaType.TEXT_PLAIN_TYPE)); 67 | } 68 | //smoke test only: builds a two-namespace example and serializes it with JsonTestUtils; nothing is asserted 69 | @Test 70 | public void throwAwayTest() throws IOException { 71 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder(); 72 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder(); 73 | 74 | exampleBuilder.setLabel("34"); 75 | exampleBuilder.setTag("someTag"); 76 | 77 | namespaceBuilder.setName("one"); 78 | namespaceBuilder.addFeature("a", 12.34f); 79 | namespaceBuilder.addFeature("b", 45.1f); 80 | 81 | StructuredExample.Namespace firstNamespace = namespaceBuilder.build(); 82 | 83 | namespaceBuilder.clear(); 84 | 85 | namespaceBuilder.setName("two"); 86 | namespaceBuilder.setScalingFactor(34.3f); 87 | namespaceBuilder.addFeature("bah", 0.038293f); 88 | namespaceBuilder.addFeature("another", 3.4000f); 89 | namespaceBuilder.addFeature("andThis", 2.0f); 90 | 91 | StructuredExample.Namespace secondNamespace = namespaceBuilder.build(); 92 | 93 | exampleBuilder.addNamespace(firstNamespace); 94 | exampleBuilder.addNamespace(secondNamespace); 95 | 96 | StringWriter stringWriter = new StringWriter(); 97 | 98 | JsonWriter jsonWriter = new JsonWriter(stringWriter); 99 | 100 | JsonTestUtils.writeExample(jsonWriter, exampleBuilder.build()); 101 | 102 | jsonWriter.flush();
103 | jsonWriter.close(); 104 | 105 | LOGGER.debug("The JSON is: {}", stringWriter.toString()); 106 | } 107 | 108 | /* 109 | * Tests that the readFrom method works as expected. 110 | */ 111 | @Test 112 | public void readFromTest() throws IOException, InterruptedException, TimeoutException, ExecutionException { 113 | 114 | final CountDownLatch readThreadIsReadyLatch = new CountDownLatch(1); 115 | final Exchanger<Example> exampleExchanger = new Exchanger<Example>(); 116 | 117 | final PipedInputStream pipedInputStream = new PipedInputStream(); //the reading thread will read from this stream 118 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(pipedInputStream); //the submission thread will write to this stream 119 | 120 | ExecutorService executorService = Executors.newCachedThreadPool(); 121 | 122 | //------- 123 | //this is the thread that will read the structured examples and hand each one 124 | //to the submitting thread, which compares it to what it wrote. 125 | Future<Integer> readingThreadFuture = executorService.submit(new Callable<Integer>() { 126 | 127 | @Override 128 | public Integer call() throws Exception { 129 | 130 | readThreadIsReadyLatch.countDown(); //signal to the writing thread that this thread is ready.
131 | 132 | Iterable<Example> readStructuredExamples = toTest.readFrom(ExamplesIterable.class, null, null, null, null, pipedInputStream); 133 | 134 | int numExamplesRead = 0; 135 | 136 | LOGGER.trace("Starting to read examples..."); 137 | 138 | for (Example readExample : readStructuredExamples) { 139 | //LOGGER.trace("Read example: {}", readExample.getVWStringRepresentation()); 140 | 141 | exampleExchanger.exchange(readExample); 142 | numExamplesRead++; 143 | } 144 | 145 | return Integer.valueOf(numExamplesRead); 146 | } 147 | }); 148 | executorService.shutdown(); //no further tasks will be submitted; the reader task still runs to completion and the pool thread can then exit 149 | readThreadIsReadyLatch.await(); 150 | 151 | LOGGER.trace("Writing examples..."); 152 | 153 | StructuredExample lastComputedExample = null; 154 | OutputStreamWriter outputStreamWriter = new OutputStreamWriter(pipedOutputStream, Charsets.UTF_8); 155 | 156 | JsonWriter jsonWriter = new JsonWriter(outputStreamWriter); 157 | 158 | jsonWriter.beginArray(); 159 | 160 | Iterable<StructuredExample> structuredExamplesIterable = TestUtils.getStructuredExamplesFromNerTrain(); 161 | 162 | for (StructuredExample example : structuredExamplesIterable) { 163 | 164 | if (lastComputedExample != null) { 165 | Assert.assertEquals(lastComputedExample.getVWStringRepresentation(), exampleExchanger.exchange(null, 2000, TimeUnit.MILLISECONDS).getVWStringRepresentation()); 166 | } 167 | 168 | if (example != StructuredExample.EMPTY_EXAMPLE) 169 | JsonTestUtils.writeExample(jsonWriter, example); 170 | else { 171 | jsonWriter.beginObject(); 172 | jsonWriter.endObject(); 173 | } 174 | 175 | jsonWriter.flush(); 176 | 177 | lastComputedExample = example; 178 | 179 | }//end for 180 | 181 | jsonWriter.endArray(); 182 | 183 | jsonWriter.flush(); 184 | 185 | jsonWriter.close(); 186 | 187 | LOGGER.trace("Verifying final example..."); 188 | 189 | //don't forget to verify the very last example!
190 | Assert.assertEquals(lastComputedExample.getVWStringRepresentation(), exampleExchanger.exchange(null, 2000, TimeUnit.MILLISECONDS).getVWStringRepresentation()); 191 | 192 | Assert.assertEquals(272274, readingThreadFuture.get().intValue()); //assert that all examples were read and no exceptions were thrown. 193 | 194 | } 195 | 196 | } 197 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/messagebodyreader/plaintextexamplesmessagebodyreader/PlainTextExamplesMessageBodyReaderTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.messagebodyreader.plaintextexamplesmessagebodyreader; 5 | 6 | import java.io.BufferedReader; 7 | import java.io.ByteArrayInputStream; 8 | import java.io.IOException; 9 | import java.io.InputStreamReader; 10 | import java.util.zip.GZIPInputStream; 11 | 12 | import javax.ws.rs.WebApplicationException; 13 | import javax.ws.rs.core.MediaType; 14 | 15 | import junit.framework.Assert; 16 | 17 | import org.glassfish.jersey.message.internal.ReaderWriter; 18 | import org.junit.Test; 19 | import org.slf4j.Logger; 20 | import org.slf4j.LoggerFactory; 21 | 22 | import com.eharmony.matching.vw.webservice.common.example.Example; 23 | 24 | /** 25 | * @author vrahimtoola Tests the PlainTextExamplesMessageBodyReader. 26 | */ 27 | public class PlainTextExamplesMessageBodyReaderTest { 28 | 29 | private static final Logger LOGGER = LoggerFactory 30 | .getLogger(PlainTextExamplesMessageBodyReaderTest.class); 31 | 32 | /* 33 | * Simple test of examples.
34 | */ 35 | @Test 36 | public void spacesAtBeginningAndDifferentNewlinesTest() 37 | throws WebApplicationException, IOException { 38 | 39 | for (String newLineToUse : new String[] { "\n", "\r", "\r\n" }) { 40 | PlainTextExamplesMessageBodyReader toTest = new PlainTextExamplesMessageBodyReader(); 41 | 42 | MediaType mediaType = MediaType.TEXT_PLAIN_TYPE; 43 | 44 | StringBuilder theExamples = new StringBuilder(); 45 | theExamples.append("Example 1"); 46 | theExamples.append(newLineToUse); 47 | theExamples.append("Example 2"); 48 | theExamples.append(newLineToUse); 49 | theExamples.append("Example3 and 4 and 5"); 50 | theExamples.append(newLineToUse); 51 | theExamples.append(newLineToUse); 52 | 53 | // note: data needs to be encoded using the correct char set, which 54 | // must match the mediatype. 55 | ByteArrayInputStream byteArrayInputStream = new ByteArrayInputStream( 56 | theExamples.toString().getBytes( 57 | ReaderWriter.getCharset(mediaType))); 58 | 59 | // the readFrom method only looks at the mediatype and the input 60 | // stream, so other params can be null. 61 | Iterable<Example> theIterable = toTest.readFrom(null, null, null, 62 | mediaType, null, byteArrayInputStream); 63 | 64 | int x = 0; 65 | for (Example example : theIterable) { 66 | switch (x++) { 67 | case 0: 68 | Assert.assertEquals("Example 1", 69 | example.getVWStringRepresentation()); 70 | break; 71 | 72 | case 1: 73 | Assert.assertEquals("Example 2", 74 | example.getVWStringRepresentation()); 75 | break; 76 | 77 | case 2: 78 | Assert.assertEquals("Example3 and 4 and 5", 79 | example.getVWStringRepresentation()); 80 | break; 81 | 82 | case 3: 83 | Assert.assertEquals("", example.getVWStringRepresentation()); 84 | break; 85 | 86 | default: 87 | Assert.fail(); 88 | } 89 | } 90 | // three text examples plus one empty example produced by the doubled trailing newline 91 | Assert.assertEquals(4, x); 92 | 93 | } 94 | 95 | } 96 | 97 | /* 98 | * Tests a massive number of examples using the ner.train.gz training set 99 | * from the vowpal wabbit github repository.
This training set was taken 100 | * from the '/test/train-sets/' subfolder of the vowpal wabbit github repo 101 | * (git@github.com:JohnLangford/vowpal_wabbit.git). 102 | */ 103 | @Test 104 | public void hugeFileTest() throws IOException { 105 | // pass the gzip inputstream to the string message body reader, and at 106 | // the same time, read from the file. 107 | // then compare the examples read in to verify that they match. 108 | 109 | // the input stream to read directly from the file 110 | GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass() 111 | .getClassLoader().getResourceAsStream("ner.train.gz")); 112 | // decode the reference copy with the media type's charset rather than the platform default 113 | BufferedReader testReader = new BufferedReader(new InputStreamReader( 114 | gzipInputStream, ReaderWriter.getCharset(MediaType.TEXT_PLAIN_TYPE))); 115 | 116 | // the input stream that the PlainTextExamplesMessageBodyReader will 117 | // use. 118 | GZIPInputStream gzipInputStreamForTestSubject = new GZIPInputStream( 119 | this.getClass().getClassLoader() 120 | .getResourceAsStream("ner.train.gz")); 121 | 122 | PlainTextExamplesMessageBodyReader toTest = new PlainTextExamplesMessageBodyReader(); 123 | 124 | MediaType mediaType = MediaType.TEXT_PLAIN_TYPE; 125 | 126 | Iterable<Example> theIterableOfExamples = toTest.readFrom(null, null, 127 | null, mediaType, null, gzipInputStreamForTestSubject); 128 | 129 | int numExamples = 0; 130 | 131 | boolean dumpExamples = false; // turn on to see some examples 132 | 133 | for (Example example : theIterableOfExamples) { 134 | String expectedExample = testReader.readLine(); 135 | 136 | Assert.assertEquals(expectedExample, 137 | example.getVWStringRepresentation()); 138 | 139 | numExamples++; 140 | 141 | if (dumpExamples && numExamples % 21 == 0) // print every 21st 142 | // example 143 | { 144 | 145 | // TODO: get a Jenkins build going, turn on code coverage + 146 | // findbugs 147 | // etc etc 148 | 149 | LOGGER.debug("expected example: {}", expectedExample); 150 | LOGGER.debug("read example : {}", example.getVWStringRepresentation()); 151 | LOGGER.debug(""); 152 | } 153 | 154 |
} 155 | 156 | Assert.assertNull(testReader.readLine()); // ensure all examples 157 | // read and 158 | // verified. 159 | } 160 | } 161 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/util/StringIterableTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | package com.eharmony.matching.vw.webservice.util; 5 | 6 | import junit.framework.Assert; 7 | 8 | import org.junit.Test; 9 | 10 | import com.eharmony.matching.vw.webservice.util.StringIterable; 11 | 12 | /** 13 | * @author vrahimtoola Tests the StringIterable. 14 | */ 15 | public class StringIterableTest { 16 | 17 | @Test(expected = IllegalArgumentException.class) 18 | public void throwsExceptionOnNullChunkOfTextTest() { 19 | new StringIterable(null); 20 | } 21 | 22 | @Test 23 | public void BasicNewLinesTest() { 24 | String theTextString = "Line 1\nLine 2\nLine 3"; 25 | 26 | StringIterable stringIterable = new StringIterable(theTextString); 27 | 28 | int x = 0; 29 | for (String ln : stringIterable) { 30 | switch (x++) { 31 | case 0: 32 | Assert.assertEquals("Line 1", ln); 33 | break; 34 | case 1: 35 | Assert.assertEquals("Line 2", ln); 36 | break; 37 | case 2: 38 | Assert.assertEquals("Line 3", ln); 39 | break; 40 | default: 41 | Assert.fail(); 42 | } 43 | } 44 | Assert.assertEquals(3, x); // without this an iterable that yields nothing would pass vacuously 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/resources/logging.properties: -------------------------------------------------------------------------------- 1 | .level
= ALL 2 | 3 | handlers=java.util.logging.ConsoleHandler 4 | 5 | 6 | java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter 7 | java.util.logging.ConsoleHandler.level = FINEST 8 | 9 | java.util.logging.FileHandler.level = FINEST 10 | #java.util.logging.FileHandler.filter specifies the name of a Filter class to use (defaults to no Filter). 11 | java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter 12 | #java.util.logging.FileHandler.encoding the name of the character set encoding to use (defaults to the default platform encoding). 13 | #java.util.logging.FileHandler.limit specifies an approximate maximum amount to write (in bytes) to any one file. If this is zero, then there is no limit. (Defaults to no limit). 14 | #java.util.logging.FileHandler.count specifies how many output files to cycle through (defaults to 1). 15 | java.util.logging.FileHandler.pattern = /Users/vrahimtoola/Desktop/vw-webservice.log 16 | #java.util.logging.FileHandler.append specifies whether the FileHandler should append onto any existing files (defaults to false). 17 | 18 | org.apache.http.level = FINEST -------------------------------------------------------------------------------- /vw-webservice-jersey/src/test/resources/ner.train.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/eHarmony/vw-webservice/53b4cfa9d8872058a50fcaa9e11926a6bd73306c/vw-webservice-jersey/src/test/resources/ner.train.gz --------------------------------------------------------------------------------