├── .gitignore
├── .gitmodules
├── .travis.yml
├── LICENSE
├── README.md
├── pom.xml
├── vw-webservice-common
├── README.md
├── pom.xml
└── src
│ ├── main
│ └── java
│ │ └── com
│ │ └── eharmony
│ │ └── matching
│ │ └── vw
│ │ └── webservice
│ │ └── common
│ │ ├── example
│ │ ├── Example.java
│ │ ├── ExampleFormatException.java
│ │ ├── ExampleMediaTypes.java
│ │ ├── StringExample.java
│ │ └── StructuredExample.java
│ │ └── prediction
│ │ ├── Prediction.java
│ │ ├── PredictionMediaTypes.java
│ │ └── StringPrediction.java
│ └── test
│ ├── java
│ └── com
│ │ └── eharmony
│ │ └── matching
│ │ └── vw
│ │ └── webservice
│ │ └── example
│ │ └── StructuredExampleTest.java
│ └── resources
│ ├── ner.train.gz
│ └── vw_example_schema.json
├── vw-webservice-core
├── README.md
├── pom.xml
└── src
│ └── main
│ └── java
│ └── com
│ └── eharmony
│ └── matching
│ └── vw
│ └── webservice
│ └── core
│ ├── ExampleReadException.java
│ ├── ExamplesIterable.java
│ ├── ExamplesIterableImpl.java
│ └── exampleprocessor
│ ├── ExampleProcessingEventHandler.java
│ ├── ExampleProcessingManager.java
│ ├── ExampleProcessor.java
│ ├── ExampleProcessorFactory.java
│ ├── ExampleProcessorFeatures.java
│ ├── ExampleProcessorFeaturesImpl.java
│ ├── ExampleSubmissionException.java
│ ├── ExampleSubmissionState.java
│ ├── PredictionFetchException.java
│ ├── PredictionFetchState.java
│ └── tcpip
│ ├── AsyncFailFastTCPIPExampleProcessor.java
│ ├── TCPIPExampleProcessingManager.java
│ ├── TCPIPExampleProcessorFactory.java
│ ├── TCPIPPredictionsIterator.java
│ ├── TCPIPSocketFactory.java
│ └── TCPIPSocketFactoryImpl.java
└── vw-webservice-jersey
├── README.md
├── pom.xml
└── src
├── main
├── java
│ └── com
│ │ └── eharmony
│ │ └── matching
│ │ └── vw
│ │ └── webservice
│ │ ├── PredictResource.java
│ │ ├── RequestHandler.java
│ │ ├── messagebodyreader
│ │ ├── jsonexamplesmessagebodyreader
│ │ │ ├── GsonJsonExamplesProvider.java
│ │ │ ├── JsonExamplesProvider.java
│ │ │ ├── SimpleJsonExamplesMessageBodyReader.java
│ │ │ ├── StructuredJsonExamplesMessageBodyReader.java
│ │ │ ├── StructuredJsonExamplesProvider.java
│ │ │ ├── StructuredJsonPropertyNames.java
│ │ │ └── TracingJsonReader.java
│ │ └── plaintextexamplesmessagebodyreader
│ │ │ ├── PlainTextExamplesMessageBodyReader.java
│ │ │ └── StringExampleIterator.java
│ │ └── util
│ │ └── StringIterable.java
├── resources
│ ├── logback.xml
│ ├── logging.properties
│ └── vw-webservice.properties
└── webapp
│ └── WEB-INF
│ ├── applicationContext.xml
│ └── web.xml
└── test
├── java
└── com
│ └── eharmony
│ └── matching
│ └── vw
│ └── webservice
│ ├── client
│ ├── AsyncHttpClientTest.java
│ └── TestUtils.java
│ ├── core
│ └── exampleprocessor
│ │ └── tcpip
│ │ └── AsyncFailFastTCPIPExampleProcessorTest.java
│ ├── messagebodyreader
│ ├── jsonexamplesmessagebodyreader
│ │ ├── GsonJsonExamplesProviderTest.java
│ │ ├── JsonTestUtils.java
│ │ ├── SimpleJsonExamplesMessageBodyReaderTest.java
│ │ └── StructuredJsonExamplesMessageBodyReaderTest.java
│ └── plaintextexamplesmessagebodyreader
│ │ └── PlainTextExamplesMessageBodyReaderTest.java
│ └── util
│ └── StringIterableTest.java
└── resources
├── logback-test.xml
├── logging.properties
└── ner.train.gz
/.gitignore:
--------------------------------------------------------------------------------
1 | .settings
2 | .classpath
3 | .project
4 | .springBeans
5 | .gitignore
6 | target/
7 | META-INF/
8 | vw-webservice.np.dc1.eharmony.com
9 | /vowpal_wabbit
10 | ner.train
11 | index.jsp
12 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "vowpal_wabbit"]
2 | path = vowpal_wabbit
3 | url = https://github.com/JohnLangford/vowpal_wabbit.git
4 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2013, eHarmony Inc
2 | Copyright (c) 2014, eHarmony Inc
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without modification,
6 | are permitted provided that the following conditions are met:
7 |
8 | * Redistributions of source code must retain the above copyright notice, this
9 | list of conditions and the following disclaimer.
10 |
11 | * Redistributions in binary form must reproduce the above copyright notice, this
12 | list of conditions and the following disclaimer in the documentation and/or
13 | other materials provided with the distribution.
14 |
15 | * Neither the name of eHarmony nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
23 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [Build Status](https://travis-ci.org/eHarmony/vw-webservice)
2 |
3 | # Vowpal Wabbit Webservice
4 |
5 | This is a simple web service that wraps [vowpal wabbit](https://github.com/JohnLangford/vowpal_wabbit) daemon.
6 |
7 | ## Installation
8 |
9 | ### Dependencies
10 |
11 | * Java 1.7
12 | * Maven 2.2.1 or Maven 3
13 | * Jetty 9.1.0
14 | * Vowpal Wabbit (included as a submodule)
15 |
16 | The current web service was developed and tested on Jetty 9.1.0. You will need Maven (either version 2 or 3) to build the web service. Instructions for both versions have been included in this document, so pick the version of Maven you'd like to use and execute the provided instructions.
17 |
18 | But first, we need to get the right version of Java...
19 |
20 | #### Java 1.7
21 |
22 | You will need Java 7 in order to run Jetty 9.1.0. Furthermore, you need the JDK (instead of the JRE) in order to run Maven.
23 |
24 | From the Maven documentation:
25 |
26 | ```
27 | Make sure that JAVA_HOME is set to the location of your JDK, e.g. export JAVA_HOME=/usr/java/jdk1.7.x and that $JAVA_HOME/bin is in your PATH environment variable.
28 | ```
29 |
30 | Once the Java 7 JDK is ready to go (with JAVA_HOME/bin also properly set on your PATH), you can install the version of Maven you'd like to use (pick either 2.2.1 or 3.1.1 from below).
31 |
32 | #### Maven 2.2.1
33 |
34 | ```
35 | wget http://mirror.tcpdiag.net/apache/maven/maven-2/2.2.1/binaries/apache-maven-2.2.1-bin.tar.gz
36 | tar xzvf apache-maven-2.2.1-bin.tar.gz
37 | export M2_HOME=$PWD/apache-maven-2.2.1
38 | export PATH=$M2_HOME/bin:$PATH
39 |
40 | # check it worked
41 | mvn -version
42 | ```
43 |
44 | #### Maven 3.1.1
45 |
46 | ```
47 | wget http://mirror.tcpdiag.net/apache/maven/maven-3/3.1.1/binaries/apache-maven-3.1.1-bin.tar.gz
48 | tar xzvf apache-maven-3.1.1-bin.tar.gz
49 | export M2_HOME=$PWD/apache-maven-3.1.1
50 | export PATH=$M2_HOME/bin:$PATH
51 |
52 | # check it worked
53 | mvn -version
54 | ```
55 |
56 | Now let's install Jetty which we'll use as our web container for the web service. Although technically you should be able to use the web container of your choice (Tomcat/Glassfish/etc) bear in mind that so far we've only
57 | tested the web service using Jetty.
58 |
59 | #### Jetty 9.1.0
60 |
61 | On the box where you plan on running the web service, install [Jetty 9.1.0](http://eclipse.org/downloads/download.php?file=/jetty/9.1.0.v20131115/dist/jetty-distribution-9.1.0.v20131115.tar.gz&r=1).
62 |
63 | On the command line you can use:
64 |
65 | ```
66 | wget http://mirrors.ibiblio.org/eclipse/jetty/9.1.0.v20131115/dist/jetty-distribution-9.1.0.v20131115.tar.gz
67 | tar xzvf jetty-distribution-9.1.0.v20131115.tar.gz
68 | ```
69 |
70 | That's it for the prerequisites. Now you can go ahead and set up the VW web service.
71 |
72 | ### Building and Deploying the VW Web Service
73 |
74 | This involves 3 steps:
75 |
76 | 1. Build vowpal wabbit from source, then launch it in daemon mode.
77 | 2. Specify the host and port where vowpal wabbit is running in the vw-webservice.properties file, and build+package the webservice to produce the .war (Web Application Archive) file.
78 | 3. Place the .war file into the /webapps folder of Jetty.
79 |
80 | Let's get started.
81 |
82 | Clone this repo:
83 |
84 | ```
85 | git clone --recursive git@github.com:eHarmony/vw-webservice.git
86 | cd vw-webservice
87 | ```
88 |
89 | Note: for the --recursive option to work (it grabs the vowpal wabbit submodule for you), you will need git 1.6.5 or later. Otherwise you can pull the vowpal wabbit submodule in separately using ``git submodule``.
90 |
91 | You should now have a vw-webservice directory with some files and 4 directories inside of it:
92 |
93 | * vowpal_wabbit
94 | * vw-webservice-common
95 | * vw-webservice-core
96 | * vw-webservice-jersey
97 |
98 | #### Building Vowpal Wabbit
99 |
100 | Now that you have the webservice, under the vw-webservice/vowpal_wabbit folder, you should find the C++ source for Vowpal Wabbit. Before you can launch the daemon you will have to build it.
101 |
102 | If you're using Linux, then make sure you already have gcc and g++ installed on your system. Note also that Vowpal Wabbit depends on boost program options (on a Mac this can be installed via [homebrew](http://brew.sh): ``brew install boost`` and on Linux you can try ``sudo apt-get install -y -m libboost-program-options-dev``).
103 |
104 | ```
105 | cd vowpal_wabbit
106 | make clean
107 | make
108 |
109 | #now launch it in daemon mode (from within the vw-webservice/vowpal_wabbit directory)
110 | ./vowpalwabbit/vw --daemon [other options you like]
111 | ```
112 |
113 | #### Building VW Web Service
114 |
115 | Now that we have Vowpal Wabbit up and running, we just need to make sure that the web service knows the host and port where the daemon lives. Edit the config:
116 |
117 | ```
118 | vim vw-webservice/vw-webservice-jersey/src/main/resources/vw-webservice.properties
119 | ```
120 |
121 | and change if necessary:
122 | ```
123 | vw.hostName=localhost
124 | vw.port=26542
125 | ```
126 |
127 | Now let's build and package up the web service:
128 |
129 | ```
130 | mvn package
131 | ```
132 |
133 | In the output, you should see the location where the WAR (Web Application Archive) file has been created:
134 |
135 | ```
136 | ...
137 | ...
138 | ...
139 | [INFO] Webapp assembled in[172 msecs]
140 | [INFO] Building war: vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war
141 | [INFO] ------------------------------------------------------------------------
142 | [INFO] BUILD SUCCESSFUL
143 | [INFO] ------------------------------------------------------------------------
144 | [INFO] Total time: 23 seconds
145 | ...
146 | ...
147 | ...
148 | ```
149 |
150 | Now you can deploy the .war file:
151 |
152 | ```
153 | # the maven build (assuming you're using the default directories) will have spit out the WAR file to the 'target' subdirectory
154 | # if you're running the Jetty instance on your local machine, copy the WAR over to the 'webapps' directory of Jetty
155 | cp vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war /path/to/jetty-9.1.0/webapps/
156 |
157 | # alternatively, you can scp the war file to the box where you are running your jetty instance:
158 | # scp vw-webservice/vw-webservice-jersey/target/vw-webservice-jersey.war box.running.jetty.com:/path/to/jetty-9.1.0/webapps/
159 |
160 | # Restart the Jetty instance (wherever you have Jetty running).
161 | cd /path/to/jetty-9.1.0
162 | java -jar start.jar
163 | ```
164 |
165 | The last command will start spitting out the Jetty logs to the console. You can keep an eye on this as you submit requests to the vw-webservice, which will log to the console. The web service
166 | uses logback for logging, and the logging configuration can be found under vw-webservice-jersey/src/main/resources/logback.xml.
167 |
168 | ## Using the Web Service
169 |
170 | You can hit the webservice from the command line using curl, or code up your own client (in any language) to communicate with the web service. Something to keep in mind is that the client you use should support chunked transfer encoding, as this will allow you to stream massive amounts of data to/from the webservice, without buffering it all in memory to calculate the value of the Content-Length request header. A Java client that supports this is the [AsyncHttpClient](http://sonatype.github.io/async-http-client/). You can find a test that uses this client in ``vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/AsyncHttpClientTest.java``.
171 |
172 | Examples should follow the VW format. For more information on the VW input format, refer to the documentation at: https://github.com/JohnLangford/vowpal_wabbit/wiki/Input-format
173 |
174 | However, when examples are submitted to the web service by a client, they can be either in plaintext format, or in a more structured format. In either case, once an example is received by the web service, it will convert the example to the proper VW format before submitting it to the VW daemon.
175 |
176 | #### Plaintext examples
177 |
178 | This means you will be submitting a stream of examples to the web service, with each example being a string in the accepted VW input format.
179 |
180 | For instance:
181 |
182 | ```
183 | 1 first|user name=Adam gender=male age=34 |movie Snatch
184 | -1 second|user name=Adam gender=male age=34 |movie Titanic
185 | 1 third|user name=Adam gender=male age=34 |movie Hangover
186 | ```
187 |
188 | You can submit such examples to the web service from the command line using curl. Assuming all your plaintext VW examples are sitting in some file called examples.txt, you can do the following:
189 |
190 | ```
191 | curl -H "Content-Type:text/plain" -X POST \
192 | -T examples.txt \
193 | http://host.running.jetty.com:8080/vw-webservice-jersey/predict/main \
194 | -v
195 | ```
196 |
197 | If you happen to have a humongous gzipped file containing millions of plaintext examples (eg, ner.train.gz, included under vw-webservice-jersey/src/test/resources, which has ~272K examples), you can do the following:
198 |
199 | ```
200 | # assume we are in the vw-webservice directory
201 | gzcat vw-webservice-jersey/src/test/resources/ner.train.gz \
202 | | curl -H "Content-Type:text/plain" \
203 | -X POST \
204 | -T - \
205 | http://host.running.jetty.com:8080/vw-webservice-jersey/predict/main \
206 | -v
207 | ```
208 |
209 | The curl '-T' switch performs a file transfer, without trying to buffer all the data in memory to compute the Content-Length HTTP request header.
210 |
211 | Of course, you can also use any HTTP client to submit such a stream of plaintext examples to the web service. Just make sure that each example appears on a line by itself.
212 |
213 | #### Structured examples
214 |
215 | This means you will build up each VW example in a structured way using some API, and this structure will be reflected in the format of the data being sent to the web service.
216 |
217 | Currently, there is a class called StructuredExample.java in the package com.eharmony.matching.vw.webservice.common.example in the vw-webservice-common project, that lets you use the Builder pattern to build up an example from its component parts (a label, a tag, and a set of namespaces, each of which has some number of features).
218 |
219 | To see code that demonstrates this, check out the "simpleExampleBuildingTest" and "simpleExampleBuildingTestWithTag" tests in StructuredExampleTest.java in that same project. These tests demonstrate how to use the API to build up an example piece by piece.
220 |
221 | Once you have an instance of a StructuredExample, you can write that out to some stream. Currently, the web service only supports the json format for submitting structured examples. In json, a stream of structured examples must have the schema described in "vw_example_schema.json" found in the same project (vw-webservice-common) under the src/test/resources folder. Note that this is the schema for the entire stream of structured json examples that will be submitted to the web service.
222 |
223 | The serialized stream of json-formatted VW examples would look like this:
224 |
225 | ```javascript
226 | [
227 | {
228 | "label": "34",
229 | "tag": "someTag",
230 | "namespaces": [{
231 | "name": "one",
232 | "features": [{
233 | "name": "a",
234 | "value": 12.34
235 | }, {
236 | "name": "b",
237 | "value": 45.1
238 | }]
239 | }, {
240 | "name": "two",
241 | "scale": 34.3,
242 | "features": [{
243 | "name": "bah",
244 | "value": 0.038293
245 | }, {
246 | "name": "another",
247 | "value": 3.4
248 | }, {
249 | "name": "andThis",
250 | "value": 2.0
251 | }]
252 | }]
253 | }
254 | ,
255 | {
256 | //the next json example
257 | }
258 | ,
259 | ```
260 |
261 | The first json example in the above chunk would be converted by the web service to the following before submitting to the VW daemon: "34 someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2".
262 |
263 | To see code that shows how to write a single StructuredExample in json format, check out the "writeExample" method in JsonTestUtils.java, which can be found in the vw-webservice-jersey project under src/test/java in the com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader package.
264 |
265 | To see code that writes an entire stream of StructuredExamples in json format, check out the 'getJsonInputStreamBodyGenerator' method of AsyncHttpClientTest.java in the com.eharmony.matching.vw.webservice.client package under src/test/java in the vw-webservice-jersey project.
266 |
267 | ## Benchmarks
268 |
269 | Some basic benchmarks seem to indicate that, as the number of examples increases and hardware memory improves, the web-service seems to perform comparably to netcat. Note that we did not do any performance tweaking of the web-service. VW was running in daemon mode as "vw -b 10 --daemon", and we performed 10 runs with each setup.
270 |
271 | | Setup | # examples | # of features | median time | slowdown |
272 | |:--------------------------------------|-----------:|--------------:|------------:|---------:|
273 | | netcat and vw --daemon on localhost | 27M |1.2B | 239.7s | baseline |
274 | | webservice and vw daemon on localhost | 27M |1.2B | 244.4s | 2% |
275 |
276 | The percentage hit in terms of median times was only about 2%, which seems acceptable.
277 |
278 | ## ToDo
279 |
280 | * Document application/x-vw-text.
281 | * More tests.
282 | * Pull out integration tests into a separate module and have Maven run them as part of the verify phase. Use the failsafe plugin for this.
283 | * Incorporate maven enforcer, findbugs and checkstyle plugin invocations
284 | * Incorporate suggestions made by others during code review
285 | * Protocol buffer support.
286 | * Java client.
287 | * Javascript client.
288 | * Add compression support.
289 | * Automate setup and installation.
290 | * Move all property configuration outside the .war file. Right now the configuration is packaged inside, effectively making the .war files hard-coded.
291 | * Add codahale metrics gathering.
292 | * Go through all the TODO comments in the source code and make changes where necessary.
293 | * CometD support.
294 | * Speed optimizations.
295 | * Document extension points.
296 | * Re-factor tests to instantiate a web-service instance, perhaps using Grizzly http server?
297 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | com.eharmony.matching
4 | vw-webservice
5 | 0.1.0-SNAPSHOT
6 | pom
7 | VW Web Service.
8 | The parent project of all the other vw web service projects.
9 |
10 |
11 |
12 |
13 |
14 | vw-webservice-common
15 | vw-webservice-core
16 | vw-webservice-jersey
17 |
18 |
19 |
20 | 0.1.0-SNAPSHOT
21 | UTF-8
22 |
23 |
24 |
25 |
26 |
27 |
28 | junit
29 | junit
30 | 4.8.2
31 | test
32 |
33 |
34 |
35 | org.mockito
36 | mockito-all
37 | 1.9.5
38 | test
39 |
40 |
41 |
42 |
43 |
44 |
45 | org.springframework
46 | spring-core
47 | 3.1.3.RELEASE
48 |
49 |
50 |
51 | org.springframework
52 | spring-beans
53 | 3.1.3.RELEASE
54 |
55 |
56 |
57 | org.springframework
58 | spring-context
59 | 3.1.3.RELEASE
60 |
61 |
62 |
63 | org.springframework
64 | spring-context-support
65 | 3.1.3.RELEASE
66 |
67 |
68 |
69 | org.springframework
70 | spring-expression
71 | 3.1.3.RELEASE
72 |
73 |
74 |
75 |
76 | org.slf4j
77 | slf4j-api
78 | 1.7.5
79 |
80 |
81 |
82 | ch.qos.logback
83 | logback-classic
84 | 1.0.13
85 | runtime
86 |
87 |
88 |
89 | ch.qos.logback
90 | logback-core
91 | 1.0.13
92 | runtime
93 |
94 |
95 |
96 |
97 | com.google.guava
98 | guava
99 | 15.0
100 |
101 |
102 |
103 |
104 | com.google.code.gson
105 | gson
106 | 2.2.4
107 |
108 |
109 |
110 |
111 | com.fasterxml.jackson.core
112 | jackson-core
113 | 2.3.1
114 |
115 |
116 |
117 |
118 | commons-collections
119 | commons-collections
120 | 3.2.1
121 |
122 |
123 |
124 |
125 | org.apache.commons
126 | commons-lang3
127 | 3.2.1
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 | org.apache.maven.plugins
140 | maven-compiler-plugin
141 | 2.5.1
142 | true
143 |
144 | 1.7
145 | 1.7
146 | true
147 | 1024m
148 | 2048m
149 |
150 |
151 |
152 |
153 | org.apache.maven.plugins
154 | maven-surefire-plugin
155 | 2.13
156 |
157 |
158 | surefire-test
159 | test
160 |
161 | test
162 |
163 |
164 |
165 |
166 | false
167 |
168 | **/*Test.java
169 |
170 | once
171 |
172 |
173 |
174 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
--------------------------------------------------------------------------------
/vw-webservice-common/README.md:
--------------------------------------------------------------------------------
1 | vw-webservice-common
2 | ====================
3 |
4 | Common components for client and server side use, by the VW web service.
5 |
--------------------------------------------------------------------------------
/vw-webservice-common/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 |
5 | com.eharmony.matching
6 | vw-webservice
7 | 0.1.0-SNAPSHOT
8 | ../../vw-webservice
9 |
10 |
11 | vw-webservice-common
12 |
13 | VW Web Service Common.
14 | Common components used by client and server side projects.
15 |
16 |
17 |
18 | vw-webservice-common-${project.version}
19 |
20 |
21 |
22 | org.apache.maven.plugins
23 | maven-compiler-plugin
24 |
25 |
26 |
27 | org.apache.maven.plugins
28 | maven-surefire-plugin
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 | com.google.guava
39 | guava
40 |
41 |
42 |
43 |
44 | org.apache.commons
45 | commons-lang3
46 |
47 |
48 |
49 |
50 | junit
51 | junit
52 |
53 |
54 |
55 |
56 | org.mockito
57 | mockito-all
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/Example.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.example;
5 |
6 | /**
7 | * @author vrahimtoola
8 | *
9 | * An example to be submitted to VW, in its proper input format.
10 | */
11 | public interface Example {
12 |
13 | /**
14 | * Returns the example exactly as it will be submitted to VW, which expects
15 | * plain text examples.
16 | *
17 | * @throws ExampleFormatException to indicate that the format of the example
18 | * isn't valid.
19 | *
20 | * @return The plain text VW representation of the example.
21 | */
22 | String getVWStringRepresentation();
23 |
24 | }
25 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/ExampleFormatException.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.example;
5 |
6 | /**
7 |  * Thrown when an example is not in valid VW input format. It is unchecked
8 |  * (it extends IllegalArgumentException) because a malformed example is
9 |  * treated as a programmer error, much like NumberFormatException.
10 |  *
11 |  * An instance may carry the number of the offending example; that number is
12 |  * Long.MIN_VALUE until a constructor or setExampleNumber(long) supplies one.
13 |  *
14 |  * @author vrahimtoola
15 |  */
16 | public class ExampleFormatException extends IllegalArgumentException {
17 |
18 |     /** The serial version UID. */
19 |     private static final long serialVersionUID = -6238484930971388916L;
20 |
21 |     /** Number of the offending example; Long.MIN_VALUE when none was given. */
22 |     private long exampleNumber = Long.MIN_VALUE;
23 |
24 |     // Constructors that leave the example number at its default.
25 |
26 |     public ExampleFormatException() {
27 |     }
28 |
29 |     public ExampleFormatException(String message) {
30 |         super(message);
31 |     }
32 |
33 |     public ExampleFormatException(Throwable cause) {
34 |         super(cause);
35 |     }
36 |
37 |     public ExampleFormatException(String message, Throwable cause) {
38 |         super(message, cause);
39 |     }
40 |
41 |     // Constructors that also record the example number; each delegates to its
42 |     // counterpart above and then stores the number.
43 |
44 |     public ExampleFormatException(long exampleNumber) {
45 |         this();
46 |         this.exampleNumber = exampleNumber;
47 |     }
48 |
49 |     public ExampleFormatException(long exampleNumber, String message) {
50 |         this(message);
51 |         this.exampleNumber = exampleNumber;
52 |     }
53 |
54 |     public ExampleFormatException(long exampleNumber, Throwable cause) {
55 |         this(cause);
56 |         this.exampleNumber = exampleNumber;
57 |     }
58 |
59 |     public ExampleFormatException(long exampleNumber, String message, Throwable cause) {
60 |         this(message, cause);
61 |         this.exampleNumber = exampleNumber;
62 |     }
63 |
64 |     /** Returns the recorded example number, or Long.MIN_VALUE if none was set. */
65 |     public long getExampleNumber() {
66 |         return this.exampleNumber;
67 |     }
68 |
69 |     /** Records (or overwrites) the number of the offending example. */
70 |     public void setExampleNumber(long exampleNumber) {
71 |         this.exampleNumber = exampleNumber;
72 |     }
73 | }
74 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/ExampleMediaTypes.java:
--------------------------------------------------------------------------------
1 | package com.eharmony.matching.vw.webservice.common.example;
2 |
3 | /**
4 | * @author vrahimtoola
5 | *
6 | * String constants for acceptable media types for submitting vw
7 | * examples.
8 | */
9 | public class ExampleMediaTypes {
10 | // Every value has the form application/vowpal-wabbit-example-v0.1.0+<format>; only the <format> suffix differs.
11 | public static final String PLAINTEXT_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+plaintext";
12 | public static final String SIMPLE_PROTOBUF_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+simpleprotobuf";
13 | public static final String SIMPLE_JSON_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+simplejson";
14 | public static final String STRUCTURED_JSON_0_1_0 = "application/vowpal-wabbit-example-v0.1.0+structuredjson";
15 |
16 | }
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/StringExample.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.example;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | /**
9 |  * A VW example held as one opaque string, i.e. its individual components
10 |  * cannot be accessed separately.
11 |  *
12 |  * @author vrahimtoola
13 |  */
14 | public class StringExample implements Example {
15 |
16 |     /** The example text, stored exactly as given; never null. */
17 |     private final String exampleText;
18 |
19 |     /**
20 |      * Wraps the exact string representation of a VW example.
21 |      *
22 |      * @param theString The VW example. May be empty, but cannot be null.
23 |      */
24 |     public StringExample(String theString) {
25 |         // checkNotNull hands back its argument, so validate and assign in one step
26 |         exampleText = checkNotNull(theString, "Null string provided as example!");
27 |     }
28 |
29 |     /** Returns the stored string unchanged. */
30 |     @Override
31 |     public String getVWStringRepresentation() {
32 |         return exampleText;
33 |     }
34 |
35 |     /** Yields the same text as getVWStringRepresentation(). */
36 |     @Override
37 |     public String toString() {
38 |         return getVWStringRepresentation();
39 |     }
40 | }
41 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/example/StructuredExample.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.example;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.util.ArrayList;
9 | import java.util.List;
10 |
11 | import org.apache.commons.lang3.StringUtils;
12 |
13 | import com.google.common.collect.ImmutableList;
14 |
15 | /**
16 | * @author vrahimtoola
17 | *
18 | * An example to be submitted to VW. A structured example can be queried
19 | * to get the various components that make up the example that will be
20 | * submitted to VW.
21 | */
22 | public class StructuredExample implements Example {
23 |
24 | /**
25 | * The kinds of example; getVWStringRepresentation() branches on this value.
26 | */
27 | private enum ExampleType {
28 |
29 | /**
30 | * The empty example, rendered as the empty string. (Described originally
31 | * as reaching VW as a single newline - TODO confirm where that is added.)
32 | */
33 | EMPTY,
34 |
35 | /**
36 | * The pipe (|) example, rendered as " |": a space followed by the pipe
37 | * character, not the bare '|'.
38 | */
39 | PIPE,
40 |
41 | /**
42 | * A normal VW example, assembled from its label, tag and namespaces.
43 | */
44 | NORMAL
45 | }
46 |
47 | /*
48 | * Some pre-defined examples. They are shared singletons, so each gets an immutable empty namespace list.
49 | */
50 |
51 | /**
52 | * The empty example; its VW string representation is the empty string.
53 | */
54 | public static final StructuredExample EMPTY_EXAMPLE = new StructuredExample(ExampleType.EMPTY, "", "", ImmutableList.<Namespace>of());
55 |
56 | /**
57 | * The pipe (|) example; its VW string representation is " |" (a space, then the pipe).
58 | */
59 | public static final StructuredExample PIPE_EXAMPLE = new StructuredExample(ExampleType.PIPE, "", "", ImmutableList.<Namespace>of());
60 |
61 | private final ExampleType exampleType; // selects the branch taken in getVWStringRepresentation()
62 | private final String label; // null-checked when rendering; skipped if null
63 | private final Iterable<Namespace> namespaces; // element type restored so the for-each over Namespace compiles
64 | private final String tag; // null-checked when rendering; skipped if null
65 | // Private constructor: stores the four components exactly as given, with no copying or validation.
66 | private StructuredExample(ExampleType exampleType, String label, String tag, Iterable<Namespace> namespaces) {
67 |     this.exampleType = exampleType;
68 |     this.label = label;
69 |     this.namespaces = namespaces;
70 |     this.tag = tag;
71 | }
72 |
73 | /**
74 | * Gets the label of this example.
75 | *
76 | * @return The label as stored; presumably may be null, since getVWStringRepresentation() null-checks it.
77 | */
78 | public String getLabel() {
79 |     return this.label;
80 | }
81 |
82 | /**
83 | * Gets the tag of this example.
84 | *
85 | * @return The tag as stored; presumably may be null, since getVWStringRepresentation() null-checks it.
86 | */
87 | public String getTag() {
88 |     return this.tag;
89 | }
90 |
91 | /**
92 | * Returns the namespaces in this example.
93 | *
94 | * @return The namespaces in this example, exactly as supplied at construction.
95 | * NOTE(review): documented as unmodifiable - confirm every construction path enforces that.
96 | */
97 | public Iterable<Namespace> getNamespaces() {
98 |     return namespaces;
99 | }
100 |
101 | @Override
102 | public String getVWStringRepresentation() {
103 |
104 | if (exampleType == ExampleType.EMPTY)
105 | return "";
106 | else if (exampleType == ExampleType.PIPE)
107 | return " |"; //note the space before the pipe
108 | else {
109 |
110 | final String SPACE = " ";
111 | final String PIPE = "|";
112 | final String COLON = ":";
113 |
114 | StringBuilder builder = new StringBuilder();
115 |
116 | if (label != null) {
117 | builder.append(label);
118 | builder.append(SPACE);
119 | }
120 |
121 | if (tag != null) {
122 | builder.append(tag);
123 | }
124 |
125 | boolean namespacesAdded = false;
126 |
127 | for (Namespace namespace : namespaces) {
128 |
129 | if (namespacesAdded) builder.append(SPACE);
130 |
131 | builder.append(PIPE);
132 |
133 | String namespaceName = namespace.getName();
134 |
135 | if (StringUtils.isBlank(namespaceName) == false) builder.append(namespaceName);
136 |
137 | if (namespace.getScalingFactor() != null) {
138 | builder.append(COLON);
139 |
140 | Float scalingFactor = namespace.getScalingFactor();
141 |
142 | //this will take care of getting rid of extraneous 0s, eg, 12.3400000
143 | if (scalingFactor.floatValue() == (int) scalingFactor.floatValue())
144 | builder.append(String.format("%d", (int) scalingFactor.floatValue()));
145 | else {
146 | builder.append(String.format("%s", scalingFactor.floatValue()));
147 | }
148 | }
149 |
150 | for (StructuredExample.Namespace.Feature feature : namespace.getFeatures()) {
151 |
152 | builder.append(SPACE);
153 |
154 | String featureName = feature.getName();
155 | Float featureValue = feature.getValue();
156 |
157 | builder.append(featureName);
158 |
159 | if (featureValue != null) {
160 | builder.append(COLON);
161 |
162 | //this will take care of getting rid of extraneous 0s, eg, 12.3400000
163 | if (featureValue.floatValue() == (int) featureValue.floatValue())
164 | builder.append(String.format("%d", (int) featureValue.floatValue()));
165 | else {
166 | builder.append(String.format("%s", featureValue.floatValue()));
167 | }
168 |
169 | }
170 |
171 | }
172 |
173 | namespacesAdded = true;
174 | }
175 |
176 | //if there's a label but no namespaces, add a SPACE and a PIPE after the label
177 | if (!namespacesAdded) {
178 | builder.append(PIPE);
179 | }
180 |
181 | return builder.toString();
182 | }
183 | }
184 |
185 | /*
186 | * Represents a namespace containing 0 or more features. Instances of this
187 | * class are immutable.
188 | */
189 | public static class Namespace {
190 |
191 | private final List features;
192 | private final String namespaceName;
193 | private final Float scalingFactor;
194 |
195 | private Namespace(String namespaceName, Float scalingFactor, List features) {
196 | this.namespaceName = namespaceName == null ? null : namespaceName.trim();
197 | this.scalingFactor = scalingFactor;
198 | this.features = features;
199 | }
200 |
201 | /*
202 | * Returns the features of the map.
203 | *
204 | * @returns The list of features. The list is unmodifiable.
205 | */
206 | public Iterable getFeatures() {
207 | return features;
208 | }
209 |
210 | /*
211 | * Returns the number of features in this namespace.
212 | *
213 | * @returns The number of features in this namespace. Always >= 0.
214 | */
215 | private int getNumberOfFeatures() {
216 | return (features == null ? 0 : features.size());
217 | }
218 |
219 | /*
220 | * Returns the name of this namespace.
221 | *
222 | * @returns The name of this namespace. Can be null/empty.
223 | */
224 | public String getName() {
225 | return namespaceName;
226 | }
227 |
228 | /*
229 | * Returns the scaling factor of this namespace. Can be null, which is
230 | * the same as 1.0 (as per VW documentation).
231 | *
232 | * @returns The scaling factor for this namespace. Can be null.
233 | */
234 | public Float getScalingFactor() {
235 | return scalingFactor;
236 | }
237 |
238 | /*
239 | * Represents a single feature inside a namespace.
240 | */
241 | public static class Feature {
242 | private final String name;
243 | private final Float value;
244 |
245 | private Feature(String name, Float value) {
246 | this.name = name.trim();
247 | this.value = value;
248 | }
249 |
250 | /*
251 | * Returns the name of this feature.
252 | *
253 | * @returns The name of this feature.
254 | */
255 | public String getName() {
256 | return name;
257 | }
258 |
259 | /*
260 | * Returns the value of this feature. The value can be null.
261 | *
262 | * @returns The value of this feature. Can be null.
263 | */
264 | public Float getValue() {
265 | return value;
266 | }
267 | }
268 |
269 | /*
270 | * Builds a single namespace of an example. Instances of this class are
271 | * not thread safe. A NamespaceBuilder can be used repeatedly to build
272 | * namespace instances. Just make sure to call 'clear()' before starting
273 | * to build up the second (or subsequent) namespace. Note that invoking
274 | * 'build' does not implicitly invoke 'clear()' after a Namespace has
275 | * been built; 'clear()' must be invoked explicitly.
276 | */
277 | public static class NamespaceBuilder {
278 |
279 | private List features = null;
280 | private String namespaceName;
281 | private Float scalingFactor;
282 |
283 | /*
284 | * Sets the name for the namespace being built.
285 | *
286 | * @param namespaceName The name of the namespace being built. Can
287 | * be null/empty. From the VW documentation: Currently, the only
288 | * characters that can't be used in feature or namespace names are
289 | * vertical bar, colon, space, and newline.
290 | *
291 | * @returns This builder.
292 | */
293 | public NamespaceBuilder setName(String namespaceName) {
294 |
295 | if (namespaceName != null) {
296 | if (namespaceName.contains("|") || namespaceName.contains(":") || StringUtils.containsWhitespace(namespaceName)) {
297 | throw new ExampleFormatException("The namespace name cannot contain whitespace, '|' or ':'! Namespace passed in was: " + namespaceName);
298 | }
299 | }
300 |
301 | this.namespaceName = namespaceName;
302 | return this;
303 | }
304 |
305 | /*
306 | * Sets the scaling factor for this namespace.
307 | *
308 | * @param scalingFactor The scaling factor. Can be null (which VW
309 | * will interpret as 1.0).
310 | *
311 | * @returns This builder.
312 | */
313 | public NamespaceBuilder setScalingFactor(Float scalingFactor) {
314 | this.scalingFactor = scalingFactor;
315 | return this;
316 | }
317 |
318 | /*
319 | * Adds a feature to this namespace. The value of the feature will
320 | * default to 1.0, as per the VW documentation.
321 | *
322 | * @param feature The feature name to be added.
323 | */
324 | public NamespaceBuilder addFeature(String feature) {
325 | return addFeature(feature, null);
326 | }
327 |
328 | /*
329 | * Adds a feature with the specified value to the namespace.
330 | *
331 | * @param feature The feature to add. Cannot be null/empty. From the
332 | * VW documentation: Currently, the only characters that can't be
333 | * used in feature or namespace names are vertical bar, colon,
334 | * space, and newline.
335 | *
336 | * @param value The float value of the feature.
337 | *
338 | * @returns This builder.
339 | */
340 | public NamespaceBuilder addFeature(String feature, Float value) {
341 | if (StringUtils.isBlank(feature)) throw new ExampleFormatException("The feature name must be provided!");
342 |
343 | if (feature.contains("|") || feature.contains(":") || StringUtils.containsWhitespace(feature))
344 | throw new ExampleFormatException("The feature name cannot contain whitespace, '|' or ':'! Feature name passed in was: " + feature);
345 |
346 | if (features == null) features = new ArrayList();
347 | features.add(new Feature(feature, value));
348 | return this;
349 | }
350 |
351 | /*
352 | * Removes all features from the namespace.
353 | *
354 | * @returns This builder.
355 | */
356 | public NamespaceBuilder clear() {
357 | features = null;
358 | scalingFactor = null;
359 | namespaceName = null;
360 | return this;
361 | }
362 |
363 | /*
364 | * Builds the namespace with a scaling factor of 1.0.
365 | *
366 | * @returns The newly built namespace.
367 | */
368 | public Namespace build() {
369 |
370 | if (StringUtils.isBlank(namespaceName) && scalingFactor != null) throw new ExampleFormatException("A namespace with a scaling factor must be given a name!");
371 |
372 | return new Namespace(namespaceName, scalingFactor, features == null ? new ArrayList() : ImmutableList. builder().addAll(features).build());
373 | }
374 | }
375 |
376 | }
377 |
378 | public static class ExampleBuilder {
379 |
380 | private boolean atLeastOneNamespaceIsNonBlank = false;
381 | private String label = null;
382 | private List namespaces = null;
383 | private String tag = null;
384 |
385 | /*
386 | * Sets the label for the example.
387 | *
388 | * @param label The label for the example. Can be null/empty. Will be
389 | * trimmed (ie, trim() will be called on it) when the example is
390 | * created.
391 | *
392 | * @returns This ExampleBuilder.
393 | */
394 | public ExampleBuilder setLabel(String label) {
395 | this.label = label;
396 | return this;
397 | }
398 |
399 | /*
400 | * Sets the tag for this example.
401 | *
402 | * @param tag The tag for the example. Can be null/empty. Will be
403 | * trimmed when the example is created.
404 | *
405 | * @returns This ExampleBuilder.
406 | */
407 | public ExampleBuilder setTag(String tag) {
408 | this.tag = tag;
409 | return this;
410 | }
411 |
412 | /*
413 | * Clears the builder, thus making it ready for use to create the next
414 | * example.
415 | *
416 | * @returns This ExampleBuilder.
417 | */
418 | public ExampleBuilder clear() {
419 | label = null;
420 | namespaces = null;
421 | tag = null;
422 | return this;
423 | }
424 |
425 | /*
426 | * Adds a namespace to the example.
427 | *
428 | * @param namespace The namespace to add to the builder. Cannot be null.
429 | *
430 | * @returns This builder.
431 | */
432 | public ExampleBuilder addNamespace(Namespace namespace) {
433 | checkNotNull(namespace);
434 | if (namespaces == null) namespaces = new ArrayList();
435 | namespaces.add(namespace);
436 | atLeastOneNamespaceIsNonBlank = namespace.getNumberOfFeatures() > 0 || (StringUtils.isBlank(namespace.getName()) == false);
437 | return this;
438 | }
439 |
440 | /*
441 | * Builds and returns the example.
442 | */
443 | public StructuredExample build() {
444 | if (label != null) label = label.trim();
445 | if (tag != null) tag = tag.trim();
446 |
447 | //If no label and no namespaces (or all namespaces are empty), treat it as the pipe example.
448 | if (StringUtils.isBlank(label) && StringUtils.isBlank(tag) && (namespaces == null || namespaces.size() == 0 || atLeastOneNamespaceIsNonBlank == false))
449 | return StructuredExample.PIPE_EXAMPLE;
450 |
451 | return new StructuredExample(ExampleType.NORMAL, label, tag, namespaces == null ? new ArrayList() : ImmutableList. builder().addAll(namespaces).build());
452 |
453 | }
454 | }
455 |
456 | }
457 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/Prediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.prediction;
5 |
6 | import java.io.IOException;
7 | import java.io.OutputStream;
8 |
/**
 * A prediction spit out by VW.
 *
 * @author vrahimtoola
 */
public interface Prediction {

    /**
     * Writes a prediction received from VW out to some output stream.
     *
     * The implementation will write a newline after writing the prediction to
     * the stream.
     *
     * @param outputStream The stream to write the prediction to. The caller
     * owns this stream.
     *
     * @throws IOException If writing to the stream fails.
     */
    void write(OutputStream outputStream) throws IOException;

    /**
     * Returns the string representation of a Prediction. The returned
     * representation should be convertible back to a Prediction. This is a
     * dedicated method, rather than 'toString()', so that implementors have
     * to provide a proper implementation instead of one that merely happens
     * to work sensibly.
     *
     * @return The prediction, exactly as returned by VW.
     */
    String getVWStringRepresentation();
}
38 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/PredictionMediaTypes.java:
--------------------------------------------------------------------------------
1 | package com.eharmony.matching.vw.webservice.common.prediction;
2 |
/**
 * String constants for prediction media types (when sending back vw
 * predictions).
 *
 * @author vrahimtoola
 */
public class PredictionMediaTypes {

    /**
     * Media type of version 0.1.0 of the plain-text prediction format.
     */
    public static final String PLAINTEXT_0_1_0 = "application/vowpal-wabbit-prediction-v0.1.0+plaintext";

}
14 |
15 |
16 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/main/java/com/eharmony/matching/vw/webservice/common/prediction/StringPrediction.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.common.prediction;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.io.IOException;
9 | import java.io.OutputStream;
10 |
11 | import com.google.common.base.Charsets;
12 |
13 | /**
14 | * @author vrahimtoola
15 | *
16 | * A VW prediction represented as a simple string (UTF8 encoded).
17 | */
18 | public class StringPrediction implements Prediction {
19 |
20 | private final String vwPrediction;
21 |
22 | private static final byte[] newlineBytes = System.getProperty("line.separator").getBytes(Charsets.UTF_8);
23 |
24 | public StringPrediction(String theString) {
25 | checkNotNull(theString, "Null prediction provided!");
26 | vwPrediction = theString;
27 | }
28 |
29 | /*
30 | * (non-Javadoc)
31 | *
32 | * @see com.eharmony.matching.vw.webservice.core.VWPrediction#write(java.io.
33 | * OutputStream)
34 | */
35 | @Override
36 | public void write(OutputStream outputStream) throws IOException {
37 | outputStream.write(vwPrediction.getBytes(Charsets.UTF_8));
38 | outputStream.write(newlineBytes);
39 | }
40 |
41 | @Override
42 | public String getVWStringRepresentation() {
43 | return vwPrediction;
44 | }
45 |
46 | }
47 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/test/java/com/eharmony/matching/vw/webservice/example/StructuredExampleTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.example;
5 |
6 | import junit.framework.Assert;
7 |
8 | import org.junit.Test;
9 |
10 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample;
11 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.ExampleBuilder;
12 |
13 | /**
14 | * @author vrahimtoola
15 | *
16 | * Tests the StructuredExample class.
17 | */
18 | public class StructuredExampleTest {
19 |
20 | /*
21 | * Tests that a pipe example gets built properly.
22 | */
23 | @Test
24 | public void testPipeExampleCreation() {
25 |
26 | ExampleBuilder exampleBuilder = new ExampleBuilder();
27 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder();
28 |
29 | Assert.assertTrue(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
30 |
31 | exampleBuilder.clear();
32 | namespaceBuilder.clear();
33 |
34 | exampleBuilder.setLabel("some label");
35 |
36 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE);
37 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
38 |
39 | exampleBuilder.clear();
40 | namespaceBuilder.clear();
41 |
42 | StructuredExample.Namespace namespace = namespaceBuilder.build();
43 |
44 | exampleBuilder.addNamespace(namespace);
45 |
46 | Assert.assertTrue(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
47 |
48 | exampleBuilder.clear();
49 | namespaceBuilder.clear();
50 |
51 | //TODO: consider verifying that labels cannot contain spaces...?
52 | exampleBuilder.setLabel("some label");
53 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE);
54 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
55 |
56 | exampleBuilder.clear();
57 | namespaceBuilder.clear();
58 |
59 | namespaceBuilder.setName("some-namespace-name");
60 | namespaceBuilder.setScalingFactor(1.0f);
61 | exampleBuilder.addNamespace(namespaceBuilder.build());
62 |
63 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE);
64 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
65 |
66 | exampleBuilder.clear();
67 | namespaceBuilder.clear();
68 |
69 | namespaceBuilder.addFeature("someFeature", null);
70 |
71 | exampleBuilder.addNamespace(namespaceBuilder.build());
72 |
73 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.EMPTY_EXAMPLE);
74 | Assert.assertFalse(exampleBuilder.build() == StructuredExample.PIPE_EXAMPLE);
75 |
76 | }
77 |
78 | /*
79 | * Tests that an empty example returns the empty string when it's supposed
80 | * to.
81 | */
82 | @Test
83 | public void testEmptyExampleReturnsEmptyString() {
84 | Assert.assertEquals("", StructuredExample.EMPTY_EXAMPLE.getVWStringRepresentation());
85 | }
86 |
87 | /*
88 | * Tests that a PIPE example returns the pipe character when it's supposed
89 | * to.
90 | */
91 | @Test
92 | public void testPipeExampleReturnsPipeString() {
93 | Assert.assertEquals(" |", StructuredExample.PIPE_EXAMPLE.getVWStringRepresentation());
94 | }
95 |
96 | /*
97 | * Simple test to verify that basic example building works as expected.
98 | */
99 | @Test
100 | public void simpleExampleBuildingTest() {
101 |
102 | final String expectedOutput = "34 |one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2";
103 |
104 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder();
105 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder();
106 |
107 | exampleBuilder.setLabel("34");
108 |
109 | namespaceBuilder.setName("one");
110 | namespaceBuilder.addFeature("a", 12.34f);
111 | namespaceBuilder.addFeature("b", 45.1f);
112 |
113 | StructuredExample.Namespace firstNamespace = namespaceBuilder.build();
114 |
115 | namespaceBuilder.clear();
116 |
117 | namespaceBuilder.setName("two");
118 | namespaceBuilder.setScalingFactor(34.3f);
119 | namespaceBuilder.addFeature("bah", 0.038293f);
120 | namespaceBuilder.addFeature("another", 3.4000f);
121 | namespaceBuilder.addFeature("andThis", 2.0f);
122 |
123 | StructuredExample.Namespace secondNamespace = namespaceBuilder.build();
124 |
125 | exampleBuilder.addNamespace(firstNamespace);
126 | exampleBuilder.addNamespace(secondNamespace);
127 |
128 | //System.out.println(exampleBuilder.build().getVWStringRepresentation());
129 |
130 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation());
131 | }
132 |
133 | /*
134 | * Like the above, but tests the Tag feature as well (since it was added
135 | * later).
136 | */
137 | @Test
138 | public void simpleExampleBuildingTestWithTag() {
139 |
140 | String expectedOutput = "34 someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2";
141 |
142 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder();
143 | StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder();
144 |
145 | exampleBuilder.setLabel("34");
146 | exampleBuilder.setTag("someTag");
147 |
148 | namespaceBuilder.setName("one");
149 | namespaceBuilder.addFeature("a", 12.34f);
150 | namespaceBuilder.addFeature("b", 45.1f);
151 |
152 | StructuredExample.Namespace firstNamespace = namespaceBuilder.build();
153 |
154 | namespaceBuilder.clear();
155 |
156 | namespaceBuilder.setName("two");
157 | namespaceBuilder.setScalingFactor(34.3f);
158 | namespaceBuilder.addFeature("bah", 0.038293f);
159 | namespaceBuilder.addFeature("another", 3.4000f);
160 | namespaceBuilder.addFeature("andThis", 2.0f);
161 |
162 | StructuredExample.Namespace secondNamespace = namespaceBuilder.build();
163 |
164 | exampleBuilder.addNamespace(firstNamespace);
165 | exampleBuilder.addNamespace(secondNamespace);
166 |
167 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation());
168 |
169 | //-----
170 | exampleBuilder.setLabel(null); //clear out just the label, leaving everything else as is
171 | expectedOutput = "someTag|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2";
172 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation());
173 | //-----
174 |
175 | //-----
176 | exampleBuilder.setTag(null); //clear out the tag as well, leaving just the namespace bit
177 | expectedOutput = "|one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2";
178 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation());
179 | //-----
180 |
181 | //-----
182 | exampleBuilder.setLabel("theLabel"); //set just the label
183 | expectedOutput = "theLabel |one a:12.34 b:45.1 |two:34.3 bah:0.038293 another:3.4 andThis:2";
184 | Assert.assertEquals(expectedOutput, exampleBuilder.build().getVWStringRepresentation());
185 | //-----
186 | }
187 |
188 | }
189 |
--------------------------------------------------------------------------------
/vw-webservice-common/src/test/resources/ner.train.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/eHarmony/vw-webservice/53b4cfa9d8872058a50fcaa9e11926a6bd73306c/vw-webservice-common/src/test/resources/ner.train.gz
--------------------------------------------------------------------------------
/vw-webservice-common/src/test/resources/vw_example_schema.json:
--------------------------------------------------------------------------------
{
  "$schema": "http://json-schema.org/draft-04/schema#",
  "title": "VW Example",
  "type": "array",
  "description": "A stream of examples sent to the VW web service, which will submit them to VW",
  "items": {
    "type": "object",
    "description": "An individual example in the stream",
    "properties": {
      "label": {
        "type": "string",
        "description": "The label for the example. Can be null/empty, but can contain anything",
        "minLength": 0
      },
      "tag": {
        "type": "string",
        "description": "The tag for the example. Can be null/empty, but can contain anything",
        "minLength": 0
      },
      "namespaces": {
        "type": "array",
        "description": "The list of namespaces in the example",
        "items": {
          "type": "object",
          "description": "An individual namespace inside an example",
          "properties": {
            "name": {
              "type": "string",
              "description": "The name of the namespace. Can be null/empty, but must not contain whitespace, | or :",
              "minLength": 0
            },
            "scale": {
              "type": "number",
              "description": "The scaling factor for the namespace, optional - defaults to 1 if not present - name must be present for this to be accepted"
            },
            "features": {
              "type": "array",
              "description": "The list of features in this namespace",
              "items": {
                "type": "object",
                "description": "An individual feature inside a namespace",
                "properties": {
                  "name": {
                    "type": "string",
                    "description": "The name of the feature. Must be present",
                    "minLength": 1
                  },
                  "value": {
                    "type": "number",
                    "description": "The value of the feature. Defaults to 1 if not present"
                  }
                },
                "additionalProperties": false,
                "required": ["name"]
              },
              "minItems": 0,
              "uniqueItems": false,
              "additionalItems": false
            }
          },
          "minProperties": 0,
          "additionalProperties": false
        }
      }
    },
    "minProperties": 0,
    "additionalProperties": false
  },
  "minItems": 0,
  "uniqueItems": false,
  "additionalItems": false
}
--------------------------------------------------------------------------------
/vw-webservice-core/README.md:
--------------------------------------------------------------------------------
1 | vw-webservice-core
2 | ==================
3 |
4 | The core components of the web service on the server side. This project doesn't refer to any specific web service framework such as Jersey or Restlet or whatever. It only has the code you would need if you were building a web service yourself using the framework of your choice.
5 |
--------------------------------------------------------------------------------
/vw-webservice-core/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 |
4 |
5 | com.eharmony.matching
6 | vw-webservice
7 | 0.1.0-SNAPSHOT
8 | ../../vw-webservice
9 |
10 |
11 |
12 |
13 | vw-webservice-core
14 |
15 | VW Web Service Core.
16 | The core components used when building a web service that wraps the VW daemon. Reference this project when you're building your own web service using the framework of your choice (eg, Jersey, Restlet, etc).
17 |
18 |
19 |
20 |
21 |
22 |
23 | com.eharmony.matching
24 | vw-webservice-common
25 | ${vw.webservice.version}
26 |
27 |
28 |
29 |
30 | org.slf4j
31 | slf4j-api
32 |
33 |
34 |
35 | ch.qos.logback
36 | logback-classic
37 |
38 |
39 |
40 | ch.qos.logback
41 | logback-core
42 |
43 |
44 |
45 |
46 |
47 | vw-webservice-core-${project.version}
48 |
49 |
50 |
51 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/ExampleReadException.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core;
5 |
/**
 * Exception thrown when there's a problem reading examples submitted to the
 * web service.
 *
 * This is an unchecked exception because it can be thrown from within an
 * iterator, and the Iterator interface does not allow 'next' to declare a
 * throws clause.
 *
 * @author vrahimtoola
 */
public class ExampleReadException extends RuntimeException {

    /**
     * The serial version UID.
     */
    private static final long serialVersionUID = -1744390625692646099L;

    /**
     * Creates an exception with neither a message nor a cause.
     */
    public ExampleReadException() {
        super();
    }

    /**
     * Creates an exception with a message.
     *
     * @param message The detail message.
     */
    public ExampleReadException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps a cause.
     *
     * @param cause The underlying problem.
     */
    public ExampleReadException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception with a message that wraps a cause.
     *
     * @param message The detail message.
     * @param cause The underlying problem.
     */
    public ExampleReadException(String message, Throwable cause) {
        super(message, cause);
    }
}
40 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/ExamplesIterable.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core;
5 |
6 | import com.eharmony.matching.vw.webservice.common.example.Example;
7 |
8 | /**
9 | * @author vrahimtoola
10 | *
11 | * An iterable of examples to be submitted to VW. Also provides
12 | * attributes that describe the type/quantity of examples.
13 | *
14 | * The 'next()' method of the iterator may throw an ExampleReadException
15 | * if there's an issue reading in examples for any reason. Clients
16 | * iterating over this iterable can catch that exception and handle is
17 | * accordingly.
18 | */
19 | public interface ExamplesIterable extends Iterable {
20 |
21 | /*
22 | * Returns the number of examples, or Integer.MAX_VALUE if it's a stream of
23 | * examples. This value can be used by components further down the pipeline
24 | * to determine whether or not example submission should occur
25 | * synchronously.
26 | *
27 | * @returns The number of examples (if known) or Integer.MAX_VALUE if
28 | * they're being streamed in and the number of examples isn't known ahead of
29 | * time. This number can never be < 0.
30 | */
31 | int getNumberOfExamples();
32 |
33 | /*
34 | * Returns attributes used to describe the set of examples.
35 | */
36 | String getAttribute(String attributeKey);
37 | }
38 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/ExamplesIterableImpl.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.util.HashMap;
9 | import java.util.Iterator;
10 | import java.util.Map;
11 |
12 | import com.eharmony.matching.vw.webservice.common.example.Example;
13 |
14 | /**
15 | * @author vrahimtoola
16 | *
17 | * An implementation of ExamplesIterable.
18 | */
19 | public class ExamplesIterableImpl implements ExamplesIterable {
20 |
21 | private final Map attributesMap;
22 | private final Iterator exampleIterator;
23 | private final int numberOfExamples;
24 |
25 | public ExamplesIterableImpl(int numberOfExamples,
26 | Map theMapOfAttributes,
27 | Iterator exampleIterator) {
28 |
29 | checkNotNull(exampleIterator);
30 |
31 | this.numberOfExamples = numberOfExamples;
32 |
33 | if (theMapOfAttributes == null)
34 | attributesMap = new HashMap();
35 | else {
36 | attributesMap = theMapOfAttributes;
37 | }
38 |
39 | this.exampleIterator = exampleIterator;
40 | }
41 |
42 | public Iterator iterator() {
43 | return exampleIterator;
44 | }
45 |
46 | public String getAttribute(String attributeKey) {
47 | return attributesMap.get(attributeKey);
48 | }
49 |
50 | public int getNumberOfExamples() {
51 | return this.numberOfExamples;
52 | }
53 |
54 | }
55 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessingEventHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException;
7 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
8 |
9 | /**
10 | * @author vrahimtoola
11 | *
12 | * Callbacks to be fired when the status of example processing
13 | * (submission/prediction fetching) changes.
14 | */
15 | public interface ExampleProcessingEventHandler {
16 |
17 | /*
18 | * Fired whenever there's an exception reading examples.
19 | *
20 | * @param exampleProcessingManager The example processing manager that can
21 | * be queried to find out more info about the example processing.
22 | *
23 | * @param theException The exception that occurred.
24 | */
25 | void onExampleReadException(ExampleProcessingManager exampleProcessingManager, ExampleReadException theException);
26 |
27 | /*
28 | * Fired whenever an invalid example is detected.
29 | *
30 | * @param exampleProcessingManager The example processing manager that can
31 | * be queried to find out more info about the example processing.
32 | *
33 | * @param theException The exception that occurred.
34 | */
35 | void onExampleFormatException(ExampleProcessingManager exampleProcessingManager, ExampleFormatException theException);
36 |
37 | /*
38 | * Fired whenever there's an exception submitting examples.
39 | *
40 | * @param exampleProcessingManager The example processing manager that can
41 | * be queried to find out more info about the example processing.
42 | *
43 | * @param theException The exception that occurred.
44 | */
45 | void onExampleSubmissionException(ExampleProcessingManager exampleProcessingManager, ExampleSubmissionException theException);
46 |
47 | /*
48 | * Fired when all examples have been submitted.
49 | *
50 | * @param exampleProcessingManager The example processing manager that can
51 | * be queried to find out more info about the example processing.
52 | */
53 | void onExampleSubmissionComplete(ExampleProcessingManager exampleProcessingManager);
54 |
55 | /*
56 | * Fired whenever there's an exception fetching predictions.
57 | *
58 | * @param exampleProcessingManager The example processing manager that can
59 | * be queried to find out more info about the example processing.
60 | *
61 | * @param theException The exception that occurred.
62 | */
63 | void onPredictionFetchException(ExampleProcessingManager exampleProcessingManager, PredictionFetchException theException);
64 |
65 | /*
66 | * Fired when all predictions have been fetched.
67 | *
68 | * @param exampleProcessingManager The example processing manager that can
69 | * be queried to find out more info about the example processing.
70 | */
71 | void onPredictionFetchComplete(ExampleProcessingManager exampleProcessingManager);
72 | }
73 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessingManager.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction;
7 |
8 | /**
9 | * @author vrahimtoola
10 | *
11 | * The example processing manager can be used to stop the example
12 | * submission process and retrieve a forward-only iterable of
13 | * predictions.
14 | */
15 | public interface ExampleProcessingManager {
16 |
17 | /*
18 | * Returns the iterable of predictions.
19 | *
20 | * @returns The iterable of predictions.
21 | */
22 | Iterable getPredictionsIterable();
23 |
24 | /*
25 | * Stops the example submission process, if it's still ongoing. If it has
26 | * already been stopped, has no effect. Prediction fetching will continue
27 | * until there are no more predictions to be fetched from VW, ie, the
28 | * iterable returned from 'getPredictionsIterable()' returns no more
29 | * predictions.
30 | */
31 | void stopAll();
32 |
33 | /*
34 | * Gets the total number of examples submitted thus far.
35 | *
36 | * @returns The total number of examples submitted thus far.
37 | */
38 | long getTotalNumberOfExamplesSubmitted();
39 |
40 | /*
41 | * Gets the total number of examples skipped thus far. An example can be
42 | * skipped if it's format is invalid, for instance.
43 | *
44 | * @returns The total number of skipped examples.
45 | */
46 | long getTotalNumberOfExamplesSkipped();
47 |
48 | /*
49 | * Gets the total number of predictions fetched from VW.
50 | *
51 | * @returns The total number of predictions fetched from VW.
52 | */
53 | long getTotalNumberOfPredictionsFetched();
54 |
55 | /*
56 | * Gets the current state of example submission.
57 | *
58 | * @returns The current example submission state.
59 | */
60 | ExampleSubmissionState getExampleSubmissionState();
61 |
62 | /*
63 | * Gets the current state of prediction fetching.
64 | *
65 | * @returns The current prediction fetching state.
66 | */
67 | PredictionFetchState getPredictionFetchState();
68 | }
69 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessor.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | /**
7 | * @author vrahimtoola
8 | *
9 | * Submits examples to VW.
10 | */
11 | public interface ExampleProcessor {
12 |
13 | /*
14 | * Kicks off the example submission process.
15 | *
16 | * @param callback A callback handler to handle various status changes as
17 | * examples are being processed.
18 | *
19 | * @returns An example processing manager that can be used to stop the
20 | * example submission process.
21 | *
22 | * The manner in which examples are submitted to VW will determine the
23 | * manner in which the predictions get fetched. i.e, if you submit examples
24 | * over TCP-IP, you get the predictions back over the same socket
25 | * connection, etc etc.
26 | */
27 | ExampleProcessingManager submitExamples(ExampleProcessingEventHandler callback) throws ExampleSubmissionException;
28 |
29 | /*
30 | * Returns features describing this example processor. The returned object
31 | * should never be null.
32 | *
33 | * @returns The example processor features provided by the example
34 | * processor.
35 | */
36 | ExampleProcessorFeatures getExampleProcessorFeatures();
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessorFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
7 |
8 | /**
9 | * @author vrahimtoola
10 | *
11 | * Returns an ExampleProcessor.
12 | */
13 | public interface ExampleProcessorFactory {
14 |
15 | /*
16 | * Gets the example processor to use.
17 | *
18 | * @param theExamples The VW examples to be submitted.
19 | *
20 | * @returns The example processor.
21 | */
22 | ExampleProcessor getExampleProcessor(ExamplesIterable theExamples);
23 | }
24 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessorFeatures.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | import java.util.Map;
7 |
/**
 * Features and other stuff describing the example processor. Feature keys
 * cannot be null, but feature values may be null.
 *
 * @author vrahimtoola
 */
public interface ExampleProcessorFeatures {

    /**
     * Whether or not the example processor's submitExamples() method will
     * execute asynchronously.
     *
     * @return true if the example processor submits examples asynchronously,
     *         false otherwise
     */
    boolean isAsync();

    /**
     * Returns all the features applicable to this example processor.
     *
     * @return all the features that this processor provides. None of the keys
     *         can be null, but values may be. The types of the values should
     *         be documented by the example processor. The returned map should
     *         never be null, but can be empty.
     */
    Map<String, Object> getAllFeatures();

}
36 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleProcessorFeaturesImpl.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
6 | import java.util.HashMap;
7 | import java.util.Map;
8 |
9 | /**
10 | * @author vrahimtoola
11 | *
12 | * A basic implementation of ExampleProcessorFeatures.
13 | */
14 | public class ExampleProcessorFeaturesImpl implements ExampleProcessorFeatures {
15 |
16 | private final Map featuresMap;
17 | private final boolean isAsync;
18 |
19 | public ExampleProcessorFeaturesImpl(boolean isAsync,
20 | Map featuresMap) {
21 |
22 | this.isAsync = isAsync;
23 | this.featuresMap = getShallowCopyOfMap(featuresMap);
24 | }
25 |
26 | /* (non-Javadoc)
27 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeatures#getAllFeatures()
28 | */
29 | public Map getAllFeatures() {
30 | return getShallowCopyOfMap(featuresMap);
31 | }
32 |
33 |
34 | private Map getShallowCopyOfMap(
35 | Map mapToCopy) {
36 | Map copy = new HashMap();
37 |
38 | if (mapToCopy != null)
39 | for (Map.Entry entry : mapToCopy.entrySet()) {
40 | if (entry.getKey() != null) // skip over null keys.
41 | copy.put(entry.getKey(), entry.getValue());
42 | }
43 |
44 | return copy;
45 | }
46 |
47 | public boolean isAsync() {
48 | return isAsync;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleSubmissionException.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
/**
 * Exception thrown when something bad happens while submitting examples to VW,
 * or before any examples have been submitted to VW.
 *
 * @author vrahimtoola
 */
public class ExampleSubmissionException extends Exception {

    /**
     * The serial version UID.
     */
    private static final long serialVersionUID = 5135330791227994409L;

    /**
     * Creates an exception carrying both a detail message and the underlying
     * cause.
     *
     * @param message
     *            the detail message
     * @param cause
     *            the underlying cause
     */
    public ExampleSubmissionException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps the underlying cause.
     *
     * @param cause
     *            the underlying cause
     */
    public ExampleSubmissionException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception carrying a detail message.
     *
     * @param message
     *            the detail message
     */
    public ExampleSubmissionException(String message) {
        super(message);
    }

    /**
     * Creates an exception with neither a detail message nor a cause.
     */
    public ExampleSubmissionException() {
    }

}
36 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/ExampleSubmissionState.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
/**
 * The various states in which the example submission process can be.
 *
 * This is the most recent state of the example submission process, so it's
 * possible for instance that there was an ExampleFormatException, and then an
 * ExampleSubmissionException, but the client only gets to observe the most
 * recent state which is the example submission fault. This depends on the
 * implementation of the example submitter; it can choose to stop submitting
 * examples on an ExampleFormatException or not.
 *
 * @author vrahimtoola
 */
public enum ExampleSubmissionState {

    /**
     * All examples have been submitted, with no exceptions.
     */
    Complete,

    /**
     * Some exception occurred making it impossible to read more examples. No
     * more examples will be submitted.
     */
    ExampleReadFault,

    /**
     * Some exception occurred making it impossible to submit more examples to
     * VW. No more examples will be submitted.
     */
    ExampleSubmissionFault,

    /**
     * One or more examples were invalid. No more examples will be submitted.
     */
    ExampleFormatFault,

    /**
     * Examples are currently being submitted.
     */
    OnGoing,

    /**
     * Example submission was stopped for some reason.
     */
    Stopped

}
53 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/PredictionFetchException.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
/**
 * Exception thrown when something bad happens when reading predictions from
 * VW.
 *
 * @author vrahimtoola
 */
public class PredictionFetchException extends Exception {

    /**
     * The serial version UID.
     */
    private static final long serialVersionUID = -5193371328499134437L;

    /**
     * Creates an exception carrying both a detail message and the underlying
     * cause.
     *
     * @param message
     *            the detail message
     * @param cause
     *            the underlying cause
     */
    public PredictionFetchException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps the underlying cause.
     *
     * @param cause
     *            the underlying cause
     */
    public PredictionFetchException(Throwable cause) {
        super(cause);
    }

    /**
     * Creates an exception carrying a detail message.
     *
     * @param message
     *            the detail message
     */
    public PredictionFetchException(String message) {
        super(message);
    }

    /**
     * Creates an exception with neither a detail message nor a cause.
     */
    public PredictionFetchException() {
    }
}
35 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/PredictionFetchState.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor;
5 |
/**
 * The states in which the prediction fetch process can be.
 *
 * @author vrahimtoola
 */
public enum PredictionFetchState {

    /**
     * All predictions fetched with no exceptions.
     */
    Complete,

    /**
     * Some exception occurred when reading predictions from VW. No more
     * predictions will be read from VW.
     */
    PredictionFetchFault,

    /**
     * An exception occurred when sending predictions back to the client. No
     * more predictions will be sent. Typically example submission will also be
     * stopped.
     */
    PredictionWriteFault,

    /**
     * Predictions are currently being fetched.
     */
    OnGoing
}
36 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/AsyncFailFastTCPIPExampleProcessor.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import java.io.BufferedWriter;
7 | import java.io.IOException;
8 | import java.io.OutputStream;
9 | import java.io.OutputStreamWriter;
10 | import java.net.Socket;
11 | import java.util.concurrent.Callable;
12 | import java.util.concurrent.ExecutorService;
13 |
14 | import org.slf4j.LoggerFactory;
15 |
16 | import com.eharmony.matching.vw.webservice.common.example.Example;
17 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException;
18 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler;
20 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager;
21 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor;
22 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeatures;
23 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFeaturesImpl;
24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException;
25 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState;
26 | import com.google.common.base.Charsets;
27 |
28 | /**
29 | * @author vrahimtoola
30 | *
31 | * An asynchronous, fail fast example processor to submit examples to VW
32 | * over a TCP IP socket.
33 | *
34 | * Making this package-private for now.
35 | */
36 | class AsyncFailFastTCPIPExampleProcessor implements ExampleProcessor {
37 |
38 | private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(AsyncFailFastTCPIPExampleProcessor.class);
39 |
40 | private final ExecutorService executorService;
41 | private final TCPIPSocketFactory socketFactory;
42 | private final Iterable examples;
43 |
44 | public AsyncFailFastTCPIPExampleProcessor(TCPIPSocketFactory socketFactory, ExecutorService executorService, Iterable examples) {
45 |
46 | this.executorService = executorService;
47 | this.socketFactory = socketFactory;
48 | this.examples = examples;
49 | }
50 |
51 | public ExampleProcessingManager submitExamples(final ExampleProcessingEventHandler callback) throws ExampleSubmissionException {
52 |
53 | try {
54 | final Socket socket = socketFactory.getSocket();
55 |
56 | final TCPIPExampleProcessingManager exampleProcessingManager = new TCPIPExampleProcessingManager(socket, callback);
57 |
58 | executorService.submit(new Callable() {
59 |
60 | public Void call() {
61 |
62 | OutputStream outputStream;
63 |
64 | boolean faulted = false;
65 |
66 | boolean stoppedPrematurely = false;
67 |
68 | BufferedWriter writer = null;
69 |
70 | long numExamplesSent = 0;
71 |
72 | try {
73 |
74 | outputStream = socket.getOutputStream();
75 |
76 | LOGGER.info("Starting to submit examples to VW...");
77 |
78 | writer = new BufferedWriter(new OutputStreamWriter(outputStream, Charsets.UTF_8));
79 |
80 | for (Example example : examples) {
81 |
82 | String toWrite = null;
83 |
84 | try {
85 | toWrite = example.getVWStringRepresentation();
86 | writer.write(toWrite);
87 | writer.newLine();
88 |
89 | numExamplesSent++;
90 |
91 | if (numExamplesSent == 1) LOGGER.debug("First example: {}", toWrite);
92 |
93 | exampleProcessingManager.incrementNumberOfExamplesSubmitted();
94 |
95 | LOGGER.trace("Submitted example #{}: {}", numExamplesSent, toWrite);
96 | }
97 | catch (ExampleFormatException e) {
98 |
99 | exampleProcessingManager.incrementNumberOfExamplesSkipped();
100 | if (callback != null) callback.onExampleFormatException(exampleProcessingManager, e);
101 |
102 | }
103 |
104 | if (exampleProcessingManager.isStopped()) {
105 | LOGGER.warn("Example submission process was stopped for some reason!");
106 | stoppedPrematurely = true;
107 | break;
108 | }
109 | }
110 |
111 | if (!stoppedPrematurely) LOGGER.info("All examples submitted to VW!");
112 |
113 | LOGGER.info("Sent a total of {} examples to VW", numExamplesSent);
114 |
115 | }
116 | catch (ExampleReadException e) {
117 |
118 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleReadFault);
119 |
120 | if (callback != null) callback.onExampleReadException(exampleProcessingManager, e);
121 |
122 | LOGGER.error("ExampleReadException in ExampleSubmitter: {}", e.getMessage(), e);
123 |
124 | faulted = true;
125 | }
126 | catch (Exception e) {
127 |
128 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault);
129 |
130 | if (callback != null) callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e));
131 |
132 | LOGGER.error("Other Exception in ExampleSubmitter: {}", e.getMessage(), e);
133 |
134 | faulted = true;
135 | }
136 | finally {
137 |
138 | if (writer != null) try {
139 | writer.flush(); //make sure that anything buffered by the bufferedwriter is flushed to the underlying stream
140 | }
141 | catch (IOException e) {
142 |
143 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault);
144 |
145 | if (callback != null) callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e));
146 |
147 | LOGGER.error("IOException when closing example writer in ExampleProcessor: {}", e.getMessage(), e);
148 |
149 | faulted = true;
150 | }
151 |
152 | if (socket != null) try {
153 |
154 | socket.shutdownOutput();
155 | }
156 | catch (IOException e2) {
157 |
158 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.ExampleSubmissionFault);
159 |
160 | if (callback != null) callback.onExampleSubmissionException(exampleProcessingManager, new ExampleSubmissionException(e2));
161 |
162 | LOGGER.error("IOException when shutting down socket output in ExampleProcessor: {}", e2.getMessage(), e2);
163 |
164 | faulted = true;
165 | }
166 |
167 | if (faulted == false) {
168 | if (stoppedPrematurely == false)
169 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.Complete);
170 | else {
171 | exampleProcessingManager.setExampleSubmissionState(ExampleSubmissionState.Stopped);
172 | }
173 | }
174 |
175 | if (callback != null) callback.onExampleSubmissionComplete(exampleProcessingManager);
176 |
177 | }
178 |
179 | return null;
180 | }
181 |
182 | });
183 |
184 | return exampleProcessingManager;
185 | }
186 | catch (Exception e1) {
187 |
188 | LOGGER.error("Exception in submitExamples(): {}", e1.getMessage());
189 |
190 | throw new ExampleSubmissionException(e1);
191 | }
192 |
193 | }
194 |
195 | public ExampleProcessorFeatures getExampleProcessorFeatures() {
196 |
197 | return new ExampleProcessorFeaturesImpl(true, null);
198 | }
199 |
200 | }
201 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPExampleProcessingManager.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import java.io.IOException;
7 | import java.net.Socket;
8 | import java.util.Iterator;
9 |
10 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction;
11 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler;
12 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager;
13 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState;
14 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState;
15 |
16 | /**
17 | * @author vrahimtoola An implementation of ExampleProcessingManager for use by
18 | * the AsyncFailFastTCPIPExampleProcessor.
19 | */
20 | class TCPIPExampleProcessingManager implements ExampleProcessingManager {
21 |
22 | private long numExamplesSubmitted, numExamplesSkipped, numPredictionsFetched;
23 | private ExampleSubmissionState exampleSubmissionState = ExampleSubmissionState.OnGoing;
24 | private final TCPIPPredictionsIterator predictionsIterator;
25 |
26 | private boolean isStopped = false;
27 |
28 | public TCPIPExampleProcessingManager(Socket socket, ExampleProcessingEventHandler callback) throws IOException {
29 | this.predictionsIterator = new TCPIPPredictionsIterator(socket, callback, this);
30 | }
31 |
32 | /*
33 | * (non-Javadoc)
34 | *
35 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
36 | * ExampleProcessingManager#getPredictionsIterable()
37 | */
38 | public Iterable getPredictionsIterable() {
39 |
40 | return new Iterable() {
41 |
42 | public Iterator iterator() {
43 | return predictionsIterator;
44 | }
45 | };
46 | }
47 |
48 | /*
49 | * (non-Javadoc)
50 | *
51 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
52 | * ExampleProcessingManager#stopAll()
53 | */
54 | public synchronized void stopAll() {
55 |
56 | isStopped = true;
57 | }
58 |
59 | /*
60 | * (non-Javadoc)
61 | *
62 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
63 | * ExampleProcessingManager#getTotalNumberOfExamplesSubmitted()
64 | */
65 | public synchronized long getTotalNumberOfExamplesSubmitted() {
66 | return numExamplesSubmitted;
67 | }
68 |
69 | /*
70 | * (non-Javadoc)
71 | *
72 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
73 | * ExampleProcessingManager#getTotalNumberOfExamplesSkipped()
74 | */
75 | public synchronized long getTotalNumberOfExamplesSkipped() {
76 | return numExamplesSkipped;
77 | }
78 |
79 | /*
80 | * (non-Javadoc)
81 | *
82 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
83 | * ExampleProcessingManager#getExampleSubmissionState()
84 | */
85 | public synchronized ExampleSubmissionState getExampleSubmissionState() {
86 | return exampleSubmissionState;
87 | }
88 |
89 | /*
90 | * (non-Javadoc)
91 | *
92 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
93 | * ExampleProcessingManager#getPredictionFetchState()
94 | */
95 | public PredictionFetchState getPredictionFetchState() {
96 |
97 | return predictionsIterator.getPredictionFetchState();
98 | }
99 |
100 | public synchronized void incrementNumberOfExamplesSubmitted() {
101 | numExamplesSubmitted++;
102 | }
103 |
104 | public synchronized void incrementNumberOfExamplesSkipped() {
105 | numExamplesSkipped++;
106 | }
107 |
108 | public synchronized void incrementNumberOfPredictionsFetched() {
109 | numPredictionsFetched++;
110 | }
111 |
112 | public synchronized void setExampleSubmissionState(ExampleSubmissionState newState) {
113 | exampleSubmissionState = newState;
114 | }
115 |
116 | public synchronized boolean isStopped() {
117 | return isStopped;
118 | }
119 |
120 | public synchronized long getTotalNumberOfPredictionsFetched() {
121 | return numPredictionsFetched;
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPExampleProcessorFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.util.concurrent.ExecutorService;
9 |
10 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
11 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor;
12 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory;
13 |
14 | /**
15 | * @author vrahimtoola
16 | *
17 | * A factory that returns example processors that submit examples to VW
18 | * over a TCP-IP socket and read predictions back the same way.
19 | */
20 | public class TCPIPExampleProcessorFactory implements ExampleProcessorFactory {
21 |
22 | private final TCPIPSocketFactory socketFactory;
23 |
24 | /*
25 | * An application wide thread pool service.
26 | */
27 | private final ExecutorService executorService;
28 |
29 | public TCPIPExampleProcessorFactory(TCPIPSocketFactory socketFactory,
30 | ExecutorService executorService) {
31 |
32 | checkNotNull(socketFactory, "A null socket factory cannot be provided!");
33 | checkNotNull(executorService, "A null executor service cannot be provided!");
34 |
35 | this.socketFactory = socketFactory;
36 | this.executorService = executorService;
37 | }
38 |
39 | /*
40 | * (non-Javadoc)
41 | *
42 | * @see com.eharmony.matching.vw.webservice.core.exampleprocessor.
43 | * ExampleProcessorFactory#getExampleSubmitter(java.lang.Iterable,
44 | * java.util.EnumSet)
45 | */
46 | public ExampleProcessor getExampleProcessor(ExamplesIterable theExamples) {
47 |
48 | // TODO: return a proper example submitter based on the provided
49 | // examples iterable by examining its attributes.
50 |
51 | // returning the TCP IP async submitter for now.
52 | return new AsyncFailFastTCPIPExampleProcessor(socketFactory, executorService, theExamples);
53 | }
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPPredictionsIterator.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 | import java.net.Socket;
10 | import java.util.Iterator;
11 |
12 | import org.slf4j.Logger;
13 | import org.slf4j.LoggerFactory;
14 |
15 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction;
16 | import com.eharmony.matching.vw.webservice.common.prediction.StringPrediction;
17 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler;
18 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException;
19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState;
20 |
21 | /**
22 | * @author vrahimtoola
23 | *
24 | * Reads predictions from VW over a TCP-IP socket.
25 | *
26 | * TODO: make it so that the example submitter doesn't start submitting
27 | * examples until someone actually starts to read predictions. TODO:
28 | * test what happens if the prediction fetcher closes the socket before
29 | * all examples have been submitted.
30 | */
31 | class TCPIPPredictionsIterator implements Iterator {
32 |
33 | private static final Logger LOGGER = LoggerFactory.getLogger(TCPIPPredictionsIterator.class);
34 |
35 | private final Socket socket;
36 | private final BufferedReader reader;
37 | private final ExampleProcessingEventHandler callback;
38 | private final TCPIPExampleProcessingManager exampleProcessingManager;
39 |
40 | private String nextLineToReturn = null;
41 | private PredictionFetchState predictionFetchState = PredictionFetchState.OnGoing;
42 |
43 | private boolean firstCallToHasNext = true;
44 |
45 | public TCPIPPredictionsIterator(Socket socket, ExampleProcessingEventHandler callback, TCPIPExampleProcessingManager exampleProcessingManager) throws IOException {
46 |
47 | this.reader = new BufferedReader(new InputStreamReader(socket.getInputStream()));
48 | this.callback = callback;
49 | this.socket = socket;
50 | this.exampleProcessingManager = exampleProcessingManager;
51 | }
52 |
53 | public boolean hasNext() {
54 |
55 | if (firstCallToHasNext) {
56 |
57 | LOGGER.debug("First call to advance in TCP IP iterator!");
58 |
59 | advance(); // don't want to call this in the constructor because
60 | // that could block.
61 |
62 | firstCallToHasNext = false;
63 |
64 | }
65 |
66 | return nextLineToReturn != null;
67 | }
68 |
69 | public Prediction next() {
70 | String toReturn = nextLineToReturn;
71 |
72 | advance();
73 |
74 | return new StringPrediction(toReturn);
75 | }
76 |
77 | public void remove() {
78 | throw new UnsupportedOperationException("The 'remove' operation is not supported!");
79 | }
80 |
81 | private void advance() {
82 | // Reads the next line into nextLineToReturn; on end of stream or on any error, closes the reader and the socket.
83 | boolean closeReader = false;
84 | boolean faulted = false;
85 | try {
86 |
87 | nextLineToReturn = reader.readLine();
88 |
89 | LOGGER.trace("Read prediction: {}", nextLineToReturn);
90 | // a null line means the stream has ended, so the reader gets closed in the finally block
91 | closeReader = nextLineToReturn == null;
92 | // only lines that were actually read count towards the number of predictions fetched
93 | if (nextLineToReturn != null) exampleProcessingManager.incrementNumberOfPredictionsFetched();
94 |
95 | }
96 | catch (Exception e) {
97 |
98 | LOGGER.error("Error in TCPIPPredictionIterator: {}", e.getMessage(), e);
99 |
100 | faulted = true;
101 |
102 | closeReader = true;
103 |
104 | setPredictionFetchState(PredictionFetchState.PredictionFetchFault);
105 | // report the failure to the callback (if any), wrapped in a PredictionFetchException
106 | if (callback != null) callback.onPredictionFetchException(exampleProcessingManager, new PredictionFetchException(e));
107 |
108 | }
109 | finally {
110 |
111 | if (closeReader) {
112 | try {
113 | if (socket.isClosed() == false) reader.close();
114 | }
115 | catch (Exception e2) {
116 | LOGGER.warn("Failed to close the reader in predictions iterator: {}", e2.getMessage(), e2);
117 | }
118 | // close the socket as well, unless it is already closed; a failure here is only logged
119 | if (socket.isClosed() == false) try {
120 | socket.close();
121 | }
122 | catch (Exception e2) {
123 | LOGGER.warn("Failed to close the socket in predictions iterator: {}", e2.getMessage(), e2);
124 | }
125 |
126 | nextLineToReturn = null; // need to set this explicitly, since
127 | // an exception may have
128 | // occurred
129 | // necessitating the closing of the
130 | // reader.
131 |
132 | if (!faulted)
133 | setPredictionFetchState(PredictionFetchState.Complete);
134 | else {
135 | //faulted, so halt the example submission process
136 | LOGGER.warn("Stopping example submission from within the TCP IP predictions iterator...");
137 | exampleProcessingManager.stopAll();
138 |
139 | //if faulted, the prediction fetch state will already have been set in the exception handling code.
140 | }
141 | // the completion callback is invoked in both the normal and the faulted case
142 | if (callback != null) callback.onPredictionFetchComplete(exampleProcessingManager);
143 | }
144 | }
145 | }
146 |
147 | private synchronized void setPredictionFetchState(PredictionFetchState predictionFetchState) { // synchronized setter for the fetch state
148 | this.predictionFetchState = predictionFetchState;
149 | }
150 |
151 | public synchronized PredictionFetchState getPredictionFetchState() { // synchronized getter; returns the most recently set fetch state
152 | return predictionFetchState;
153 | }
154 |
155 | }
156 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPSocketFactory.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import java.io.IOException;
7 | import java.net.Socket;
8 | import java.net.UnknownHostException;
9 |
10 | /**
11 | * @author vrahimtoola
12 | *
13 | * Returns a TCP IP socket that can be used for communicating with a VW
14 | * daemon. This abstraction has been added to facilitate testing of the
15 | * TCP IP example submitters and prediction fetchers.
16 | */
17 | public interface TCPIPSocketFactory {
18 |
19 | /**
20 | * Returns a socket connection to a running VW daemon.
21 | *
22 | * @return A TCP IP socket that can be used for communicating with a VW
23 | * daemon. Note that the caller owns this socket and is responsible for any
24 | * cleanup (ie, shutting it down when done).
25 | */
26 | Socket getSocket() throws UnknownHostException, IOException;
27 | }
28 |
--------------------------------------------------------------------------------
/vw-webservice-core/src/main/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/TCPIPSocketFactoryImpl.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import static com.google.common.base.Preconditions.checkArgument;
7 |
8 | import java.io.IOException;
9 | import java.net.Socket;
10 | import java.net.UnknownHostException;
11 |
12 | import org.apache.commons.lang3.StringUtils;
13 | import org.slf4j.Logger;
14 | import org.slf4j.LoggerFactory;
15 |
16 | /**
17 | * @author vrahimtoola
18 | *
19 | * A basic implementation of the TCPIPSocketFactory interface.
20 | */
21 | public class TCPIPSocketFactoryImpl implements TCPIPSocketFactory {
22 |
23 | private static final Logger LOGGER = LoggerFactory.getLogger(TCPIPSocketFactoryImpl.class);
24 |
25 | private final String vwHost;
26 | private final int vwPort;
27 | /** Creates a factory for the given VW host and port; throws IllegalArgumentException if the host is blank or the port is outside 1-65535. */
28 | public TCPIPSocketFactoryImpl(String vwHost, int vwPort) {
29 | // validate eagerly so that a bad configuration fails here rather than on the first getSocket() call
30 | checkArgument(StringUtils.isBlank(vwHost) == false, "The hostname for VW must be provided!");
31 | checkArgument(vwPort > 0 && vwPort <= 65535, "Invalid port specified for VW!");
32 |
33 | this.vwHost = vwHost;
34 | this.vwPort = vwPort;
35 | }
36 | /** Opens and returns a new socket connected to the configured host and port; the caller owns the socket and must close it. */
37 | public Socket getSocket() throws UnknownHostException, IOException {
38 |
39 | LOGGER.debug("Returning socket for host: {} and port: {}", vwHost, vwPort);
40 | // a new connection is created on every call; nothing is cached or pooled here
41 | return new Socket(vwHost, vwPort);
42 | }
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/README.md:
--------------------------------------------------------------------------------
1 | vw-webservice-jersey
2 | ==================
3 |
4 | An implementation of the VW web service that uses Jersey to create a RESTful web service.
5 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/pom.xml:
--------------------------------------------------------------------------------
1 |
3 |
4 | 4.0.0
5 |
6 |
7 | com.eharmony.matching
8 | vw-webservice
9 | 0.1.0-SNAPSHOT
10 | ../../vw-webservice
11 |
12 |
13 | vw-webservice-jersey
14 | war
15 | VW Web Service (Jersey).
16 | Jersey based web service.
17 |
18 |
19 | 2.5.1
20 |
21 |
22 |
23 |
29 |
30 |
31 | vw-webservice-jersey-${project.version}
32 |
33 |
34 |
35 | org.apache.maven.plugins
36 | maven-compiler-plugin
37 |
38 |
39 |
40 | org.apache.maven.plugins
41 | maven-surefire-plugin
42 |
43 |
44 |
45 | org.apache.maven.plugins
46 | maven-war-plugin
47 | 2.1.1
48 |
49 | src/main/webapp/WEB-INF/web.xml
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 | org.glassfish.jersey
60 | jersey-bom
61 | ${jersey.version}
62 | pom
63 | import
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 | com.eharmony.matching
72 | vw-webservice-core
73 | ${project.version}
74 |
75 |
76 |
77 | com.eharmony.matching
78 | vw-webservice-common
79 | ${project.version}
80 |
81 |
82 |
83 | org.glassfish.jersey.containers
84 |
86 |
87 | jersey-container-servlet
88 |
89 |
90 |
94 |
95 |
96 |
97 | junit
98 | junit
99 |
100 |
101 |
102 | org.mockito
103 | mockito-all
104 |
105 |
106 |
107 |
108 |
109 |
114 |
115 |
116 |
117 |
118 | org.apache.commons
119 | commons-lang3
120 |
121 |
122 |
123 |
124 | org.springframework
125 | spring-core
126 |
127 |
128 | org.springframework
129 | spring-beans
130 |
131 |
132 | org.springframework
133 | spring-context
134 |
135 |
136 | org.springframework
137 | spring-context-support
138 |
139 |
140 | org.springframework
141 | spring-expression
142 |
143 |
144 |
145 |
146 | org.glassfish.jersey.ext
147 | jersey-spring3
148 | ${jersey.version}
149 |
150 |
151 |
152 |
153 | org.slf4j
154 | slf4j-api
155 |
156 |
157 |
158 | ch.qos.logback
159 | logback-classic
160 |
161 |
162 |
163 | ch.qos.logback
164 | logback-core
165 |
166 |
167 |
168 |
169 | com.google.guava
170 | guava
171 |
172 |
173 |
174 |
175 | com.google.code.gson
176 | gson
177 |
178 |
179 |
180 |
181 | com.fasterxml.jackson.core
182 | jackson-core
183 |
184 |
185 |
186 |
187 | commons-collections
188 | commons-collections
189 |
190 |
191 |
192 |
193 | com.ning
194 | async-http-client
195 | 1.7.22
196 | test
197 |
198 |
199 |
200 |
201 |
202 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/PredictResource.java:
--------------------------------------------------------------------------------
1 | package com.eharmony.matching.vw.webservice;
2 |
3 | import static com.google.common.base.Preconditions.checkNotNull;
4 |
5 | import java.io.IOException;
6 | import java.util.concurrent.ExecutorService;
7 |
8 | import javax.ws.rs.Consumes;
9 | import javax.ws.rs.GET;
10 | import javax.ws.rs.POST;
11 | import javax.ws.rs.Path;
12 | import javax.ws.rs.Produces;
13 | import javax.ws.rs.core.MediaType;
14 |
15 | import org.glassfish.jersey.server.ChunkedOutput;
16 | import org.slf4j.Logger;
17 | import org.slf4j.LoggerFactory;
18 | import org.springframework.beans.factory.annotation.Autowired;
19 |
20 | import com.eharmony.matching.vw.webservice.common.example.Example;
21 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes;
22 | import com.eharmony.matching.vw.webservice.common.prediction.PredictionMediaTypes;
23 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory;
25 |
26 | /**
27 | * Root resource (exposed at "predict" path)
28 | */
29 | @Path("/predict")
30 | public class PredictResource {
31 | // factory passed on to the RequestHandler that is created for each request
32 | private final ExampleProcessorFactory exampleProcessorFactory;
33 | // executor passed on to each RequestHandler; NOTE(review): not null-checked in the constructor - confirm it is always injected
34 | private final ExecutorService executorService;
35 |
36 | private static final Logger LOGGER = LoggerFactory.getLogger(PredictResource.class);
37 | /** Stores the injected collaborators; only the example processor factory is checked for null. */
38 | @Autowired
39 | public PredictResource(ExecutorService executorService, ExampleProcessorFactory exampleProcessorFactory) {
40 |
41 | checkNotNull(exampleProcessorFactory, "An example processor factory must be provided!");
42 |
43 | this.exampleProcessorFactory = exampleProcessorFactory;
44 |
45 | this.executorService = executorService;
46 |
47 | }
48 | /** Handles POSTs to the "/main" sub-path by delegating to a freshly created RequestHandler and returning its chunked output. */
49 | @POST
50 | @Consumes({ ExampleMediaTypes.PLAINTEXT_0_1_0, MediaType.TEXT_PLAIN, ExampleMediaTypes.SIMPLE_PROTOBUF_0_1_0, ExampleMediaTypes.SIMPLE_JSON_0_1_0, ExampleMediaTypes.STRUCTURED_JSON_0_1_0 })
51 | @Produces({ PredictionMediaTypes.PLAINTEXT_0_1_0 })
52 | @Path("/main")
53 | public ChunkedOutput doPredict(ExamplesIterable examplesIterable) throws IOException {
54 | // a new RequestHandler is created per call; it is not shared between requests
55 | return new RequestHandler(executorService, exampleProcessorFactory).handleRequest(examplesIterable);
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/RequestHandler.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice;
5 |
6 | import java.io.IOException;
7 | import java.util.concurrent.ExecutorService;
8 |
9 | import javax.ws.rs.WebApplicationException;
10 |
11 | import org.glassfish.jersey.server.ChunkedOutput;
12 | import org.slf4j.Logger;
13 | import org.slf4j.LoggerFactory;
14 |
15 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException;
16 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction;
17 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
18 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
19 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler;
20 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager;
21 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessor;
22 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessorFactory;
23 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException;
24 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException;
25 |
26 | /**
27 | * @author vrahimtoola
28 | *
29 | * Handles an individual request to submit examples to VW and read back
30 | * the predictions.
31 | */
32 | class RequestHandler implements ExampleProcessingEventHandler {
33 | // supplies the example processor for the examples of this request
34 | private final ExampleProcessorFactory exampleProcessorFactory;
35 |
36 | private final Logger LOGGER = LoggerFactory.getLogger(RequestHandler.class);
37 | // used only when the example processor reports itself as asynchronous (see submitAsynchronously)
38 | private final ExecutorService executorService;
39 | /** Stores the given collaborators as-is; no validation is performed here. */
40 | public RequestHandler(ExecutorService executorService, ExampleProcessorFactory exampleProcessorFactory) {
41 |
42 | this.exampleProcessorFactory = exampleProcessorFactory;
43 | this.executorService = executorService;
44 | }
45 | /** Starts processing the given examples and returns the chunked output that the predictions are written to. */
46 | public ChunkedOutput handleRequest(ExamplesIterable examplesIterable) {
47 |
48 | ChunkedOutput chunkedOutput = new ChunkedOutput(String.class);
49 |
50 | // get the example processor.
51 | ExampleProcessor exampleProcessor = exampleProcessorFactory.getExampleProcessor(examplesIterable);
52 | // non-async processors run on the calling thread; async ones are handed to the executor service
53 | if (exampleProcessor.getExampleProcessorFeatures().isAsync() == false)
54 | submitSynchronously(exampleProcessor, chunkedOutput);
55 | else {
56 | submitAsynchronously(exampleProcessor, chunkedOutput);
57 | }
58 |
59 | return chunkedOutput;
60 | }
61 | /** Submits the examples, writes each prediction (newline-terminated) to the chunked output, and always closes the output at the end. */
62 | private void submitSynchronously(final ExampleProcessor exampleProcessor, ChunkedOutput chunkedOutput) {
63 | // this handler registers itself as the receiver of the example processing events
64 | final ExampleProcessingEventHandler eventHandler = this;
65 |
66 | long numPredictionsWritten = 0;
67 |
68 | Iterable predictions = null;
69 |
70 | ExampleProcessingManager exampleProcessingManager = null;
71 |
72 | try {
73 |
74 | LOGGER.info("About to submit examples...");
75 |
76 | // note: depending on the example submitter in use,
77 | // the call to submitExamples could spawn off a separate
78 | // thread to submit examples to VW.
79 | exampleProcessingManager = exampleProcessor.submitExamples(eventHandler);
80 |
81 | predictions = exampleProcessingManager.getPredictionsIterable();
82 | // write every prediction to the client, one per line
83 | for (Prediction p : predictions) {
84 |
85 | try {
86 | String toWrite = p.getVWStringRepresentation() + "\n";
87 |
88 | LOGGER.trace("Writing prediction: {}", toWrite);
89 |
90 | chunkedOutput.write(toWrite);
91 |
92 | numPredictionsWritten++;
93 | }
94 | catch (IOException e) {
95 | LOGGER.error("IOException when writing out prediction! Message: {}", e.getMessage(), e);
96 | throw new WebApplicationException(e); //nothing we can do if we can't send any data back to the client!
97 | }
98 | }
99 | // summary statistics; these are skipped when an exception interrupts the loop above
100 | LOGGER.info("Submitted a total of {} examples", exampleProcessingManager.getTotalNumberOfExamplesSubmitted());
101 | LOGGER.info("Skipped a total of {} examples", exampleProcessingManager.getTotalNumberOfExamplesSkipped());
102 | LOGGER.info("Read a total of {} predictions from VW", exampleProcessingManager.getTotalNumberOfPredictionsFetched());
103 | LOGGER.info("Wrote a total of {} predictions", numPredictionsWritten);
104 | LOGGER.info("Final example submission state: {}", exampleProcessingManager.getExampleSubmissionState());
105 | LOGGER.info("Final prediction fetch state: {}", exampleProcessingManager.getPredictionFetchState());
106 |
107 | }
108 | catch (ExampleSubmissionException e) {
109 | // a submission failure is only logged; nothing is written back to the client for it
110 | LOGGER.error("Exception when submitting examples! Message: {}", e.getMessage(), e);
111 |
112 | //output.write(("Exception when submitting examples! Message: " + e.getMessage()).getBytes());
113 | }
114 | catch (Exception e) {
115 | //if any other exception occurs, stop the example submission process.
116 | LOGGER.error("Other exception when reading predictions: {}", e.getMessage(), e);
117 | // this branch also receives the WebApplicationException thrown above when a write fails
118 | if (exampleProcessingManager != null) {
119 | LOGGER.info("Stopping example submission...");
120 | exampleProcessingManager.stopAll();
121 | LOGGER.info("Example submission stopped.");
122 | }
123 | else {
124 | LOGGER.warn("Example processing manager was null!");
125 | }
126 | }
127 | finally {
128 | try {
129 | chunkedOutput.close();
130 | }
131 | catch (Exception e2) {
132 |
133 | LOGGER.error("Exception when flushing output stream of predictions! Message: {}", e2.getMessage(), e2);
134 |
135 | }
136 |
137 | }
138 | }
139 | /** Runs submitSynchronously for the given processor and output as a task on the executor service. */
140 | private void submitAsynchronously(final ExampleProcessor exampleSubmitter, final ChunkedOutput chunkedOutput) {
141 |
142 | executorService.submit(new Runnable() {
143 |
144 | @Override
145 | public void run() {
146 |
147 | submitSynchronously(exampleSubmitter, chunkedOutput);
148 |
149 | }
150 |
151 | });
152 |
153 | }
154 | // The event handler callbacks below only log the event they are notified about.
155 | @Override
156 | public void onExampleReadException(ExampleProcessingManager exampleProcessingManager, ExampleReadException theException) {
157 | LOGGER.error("Example read exception: {}", theException.getMessage(), theException);
158 | }
159 |
160 | @Override
161 | public void onExampleFormatException(ExampleProcessingManager exampleProcessingManager, ExampleFormatException theException) {
162 | LOGGER.warn("Example format exception: {}", theException.getMessage(), theException);
163 |
164 | }
165 |
166 | @Override
167 | public void onExampleSubmissionException(ExampleProcessingManager exampleProcessingManager, ExampleSubmissionException theException) {
168 | LOGGER.error("Example submission exception: {}", theException.getMessage(), theException);
169 |
170 | }
171 |
172 | @Override
173 | public void onExampleSubmissionComplete(ExampleProcessingManager exampleProcessingManager) {
174 | LOGGER.info("Example submission complete!");
175 |
176 | }
177 |
178 | @Override
179 | public void onPredictionFetchException(ExampleProcessingManager exampleProcessingManager, PredictionFetchException theException) {
180 | LOGGER.error("Prediction fetch exception: {}", theException.getMessage(), theException);
181 |
182 | }
183 |
184 | @Override
185 | public void onPredictionFetchComplete(ExampleProcessingManager exampleProcessingManager) {
186 | LOGGER.info("Prediction fetch complete!");
187 |
188 | }
189 | }
190 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/GsonJsonExamplesProvider.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.io.InputStreamReader;
9 | import java.util.Iterator;
10 |
11 | import com.eharmony.matching.vw.webservice.common.example.Example;
12 | import com.eharmony.matching.vw.webservice.common.example.StringExample;
13 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
14 | import com.google.common.collect.AbstractIterator;
15 | import com.google.gson.stream.JsonReader;
16 |
17 | /**
18 | * @author vrahimtoola
19 | *
20 | * Uses Google's GSON to provide json examples.
21 | */
22 | public class GsonJsonExamplesProvider implements JsonExamplesProvider {
23 | /** Returns an iterator that reads the JSON array of examples from the stream on demand; read failures surface as ExampleReadException while iterating. */
24 | @Override
25 | public Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException {
26 | // decode explicitly as UTF-8 instead of relying on the platform default charset, which differs between hosts
27 | final JsonReader jsonReader = new JsonReader(new InputStreamReader(inputStream, java.nio.charset.Charset.forName("UTF-8")));
28 |
29 | AbstractIterator theIterator = new AbstractIterator() {
30 | // whether the opening '[' of the top-level array has been consumed yet
31 | private boolean readStartOfArray = false;
32 |
33 | @Override
34 | public Example computeNext() {
35 |
36 | try {
37 | // nothing is read from the stream until the first element is requested
38 | if (!readStartOfArray) {
39 | jsonReader.beginArray();
40 | readStartOfArray = true;
41 | }
42 |
43 | if (jsonReader.hasNext()) {
44 |
45 | return readIndividualJsonExample(jsonReader);
46 |
47 | }
48 | else {
49 | jsonReader.endArray();
50 | return endOfData();
51 | }
52 | }
53 | catch (Exception e) {
54 | throw new ExampleReadException(e);
55 | }
56 |
57 | }
58 | };
59 |
60 | return theIterator;
61 | }
62 | /** Reads one JSON object holding a single 'example' string property (name matched case-insensitively); anything else raises ExampleReadException. */
63 | private Example readIndividualJsonExample(JsonReader reader) throws IOException {
64 | reader.beginObject();
65 |
66 | String exampleString = null;
67 |
68 | while (reader.hasNext()) {
69 | String propertyName = reader.nextName();
70 |
71 | if (propertyName.equalsIgnoreCase("example")) {
72 | // a second 'example' property within the same object is rejected
73 | if (exampleString != null) throw new ExampleReadException("The property 'example' was found more than once in a single JSON example!");
74 |
75 | exampleString = reader.nextString();
76 |
77 | }
78 | else {
79 | throw new ExampleReadException("Unexpected property name found in JSON example: " + propertyName);
80 | }
81 | }
82 |
83 | reader.endObject();
84 | // an object without any 'example' property is treated as an error rather than as an empty example
85 | if (exampleString == null) throw new ExampleReadException("Empty JSON example found!");
86 |
87 | return new StringExample(exampleString);
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/JsonExamplesProvider.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import java.io.InputStream;
7 | import java.util.Iterator;
8 |
9 | import com.eharmony.matching.vw.webservice.common.example.Example;
10 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
11 |
12 | /**
13 | * @author vrahimtoola
14 | *
15 | * Provides VW examples represented as JSON.
16 | */
17 | public interface JsonExamplesProvider {
18 |
19 | /**
20 | * Allows the caller to consume JSON examples from an input stream.
21 | *
22 | * @param inputStream The input stream to consume JSON examples from.
23 | *
24 | * @return An iterator that allows the caller to iterate over the examples.
25 | */
26 | Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException;
27 | }
28 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/SimpleJsonExamplesMessageBodyReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.lang.annotation.Annotation;
9 | import java.lang.reflect.Type;
10 | import java.util.List;
11 | import java.util.Map.Entry;
12 |
13 | import javax.ws.rs.Consumes;
14 | import javax.ws.rs.WebApplicationException;
15 | import javax.ws.rs.core.MediaType;
16 | import javax.ws.rs.core.MultivaluedMap;
17 | import javax.ws.rs.ext.MessageBodyReader;
18 | import javax.ws.rs.ext.Provider;
19 |
20 | import org.apache.commons.lang3.StringUtils;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 |
24 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes;
25 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
26 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl;
27 |
28 | /**
29 | * @author vrahimtoola
30 | *
31 | */
32 | @Consumes({ ExampleMediaTypes.SIMPLE_JSON_0_1_0 })
33 | @Provider
34 | public class SimpleJsonExamplesMessageBodyReader implements MessageBodyReader {
35 |
36 | private static final Logger LOGGER = LoggerFactory.getLogger(SimpleJsonExamplesMessageBodyReader.class);
37 | /** Readable only when the media type's string form equals SIMPLE_JSON_0_1_0 and the requested type is exactly ExamplesIterable. */
38 | @Override
39 | public boolean isReadable(Class> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
40 |
41 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type);
42 | // NOTE(review): exact string comparison - a media type carrying extra parameters (e.g. a charset) presumably would not match; confirm
43 | boolean willReturn = mediaType.toString().equals(ExampleMediaTypes.SIMPLE_JSON_0_1_0) && type == ExamplesIterable.class;
44 |
45 | LOGGER.debug("Returning: {}", willReturn);
46 |
47 | return willReturn;
48 | }
49 | /** Logs the request headers at debug level, then wraps the entity stream in an ExamplesIterableImpl fed by a GsonJsonExamplesProvider. */
50 | @Override
51 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
52 | // header logging is skipped entirely unless debug logging is enabled
53 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) {
54 | LOGGER.debug("Rec'd HTTP headers: ");
55 |
56 | for (Entry> entry : httpHeaders.entrySet()) {
57 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ','));
58 | }
59 | }
60 | // NOTE(review): the meaning of the first two ExamplesIterableImpl arguments is not visible from here - confirm
61 | //TODO: hard-coding to GsonJsonExamplesProvider for now
62 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, new GsonJsonExamplesProvider().getExamplesFromStream(entityStream));
63 |
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonExamplesMessageBodyReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.lang.annotation.Annotation;
9 | import java.lang.reflect.Type;
10 | import java.util.List;
11 | import java.util.Map.Entry;
12 |
13 | import javax.ws.rs.Consumes;
14 | import javax.ws.rs.WebApplicationException;
15 | import javax.ws.rs.core.MediaType;
16 | import javax.ws.rs.core.MultivaluedMap;
17 | import javax.ws.rs.ext.MessageBodyReader;
18 | import javax.ws.rs.ext.Provider;
19 |
20 | import org.apache.commons.lang3.StringUtils;
21 | import org.slf4j.Logger;
22 | import org.slf4j.LoggerFactory;
23 |
24 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes;
25 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
26 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl;
27 |
28 | /**
29 | * @author vrahimtoola
30 | * Reads structured json examples from a stream.
31 | */
32 | @Consumes({ ExampleMediaTypes.STRUCTURED_JSON_0_1_0 })
33 | @Provider
34 | public class StructuredJsonExamplesMessageBodyReader implements MessageBodyReader {
35 |
36 | private static final Logger LOGGER = LoggerFactory.getLogger(StructuredJsonExamplesMessageBodyReader.class);
37 | /** Readable only when the media type's string form equals STRUCTURED_JSON_0_1_0 and the requested type is exactly ExamplesIterable. */
38 | @Override
39 | public boolean isReadable(Class> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
40 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type);
41 | // NOTE(review): exact string comparison - a media type carrying extra parameters (e.g. a charset) presumably would not match; confirm
42 | boolean willReturn = mediaType.toString().equals(ExampleMediaTypes.STRUCTURED_JSON_0_1_0) && type == ExamplesIterable.class;
43 |
44 | LOGGER.debug("Returning: {}", willReturn);
45 |
46 | return willReturn;
47 | }
48 | /** Logs the request headers at debug level, then wraps the entity stream in an ExamplesIterableImpl fed by a StructuredJsonExamplesProvider. */
49 | @Override
50 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
51 | // header logging is skipped entirely unless debug logging is enabled
52 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) {
53 | LOGGER.debug("Rec'd HTTP headers: ");
54 |
55 | for (Entry> entry : httpHeaders.entrySet()) {
56 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ','));
57 | }
58 | }
59 | // the (-1, -1) arguments disable the per-namespace feature limit and the per-example namespace limit
60 | //TODO: hard-coding to StructuredJsonExamplesProvider for now
61 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, new StructuredJsonExamplesProvider(-1, -1).getExamplesFromStream(entityStream));
62 |
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonExamplesProvider.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.io.IOException;
9 | import java.io.InputStream;
10 | import java.io.InputStreamReader;
11 | import java.util.Iterator;
12 |
13 | import org.apache.commons.lang3.StringUtils;
14 | import org.slf4j.Logger;
15 | import org.slf4j.LoggerFactory;
16 |
17 | import com.eharmony.matching.vw.webservice.common.example.Example;
18 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException;
19 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample;
20 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace;
21 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample.Namespace.NamespaceBuilder;
22 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
23 | import com.google.common.collect.AbstractIterator;
24 | import com.google.gson.stream.JsonReader;
25 | import com.google.gson.stream.JsonToken;
26 |
27 | /**
28 | * @author vrahimtoola
29 | *
30 | * An example reader writer for the Json format. The Json
31 | * format is expected to adhere to the format specified in
32 | * vw_example_schema.json, placed under src/test/resources.
33 | * The reason it's been placed under src/test/resources as opposed to
34 | * src/main/resources is that right now, this schema
35 | * file is only being used to document the schema, but is not being used in a
36 | * programmatic way to verify schema adherence (this is
37 | * being done via hand-coded logic).
38 | */
39 | public class StructuredJsonExamplesProvider implements JsonExamplesProvider {
40 |
41 | private static final Logger LOGGER = LoggerFactory.getLogger(StructuredJsonExamplesProvider.class);
42 |
43 | /*
44 | * The maximum number of features to read into a given namespace.
45 | */
46 | private final int maxNumberOfFeaturesPerNamespace;
47 |
48 | /*
49 | * The maximum number of namespaces to read into a given example.
50 | */
51 | private final int maxNumberOfNamespacesPerExample;
52 |
53 | /*
54 | * Constructor.
55 | *
56 | * @param maxNumberOfFeaturesPerNamespace <= 0 or Integer.MAX_VALUE mean
57 | * there's no limit.
58 | *
59 | * @param maxNumberOfNamespacesPerExample <= 0 or Integer.MAX_VALUE mean
60 | * there's no limit.
61 | */
62 | public StructuredJsonExamplesProvider(int maxNumberOfFeaturesPerNamespace, int maxNumberOfNamespacesPerExample) { // stores both limits exactly as given; no validation is done here
63 | this.maxNumberOfFeaturesPerNamespace = maxNumberOfFeaturesPerNamespace;
64 | this.maxNumberOfNamespacesPerExample = maxNumberOfNamespacesPerExample;
65 | }
66 |
67 | private StructuredExample readExample(long exampleNumber, JsonReader jsonReader) throws IOException {
68 |
69 | jsonReader.beginObject();
70 |
71 | boolean labelRead = false;
72 | boolean namespacesRead = false;
73 | boolean tagRead = false;
74 |
75 | StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder();
76 |
77 | boolean atLeastOnePropertyRead = false;
78 |
79 | while (jsonReader.hasNext()) {
80 |
81 | String propertyNameOriginal = jsonReader.nextName();
82 |
83 | String propertyName = propertyNameOriginal.trim().toLowerCase();
84 |
85 | if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_LABEL_PROPERTY)) {
86 |
87 | if (labelRead) {
88 |
89 | throw new ExampleFormatException(exampleNumber, "The 'label' property must only appear once in an example!");
90 |
91 | }
92 |
93 | if (jsonReader.peek() != JsonToken.NULL)
94 | exampleBuilder.setLabel(jsonReader.nextString());
95 | else {
96 | jsonReader.nextNull();
97 | }
98 |
99 | labelRead = true;
100 |
101 | atLeastOnePropertyRead = true;
102 |
103 | }
104 | else if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_TAG_PROPERTY)) {
105 |
106 | if (tagRead) throw new ExampleFormatException(exampleNumber, "The 'tag' property must only appear once in an example!");
107 |
108 | if (jsonReader.peek() != JsonToken.NULL)
109 | exampleBuilder.setTag(jsonReader.nextString());
110 | else {
111 | jsonReader.nextNull();
112 | }
113 | }
114 | else if (propertyName.equals(StructuredJsonPropertyNames.EXAMPLE_NAMESPACES_PROPERTY)) {
115 |
116 | if (namespacesRead) {
117 |
118 | throw new ExampleFormatException(exampleNumber, "The 'namespaces' property must only appear once in an example!");
119 | }
120 |
121 | if (jsonReader.peek() != JsonToken.NULL) {
122 |
123 | jsonReader.beginArray();
124 |
125 | int numNamespacesRead = 0;
126 |
127 | while (jsonReader.hasNext()) {
128 |
129 | Namespace namespace = readNamespace(exampleNumber, jsonReader);
130 |
131 | numNamespacesRead++;
132 |
133 | if (maxNumberOfNamespacesPerExample > 0 && maxNumberOfNamespacesPerExample < Integer.MAX_VALUE && numNamespacesRead > maxNumberOfNamespacesPerExample) {
134 | throw new ExampleFormatException(exampleNumber, "The maximum number of namespaces per example, " + maxNumberOfNamespacesPerExample + " was exceeded!");
135 | }
136 |
137 | exampleBuilder.addNamespace(namespace);
138 | }
139 |
140 | jsonReader.endArray();
141 |
142 | }
143 | else {
144 | jsonReader.nextNull();
145 | }
146 |
147 | namespacesRead = true;
148 |
149 | atLeastOnePropertyRead = true;
150 |
151 | }
152 | else {
153 |
154 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading example!");
155 | }
156 |
157 | }
158 |
159 | jsonReader.endObject();
160 |
161 | if (atLeastOnePropertyRead == false)
162 | return StructuredExample.EMPTY_EXAMPLE;
163 | else
164 | return exampleBuilder.build(); //this might return a normal example or a PIPE example.
165 | }
166 |
167 | private Namespace readNamespace(long exampleNumber, JsonReader jsonReader) throws IOException {
168 | jsonReader.beginObject();
169 |
170 | StructuredExample.Namespace.NamespaceBuilder nsBuilder = new StructuredExample.Namespace.NamespaceBuilder();
171 |
172 | boolean nameRead = false, scalingFactorRead = false, featuresRead = false;
173 |
174 | while (jsonReader.hasNext()) {
175 |
176 | String propertyNameOriginal = jsonReader.nextName();
177 | String propertyName = propertyNameOriginal.trim().toLowerCase();
178 |
179 | if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_NAME_PROPERTY)) {
180 |
181 | if (nameRead) {
182 |
183 | throw new ExampleFormatException(exampleNumber, "The 'name' property must only appear once in a namespace!");
184 | }
185 |
186 | if (jsonReader.peek() == JsonToken.NULL)
187 | jsonReader.nextNull();
188 | else {
189 | String namespace = jsonReader.nextString();
190 | nsBuilder.setName(namespace);
191 | }
192 | nameRead = true;
193 | }
194 | else if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_SCALING_FACTOR_PROPERTY)) {
195 |
196 | if (scalingFactorRead) {
197 |
198 | throw new ExampleFormatException(exampleNumber, "The 'value' property must only appear once in a namespace!");
199 | }
200 |
201 | if (jsonReader.peek() == JsonToken.NULL)
202 | jsonReader.nextNull();
203 | else {
204 | double scalingFactor = jsonReader.nextDouble();
205 | nsBuilder.setScalingFactor(Float.valueOf((float) scalingFactor));
206 | }
207 | scalingFactorRead = true;
208 |
209 | }
210 | else if (propertyName.equals(StructuredJsonPropertyNames.NAMESPACE_FEATURES_PROPERTY)) {
211 |
212 | if (featuresRead) {
213 |
214 | throw new ExampleFormatException(exampleNumber, "The 'features' property must only appear once in a namespace!");
215 | }
216 |
217 | if (jsonReader.peek() == JsonToken.NULL) {
218 | jsonReader.nextNull();
219 | }
220 | else {
221 |
222 | jsonReader.beginArray();
223 |
224 | int numFeaturesAdded = 0;
225 |
226 | while (jsonReader.hasNext()) {
227 | readFeatureIntoNamespace(exampleNumber, nsBuilder, jsonReader);
228 |
229 | numFeaturesAdded++;
230 |
231 | if (maxNumberOfFeaturesPerNamespace > 0 && maxNumberOfFeaturesPerNamespace < Integer.MAX_VALUE && numFeaturesAdded > maxNumberOfFeaturesPerNamespace) {
232 | throw new ExampleFormatException(exampleNumber, "The maximum number of features per namespace, " + maxNumberOfFeaturesPerNamespace + " was exceeded!");
233 | }
234 | }
235 |
236 | jsonReader.endArray();
237 |
238 | }
239 | featuresRead = true;
240 |
241 | }
242 | else {
243 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading namespace!");
244 | }
245 | }
246 |
247 | jsonReader.endObject();
248 |
249 | return nsBuilder.build();
250 | }
251 |
252 | private void readFeatureIntoNamespace(long exampleNumber, NamespaceBuilder nsBuilder, JsonReader jsonReader) throws IOException {
253 | jsonReader.beginObject();
254 |
255 | String name = null;
256 | Float value = null;
257 |
258 | boolean nameRead = false, valueRead = false;
259 |
260 | while (jsonReader.hasNext()) {
261 |
262 | String propertyNameOriginal = jsonReader.nextName();
263 |
264 | String propertyName = propertyNameOriginal.toLowerCase();
265 |
266 | if (propertyName.equals(StructuredJsonPropertyNames.FEATURE_NAME_PROPERTY)) {
267 |
268 | if (nameRead) {
269 |
270 | throw new ExampleFormatException(exampleNumber, "The 'name' property can only appear once in a feature!");
271 | }
272 |
273 | name = jsonReader.nextString(); //feature name should never be null, so not doing the null check here. if it's null, let the exception
274 | //be propagated.
275 |
276 | nameRead = true;
277 |
278 | }
279 | else if (propertyName.equals(StructuredJsonPropertyNames.FEATURE_VALUE_PROPERTY)) {
280 |
281 | if (valueRead) {
282 |
283 | throw new ExampleFormatException(exampleNumber, "The 'value' property can only appear once in a feature!");
284 | }
285 |
286 | if (jsonReader.peek() == JsonToken.NULL)
287 | jsonReader.nextNull();
288 | else
289 | value = Float.valueOf((float) jsonReader.nextDouble());
290 |
291 | valueRead = true;
292 |
293 | }
294 | else {
295 |
296 | throw new ExampleFormatException(exampleNumber, "Unknown property: " + propertyNameOriginal + " found while reading feature!");
297 | }
298 |
299 | }
300 |
301 | jsonReader.endObject();
302 |
303 | if (StringUtils.isBlank(name) == false) //add feature only if the name exists.
304 | nsBuilder.addFeature(name, value);
305 | }
306 |
307 | @Override
308 | public Iterator getExamplesFromStream(InputStream inputStream) throws ExampleReadException {
309 |
310 | checkNotNull(inputStream);
311 |
312 | final InputStream theInputStream = inputStream;
313 |
314 | return new AbstractIterator() {
315 |
316 | private boolean didBeginArray = false;
317 |
318 | private long currentExampleNumber = 1;
319 |
320 | private TracingJsonReader jsonReader;
321 |
322 | private boolean closeReader = false;
323 |
324 | @Override
325 | protected StructuredExample computeNext() {
326 |
327 | try {
328 | if (!didBeginArray) {
329 |
330 | jsonReader = new TracingJsonReader(new InputStreamReader(theInputStream), LOGGER.isTraceEnabled());
331 |
332 | jsonReader.beginArray();
333 | didBeginArray = true;
334 | }
335 |
336 | if (jsonReader.hasNext()) {
337 |
338 | StructuredExample toReturn = readExample(currentExampleNumber++, jsonReader);
339 |
340 | jsonReader.reset(); //prepare for next example
341 |
342 | return toReturn;
343 | }
344 | else {
345 |
346 | jsonReader.endArray();
347 |
348 | closeReader = true;
349 |
350 | return (StructuredExample) endOfData();
351 | }
352 | }
353 | catch (ExampleFormatException ee) {
354 |
355 | if (LOGGER.isTraceEnabled()) {
356 | try {
357 | LOGGER.error("Erroneous JSON example: {}", jsonReader.getAllJsonReadSoFar());
358 | }
359 | catch (IOException e) {
360 | LOGGER.error("Failed to spit out erroneous JSON example!", e);
361 | }
362 | }
363 |
364 | closeReader = true;
365 |
366 | //make sure that we've set the example number, useful for debugging
367 | ee.setExampleNumber(currentExampleNumber - 1);
368 |
369 | throw ee;
370 | }
371 | catch (Exception e) {
372 |
373 | closeReader = true;
374 |
375 | if (jsonReader != null) try {
376 |
377 | LOGGER.error("Example read exception when attempting to read example number {} - tracing json output: {}", currentExampleNumber - 1, jsonReader.getAllJsonReadSoFar());
378 | }
379 | catch (IOException e1) {
380 | LOGGER.error("Error: {}", e1);
381 | }
382 |
383 | throw new ExampleReadException(e);
384 | }
385 | finally {
386 |
387 | if (closeReader) try {
388 | if (jsonReader != null) {
389 | jsonReader.close();
390 | }
391 | }
392 | catch (Exception e2) {
393 | LOGGER.warn("Error closing JSON reader! Message: {}", e2.getMessage(), e2);
394 | }
395 | }
396 |
397 | }
398 | };
399 |
400 | }
401 |
402 | }
403 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/StructuredJsonPropertyNames.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
/**
 * Names of properties in structured json.
 *
 * The JSON readers compare these against lower-cased property names, so every value here
 * is lower case. This class only holds constants and is not meant to be instantiated.
 *
 * @author vrahimtoola
 */
public final class StructuredJsonPropertyNames {

    /** Property of an example that holds its label. */
    public static final String EXAMPLE_LABEL_PROPERTY = "label";

    /** Property of an example that holds its tag. */
    public static final String EXAMPLE_TAG_PROPERTY = "tag";

    /** Property of an example that holds its array of namespaces. */
    public static final String EXAMPLE_NAMESPACES_PROPERTY = "namespaces";

    /** Property of a namespace that holds its name. */
    public static final String NAMESPACE_NAME_PROPERTY = "name";

    /** Property of a namespace that holds its scaling factor. */
    public static final String NAMESPACE_SCALING_FACTOR_PROPERTY = "scale";

    /** Property of a namespace that holds its array of features. */
    public static final String NAMESPACE_FEATURES_PROPERTY = "features";

    /** Property of a feature that holds its name. */
    public static final String FEATURE_NAME_PROPERTY = "name";

    /** Property of a feature that holds its numeric value. */
    public static final String FEATURE_VALUE_PROPERTY = "value";

    private StructuredJsonPropertyNames() {
        // constants holder, no instances
    }

}
24 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/jsonexamplesmessagebodyreader/TracingJsonReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.io.IOException;
9 | import java.io.Reader;
10 | import java.io.StringWriter;
11 |
12 | import org.slf4j.Logger;
13 | import org.slf4j.LoggerFactory;
14 |
15 | import com.fasterxml.jackson.core.JsonFactory;
16 | import com.fasterxml.jackson.core.JsonGenerator;
17 | import com.google.gson.stream.JsonReader;
18 | import com.google.gson.stream.JsonToken;
19 |
20 | /**
21 | * @author vrahimtoola
22 | *
23 | * A Json reader that writes everything it reads about an example, into
24 | * a json writer. The only time it will not do this is when skipValue()
25 | * is called, in which case the string "(SKIPPED VALUE)" is written out
26 | * instead.
27 | *
28 | * The JsonReader uses Gson because I like it's API a lot better than
29 | * Jackson's, which I find to be lower level than Gson's. However, with
30 | * Gson the writer will not write out just the fieldname, you have to
31 | * give it the value of the field as well. Even if you call flush() on
32 | * it, it still won't write out just a fieldname because that's not
33 | * valid Json (you need the value to go along with it).
34 | *
35 | * For the purpose of debugging, I want ALL the Json exactly as it's
36 | * been read, and the Jackson writer lets you spit out just the
37 | * fieldnames if need be, without forcing you to supply a null value.
38 | *
39 | * It's not ideal since it requires the project to pull in 2 separate
40 | * libraries for the same aspect (JSON processing), but I'm not going to
41 | * fuss over it for the moment...
42 | */
43 | public class TracingJsonReader extends JsonReader {
44 |
45 | private static final Logger LOGGER = LoggerFactory.getLogger(TracingJsonReader.class);
46 |
47 | private final String SKIPPED_VALUE_STRING = "(SKIPPED VALUE)";
48 |
49 | private StringWriter exampleJsonWriter;
50 | private JsonGenerator debugWriter;
51 |
52 | public TracingJsonReader(Reader in, boolean isTracingEnabled) throws IOException {
53 | super(in);
54 |
55 | checkNotNull(in);
56 |
57 | if (isTracingEnabled) {
58 | this.exampleJsonWriter = new StringWriter();
59 | this.debugWriter = new JsonFactory().createGenerator(exampleJsonWriter);
60 | }
61 |
62 | }
63 |
64 | /*
65 | * Returns all the Json that has been read in thus far.
66 | */
67 | public String getAllJsonReadSoFar() throws IOException {
68 | if (debugWriter != null) {
69 | debugWriter.flush();
70 | return exampleJsonWriter.toString();
71 | }
72 |
73 | return "";
74 | }
75 |
76 | /*
77 | * Resets the string writer, making this instance ready for the next
78 | * example.
79 | */
80 | public void reset() throws IOException {
81 |
82 | if (debugWriter != null) { //if the debugwriter is null, then tracing is not enabled, so don't do anything.
83 | exampleJsonWriter.getBuffer().setLength(0);
84 | }
85 | }
86 |
87 | @Override
88 | public void beginArray() throws IOException {
89 | super.beginArray();
90 |
91 | if (debugWriter != null) debugWriter.writeStartArray();
92 |
93 | }
94 |
95 | @Override
96 | public void beginObject() throws IOException {
97 | super.beginObject();
98 |
99 | if (debugWriter != null) debugWriter.writeStartObject();
100 | }
101 |
102 | @Override
103 | public void close() throws IOException {
104 | super.close();
105 |
106 | if (debugWriter != null) debugWriter.close();
107 | }
108 |
109 | @Override
110 | public void endArray() throws IOException {
111 | super.endArray();
112 |
113 | if (debugWriter != null) debugWriter.writeEndArray();
114 | }
115 |
116 | @Override
117 | public void endObject() throws IOException {
118 | super.endObject();
119 |
120 | if (debugWriter != null) debugWriter.writeEndObject();
121 | }
122 |
123 | @Override
124 | public boolean hasNext() throws IOException {
125 | return super.hasNext();
126 | }
127 |
128 | @Override
129 | public boolean nextBoolean() throws IOException {
130 | boolean toReturn = super.nextBoolean();
131 |
132 | if (debugWriter != null) debugWriter.writeBoolean(toReturn);
133 |
134 | return toReturn;
135 | }
136 |
137 | @Override
138 | public double nextDouble() throws IOException {
139 | double toReturn = super.nextDouble();
140 |
141 | if (debugWriter != null) debugWriter.writeNumber(toReturn);
142 |
143 | return toReturn;
144 | }
145 |
146 | @Override
147 | public int nextInt() throws IOException {
148 | int toReturn = super.nextInt();
149 |
150 | if (debugWriter != null) debugWriter.writeNumber(toReturn);
151 |
152 | return toReturn;
153 | }
154 |
155 | @Override
156 | public long nextLong() throws IOException {
157 | long toReturn = super.nextLong();
158 |
159 | if (debugWriter != null) debugWriter.writeNumber(toReturn);
160 |
161 | return toReturn;
162 | }
163 |
164 | @Override
165 | public String nextName() throws IOException {
166 | String toReturn = super.nextName();
167 |
168 | if (debugWriter != null) debugWriter.writeFieldName(toReturn);
169 |
170 | return toReturn;
171 | }
172 |
173 | @Override
174 | public void nextNull() throws IOException {
175 | super.nextNull();
176 |
177 | if (debugWriter != null) debugWriter.writeNull(); //write out a null, so we stay in sync with the reader.
178 | }
179 |
180 | @Override
181 | public String nextString() throws IOException {
182 | String toReturn = super.nextString();
183 |
184 | if (debugWriter != null) debugWriter.writeString(toReturn);
185 |
186 | return toReturn;
187 | }
188 |
189 | @Override
190 | public JsonToken peek() throws IOException {
191 | return super.peek();
192 | }
193 |
194 | @Override
195 | public void skipValue() throws IOException {
196 | super.skipValue();
197 |
198 | //write out a null, so we stay in sync with the reader
199 | if (debugWriter != null) debugWriter.writeString(SKIPPED_VALUE_STRING);
200 | }
201 |
202 | @Override
203 | public String toString() {
204 | return super.toString();
205 | }
206 |
207 | }
208 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/plaintextexamplesmessagebodyreader/PlainTextExamplesMessageBodyReader.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.plaintextexamplesmessagebodyreader;
5 |
6 | import java.io.IOException;
7 | import java.io.InputStream;
8 | import java.lang.annotation.Annotation;
9 | import java.lang.reflect.Type;
10 | import java.nio.charset.Charset;
11 | import java.util.List;
12 | import java.util.Map.Entry;
13 |
14 | import javax.ws.rs.Consumes;
15 | import javax.ws.rs.WebApplicationException;
16 | import javax.ws.rs.core.MediaType;
17 | import javax.ws.rs.core.MultivaluedMap;
18 | import javax.ws.rs.ext.MessageBodyReader;
19 | import javax.ws.rs.ext.Provider;
20 |
21 | import org.apache.commons.lang3.StringUtils;
22 | import org.glassfish.jersey.message.internal.ReaderWriter;
23 | import org.slf4j.Logger;
24 | import org.slf4j.LoggerFactory;
25 |
26 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes;
27 | import com.eharmony.matching.vw.webservice.core.ExamplesIterable;
28 | import com.eharmony.matching.vw.webservice.core.ExamplesIterableImpl;
29 |
30 | /**
31 | * @author vrahimtoola
32 | *
33 | * A message body reader that can read an Iterable from the
34 | * message body of an HTTP request.
35 | */
36 | @Consumes({ MediaType.TEXT_PLAIN, ExampleMediaTypes.PLAINTEXT_0_1_0 })
37 | @Provider
38 | public class PlainTextExamplesMessageBodyReader implements MessageBodyReader {
39 |
40 | public PlainTextExamplesMessageBodyReader() {
41 |
42 | }
43 |
44 | private static final Logger LOGGER = LoggerFactory.getLogger(PlainTextExamplesMessageBodyReader.class);
45 |
46 | @Override
47 | public boolean isReadable(Class> type, Type genericType, Annotation[] annotations, MediaType mediaType) {
48 |
49 | LOGGER.debug("Called with media type: {} and type: {}", mediaType.toString(), type);
50 |
51 | boolean willReturn = (mediaType.isCompatible(MediaType.TEXT_PLAIN_TYPE) || mediaType.toString().equals(ExampleMediaTypes.PLAINTEXT_0_1_0)) && type == ExamplesIterable.class;
52 |
53 | LOGGER.debug("Returning: {}", willReturn);
54 |
55 | return willReturn;
56 | }
57 |
58 | /*
59 | * (non-Javadoc)
60 | *
61 | * @see javax.ws.rs.ext.MessageBodyReader#readFrom(java.lang.Class,
62 | * java.lang.reflect.Type, java.lang.annotation.Annotation[],
63 | * javax.ws.rs.core.MediaType, javax.ws.rs.core.MultivaluedMap,
64 | * java.io.InputStream)
65 | *
66 | * Expects data to arrive as url-encoded strings.
67 | *
68 | * TODO: look at specific mediatypes eg text/vw
69 | */
70 | @Override
71 | public ExamplesIterable readFrom(Class type, Type genericType, Annotation[] annotations, MediaType mediaType, MultivaluedMap httpHeaders, InputStream entityStream) throws IOException, WebApplicationException {
72 |
73 | if (LOGGER.isDebugEnabled()) if (httpHeaders != null && httpHeaders.size() > 0) {
74 | LOGGER.debug("Rec'd HTTP headers: ");
75 |
76 | for (Entry> entry : httpHeaders.entrySet()) {
77 | LOGGER.debug("{}:{}", entry.getKey(), StringUtils.join(entry.getValue(), ','));
78 | }
79 | }
80 |
81 | // TODO:
82 | // if a content-length has been provided, then use that to read entire
83 | // string in one go.
84 |
85 | Charset charset = ReaderWriter.getCharset(mediaType);
86 |
87 | LOGGER.debug("Reading examples using charset: {}", charset.displayName());
88 |
89 | StringExampleIterator theIterator = new StringExampleIterator(entityStream, charset);
90 |
91 | // TODO: provide the proper number of examples here
92 | // setting this to Integer.MAX_VALUE for now to force streaming
93 | return new ExamplesIterableImpl(Integer.MAX_VALUE, null, theIterator);
94 | }
95 |
96 | }
97 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/messagebodyreader/plaintextexamplesmessagebodyreader/StringExampleIterator.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.messagebodyreader.plaintextexamplesmessagebodyreader;
5 |
6 | import static com.google.common.base.Preconditions.checkNotNull;
7 |
8 | import java.io.BufferedReader;
9 | import java.io.IOException;
10 | import java.io.InputStream;
11 | import java.io.InputStreamReader;
12 | import java.nio.charset.Charset;
13 | import java.util.Iterator;
14 | import java.util.NoSuchElementException;
15 |
16 | import org.slf4j.Logger;
17 | import org.slf4j.LoggerFactory;
18 |
19 | import com.eharmony.matching.vw.webservice.common.example.Example;
20 | import com.eharmony.matching.vw.webservice.common.example.StringExample;
21 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
22 |
23 | /**
24 | * @author vrahimtoola
25 | *
26 | * Reads 1 string at a time from some input stream.
27 | *
28 | * TODO look at guava's abstract iterator and the test that comes with
29 | * guava
30 | */
31 | public class StringExampleIterator implements Iterator {
32 |
33 | private static final Logger LOGGER = LoggerFactory.getLogger(StringExampleIterator.class);
34 |
35 | /*
36 | * The reader.
37 | */
38 | private final BufferedReader reader;
39 |
40 | /*
41 | * The example to be returned, when 'next()' is called.
42 | */
43 | private String nextExampleToReturn = null;
44 |
45 | private long numTotalExamples = 0;
46 |
47 | public StringExampleIterator(InputStream inputStream, Charset charset) throws IOException {
48 |
49 | checkNotNull(inputStream, "A null input stream was provided!");
50 | reader = new BufferedReader(new InputStreamReader(inputStream, charset));
51 |
52 | advance();
53 | }
54 |
55 | @Override
56 | public boolean hasNext() {
57 | return nextExampleToReturn != null;
58 | }
59 |
60 | @Override
61 | public Example next() {
62 |
63 | String toReturn = nextExampleToReturn;
64 |
65 | if (toReturn == null) throw new NoSuchElementException("No element to return! Make sure to call 'hasNext()' and that it returns true before invoking this method!");
66 |
67 | try {
68 | advance();
69 | }
70 | catch (IOException e) {
71 |
72 | throw new ExampleReadException("Exception reading examples! Message: " + e.getMessage(), e);
73 | }
74 |
75 | //TODO remove this
76 | if (toReturn.length() >= 592) {
77 | LOGGER.trace("Received super long example: {}", toReturn);
78 | }
79 |
80 | return new StringExample(toReturn);
81 | }
82 |
83 | @Override
84 | public void remove() {
85 | throw new UnsupportedOperationException("The 'remove' operation is not supported!");
86 | }
87 |
88 | private void advance() throws IOException {
89 |
90 | nextExampleToReturn = reader.readLine();
91 |
92 | if (nextExampleToReturn != null) {
93 | numTotalExamples++;
94 |
95 | if (numTotalExamples == 1) LOGGER.debug("First example read: {}", nextExampleToReturn);
96 | }
97 | else {
98 | LOGGER.debug("Read a total of {} examples", numTotalExamples);
99 | }
100 | }
101 |
102 | }
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/java/com/eharmony/matching/vw/webservice/util/StringIterable.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.util;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.IOException;
8 | import java.io.StringReader;
9 | import java.util.Iterator;
10 | import java.util.NoSuchElementException;
11 |
/**
 * An implementation of Iterable that returns an iterator to iterate over the lines of a
 * given chunk of text. FYI the iterator returned is not thread safe.
 *
 * Every call to {@link #iterator()} starts a fresh pass over the same text.
 *
 * @author vrahimtoola
 */
public class StringIterable implements Iterable<String> {

    private final String theText;

    /**
     * Constructor.
     *
     * @param chunkOfText
     *            The text to iterate over. Cannot be null (but can be empty).
     * @throws IllegalArgumentException
     *             if the text is null
     */
    public StringIterable(String chunkOfText) {
        if (chunkOfText == null) {
            throw new IllegalArgumentException("'chunkOfText' cannot be null!");
        }

        theText = chunkOfText;
    }

    /* (non-Javadoc)
     * @see java.lang.Iterable#iterator()
     */
    @Override
    public Iterator<String> iterator() {

        return new StringBufferedReaderIterator(theText);
    }

    /*
     * The iterator. Uses a BufferedReader to read lines one at a time from the chunk of text,
     * always holding the next line in advance.
     * Note that this iterator is not thread-safe.
     */
    private static class StringBufferedReaderIterator implements Iterator<String> {

        private final BufferedReader bufferedReader;

        // The line handed out by the next call to 'next()'; null once there are no more lines.
        private String nextLineToReturn = null;

        // Once set, the reader is never touched again.
        private boolean faultedOrClosed = false;

        public StringBufferedReaderIterator(String theText) {
            bufferedReader = new BufferedReader(new StringReader(theText));
            advance();
        }

        @Override
        public boolean hasNext() {

            return nextLineToReturn != null;
        }

        @Override
        public String next() {

            if (nextLineToReturn == null) {
                throw new NoSuchElementException("No element to return! Make sure 'hasNext()' has been called and it returned 'true' before invoking this method.");
            }

            String toReturnString = nextLineToReturn; //save reference, since the call to 'advance' below updates 'nextLineToReturn'.

            advance();

            return toReturnString;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException("The 'remove' operation is not supported.");
        }

        /*
         * Reads the next line ahead of time, closing the reader when the text is exhausted.
         */
        private void advance() {
            if (faultedOrClosed) {
                return;
            }

            try {
                nextLineToReturn = bufferedReader.readLine();

                if (nextLineToReturn == null) { //close the bufferedReader if no more lines to return
                    bufferedReader.close();
                    faultedOrClosed = true; //so that we don't try again to read from the bufferedReader
                }

            }
            catch (IOException e) {
                // Deliberately not rethrown: a read failure simply ends the iteration.
                faultedOrClosed = true; //so that we don't try again to read from the bufferedReader
                nextLineToReturn = null; //so that an exception is thrown if someone calls 'next()'.
            }

        }
    }

}
108 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/resources/logback.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
6 |
7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/resources/logging.properties:
--------------------------------------------------------------------------------
1 | .level = ALL
2 |
3 | handlers=java.util.logging.ConsoleHandler
4 |
5 |
6 | java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter
7 | java.util.logging.ConsoleHandler.level = FINEST
8 |
9 | java.util.logging.FileHandler.level = FINEST
10 | #java.util.logging.FileHandler.filter specifies the name of a Filter class to use (defaults to no Filter).
11 | java.util.logging.FileHandler.formatter = java.util.logging.SimpleFormatter
12 | #java.util.logging.FileHandler.encoding the name of the character set encoding to use (defaults to the default platform encoding).
13 | #java.util.logging.FileHandler.limit specifies an approximate maximum amount to write (in bytes) to any one file. If this is zero, then there is no limit. (Defaults to no limit).
14 | #java.util.logging.FileHandler.count specifies how many output files to cycle through (defaults to 1).
15 | java.util.logging.FileHandler.pattern = /Users/vrahimtoola/Desktop/vw-webservice.log
16 | #java.util.logging.FileHandler.append specifies whether the FileHandler should append onto any existing files (defaults to false).
17 |
18 | org.apache.http.level = FINEST
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/resources/vw-webservice.properties:
--------------------------------------------------------------------------------
1 | vw.hostName=localhost
2 | vw.port=26542
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/webapp/WEB-INF/applicationContext.xml:
--------------------------------------------------------------------------------
1 |
2 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/main/webapp/WEB-INF/web.xml:
--------------------------------------------------------------------------------
1 |
2 |
4 |
5 |
6 | contextConfigLocation
7 | /WEB-INF/applicationContext.xml
8 |
9 |
10 | org.springframework.web.context.ContextLoaderListener
11 |
12 |
13 | vw-webservice
14 | org.glassfish.jersey.servlet.ServletContainer
15 |
16 | jersey.config.server.provider.packages
17 | com.eharmony.matching.vw.webservice;com.eharmony.matching.vw.webservice.messagebodyreader
18 |
19 | 1
20 | true
21 |
22 |
23 | vw-webservice
24 | /*
25 |
26 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/AsyncHttpClientTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.client;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 | import java.io.OutputStreamWriter;
10 | import java.io.PipedInputStream;
11 | import java.io.PipedOutputStream;
12 | import java.util.concurrent.Callable;
13 | import java.util.concurrent.ExecutionException;
14 | import java.util.concurrent.Executors;
15 | import java.util.concurrent.Future;
16 |
17 | import junit.framework.Assert;
18 |
19 | import org.junit.Before;
20 | import org.junit.Ignore;
21 | import org.junit.Test;
22 | import org.slf4j.Logger;
23 | import org.slf4j.LoggerFactory;
24 |
25 | import com.eharmony.matching.vw.webservice.common.example.ExampleMediaTypes;
26 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample;
27 | import com.eharmony.matching.vw.webservice.messagebodyreader.jsonexamplesmessagebodyreader.JsonTestUtils;
28 | import com.google.gson.stream.JsonWriter;
29 | import com.ning.http.client.AsyncHandler;
30 | import com.ning.http.client.AsyncHttpClient;
31 | import com.ning.http.client.AsyncHttpClientConfig;
32 | import com.ning.http.client.AsyncHttpClientConfig.Builder;
33 | import com.ning.http.client.BodyGenerator;
34 | import com.ning.http.client.HttpResponseBodyPart;
35 | import com.ning.http.client.HttpResponseHeaders;
36 | import com.ning.http.client.HttpResponseStatus;
37 | import com.ning.http.client.Request;
38 | import com.ning.http.client.RequestBuilder;
39 | import com.ning.http.client.Response;
40 | import com.ning.http.client.generators.InputStreamBodyGenerator;
41 |
42 | /**
43 | * @author vrahimtoola
44 | * Uses the Async Http Client to hit the web service. This is the only
45 | * java client that I've been able to get to work!
46 | */
47 | public class AsyncHttpClientTest {
48 |
49 | private static final Logger LOGGER = LoggerFactory.getLogger(AsyncHttpClientTest.class);
50 |
51 | private int roundsOfDataToSubmit = 1;
52 |
53 | private boolean testFailed = false;
54 |
55 | @Before
56 | public void setUp() {
57 | roundsOfDataToSubmit = 3; //this means 3 * (number of examples in ner.train) examples will be submitted to the web service.
58 | testFailed = false;
59 | }
60 |
61 | private synchronized void onTestFailed() {
62 | testFailed = true;
63 | }
64 |
65 | private synchronized boolean getTestFailed() {
66 | return testFailed;
67 | }
68 |
69 | /*
70 | * The ignore annotation is to keep the travis-ci build from failing.
71 | */
72 | @Ignore
73 | @Test
74 | public void plainTextExamplesTest() throws IOException, InterruptedException, ExecutionException {
75 |
76 | RequestBuilder builder = new RequestBuilder("POST");
77 |
78 | //note: assumes that a vw-webservice is running on localhost at 8080.
79 | //modify the address accordingly if it's running on a different host/port.
80 |
81 | Request request = builder.setUrl("http://localhost:8080/vw-webservice-jersey/predict/main").addHeader("Content-Type", ExampleMediaTypes.PLAINTEXT_0_1_0).setBody(getPlainTextInputStreamBodyGenerator()).build();
82 |
83 | doTest(request);
84 | }
85 |
86 | /*
87 | * The ignore annotation is to keep the travis-ci build from failing.
88 | */
89 | @Ignore
90 | @Test
91 | public void structuredJsonExamplesTest() throws IOException, InterruptedException, ExecutionException {
92 |
93 | RequestBuilder builder = new RequestBuilder("POST");
94 |
95 | //note: assumes that a vw-webservice is running on localhost at 8080.
96 | //modify the address accordingly if it's running on a different host/port.
97 |
98 | Request request = builder.setUrl("http://localhost:8080/vw-webservice-jersey/predict/main").addHeader("Content-Type", ExampleMediaTypes.STRUCTURED_JSON_0_1_0).setBody(getJsonInputStreamBodyGenerator()).build();
99 |
100 | doTest(request);
101 | }
102 |
103 | /*
104 | * The main method that carries out the test agains the web service and
105 | * verifies the results.
106 | */
107 | private void doTest(Request request) throws InterruptedException, ExecutionException, IOException {
108 | final PipedOutputStream pipedOutputStream = new PipedOutputStream();
109 | final PipedInputStream pipedInputStream = new PipedInputStream(pipedOutputStream);
110 |
111 | AsyncHandler asyncHandler = new AsyncHandler() {
112 | private final Response.ResponseBuilder builder = new Response.ResponseBuilder();
113 |
114 | @Override
115 | public STATE onBodyPartReceived(final HttpResponseBodyPart content) throws Exception {
116 | content.writeTo(pipedOutputStream);
117 | return STATE.CONTINUE;
118 | }
119 |
120 | @Override
121 | public STATE onStatusReceived(final HttpResponseStatus status) throws Exception {
122 | builder.accumulate(status);
123 | return STATE.CONTINUE;
124 | }
125 |
126 | @Override
127 | public STATE onHeadersReceived(final HttpResponseHeaders headers) throws Exception {
128 | builder.accumulate(headers);
129 | return STATE.CONTINUE;
130 | }
131 |
132 | @Override
133 | public Response onCompleted() throws Exception {
134 |
135 | LOGGER.info("On complete called!");
136 |
137 | pipedOutputStream.flush();
138 | pipedOutputStream.close();
139 |
140 | return builder.build();
141 |
142 | }
143 |
144 | @Override
145 | public void onThrowable(Throwable arg0) {
146 | // TODO Auto-generated method stub
147 | LOGGER.error("Error: {}", arg0);
148 | onTestFailed();
149 | }
150 |
151 | };
152 |
153 | Future readingThreadFuture = Executors.newCachedThreadPool().submit(new Callable() {
154 |
155 | @Override
156 | public Void call() throws Exception {
157 | BufferedReader reader = new BufferedReader(new InputStreamReader(pipedInputStream));
158 |
159 | String readPrediction;
160 |
161 | int numPredictionsRead = 0;
162 |
163 | while ((readPrediction = reader.readLine()) != null) {
164 | //LOGGER.info("Got prediction: {}", readPrediction);
165 | numPredictionsRead++;
166 | }
167 |
168 | LOGGER.info("Read a total of {} predictions", numPredictionsRead);
169 | Assert.assertEquals(roundsOfDataToSubmit * 272274, numPredictionsRead);
170 |
171 | return null;
172 | }
173 | });
174 |
175 | Builder config = new AsyncHttpClientConfig.Builder();
176 |
177 | config.setRequestTimeoutInMs(-1); //need to set this to -1, to indicate wait forever. setting to 0 actually means a 0 ms timeout!
178 |
179 | AsyncHttpClient client = new AsyncHttpClient(config.build());
180 |
181 | client.executeRequest(request, asyncHandler).get();
182 |
183 | readingThreadFuture.get(); //verify no exceptions occurred when reading predictions
184 |
185 | client.close();
186 |
187 | Assert.assertFalse(getTestFailed());
188 | }
189 |
190 | /*
191 | * Returns a body generator that places plain text examples into the request
192 | * body.
193 | */
194 | private BodyGenerator getPlainTextInputStreamBodyGenerator() throws IOException {
195 |
196 | //the examples
197 | //final GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz"));
198 |
199 | PipedInputStream pipedInputStream = new PipedInputStream();
200 |
201 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(pipedInputStream);
202 |
203 | Executors.newCachedThreadPool().submit(new Runnable() {
204 |
205 | @Override
206 | public void run() {
207 |
208 | try {
209 |
210 | for (int x = 0; x < roundsOfDataToSubmit; x++) {
211 |
212 | Iterable structuredExamplesIterable = TestUtils.getStructuredExamplesFromNerTrain();
213 |
214 | for (StructuredExample structuredExample : structuredExamplesIterable) {
215 | pipedOutputStream.write((structuredExample.getVWStringRepresentation() + "\n").getBytes());
216 | pipedOutputStream.flush();
217 | }
218 |
219 | LOGGER.info("Submitted round {} of examples...", (x + 1));
220 | }
221 |
222 | }
223 | catch (Exception e) {
224 | LOGGER.error("Error in submitting examples to piped output stream!", e);
225 | onTestFailed();
226 | }
227 | finally {
228 | try {
229 | pipedOutputStream.close();
230 | }
231 | catch (IOException e) {
232 | LOGGER.error("Failed to close piped outputstream!", e);
233 | onTestFailed();
234 | }
235 | }
236 |
237 | }
238 | });
239 |
240 | return new InputStreamBodyGenerator(pipedInputStream);
241 | }
242 |
243 | /*
244 | * Returns a body generator that places JSON formatted examples into the
245 | * request body.
246 | */
247 | private BodyGenerator getJsonInputStreamBodyGenerator() throws IOException {
248 |
249 | //the examples
250 | //final GZIPInputStream gzipInputStream = new GZIPInputStream(this.getClass().getClassLoader().getResourceAsStream("ner.train.gz"));
251 |
252 | PipedInputStream pipedInputStream = new PipedInputStream();
253 |
254 | final PipedOutputStream pipedOutputStream = new PipedOutputStream(pipedInputStream);
255 |
256 | Executors.newCachedThreadPool().submit(new Runnable() {
257 |
258 | @Override
259 | public void run() {
260 |
261 | OutputStreamWriter outputStreamWriter = new OutputStreamWriter(pipedOutputStream);
262 | JsonWriter jsonWriter = new JsonWriter(outputStreamWriter);
263 |
264 | try {
265 |
266 | int submitRound = 0;
267 |
268 | jsonWriter.beginArray();
269 |
270 | for (int x = 0; x < roundsOfDataToSubmit; x++) {
271 |
272 | Iterable structuredExamplesIterable = TestUtils.getStructuredExamplesFromNerTrain();
273 |
274 | for (StructuredExample structuredExample : structuredExamplesIterable) {
275 | JsonTestUtils.writeExample(jsonWriter, structuredExample);
276 | outputStreamWriter.flush();
277 | pipedOutputStream.flush();
278 | }
279 |
280 | LOGGER.info("Submitted round {} of examples...", ++submitRound);
281 | }
282 |
283 | jsonWriter.endArray();
284 |
285 | }
286 | catch (Exception e) {
287 | LOGGER.error("Error in submitting examples to piped output stream!", e);
288 | onTestFailed();
289 | }
290 | finally {
291 | try {
292 | jsonWriter.flush();
293 | }
294 | catch (IOException e) {
295 | LOGGER.error("Error flushing json writer!", e);
296 | onTestFailed();
297 | }
298 |
299 | try {
300 | jsonWriter.close();
301 | }
302 | catch (IOException e) {
303 | LOGGER.error("Error closing json writer!", e);
304 | onTestFailed();
305 | }
306 |
307 | try {
308 | pipedOutputStream.close();
309 | }
310 | catch (IOException e) {
311 | LOGGER.error("Error closing piped outputstream!", e);
312 | onTestFailed();
313 | }
314 | }
315 |
316 | }
317 | });
318 |
319 | return new InputStreamBodyGenerator(pipedInputStream);
320 | }
321 |
322 | }
323 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/client/TestUtils.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.client;
5 |
6 | import java.io.BufferedReader;
7 | import java.io.IOException;
8 | import java.io.InputStreamReader;
9 | import java.util.Iterator;
10 | import java.util.zip.GZIPInputStream;
11 |
12 | import junit.framework.Assert;
13 |
14 | import com.eharmony.matching.vw.webservice.common.example.StructuredExample;
15 | import com.google.common.collect.AbstractIterator;
16 |
17 | /**
18 | * @author vrahimtoola
19 | * General utility code for tests.
20 | */
21 | public class TestUtils {
22 |
23 | /*
24 | * Returns the examples from ner.train.gz as structured examples.
25 | */
26 | public static Iterable getStructuredExamplesFromNerTrain() {
27 |
28 | return new Iterable() {
29 |
30 | @Override
31 | public Iterator iterator() {
32 | try {
33 | return getStructuredExampleIteratorFromNerTrain();
34 | }
35 | catch (IOException e) {
36 | throw new RuntimeException(e);
37 | }
38 | }
39 | };
40 |
41 | }
42 |
43 | private static Iterator getStructuredExampleIteratorFromNerTrain() throws IOException {
44 |
45 | final GZIPInputStream gzipInputStream = new GZIPInputStream(TestUtils.class.getClassLoader().getResourceAsStream("ner.train.gz"));
46 | final BufferedReader exampleReader = new BufferedReader(new InputStreamReader(gzipInputStream));
47 | final StructuredExample.ExampleBuilder exampleBuilder = new StructuredExample.ExampleBuilder();
48 | final StructuredExample.Namespace.NamespaceBuilder namespaceBuilder = new StructuredExample.Namespace.NamespaceBuilder();
49 |
50 | return new AbstractIterator() {
51 |
52 | @Override
53 | protected StructuredExample computeNext() {
54 |
55 | try {
56 | String readExample = exampleReader.readLine();
57 |
58 | exampleBuilder.clear();
59 | namespaceBuilder.clear();
60 |
61 | if (readExample != null) {
62 | if (readExample.trim().length() == 0) {
63 | //just a line - empty example
64 | return StructuredExample.EMPTY_EXAMPLE;
65 | }
66 | else {
67 | //locate the " | "
68 | int indexOfSpacePipeSpace = readExample.indexOf(" | ");
69 |
70 | Assert.assertTrue(indexOfSpacePipeSpace > 0);
71 |
72 | String[] labelAndAllFeatures = readExample.split(" \\| ");
73 |
74 | Assert.assertEquals(2, labelAndAllFeatures.length);
75 |
76 | exampleBuilder.setLabel(labelAndAllFeatures[0]);
77 |
78 | String allFeaturesString = labelAndAllFeatures[1];
79 |
80 | String[] individualFeatures = allFeaturesString.split(" ");
81 |
82 | for (String individualFeature : individualFeatures) {
83 | namespaceBuilder.addFeature(individualFeature);
84 | }
85 |
86 | exampleBuilder.addNamespace(namespaceBuilder.build());
87 |
88 | return exampleBuilder.build();
89 | }
90 | }
91 | else
92 | return endOfData();
93 | }
94 | catch (Exception e) {
95 | throw new RuntimeException(e);
96 | }
97 |
98 | }
99 | };
100 |
101 | }
102 | }
103 |
--------------------------------------------------------------------------------
/vw-webservice-jersey/src/test/java/com/eharmony/matching/vw/webservice/core/exampleprocessor/tcpip/AsyncFailFastTCPIPExampleProcessorTest.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | package com.eharmony.matching.vw.webservice.core.exampleprocessor.tcpip;
5 |
6 | import static org.mockito.Mockito.mock;
7 | import static org.mockito.Mockito.times;
8 | import static org.mockito.Mockito.verify;
9 | import static org.mockito.Mockito.when;
10 |
11 | import java.io.BufferedReader;
12 | import java.io.ByteArrayInputStream;
13 | import java.io.ByteArrayOutputStream;
14 | import java.io.IOException;
15 | import java.io.InputStream;
16 | import java.io.StringReader;
17 | import java.net.Socket;
18 | import java.net.UnknownHostException;
19 | import java.util.ArrayList;
20 | import java.util.Iterator;
21 | import java.util.List;
22 | import java.util.concurrent.CountDownLatch;
23 | import java.util.concurrent.Executors;
24 | import java.util.concurrent.TimeUnit;
25 |
26 | import org.junit.Assert;
27 | import org.junit.Before;
28 | import org.junit.Test;
29 | import org.mockito.invocation.InvocationOnMock;
30 | import org.mockito.stubbing.Answer;
31 | import org.slf4j.Logger;
32 | import org.slf4j.LoggerFactory;
33 |
34 | import com.eharmony.matching.vw.webservice.common.example.Example;
35 | import com.eharmony.matching.vw.webservice.common.example.ExampleFormatException;
36 | import com.eharmony.matching.vw.webservice.common.example.StringExample;
37 | import com.eharmony.matching.vw.webservice.common.prediction.Prediction;
38 | import com.eharmony.matching.vw.webservice.core.ExampleReadException;
39 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingEventHandler;
40 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleProcessingManager;
41 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionException;
42 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.ExampleSubmissionState;
43 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchException;
44 | import com.eharmony.matching.vw.webservice.core.exampleprocessor.PredictionFetchState;
45 |
46 | /**
47 | * @author vrahimtoola
48 | *
49 | * Tests the AsyncFailFastTCPIPExampleProcessor.
50 | */
51 | public class AsyncFailFastTCPIPExampleProcessorTest implements ExampleProcessingEventHandler {
52 |
53 | private static final Logger LOGGER = LoggerFactory.getLogger(AsyncFailFastTCPIPExampleProcessorTest.class);
54 |
55 | /*
56 | * These variables could get written to by the example submitting thread, so
57 | * we need to make them volatile.
58 | */
59 | private volatile boolean exampleReadExceptionThrown, exampleFormatExceptionThrown, exampleSubmissionExceptionThrown, exampleSubmissionCompleteCalled;
60 | private volatile boolean predictionFetchExceptionThrown, predictionFetchCompleteCalled;
61 |
62 | private ExampleSubmissionState expectedStateOnExampleSubmissionComplete;
63 | private PredictionFetchState expectedStateOnPredictionFetchComplete;
64 | private long expectedNumberOfSkippedExamples, expectedNumberOfSubmittedExamples;
65 |
66 | private CountDownLatch countDownLatch;
67 |
68 | /**
69 | * @throws java.lang.Exception
70 | */
71 | @Before
72 | public void setUp() throws Exception {
73 |
74 | exampleReadExceptionThrown = false;
75 | exampleFormatExceptionThrown = false;
76 | exampleSubmissionExceptionThrown = false;
77 | exampleSubmissionCompleteCalled = false;
78 |
79 | predictionFetchCompleteCalled = false;
80 | predictionFetchExceptionThrown = false;
81 |
82 | expectedNumberOfSkippedExamples = -1;
83 | expectedNumberOfSubmittedExamples = -1;
84 |
85 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete;
86 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete;
87 |
88 | countDownLatch = new CountDownLatch(2);
89 |
90 | }
91 |
92 | /*
93 | * Just a simple test to verify that examples can be submitted and read as
94 | * expected.
95 | */
96 | @Test(timeout = 10000)
97 | public void simpleTest() throws IOException, ExampleSubmissionException, InterruptedException {
98 |
99 | Iterable examples = getExamples("One", "Two", "Three");
100 |
101 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3");
102 |
103 | Socket socket = mock(Socket.class);
104 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
105 | when(socket.getOutputStream()).thenReturn(outputStream);
106 | when(socket.getInputStream()).thenReturn(predictionInputStream);
107 |
108 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class);
109 | when(socketFactory.getSocket()).thenReturn(socket);
110 |
111 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples);
112 |
113 | expectedNumberOfSkippedExamples = 0;
114 | expectedNumberOfSubmittedExamples = 3;
115 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete;
116 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete;
117 |
118 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable();
119 |
120 | int x = 0;
121 |
122 | for (Prediction p : predictions) {
123 |
124 | switch (x++) {
125 |
126 | case 0:
127 | Assert.assertEquals("1", p.getVWStringRepresentation());
128 | break;
129 |
130 | case 1:
131 | Assert.assertEquals("2", p.getVWStringRepresentation());
132 | break;
133 |
134 | case 2:
135 | Assert.assertEquals("3", p.getVWStringRepresentation());
136 | break;
137 |
138 | default:
139 | Assert.fail("Too many predictions!");
140 | }
141 | }
142 |
143 | Assert.assertEquals(3, x);
144 |
145 | boolean succeeded = countDownLatch.await(9, TimeUnit.SECONDS); //wait till the example thread is done as well.
146 |
147 | Assert.assertTrue("Waited for longer than 9 seconds!!", succeeded);
148 |
149 | //check that all examples got there
150 | BufferedReader bReader = new BufferedReader(new StringReader(new String(outputStream.toByteArray())));
151 |
152 | x = 0;
153 | String line = null;
154 | while ((line = bReader.readLine()) != null) {
155 |
156 | switch (x++) {
157 |
158 | case 0:
159 | Assert.assertEquals("One", line);
160 | break;
161 |
162 | case 1:
163 | Assert.assertEquals("Two", line);
164 | break;
165 |
166 | case 2:
167 | Assert.assertEquals("Three", line);
168 | break;
169 |
170 | default:
171 | Assert.fail("Too many examples!");
172 | }
173 |
174 | }
175 |
176 | Assert.assertEquals(3, x);
177 |
178 | verify(socketFactory, times(1)).getSocket();
179 | verify(socket, times(1)).getInputStream();
180 | verify(socket, times(1)).getOutputStream();
181 | verify(socket, times(1)).shutdownOutput();
182 | verify(socket, times(1)).close();
183 |
184 | //no exceptions should have been thrown
185 | Assert.assertFalse(exampleReadExceptionThrown);
186 | Assert.assertFalse(exampleFormatExceptionThrown);
187 | Assert.assertFalse(exampleSubmissionExceptionThrown);
188 | Assert.assertFalse(predictionFetchExceptionThrown);
189 |
190 | //the completion call backs should have been fired
191 | Assert.assertTrue(exampleSubmissionCompleteCalled);
192 | Assert.assertTrue(predictionFetchCompleteCalled);
193 |
194 | }
195 |
196 | /*
197 | * Tests that an ExampleSubmissionException is thrown when the socket cannot
198 | * be retrieved from the socket factory.
199 | */
200 | @Test(expected = ExampleSubmissionException.class)
201 | public void throwsExampleSubmissionException() throws IOException, ExampleSubmissionException {
202 |
203 | Iterable examples = getExamples("One", "Two", "Three");
204 |
205 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class);
206 | when(socketFactory.getSocket()).thenThrow(UnknownHostException.class);
207 |
208 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples);
209 |
210 | toTest.submitExamples(this);
211 |
212 | }
213 |
214 | /*
215 | * Tests that an ExampleReadException is handled as expected.
216 | */
217 | @Test(timeout = 5000)
218 | public void handlesExampleReadException() throws IOException, ExampleSubmissionException, InterruptedException {
219 |
220 | Iterator iterator = mock(Iterator.class);
221 | when(iterator.hasNext()).thenReturn(true);
222 | when(iterator.next()).thenThrow(ExampleReadException.class);
223 |
224 | Iterable examples = mock(Iterable.class);
225 | when(examples.iterator()).thenReturn(iterator);
226 |
227 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3");
228 |
229 | Socket socket = mock(Socket.class);
230 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
231 | when(socket.getOutputStream()).thenReturn(outputStream);
232 | when(socket.getInputStream()).thenReturn(predictionInputStream);
233 |
234 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class);
235 | when(socketFactory.getSocket()).thenReturn(socket);
236 |
237 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples);
238 |
239 | expectedNumberOfSkippedExamples = 0;
240 | expectedNumberOfSubmittedExamples = 0;
241 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.ExampleReadFault;
242 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete;
243 |
244 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable();
245 |
246 | int x = 0;
247 |
248 | for (Prediction p : predictions) {
249 |
250 | switch (x++) {
251 |
252 | case 0:
253 | Assert.assertEquals("1", p.getVWStringRepresentation());
254 | break;
255 |
256 | case 1:
257 | Assert.assertEquals("2", p.getVWStringRepresentation());
258 | break;
259 |
260 | case 2:
261 | Assert.assertEquals("3", p.getVWStringRepresentation());
262 | break;
263 |
264 | default:
265 | Assert.fail("Too many predictions!");
266 | }
267 | }
268 |
269 | Assert.assertEquals(3, x);
270 |
271 | countDownLatch.await(); //wait till example submission and prediction fetch are both done.
272 |
273 | verify(socketFactory, times(1)).getSocket();
274 | verify(socket, times(1)).getInputStream();
275 | verify(socket, times(1)).getOutputStream();
276 | verify(socket, times(1)).shutdownOutput();
277 | verify(socket, times(1)).close();
278 |
279 | Assert.assertTrue(exampleReadExceptionThrown);
280 | Assert.assertFalse(exampleFormatExceptionThrown);
281 | Assert.assertFalse(exampleSubmissionExceptionThrown);
282 | Assert.assertFalse(predictionFetchExceptionThrown);
283 |
284 | //the completion call backs should have been fired
285 | Assert.assertTrue(exampleSubmissionCompleteCalled);
286 | Assert.assertTrue(predictionFetchCompleteCalled);
287 |
288 | }
289 |
290 | /*
291 | * Tests that example format exceptions are handled as expected.
292 | */
293 | @Test(timeout = 5000)
294 | public void handlesExampleFormatException() throws IOException, ExampleSubmissionException, InterruptedException {
295 |
296 | StringExample errorExample = mock(StringExample.class);
297 | when(errorExample.getVWStringRepresentation()).thenThrow(ExampleFormatException.class);
298 |
299 | Iterator iterator = mock(Iterator.class);
300 | when(iterator.hasNext()).thenReturn(true).thenReturn(true).thenReturn(true).thenReturn(false);
301 | when(iterator.next()).thenReturn(new StringExample("One")).thenReturn(errorExample).thenReturn(new StringExample("Two"));
302 |
303 | Iterable examples = mock(Iterable.class);
304 | when(examples.iterator()).thenReturn(iterator);
305 |
306 | InputStream predictionInputStream = getPredictionInputStream("1", "2", "3");
307 |
308 | Socket socket = mock(Socket.class);
309 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
310 | when(socket.getOutputStream()).thenReturn(outputStream);
311 | when(socket.getInputStream()).thenReturn(predictionInputStream);
312 |
313 | TCPIPSocketFactory socketFactory = mock(TCPIPSocketFactory.class);
314 | when(socketFactory.getSocket()).thenReturn(socket);
315 |
316 | AsyncFailFastTCPIPExampleProcessor toTest = new AsyncFailFastTCPIPExampleProcessor(socketFactory, Executors.newCachedThreadPool(), examples);
317 |
318 | expectedNumberOfSkippedExamples = 1;
319 | expectedNumberOfSubmittedExamples = 2;
320 | expectedStateOnExampleSubmissionComplete = ExampleSubmissionState.Complete;
321 | expectedStateOnPredictionFetchComplete = PredictionFetchState.Complete;
322 |
323 | Iterable predictions = toTest.submitExamples(this).getPredictionsIterable();
324 |
325 | int x = 0;
326 |
327 | for (Prediction p : predictions) {
328 |
329 | switch (x++) {
330 |
331 | case 0:
332 | Assert.assertEquals("1", p.getVWStringRepresentation());
333 | break;
334 |
335 | case 1:
336 | Assert.assertEquals("2", p.getVWStringRepresentation());
337 | break;
338 |
339 | case 2:
340 | Assert.assertEquals("3", p.getVWStringRepresentation());
341 | break;
342 |
343 | default:
344 | Assert.fail("Too many predictions!");
345 | }
346 | }
347 |
348 | Assert.assertEquals(3, x);
349 |
350 | countDownLatch.await(); //wait till example submission and prediction fetch are both done.
351 |
352 | //check that all examples got there
353 | BufferedReader bReader = new BufferedReader(new StringReader(new String(outputStream.toByteArray())));
354 |
355 | x = 0;
356 | String line = null;
357 | while ((line = bReader.readLine()) != null) {
358 |
359 | switch (x++) {
360 |
361 | case 0:
362 | Assert.assertEquals("One", line);
363 | break;
364 |
365 | case 1:
366 | Assert.assertEquals("Two", line);
367 | break;
368 |
369 | default:
370 | Assert.fail("Too many examples!");
371 | }
372 |
373 | }
374 |
375 | Assert.assertEquals(2, x);
376 |
377 | verify(socketFactory, times(1)).getSocket();
378 | verify(socket, times(1)).getInputStream();
379 | verify(socket, times(1)).getOutputStream();
380 | verify(socket, times(1)).shutdownOutput();
381 | verify(socket, times(1)).close();
382 |
383 | Assert.assertFalse(exampleReadExceptionThrown);
384 | Assert.assertTrue(exampleFormatExceptionThrown);
385 | Assert.assertFalse(exampleSubmissionExceptionThrown);
386 | Assert.assertFalse(predictionFetchExceptionThrown);
387 |
388 | //the completion call backs should have been fired
389 | Assert.assertTrue(exampleSubmissionCompleteCalled);
390 | Assert.assertTrue(predictionFetchCompleteCalled);
391 |
392 | }
393 |
394 | /*
395 | * Tests that a prediction fetch exception is handled correctly.
396 | */
397 | @Test(timeout = 5000)
398 | public void handlePredictionFetchException() throws IOException, ExampleSubmissionException, InterruptedException {
399 |
400 | Iterable examples = getExamples("One", "Two");
401 |
402 | Socket socket = mock(Socket.class);
403 | ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
404 | when(socket.getOutputStream()).thenReturn(outputStream);
405 |
406 | InputStream inputStream = mock(InputStream.class, new Answer