236 | readQualifierStructure((Map<String, Object>) parent.get(parentQualifier), childQualifier, value);
237 | return;
238 | }
239 | parent.put(this.river.normalizeField(qualifier.replace(this.river.getColumnSeparator(), "")), value);
240 | return;
241 | }
242 | }
243 | parent.put(this.river.normalizeField(qualifier), value);
244 | }
245 |
246 | /**
247 | * Signals the import thread to stop, allowing it to close any open Scanner or Client.
248 | */
249 | public synchronized void stopThread() {
250 | this.stopThread = true;
251 | }
252 |
253 | /**
254 | * Sets the minimum time stamp on the HBase scanner by looking into ElasticSearch for the time stamp of the last entry made.
255 | *
256 | * @param scanner the HBase scanner on which to set the minimum time stamp
257 | */
258 | protected long setMinTimestamp(final Scanner scanner) {
259 | this.logger.debug("Looking into ElasticSearch to determine timestamp of last import");
260 | final SearchResponse response = this.river.getEsClient()
261 | .prepareSearch(this.river.getIndex())
262 | .setTypes(this.river.getType())
263 | .setQuery(QueryBuilders.matchAllQuery())
264 | .addFacet(FacetBuilders.statisticalFacet(TIMESTMAP_STATS).field("_timestamp"))
265 | .execute()
266 | .actionGet();
267 |
268 | if (response.facets().facet(TIMESTMAP_STATS) != null) {
269 | this.logger.debug("Got statistical data from ElasticSearch about data timestamps");
270 | final StatisticalFacet facet = (StatisticalFacet) response.facets().facet(TIMESTMAP_STATS);
271 | final long timestamp = (long) Math.max(facet.getMax() + 1, 0);
272 | scanner.setMinTimestamp(timestamp);
273 | this.logger.debug("Found latest timestamp in ElasticSearch to be {}", timestamp);
274 | return timestamp;
275 | }
276 | this.logger.debug("No statistical data about data timestamps could be found -> probably no data there yet");
277 | scanner.setMinTimestamp(0);
278 | this.logger.debug("Found latest timestamp in ElasticSearch to be not present (-> 0)");
279 | return 0L;
280 | }
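// Illustration (not part of the original source): if the newest document in the
// index carries _timestamp 1361442102000, the statistical facet max is
// 1361442102000 and the scanner minimum becomes 1361442102001, so HBase only
// returns rows written after the last import.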
281 | }
282 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/river/hbase/HBaseRiver.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.river.hbase;
2 |
3 | import java.lang.Thread.UncaughtExceptionHandler;
4 | import java.nio.charset.Charset;
5 | import java.security.InvalidParameterException;
6 | import java.util.Map;
7 |
8 | import org.elasticsearch.ElasticSearchException;
9 | import org.elasticsearch.ExceptionsHelper;
10 | import org.elasticsearch.action.admin.indices.status.ShardStatus;
11 | import org.elasticsearch.client.Client;
12 | import org.elasticsearch.common.inject.Inject;
13 | import org.elasticsearch.common.logging.ESLogger;
14 | import org.elasticsearch.common.util.concurrent.EsExecutors;
15 | import org.elasticsearch.common.xcontent.support.XContentMapValues;
16 | import org.elasticsearch.index.shard.IndexShardState;
17 | import org.elasticsearch.indices.IndexAlreadyExistsException;
18 | import org.elasticsearch.river.AbstractRiverComponent;
19 | import org.elasticsearch.river.River;
20 | import org.elasticsearch.river.RiverName;
21 | import org.elasticsearch.river.RiverSettings;
22 |
23 | /**
24 | * An HBase import river built similarly to the MySQL river, which was modeled after the Solr SQL import functionality.
25 | *
26 | * @author Ravi Gairola
27 | */
28 | public class HBaseRiver extends AbstractRiverComponent implements River, UncaughtExceptionHandler {
29 | private static final String CONFIG_SPACE = "hbase";
30 | private final Client esClient;
31 | private volatile Runnable parser;
32 |
33 | /**
34 | * Comma separated list of Zookeeper hosts which the HBase client uses to find the cluster.
35 | */
36 | private final String hosts;
37 |
38 | /**
39 | * The HBase table name to be imported from.
40 | */
41 | private final String table;
42 |
43 | /**
44 | * The ElasticSearch index name to be imported to. (Default is the river name)
45 | */
46 | private final String index;
47 |
48 | /**
49 | * The ElasticSearch type name to be imported to. (Default is the source table name)
50 | */
51 | private final String type;
52 |
53 | /**
54 | * The interval in ms with which the river is supposed to run (60000 = every minute). (Default is every 10 minutes)
55 | */
56 | private final long interval;
57 |
58 | /**
59 | * The size of the ElasticSearch bulk indexing requests. Tweaking this might improve performance. (Default is
60 | * 100 operations)
61 | */
62 | private final int batchSize;
63 |
64 | /**
65 | * Name of the field from HBase to be used as an idField in ElasticSearch. The mapping will be set up accordingly, so
66 | * that the _id field is routed to this field name (you can then access it under both the field name and "_id"). If no
67 | * id field is given, ElasticSearch will automatically generate an id.
68 | */
69 | private final String idField;
70 |
71 | /**
72 | * The character set used to parse data from HBase. (Default is UTF-8)
73 | */
74 | private final Charset charset;
75 |
76 | /**
77 | * Limit the scanning of the HBase table to a certain family.
78 | */
79 | private final byte[] family;
80 |
81 | /**
82 | * Limit the scanning of the HBase table to a number of qualifiers. A family must be set for this to take effect.
83 | * Multiple qualifiers can be set via comma separated list.
84 | */
85 | private final String qualifiers;
86 |
87 | /**
88 | * Some names must be given in a lower case format (the index name for example), others are more flexible. This flag will
89 | * normalize all fields to lower case and remove special characters that ElasticSearch can't handle. (The filter is
90 | * probably stricter than needed in most cases)
91 | */
92 | private final boolean normalizeFields;
93 |
94 | /**
95 | * Splits up the column into further sub columns if a separator is defined. For example:
96 | *
97 | * <pre>
98 | * Separator:   "-"
99 | * Column name: "this-is-my-column"
100 | * Result:
101 | * {
102 | *   this: {
103 | *     is: {
104 | *       my: {
105 | *         column: -value-
106 | *       }
107 | *     }
108 | *   }
109 | * }
110 | * </pre>
111 | *
112 | * If no separator is defined, or the separator is empty, no operation is performed. Try to use single character
113 | * separators, as multi character separators will allow partial hits of a separator to be part of the data. (e.g. a
114 | * separator defined as "()" will leave all "(" and ")" in the parsed data)
115 | */
116 | public final String columnSeparator;
117 |
118 | /**
119 | * Define a custom mapping that will be used instead of an automatically generated one. Make sure to enable time stamps,
120 | * and set the proper alias if you want an id-field to be recognized.
121 | */
122 | public final String customMapping;
123 |
124 | /**
125 | * Setting that controls whether entries should be deleted from HBase after they have been read.
126 | */
127 | private final boolean deleteOld;
128 |
129 | /**
130 | * Loads and verifies all the configuration needed to run this river.
131 | *
132 | * @param riverName
133 | * @param settings
134 | * @param esClient
135 | */
136 | @Inject
137 | public HBaseRiver(final RiverName riverName, final RiverSettings settings, final Client esClient) {
138 | super(riverName, settings);
139 | this.esClient = esClient;
140 | this.logger.info("Creating HBase Stream River");
141 |
142 | this.normalizeFields = Boolean.parseBoolean(readConfig("normalizeFields", "true"));
143 | this.hosts = readConfig("hosts");
144 | this.table = readConfig("table");
145 | this.columnSeparator = readConfig("columnSeparator", null);
146 | this.idField = normalizeField(readConfig("idField", null));
147 | this.index = normalizeField(readConfig("index", riverName.name()));
148 | this.type = normalizeField(readConfig("type", this.table));
149 | this.interval = Long.parseLong(readConfig("interval", "600000"));
150 | this.batchSize = Integer.parseInt(readConfig("batchSize", "100"));
151 | this.charset = Charset.forName(readConfig("charset", "UTF-8"));
152 | this.deleteOld = Boolean.parseBoolean(readConfig("deleteOld", "false"));
153 |
154 | final String family = readConfig("family", null);
155 | this.family = family != null ? family.getBytes(this.charset) : null;
156 | this.qualifiers = readConfig("qualifiers", null);
157 | this.customMapping = readConfig("customMapping", null);
158 |
159 | if (this.interval <= 0) {
160 | throw new IllegalArgumentException("The interval between runs must be at least 1 ms. The current config is set to "
161 | + this.interval);
162 | }
163 | if (this.batchSize <= 0) {
164 | throw new IllegalArgumentException("The batch size must be set to at least 1. The current config is set to " + this.batchSize);
165 | }
166 | }
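// Illustration (assumed usage, not part of the original source): the settings read
// above arrive through the standard river "_meta" document, nested under the "hbase"
// key (CONFIG_SPACE). Host, table and idField values here are hypothetical.
//
// curl -XPUT 'http://localhost:9200/_river/my_hbase_river/_meta' -d '{
//     "type": "hbase",
//     "hbase": {
//         "hosts": "zookeeper1,zookeeper2,zookeeper3",
//         "table": "users",
//         "idField": "user_id",
//         "interval": 600000,
//         "batchSize": 100
//     }
// }'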
167 |
168 | /**
169 | * Fetch the value of a configuration that has no default value and is therefore mandatory. An empty (trimmed) string is
170 | * treated as invalid, just like a missing value (null).
171 | *
172 | * @param config Key of the configuration to fetch
173 | * @throws InvalidParameterException if a configuration is missing (null or empty)
174 | * @return the configuration value
175 | */
176 | private String readConfig(final String config) {
177 | final String result = readConfig(config, null);
178 | if (result == null || result.trim().isEmpty()) {
179 | this.logger.error("Unable to read required config {}. Aborting!", config);
180 | throw new InvalidParameterException("Unable to read required config " + config);
181 | }
182 | return result;
183 | }
184 |
185 | /**
186 | * Fetch the value of a configuration that has a default value and is therefore optional.
187 | *
188 | * @param config Key of the configuration to fetch
189 | * @param defaultValue The value to set if no value could be found
190 | * @return the configuration value, or the given default if none is set
191 | */
192 | @SuppressWarnings({ "unchecked" })
193 | private String readConfig(final String config, final String defaultValue) {
194 | if (this.settings.settings().containsKey(CONFIG_SPACE)) {
195 | final Map<String, Object> hbaseSettings = (Map<String, Object>) this.settings.settings().get(CONFIG_SPACE);
196 | return XContentMapValues.nodeStringValue(hbaseSettings.get(config), defaultValue);
197 | }
198 | return defaultValue;
199 | }
200 |
201 | /**
202 | * This method is launched by ElasticSearch and starts the HBase River. The method will try to create a mapping with time
203 | * stamps enabled. If a mapping already exists, the user should make sure that time stamps are enabled for this type.
204 | */
205 | @Override
206 | public synchronized void start() {
207 | if (this.parser != null) {
208 | this.logger.warn("Trying to start HBase stream although it is already running");
209 | return;
210 | }
211 | this.parser = new HBaseParser(this);
212 |
213 | this.logger.info("Waiting for Index to be ready for interaction");
214 | waitForESReady();
215 |
216 | this.logger.info("Starting HBase Stream");
217 | String mapping;
218 | if (this.customMapping != null && !this.customMapping.trim().isEmpty()) {
219 | mapping = this.customMapping;
220 | }
221 | else {
222 | if (this.idField == null) {
223 | mapping = "{\"" + this.type + "\":{\"_timestamp\":{\"enabled\":true}}}";
224 | }
225 | else if (this.columnSeparator != null) {
226 | mapping = "{\"" + this.type + "\":{\"_timestamp\":{\"enabled\":true},\"_id\":{\"path\":\""
227 | + this.idField.replace(this.columnSeparator, ".") + "\"}}}";
228 | }
229 | else {
230 | mapping = "{\"" + this.type + "\":{\"_timestamp\":{\"enabled\":true},\"_id\":{\"path\":\"" + this.idField + "\"}}}";
231 | }
232 | }
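// Illustration (derived from the string building above): with type "users", idField
// "user::id" and columnSeparator "::" the generated mapping is
// {"users":{"_timestamp":{"enabled":true},"_id":{"path":"user.id"}}}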
233 |
234 | try {
235 | this.esClient.admin().indices().prepareCreate(this.index).addMapping(this.type, mapping).execute().actionGet();
236 | this.logger.info("Created Index {} with _timestamp mapping for {}", this.index, this.type);
237 | } catch (Exception e) {
238 | if (ExceptionsHelper.unwrapCause(e) instanceof IndexAlreadyExistsException) {
239 | this.logger.debug("Not creating Index {} as it already exists", this.index);
240 | }
241 | else if (ExceptionsHelper.unwrapCause(e) instanceof ElasticSearchException) {
242 | this.logger.debug("Mapping {}.{} already exists and will not be created", this.index, this.type);
243 | }
244 | else {
245 | this.logger.warn("failed to create index [{}], disabling river...", e, this.index);
246 | return;
247 | }
248 | }
249 |
250 | try {
251 | this.esClient.admin()
252 | .indices()
253 | .preparePutMapping(this.index)
254 | .setType(this.type)
255 | .setSource(mapping)
256 | .setIgnoreConflicts(true)
257 | .execute()
258 | .actionGet();
259 | } catch (ElasticSearchException e) {
260 | this.logger.debug("Mapping already exists for index {} and type {}", this.index, this.type);
261 | }
262 |
263 | final Thread t = EsExecutors.daemonThreadFactory(this.settings.globalSettings(), "hbase_slurper").newThread(this.parser);
264 | t.setUncaughtExceptionHandler(this);
265 | t.start();
266 | }
267 |
268 | private void waitForESReady() {
269 | if (!this.esClient.admin().indices().prepareExists(this.index).execute().actionGet().exists()) {
270 | return;
271 | }
272 | for (final ShardStatus status : this.esClient.admin().indices().prepareStatus(this.index).execute().actionGet().getShards()) {
273 | if (status.getState() != IndexShardState.STARTED) {
274 | try {
275 | Thread.sleep(1000);
276 | } catch (InterruptedException e) {
277 | this.logger.trace("HBase thread has been interrupted while waiting for the database to be reachable");
278 | }
279 | this.logger.trace("Waiting...");
280 | waitForESReady();
281 | break;
282 | }
283 | }
284 | }
285 |
286 | /**
287 | * This method is called by ElasticSearch when shutting down the river. The method will stop the thread and close all
288 | * connections to HBase.
289 | */
290 | @Override
291 | public synchronized void close() {
292 | this.logger.info("Closing HBase river");
293 | if (this.parser instanceof HBaseParser) {
294 | ((HBaseParser) this.parser).stopThread();
295 | }
296 | this.parser = null;
297 | }
298 |
299 | /**
300 | * Some of the asynchronous methods of the HBase client will throw Exceptions that are not caught anywhere else.
301 | */
302 | @Override
303 | public void uncaughtException(final Thread thread, final Throwable throwable) {
304 | this.logger.error("An Exception has been thrown in HBase Import Thread", throwable, (Object[]) null);
305 | }
306 |
307 | /**
308 | * If the normalizeField flag is set, this method will return a lower case representation of the field, as well as
309 | * stripping away all special characters except "-" and "_".
310 | *
311 | * @param fieldName the field name to normalize
312 | * @return the normalized field name, or the input unchanged if normalizing is disabled
313 | */
314 | public String normalizeField(final String fieldName) {
315 | if (!isNormalizeFields() || fieldName == null) {
316 | return fieldName;
317 | }
318 | if (getColumnSeparator() != null) {
319 | String regex = "a-z0-9\\-_";
320 | for (int i = 0; i < getColumnSeparator().length(); i++) {
321 | regex += "\\" + getColumnSeparator().charAt(i);
322 | }
323 | return fieldName.toLowerCase().replaceAll("[^" + regex + "]", "");
324 | }
325 | return fieldName.toLowerCase().replaceAll("[^a-z0-9\\-_]", "");
326 | }
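// Illustration (mirrors the unit tests): with columnSeparator "::" the separator
// characters are added to the whitelist, so normalizeField("Bl%^&*ah::Blubb")
// yields "blah::blubb"; without a separator the ":" characters would be stripped
// as well, giving "blahblubb".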
327 |
328 | public boolean isNormalizeFields() {
329 | return this.normalizeFields;
330 | }
331 |
332 | public long getInterval() {
333 | return this.interval;
334 | }
335 |
336 | public String getTable() {
337 | return this.table;
338 | }
339 |
340 | public String getHosts() {
341 | return this.hosts;
342 | }
343 |
344 | public byte[] getFamily() {
345 | return this.family;
346 | }
347 |
348 | public String getQualifiers() {
349 | return this.qualifiers;
350 | }
351 |
352 | public Charset getCharset() {
353 | return this.charset;
354 | }
355 |
356 | public int getBatchSize() {
357 | return this.batchSize;
358 | }
359 |
360 | public Client getEsClient() {
361 | return this.esClient;
362 | }
363 |
364 | public String getIndex() {
365 | return this.index;
366 | }
367 |
368 | public String getType() {
369 | return this.type;
370 | }
371 |
372 | public String getIdField() {
373 | return this.idField;
374 | }
375 |
376 | public String getColumnSeparator() {
377 | return this.columnSeparator;
378 | }
379 |
380 | public ESLogger getLogger() {
381 | return this.logger;
382 | }
383 |
384 | public boolean getDeleteOld() {
385 | return this.deleteOld;
386 | }
387 | }
388 |
--------------------------------------------------------------------------------
/src/main/java/org/elasticsearch/river/hbase/HBaseRiverModule.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.river.hbase;
2 |
3 | import org.elasticsearch.common.inject.AbstractModule;
4 | import org.elasticsearch.river.River;
5 |
6 | /**
7 | * Does the initial configuration of the Module, when it is called by ElasticSearch. Binds the HBase river as an eager
8 | * singleton river.
9 | *
10 | * @author Ravi Gairola
11 | */
12 | public class HBaseRiverModule extends AbstractModule {
13 |
14 | @Override
15 | protected void configure() {
16 | bind(River.class).to(HBaseRiver.class).asEagerSingleton();
17 | }
18 | }
19 |
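Note: the plugin class referenced in es-plugin.properties below (HBaseRiverPlugin, not
included in this section) would typically register this module with the rivers
infrastructure. A minimal sketch, assuming the standard RiversModule API of this
ElasticSearch era; the actual plugin class may differ:

    package org.elasticsearch.plugin.river.hbase;

    import org.elasticsearch.plugins.AbstractPlugin;
    import org.elasticsearch.river.RiversModule;
    import org.elasticsearch.river.hbase.HBaseRiverModule;

    public class HBaseRiverPlugin extends AbstractPlugin {
        @Override
        public String name() {
            return "river-hbase";
        }

        @Override
        public String description() {
            return "HBase River Plugin";
        }

        // Registers the "hbase" river type so that _meta documents with
        // "type": "hbase" instantiate the HBaseRiverModule.
        public void onModule(final RiversModule module) {
            module.registerRiver("hbase", HBaseRiverModule.class);
        }
    }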
--------------------------------------------------------------------------------
/src/main/resources/es-plugin.properties:
--------------------------------------------------------------------------------
1 | plugin=org.elasticsearch.plugin.river.hbase.HBaseRiverPlugin
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/river/hbase/HBaseParserTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.river.hbase;
2 |
3 | import static org.testng.Assert.assertEquals;
4 | import static org.testng.Assert.assertNotNull;
5 |
6 | import java.nio.charset.Charset;
7 | import java.util.ArrayList;
8 | import java.util.HashMap;
9 | import java.util.Map;
10 |
11 | import mockit.Mock;
12 | import mockit.MockUp;
13 | import mockit.Mockit;
14 |
15 | import org.elasticsearch.client.Client;
16 | import org.elasticsearch.river.AbstractRiverComponent;
17 | import org.elasticsearch.river.RiverName;
18 | import org.elasticsearch.river.RiverSettings;
19 | import org.hbase.async.KeyValue;
20 | import org.testng.Assert;
21 | import org.testng.annotations.AfterClass;
22 | import org.testng.annotations.BeforeClass;
23 | import org.testng.annotations.Test;
24 |
25 | public class HBaseParserTest {
26 | @AfterClass
27 | public void tearDown() {
28 | Mockit.tearDownMocks();
29 | }
30 |
31 | public class ReadQualifierStructureTest {
32 | public String separator;
33 | public boolean normalize;
34 |
35 | @BeforeClass
36 | public void setUp() {
37 | new MockUp<AbstractRiverComponent>() {
38 | @Mock
39 | void $init(final RiverName riverName, final RiverSettings settings) {}
40 | };
41 | new MockUp<HBaseRiver>() {
42 | @Mock
43 | void $init(final RiverName riverName, final RiverSettings settings, final Client esClient) {}
44 |
45 | @Mock
46 | String getColumnSeparator() {
47 | return ReadQualifierStructureTest.this.separator;
48 | }
49 |
50 | @Mock
51 | boolean isNormalizeFields() {
52 | return ReadQualifierStructureTest.this.normalize;
53 | }
54 | };
55 | }
56 |
57 | @SuppressWarnings("unchecked")
58 | @Test
59 | public void testBase() throws Exception {
60 | this.separator = "::";
61 | this.normalize = false;
62 |
63 | final Map<String, Object> result = new HashMap<String, Object>();
64 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
65 | parser.readQualifierStructure(result, "data::set1::category1", "test1");
66 | parser.readQualifierStructure(result, "data::set1::category2", "test2");
67 | parser.readQualifierStructure(result, "data::set1::category3", "test3");
68 | parser.readQualifierStructure(result, "data::set2::category1", "test4");
69 | parser.readQualifierStructure(result, "data::set2::category2", "test5");
70 |
71 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category1"), "test1");
72 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category2"), "test2");
73 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category3"), "test3");
74 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set2")).get("category1"), "test4");
75 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set2")).get("category2"), "test5");
76 | }
77 |
78 | @Test
79 | public void testNullSeparator() throws Exception {
80 | this.separator = null;
81 | this.normalize = false;
82 |
83 | final Map<String, Object> result = new HashMap<String, Object>();
84 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
85 | parser.readQualifierStructure(result, "data::set1::category1", "test1");
86 | parser.readQualifierStructure(result, "data::set1::category2", "test2");
87 | parser.readQualifierStructure(result, "data::set1::category3", "test3");
88 | parser.readQualifierStructure(result, "data::set2::category1", "test4");
89 | parser.readQualifierStructure(result, "data::set2::category2", "test5");
90 |
91 | Assert.assertEquals(result.get("data::set1::category1"), "test1");
92 | Assert.assertEquals(result.get("data::set1::category2"), "test2");
93 | Assert.assertEquals(result.get("data::set1::category3"), "test3");
94 | Assert.assertEquals(result.get("data::set2::category1"), "test4");
95 | Assert.assertEquals(result.get("data::set2::category2"), "test5");
96 | }
97 |
98 | @Test
99 | public void testEmptySeparator() throws Exception {
100 | this.separator = "";
101 | this.normalize = false;
102 |
103 | final Map<String, Object> result = new HashMap<String, Object>();
104 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
105 | parser.readQualifierStructure(result, "data::set1::category1", "test1");
106 | parser.readQualifierStructure(result, "data::set1::category2", "test2");
107 | parser.readQualifierStructure(result, "data::set1::category3", "test3");
108 | parser.readQualifierStructure(result, "data::set2::category1", "test4");
109 | parser.readQualifierStructure(result, "data::set2::category2", "test5");
110 |
111 | Assert.assertEquals(result.get("data::set1::category1"), "test1");
112 | Assert.assertEquals(result.get("data::set1::category2"), "test2");
113 | Assert.assertEquals(result.get("data::set1::category3"), "test3");
114 | Assert.assertEquals(result.get("data::set2::category1"), "test4");
115 | Assert.assertEquals(result.get("data::set2::category2"), "test5");
116 | }
117 |
118 | @SuppressWarnings("unchecked")
119 | @Test
120 | public void testEmptySubQualifier() throws Exception {
121 | this.separator = "::";
122 | this.normalize = true;
123 |
124 | final Map<String, Object> result = new HashMap<String, Object>();
125 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
126 | parser.readQualifierStructure(result, "data::set1::category1", "test1");
127 | parser.readQualifierStructure(result, "data::set1::category2", "test2");
128 | parser.readQualifierStructure(result, "data::set1::category3", "test3");
129 | parser.readQualifierStructure(result, "data::set2::category1", "test4");
130 | parser.readQualifierStructure(result, "data::set2::", "test5");
131 |
132 | System.out.println(result);
133 |
134 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category1"), "test1");
135 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category2"), "test2");
136 | Assert.assertEquals(((Map) ((Map) result.get("data")).get("set1")).get("category3"), "test3");
137 | Assert.assertEquals(((Map) result.get("data")).get("set2"), "test5");
138 | }
139 |
140 | @Test
141 | public void testWrongSeparator() throws Exception {
142 | this.separator = "--";
143 | this.normalize = false;
144 |
145 | final Map<String, Object> result = new HashMap<String, Object>();
146 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
147 | parser.readQualifierStructure(result, "data::set1::category1", "test1");
148 | parser.readQualifierStructure(result, "data::set1::category2", "test2");
149 | parser.readQualifierStructure(result, "data::set1::category3", "test3");
150 | this.normalize = true;
151 | parser.readQualifierStructure(result, "data::set2::category1", "test4");
152 | parser.readQualifierStructure(result, "data::set2::category2", "test5");
153 |
154 | Assert.assertEquals(result.get("data::set1::category1"), "test1");
155 | Assert.assertEquals(result.get("data::set1::category2"), "test2");
156 | Assert.assertEquals(result.get("data::set1::category3"), "test3");
157 | Assert.assertEquals(result.get("dataset2category1"), "test4");
158 | Assert.assertEquals(result.get("dataset2category2"), "test5");
159 | }
160 | }
161 |
162 | public class ReadDataTreeTest {
163 | private final Charset charset = Charset.forName("UTF-8");
164 | private int rowCounter = 0;
165 |
166 | @BeforeClass
167 | public void setUp() {
168 | new MockUp<AbstractRiverComponent>() {
169 | @Mock
170 | void $init(final RiverName riverName, final RiverSettings settings) {}
171 | };
172 |
173 | new MockUp<HBaseRiver>() {
174 |
175 | @Mock
176 | void $init(final RiverName riverName, final RiverSettings settings, final Client esClient) {}
177 |
178 | @Mock
179 | Charset getCharset() {
180 | return ReadDataTreeTest.this.charset;
181 | }
182 |
183 | @Mock
184 | boolean isNormalizeFields() {
185 | return true;
186 | }
187 | };
188 | }
189 |
190 | @Test
191 | @SuppressWarnings("unchecked")
192 | public void testBase() {
193 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
194 |
195 | final ArrayList<KeyValue> input = new ArrayList<KeyValue>();
196 |
197 | input.add(getKeyValue("family1", "category1", "value1"));
198 | input.add(getKeyValue("family1", "category2", "value2"));
199 | input.add(getKeyValue("family1", "category3", "value3"));
200 | input.add(getKeyValue("family2", "category1", "value4"));
201 | input.add(getKeyValue("family2", "category4", "value5"));
202 | input.add(getKeyValue("family3", "category5", "value6"));
203 | input.add(getKeyValue("family2", "category6", "value7"));
204 |
205 | final Map<String, Object> output = parser.readDataTree(input);
206 |
207 | assertNotNull(output.get("family1"));
208 | final Map family1 = (Map) output.get("family1");
209 | assertEquals(family1.get("category1"), "value1");
210 | assertEquals(family1.get("category2"), "value2");
211 | assertEquals(family1.get("category3"), "value3");
212 | assertNotNull(output.get("family2"));
213 | final Map family2 = (Map) output.get("family2");
214 | assertEquals(family2.get("category1"), "value4");
215 | assertEquals(family2.get("category4"), "value5");
216 | assertEquals(family2.get("category6"), "value7");
217 | assertNotNull(output.get("family3"));
218 | final Map family3 = (Map) output.get("family3");
219 | assertEquals(family3.get("category5"), "value6");
220 | }
221 |
222 | private KeyValue getKeyValue(final String family, final String qualifier, final String value) {
223 | return new KeyValue(String.valueOf(this.rowCounter++).getBytes(this.charset),
224 | family.getBytes(this.charset),
225 | qualifier.getBytes(this.charset),
226 | value.getBytes(this.charset));
227 | }
228 | }
229 |
230 | public class FindKeyInDataTreeTest {
231 | protected String separator;
232 | protected boolean normalize;
233 |
234 | @BeforeClass
235 | public void setUp() {
236 | new MockUp<AbstractRiverComponent>() {
237 | @Mock
238 | void $init(final RiverName riverName, final RiverSettings settings) {}
239 | };
240 |
241 | new MockUp<HBaseRiver>() {
242 |
243 | @Mock
244 | void $init(final RiverName riverName, final RiverSettings settings, final Client esClient) {}
245 |
246 | @Mock
247 | String getColumnSeparator() {
248 | return FindKeyInDataTreeTest.this.separator;
249 | }
250 |
251 | @Mock
252 | boolean isNormalizeFields() {
253 | return FindKeyInDataTreeTest.this.normalize;
254 | }
255 | };
256 | }
257 |
258 | @Test
259 | public void testBase() {
260 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
261 | this.separator = "::";
262 |
263 | final Map<String, Object> dataTree = new HashMap<String, Object>();
264 | final Map<String, Object> dataBranch = new HashMap<String, Object>();
265 | dataBranch.put("theId", "TheValue");
266 | dataTree.put("aBranch", dataBranch);
267 |
268 | assertEquals(parser.findKeyInDataTree(dataTree, "aBranch::theId"), "TheValue");
269 | }
270 |
271 | @Test
272 | public void testDotSeparator() {
273 | final HBaseParser parser = new HBaseParser(new HBaseRiver(null, null, null));
274 | this.separator = ".";
275 |
276 | final Map<String, Object> dataTree = new HashMap<String, Object>();
277 | final Map<String, Object> dataBranch = new HashMap<String, Object>();
278 | dataBranch.put("theId", "TheValue");
279 | dataTree.put("aBranch", dataBranch);
280 |
281 | assertEquals(parser.findKeyInDataTree(dataTree, "aBranch.theId"), "TheValue");
282 | }
283 | }
284 | }
285 |
--------------------------------------------------------------------------------
/src/test/java/org/elasticsearch/river/hbase/HBaseRiverTest.java:
--------------------------------------------------------------------------------
1 | package org.elasticsearch.river.hbase;
2 |
3 | import mockit.Mock;
4 | import mockit.MockUp;
5 |
6 | import org.elasticsearch.client.Client;
7 | import org.elasticsearch.river.AbstractRiverComponent;
8 | import org.elasticsearch.river.RiverName;
9 | import org.elasticsearch.river.RiverSettings;
10 | import org.testng.Assert;
11 | import org.testng.annotations.Test;
12 |
13 | public class HBaseRiverTest {
14 | @Test
15 | public void testNormalizeField() {
16 | new MockUp<AbstractRiverComponent>() {
17 | @Mock
18 | void $init(final RiverName riverName, final RiverSettings settings) {}
19 | };
20 | new MockUp<HBaseRiver>() {
21 | @Mock
22 | void $init(final RiverName riverName, final RiverSettings settings, final Client esClient) {}
23 |
24 | @Mock
25 | boolean isNormalizeFields() {
26 | return true;
27 | }
28 |
29 | @Mock
30 | String getColumnSeparator() {
31 | return "::";
32 | }
33 | };
34 |
35 | final HBaseRiver river = new HBaseRiver(null, null, null);
36 |
37 | Assert.assertEquals(river.normalizeField(""), "");
38 | Assert.assertEquals(river.normalizeField(" "), "");
39 | Assert.assertEquals(river.normalizeField("a"), "a");
40 | Assert.assertEquals(river.normalizeField("A"), "a");
41 | Assert.assertEquals(river.normalizeField("Aa"), "aa");
42 | Assert.assertEquals(river.normalizeField("a-b"), "a-b");
43 | Assert.assertEquals(river.normalizeField("a_b"), "a_b");
44 | Assert.assertEquals(river.normalizeField("90aS"), "90as");
45 | Assert.assertEquals(river.normalizeField("&*($@#!ui^&$(#\"8ui"), "ui8ui");
46 | Assert.assertEquals(river.normalizeField("bl%^&*ah::blubb"), "blah::blubb");
47 | Assert.assertEquals(river.normalizeField(null), null);
48 | }
49 | }
50 |
--------------------------------------------------------------------------------