/**
 * Process-wide holder for the ElasticSearch connection settings used by the observer.
 *
 * <p>All settings are plain static fields that are assigned by the code that reads the
 * coprocessor configuration; this class performs no validation of its own.
 */
public class Config {
    /** Name of the ElasticSearch cluster. */
    static String clusterName;
    /** Host of the ElasticSearch node. */
    static String nodeHost;
    /** Port of the ElasticSearch node (the Java API uses the transport port, i.e. TCP). */
    static int nodePort;
    /** Name of the ElasticSearch index. */
    static String indexName;
    /** Name of the ElasticSearch type. */
    static String typeName;

    /**
     * Renders every static setting of this class as {@code name=value}, joined by {@code ", "}.
     *
     * <p>Only static, non-synthetic fields are reported: reading an instance field with a
     * {@code null} receiver would throw, and compiler- or tool-generated fields are not settings.
     * A field that cannot be read is reported as {@code name=<inaccessible>} instead of
     * truncating the whole listing.
     *
     * @return the rendered settings, in the order reflection returns the fields
     */
    public static String getInfo() {
        List<String> entries = new ArrayList<String>();
        for (Field field : Config.class.getDeclaredFields()) {
            boolean isStatic = java.lang.reflect.Modifier.isStatic(field.getModifiers());
            if (field.isSynthetic() || !isStatic) {
                continue;
            }
            try {
                entries.add(field.getName() + "=" + field.get(null));
            } catch (IllegalAccessException ex) {
                entries.add(field.getName() + "=<inaccessible>");
            }
        }

        StringBuilder joined = new StringBuilder();
        for (String entry : entries) {
            // Every entry contains at least "=", so a non-empty builder means "not the first".
            if (joined.length() > 0) {
                joined.append(", ");
            }
            joined.append(entry);
        }
        return joined.toString();
    }

    /** Manual smoke test: fills in a few settings and prints the rendered listing. */
    public static void main(String[] args) {
        Config.clusterName = "elasticsearch";
        Config.nodeHost = "localhost";
        Config.nodePort = 9300;

        System.out.println(Config.getInfo());
    }
}
| 5 | 4.0.0 6 | 7 | com.gavin 8 | observer 9 | 1.0-SNAPSHOT 10 | 11 | 12 | 13 | cloudera 14 | https://repository.cloudera.com/artifactory/cloudera-repos/ 15 | 16 | 17 | 18 | 19 | 20 | org.apache.hbase 21 | hbase-server 22 | 0.98.1-cdh5.1.0 23 | 24 | 25 | org.elasticsearch 26 | elasticsearch 27 | 1.5.0 28 | 29 | 30 | 31 | 32 | 33 | 34 | maven-assembly-plugin 35 | 36 | 37 | 38 | fully.qualified.MainClass 39 | 40 | 41 | 42 | jar-with-dependencies 43 | 44 | 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /src/main/java/com/gavin/observer/ElasticSearchOperator.java: -------------------------------------------------------------------------------- 1 | package com.gavin.observer; 2 | 3 | 4 | import org.elasticsearch.action.bulk.BulkRequestBuilder; 5 | import org.elasticsearch.action.bulk.BulkResponse; 6 | import org.elasticsearch.action.delete.DeleteRequestBuilder; 7 | import org.elasticsearch.action.update.UpdateRequestBuilder; 8 | import org.elasticsearch.client.Client; 9 | import org.elasticsearch.client.transport.TransportClient; 10 | import org.elasticsearch.common.settings.ImmutableSettings; 11 | import org.elasticsearch.common.settings.Settings; 12 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 13 | 14 | import java.util.HashMap; 15 | import java.util.Map; 16 | import java.util.Timer; 17 | import java.util.TimerTask; 18 | import java.util.concurrent.locks.Lock; 19 | import java.util.concurrent.locks.ReentrantLock; 20 | 21 | public class ElasticSearchOperator { 22 | 23 | // 缓冲池容量 24 | private static final int MAX_BULK_COUNT = 10; 25 | // 最大提交间隔(秒) 26 | private static final int MAX_COMMIT_INTERVAL = 60 * 5; 27 | 28 | private static Client client = null; 29 | private static BulkRequestBuilder bulkRequestBuilder = null; 30 | 31 | private static Lock commitLock = new ReentrantLock(); 32 | 33 | static { 34 | Settings settings = ImmutableSettings.settingsBuilder() 35 | .put("cluster.name", 
Config.clusterName).build(); 36 | client = new TransportClient(settings) 37 | .addTransportAddress(new InetSocketTransportAddress( 38 | Config.nodeHost, Config.nodePort)); 39 | bulkRequestBuilder = client.prepareBulk(); 40 | bulkRequestBuilder.setRefresh(true); 41 | 42 | Timer timer = new Timer(); 43 | timer.schedule(new CommitTimer(), 10 * 1000, MAX_COMMIT_INTERVAL * 1000); 44 | } 45 | 46 | /** 47 | * 判断缓存池是否已满,批量提交 48 | * 49 | * @param threshold 50 | */ 51 | private static void bulkRequest(int threshold) { 52 | if (bulkRequestBuilder.numberOfActions() > threshold) { 53 | BulkResponse bulkResponse = bulkRequestBuilder.execute().actionGet(); 54 | if (!bulkResponse.hasFailures()) { 55 | bulkRequestBuilder = client.prepareBulk(); 56 | } 57 | } 58 | } 59 | 60 | /** 61 | * 加入索引请求到缓冲池 62 | * 63 | * @param builder 64 | */ 65 | public static void addUpdateBuilderToBulk(UpdateRequestBuilder builder) { 66 | commitLock.lock(); 67 | try { 68 | bulkRequestBuilder.add(builder); 69 | bulkRequest(MAX_BULK_COUNT); 70 | } catch (Exception ex) { 71 | ex.printStackTrace(); 72 | } finally { 73 | commitLock.unlock(); 74 | } 75 | } 76 | 77 | /** 78 | * 加入删除请求到缓冲池 79 | * 80 | * @param builder 81 | */ 82 | public static void addDeleteBuilderToBulk(DeleteRequestBuilder builder) { 83 | commitLock.lock(); 84 | try { 85 | bulkRequestBuilder.add(builder); 86 | bulkRequest(MAX_BULK_COUNT); 87 | } catch (Exception ex) { 88 | ex.printStackTrace(); 89 | } finally { 90 | commitLock.unlock(); 91 | } 92 | } 93 | 94 | /** 95 | * 定时任务,避免RegionServer迟迟无数据更新,导致ElasticSearch没有与HBase同步 96 | */ 97 | static class CommitTimer extends TimerTask { 98 | @Override 99 | public void run() { 100 | commitLock.lock(); 101 | try { 102 | bulkRequest(0); 103 | } catch (Exception ex) { 104 | ex.printStackTrace(); 105 | } finally { 106 | commitLock.unlock(); 107 | } 108 | } 109 | } 110 | 111 | private static void test() { 112 | for (int i = 0; i < 10; i++) { 113 | Map json = new HashMap(); 114 | json.put("field", "test"); 
115 | addUpdateBuilderToBulk(client.prepareUpdate(Config.indexName, Config.typeName, String.valueOf(i)).setUpsert(json)); 116 | } 117 | System.out.println(bulkRequestBuilder.numberOfActions()); 118 | } 119 | 120 | public static void main(String[] args) { 121 | test(); 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/com/gavin/observer/DataSyncObserver.java: -------------------------------------------------------------------------------- 1 | package com.gavin.observer; 2 | 3 | 4 | import org.apache.commons.logging.Log; 5 | import org.apache.commons.logging.LogFactory; 6 | import org.apache.hadoop.conf.Configuration; 7 | import org.apache.hadoop.hbase.Cell; 8 | import org.apache.hadoop.hbase.CellUtil; 9 | import org.apache.hadoop.hbase.CoprocessorEnvironment; 10 | import org.apache.hadoop.hbase.client.Delete; 11 | import org.apache.hadoop.hbase.client.Durability; 12 | import org.apache.hadoop.hbase.client.Put; 13 | import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; 14 | import org.apache.hadoop.hbase.coprocessor.ObserverContext; 15 | import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; 16 | import org.apache.hadoop.hbase.regionserver.wal.WALEdit; 17 | import org.apache.hadoop.hbase.util.Bytes; 18 | import org.elasticsearch.client.Client; 19 | import org.elasticsearch.client.transport.TransportClient; 20 | import org.elasticsearch.common.settings.ImmutableSettings; 21 | import org.elasticsearch.common.settings.Settings; 22 | import org.elasticsearch.common.transport.InetSocketTransportAddress; 23 | 24 | import java.io.IOException; 25 | import java.util.HashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.NavigableMap; 29 | 30 | public class DataSyncObserver extends BaseRegionObserver { 31 | 32 | private static Client client = null; 33 | private static final Log LOG = LogFactory.getLog(DataSyncObserver.class); 34 | 35 | 36 | /** 37 | * 
读取HBase Shell的指令参数 38 | * 39 | * @param env 40 | */ 41 | private void readConfiguration(CoprocessorEnvironment env) { 42 | Configuration conf = env.getConfiguration(); 43 | Config.clusterName = conf.get("es_cluster"); 44 | Config.nodeHost = conf.get("es_host"); 45 | Config.nodePort = conf.getInt("es_port", -1); 46 | Config.indexName = conf.get("es_index"); 47 | Config.typeName = conf.get("es_type"); 48 | 49 | LOG.info("observer -- started with config: " + Config.getInfo()); 50 | } 51 | 52 | 53 | @Override 54 | public void start(CoprocessorEnvironment env) throws IOException { 55 | readConfiguration(env); 56 | Settings settings = ImmutableSettings.settingsBuilder() 57 | .put("cluster.name", Config.clusterName).build(); 58 | client = new TransportClient(settings) 59 | .addTransportAddress(new InetSocketTransportAddress( 60 | Config.nodeHost, Config.nodePort)); 61 | } 62 | 63 | 64 | @Override 65 | public void postPut(ObserverContext e, Put put, WALEdit edit, Durability durability) throws IOException { 66 | try { 67 | String indexId = new String(put.getRow()); 68 | NavigableMap> familyMap = put.getFamilyCellMap(); 69 | Map json = new HashMap(); 70 | for (Map.Entry> entry : familyMap.entrySet()) { 71 | for (Cell cell : entry.getValue()) { 72 | String key = Bytes.toString(CellUtil.cloneQualifier(cell)); 73 | String value = Bytes.toString(CellUtil.cloneValue(cell)); 74 | json.put(key, value); 75 | } 76 | } 77 | ElasticSearchOperator.addUpdateBuilderToBulk(client.prepareUpdate(Config.indexName, Config.typeName, indexId).setUpsert(json)); 78 | LOG.info("observer -- add new doc: " + indexId + " to type: " + Config.typeName); 79 | } catch (Exception ex) { 80 | LOG.error(ex); 81 | } 82 | } 83 | 84 | @Override 85 | public void postDelete(final ObserverContext e, final Delete delete, final WALEdit edit, final Durability durability) throws IOException { 86 | try { 87 | String indexId = new String(delete.getRow()); 88 | 
ElasticSearchOperator.addDeleteBuilderToBulk(client.prepareDelete(Config.indexName, Config.typeName, indexId)); 89 | LOG.info("observer -- delete a doc: " + indexId); 90 | } catch (Exception ex) { 91 | LOG.error(ex); 92 | } 93 | } 94 | 95 | private static void testGetPutData(String rowKey, String columnFamily, String column, String value) { 96 | Put put = new Put(Bytes.toBytes(rowKey)); 97 | put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(column), Bytes.toBytes(value)); 98 | NavigableMap> familyMap = put.getFamilyCellMap(); 99 | System.out.println(Bytes.toString(put.getRow())); 100 | for (Map.Entry> entry : familyMap.entrySet()) { 101 | Cell cell = entry.getValue().get(0); 102 | System.out.println(Bytes.toString(CellUtil.cloneQualifier(cell))); 103 | System.out.println(Bytes.toString(CellUtil.cloneValue(cell))); 104 | } 105 | } 106 | 107 | public static void main(String[] args) { 108 | testGetPutData("111", "cf", "c1", "hello world"); 109 | } 110 | } 111 | --------------------------------------------------------------------------------