├── README.md
└── hbase-sql
    ├── .classpath
    ├── .project
    ├── .settings
    │   └── org.eclipse.jdt.core.prefs
    ├── lib
    │   └── ojdbc14-10.2.0.5.jar
    ├── pom.xml
    └── src
        ├── main
        │   └── java
        │       └── org/apache/hadoop/hbase/ext
        │           ├── HbaseUtil.java
        │           ├── loader
        │           │   ├── RelationalDatabaseLoader.java
        │           │   └── impl
        │           │       ├── ListMapHandler.java
        │           │       └── OracleDataLoader.java
        │           └── sql
        │               ├── HbaseQuery.java
        │               ├── QueryUtil.java
        │               └── impl
        │                   ├── HbaseQueryImpl.java
        │                   └── SelectSqlVisitor.java
        └── test
            ├── java
            │   └── org/apache/hadoop/hbase/ext
            │       ├── HbaseSqlTestSuite.java
            │       ├── loader
            │       │   └── impl
            │       │       └── OracleDataLoaderTest.java
            │       └── sql
            │           └── impl
            │               └── HbaseQueryImplTest.java
            └── resources
                └── hbase-site.xml

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# hbase-sql
v0.1.0 2013-1-9

Automatically exported from http://code.google.com/p/hbase-sql

Query data stored in HBase with SQL.

## Simplifying data queries against HBase
To stay compatible with interfaces that previously queried relational databases, HBase should be queryable through SQL statements.

Hive offers this: it can operate on HBase data with SQL-like syntax. But it is far too slow for our purposes, because Hive is not built for interactive queries at all; it is a data warehouse meant for batch analysis, which does not fit our use case.

Of the APIs HBase itself provides, only Scan is meant for querying data, so we need to translate SQL statements into Scans. The article <<[利用hbase的coprocessor机制来在hbase上增加sql解析引擎–(一)原因&架构](http://blog.hummingbird-one.com/?p=10196)>> shows that this is feasible.

The overall architecture is:

```
SQL statement --SQL parser--> SQL syntax nodes (objects) -> Scan -> HBase -> ResultScanner -> List
```

For example, take a simple SQL statement:
```
select a, b from table1 where a = 1 and b = 2
```
The SQL parser gives us the individual parts of the statement, and we call the corresponding HBase API so the query has the same effect:

```java
// the table to query
HTable table = new HTable(conf, "table1");
// the columns to return
Scan scan = new Scan();
scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("a"));
scan.addColumn(Bytes.toBytes("cf"), Bytes.toBytes("b"));
// the WHERE conditions
// a = 1
SingleColumnValueFilter a = new SingleColumnValueFilter(Bytes.toBytes("cf"),
        Bytes.toBytes("a"), CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(1)));
// b = 2
SingleColumnValueFilter b = new SingleColumnValueFilter(Bytes.toBytes("cf"),
        Bytes.toBytes("b"), CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(2)));
// AND
FilterList filterList = new FilterList(Operator.MUST_PASS_ALL, a, b);
scan.setFilter(filterList);
```

## Supported features
See the unit tests for details.

1. Import table data from an Oracle database into HBase
   ```java
   new OracleDataLoader().loadTable("TABLE_NAME", new String[] { "PK_COLUMN_NAME" });
   ```
2. Query table data in HBase with a SQL statement
   ```java
   List<DynaBean> rows = new HbaseQueryImpl().select("SQL");
   ```

Currently supported SQL statements:

```sql
SELECT * FROM report1                        /* query all rows */
SELECT A, B FROM report1                     /* select only some columns */
SELECT * FROM report1 WHERE A = 1 and B = 2  /* filters support only equality tests combined with AND */
SELECT * FROM report1 limit 3 offset 2       /* paging */
```
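
Note that LIMIT/OFFSET is applied on the client side rather than inside HBase: the scanner skips `offset` rows and then reads `limit` rows (see `QueryUtil.getRows`), so the skipped rows are still transferred from the region server. A minimal sketch of what `SELECT * FROM report1 limit 3 offset 2` does under the hood, assuming an already-opened `HTable` named `table`:

```java
Scan scan = new Scan();
ResultScanner scanner = table.getScanner(scan);
// skip the first 2 rows (OFFSET); they are still read from the region server
scanner.next(2);
// then take the next 3 rows (LIMIT)
Result[] page = scanner.next(3);
scanner.close();
```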
## How to use
1. Download the latest hbase-sql.jar from the Downloads page and put it in your lib directory.

   Note the project's library dependencies:
   * commons-beanutils-core-1.8.0.jar
   * commons-configuration-1.6.jar
   * commons-dbutils-1.5.jar
   * commons-lang-2.5.jar
   * commons-logging-1.1.1.jar
   * hadoop-core-1.0.4.jar
   * hbase-0.94.3.jar
   * jsqlparser-0.7.0.jar
   * log4j-1.2.16.jar
   * ojdbc14-10.2.0.5.jar
   * protobuf-java-2.4.0a.jar
   * slf4j-api-1.4.3.jar
   * slf4j-log4j12-1.4.3.jar
   * zookeeper-3.4.3.jar

2. Put a configured hbase-site.xml in the project's src directory; without it hbase-sql cannot connect to HBase.

3. Run a quick test:
   ```java
   List<DynaBean> rows = new HbaseQueryImpl().select("select * from report1");
   System.out.println(rows.size());
   ```

## TODO
Support more complex SQL query statements.

--------------------------------------------------------------------------------
/hbase-sql/.classpath:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/hbase-sql/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>hbase-sql</name>
	<comment>Query data in HBase via SQL. NO_M2ECLIPSE_SUPPORT: Project files created with the maven-eclipse-plugin are not supported in M2Eclipse.</comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.eclipse.jdt.core.javabuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.jdt.core.javanature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/hbase-sql/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
#Wed Jan 09 15:51:11 CST 2013
encoding//src/test/java=UTF-8
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
eclipse.preferences.version=1
encoding//src/test/resources=UTF-8
org.eclipse.jdt.core.compiler.source=1.6
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
org.eclipse.jdt.core.compiler.compliance=1.6
--------------------------------------------------------------------------------
/hbase-sql/lib/ojdbc14-10.2.0.5.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufologist/hbase-sql/15f8ecef5bc83cdf9cc6b9a02d2d7969cb78478d/hbase-sql/lib/ojdbc14-10.2.0.5.jar
--------------------------------------------------------------------------------
/hbase-sql/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>org.apache.hadoop.hbase</groupId>
    <artifactId>hbase-sql</artifactId>
    <version>0.1</version>

    <name>hbase-sql</name>
    <description>Query data in HBase via SQL</description>
    <url>http://code.google.com/p/hbase-sql/</url>

    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase</artifactId>
            <version>0.94.3</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>1.0.4</version>
        </dependency>
        <dependency>
            <groupId>commons-dbutils</groupId>
            <artifactId>commons-dbutils</artifactId>
            <version>1.5</version>
        </dependency>
        <dependency>
            <groupId>com.oracle</groupId>
            <artifactId>ojdbc14</artifactId>
            <version>10.2.0.5.0</version>
            <scope>system</scope>
            <systemPath>${basedir}/lib/ojdbc14-10.2.0.5.jar</systemPath>
        </dependency>
        <dependency>
            <groupId>net.sf.jsqlparser</groupId>
            <artifactId>jsqlparser</artifactId>
            <version>0.7.0</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
            <scope>test</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <configuration>
                    <source>1.6</source>
                    <target>1.6</target>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-eclipse-plugin</artifactId>
                <configuration>
                    <classpathContainers>
                        <classpathContainer>org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.6</classpathContainer>
                    </classpathContainers>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/HbaseUtil.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.FilterList.Operator;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Utility class for working with HBase.
 *
 * @author Sun
 * @version HbaseUtil.java 2013-1-6 10:18:48
 * @see HBase Java客户端编程
 */
public class HbaseUtil {
    private static Configuration conf = HBaseConfiguration.create();

    public static void create(String tableName, String[] cfs) {
        try {
            HBaseAdmin admin = new HBaseAdmin(conf);
            if (admin.tableExists(tableName)) {
                System.out.println("Table already exists!");
            } else {
                HTableDescriptor tableDesc = new HTableDescriptor(tableName);
                for (int i = 0; i < cfs.length; i++) {
                    tableDesc.addFamily(new HColumnDescriptor(cfs[i]));
                }
                admin.createTable(tableDesc);
                System.out.println("Table created successfully!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void put(String tableName, String rowKey, String cf,
            String key, String value) {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
            Put put = new Put(Bytes.toBytes(rowKey));
            put.add(Bytes.toBytes(cf), Bytes.toBytes(key), Bytes.toBytes(value));
            table.put(put);
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            closeTable(table);
        }
    }
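    // Usage sketch (hypothetical table name, row key and values; assumes an
    // HBase cluster reachable through the hbase-site.xml on the classpath):
    //
    //   HbaseUtil.create("report1", new String[] { "cf" });
    //   HbaseUtil.put("report1", "201206730", "cf", "TIME_ID", "201206");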
    public static void put(String tableName, String cf,
            List<Map<String, Object>> rows, String[] pkColumnNames) {
        HTable table = null;
        try {
            table = new HTable(conf, tableName);
        } catch (IOException e) {
            e.printStackTrace();
        }
        List<Put> puts = new ArrayList<Put>();
        int commitCount = 10000;

        for (Map<String, Object> row : rows) {
            Iterator<String> columnNames = row.keySet().iterator();

            String rowKey = getRowKey(row, pkColumnNames);
            Put put = new Put(Bytes.toBytes(rowKey));
            while (columnNames.hasNext()) {
                String columnName = columnNames.next();
                String columnValue = String.valueOf(row.get(columnName));
                put.add(Bytes.toBytes(cf), Bytes.toBytes(columnName),
                        Bytes.toBytes(columnValue));
            }
            // add the Put once per row, after all of its columns are set
            puts.add(put);

            // commit every commitCount rows to avoid an OutOfMemoryError
            // when importing a large table in one batch
            if (puts.size() > commitCount) {
                putList(table, puts);
                puts.clear();
            }
        }

        putList(table, puts);
        closeTable(table);
    }

    private static void putList(HTable table, List<Put> puts) {
        if (puts.size() > 0) {
            try {
                table.put(puts);
                table.flushCommits();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public static void closeTable(HTable table) {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    private static String getRowKey(Map<String, Object> row, String[] pkColumnNames) {
        StringBuilder str = new StringBuilder();
        for (String columnName : pkColumnNames) {
            str.append(row.get(columnName));
        }
        return str.toString();
    }

    public static void get(String tableName, String rowKey) {
        try {
            HTable table = new HTable(conf, tableName);
            Get get = new Get(Bytes.toBytes(rowKey));
            Result rs = table.get(get);
            printResult(rs);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void scan(String tableName) {
        try {
            HTable table = new HTable(conf, tableName);
            Scan s = new Scan();
            SingleColumnValueFilter sf1 = new SingleColumnValueFilter(
                    Bytes.toBytes("cf"), Bytes.toBytes("TIME_ID"), CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("201206")));
            SingleColumnValueFilter sf2 = new SingleColumnValueFilter(
                    Bytes.toBytes("cf"), Bytes.toBytes("AREA_ID"), CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("730")));
            SingleColumnValueFilter sf3 = new SingleColumnValueFilter(
                    Bytes.toBytes("cf"), Bytes.toBytes("SVC_BRND_ID"), CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("1")));
            // TIME_ID = 201206 and AREA_ID = 730 and SVC_BRND_ID = 1
            FilterList filter = new FilterList(Operator.MUST_PASS_ALL, sf1,
                    sf2, sf3);
            // RowFilter filter = new RowFilter(CompareOp.EQUAL,
            //         new RegexStringComparator("\\d{9}3"));
            s.setFilter(filter);
            ResultScanner rs = table.getScanner(s);
            for (Result r : rs) {
                printResult(r);
            }
            rs.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void testPage(String tableName, int offset, int limit) {
        try {
            HTable table = new HTable(conf, tableName);
            Scan s = new Scan();
            ResultScanner scanner = table.getScanner(s);
            // there are as many Results as there are row keys:
            // one unique row key is one record (one Result), possibly with many columns
            Result result = null;
            for (int i = 0; (result = scanner.next()) != null; i++) {
                if (i < offset) {
                    continue;
                } else if (i == offset + limit) {
                    break;
                } else {
                    printResult(result);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static void printResult(Result result) {
        for (KeyValue kv : result.raw()) {
            System.out.print(new String(kv.getRow()) + "\t");
            System.out.print(new String(kv.getFamily()) + ":");
            System.out.print(new String(kv.getQualifier()) + "\t");
            System.out.print(kv.getTimestamp() + "\t");
            System.out.println(new String(kv.getValue()));
        }
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/loader/RelationalDatabaseLoader.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.loader;

/**
 * Loads data from a relational database into HBase.
 *
 * @author Sun
 * @version RelationalDatabaseLoader.java 2013-1-5 16:27:13
 */
public interface RelationalDatabaseLoader {
    public final String DEFAULT_COLUMN_FAMILY = "cf";

    /**
     * Imports all data of a database table into HBase.
     *
     * @param tableName the table to import
     * @param pkColumnNames one or more primary-key column names; their values
     *            are concatenated to form the row key (e.g. TIME_ID=201206 and
     *            AREA_ID=730 become the row key "201206730")
     */
    public void loadTable(String tableName, String[] pkColumnNames);
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/loader/impl/ListMapHandler.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.loader.impl;

import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.dbutils.ResultSetHandler;

/**
 * Wraps a ResultSet into a List of Maps: each list element is one row, and
 * each map holds that row's column values keyed by column name.
 *
 * @author Sun
 * @version ListMapHandler.java 2013-1-6 10:06:57
 */
public class ListMapHandler implements ResultSetHandler<List<Map<String, Object>>> {
    public List<Map<String, Object>> handle(ResultSet rs) throws SQLException {
        List<Map<String, Object>> rows = new ArrayList<Map<String, Object>>();
        ResultSetMetaData meta = rs.getMetaData();

        int colCount = meta.getColumnCount();
        while (rs.next()) {
            Map<String, Object> row = new HashMap<String, Object>();
            for (int i = 0; i < colCount; i++) {
                String columnName = meta.getColumnName(i + 1);
                Object columnValue = rs.getObject(columnName);
                row.put(columnName, columnValue);
            }
            rows.add(row);
        }

        return rows;
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/loader/impl/OracleDataLoader.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.loader.impl;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;
import java.util.List;
import java.util.Map;

import org.apache.commons.dbutils.DbUtils;
import org.apache.commons.dbutils.QueryRunner;
import org.apache.commons.dbutils.ResultSetHandler;
import org.apache.hadoop.hbase.ext.HbaseUtil;
import org.apache.hadoop.hbase.ext.loader.RelationalDatabaseLoader;

/**
 * Imports data from Oracle into HBase.
 *
 * @author Sun
 * @version OracleDataLoader.java 2013-1-5 16:30:43
 * @see Hbase几种数据入库(load)方式比较
 */
public class OracleDataLoader implements RelationalDatabaseLoader {
    private ResultSetHandler<List<Map<String, Object>>> resultSetHandler = new ListMapHandler();

    @Override
    public void loadTable(String tableName, String[] pkColumnNames) {
        List<Map<String, Object>> rows = query(tableName);
        HbaseUtil.create(tableName, new String[] { DEFAULT_COLUMN_FAMILY });
        HbaseUtil.put(tableName, DEFAULT_COLUMN_FAMILY, rows, pkColumnNames);
    }

    private List<Map<String, Object>> query(String tableName) {
        QueryRunner run = new QueryRunner();
        Connection connection = getConnection();

        String sql = String.format("select * from %s", tableName);
        List<Map<String, Object>> rows = null;
        try {
            rows = run.query(connection, sql, this.resultSetHandler);
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            try {
                DbUtils.close(connection);
            } catch (SQLException e) {
                e.printStackTrace();
            }
        }

        return rows;
    }

    private Connection getConnection() {
        Connection conn = null;

        String url = "jdbc:oracle:thin:@192.168.141.10:1521:orcl10g";
        String username = "report";
        String password = "123456";
        try {
            Class.forName("oracle.jdbc.OracleDriver");
            conn = DriverManager.getConnection(url, username, password);
        } catch (Exception e) {
            e.printStackTrace();
        }
        return conn;
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/sql/HbaseQuery.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.sql;

import java.io.IOException;
import java.sql.SQLSyntaxErrorException;
import java.util.List;

import org.apache.commons.beanutils.DynaBean;

/**
 * Queries data in HBase through SQL statements.
 *
 * @author Sun
 * @version HbaseQuery.java 2013-1-7 10:50:45
 */
public interface HbaseQuery {
    public List<DynaBean> select(String sql) throws SQLSyntaxErrorException, IOException;
    public List<DynaBean> select(String sql, String startRow, String stopRow) throws SQLSyntaxErrorException, IOException;
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/sql/QueryUtil.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.sql;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.beanutils.DynaBean;
import org.apache.commons.beanutils.LazyDynaBean;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;

/**
 * Query helper utilities.
 *
 * @author Sun
 * @version QueryUtil.java 2013-1-7 13:44:13
 */
public class QueryUtil {
    public static List<DynaBean> getRows(ResultScanner resultScanner) {
        List<DynaBean> rows = new ArrayList<DynaBean>();
        // In HBase one Result is one row (possibly with many columns), tied
        // directly to the row key.
        // For example, if test_table holds these three cells
        //   row1 cf:a r1a
        //   row2 cf:a r2a
        //   row1 cf:b r1b
        // an unconditional Scan returns a ResultScanner with 2 Results:
        // one Result (row1) containing both columns a and b,
        // and another Result (row2) containing only column a.
        for (Result result : resultScanner) {
            rows.add(getRow(result));
        }

        return rows;
    }

    public static List<DynaBean> getRows(ResultScanner resultScanner,
            long offset, long limit) throws IOException {
        List<DynaBean> rows = new ArrayList<DynaBean>();

        // skip the first offset rows
        resultScanner.next((int) offset);

        if (limit > 0) {
            // take only limit rows
            Result[] limitResults = resultScanner.next((int) limit);
            for (Result result : limitResults) {
                rows.add(getRow(result));
            }
        } else {
            // no limit: return everything after the skipped offset rows
            return getRows(resultScanner);
        }

        return rows;
    }

    private static DynaBean getRow(Result result) {
        DynaBean row = new LazyDynaBean();
        for (KeyValue kv : result.raw()) {
            String columnName = new String(kv.getQualifier());
            String columnValue = new String(kv.getValue());
            row.set(columnName, columnValue);
        }
        // HbaseUtil.printResult(result);
        return row;
    }
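    // Reading the returned rows (hypothetical column name; every cell value is
    // stored as a String by getRow above, so a String cast is safe):
    //
    //   for (DynaBean row : QueryUtil.getRows(resultScanner)) {
    //       String timeId = (String) row.get("TIME_ID");
    //   }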
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/sql/impl/HbaseQueryImpl.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.sql.impl;

import java.io.IOException;
import java.io.StringReader;
import java.sql.SQLSyntaxErrorException;
import java.util.List;

import net.sf.jsqlparser.parser.CCJSqlParserManager;
import net.sf.jsqlparser.statement.select.Select;

import org.apache.commons.beanutils.DynaBean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ext.sql.HbaseQuery;
import org.apache.hadoop.hbase.ext.sql.QueryUtil;

/**
 * Scans data in HBase by parsing SQL statements.
 *
 * @author Sun
 * @version HbaseQueryImpl.java 2013-1-7 10:54:12
 * @see <a href="http://blog.hummingbird-one.com/?p=10196">利用hbase的coprocessor机制来在hbase上增加sql解析引擎–(一)原因&架构</a>
 */
public class HbaseQueryImpl implements HbaseQuery {
    private Configuration conf = HBaseConfiguration.create();

    @Override
    public List<DynaBean> select(String sql) throws SQLSyntaxErrorException, IOException {
        return select(sql, null, null);
    }

    @Override
    public List<DynaBean> select(String sql, String startRow,
            String stopRow) throws IOException, SQLSyntaxErrorException {
        SelectSqlVisitor sqlVisitor = parseSql(sql);

        HTable table = new HTable(this.conf, sqlVisitor.getTableName());
        Scan scan = sqlVisitor.getScan(startRow, stopRow);
        // HBase 0.95-SNAPSHOT API
        // Scan.setMaxResultSize

        ResultScanner resultScanner = table.getScanner(scan);
        List<DynaBean> rows = QueryUtil.getRows(resultScanner,
                sqlVisitor.getOffset(), sqlVisitor.getLimit());
        resultScanner.close();
        return rows;
    }
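    // Usage sketch (hypothetical SQL and row-key range; row keys here are the
    // concatenated PK values produced by OracleDataLoader, so the range below
    // is only illustrative):
    //
    //   List<DynaBean> rows = new HbaseQueryImpl()
    //           .select("SELECT * FROM report1", "201206", "201207");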
    private SelectSqlVisitor parseSql(String sql) throws SQLSyntaxErrorException {
        CCJSqlParserManager parserManager = new CCJSqlParserManager();
        SelectSqlVisitor sqlFinder = null;
        try {
            Select select = (Select) parserManager.parse(new StringReader(sql));
            sqlFinder = new SelectSqlVisitor(select);
        } catch (Exception e) {
            throw new SQLSyntaxErrorException(sql, e);
        }
        return sqlFinder;
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/main/java/org/apache/hadoop/hbase/ext/sql/impl/SelectSqlVisitor.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext.sql.impl;

import java.util.ArrayList;
import java.util.List;

import net.sf.jsqlparser.expression.AllComparisonExpression;
import net.sf.jsqlparser.expression.AnyComparisonExpression;
import net.sf.jsqlparser.expression.CaseExpression;
import net.sf.jsqlparser.expression.DateValue;
import net.sf.jsqlparser.expression.DoubleValue;
import net.sf.jsqlparser.expression.ExpressionVisitor;
import net.sf.jsqlparser.expression.Function;
import net.sf.jsqlparser.expression.InverseExpression;
import net.sf.jsqlparser.expression.JdbcParameter;
import net.sf.jsqlparser.expression.LongValue;
import net.sf.jsqlparser.expression.NullValue;
import net.sf.jsqlparser.expression.Parenthesis;
import net.sf.jsqlparser.expression.StringValue;
import net.sf.jsqlparser.expression.TimeValue;
import net.sf.jsqlparser.expression.TimestampValue;
import net.sf.jsqlparser.expression.WhenClause;
import net.sf.jsqlparser.expression.operators.arithmetic.Addition;
import net.sf.jsqlparser.expression.operators.arithmetic.BitwiseAnd;
import net.sf.jsqlparser.expression.operators.arithmetic.BitwiseOr;
import net.sf.jsqlparser.expression.operators.arithmetic.BitwiseXor;
import net.sf.jsqlparser.expression.operators.arithmetic.Concat;
import net.sf.jsqlparser.expression.operators.arithmetic.Division;
import net.sf.jsqlparser.expression.operators.arithmetic.Multiplication;
import net.sf.jsqlparser.expression.operators.arithmetic.Subtraction;
import net.sf.jsqlparser.expression.operators.conditional.AndExpression;
import net.sf.jsqlparser.expression.operators.conditional.OrExpression;
import net.sf.jsqlparser.expression.operators.relational.Between;
import net.sf.jsqlparser.expression.operators.relational.EqualsTo;
import net.sf.jsqlparser.expression.operators.relational.ExistsExpression;
import net.sf.jsqlparser.expression.operators.relational.GreaterThan;
import net.sf.jsqlparser.expression.operators.relational.GreaterThanEquals;
import net.sf.jsqlparser.expression.operators.relational.InExpression;
import net.sf.jsqlparser.expression.operators.relational.IsNullExpression;
import net.sf.jsqlparser.expression.operators.relational.LikeExpression;
import net.sf.jsqlparser.expression.operators.relational.Matches;
import net.sf.jsqlparser.expression.operators.relational.MinorThan;
import net.sf.jsqlparser.expression.operators.relational.MinorThanEquals;
import net.sf.jsqlparser.expression.operators.relational.NotEqualsTo;
import net.sf.jsqlparser.schema.Column;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.select.FromItemVisitor;
import net.sf.jsqlparser.statement.select.Limit;
import net.sf.jsqlparser.statement.select.PlainSelect;
import net.sf.jsqlparser.statement.select.Select;
import net.sf.jsqlparser.statement.select.SelectVisitor;
import net.sf.jsqlparser.statement.select.SubJoin;
import net.sf.jsqlparser.statement.select.SubSelect;
import net.sf.jsqlparser.statement.select.Union;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.FilterList;
import org.apache.hadoop.hbase.filter.SingleColumnValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * Extracts the individual parts of a SQL SELECT statement.
 *
 * @author Sun
 * @version SelectSqlVisitor.java 2013-1-8 9:19:50
 */
public class SelectSqlVisitor implements SelectVisitor, FromItemVisitor,
        ExpressionVisitor {
    private List<String> tables = new ArrayList<String>();
    private long limit = 0L;
    private long offset = 0L;

    private Scan scan = new Scan();
    // TODO all WHERE conditions are currently combined with AND logic
    private FilterList filters = new FilterList();

    public SelectSqlVisitor(Select select) {
        // walk the select statement
        select.getSelectBody().accept(this);
    }
    public String getTableName() {
        // XXX queries against HBase are simple, so only one table is ever involved
        return this.tables.get(0);
    }

    public Scan getScan(String startRow, String stopRow) {
        setScanRange(startRow, stopRow);
        this.scan.setFilter(this.filters);
        return this.scan;
    }

    private void setScanRange(String startRow, String stopRow) {
        if (StringUtils.isNotBlank(startRow)) {
            this.scan.setStartRow(Bytes.toBytes(startRow));
        }
        if (StringUtils.isNotBlank(stopRow)) {
            this.scan.setStopRow(Bytes.toBytes(stopRow));
        }
    }

    public long getLimit() {
        return this.limit;
    }

    public long getOffset() {
        return this.offset;
    }

    private void setColumn(List selectItems) {
        for (Object item : selectItems) {
            if (item.toString().equals("*")) {
                break;
            }
            // TODO hard-coded column family; column aliases etc. are not handled
            this.scan.addColumn(Bytes.toBytes("cf"),
                    Bytes.toBytes(item.toString()));
        }
    }

    private void initLimitOffset(PlainSelect plainSelect) {
        Limit limit = plainSelect.getLimit();
        if (limit == null) {
            return;
        }

        this.limit = limit.getRowCount();
        this.offset = limit.getOffset();
    }

    @Override
    public void visit(PlainSelect plainSelect) {
        List selectItems = plainSelect.getSelectItems();
        setColumn(selectItems);
        initLimitOffset(plainSelect);

        plainSelect.getFromItem().accept(this);
        if (plainSelect.getWhere() != null) {
            plainSelect.getWhere().accept(this);
        }
    }

    @Override
    public void visit(Table table) {
        String tableWholeName = table.getWholeTableName();
        this.tables.add(tableWholeName);
    }

    @Override
    public void visit(AndExpression and) {
        and.getLeftExpression().accept(this);
        and.getRightExpression().accept(this);
    }

    @Override
    public void visit(EqualsTo equalsTo) {
        // XXX assumes an EqualsTo node is already a plain comparison like a = 2
        String column = equalsTo.getLeftExpression().toString();
        String value = equalsTo.getRightExpression().toString();

        // TODO hard-coded column family
        SingleColumnValueFilter filter = new SingleColumnValueFilter(
                Bytes.toBytes("cf"), Bytes.toBytes(column),
                CompareOp.EQUAL, new BinaryComparator(Bytes.toBytes(value)));
        this.filters.addFilter(filter);
    }
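    // For reference: jsqlparser parses "A = 1 AND B = 2" into
    // AndExpression(EqualsTo(A, 1), EqualsTo(B, 2)), so visit(AndExpression)
    // above recurses into both sides and each EqualsTo contributes one
    // SingleColumnValueFilter to the FilterList (MUST_PASS_ALL by default).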
    @Override
    public void visit(Parenthesis arg0) {
        // TODO not implemented
    }

    @Override
    public void visit(OrExpression arg0) {
        // TODO not implemented
    }

    @Override
    public void visit(NotEqualsTo arg0) {
        // TODO not implemented
    }

    @Override
    public void visit(Column column) {
        // TODO not implemented
    }

    public void visit(GreaterThan arg0) {}
    public void visit(GreaterThanEquals arg0) {}
    public void visit(MinorThan arg0) {}
    public void visit(MinorThanEquals arg0) {}
    public void visit(InExpression arg0) {}
    public void visit(LikeExpression arg0) {}
    public void visit(DoubleValue arg0) {}
    public void visit(LongValue arg0) {}
    public void visit(DateValue arg0) {}
    public void visit(TimeValue arg0) {}
    public void visit(TimestampValue arg0) {}
    public void visit(StringValue arg0) {}
    public void visit(Addition arg0) {}
    public void visit(Division arg0) {}
    public void visit(Multiplication arg0) {}
    public void visit(Subtraction arg0) {}
    public void visit(Between arg0) {}
    public void visit(WhenClause whenClause) {}
    public void visit(NullValue arg0) {}
    public void visit(Function arg0) {}
    public void visit(InverseExpression arg0) {}
    public void visit(JdbcParameter arg0) {}
    public void visit(IsNullExpression arg0) {}
    public void visit(CaseExpression arg0) {}
    public void visit(ExistsExpression arg0) {}
    public void visit(AllComparisonExpression arg0) {}
    public void visit(AnyComparisonExpression arg0) {}
    public void visit(Union arg0) {}
    public void visit(SubJoin arg0) {}
    public void visit(SubSelect arg0) {}
    public void visit(BitwiseXor arg0) {}
    public void visit(BitwiseOr arg0) {}
    public void visit(BitwiseAnd arg0) {}
    public void visit(Matches arg0) {}
    public void visit(Concat arg0) {}
}
--------------------------------------------------------------------------------
/hbase-sql/src/test/java/org/apache/hadoop/hbase/ext/HbaseSqlTestSuite.java:
--------------------------------------------------------------------------------
/*
 * Copyright
 */

package org.apache.hadoop.hbase.ext;

import org.apache.hadoop.hbase.ext.loader.impl.OracleDataLoaderTest;
import org.apache.hadoop.hbase.ext.sql.impl.HbaseQueryImplTest;
import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;

/**
 * Test suite for hbase-sql.
 *
 * @author Sun
 * @version HbaseSqlTestSuite.java 2013-1-9 16:21:53
 */
@RunWith(Suite.class)
@SuiteClasses({ OracleDataLoaderTest.class, HbaseQueryImplTest.class })
public class HbaseSqlTestSuite {
}
--------------------------------------------------------------------------------
/hbase-sql/src/test/java/org/apache/hadoop/hbase/ext/loader/impl/OracleDataLoaderTest.java:
--------------------------------------------------------------------------------
package org.apache.hadoop.hbase.ext.loader.impl;

import org.apache.hadoop.hbase.ext.loader.RelationalDatabaseLoader;
import org.junit.Test;

public class OracleDataLoaderTest {
    RelationalDatabaseLoader loader = new OracleDataLoader();

    @Test
    public void testLoadTable() {
        String tableName = "report1";
        this.loader.loadTable(tableName, new String[] { "TIME_ID", "AREA_ID",
                "SVC_BRND_ID" });
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/test/java/org/apache/hadoop/hbase/ext/sql/impl/HbaseQueryImplTest.java:
--------------------------------------------------------------------------------
package org.apache.hadoop.hbase.ext.sql.impl;

import java.io.IOException;
import java.sql.SQLSyntaxErrorException;
import java.util.List;

import org.apache.commons.beanutils.DynaBean;
import org.apache.commons.beanutils.DynaProperty;
import org.apache.hadoop.hbase.ext.sql.HbaseQuery;
import org.junit.Assert;
import org.junit.Test;

public class HbaseQueryImplTest {
    HbaseQuery hbaseQuery = new HbaseQueryImpl();

    @Test
    public void testSelectAsterisk() throws SQLSyntaxErrorException, IOException {
        String sql = "SELECT * FROM report1";
        List<DynaBean> rows = this.hbaseQuery.select(sql);
        printBean(rows);
        Assert.assertEquals(13, rows.size());
    }

    @Test
    public void testWhere() throws SQLSyntaxErrorException, IOException {
        String sql = "SELECT * FROM report1 WHERE TIME_ID = 201206 and AREA_ID = 730";
        List<DynaBean> rows = this.hbaseQuery.select(sql);
        printBean(rows);
        Assert.assertEquals(1, rows.size());
    }

    @Test
    public void testLimit() throws SQLSyntaxErrorException, IOException {
        String sql = "SELECT TIME_ID, AREA_NAME FROM report1 limit 3 offset 2";
        List<DynaBean> rows = this.hbaseQuery.select(sql);
        printBean(rows);
        Assert.assertEquals(3, rows.size());
    }

    private static void printBean(List<DynaBean> beans) {
        DynaProperty[] properties = beans.get(0).getDynaClass()
                .getDynaProperties();
        StringBuilder str = new StringBuilder();
        for (DynaProperty property : properties) {
            str.append(property.getName()).append("\t");
        }
        str.append("\n----------------------------------\n");

        for (DynaBean bean : beans) {
            for (DynaProperty property : properties) {
                str.append(bean.get(property.getName())).append("\t");
            }
            str.append("\n");
        }
        System.out.print(str);
        System.out.println("----------------------------------");
        System.out.println(beans.size());
    }
}
--------------------------------------------------------------------------------
/hbase-sql/src/test/resources/hbase-site.xml:
--------------------------------------------------------------------------------
<?xml version="1.0"?>
<configuration>
    <property>
        <name>hbase.rootdir</name>
        <value>hdfs://192.168.31.126:9000/hbase</value>
    </property>
    <property>
        <name>hbase.cluster.distributed</name>
        <value>true</value>
    </property>
    <property>
        <name>hbase.zookeeper.quorum</name>
        <value>192.168.31.126</value>
    </property>
</configuration>
--------------------------------------------------------------------------------