├── README.md
├── disct
├── chain.conf
├── sql.jar
├── student.txt
└── teacher.txt
├── doc
└── img
│ ├── comm.png
│ ├── sql1.png
│ ├── sql2.png
│ ├── sql3.png
│ ├── sql4.png
│ ├── sql5.png
│ └── sql6.png
└── src
└── sql-egine
├── .classpath
├── .project
├── .settings
├── org.eclipse.core.resources.prefs
├── org.eclipse.jdt.core.prefs
└── org.eclipse.m2e.core.prefs
├── pom.xml
├── src
├── main
│ ├── java
│ │ ├── com
│ │ │ ├── conf
│ │ │ │ ├── SqlChain.java
│ │ │ │ └── SqlConf.java
│ │ │ ├── engine
│ │ │ │ ├── MrEngine.java
│ │ │ │ └── SqlEngine.java
│ │ │ ├── file
│ │ │ │ ├── FileTable.java
│ │ │ │ ├── HDFSTable.java
│ │ │ │ └── Table.java
│ │ │ ├── hdfs
│ │ │ │ └── HDFSHelper.java
│ │ │ ├── mr
│ │ │ │ ├── SortMapper.java
│ │ │ │ ├── SortReducer.java
│ │ │ │ ├── SqlCombiner.java
│ │ │ │ ├── SqlMapper.java
│ │ │ │ └── SqlReducer.java
│ │ │ └── sql
│ │ │ │ ├── SqlExeEngine.java
│ │ │ │ └── SqlParse.java
│ │ └── org
│ │ │ └── apache
│ │ │ └── hadoop
│ │ │ └── io
│ │ │ └── nativeio
│ │ │ ├── NativeIO.java
│ │ │ └── NativeIO.java.bak
│ └── resources
│ │ ├── chain.conf
│ │ ├── log4j.properties
│ │ ├── sql.conf
│ │ ├── student.txt
│ │ └── teacher.txt
└── test
│ ├── java
│ └── com
│ │ └── file
│ │ ├── FileTableTest.java
│ │ └── FileTableTest.java.bak
│ └── resources
│ ├── log4j.properties
│ ├── student.txt
│ ├── student.txt.bak
│ ├── teacher.txt
│ └── teacher.txt.bak
└── target
├── classes
├── chain.conf
├── com
│ ├── conf
│ │ ├── SqlChain.class
│ │ └── SqlConf.class
│ ├── engine
│ │ ├── MrEngine.class
│ │ └── SqlEngine.class
│ ├── file
│ │ ├── FileTable.class
│ │ ├── HDFSTable.class
│ │ └── Table.class
│ ├── hdfs
│ │ └── HDFSHelper.class
│ ├── mr
│ │ ├── SortMapper.class
│ │ ├── SortReducer.class
│ │ ├── SqlCombiner.class
│ │ ├── SqlMapper.class
│ │ └── SqlReducer.class
│ └── sql
│ │ ├── SqlExeEngine$1.class
│ │ ├── SqlExeEngine$1On.class
│ │ ├── SqlExeEngine.class
│ │ └── SqlParse.class
├── log4j.properties
├── org
│ └── apache
│ │ └── hadoop
│ │ └── io
│ │ └── nativeio
│ │ ├── NativeIO$CachedUid.class
│ │ ├── NativeIO$POSIX$CacheManipulator.class
│ │ ├── NativeIO$POSIX$CachedName.class
│ │ ├── NativeIO$POSIX$IdCache.class
│ │ ├── NativeIO$POSIX$NoMlockCacheManipulator.class
│ │ ├── NativeIO$POSIX$Stat.class
│ │ ├── NativeIO$POSIX.class
│ │ ├── NativeIO$Windows$AccessRight.class
│ │ ├── NativeIO$Windows.class
│ │ ├── NativeIO.class
│ │ └── NativeIO.java.bak
├── sql.conf
├── student.txt
└── teacher.txt
└── test-classes
├── com
└── file
│ ├── FileTableTest.class
│ └── FileTableTest.java.bak
├── log4j.properties
├── student.txt
├── student.txt.bak
├── teacher.txt
└── teacher.txt.bak
/README.md:
--------------------------------------------------------------------------------
1 | # SQL Engine
2 |
3 |
4 |
5 | ---
6 |
7 | # 一、简介
8 | 闲来无事,简单写了一个在HDFS上执行SQL的小工具。与Hive不同,它不需要MySQL支持,只需要简单配置一下需要执行的SQL和日志的格式即可,同时支持对日志进行正则过滤和提取必要字段,也支持JSON格式的日志。
9 |
10 | # 二、配置需要执行的SQL和日志格式
11 | ```shell
12 | ##############
13 | # HDFS
14 | ##############
15 | log.hdfs=hdfs://192.168.1.32:9000
16 |
17 | ##############
18 | # SQL
19 | ##############
20 |
21 | # 简单查询
22 | log.sql1=create /sql/out1 as select * from s order by id desc
23 |
24 | # 简单查询
25 | log.sql2=create /sql/out2 as select id,name,grade from s where id>10 order by grade desc limit 0,10
26 |
27 | # 查询最高的成绩
28 | log.sql3=create /sql/out3 as select max(grade) as grade from s
29 |
30 | # 表连接
31 | log.sql4=create /sql/out4 as select s.id,s.name,s.grade,t.id,t.name from s join t on s.tid=t.id limit 0,10
32 |
33 | # 分组查询
34 | log.sql5=create /sql/out5 as select s.tid,count(s.id) as s.count from s group by s.tid
35 |
36 | # 表连接分组查询
37 | log.sql6=create /sql/out6 as select t.name,count(t.id) as t.count from s join t on s.tid=t.id group by t.id,t.name order by t.count desc limit 0,5
38 |
39 | # log chain
40 | log.chain=sql1,sql2,sql3,sql4,sql5,sql6
41 |
42 | ##############
43 | # VAR
44 | ##############
45 | # log table
46 | log.table.t=/sql/teacher.txt:id|name:#split:#filter
47 | log.table.s=/sql/student.txt:id|name|grade|tid:#split:#filter
48 |
49 | # split
50 | log.split=|
51 |
52 | # log filter
53 | log.filter=(^[^#].*)
54 | ```
55 |
56 | # 三、执行
57 |
58 | ```shell
59 | # 执行命令
60 | hadoop jar sql.jar com.engine.MrEngine /log/chain.conf
61 | ```
62 | ![comm][1]
63 | ```shell
64 | # 简单查询
65 | select * from s order by id desc
66 | ```
67 | ![sql1][2]
68 | ```shell
69 | # 简单查询
70 | select id,name,grade from s where id>10 order by grade desc limit 0,10
71 | ```
72 | ![sql2][3]
73 | ```shell
74 | # 查询最高的成绩
75 | select max(grade) as grade from s
76 | ```
77 | ![sql3][4]
78 | ```shell
79 | # 表连接
80 | select s.id,s.name,s.grade,t.id,t.name from s join t on s.tid=t.id limit 0,10
81 | ```
82 | ![sql4][5]
83 | ```shell
84 | # 分组查询
85 | select s.tid,count(s.id) as s.count from s group by s.tid
86 | ```
87 | ![sql5][6]
88 | ```shell
89 | # 表连接分组查询
90 | select t.name,count(t.id) as t.count from s join t on s.tid=t.id group by t.id,t.name order by t.count desc limit 0,5
91 | ```
92 | ![sql6][7]
93 |
94 | [1]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/comm.png
95 | [2]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql1.png
96 | [3]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql2.png
97 | [4]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql3.png
98 | [5]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql4.png
99 | [6]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql5.png
100 | [7]: https://raw.githubusercontent.com/mircode/sql-engine/master/doc/img/sql6.png
101 |
--------------------------------------------------------------------------------
/disct/chain.conf:
--------------------------------------------------------------------------------
1 | ##############
2 | # HDFS
3 | ##############
4 | log.hdfs=hdfs://192.168.1.32:9000
5 |
6 | ##############
7 | # SQL
8 | ##############
9 |
10 | # 简单查询
11 | log.sql1=create /sql/out1 as select * from s order by id desc
12 |
13 | # 简单查询
14 | log.sql2=create /sql/out2 as select id,name,grade from s where id>10 order by grade desc limit 0,10
15 |
16 | # 查询最高的成绩
17 | log.sql3=create /sql/out3 as select max(grade) as grade from s
18 |
19 | # 表连接
20 | log.sql4=create /sql/out4 as select s.id,s.name,s.grade,t.id,t.name from s join t on s.tid=t.id limit 0,10
21 |
22 | # 分组查询
23 | log.sql5=create /sql/out5 as select s.tid,count(s.id) as s.count from s group by s.tid
24 |
25 | # 表连接分组查询
26 | log.sql6=create /sql/out6 as select t.name,count(t.id) as t.count from s join t on s.tid=t.id group by t.id,t.name order by t.count desc limit 0,5
27 |
28 | # log chain
29 | log.chain=sql1,sql2,sql3,sql4,sql5,sql6
30 |
31 | ##############
32 | # VAR
33 | ##############
34 | # log table
35 | log.table.t=/sql/teacher.txt:id|name:#split:#filter
36 | log.table.s=/sql/student.txt:id|name|grade|tid:#split:#filter
37 |
38 | # split
39 | log.split=|
40 |
41 | # log filter
42 | log.filter=(^[^#].*)
--------------------------------------------------------------------------------
/disct/sql.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/disct/sql.jar
--------------------------------------------------------------------------------
/disct/student.txt:
--------------------------------------------------------------------------------
1 | #id|name|grade|tid
2 | 0|student0|90|2
3 | 1|student1|54|9
4 | 2|student2|34|2
5 | 3|student3|21|6
6 | 4|student4|12|0
7 | 5|student5|43|3
8 | 6|student6|87|3
9 | 7|student7|3|1
10 | 8|student8|58|8
11 | 9|student9|41|9
12 | 10|student10|47|7
13 | 11|student11|35|8
14 | 12|student12|42|7
15 | 13|student13|17|4
16 | 14|student14|72|4
17 | 15|student15|15|0
18 | 16|student16|5|6
19 | 17|student17|84|6
20 | 18|student18|78|1
21 | 19|student19|49|0
22 | 20|student20|83|1
23 | 21|student21|67|7
24 | 22|student22|94|0
25 | 23|student23|47|4
26 | 24|student24|11|8
27 | 25|student25|2|4
28 | 26|student26|99|3
29 | 27|student27|43|9
30 | 28|student28|96|7
31 | 29|student29|89|3
32 | 30|student30|85|9
33 | 31|student31|84|9
34 | 32|student32|18|4
35 | 33|student33|0|4
36 | 34|student34|46|3
37 | 35|student35|45|5
38 | 36|student36|34|5
39 | 37|student37|91|4
40 | 38|student38|4|8
41 | 39|student39|94|8
42 | 40|student40|13|8
43 | 41|student41|6|9
44 | 42|student42|93|7
45 | 43|student43|15|4
46 | 44|student44|61|5
47 | 45|student45|25|1
48 | 46|student46|25|8
49 | 47|student47|8|5
50 | 48|student48|32|0
51 | 49|student49|69|0
52 | 50|student50|48|9
53 | 51|student51|53|5
54 | 52|student52|92|4
55 | 53|student53|78|9
56 | 54|student54|83|6
57 | 55|student55|33|8
58 | 56|student56|32|5
59 | 57|student57|30|7
60 | 58|student58|60|7
61 | 59|student59|43|0
62 | 60|student60|50|6
63 | 61|student61|55|6
64 | 62|student62|57|6
65 | 63|student63|84|0
66 | 64|student64|15|9
67 | 65|student65|64|0
68 | 66|student66|77|9
69 | 67|student67|12|3
70 | 68|student68|47|1
71 | 69|student69|44|4
72 | 70|student70|6|5
73 | 71|student71|21|3
74 | 72|student72|7|7
75 | 73|student73|1|0
76 | 74|student74|8|0
77 | 75|student75|77|0
78 | 76|student76|64|2
79 | 77|student77|24|8
80 | 78|student78|77|8
81 | 79|student79|44|6
82 | 80|student80|2|8
83 | 81|student81|31|6
84 | 82|student82|32|6
85 | 83|student83|81|7
86 | 84|student84|0|7
87 | 85|student85|39|4
88 | 86|student86|27|3
89 | 87|student87|64|0
90 | 88|student88|76|3
91 | 89|student89|41|6
92 | 90|student90|32|5
93 | 91|student91|32|4
94 | 92|student92|56|8
95 | 93|student93|69|4
96 | 94|student94|61|3
97 | 95|student95|86|1
98 | 96|student96|82|3
99 | 97|student97|73|5
100 | 98|student98|98|2
101 | 99|student99|7|1
--------------------------------------------------------------------------------
/disct/teacher.txt:
--------------------------------------------------------------------------------
1 | #id|name
2 | 0|teacher0
3 | 1|teacher1
4 | 2|teacher2
5 | 3|teacher3
6 | 4|teacher4
7 | 5|teacher5
8 | 6|teacher6
9 | 7|teacher7
10 | 8|teacher8
11 | 9|teacher9
--------------------------------------------------------------------------------
/doc/img/comm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/comm.png
--------------------------------------------------------------------------------
/doc/img/sql1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql1.png
--------------------------------------------------------------------------------
/doc/img/sql2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql2.png
--------------------------------------------------------------------------------
/doc/img/sql3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql3.png
--------------------------------------------------------------------------------
/doc/img/sql4.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql4.png
--------------------------------------------------------------------------------
/doc/img/sql5.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql5.png
--------------------------------------------------------------------------------
/doc/img/sql6.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/mircode/sql-engine/344939f926467a1df31b4852995ce87d4df23280/doc/img/sql6.png
--------------------------------------------------------------------------------
/src/sql-egine/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
--------------------------------------------------------------------------------
/src/sql-egine/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | sql-egine
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 | org.eclipse.m2e.core.maven2Builder
15 |
16 |
17 |
18 |
19 |
20 | org.eclipse.jdt.core.javanature
21 | org.eclipse.m2e.core.maven2Nature
22 |
23 |
24 |
--------------------------------------------------------------------------------
/src/sql-egine/.settings/org.eclipse.core.resources.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | encoding/=UTF-8
3 |
--------------------------------------------------------------------------------
/src/sql-egine/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.methodParameters=do not generate
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.8
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
13 | org.eclipse.jdt.core.compiler.source=1.8
14 |
--------------------------------------------------------------------------------
/src/sql-egine/.settings/org.eclipse.m2e.core.prefs:
--------------------------------------------------------------------------------
1 | activeProfiles=
2 | eclipse.preferences.version=1
3 | resolveWorkspaceProjects=true
4 | version=1
5 |
--------------------------------------------------------------------------------
/src/sql-egine/pom.xml:
--------------------------------------------------------------------------------
1 |
2 | 4.0.0
3 | sql-egine
4 | sql-egine
5 | 0.0.1-SNAPSHOT
6 |
7 | 2.7.1
8 | 2.3.1
9 |
10 |
11 |
12 | jdk.tools
13 | jdk.tools
14 | 1.8
15 | system
16 | ${JAVA_HOME}/lib/tools.jar
17 |
18 |
19 | org.apache.hadoop
20 | hadoop-common
21 | ${hadoop.version}
22 |
23 |
24 | org.apache.hadoop
25 | hadoop-hdfs
26 | ${hadoop.version}
27 |
28 |
29 | org.apache.hadoop
30 | hadoop-mapreduce-client-core
31 | ${hadoop.version}
32 |
33 |
34 | org.apache.hadoop
35 | hadoop-mapreduce-client-jobclient
36 | ${hadoop.version}
37 |
38 |
39 | org.apache.hadoop
40 | hadoop-mapreduce-client-common
41 | ${hadoop.version}
42 |
43 |
44 |
45 |
--------------------------------------------------------------------------------
/src/sql-egine/src/main/java/com/conf/SqlChain.java:
--------------------------------------------------------------------------------
1 | package com.conf;
2 |
3 | import java.io.FileInputStream;
4 | import java.net.URLDecoder;
5 | import java.util.ArrayList;
6 | import java.util.HashMap;
7 | import java.util.Iterator;
8 | import java.util.List;
9 | import java.util.Map;
10 | import java.util.Properties;
11 |
12 | import com.file.Table;
13 | import com.sql.SqlParse;
14 |
15 | public class SqlChain {
16 |
17 | // 配置前缀
18 | public static final String LOG_PREFIX = "log";
19 | // 变量前缀
20 | public static final String LOG_VAR_PREFIX = "#";
21 | // HDFS路径
22 | public static final String LOG_HDFS = "log.hdfs";
23 | // SQL执行链
24 | public static final String LOG_CHAIN = "log.chain";
25 |
26 | public static List getChain(String path) {
27 | // Loads the properties file at path and builds one SqlConf per entry of log.chain.
28 | if(path.startsWith("classpath:")){
29 | path=getResource(path);
30 | }
31 | Properties prop = new Properties();
32 | // Read the configuration file; a failure is only printed and parsing continues.
33 | // NOTE(review): in.close() is skipped if load() throws; consider try-with-resources.
34 | try {
35 | FileInputStream in = new FileInputStream(path);
36 | prop.load(in);
37 | in.close();
38 | } catch (Exception e) {
39 | e.printStackTrace();
40 | }
41 | // Parse the basic settings; the chain falls back to "#sql" when log.chain is not set
42 | String hdfs = prop.getProperty(SqlChain.LOG_HDFS);
43 | String chain = prop.getProperty(SqlChain.LOG_CHAIN,
44 | SqlChain.LOG_VAR_PREFIX + "sql");
45 |
46 | // Holds the configurations built below
47 | List confs = new ArrayList();
48 |
49 | // Resolve each comma-separated chain entry
50 | for (String sql : chain.split(",")) {
51 | // Entries may omit the leading '#'; add it so get() treats the entry as a variable
52 | if(!sql.startsWith("#")) sql="#"+sql;
53 | sql=SqlChain.get(prop, sql);
54 |
55 | // Parse the SQL
56 | SqlParse sqlParse = new SqlParse(sql);
57 |
58 | // Tables referenced by the SQL, keyed by table name
59 | Map tables = new HashMap();
60 |
61 | // Resolve the SQL's output path
62 | String output = sqlParse.get(SqlParse.OUTPUT);
63 | output = SqlChain.get(prop, output);
64 |
65 | // Resolve the SQL's inputs
66 | String inputs = sqlParse.get(SqlParse.INPUT);
67 | // each entry is either input:name or name
68 | for (String input : inputs.split(",")) {
69 |
70 | String name = null;
71 | // Extract the table name
72 | if (input.contains(":")) {
73 | name = input.split(":")[1];
74 | input = input.replace(":", ".");
75 | } else {
76 | name = input;
77 | input = "*." + input;
78 | }
79 | // Resolve the input definition and split it on ':' into path, format, split and filter
80 | String value = SqlChain.get(prop, input);
81 | String[] splits = value.split(":");
82 |
83 | String in = null;
84 | if (splits.length >= 1) {
85 | in = SqlChain.get(prop, splits[0]);
86 | }
87 | String format = null;
88 | if (splits.length >= 2) {
89 | format = SqlChain.get(prop, splits[1]);
90 | }
91 | String split = null;
92 | if (splits.length >= 3) {
93 | split = SqlChain.get(prop, splits[2]);
94 | }
95 | String filter = null;
96 | if (splits.length >= 4) {
97 | filter = SqlChain.get(prop, splits[3]);
98 | }
99 | tables.put(name, new Table(name, in, split, format, filter));
100 | }
101 |
102 | // Create the SqlConf object
103 | SqlConf conf = new SqlConf(hdfs, sql, output, tables);
104 | confs.add(conf);
105 |
106 | }
107 |
108 | return confs;
109 |
110 | }
111 |
112 | /**
113 | * 获取变量的值,如果key不是变量,则直接返回key
114 | *
115 | * @param prop
116 | * @param key
117 | * #input.t或*.t
118 | * @return
119 | */
120 | private static String get(Properties prop, String key) {
121 | if (key.startsWith(LOG_VAR_PREFIX)) {
122 | return prop.getProperty(
123 | LOG_PREFIX + "." + key.substring(1), key);
124 | } else if (key.startsWith("*")) {
125 |
126 | String value = null;
127 | String regex = key.replace(".", "\\.").replace("*", ".*");
128 | regex = LOG_PREFIX + "\\." + regex;
129 | Iterator> it = prop.entrySet().iterator();
130 | while (it.hasNext()) {
131 | Map.Entry