originOrders) throws Exception {
52 |
53 | }
54 | }
55 | ```
56 |
57 | ## Using the Function
58 |
59 | ```sql
60 |
61 | !set spark.sql.externalUdfClasses = com.wankun.udfs.recommend.AttributionOrdersUDF;
62 |
63 | SELECT attribution_orders(ab_target, prior_spu_id, spu_id, orders) as orders
64 | FROM trade;
65 | ```
--------------------------------------------------------------------------------
/docs/images/architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/architecture.png
--------------------------------------------------------------------------------
/docs/images/dq2_bollinger_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq2_bollinger_model.png
--------------------------------------------------------------------------------
/docs/images/dq2_ewma_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq2_ewma_model.png
--------------------------------------------------------------------------------
/docs/images/dq2_row_number.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq2_row_number.png
--------------------------------------------------------------------------------
/docs/images/dq_bollinger_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq_bollinger_model.png
--------------------------------------------------------------------------------
/docs/images/dq_ewma_model.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq_ewma_model.png
--------------------------------------------------------------------------------
/docs/images/dq_row_number.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/wankunde/sql-runner/57998a059d610b358b96988ab9ca43b96bf31d06/docs/images/dq_row_number.png
--------------------------------------------------------------------------------
/src/main/java/one/profiler/AsyncProfilerMXBean.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 Andrei Pangin
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package one.profiler;
18 |
19 | /**
20 | * AsyncProfiler interface for JMX server.
21 | * How to register AsyncProfiler MBean:
22 | *
23 | * {@code
24 | * ManagementFactory.getPlatformMBeanServer().registerMBean(
25 | * AsyncProfiler.getInstance(),
26 | * new ObjectName("one.profiler:type=AsyncProfiler")
27 | * );
28 | * }
29 | */
30 | public interface AsyncProfilerMXBean {
31 | void start(String event, long interval) throws IllegalStateException;
32 | void resume(String event, long interval) throws IllegalStateException;
33 | void stop() throws IllegalStateException;
34 |
35 | long getSamples();
36 | String getVersion();
37 |
38 | String execute(String command) throws IllegalArgumentException, java.io.IOException;
39 |
40 | String dumpCollapsed(Counter counter);
41 | String dumpTraces(int maxTraces);
42 | String dumpFlat(int maxMethods);
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/one/profiler/Counter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 Andrei Pangin
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package one.profiler;
18 |
19 | /**
20 | * Which metrics to use when generating profile in collapsed stack traces format.
21 | */
22 | public enum Counter {
23 | SAMPLES,
24 | TOTAL
25 | }
26 |
--------------------------------------------------------------------------------
/src/main/java/one/profiler/Events.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2018 Andrei Pangin
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | package one.profiler;
18 |
19 | /**
20 | * Predefined event names to use in {@link AsyncProfiler#start(String, long)}
21 | */
22 | public class Events {
23 | public static final String CPU = "cpu";
24 | public static final String ALLOC = "alloc";
25 | public static final String LOCK = "lock";
26 | public static final String WALL = "wall";
27 | public static final String ITIMER = "itimer";
28 | }
29 |
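30 | // Illustrative usage sketch (an assumption, not taken from this repository): these event names are
31 | // passed to async-profiler's AsyncProfiler.getInstance(), referenced in AsyncProfilerMXBean, e.g.
32 | //   AsyncProfiler profiler = AsyncProfiler.getInstance();
33 | //   profiler.start(Events.CPU, 10_000_000);  // CPU sampling, interval in nanoseconds (10 ms)
34 | //   profiler.stop();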
--------------------------------------------------------------------------------
/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Licensed to the Apache Software Foundation (ASF) under one or more
3 | # contributor license agreements. See the NOTICE file distributed with
4 | # this work for additional information regarding copyright ownership.
5 | # The ASF licenses this file to You under the Apache License, Version 2.0
6 | # (the "License"); you may not use this file except in compliance with
7 | # the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | #
17 |
18 | # Root logger defaults to the console appender (CA); the file appender (FA) writes to ${insight.file.stdout}
19 | insight.root.logger=INFO,CA
20 | insight.file.stdout=/tmp/stdout
21 | log4j.rootLogger=${insight.root.logger}
22 |
23 | #Console Appender
24 | log4j.appender.CA=org.apache.log4j.ConsoleAppender
25 | log4j.appender.CA.layout=org.apache.log4j.PatternLayout
26 | log4j.appender.CA.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p %c: %m%n
27 | log4j.appender.CA.Threshold = TRACE
28 | log4j.appender.CA.follow = true
29 |
30 | #File Appender
31 | log4j.appender.FA=org.apache.log4j.FileAppender
32 | log4j.appender.FA.append=false
33 | log4j.appender.FA.file=${insight.file.stdout}
34 | log4j.appender.FA.layout=org.apache.log4j.PatternLayout
35 | log4j.appender.FA.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss,SSS} %p %c: %m%n
36 |
37 | # Set the logging threshold of the File Appender
38 | log4j.appender.FA.Threshold = TRACE
39 |
40 | # Some packages are noisy for no good reason.
41 | log4j.additivity.parquet.hadoop.ParquetRecordReader=false
42 | log4j.logger.parquet.hadoop.ParquetRecordReader=OFF
43 |
44 | log4j.additivity.parquet.hadoop.ParquetOutputCommitter=false
45 | log4j.logger.parquet.hadoop.ParquetOutputCommitter=OFF
46 |
47 | log4j.additivity.org.apache.hadoop.hive.serde2.lazy.LazyStruct=false
48 | log4j.logger.org.apache.hadoop.hive.serde2.lazy.LazyStruct=OFF
49 |
50 | log4j.additivity.org.apache.hadoop.hive.metastore.RetryingHMSHandler=false
51 | log4j.logger.org.apache.hadoop.hive.metastore.RetryingHMSHandler=OFF
52 |
53 | log4j.additivity.hive.ql.metadata.Hive=false
54 | log4j.logger.hive.ql.metadata.Hive=OFF
55 |
56 | # Parquet related logging
57 | log4j.logger.parquet.hadoop=WARN
58 | log4j.logger.org.apache.spark.sql.parquet=WARN
59 |
60 | log4j.logger.org.spark_project.jetty=ERROR
61 | log4j.logger.org.apache.spark=WARN
62 | log4j.logger.org.apache.spark.deploy.yarn=INFO
63 | log4j.logger.org.apache.hadoop.hive.ql=INFO
64 | log4j.logger.org.apache.hadoop.hive.metastore=WARN
65 | log4j.logger.org.apache.hadoop.hive.ql.log.PerfLogger=WARN
66 | log4j.logger.org.apache.hadoop.mapreduce.lib=INFO
67 | log4j.logger.org.apache.spark.sql=INFO
68 |
69 | log4j.logger.BlockManagerMasterEndpoint=ERROR
70 |
71 | log4j.logger.org.apache.spark.sql.execution.datasources.FileSourceStrategy=WARN
72 |
73 | # To enable RuleExecutor logging in Spark 2
74 | #log4j.logger.org.apache.spark.sql.hive=TRACE
75 | #log4j.logger.org.apache.spark.sql.hive.client=INFO
76 | #log4j.logger.org.apache.spark.sql.hive.HiveMetastoreCatalog=DEBUG
77 | #log4j.logger.org.apache.spark.sql.execution.FileSourceScanExec=DEBUG
78 |
79 | # To enable RuleExecutor logging in Spark 3, set this configuration in spark_default.xml
80 | #spark.sql.optimizer.planChangeLog.level=INFO
81 |
--------------------------------------------------------------------------------
/src/main/resources/metrics.properties_template:
--------------------------------------------------------------------------------
1 | # Usage: ship this file with the job via --files metrics.properties
2 | *.sink.graphite.class=org.apache.spark.metrics.sink.GraphiteSink
3 | *.sink.graphite.host=graphite_host
4 | *.sink.graphite.port=2003
5 | *.sink.graphite.prefix=java
6 | master.source.jvm.class=org.apache.spark.metrics.source.JvmSource
7 | worker.source.jvm.class=org.apache.spark.metrics.source.JvmSource
8 | driver.source.jvm.class=org.apache.spark.metrics.source.JvmSource
9 | executor.source.jvm.class=org.apache.spark.metrics.source.JvmSource
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCCatalog.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.jdbc
19 |
20 | import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table}
21 | import org.apache.spark.sql.util.Logging
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-04-08.
26 | */
27 | class JDBCCatalog extends DelegatingCatalogExtension with Logging {
28 |
29 | override def name(): String = "JDBC"
30 |
31 | override def loadTable(ident: Identifier): Table = JDBCTable(ident)
32 | }
33 |
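34 | // Registration sketch (an assumption, not confirmed by this file): Spark v2 catalogs are normally
35 | // enabled through configuration, e.g.
36 | //   spark.sql.catalog.mysql_db=org.apache.spark.sql.execution.datasources.jdbc.JDBCCatalog
37 | // after which SQL can reference tables as mysql_db.<namespace>.<table>; the catalog name
38 | // "mysql_db" here is hypothetical.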
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCDataWriter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.jdbc
19 |
20 | import java.sql.Connection
21 |
22 | import org.apache.spark.sql.catalyst.InternalRow
23 | import org.apache.spark.sql.connector.write.{DataWriter, WriterCommitMessage}
24 | import org.apache.spark.sql.execution.datasources.jdbc.JdbcUtils.createConnectionFactory
25 | import org.apache.spark.sql.execution.datasources.jdbc.MyJDBCUtils._
26 | import org.apache.spark.sql.types._
27 | import org.apache.spark.sql.util.Logging
28 |
29 | /**
30 | * @author kun.wan,
31 | * @date 2021-04-07.
32 | */
33 | class JDBCDataWriter(schema: StructType, options: MyJDBCOptions)
34 | extends DataWriter[InternalRow] with Logging {
35 |
36 | val table = options.tableOrQuery
37 | val uniqueKeys: Set[String] =
38 | options.uniqueKeys.split(",").map(_.trim.toLowerCase).toSet
39 |
40 | val conn: Connection = createConnectionFactory(options)()
41 | conn.setAutoCommit(false)
42 | val (upsertSql, affectColumns, updateColumns) = upsertSqlAndColumns(conn, options)
43 | val stmt = conn.prepareStatement(upsertSql)
44 |
45 | val nameToIndex = schema.names.map(_.toLowerCase).zipWithIndex.toMap
46 | val setters =
47 | (affectColumns ++ updateColumns).zipWithIndex.map { case (column, pos) =>
48 | val fieldIndex = nameToIndex(column.toLowerCase)
49 | makeSetter(fieldIndex, pos + 1, schema.fields(fieldIndex).dataType)
50 | }
51 |
52 | var rowCount = 0
53 | val batchSize = options.batchSize
54 |
55 | override def write(row: InternalRow): Unit = {
56 | try {
 57 |       setters.foreach(_.apply(stmt, row))
58 | } catch {
59 | case e: Exception =>
 60 |         logError(s"Failed to fill PreparedStatement parameters. Row=($row), statement=$stmt")
61 | throw e
62 | }
63 |
64 | stmt.addBatch()
65 | rowCount += 1
66 | if (rowCount % batchSize == 0) {
67 | val updateCounts = stmt.executeBatch().length
68 | // upsertCount.add(updateCounts)
 69 |       logInfo(s"commit JDBC PreparedStatement, affected rows = ${updateCounts}, " +
70 | s"statement counter = ${rowCount}")
71 |
72 | rowCount = 0
73 | }
74 | }
75 |
76 | override def commit(): WriterCommitMessage = {
77 | val updateCounts = stmt.executeBatch().length
78 | // upsertCount.add(updateCounts)
 79 |     logInfo(s"commit JDBC PreparedStatement, affected rows = ${updateCounts}, " +
80 | s"statement counter = ${rowCount}")
81 | conn.commit()
82 | new WriterCommitMessage() {}
83 | }
84 |
85 | override def abort(): Unit = {
86 | conn.rollback()
87 | }
88 |
89 | override def close(): Unit = {
90 | stmt.close()
91 | conn.close()
92 | }
93 |
94 | def upsertSqlAndColumns(conn: Connection,
95 | options: JDBCOptions): (String, Array[String], Array[String]) = {
96 | val tableSchema = JdbcUtils.getSchemaOption(conn, options)
 97 |     assert(tableSchema.isDefined, s"Failed to get the schema of $table from the database; $table may not exist")
98 | val tableColumnNames = tableSchema.get.fieldNames
99 | val rddSchemaNames = schema.names.map(_.toLowerCase)
100 | val affectColumns = tableColumnNames.filter(col => rddSchemaNames.contains(col.toLowerCase))
101 | val updateColumns = affectColumns.filter(col => !uniqueKeys.contains(col.toLowerCase))
102 | tableColumnNames.filterNot(affectColumns.contains)
103 |       .foreach(col => logWarning(s"row schema does not contain column: ${col}"))
104 |
105 | val upsertSql =
106 | s"""
107 | |INSERT INTO ${table} (${affectColumns.mkString(", ")})
108 | |VALUES ( ${affectColumns.map(_ => "?").mkString(", ")} )
109 | |ON DUPLICATE KEY UPDATE ${updateColumns.map(_ + "= ?").mkString(", ")}
110 | |""".stripMargin
111 | logInfo(s"upsert sql : $upsertSql")
112 | (upsertSql, affectColumns, updateColumns)
113 | }
114 | }
115 |
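116 | // Illustrative shape of the statement built by upsertSqlAndColumns, for a hypothetical MySQL table
117 | // user_stats(id, name, cnt) written with uniqueKeys = "id" (a sketch, not output from a real run):
118 | //   INSERT INTO user_stats (id, name, cnt)
119 | //   VALUES ( ?, ?, ? )
120 | //   ON DUPLICATE KEY UPDATE name= ?, cnt= ?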
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCTable.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.jdbc
19 |
20 | import java.sql.Connection
21 | import java.util
22 |
23 | import scala.collection.JavaConverters._
24 | import scala.collection.mutable
25 |
26 | import org.apache.spark.sql.connector.catalog._
27 | import org.apache.spark.sql.connector.read.ScanBuilder
28 | import org.apache.spark.sql.connector.write.{LogicalWriteInfo, WriteBuilder}
29 | import org.apache.spark.sql.runner.container.ConfigContainer
30 | import org.apache.spark.sql.types.StructType
31 | import org.apache.spark.sql.util.{CaseInsensitiveStringMap, Logging}
32 |
33 | /**
34 | * @author kun.wan,
35 | * @date 2021-04-07.
36 | *
 37 |  * A typical implementation would define a Source class that extends RelationProvider and TableProvider
 38 |  * to provide the Relation and Table objects, and then call DataSourceV2Utils.getTableFromProvider()
 39 |  * to obtain the table instance. Constructing the Table directly, as done here, is simpler; going
 40 |  * through a provider only adds indirection.
40 | */
41 | case class JDBCTable(ident: Identifier) extends Table
42 | with SupportsRead
43 | with SupportsWrite
44 | with Logging {
45 |
46 | import MyJDBCOptions._
47 |
48 | val namespace = ident.namespace()(0)
49 | val relationName = ident.name()
50 |
51 | val tableOrQuery =
52 | ConfigContainer.getOrElse(s"$namespace.$relationName.query", ident.name())
53 |
54 | val jdbcOptions = {
55 | val parameters = mutable.Map(
56 | JDBC_URL -> ConfigContainer.get(s"$namespace.url"),
57 | "user" -> ConfigContainer.get(s"$namespace.username"),
58 | "password" -> ConfigContainer.get(s"$namespace.password"),
59 | JDBC_TABLE_NAME -> tableOrQuery
60 | )
61 | Seq(
62 | JDBC_PARTITION_COLUMN,
63 | JDBC_NUM_PARTITIONS,
64 | JDBC_QUERY_TIMEOUT,
65 | JDBC_BATCH_FETCH_SIZE,
66 | JDBC_PUSHDOWN_PREDICATE,
67 | JDBC_UNIQUE_KEYS
68 | ).map(optionName => optionName -> s"$namespace.$relationName.$optionName")
69 | .filter(option => ConfigContainer.contains(option._2))
70 | .foreach { option => parameters += (option._1 -> ConfigContainer.get(option._2)) }
71 |
 72 |     // Reads use a new partitioning algorithm: JDBC_PARTITION_COLUMN is required, while JDBC_LOWER_BOUND and JDBC_UPPER_BOUND are passed as dummy values
73 | if (parameters.contains(JDBC_PARTITION_COLUMN)) {
74 | parameters += (JDBC_LOWER_BOUND -> "0")
75 | parameters += (JDBC_UPPER_BOUND -> "0")
76 | }
77 |
 78 |     // JDBC upserts require the unique keys of the target table to be configured
79 | new MyJDBCOptions(parameters.toMap)
80 | }
81 |
82 | override def name(): String = ident.toString
83 |
84 | /**
 85 |    * When writing to a JDBC table, the schema is derived automatically from the analyzed child plan.
 86 |    * When reading from a JDBC table, the schema is inferred from the JDBC source itself.
87 | * @return
88 | */
89 | override def schema(): StructType = {
90 | if (ConfigContainer.contains(s"${ident.toString}.schemaDDL")) {
91 | StructType.fromDDL(ConfigContainer.get(s"${ident.toString}.schemaDDL"))
92 | } else {
93 | val conn: Connection = MyJDBCUtils.createConnectionFactory(jdbcOptions)()
94 | try {
95 | JdbcUtils.getSchemaOption(conn, jdbcOptions).get
96 | } finally {
97 | conn.close()
98 | }
99 | }
100 | }
101 |
102 | override def capabilities(): util.Set[TableCapability] =
103 | Set(TableCapability.BATCH_READ,
104 | TableCapability.BATCH_WRITE).asJava
105 |
106 | override def newScanBuilder(options: CaseInsensitiveStringMap): ScanBuilder = {
107 | Seq(
108 | JDBC_URL,
109 | "user",
110 | "password",
111 | JDBC_TABLE_NAME,
112 | JDBC_PARTITION_COLUMN,
113 | JDBC_NUM_PARTITIONS
114 | ).foreach { option =>
115 | require(jdbcOptions.parameters.contains(option),
116 | s"parameter $option is needed in JDBC read")
117 | }
118 |
119 | new JDBCScanBuilder(schema, jdbcOptions)
120 | }
121 |
122 | override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder = {
123 | Seq(
124 | JDBC_URL,
125 | "user",
126 | "password",
127 | JDBC_TABLE_NAME,
128 | JDBC_UNIQUE_KEYS
129 | ).foreach { option =>
130 | require(jdbcOptions.parameters.contains(option),
131 | s"parameter $option is needed in JDBC write")
132 | }
133 |
134 | new JDBCWriteBuilder(schema, jdbcOptions)
135 | }
136 | }
137 |
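138 | // Illustrative ConfigContainer entries (key patterns follow the lookups above; the catalog,
139 | // namespace and table names are hypothetical). For a table referenced as jdbc.shop.orders:
140 | //   shop.url                    = jdbc:mysql://db-host:3306/shop
141 | //   shop.username               = <user>
142 | //   shop.password               = <password>
143 | //   shop.orders.uniqueKeys      = id      (required for writes)
144 | //   shop.orders.partitionColumn = id      (required for reads, together with numPartitions)
145 | //   shop.orders.numPartitions   = 4
146 | //   shop.orders.query           = <optional SQL that replaces the table name>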
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/JDBCWriteBuilder.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.jdbc
19 |
20 | import org.apache.spark.sql.catalyst.InternalRow
21 | import org.apache.spark.sql.connector.write._
22 | import org.apache.spark.sql.types.StructType
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2021-04-07.
27 | */
28 | class JDBCWriteBuilder(schema: StructType, options: MyJDBCOptions) extends WriteBuilder {
29 |
30 | override def buildForBatch(): BatchWrite = new JDBCBatchWrite(schema, options)
31 |
32 | }
33 |
34 | class JDBCBatchWrite(schema: StructType, options: MyJDBCOptions) extends BatchWrite {
35 |
36 | override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory =
37 | new JDBCDataWriterFactory(schema, options)
38 |
39 | override def commit(messages: Array[WriterCommitMessage]): Unit = {}
40 |
41 | override def abort(messages: Array[WriterCommitMessage]): Unit = {}
42 | }
43 |
44 | class JDBCDataWriterFactory(schema: StructType, options: MyJDBCOptions) extends DataWriterFactory {
45 |
46 | override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] =
47 | new JDBCDataWriter(schema, options)
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/MyJDBCOptions.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.jdbc
19 |
20 | import java.util.Locale
21 |
22 | import org.apache.spark.sql.catalyst.util.CaseInsensitiveMap
23 |
24 | /**
25 | * @author kun.wan,
26 | *
27 | * @date 2021-04-08.
28 | *
 29 |  * Spark's built-in JDBCOptions does not serialize user-supplied custom properties, so this class implements its own.
30 | */
31 | case class MyJDBCOptions(@transient override val parameters: CaseInsensitiveMap[String])
32 | extends JDBCOptions(parameters) {
33 |
34 | import JDBCOptions._
35 |
36 | def this(parameters: Map[String, String]) = this(CaseInsensitiveMap(parameters))
37 |
38 | def this(url: String, table: String, parameters: Map[String, String]) = {
39 | this(CaseInsensitiveMap(parameters ++ Map(
40 | JDBCOptions.JDBC_URL -> url,
41 | JDBCOptions.JDBC_TABLE_NAME -> table)))
42 | }
43 |
44 | require(
45 | parameters.get(JDBC_TABLE_NAME).isDefined,
46 | s"Option '$JDBC_TABLE_NAME' is required. " +
47 | s"Option '$JDBC_QUERY_STRING' is not applicable while writing.")
48 |
49 | val uniqueKeys = parameters.getOrElse(MyJDBCOptions.JDBC_UNIQUE_KEYS, "")
50 |
51 | var filterWhereClause = parameters.getOrElse(MyJDBCOptions.JDBC_FILTER_WHERE_CLAUSE, "")
52 |
53 | }
54 |
55 | object MyJDBCOptions {
56 |
57 | private val jdbcOptionNames = collection.mutable.Set[String]()
58 |
59 | private def newOption(name: String): String = {
60 | jdbcOptionNames += name.toLowerCase(Locale.ROOT)
61 | name
62 | }
63 |
64 | val JDBC_URL = newOption("url")
65 | val JDBC_TABLE_NAME = newOption("dbtable")
66 | val JDBC_QUERY_STRING = newOption("query")
67 | val JDBC_DRIVER_CLASS = newOption("driver")
68 | val JDBC_PARTITION_COLUMN = newOption("partitionColumn")
69 | val JDBC_LOWER_BOUND = newOption("lowerBound")
70 | val JDBC_UPPER_BOUND = newOption("upperBound")
71 | val JDBC_NUM_PARTITIONS = newOption("numPartitions")
72 | val JDBC_QUERY_TIMEOUT = newOption("queryTimeout")
73 | val JDBC_BATCH_FETCH_SIZE = newOption("fetchsize")
74 | val JDBC_TRUNCATE = newOption("truncate")
75 | val JDBC_CASCADE_TRUNCATE = newOption("cascadeTruncate")
76 | val JDBC_CREATE_TABLE_OPTIONS = newOption("createTableOptions")
77 | val JDBC_CREATE_TABLE_COLUMN_TYPES = newOption("createTableColumnTypes")
78 | val JDBC_CUSTOM_DATAFRAME_COLUMN_TYPES = newOption("customSchema")
79 | val JDBC_BATCH_INSERT_SIZE = newOption("batchsize")
80 | val JDBC_TXN_ISOLATION_LEVEL = newOption("isolationLevel")
81 | val JDBC_SESSION_INIT_STATEMENT = newOption("sessionInitStatement")
82 | val JDBC_PUSHDOWN_PREDICATE = newOption("pushDownPredicate")
83 | val JDBC_UNIQUE_KEYS = newOption("uniqueKeys")
84 | val JDBC_FILTER_WHERE_CLAUSE = newOption("filterWhereClause")
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/kafka/KafkaCatalog.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.kafka
19 |
20 | import org.apache.spark.sql.connector.catalog.{DelegatingCatalogExtension, Identifier, Table}
21 | import org.apache.spark.sql.util.Logging
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-04-08.
26 | */
27 | class KafkaCatalog extends DelegatingCatalogExtension with Logging {
28 |
29 | override def name(): String = "KAFKA"
30 |
31 | override def loadTable(ident: Identifier): Table = KafkaTable(ident)
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/kafka/KafkaOptions.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.kafka
19 |
20 | import java.util.Properties
21 |
22 | import com.fasterxml.jackson.databind.{JsonNode, ObjectMapper}
23 | import io.confluent.kafka.serializers.{AbstractKafkaAvroSerDeConfig, KafkaAvroSerializer}
24 | import org.apache.kafka.clients.producer.{KafkaProducer, ProducerConfig}
25 | import org.apache.kafka.common.serialization.StringSerializer
26 |
27 | import scala.reflect.ClassTag
28 |
29 | import scala.collection.JavaConverters._
30 |
31 | /**
32 | * @author kun.wan,
33 | * @date 2020-07-13.
34 | */
35 | case class KafkaOptions(name: String, config: Map[String, String]) extends Serializable {
36 | val bootstrapServers = config(s"kafka.bootstrap.servers")
37 | val schemaRegistryUrl = config.getOrElse(s"kafka.schema.registry.url", "")
38 |
39 | val topic = config(s"kafka.${name}.kafkaTopic")
40 | val recordType: String = config(s"kafka.${name}.recordType")
41 | val avroName = config.getOrElse(s"kafka.${name}.avro.name", "")
42 | val avroNamespace = config.getOrElse(s"kafka.${name}.avro.namespace", "")
43 | val fieldMapping = config.getOrElse(s"kafka.${name}.avro.fieldMapping", "")
44 | val avroForceCreate = config.getOrElse(s"kafka.${name}.avro.forceCreate", "false")
45 |
46 | val maxRatePerPartition = config.getOrElse(s"kafka.${name}.maxRatePerPartition", "10000000").toInt
47 |
48 | lazy val fieldMappingMap = {
49 | val objectMapper = new ObjectMapper
50 | if (fieldMapping != "") {
51 | objectMapper.readTree(fieldMapping)
52 | .asScala
53 | .map(f => f.path("name").textValue() -> f)
54 | .toMap
55 | } else {
56 | Map[String, JsonNode]()
57 | }
58 | }
59 |
60 | lazy val serialClass: Class[_] = recordType match {
61 | case JSON_TYPE =>
62 | classOf[StringSerializer]
63 | case AVRO_TYPE =>
64 | classOf[KafkaAvroSerializer]
65 | }
66 |
67 | val JSON_TYPE: String = "json"
68 | val AVRO_TYPE: String = "avro"
69 |
70 | def initProducer[T: ClassTag](): KafkaProducer[String, T] = {
71 | val properties = new Properties
72 | properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, bootstrapServers)
73 | properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, classOf[StringSerializer])
74 | properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, serialClass)
75 | properties.put(ProducerConfig.ACKS_CONFIG, "all")
76 | properties.put(AbstractKafkaAvroSerDeConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl)
77 | new KafkaProducer[String, T](properties)
78 | }
79 | }
80 |
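81 | // Illustrative configuration sketch (key patterns follow the lookups above; values are hypothetical):
82 | //   kafka.bootstrap.servers     = broker1:9092,broker2:9092
83 | //   kafka.schema.registry.url   = http://schema-registry:8081   (avro records only)
84 | //   kafka.<name>.kafkaTopic     = events_topic
85 | //   kafka.<name>.recordType     = json | avro
86 | //   kafka.<name>.avro.name      = Event             (avro records only)
87 | //   kafka.<name>.avro.namespace = com.example.avro  (avro records only)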
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/execution/datasources/kafka/KafkaTable.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.execution.datasources.kafka
19 |
20 | import java.util
21 |
22 | import org.apache.spark.sql.catalyst.InternalRow
23 | import org.apache.spark.sql.connector.catalog.{Identifier, SupportsWrite, Table, TableCapability}
24 | import org.apache.spark.sql.connector.write._
25 | import org.apache.spark.sql.types.StructType
26 | import scala.collection.JavaConverters._
27 |
28 | import org.apache.spark.sql.runner.container.ConfigContainer
29 |
30 | /**
31 | * @author kun.wan,
32 | * @date 2021-04-06.
33 | */
34 | case class KafkaTable(ident: Identifier) extends Table with SupportsWrite {
35 |
36 | override def name(): String = ident.toString
37 |
38 | override def schema(): StructType =
39 | StructType.fromDDL(ConfigContainer.get(s"${ident.toString}.schemaDDL"))
40 |
41 | override def capabilities(): util.Set[TableCapability] =
42 | Set(TableCapability.BATCH_WRITE).asJava
43 |
44 | override def newWriteBuilder(info: LogicalWriteInfo): WriteBuilder =
45 | new KafkaWriteBuilder(ident.name(), schema())
46 | }
47 |
48 | class KafkaWriteBuilder(name: String, schema: StructType) extends WriteBuilder {
49 |
50 | override def buildForBatch(): BatchWrite = new KafkaBatchWrite(name, schema)
51 |
52 | }
53 |
54 | class KafkaBatchWrite(name: String, schema: StructType) extends BatchWrite {
55 |
56 | override def createBatchWriterFactory(info: PhysicalWriteInfo): DataWriterFactory =
57 | new KafkaDataWriterFactory(name, schema)
58 |
59 | override def commit(messages: Array[WriterCommitMessage]): Unit = {}
60 |
61 | override def abort(messages: Array[WriterCommitMessage]): Unit = {}
62 | }
63 |
64 | class KafkaDataWriterFactory(name: String, schema: StructType) extends DataWriterFactory {
65 |
66 | val kafkaOption: KafkaOptions = KafkaOptions(name, ConfigContainer.valueMap.get())
67 |
68 | override def createWriter(partitionId: Int, taskId: Long): DataWriter[InternalRow] =
69 | new KafkaDataWriter(kafkaOption, schema)
70 | }
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/hive/SqlRunnerMetrics.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.hive
19 |
20 | import org.apache.spark.internal.Logging
21 | import org.apache.spark.sql.execution.SparkPlan
22 | import org.apache.spark.sql.execution.command.DataWritingCommandExec
23 | import org.apache.spark.sql.execution.metric.SQLMetric
24 | import org.apache.spark.sql.hive.execution.{HiveTableScanExec, InsertIntoHiveTable}
25 |
26 | /**
27 | * @author kun.wan,
28 | * @date 2020-04-29.
29 | */
30 | object SqlRunnerMetrics extends Logging {
31 |
32 | def logSparkPlanMetrics(plan: SparkPlan): Unit = plan match {
33 | case HiveTableScanExec(_, relation, _) =>
34 | logInfo(s"source ${relation.nodeName}(${relation.tableMeta.identifier}) metrics : ${formatMetrics(plan.metrics)}")
35 | case DataWritingCommandExec(cmd: InsertIntoHiveTable, _) =>
36 | logInfo(s"Insert table ${cmd.table.identifier} metrics : ${formatMetrics(plan.metrics)}")
37 |
38 | case _ =>
39 | }
40 |
41 | def formatMetrics(metrics: Map[String, SQLMetric]): Map[String, Long] = metrics.map {
42 | case (name: String, metric: SQLMetric) =>
43 | name -> metric.value
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/CollectValueRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import java.util.Locale
21 |
22 | import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnresolvedHint}
23 | import org.apache.spark.sql.catalyst.rules.Rule
24 | import org.apache.spark.sql.util.OptimizerUtil.parseHintParameter
25 |
26 | import org.apache.spark.sql.runner.callback.{ArrayValueCollector, DataCallBackFactory, SingleValueCollector}
27 |
28 | /**
29 | * @author kun.wan,
30 | * @date 2020-09-15.
31 | */
32 | object CollectValueRule extends Rule[LogicalPlan] {
33 |
34 | override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
35 | case hint@UnresolvedHint(hintName, parameters, child) => hintName.toUpperCase(Locale.ROOT) match {
36 | case "COLLECT_VALUE" =>
37 | val name: String = parseHintParameter(parameters(0))
38 | val columnName: String = parseHintParameter(parameters(1))
39 | DataCallBackFactory.registerDataCallBack(SingleValueCollector(name, columnName))
40 |
41 | child
42 |
43 | case "COLLECT_ARRAY" =>
44 | val name: String = parseHintParameter(parameters(0))
45 | val columnName: String = parseHintParameter(parameters(1))
46 | DataCallBackFactory.registerDataCallBack(ArrayValueCollector(name, columnName))
47 |
48 | child
49 |
50 | case _ => hint
51 | }
52 | }
53 | }
54 |
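55 | // Illustrative hint usage (a sketch; variable and column names are hypothetical):
56 | //   SELECT /*+ COLLECT_VALUE('max_id', 'id') */ max(id) AS id FROM orders;
57 | //   SELECT /*+ COLLECT_ARRAY('order_ids', 'id') */ id FROM orders;
58 | // The first parameter names the value to register, the second names the output column whose
59 | // value(s) the registered collector captures.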
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/DataQualityRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import java.util.Locale
21 |
22 | import org.apache.spark.internal.Logging
23 | import org.apache.spark.sql.catalyst.expressions.Literal
24 | import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
25 | import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Project, UnresolvedHint}
26 | import org.apache.spark.sql.catalyst.rules.Rule
27 | import org.apache.spark.sql.{Column, SparkSession}
28 | import org.apache.spark.util.IdGenerator
29 |
30 | import org.apache.spark.sql.runner.callback.{DataCallBackFactory, DataCheckCallBack}
31 |
32 | /**
33 | * @author kun.wan,
34 | * @date 2021-02-20.
35 | */
36 | case class DataQualityRule(spark: SparkSession) extends Rule[LogicalPlan] {
37 |
38 | import DataQualityRule._
39 |
40 | override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
41 | case hint @ UnresolvedHint(hintName, parameters, child) => hintName.toUpperCase(Locale.ROOT) match {
42 | case "DATA_CHECK" =>
43 | val checkTitle: String = parameters.head.toString
44 | val dataCheckExpressions =
45 | parameters.tail map { case literal: Literal =>
46 | val expression = literal.toString()
47 | val checkResultColumn = generateDataCheckColumnName()
48 | val column = Column.apply(CatalystSqlParser.parseExpression(expression)).as(checkResultColumn)
49 | column.named.children.head.children.find { expr => child.output.contains(expr) } match {
50 | case Some(originColumnExpr) =>
51 | DataCallBackFactory.registerDataCallBack(
52 | DataCheckCallBack(checkTitle,
53 | child.output.find( p => p == originColumnExpr).get.name,
54 | checkResultColumn,
55 | expression))
56 | column.named
57 |
58 | case _ =>
59 | throw new RuntimeException("Data check column not matched!")
60 | }
61 | }
62 |
63 | Project(child.output ++ dataCheckExpressions, child)
64 |
65 | case _ => hint
66 | }
67 | }
68 | }
69 |
70 | object DataQualityRule extends Logging {
71 | private val ID_GENERATOR = new IdGenerator
72 |
73 | def generateDataCheckColumnName(): String = {
74 | s"__DATA_CHECK_${ID_GENERATOR.next}__"
75 | }
76 | }
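77 | // Illustrative hint usage (a sketch; the table and column names are hypothetical):
78 | //   SELECT /*+ DATA_CHECK('orders not empty', 'order_cnt > 0') */
79 | //          count(1) AS order_cnt
80 | //   FROM orders;
81 | // The first parameter is the check title; each remaining parameter is a boolean expression over the
82 | // child plan's output, added as an extra projected column and reported through DataCheckCallBack.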
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/ExternalSinkRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import java.util.Locale
21 |
22 | import org.apache.spark.sql.SparkSession
23 | import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, UnresolvedHint}
24 | import org.apache.spark.sql.catalyst.rules.Rule
25 | import org.apache.spark.sql.util.OptimizerUtil.parseHintParameter
26 |
27 | import org.apache.spark.sql.runner.callback.{DataCallBackFactory, EmailSink}
28 | import org.apache.spark.sql.runner.container.ConfigContainer
29 |
30 | /**
31 | * @author kun.wan,
32 | * @date 2020-09-15.
33 | */
34 | case class ExternalSinkRule(spark: SparkSession) extends Rule[LogicalPlan] {
35 |
36 | override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperatorsUp {
37 | case hint@UnresolvedHint(hintName, parameters, child) => hintName.toUpperCase(Locale.ROOT) match {
38 | case "EMAIL_SINK" =>
39 | val name = parseHintParameter(parameters(0))
40 | DataCallBackFactory.registerDataCallBack(EmailSink(name, ConfigContainer.valueMap.get()))
41 | child
42 |
43 | case _ => hint
44 | }
45 | }
46 |
47 | }
48 |
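49 | // Illustrative hint usage (a sketch; the sink name is hypothetical and must match email settings
50 | // held in ConfigContainer):
51 | //   SELECT /*+ EMAIL_SINK('daily_report') */ dt, sum(amount) AS gmv FROM orders GROUP BY dt;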
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/ExternalTableRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.catalyst.analysis.{AnalysisContext, UnresolvedRelation}
22 | import org.apache.spark.sql.catalyst.plans.logical.{InsertIntoStatement, LogicalPlan, With}
23 | import org.apache.spark.sql.catalyst.rules.Rule
24 | import org.apache.spark.sql.execution.QueryExecution
25 | import org.apache.spark.sql.runner.container.ConfigContainer
26 |
27 | /**
28 | * @author kun.wan,
29 | * @date 2021-04-07.
30 | */
31 | case class ExternalTableRule(spark: SparkSession) extends Rule[LogicalPlan] {
32 |
33 | import spark.sessionState.analyzer._
34 |
35 | // from Analyzer
36 | private def isResolvingView: Boolean = AnalysisContext.get.catalogAndNamespace.nonEmpty
37 |
38 |   // If we are resolving relations inside views, we need to expand single-part relation names with
39 |   // the current catalog and namespace that were in effect when the view was created.
40 | private def expandRelationName(nameParts: Seq[String]): Seq[String] = {
41 | if (!isResolvingView) return nameParts
42 |
43 | if (nameParts.length == 1) {
44 | AnalysisContext.get.catalogAndNamespace :+ nameParts.head
45 | } else if (spark.sessionState.catalogManager.isCatalogRegistered(nameParts.head)) {
46 | nameParts
47 | } else {
48 | AnalysisContext.get.catalogAndNamespace.head +: nameParts
49 | }
50 | }
51 |
52 | def setSchemaDDL(u: UnresolvedRelation, child: LogicalPlan): Unit = {
53 | expandRelationName(u.multipartIdentifier) match {
54 | case NonSessionCatalogAndIdentifier(catalog, ident) =>
55 | val schemaDDL = new QueryExecution(spark, child).analyzed.schema.toDDL
56 | ConfigContainer :+ (s"${ident.toString}.schemaDDL" -> schemaDDL)
57 |
58 | case _ =>
59 | }
60 | }
61 |
62 | override def apply(plan: LogicalPlan): LogicalPlan = {
63 | plan match {
64 | case InsertIntoStatement(u: UnresolvedRelation, _, _, query: LogicalPlan, _, _) =>
65 | setSchemaDDL(u, query)
66 |
67 | case With(InsertIntoStatement(u: UnresolvedRelation, _, _, query: LogicalPlan, _, _), cteRelations) =>
68 | setSchemaDDL(u, With(query, cteRelations))
69 |
70 | case _ =>
71 | }
72 | plan
73 | }
74 | }
75 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/InsightExtensions.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.internal.Logging
21 | import org.apache.spark.sql.SparkSessionExtensions
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2020-04-17.
26 | */
27 | class InsightExtensions extends (SparkSessionExtensions => Unit) with Logging {
28 | def apply(e: SparkSessionExtensions): Unit = {
29 | e.injectOptimizerRule(RepartitionRule)
30 | }
31 | }
32 |
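33 | // Wiring sketch: Spark session extensions are normally enabled via configuration, e.g.
34 | //   spark.sql.extensions=org.apache.spark.sql.optimizer.InsightExtensions
35 | // (standard Spark mechanism; whether sql-runner sets this itself is not shown in this file).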
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/PartitionScanLimitRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.internal.Logging
21 | import org.apache.spark.sql.catalyst.catalog.{CatalogTable, HiveTableRelation}
22 | import org.apache.spark.sql.catalyst.expressions._
23 | import org.apache.spark.sql.catalyst.plans.logical.{Filter, Join, LogicalPlan}
24 | import org.apache.spark.sql.catalyst.rules.Rule
25 | import org.apache.spark.sql.execution.datasources.{HadoopFsRelation, LogicalRelation}
26 | import org.apache.spark.sql.{AnalysisException, SparkSession}
27 |
28 | import scala.collection.mutable.ArrayBuffer
29 |
30 | /**
31 | * @author kun.wan,
32 | * @date 2020-07-28.
33 | */
34 | case class PartitionScanLimitRule(spark: SparkSession) extends Rule[LogicalPlan] with Logging {
35 |
36 | val partitionScanLimitEnable: Boolean =
37 | spark.conf.get("spark.partition.scan.limit.enable", "true").toBoolean
38 |
39 | def conditionCheck(partitionColNames: Seq[String],
40 | filters: ArrayBuffer[Expression],
41 | tableMeta: CatalogTable): Unit = {
42 | val filteredAttributes = filters.flatMap(_.references.map(_.name.toLowerCase))
 43 |     if ((partitionColNames.map(_.toLowerCase) intersect filteredAttributes).isEmpty) {
44 | val table = tableMeta.identifier
45 | throw new AnalysisException(
 46 |         s"""No partition column filter condition was found for table $table
47 | |partitionColNames : ${partitionColNames.mkString(", ")}
48 | |filteredAttributes : $filteredAttributes
49 | |""".stripMargin)
50 | }
51 | }
52 |
53 | def checkRelationFilters(plan: LogicalPlan, filters: ArrayBuffer[Expression]): Unit =
54 | plan match {
55 | case Filter(condition, child) if condition.deterministic =>
56 | checkRelationFilters(child, filters :+ condition)
57 |
58 | case HiveTableRelation(catalogTable, _, partitionCols, _, _)
59 | if partitionCols.nonEmpty =>
60 | val partitionColNames = partitionCols.map(_.name)
61 | conditionCheck(partitionColNames, filters, catalogTable)
62 |
63 | case LogicalRelation(relation: HadoopFsRelation, _, catalogTableOpt, _) =>
 64 |       relation.partitionSchemaOption.foreach { partitionSchema =>
65 | val partitionColNames = partitionSchema.fieldNames
66 | conditionCheck(partitionColNames, filters, catalogTableOpt.get)
67 | }
68 |
69 | case Join(left, right, _, _, _) =>
70 | checkRelationFilters(left, ArrayBuffer[Expression]())
71 | checkRelationFilters(right, ArrayBuffer[Expression]())
72 |
73 | case _ =>
 74 |         plan.children.foreach(checkRelationFilters(_, filters))
75 | }
76 |
77 | override def apply(plan: LogicalPlan): LogicalPlan = {
78 | if (partitionScanLimitEnable) {
79 | checkRelationFilters(plan, ArrayBuffer[Expression]())
80 | }
81 | plan
82 | }
83 | }
84 |
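85 | // Illustrative effect (a sketch; dw.events is a hypothetical table partitioned by dt):
86 | //   SELECT count(1) FROM dw.events                          -- rejected: no partition filter
87 | //   SELECT count(1) FROM dw.events WHERE dt = '2021-01-01'  -- accepted
88 | // With spark.partition.scan.limit.enable=true (the default), scanning a partitioned table without
89 | // a filter on any partition column fails with an AnalysisException.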
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/optimizer/RepartitionRule.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.commons.math3.stat.descriptive.DescriptiveStatistics
21 | import org.apache.hadoop.hive.common.StatsSetupConst
22 | import org.apache.spark.internal.Logging
23 | import org.apache.spark.sql.SparkSession
24 | import org.apache.spark.sql.catalyst.catalog.{CatalogTable, CatalogTablePartition}
25 | import org.apache.spark.sql.catalyst.dsl.expressions._
26 | import org.apache.spark.sql.catalyst.expressions.SortOrder
27 | import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, Repartition, RepartitionByExpression, _}
28 | import org.apache.spark.sql.catalyst.rules.Rule
29 | import org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand
30 | import org.apache.spark.sql.hive.execution.InsertIntoHiveTable
31 | import org.apache.spark.sql.util.SystemVariables.INDEX_COLUMN_NAME
32 |
33 | /**
34 | * @author kun.wan,
35 | * @date 2020-04-17.
36 | */
37 | case class RepartitionRule(spark: SparkSession) extends Rule[LogicalPlan] with Logging {
38 |
39 | val DEFAULT_PARTITION_SIZE = 64 * 1024 * 1024L
40 | val SAMPLING_PARTITIONS = 10
41 |
42 | val analyzer = spark.sessionState.analyzer
43 | val catalog = SparkSession.active.sessionState.catalog
44 |
45 | override def apply(plan: LogicalPlan): LogicalPlan = {
46 | val newPlan = plan transform {
47 | case InsertIntoHiveTable(table, partition, query, overwrite, partitionExists, outputCols)
48 | if table.partitionColumnNames.size > 0 && checkQueryType(query) =>
49 |
50 | val newQuery: LogicalPlan = transformQuery(table, query)
51 | InsertIntoHiveTable(table, partition, newQuery, overwrite, partitionExists, outputCols)
52 |
53 | case InsertIntoHadoopFsRelationCommand(outputPath, staticPartitions, ifPartitionNotExists,
54 | partitionColumns, bucketSpec, fileFormat, options, query, mode, catalogTable, fileIndex,
55 | outputColumnNames)
56 | if catalogTable.isDefined && (staticPartitions.size + partitionColumns.size) > 0
57 | && checkQueryType(query) =>
58 | val newQuery =
59 | transformQuery(catalogTable.get, query)
60 |
61 | InsertIntoHadoopFsRelationCommand(
62 | outputPath,
63 | staticPartitions,
64 | ifPartitionNotExists,
65 | partitionColumns,
66 | bucketSpec,
67 | fileFormat,
68 | options,
69 | newQuery,
70 | mode,
71 | catalogTable,
72 | fileIndex,
73 | outputColumnNames)
74 | }
75 | if (!newPlan.fastEquals(plan)) {
76 | logDebug(s"plan after RepartitionRule:\n$newPlan")
77 | }
78 | newPlan
79 | }
80 |
81 | private def checkQueryType(query: LogicalPlan): Boolean = {
82 | !query.isInstanceOf[Sort] && !query.isInstanceOf[Repartition] &&
83 | !query.isInstanceOf[RepartitionByExpression]
84 | }
85 |
86 | private def transformQuery(table: CatalogTable, query: LogicalPlan): LogicalPlan = {
87 | val tableName = table.identifier
88 | val sortExprsOpt: Option[Seq[SortOrder]] =
89 | table.properties.get(INDEX_COLUMN_NAME).map(indexColumn => {
90 | val order = Symbol(indexColumn).attr.asc
91 | Seq(analyzer.resolveExpressionBottomUp(order, query).asInstanceOf[SortOrder])
92 | })
93 |
94 | val numPartitionsOpt = repartitionNumbers(catalog.listPartitions(tableName))
95 | (sortExprsOpt, numPartitionsOpt) match {
96 | case (Some(sortExprs), Some(numPartitions)) =>
97 | RepartitionByExpression(sortExprs, query, numPartitions)
98 |
99 | case (Some(sortExprs), None) => Sort(sortExprs, true, query)
100 | case (None, Some(numPartitions)) => Repartition(numPartitions, true, query)
101 | case (None, None) => query
102 | }
103 | }
104 |
105 | /**
106 | * 1. Sort the partitions by creation time in descending order and take the most recently created ones.
107 | * 2. Sample the metadata of up to 10 partitions to compute a partition count, taking the median of the results.
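    | * Example (illustrative): sampled TOTAL_SIZE values of 100MB, 200MB and 300MB with the
    | * 64MB default partition size give 1, 3 and 4; the median is 3, so 4 partitions are returned.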
108 | * @param partitions
109 | * @return
110 | */
111 | def repartitionNumbers(partitions: Seq[CatalogTablePartition]): Option[Int] = {
112 |
113 | val stats = new DescriptiveStatistics
114 | if (log.isDebugEnabled) {
115 | partitions.foreach(p => logDebug(s"got partition ${p.simpleString}"))
116 | }
117 | partitions.filter(_.parameters.contains(StatsSetupConst.TOTAL_SIZE))
118 | .sortWith((p1, p2) => p1.createTime > p2.createTime)
119 | .slice(0, SAMPLING_PARTITIONS)
120 | .foreach { p =>
121 | stats.addValue(p.parameters.get(StatsSetupConst.TOTAL_SIZE).get.toLong
122 | / DEFAULT_PARTITION_SIZE)
123 | }
124 | if (stats.getPercentile(50).isNaN) {
125 | None
126 | } else {
127 | val number = stats.getPercentile(50).toInt + 1
128 | if (number > 0) {
129 | Some(number)
130 | } else {
131 | None
132 | }
133 | }
134 | }
135 | }
136 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/plugin/AsyncProfilePlugin.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.plugin
19 |
20 | import java.lang.management.ManagementFactory
21 |
22 | import javax.management.ObjectName
23 | import one.profiler.AsyncProfiler
24 |
25 | /**
26 | * @author kun.wan,
27 | * @date 2020-05-14.
28 | */
29 | class AsyncProfilePlugin extends ProfilePlugin {
30 |
31 | var profiler: AsyncProfiler = _
32 |
33 | override def init0(): Unit = {
34 | profileFile = s"${logDir}/${containerId}.${profileType}"
35 |
36 | profiler = AsyncProfiler.getInstance()
37 | ManagementFactory.getPlatformMBeanServer().registerMBean(
38 | profiler,
39 | new ObjectName("one.profiler:type=AsyncProfiler")
40 | )
41 | if (!manualProfile) {
42 | logInfo(profiler.execute(s"start,${profileType},file=${profileFile}"))
43 | }
44 | }
45 |
46 | override def shutdown0(): Unit = {
47 | logInfo(profiler.execute(s"stop,file=${profileFile}"))
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/plugin/ProfilePlugin.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.plugin
19 |
20 | import java.util.{Map => JMap}
21 |
22 | import org.apache.hadoop.conf.Configuration
23 | import org.apache.hadoop.fs.{FileSystem, Path}
24 | import org.apache.spark.deploy.yarn.YarnSparkHadoopUtil
25 | import org.apache.spark.internal.Logging
26 | import org.apache.spark.util.SignalUtils
27 | import org.apache.spark.SparkConf
28 | import org.apache.spark.api.plugin.{ExecutorPlugin, PluginContext}
29 |
30 | import scala.reflect.io.File
31 |
32 |
33 | /**
34 | * @author kun.wan,
35 | * @date 2020-05-26.
36 | */
37 | abstract class ProfilePlugin extends ExecutorPlugin with Logging {
38 |
39 | val pluginName = this.getClass.getName.stripSuffix("$")
40 |
41 | var conf: SparkConf = _
42 | var manualProfile: Boolean = _
43 | var profileType: String = _
44 |
45 | val logDir = System.getProperty("spark.yarn.app.container.log.dir")
46 | val containerId = YarnSparkHadoopUtil.getContainerId
47 | val applicationAttemptId = containerId.getApplicationAttemptId
48 | val applicationId = applicationAttemptId.getApplicationId
49 |
50 | var profileFile: String = _
51 |
52 | val fs = FileSystem.get(new Configuration())
53 | var shutdownFlag = false
54 |
55 | def init0(): Unit = {}
56 |
57 | def shutdown0(): Unit = {}
58 |
59 | override def init(ctx: PluginContext, extraConf: JMap[String, String]): Unit = {
60 | conf = ctx.conf()
61 | manualProfile = conf.getBoolean("spark.profile.manualprofile", false)
62 | profileType = conf.get("spark.profile.type", "jfr")
63 |
64 | init0()
65 | logInfo(s"init ProfileExecutorPlugin")
66 |
67 | // Handle SIGTERM from NodeManager
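    | // The handler blocks until shutdown() has set shutdownFlag, so the profile file
    | // can be uploaded before the JVM exits.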
68 | Seq("TERM", "HUP", "INT").foreach { sig =>
69 | SignalUtils.register(sig) {
70 | log.error("Executor RECEIVED SIGNAL " + sig)
71 | while(!shutdownFlag) {
72 | Thread sleep 100
73 | log.error("Executor shutdown loopback. SIGNAL " + sig)
74 | }
75 | log.error("ProfilePlugin Shutdown loop end. SIGNAL " + sig)
76 | false
77 | }
78 | }
79 | }
80 |
81 | /**
82 | * 1. Shutdown method is already a ShutdownHook.
83 | * 2. Executor may be killed by NodeManager before the shutdown method is finished.
84 | * The default wait time is 250ms defined by sleepDelayBeforeSigKill in ContainerLaunch Service.
85 | */
86 | override def shutdown(): Unit = {
87 | if (!manualProfile) {
88 | logInfo(s"shutdown ${pluginName}")
89 | shutdown0()
90 |
91 | logInfo("begin upload executor profile file.")
92 |
93 | val srcPath = new Path(profileFile)
94 | val dstPath = new Path(s"/metadata/logs/profile/${applicationId}/" +
95 | s"${applicationAttemptId.getAttemptId}/${containerId}.${profileType}")
96 | logInfo(s"profileFile :${srcPath} hdfs path : ${dstPath}")
97 | fs.copyFromLocalFile(true, true, srcPath, dstPath)
98 | File(profileFile).delete()
99 | }
100 | logInfo(s"end ${pluginName}")
101 | shutdownFlag = true
102 | }
103 | }
104 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/plugin/YourkitPlugin.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.plugin
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2020-05-14.
23 | */
24 | class YourkitPlugin extends ProfilePlugin {
25 |
26 | override def shutdown0(): Unit = {
27 | val controllerCls = Class.forName("com.yourkit.api.Controller")
28 | val controller = controllerCls.newInstance()
29 |
30 | val displayNameMethod = controllerCls.getMethod("capturePerformanceSnapshot")
31 | profileFile = displayNameMethod.invoke(controller).asInstanceOf[String]
32 |
33 | val stopCpuProfilingMethod = controllerCls.getMethod("stopCpuProfiling")
34 | stopCpuProfilingMethod.invoke(controller)
35 | }
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/Alert.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner
19 |
20 | import org.apache.spark.internal.Logging
21 | import org.apache.spark.sql.util.{DQUtil, SystemVariables}
22 | import org.apache.spark.sql.runner.container.ConfigContainer
23 |
24 | /**
25 | * Send an alert when a non-test, non-dryrun job run fails.
26 | *
27 | * @author kun.wan,
28 | * @date 2020-02-26.
29 | */
30 | object Alert extends ArgParser with Logging {
31 | def main(args: Array[String]): Unit = {
32 | if (!args.contains("--test") && !args.contains("--dryrun")) {
33 | parseArgument(args)
34 | val env = ConfigContainer.getOrElse(SystemVariables.ENV, SystemVariables.DEFAULT_ENV)
35 |
36 | val alertMessage = s"$env : job ${args(0)} failed, please check!"
37 | logError(alertMessage)
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/ArgParser.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner
19 |
20 | import java.time.LocalDateTime
21 |
22 | import org.apache.commons.io.FilenameUtils
23 | import org.apache.commons.lang3.StringUtils
24 |
25 | import org.apache.spark.sql.util.SystemVariables
26 |
27 | import scala.collection.mutable.ArrayBuffer
28 | import scala.io.Source
29 |
30 | import org.apache.spark.sql.runner.command.{BaseCommand, BlockCommentCommand, CommandFactory}
31 | import org.apache.spark.sql.runner.config.ApolloClient
32 | import org.apache.spark.sql.runner.container.ConfigContainer
33 |
34 | /**
35 | * @author kun.wan,
36 | * @date 2020-06-03.
37 | */
38 | class ArgParser {
39 |
40 | var batchTimesOpt: Option[Seq[LocalDateTime]] = None
41 | var startDate: Option[LocalDateTime] = None
42 | var endDate: Option[LocalDateTime] = None
43 | var dateRangeStep: Int = 1
44 | var jobFile: String = _
45 | var commands: Array[BaseCommand] = _
46 |
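    | // Illustrative invocation (the flags are real, the file name and values are examples):
    | //   JobRunner jobs/my_job.sql --dates 2020-01-01T00:00 --config env=prod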
47 | def parseArgument(args: Array[String]): Unit = {
48 | if (args.length < 1) {
49 | println("job configuration file must be found!")
50 | System.exit(-1)
51 | }
52 |
53 | val leftArgs = new ArrayBuffer[String]()
54 | var argv = args.toList
55 |
56 |
57 | while (!argv.isEmpty) {
58 | argv match {
59 | case "--dateRange" :: startDateStr :: endDateStr :: tail =>
60 | startDate = Some(LocalDateTime.parse(startDateStr))
61 | endDate = Some(LocalDateTime.parse(endDateStr))
62 | argv = tail
63 | case "--dates" :: dates :: tail =>
64 | batchTimesOpt = Some(dates.split(",").map(LocalDateTime.parse(_)).toSeq)
65 | argv = tail
66 | case "--config" :: value :: tail =>
67 | val tup = value.split("=")
68 | ConfigContainer :+ (tup(0) -> tup(1))
69 | argv = tail
70 | case "--profile" :: tail =>
71 | ConfigContainer :+ ("spark.profile" -> "true")
72 | argv = tail
73 | case "--dryrun" :: tail =>
74 | ConfigContainer :+ ("dryrun" -> "true")
75 | argv = tail
76 | case "--dateRangeStep" :: dateRangeStepStr :: tail =>
77 | dateRangeStep = dateRangeStepStr.toInt
78 | argv = tail
79 | case head :: tail if head != null =>
80 | leftArgs.append(head)
81 | argv = tail
82 | }
83 | }
84 |
85 | jobFile = leftArgs(0)
86 |
87 | ConfigContainer :+ (SystemVariables.JOB_NAME -> FilenameUtils.getBaseName(jobFile))
88 |
89 | if (StringUtils.isNotBlank(System.getenv(SystemVariables.APOLLO_META))) {
90 | ConfigContainer :+ (SystemVariables.APOLLO_META -> System.getenv(SystemVariables.APOLLO_META))
91 | }
92 |
93 | commands = CommandFactory.parseCommands(Source.fromFile(jobFile).mkString)
94 | require(commands.length > 0 && commands(0).isInstanceOf[BlockCommentCommand],
95 | "sql job must start with job description!")
96 | checkHeader(commands(0).asInstanceOf[BlockCommentCommand])
97 |
98 | // pull variables from apollo
99 | ApolloClient.pollVariablesFromApollo()
100 | }
101 |
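    | // The job header is the leading block comment of the job file; it must provide
    | // the keys author, period, run_env and describe, one "key: value" pair per line.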
102 | def checkHeader(cmd: BlockCommentCommand): Unit = {
103 | val keys = Set("author", "period", "run_env", "describe")
104 | val headerMap: Map[String, String] =
105 | cmd.comment.split('\n')
106 | .filter(_.contains(":"))
107 | .map { line =>
108 | val splits = line.split(":")
109 | splits(0).trim -> splits(1).trim
110 | }.toMap
111 |
112 | val notExistsKeys = keys.filterNot(headerMap.contains(_))
113 | assert(notExistsKeys.isEmpty, s"Header is missing parameters: ${notExistsKeys.mkString(", ")}!")
114 | for ((key, value) <- headerMap) {
115 | ConfigContainer :+ (key -> value)
116 | }
117 | }
118 | }
119 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/JobRunner.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner
19 |
20 | import java.time.LocalDateTime
21 | import java.time.temporal.ChronoUnit
22 |
23 | import scala.reflect.io.File
24 |
25 | import org.apache.spark.sql.plugin.{AsyncProfilePlugin, YourkitPlugin}
26 | import org.apache.spark.sql.runner.command.SqlCommand
27 | import org.apache.spark.sql.runner.container.{CollectorContainer, ConfigContainer}
28 | import org.apache.spark.sql.util.SystemVariables._
29 | import org.apache.spark.sql.util.{Logging, SystemVariables}
30 |
31 | /**
32 | * @author kun.wan,
33 | * @date 2019-12-05.
34 | */
35 | object JobRunner extends ArgParser with Logging {
36 | def main(args: Array[String]): Unit = {
37 | parseArgument(args)
38 | logInfo(s"submit job for ${jobFile}")
39 |
40 | prepareRuntimeParameter()
41 |
42 | batchTimesOpt.getOrElse(Seq[LocalDateTime]()).foreach { batchTime =>
43 | CollectorContainer :+ (SystemVariables.BATCH_TIME -> batchTime)
44 | logInfo(s"submitting job(batchTime = $batchTime)")
45 | if (ConfigContainer.contains("dryrun")) {
46 | commands.foreach(_.dryrun())
47 | } else {
48 | commands.foreach(_.run())
49 | }
50 | }
51 | SqlCommand.stop()
52 |
53 | logInfo(s"end job")
54 | }
55 |
56 | def prepareRuntimeParameter(): Unit = {
57 | // prepare for spark mode
58 | val distJars = Seq(PROJECT_JAR_NAME).map(jar => s"lib/${jar}").mkString(",")
59 | ConfigContainer :+ ("spark.yarn.dist.jars" -> distJars)
60 | if (!ConfigContainer.contains("spark.yarn.queue")) {
61 | ConfigContainer :+ ("spark.yarn.queue" -> s"root.${File(jobFile).parent.name}")
62 | }
63 |
64 | if (ConfigContainer.getOrElse("spark.profile", "false").toBoolean) {
65 | val profileShell = "hdfs:///deploy/config/profile.sh"
66 | val yourkitAgent = "hdfs:///deploy/config/libyjpagent.so"
67 |
68 | ConfigContainer.getOrElse("spark.profile.type", "jfr") match {
69 | case "yourkit" =>
70 | ConfigContainer :+ ("spark.profile.type" -> "snapshot")
71 | ConfigContainer :+ ("spark.yarn.dist.files" -> s"${profileShell},${yourkitAgent}")
72 | ConfigContainer :+ ("spark.yarn.dist.jars" -> s"${distJars},hdfs:///deploy/config/yjp-controller-api-redist.jar")
73 | ConfigContainer :+ ("spark.executor.extraJavaOptions" -> "-agentpath:libyjpagent.so=logdir=,async_sampling_cpu")
74 | ConfigContainer :+ ("spark.executor.plugins" -> classOf[YourkitPlugin].getName)
75 |
76 | case _ =>
77 | ConfigContainer :+ ("spark.yarn.dist.archives" ->
78 | "hdfs:///deploy/config/async-profiler/async-profiler.zip#async-profiler")
79 | ConfigContainer :+ ("spark.yarn.dist.files" -> profileShell)
80 | ConfigContainer :+ ("spark.executor.extraLibraryPath" -> "./async-profiler/build/")
81 | ConfigContainer :+ ("spark.executor.plugins" -> classOf[AsyncProfilePlugin].getName)
82 | }
83 | }
84 |
85 | // If no date arguments are given, the batch time defaults to the previous execution period.
86 | if (startDate.isDefined && endDate.isDefined) {
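    | // e.g. --dateRange 2020-01-01T00:00 2020-01-03T00:00 with period=day and dateRangeStep=1
    | // yields batch times for Jan 1, Jan 2 and Jan 3 (illustrative values).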
87 | batchTimesOpt = ConfigContainer.get("period") match {
88 | case "minute" =>
89 | val rangeSize = ChronoUnit.MINUTES.between(startDate.get, endDate.get)
90 | Some(Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusMinutes(i)))
91 | case "hour" | "hourly" =>
92 | val rangeSize = ChronoUnit.HOURS.between(startDate.get, endDate.get)
93 | Some(Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusHours(i)))
94 | case "day" | "daily" =>
95 | val rangeSize = ChronoUnit.DAYS.between(startDate.get, endDate.get)
96 | Some(Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusDays(i)))
97 | case "month" =>
98 | val rangeSize = ChronoUnit.MONTHS.between(startDate.get, endDate.get)
99 | Some(Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusMonths(i)))
100 | }
101 | }
102 | if (batchTimesOpt == None) {
103 | val defaultBatchTime = {
104 | ConfigContainer.get("period") match {
105 | case "minute" =>
106 | val dt = LocalDateTime.now.minusMinutes(1)
107 | LocalDateTime.of(dt.getYear, dt.getMonth, dt.getDayOfMonth,
108 | dt.getHour, dt.getMinute, 0)
109 | case "hour" =>
110 | val dt = LocalDateTime.now.minusHours(1)
111 | LocalDateTime.of(dt.getYear, dt.getMonth, dt.getDayOfMonth, dt.getHour, 0, 0)
112 | case "day" =>
113 | val dt = LocalDateTime.now.minusDays(1)
114 | LocalDateTime.of(dt.getYear, dt.getMonth, dt.getDayOfMonth, 0, 0, 0)
115 | case "month" =>
116 | val dt = LocalDateTime.now.minusMonths(1)
117 | LocalDateTime.of(dt.getYear, dt.getMonth, 1, 0, 0, 0)
118 | }
119 | }
120 | batchTimesOpt = Some(Seq(defaultBatchTime))
121 | }
122 | }
123 | }
124 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/ArrayValueCollector.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.util.Logging
22 | import scala.collection.mutable.ArrayBuffer
23 |
24 | import org.apache.spark.sql.runner.container.CollectorContainer
25 |
26 | /**
27 | * @author kun.wan,
28 | * @date 2021-03-08.
29 | */
30 | case class ArrayValueCollector(name: String, columnName: String)
31 | extends DataCallBack with Logging {
32 |
33 | val array = ArrayBuffer[Any]()
34 |
35 | override def next(row: GenericRowWithSchema): Unit = {
36 | array += row.get(row.schema.fieldIndex(columnName))
37 | }
38 |
39 | override def close(): Unit = {
40 | CollectorContainer :+ (name -> array.toArray)
41 | }
42 | }
43 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/DataCallBack.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.types.StructType
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-02-20.
26 | */
27 | trait DataCallBack {
28 |
29 | var skipEmpty = true
30 |
31 | def init(schema: StructType): Unit = {}
32 |
33 | def next(row: GenericRowWithSchema): Unit
34 |
35 | def close(): Unit
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/DataCallBackFactory.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.util.Logging
21 |
22 | import scala.collection.mutable.ArrayBuffer
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2021-03-08.
27 | */
28 | object DataCallBackFactory extends Logging {
29 |
30 | val callBacks: ThreadLocal[ArrayBuffer[DataCallBack]] =
31 | new ThreadLocal[ArrayBuffer[DataCallBack]] {
32 | override def initialValue(): ArrayBuffer[DataCallBack] = ArrayBuffer[DataCallBack]()
33 | }
34 |
35 | def registerDataCallBack(dataCallBack: DataCallBack): Unit = {
36 | logInfo(s"add new data call back:\n$dataCallBack")
37 | callBacks.get() += dataCallBack
38 | }
39 |
40 | def clearDataCallBack(): Unit = callBacks.get().clear()
41 |
42 | def consumeResult(qr: QueryResult): Unit = {
43 | val iterator = qr.iterator
44 | while (iterator.hasNext) {
45 | iterator.next()
46 | }
47 | }
48 | }
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/DataCheckCallBack.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.util.{DQUtil, Logging}
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-03-08.
26 | */
27 | case class DataCheckCallBack(title: String,
28 | originColumn: String,
29 | checkResultColumn: String,
30 | expression: String)
31 | extends DataCallBack with Logging {
32 |
33 | override def next(row: GenericRowWithSchema): Unit = {
34 | val value: Any = row.get(row.schema.fieldIndex(originColumn))
35 | val checkResult: Boolean = row.getAs(checkResultColumn)
36 | val messages =
37 | Seq(title,
38 | s"数据检查${if (checkResult) "正常" else "异常"}",
39 | s"检查条件: $expression",
40 | s"实际值 $value ${if (!checkResult) "不" else ""}满足条件!")
41 |
42 | logInfo(messages.mkString("\n"))
43 | }
44 |
45 | override def close(): Unit = {}
46 | }
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/EmailSink.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import java.util.Properties
21 |
22 | import javax.activation.DataHandler
23 | import javax.mail.internet.{InternetAddress, MimeBodyPart, MimeMessage, MimeMultipart}
24 | import javax.mail.{Message, Session}
25 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
26 | import org.apache.spark.sql.util.ConfigUtil
27 |
28 | import scala.collection.mutable.ArrayBuffer
29 |
30 | case class EmailSink(name: String, config: Map[String, String]) extends Sink {
31 |
32 | // email server connection parameters
33 | val hostName = config.getOrElse(
34 | "email.hostname",
35 | throw new IllegalArgumentException("config email.hostname is needed.")
36 | )
37 | val userName = config.getOrElse(
38 | "email.username",
39 | throw new IllegalArgumentException("config email.username is needed.")
40 | )
41 | val password = config.getOrElse(
42 | "email.password",
43 | throw new IllegalArgumentException("config email.password is needed.")
44 | )
45 | val from = config.getOrElse(
46 | "email.from",
47 | throw new IllegalArgumentException("config email.from is needed.")
48 | )
49 |
50 | // email content construction parameters
51 | val names = ConfigUtil.trimConfigArray(
52 | config.getOrElse(
53 | s"$name.columns",
54 | throw new IllegalArgumentException(s"config $name.columns is needed.")
55 | ),
56 | ","
57 | )
58 | val columnNames = ConfigUtil.trimConfigArray(
59 | config.getOrElse(
60 | s"$name.columnNames",
61 | throw new IllegalArgumentException(s"config $name.columnNames is needed.")
62 | ),
63 | ","
64 | )
65 | val to = config.getOrElse(
66 | s"$name.email-to",
67 | throw new IllegalArgumentException(s"config $name.email-to is needed.")
68 | )
69 |
70 | val cc = config.getOrElse(s"$name.email-cc", "")
71 |
72 | val emailPattern = EmailSink.generateTitle(names)
73 | val emailColumnName = EmailSink.generateTitle(columnNames)
74 | val emailTemplate = config.getOrElse(
75 | s"$name.email-template",
76 | s""""""
77 | )
78 | val csvPattern = columnNames
79 | val subject = envName + " environment: " + config.getOrElse(s"$name.subject", "no subject")
80 | val attachedFileName = config.getOrElse("email-attach-filename", subject)
81 |
82 | val emailContent = new ArrayBuffer[String]()
83 | val csvContentBuffer = new ArrayBuffer[String]()
84 | emailContent.append(emailColumnName)
85 | csvContentBuffer.append(columnNames)
86 |
87 | var i = 0
88 |
89 | override def next(row: GenericRowWithSchema): Unit = {
90 | if (i < rowLimit) {
91 | emailContent.append(parsePattern(emailPattern, row))
92 | i = i + 1
93 | }
94 | csvContentBuffer.append(parsePattern(names, row))
95 |
96 | }
97 |
98 | override def close(): Unit = {
99 | val htmlContent = emailTemplate.format(emailContent.mkString("\n"))
100 | val csvContent = csvContentBuffer.mkString("\n")
101 |
102 | // send the email
103 | val properties = new Properties()
104 | properties.put("mail.transport.protocol", "smtp")
105 | properties.put("mail.smtp.host", hostName)
106 | properties.put("mail.smtp.port", "465")
107 | properties.put(
108 | "mail.smtp.socketFactory.class",
109 | "javax.net.ssl.SSLSocketFactory"
110 | )
111 | properties.put("mail.smtp.auth", "true")
112 | properties.put("mail.smtp.ssl.enable", "true")
113 |
114 | val session = Session.getInstance(properties)
115 | val message = new MimeMessage(session)
116 | message.setFrom(new InternetAddress(from, userName))
117 | message.addRecipients(Message.RecipientType.TO, to)
118 | message.addRecipients(Message.RecipientType.CC, cc)
119 | message.setSubject(subject)
120 | val multipart = new MimeMultipart()
121 | val contentPart = new MimeBodyPart()
122 | contentPart.setContent(htmlContent, "text/html;charset=UTF-8")
123 | multipart.addBodyPart(contentPart)
124 | val mdp = new MimeBodyPart()
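    | // 0xEF 0xBB 0xBF is the UTF-8 byte order mark, prepended so spreadsheet tools
    | // detect the attached CSV's encoding correctly.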
125 | val dh = new DataHandler(
126 | new String(Array[Byte](0xEF.toByte, 0xBB.toByte, 0xBF.toByte)) + csvContent,
127 | "text/plain;charset=UTF-8"
128 | )
129 | mdp.setFileName(attachedFileName + ".csv")
130 | mdp.setDataHandler(dh)
131 | multipart.addBodyPart(mdp)
132 | message.setContent(multipart)
133 | val transport = session.getTransport
134 | transport.connect(from, password)
135 | transport.sendMessage(message, message.getAllRecipients)
136 | transport.close
137 | logInfo(s"Email sink finished")
138 | }
139 |
140 | override def toString: String = {
141 | s"EmailSink(name = $name, from = $from, to = $to, cc = $cc, " +
142 | s"names = $names, columnNames = $columnNames)"
143 | }
144 |
145 | }
146 |
147 | object EmailSink {
148 | def generateTitle(columnName: String): String = {
149 | val columnTitle = columnName.split(",")
150 | .map(col => s"${ConfigUtil.trimConfigValue(col)} | ")
151 | .mkString
152 |
153 | s"${columnTitle}
"
154 | }
155 | }
156 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/QueryResult.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.types.StructType
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2019-12-05.
26 | */
27 | case class QueryResult(schema: StructType, iterator: Iterator[GenericRowWithSchema])
28 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/SingleValueCollector.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.util.Logging
22 | import org.apache.spark.sql.runner.container.CollectorContainer
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2021-03-08.
27 | */
28 | case class SingleValueCollector(name: String, columnName: String)
29 | extends DataCallBack with Logging {
30 |
31 | var value: Any = _
32 |
33 | override def next(row: GenericRowWithSchema): Unit = {
34 | value = row.get(row.schema.fieldIndex(columnName))
35 | }
36 |
37 | override def close(): Unit = {
38 | CollectorContainer :+ (name -> value)
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/callback/Sink.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.callback
19 |
20 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
21 | import org.apache.spark.sql.runner.metrics.ReporterTrait
22 | import org.apache.spark.sql.util.{Logging, SystemVariables}
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2019-12-12.
27 | */
28 | trait Sink extends DataCallBack with ReporterTrait with Logging {
29 |
30 | val config: Map[String, String]
31 |
32 | val envName = config.getOrElse(SystemVariables.ENV, "UNKNOWN")
33 |
34 | var resultRows: Long = 0
35 |
36 | val defaultRowLimit: String = "1000"
37 |
38 | val rowLimit: Int = config.getOrElse("rowLimit", defaultRowLimit).toInt
39 |
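    | // Substitute "{column}" placeholders in a pattern with values from the row,
    | // e.g. parsePattern("count = {cnt}", row) returns "count = 3" when the row's
    | // cnt column holds 3 (illustrative column name and value).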
40 | def parsePattern(pattern: String, row: GenericRowWithSchema): String = {
41 | val sb = new StringBuilder
42 | var startIdx = -1
43 | for ((c, idx) <- pattern.zipWithIndex) {
44 | if (c == '{' && startIdx < 0) {
45 | startIdx = idx
46 | } else if (c == '}' && startIdx >= 0) {
47 | val variableName = pattern.substring(startIdx + 1, idx)
48 | val fieldValue: AnyRef = row.getAs(variableName)
49 | sb.append(fieldValue)
50 | startIdx = -1
51 | } else if (startIdx < 0) {
52 | sb.append(c)
53 | }
54 | }
55 |
56 | sb.toString
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/BaseCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import org.apache.spark.sql.util.{Logging, StringUtil}
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2021-02-23.
25 | */
26 | abstract class BaseCommand(sourceChars: SourceChars) extends Logging {
27 |
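    | // Delimiter pairs (quotes and parentheses) that readTo skips over, so a target
    | // character inside a quoted string or nested parentheses does not end the scan early.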
28 | val escapeMapping: Map[Array[Char], Array[Char]] = Map(
29 | Array('\"') -> Array('\"'),
30 | Array('\'') -> Array('\''),
31 | Array('(') -> Array(')'),
32 | )
33 |
34 | val chars = sourceChars.chars
35 |
36 | def readTo(char: Char): (String, Int, Int) = readTo(Array(char))
37 |
38 | def readTo(target: String): (String, Int, Int) = readTo(target.toCharArray)
39 |
40 | private def readTo(target: Array[Char]): (String, Int, Int) = {
41 | val len = target.length
42 | var index = -1
43 | var i = sourceChars.start
44 | while (i < sourceChars.end && index < 0) {
45 | // deal with escape char array
46 | for ((startChars, endChars) <- escapeMapping if startChars.intersect(target).size == 0) {
47 | val slen = startChars.length
48 | if (i > slen && chars(i - slen) != '\\') {
49 | if (chars.slice(i - slen + 1, i + 1) sameElements startChars) {
50 | val elen = endChars.length
51 | i = i + elen
52 | while (i < sourceChars.end && (chars(i - elen) == '\\' ||
53 | !(chars.slice(i - elen + 1, i + 1) sameElements endChars))) {
54 | i = i + 1
55 | }
56 | }
57 | }
58 | }
59 |
60 | if (chars.slice(i - len + 1, i + 1) sameElements target) {
61 | index = i + 1 - len
62 | } else {
63 | i = i + 1
64 | }
65 | }
66 | assert(index >= 0, s"Parse Job Error!\n${new String(chars.slice(sourceChars.start, sourceChars.end))}")
67 | val res =
68 | StringUtil.escapeStringValue(new String(chars.slice(sourceChars.start, index)))
69 | val nextStart = i + 1
70 | (res, index, nextStart)
71 | }
72 |
73 | def run(): Unit = {
74 | throw new Exception("Unsupport Command!")
75 | }
76 |
77 | def dryrun(): Unit = run()
78 | }
79 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/BlockCommentCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | case class BlockCommentCommand(sourceChars: SourceChars)
25 | extends BaseCommand(sourceChars) {
26 |
27 | def this(sourceString: String) {
28 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
29 | }
30 |
31 | sourceChars.start = sourceChars.start + CommandFactory.blockCommentPrefix.length
32 |
33 | val (comment, _, nextStart) = readTo("*/")
34 | sourceChars.start = nextStart
35 |
36 | override def toString: String = s"/**${comment}*/"
37 |
38 | override def run(): Unit = {
39 | logInfo(s"\n${this.toString}")
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/CommandFactory.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import scala.collection.mutable.ArrayBuffer
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2021-02-24.
25 | */
26 | object CommandFactory {
27 | val sqlPrefix = ""
28 | val lineCommentPrefix = "--"
29 | val blockCommentPrefix = "/**"
30 | val setPrefix = "!set"
31 |
32 | val ifPrefix = "!if"
33 | val elsePrefix = "!else"
34 | val fiPrefix = "!fi"
35 |
36 | def skipEmptyChars(sourceChars: SourceChars): Unit = {
37 | while (sourceChars.start < sourceChars.chars.length &&
38 | Character.isWhitespace(sourceChars.chars.charAt(sourceChars.start))) {
39 | sourceChars.start = sourceChars.start + 1
40 | }
41 | }
42 |
43 | /**
44 | * Probe the leading characters of the remaining source to determine the next command.
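    | * e.g. a chunk starting with "!set" becomes a SetCommand, "--" a LineCommentCommand,
    | * "/**" a BlockCommentCommand; text without a known prefix is parsed as a SqlCommand.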
45 | * @param sourceChars
46 | */
47 | def nextCommand(sourceChars: SourceChars): BaseCommand = {
48 | skipEmptyChars(sourceChars)
49 | val commandPrefix: Option[String] =
50 | Seq(
51 | lineCommentPrefix,
52 | blockCommentPrefix,
53 | setPrefix,
54 | ifPrefix,
55 | elsePrefix,
56 | fiPrefix
57 | ) find { prefix =>
58 | val len = prefix.length
59 | if (sourceChars.start + len >= sourceChars.end) {
60 | false
61 | }
62 | else {
63 | prefix.equalsIgnoreCase(new String(sourceChars.chars, sourceChars.start, len))
64 | }
65 | }
66 |
67 | val cmd =
68 | commandPrefix match {
69 | case Some(prefix) if prefix == lineCommentPrefix => LineCommentCommand(sourceChars)
70 | case Some(prefix) if prefix == blockCommentPrefix => BlockCommentCommand(sourceChars)
71 | case Some(prefix) if prefix == setPrefix => SetCommand(sourceChars)
72 | case Some(prefix) if prefix == ifPrefix => IfCommand(sourceChars)
73 | case Some(prefix) if prefix == elsePrefix => ElseCommand(sourceChars)
74 | case Some(prefix) if prefix == fiPrefix => FiCommand(sourceChars)
75 | case None => SqlCommand(sourceChars)
76 | }
77 | skipEmptyChars(sourceChars)
78 | cmd
79 | }
80 |
81 | def parseCommands(source: String): Array[BaseCommand] = {
82 | val commands = ArrayBuffer[BaseCommand]()
83 | val sourceChars = SourceChars(source.toCharArray, 0, source.length)
84 |
85 | while (sourceChars.start < source.length) {
86 | val command = nextCommand(sourceChars)
87 | commands += command
88 | }
89 | commands.toArray
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/ElseCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | case class ElseCommand(sourceChars: SourceChars)
25 | extends BaseCommand(sourceChars) {
26 |
27 | def this(sourceString: String) {
28 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
29 | }
30 |
31 | sourceChars.start = sourceChars.start + CommandFactory.elsePrefix.length
32 | }
33 |
34 |
35 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/FiCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | case class FiCommand(sourceChars: SourceChars)
25 | extends BaseCommand(sourceChars) {
26 |
27 | def this(sourceString: String) {
28 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
29 | }
30 |
31 | sourceChars.start = sourceChars.start + CommandFactory.fiPrefix.length
32 | }
33 |
34 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/IfCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
21 | import org.apache.spark.sql.catalyst.expressions.{BinaryExpression, Cast, Literal}
22 | import org.apache.spark.sql.catalyst.parser.CatalystSqlParser
23 | import org.apache.spark.sql.types.DataType
24 |
25 | import scala.collection.mutable
26 | import scala.collection.mutable.ArrayBuffer
27 |
28 | import org.apache.spark.sql.runner.config.VariableSubstitution
29 | import org.apache.spark.sql.runner.container.{CollectorContainer, ConfigContainer}
30 |
31 | /**
32 | * @author kun.wan,
33 | * @date 2021-02-24.
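    | *
    | * Example (illustrative):
    | *   !if (env = 'prod')
    | *     ...commands run when the condition holds...
    | *   !else
    | *     ...commands run otherwise...
    | *   !fi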
34 | */
35 | case class IfCommand(sourceChars: SourceChars)
36 | extends BaseCommand(sourceChars) {
37 |
38 | def this(sourceString: String) {
39 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
40 | }
41 |
42 | sourceChars.start = sourceChars.start + CommandFactory.ifPrefix.length
43 |
44 | val (_, _, nextStart1) = readTo("(")
45 | sourceChars.start = nextStart1
46 | val (ifConditionString, _, nextStart2) = readTo(")")
47 | sourceChars.start = nextStart2
48 |
49 | val ifCommands = new ArrayBuffer[BaseCommand]()
50 | val elseCommands = new ArrayBuffer[BaseCommand]()
51 |
52 | var parseStage = "if"
53 | while (parseStage != "fi") {
54 | val cmd = CommandFactory.nextCommand(sourceChars)
55 | cmd match {
56 | case _: FiCommand =>
57 | parseStage = "fi"
58 |
59 | case _: ElseCommand =>
60 | parseStage = "else"
61 |
62 | case _ =>
63 | parseStage match {
64 | case "if" =>
65 | ifCommands += cmd
66 | case "else" =>
67 | elseCommands += cmd
68 | }
69 | }
70 | }
71 |
72 | override def toString: String = {
73 | val elseString =
74 | if (elseCommands.size > 0) {
75 | s"""\n!else
76 | |${elseCommands.mkString("\n")}
77 | |""".stripMargin
78 | } else {
79 | ""
80 | }
81 |
82 | s"""!if ($ifConditionString)
83 | |${ifCommands.mkString("\n") + elseString}
84 | |!fi
85 | |""".stripMargin
86 |
87 | }
88 |
89 | override def run(): Unit = {
90 | doRun(isDryRun = false)
91 | }
92 |
93 | override def dryrun(): Unit = {
94 | doRun(isDryRun = true)
95 | }
96 |
97 | def doRun(isDryRun: Boolean): Unit = {
98 | VariableSubstitution.withSubstitution { substitution =>
99 | val dataTypeMap = mutable.Map[String, DataType]()
100 |
101 | val originExpr = CatalystSqlParser.parseExpression(substitution.substitute(ifConditionString))
102 |
103 | var lastMapSize = -1
104 | while (lastMapSize != dataTypeMap.size) {
105 | lastMapSize = dataTypeMap.size
106 | originExpr transform {
107 | case expr: BinaryExpression =>
108 | (expr.left, expr.right) match {
109 | case (attr: UnresolvedAttribute, literal: Literal) =>
110 | dataTypeMap += (attr.name -> literal.dataType)
111 |
112 | case (literal: Literal, attr: UnresolvedAttribute) =>
113 | dataTypeMap += (attr.name -> literal.dataType)
114 |
115 | case (attr1: UnresolvedAttribute, attr2: UnresolvedAttribute) =>
116 | if (dataTypeMap.contains(attr1.name)) {
117 | dataTypeMap += (attr2.name -> dataTypeMap(attr1.name))
118 | }
119 | if (dataTypeMap.contains(attr2.name)) {
120 | dataTypeMap += (attr1.name -> dataTypeMap(attr2.name))
121 | }
122 |
123 | case (_, _) =>
124 | }
125 | expr
126 |
127 | case e => e
128 | }
129 | }
130 |
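| // Replace each attribute with its runtime value from CollectorContainer (falling back to
| // ConfigContainer), cast to the inferred type, so the condition can be evaluated directly.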
131 | val ifCondition =
132 | originExpr transform {
133 | case e: UnresolvedAttribute =>
134 | val dataType = dataTypeMap(e.name)
135 | val literal = Literal(CollectorContainer.getOrElse(e.name, ConfigContainer.get(e.name)))
136 | if (dataType == literal.dataType) {
137 | literal
138 | } else {
139 | Cast(literal, dataType)
140 | }
141 |
142 | case e => e
143 | }
144 |
145 | val ret = ifCondition.eval().asInstanceOf[Boolean]
146 | if (ret) {
147 | ifCommands.foreach(cmd => if (isDryRun) cmd.dryrun() else cmd.run())
148 | } else {
149 | elseCommands.foreach(cmd => if (isDryRun) cmd.dryrun() else cmd.run())
150 | }
151 | }
152 |
153 | }
154 | }
155 |
156 |
157 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/LineCommentCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | case class LineCommentCommand(sourceChars: SourceChars)
25 | extends BaseCommand(sourceChars) {
26 |
27 | def this(sourceString: String) {
28 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
29 | }
30 |
31 | sourceChars.start = sourceChars.start + CommandFactory.lineCommentPrefix.length
32 |
33 | val (comment, _, nextStart) = readTo('\n')
34 | sourceChars.start = nextStart
35 |
36 | override def toString: String = s"${CommandFactory.lineCommentPrefix} ${comment}"
37 |
38 | override def run(): Unit = {
39 | logInfo(s"\n${this.toString}")
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/SetCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import org.apache.spark.sql.runner.config.VariableSubstitution
21 | import org.apache.spark.sql.runner.container.ConfigContainer
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-02-24.
26 | */
27 | case class SetCommand(sourceChars: SourceChars) extends BaseCommand(sourceChars) {
28 |
29 | def this(sourceString: String) {
30 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
31 | }
32 |
33 | sourceChars.start = sourceChars.start + CommandFactory.setPrefix.length
34 |
35 | val (key, _, valueStart) = readTo('=')
36 | sourceChars.start = valueStart
37 |
38 | val (value, _, nextStart) = readTo(';')
39 | sourceChars.start = nextStart
40 |
41 | override def toString: String = s"${CommandFactory.setPrefix} $key = $value;"
42 |
43 | override def run(): Unit = {
44 | val substitutionValue =
45 | VariableSubstitution.withSubstitution { substitution =>
46 | substitution.substitute(value)
47 | }
48 |
49 | ConfigContainer :+ (key -> substitutionValue)
50 | logInfo(s"\n${CommandFactory.setPrefix} $key = $substitutionValue;")
51 | }
52 | }
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/SourceChars.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | case class SourceChars(chars: Array[Char], var start: Int, var end: Int)
25 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/command/SqlCommand.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.hive.SparkSqlRunner
22 | import org.apache.spark.sql.util.{Logging, SystemVariables}
23 | import scala.collection.JavaConverters._
24 |
25 | import org.apache.spark.sql.runner.callback.DataCallBackFactory
26 | import org.apache.spark.sql.runner.config.VariableSubstitution
27 | import org.apache.spark.sql.runner.container.ConfigContainer
28 |
29 | /**
30 | * @author kun.wan,
31 | * @date 2021-02-24.
32 | */
33 | case class SqlCommand(sourceChars: SourceChars)
34 | extends BaseCommand(sourceChars) {
35 |
36 | def this(sourceString: String) {
37 | this(SourceChars(sourceString.toCharArray, 0, sourceString.length))
38 | }
39 |
40 | sourceChars.start = sourceChars.start + CommandFactory.sqlPrefix.length
41 |
42 | val (sql, _, nextStart) = readTo(";")
43 | sourceChars.start = nextStart
44 |
45 | override def toString: String = s"$sql;"
46 |
47 | override def run(): Unit = {
48 | doRun(isDryRun = false)
49 | }
50 |
51 | override def dryrun(): Unit = {
52 | doRun(isDryRun = true)
53 | }
54 |
55 | def doRun(isDryRun: Boolean): Unit = {
56 | VariableSubstitution.withSubstitution { substitution =>
57 | // Note: take care to restore the original parameters after substitution here.
58 | val sqlText = substitution.substitute(sql)
59 | logInfo(s"sql content:\n$sqlText")
60 | if (!isDryRun) {
61 | DataCallBackFactory.consumeResult(SqlCommand.sparkSqlRunner.run(sqlText))
62 | }
63 | }
64 | }
65 | }
66 |
67 | object SqlCommand extends Logging {
68 |
69 | implicit lazy val sparkSession: SparkSession =
70 | SparkSqlRunner.sparkSession(
71 | Some(ConfigContainer.getOrElse(SystemVariables.JOB_NAME, "Unknown Job Name")))
72 |
73 | lazy val sparkSqlRunner = new SparkSqlRunner
74 |
75 | // val catalogEventListener = InsightCatalogEventListener()
76 | var sqlContext = sparkSession.sqlContext
77 |
78 | // SparkSession.active.sharedState.externalCatalog.addListener(catalogEventListener)
79 |
80 | /** Cleans up and shuts down the Spark SQL environments. */
81 | def stop() {
82 | logDebug("Clear SparkSession and SparkContext")
83 | // TODO
84 | // catalogEventListener.stop()
85 | if (sqlContext != null) {
86 | sqlContext = null
87 | }
88 | if (sparkSession != null) {
89 | sparkSession.stop()
90 | }
91 | SparkSession.clearActiveSession
92 |
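| // Reflectively clear the inheritableThreadLocals of every registered JVM shutdown hook
| // thread so they do not retain references to the stopped session's thread-local state.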
93 | val clazz = Class.forName("java.lang.ApplicationShutdownHooks")
94 | val field = clazz.getDeclaredField("hooks")
95 | field.setAccessible(true)
96 | val inheritableThreadLocalsField = classOf[Thread].getDeclaredField("inheritableThreadLocals")
97 | inheritableThreadLocalsField.setAccessible(true)
98 |
99 | val hooks = field.get(clazz).asInstanceOf[java.util.IdentityHashMap[Thread, Thread]].asScala
100 | hooks.keys.foreach(inheritableThreadLocalsField.set(_, null))
101 | }
102 |
103 | def simpleTypeName(typeName: String): String = {
104 | val i = typeName.indexOf("(")
105 | if (i > 0) {
106 | typeName.substring(0, i)
107 | } else {
108 | typeName
109 | }
110 | }
111 | }
112 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/config/ApolloClient.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.config
19 |
20 | import com.ctrip.framework.apollo.{Config, ConfigService}
21 | import org.apache.commons.lang3.StringUtils
22 |
23 | import org.apache.spark.sql.util.{Logging, SystemVariables}
24 | import scala.collection.JavaConverters._
25 |
26 | import org.apache.spark.sql.runner.container.ConfigContainer
27 |
28 | /**
29 | * @author kun.wan,
30 | * @date 2020-03-04.
31 | */
32 | case class ApolloClient(namespace: String) extends Logging {
33 |
34 | lazy val config: Config = ConfigService.getConfig(namespace)
35 |
36 | def getProperty(key: String, defaultValue: String): String = {
37 | config.getProperty(key, defaultValue)
38 | }
39 | }
40 |
41 | object ApolloClient extends Logging {
42 |
43 | /**
44 | * Fetching parameters from Apollo on demand is too slow, so pull them all once up front.
45 | *
46 | * @return
47 | */
48 | def pollVariablesFromApollo(): Unit = {
49 | if (StringUtils.isNotBlank(System.getenv(SystemVariables.APOLLO_META))) {
50 | val appId =
51 | ConfigContainer.getOrElse("apollo.app.id",
52 | ConfigContainer.getOrElse("appId",
53 | SystemVariables.DEFAULT_APOLLO_ID))
54 | System.setProperty("app.id", appId)
55 |
56 | val systemClient = ApolloClient("1.above-board")
57 |
58 | systemClient.config.getPropertyNames
59 | .asScala.foreach { case key: String =>
60 | val value = systemClient.getProperty(key, "")
61 | val encryptedValue = if (key.toLowerCase.contains("password")) "******" else value
62 | logInfo(s"pull variable from apollo, $key = $encryptedValue)")
63 | ConfigContainer :+ (key -> value)
64 | }
65 |
66 | if (ConfigContainer.contains("apollo.namespace")) {
67 | val appClient = ApolloClient(ConfigContainer.get("apollo.namespace"))
68 | appClient.config.getPropertyNames.asScala.foreach { case key: String =>
69 | val value = appClient.getProperty(key, "")
70 | val encryptedValue = if (key.toLowerCase.contains("password")) "******" else value
71 | logInfo(s"pull variable from apollo, $key = $encryptedValue")
72 | ConfigContainer :+ (key -> value)
73 | }
74 | }
75 | }
76 | }
77 | }
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/container/CollectorContainer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.container
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-03-08.
23 | */
24 | object CollectorContainer extends ContainerTrait[String, Any]
25 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/container/ConfigContainer.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.container
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2020-03-06.
23 | */
24 | object ConfigContainer extends ContainerTrait[String, String]
25 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/container/ContainerTrait.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.container
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-03-08.
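| *
| * A minimal usage sketch (ConfigContainer below is one concrete container; the key and
| * value are illustrative):
| * {{{
| *   ConfigContainer :+ ("graphite.host" -> "127.0.0.1")  // add or overwrite one entry
| *   val host = ConfigContainer.get("graphite.host")      // read it back
| *   ConfigContainer - "graphite.host"                     // remove it
| * }}}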
23 | */
24 | class ContainerTrait[A, B] {
25 |
26 | /**
27 | * Held in an InheritableThreadLocal so that each thread running its own job keeps its own configuration.
28 | * A thread that needs an independent copy should copy the contents of valueMap out and maintain them itself.
29 | */
30 | val valueMap =
31 | new InheritableThreadLocal[Map[A, B]]() {
32 | override def initialValue(): Map[A, B] = Map[A, B]()
33 | }
34 |
35 | /**
36 | * Merge the given map into the existing one; when keys conflict, the values from the new map win.
37 | *
38 | * @param map
39 | */
40 | def ++(map: Map[A, B]): Unit = {
41 | valueMap.set(valueMap.get() ++ map)
42 | }
43 |
44 | /**
45 | * Add a new key-value pair to the map; if the key already exists, its value is overwritten.
46 | * @param kv
47 | */
48 | def :+(kv: (A, B)): Unit = {
49 | valueMap.set(valueMap.get() + kv)
50 | }
51 |
52 | def getOrElse(key: A, default: => B): B = valueMap.get().getOrElse(key, default)
53 |
54 | def get(key: A): B = valueMap.get()(key)
55 |
56 | def getOption(key: A): Option[B] = valueMap.get().get(key)
57 |
58 | def contains(key: A): Boolean = valueMap.get().contains(key)
59 |
60 | def -(key: A): Unit = {
61 | if (valueMap.get().contains(key)) {
62 | valueMap.set(valueMap.get() - key)
63 | }
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/metrics/GraphiteReporter.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.metrics
19 |
20 | import java.io.PrintWriter
21 | import java.net.Socket
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2020-02-26.
26 | */
27 | case class GraphiteReporter(host: String, port: Int) extends AutoCloseable with Serializable {
28 |
29 | @transient val socket: Socket = new Socket(host, port)
30 | @transient val out: PrintWriter = new PrintWriter(socket.getOutputStream, true)
31 |
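| // Writes one line per metric in Graphite's plaintext format: "<metric path> <value> <timestamp>".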
32 | def reportMetrics(key: String, value: Number): Unit = {
33 | val timestamp = System.currentTimeMillis() / 1000
34 | out.printf(s"${key} ${value} ${timestamp}%n")
35 | }
36 |
37 | override def close(): Unit = {
38 | if (out != null) {
39 | out.close()
40 | }
41 | if (socket != null) {
42 | socket.close()
43 | }
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/runner/metrics/ReporterTrait.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.metrics
19 |
20 | import org.apache.spark.sql.runner.container.ConfigContainer
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2020-02-26.
25 | */
26 | trait ReporterTrait {
27 |
28 | lazy val reporter: Option[GraphiteReporter] = {
29 | val enableMetrics = ConfigContainer.getOrElse("metrics.enable", "true").toBoolean
30 | if (enableMetrics && ConfigContainer.contains("graphite.host")) {
31 | val graphiteHost = ConfigContainer.get("graphite.host")
32 | val graphitePort = ConfigContainer.getOrElse("graphite.port", "2003").toInt
33 | Some(GraphiteReporter(graphiteHost, graphitePort))
34 | } else {
35 | None
36 | }
37 | }
38 |
39 | def reportMetrics(key: String, value: Number): Unit =
40 | reporter.foreach(_.reportMetrics(key, value))
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/udf/DateFormatUDF.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.udf
19 |
20 | import java.time.format.DateTimeFormatter
21 | import java.time.format.DateTimeFormatter._
22 |
23 | import org.sparkproject.guava.cache.CacheLoader
24 | import org.sparkproject.guava.cache.CacheBuilder
25 |
26 | /**
27 | * @author kun.wan,
28 | * @date 2020-07-20.
29 | */
30 | object DateFormatUDF {
31 |
32 | lazy val cache = CacheBuilder.newBuilder()
33 | .maximumSize(100)
34 | .build(new CacheLoader[String, DateTimeFormatter] {
35 | override def load(key: String): DateTimeFormatter = ofPattern(key)
36 | })
37 |
38 | implicit def toFormatter(pattern: String): DateTimeFormatter = cache.get(pattern)
39 |
40 | // function name : transform_date
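| // e.g. transform_date('20200720', 'yyyyMMdd', 'yyyy-MM-dd') => '2020-07-20'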
41 | val transform_date_udf: (String, String, String) => String = {
42 | (dt: String, srcPattern: String, dstPattern: String) =>
43 | toFormatter(dstPattern).format(srcPattern.parse(dt))
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/udf/UDFFactory.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.udf
19 |
20 | import java.lang.annotation.Annotation
21 |
22 | import org.apache.spark.sql.SparkSession
23 | import org.apache.spark.sql.runner.container.ConfigContainer
24 | import org.apache.spark.sql.types.DataType
25 | import org.apache.spark.sql.util.Logging
26 |
27 | /**
28 | * @author kun.wan,
29 | * @date 2020-07-20.
30 | */
31 | object UDFFactory extends Logging {
32 |
33 | val EXTERNAL_UDFS = "spark.sql.externalUdfClasses"
34 |
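| // Registers the built-in transform_date UDF, then loads the UDF classes listed (comma-separated)
| // under spark.sql.externalUdfClasses, reading each class's UDFDescription annotation for the
| // function name and return type before registering it as a Java UDF.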
35 | def registerExternalUDFs(spark: SparkSession): Unit = {
36 | spark.udf.register("transform_date", DateFormatUDF.transform_date_udf)
37 |
38 | ConfigContainer.getOption(EXTERNAL_UDFS).foreach {
39 | case udfClasses: String =>
40 | spark.sessionState.resourceLoader.addJar("hdfs:///deploy/config/biz-udfs-1.0.jar")
41 |
42 | val annotationClazz =
43 | Class.forName("org.apachetech.udfs.annotations.UDFDescription",
44 | true,
45 | spark.sharedState.jarClassLoader)
46 | .asInstanceOf[Class[_ <: Annotation]]
47 | val nameMethod = annotationClazz.getMethod("name")
48 | val returnTypeMethod = annotationClazz.getMethod("returnType")
49 |
50 | udfClasses.split(",").map(_.trim).foreach(udfClass => {
51 | val clazz = Class.forName(udfClass, true, spark.sharedState.jarClassLoader)
52 | val annotation = clazz.getAnnotation(annotationClazz)
53 | val name: String = nameMethod.invoke(annotation).asInstanceOf[String]
54 | val returnType: String = returnTypeMethod.invoke(annotation).asInstanceOf[String]
55 |
56 | logInfo(s"register udf ${name} with class ${udfClass}")
57 | spark.udf.registerJava(name, udfClass, DataType.fromDDL(returnType))
58 | })
59 | }
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/ConfigUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.apache.spark.sql.SparkSession
21 | import org.apache.spark.sql.runner.container.ConfigContainer
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2020-02-17.
26 | */
27 | object ConfigUtil {
28 |
29 | def ltrim(s: String): String = s.replaceAll("^\\s+", "")
30 |
31 | def rtrim(s: String): String = s.replaceAll("\\s+$", "")
32 |
33 | def trimConfigValue(configValue: String): String = rtrim(ltrim(configValue))
34 |
35 |
36 | def trimConfigArray(configValue: String, separator: String): String = {
37 | configValue.split(separator)
38 | .map(trimConfigValue(_))
39 | .mkString(separator)
40 | }
41 |
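| // Temporarily applies the given key/value pairs to both ConfigContainer and the active
| // SparkSession's conf, runs `func`, and always removes/unsets them again afterwards.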
42 | def withConfigs[T](configs: (String, String)*)(func: => T): T = {
43 | val spark = SparkSession.active
44 | try {
45 | configs.foreach(config => {
46 | ConfigContainer :+ (config._1 -> config._2)
47 | spark.conf.set(config._1, config._2)
48 | })
49 |
50 | func
51 | } finally {
52 | configs.foreach(config => {
53 | ConfigContainer - config._1
54 | spark.conf.unset(config._1)
55 | })
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/DQUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.apache.spark.sql.runner.container.ConfigContainer
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2020-02-26.
25 | */
26 | object DQUtil {
27 |
28 | val serverUrl = ConfigContainer.getOrElse("dataquality.alert", "")
29 | val title = s"${ConfigContainer.getOrElse(SystemVariables.ENV, SystemVariables.DEFAULT_ENV)}数据质量检查告警"
30 | }
31 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/GenericAvroSchema.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import scala.beans.BeanProperty
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2020-02-26.
25 | */
26 | case class GenericAvroSchema(@BeanProperty name: String,
27 | @BeanProperty namespace: String,
28 | @BeanProperty fields: Array[AvroField],
29 | @BeanProperty `type`: String = "record",
30 | @BeanProperty doc: String = "")
31 |
32 | case class AvroField(@BeanProperty name: String,
33 | @BeanProperty `type`: String,
34 | @BeanProperty doc: String = "")
35 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/JdbcConnector.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import java.sql.{Connection, PreparedStatement, SQLException}
21 |
22 | import org.apache.spark.sql.catalyst.expressions.GenericRowWithSchema
23 | import org.apache.spark.sql.execution.datasources.jdbc.{JDBCOptions, JdbcUtils}
24 | import org.apache.spark.sql.types._
25 |
26 | /**
27 | * 1. Provides the JDBC-related configuration parameters
28 | * 2. Provides a JDBCOptions instance used as the connection options
29 | * 3. Provides JDBC utility methods
30 | *
31 | * @author kun.wan,
32 | * @date 2019-12-11.
33 | */
34 | class JdbcConnector(config: Map[String, String]) extends Logging {
35 |
36 | val tag: String = config.getOrElse(
37 | "tag",
38 | throw new IllegalArgumentException("config tag is needed.")
39 | )
40 |
41 | /**
42 | * 1. Get the value of ${tag}.${key} from the config map.
43 | * 2. Otherwise return defaultValue when it is non-empty.
44 | * 3. Otherwise throw an exception stating that the parameter must be provided.
45 | *
46 | * @param key
47 | * @param defaultValue
48 | * @return
49 | */
50 | def getJdbcConfig(key: String, defaultValue: String = ""): String = {
51 | config.get(s"$tag.$key") match {
52 | case Some(v) => v
53 | case None if defaultValue != "" => defaultValue
54 | case None => throw new Exception(s"parameter $key should be provided!")
55 | }
56 | }
57 |
58 | val url = getJdbcConfig("url")
59 | val username = getJdbcConfig("username")
60 | val password = getJdbcConfig("password")
61 | val queryTimeout = getJdbcConfig("query.timeout", "180").toInt
62 | val tableName: String = config("tableName")
63 |
64 | val jdbcConnectOption: JDBCOptions =
65 | new JDBCOptions(Map(
66 | JDBCOptions.JDBC_URL -> url,
67 | "user" -> username,
68 | "password" -> password,
69 | JDBCOptions.JDBC_TABLE_NAME -> tableName,
70 | JDBCOptions.JDBC_QUERY_TIMEOUT -> queryTimeout.toString
71 | ))
72 |
73 | def getConnection(): Connection = JdbcUtils.createConnectionFactory(jdbcConnectOption)()
74 |
75 | def closeConnection(conn: Connection): Unit = {
76 | try {
77 | if (conn != null) {
78 | conn.close()
79 | }
80 | } catch {
81 | case ex: Exception => logError("close jdbc connection error!", ex)
82 | }
83 | }
84 |
85 | def withConnection[T](body: Connection => T): T = {
86 | val conn: Connection = getConnection()
87 | try {
88 | body(conn)
89 | } catch {
90 | case ex: Exception =>
91 | logError("execute jdbc function error!", ex)
92 | throw ex
93 | } finally {
94 | closeConnection(conn)
95 | }
96 | }
97 |
98 | def getTableSchema(): StructType = {
99 | val tableSchemaOption = JdbcUtils.getSchemaOption(getConnection(), jdbcConnectOption)
100 | assert(tableSchemaOption.isDefined, s"Failed to get $tableName schema!")
101 | tableSchemaOption.get
102 | }
103 |
104 | /**
105 | * @param row the Row to convert
106 | * @param pstmt the JDBC PreparedStatement to bind
107 | * @param fields the fields to bind; PreparedStatement parameter indexes start from 1
108 | */
109 | def rowToPreparedStatement(row: GenericRowWithSchema,
110 | pstmt: PreparedStatement,
111 | fields: Seq[StructField]): Unit = {
112 | fields.zipWithIndex.foreach {
113 | case (field, fieldIndex) =>
114 | field.dataType match {
115 | case _: BooleanType =>
116 | pstmt.setBoolean(fieldIndex + 1, row.getAs(field.name))
117 | case _: DoubleType =>
118 | pstmt.setDouble(fieldIndex + 1, row.getAs(field.name))
119 | case _: DecimalType =>
120 | pstmt.setBigDecimal(fieldIndex + 1, row.getAs(field.name))
121 | case _: FloatType =>
122 | pstmt.setFloat(fieldIndex + 1, row.getAs(field.name))
123 | case _: ByteType =>
124 | pstmt.setByte(fieldIndex + 1, row.getAs(field.name))
125 | case _: ShortType =>
126 | pstmt.setShort(fieldIndex + 1, row.getAs(field.name))
127 | case _: IntegerType =>
128 | pstmt.setInt(fieldIndex + 1, row.getAs(field.name))
129 | case _: LongType =>
130 | pstmt.setLong(fieldIndex + 1, row.getAs(field.name))
131 | case _: StringType =>
132 | pstmt.setString(fieldIndex + 1, row.getAs(field.name))
133 | case _: DateType =>
134 | pstmt.setDate(fieldIndex + 1, row.getAs(field.name))
135 | case _ =>
136 | throw new IllegalArgumentException(
137 | s"Unsupported type ${field.dataType}"
138 | )
139 | }
140 | }
141 | }
142 |
143 | var statementCounter: Long = 0
144 |
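| // `body` is expected to bind parameters on the statement (and typically call addBatch);
| // roughly every 10,000 invocations the accumulated batch is flushed with executeBatch.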
145 | def tryStatement[T](pstmt: PreparedStatement, row: Option[GenericRowWithSchema] = None)
146 | (body: PreparedStatement => Unit): Unit = {
147 | try {
148 | this.synchronized { // lock on the connector itself; the reassigned boxed Long is not a stable monitor
149 | if (pstmt != null) {
150 | body(pstmt)
151 | statementCounter = statementCounter + 1
152 | }
153 | if (statementCounter % 10000 == 0) {
154 | val updateCounts = pstmt.executeBatch
155 | logInfo(s"commit JDBC PreparedStatement,affected rows = ${updateCounts.length}, " +
156 | s"statement counter = ${statementCounter}")
157 | pstmt.clearParameters()
158 | }
159 | }
160 | } catch {
161 | case e: Exception =>
162 | logError(s"debug message for pstmt : ${pstmt}, row : ${row}")
163 | throw e
164 | }
165 | }
166 | }
167 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/JobIdUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import java.io.File
21 | import java.time.LocalDateTime
22 | import java.time.format.DateTimeFormatter
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2020-03-06.
27 | */
28 | object JobIdUtil {
29 |
30 | def generatorJobId(jobFile: String): String = {
31 | val ts = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"))
32 | val prefix = new File(jobFile).getName.stripSuffix(".xml")
33 | s"${prefix}-${ts}"
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/Logging.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.slf4j.{Logger, LoggerFactory}
21 |
22 | /**
23 | * Utility trait for classes that want to log data. Creates a SLF4J logger for the class and allows
24 | * logging messages at different levels using methods that only evaluate parameters lazily if the
25 | * log level is enabled.
26 | *
27 | */
28 | trait Logging {
29 | // Make the log field transient so that objects with Logging can
30 | // be serialized and used on another machine
31 | @transient private var log_ : Logger = null
32 |
33 | implicit def anyToString(any: Any): String = any.toString
34 |
35 | // Method to get the logger name for this object
36 | protected def logName = {
37 | // Ignore trailing $'s in the class names for Scala objects
38 | this.getClass.getName.stripSuffix("$")
39 | }
40 |
41 | // Method to get or create the logger for this object
42 | protected def log: Logger = {
43 | if (log_ == null) {
44 | log_ = LoggerFactory.getLogger(logName)
45 | }
46 | log_
47 | }
48 |
49 | // Log methods that take only a String
50 | protected def logInfo(msg: => String) {
51 | if (log.isInfoEnabled) log.info(msg)
52 | }
53 |
54 | protected def logDebug(msg: => String) {
55 | if (log.isDebugEnabled) log.debug(msg)
56 | }
57 |
58 | protected def logTrace(msg: => String) {
59 | if (log.isTraceEnabled) log.trace(msg)
60 | }
61 |
62 | protected def logWarning(msg: => String) {
63 | if (log.isWarnEnabled) log.warn(msg)
64 | }
65 |
66 | protected def logError(msg: => String) {
67 | if (log.isErrorEnabled) log.error(msg)
68 | }
69 |
70 | // Log methods that take Throwables (Exceptions/Errors) too
71 | protected def logInfo(msg: => String, throwable: Throwable) {
72 | if (log.isInfoEnabled) log.info(msg, throwable)
73 | }
74 |
75 | protected def logDebug(msg: => String, throwable: Throwable) {
76 | if (log.isDebugEnabled) log.debug(msg, throwable)
77 | }
78 |
79 | protected def logTrace(msg: => String, throwable: Throwable) {
80 | if (log.isTraceEnabled) log.trace(msg, throwable)
81 | }
82 |
83 | protected def logWarning(msg: => String, throwable: Throwable) {
84 | if (log.isWarnEnabled) log.warn(msg, throwable)
85 | }
86 |
87 | protected def logError(msg: => String, throwable: Throwable) {
88 | if (log.isErrorEnabled) log.error(msg, throwable)
89 | }
90 |
91 | protected def isTraceEnabled(): Boolean = {
92 | log.isTraceEnabled
93 | }
94 |
95 | def runWithErrorLog[T](body: => T): T = {
96 | try {
97 | body
98 | } catch {
99 | case e: Exception =>
100 | logError(s"find exception: $e")
101 | throw e
102 | }
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/NextIterator.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | /** Provides a basic/boilerplate Iterator implementation. */
21 | abstract class NextIterator[U] extends Iterator[U] {
22 |
23 | private var gotNext = false
24 | private var nextValue: U = _
25 | private var closed = false
26 | protected var finished = false
27 |
28 | /**
29 | * Method for subclasses to implement to provide the next element.
30 | *
31 | * If no next element is available, the subclass should set `finished`
32 | * to `true` and may return any value (it will be ignored).
33 | *
34 | * This convention is required because `null` may be a valid value,
35 | * and using `Option` seems like it might create unnecessary Some/None
36 | * instances, given some iterators might be called in a tight loop.
37 | *
38 | * @return U, or set 'finished' when done
39 | */
40 | def getNext(): U
41 |
42 | /**
43 | * Method for subclasses to implement when all elements have been successfully
44 | * iterated, and the iteration is done.
45 | *
46 | * Note: `NextIterator` cannot guarantee that `close` will be
47 | * called because it has no control over what happens when an exception
48 | * happens in the user code that is calling hasNext/next.
49 | *
50 | * Ideally you should have another try/catch, as in HadoopRDD, that
51 | * ensures any resources are closed should iteration fail.
52 | */
53 | def close()
54 |
55 | /**
56 | * Calls the subclass-defined close method, but only once.
57 | *
58 | * Usually calling `close` multiple times should be fine, but historically
59 | * there have been issues with some InputFormats throwing exceptions.
60 | */
61 | def closeIfNeeded() {
62 | if (!closed) {
63 | // Note: it's important that we set closed = true before calling close(), since setting it
64 | // afterwards would permit us to call close() multiple times if close() threw an exception.
65 | closed = true
66 | close()
67 | }
68 | }
69 |
70 | override def hasNext: Boolean = {
71 | if (!finished) {
72 | if (!gotNext) {
73 | nextValue = getNext()
74 | if (finished) {
75 | closeIfNeeded()
76 | }
77 | gotNext = true
78 | }
79 | }
80 | !finished
81 | }
82 |
83 | override def next(): U = {
84 | if (!hasNext) {
85 | throw new NoSuchElementException("End of stream")
86 | }
87 | gotNext = false
88 | nextValue
89 | }
90 | }
91 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/OptimizerUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.apache.spark.sql.AnalysisException
21 | import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
22 | import org.apache.spark.sql.catalyst.expressions.Literal
23 | import org.apache.spark.sql.types.StringType
24 |
25 | /**
26 | * @author kun.wan,
27 | * @date 2021-03-08.
28 | */
29 | object OptimizerUtil {
30 |
31 | def parseHintParameter(value: Any): String = {
32 | value match {
33 | case v: String => UnresolvedAttribute.parseAttributeName(v).mkString(".")
34 | case Literal(v, dt: StringType) => v.toString
35 | case v: UnresolvedAttribute => v.nameParts.mkString(".")
36 | case unsupported => throw new AnalysisException(s"Unable to parse : $unsupported")
37 | }
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/ReflectUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-22.
23 | */
24 | object ReflectUtils {
25 |
26 | /**
27 | * Invoke a private method via reflection.
28 | * @param clazz the class declaring the method
29 | * @param name the name of the private method
30 | * @param instance the instance to invoke the method on; pass null for a static method
31 | * @param parameterTypes the parameter types of the method; pass an empty Seq() when there are none
32 | * @param parameters the argument instances; pass an empty Seq() when there are none
33 | */
34 | def runMethod(clazz: Class[_],
35 | name: String,
36 | instance: Any,
37 | parameterTypes: Seq[Class[_]],
38 | parameters: Seq[Object]): Unit = {
39 | val method = clazz.getDeclaredMethod(name, parameterTypes: _*)
40 | method.setAccessible(true)
41 | method.invoke(instance, parameters: _*)
42 | }
43 |
44 | def setVariable(instance: Any,
45 | fieldName: String,
46 | value: Any): Unit = {
47 | val field = instance.getClass.getDeclaredField(fieldName)
48 | field.setAccessible(true)
49 | field.set(instance, value)
50 | }
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/StringUtil.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.apache.commons.lang3.StringUtils
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2021-03-17.
25 | */
26 | object StringUtil {
27 |
28 | val escapeMapping: Map[Array[Char], Array[Char]] = Map(
29 | Array('\"') -> Array('\"'),
30 | Array('\'') -> Array('\''),
31 | Array('(') -> Array(')'),
32 | )
33 |
34 | def escapeStringValue(text: String): String = {
35 | var res = text.trim
36 | for ((startChars, endChars) <- escapeMapping
37 | if res.startsWith(new String(startChars)) && res.endsWith(new String(endChars))) {
38 | res = StringUtils.removeStart(res, new String(startChars))
39 | res = StringUtils.removeEnd(res, new String(endChars)).trim
40 | }
41 | res
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/scala/org/apache/spark/sql/util/SystemVariables.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | /**
21 | * @author kun.wan,
22 | * @date 2021-02-24.
23 | */
24 | object SystemVariables {
25 |
26 | val BATCH_TIME = "batch_time"
27 | val JOB_NAME = "job_name"
28 | val INDEX_COLUMN_NAME = "index_column"
29 | val PROJECT_JAR_NAME = "sql-runner-3.0.jar"
30 | }
31 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/SQLRunnerSuiteUtils.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql
19 |
20 | import java.io.File
21 |
22 | import org.apache.commons.io.FileUtils
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2020-09-16.
27 | */
28 | object SQLRunnerSuiteUtils {
29 |
30 | def cleanTestHiveData(): Unit = {
31 | val metastoreDB = new File("metastore_db")
32 | if (metastoreDB.exists) {
33 | FileUtils.forceDelete(metastoreDB)
34 | }
35 | val sparkWarehouse = new File("spark-warehouse")
36 | if (sparkWarehouse.exists) {
37 | FileUtils.forceDelete(sparkWarehouse)
38 | }
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/SparkSqlRunnerBase.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql
19 |
20 | import org.apache.spark.sql.udf.UDFFactory
21 | import org.apache.spark.internal.config.Tests.IS_TESTING
22 | import org.apache.spark.sql.SQLRunnerSuiteUtils._
23 | import org.apache.spark.sql.hive.SparkSqlRunner
24 | import org.apache.spark.sql.hive.test.TestHiveSingleton
25 | import org.apache.spark.sql.runner.command.SqlCommand
26 | import org.apache.spark.sql.test.SQLTestUtils
27 |
28 | /**
29 | * @author kun.wan,
30 | * @date 2020-04-15.
31 | */
32 | class SparkSqlRunnerBase extends QueryTest with SQLTestUtils with TestHiveSingleton {
33 |
34 | implicit val sparkImp: SparkSession = spark
35 | val sc = spark.sparkContext
36 | var runner: SparkSqlRunner = _
37 |
38 | override def beforeAll(): Unit = {
39 |
40 | super.beforeAll()
41 | System.setProperty(IS_TESTING.key, "true")
42 | cleanTestHiveData()
43 |
44 | SparkSession.active.sharedState.externalCatalog.addListener(SqlRunnerCatalogEventListener())
45 | UDFFactory.registerExternalUDFs(spark)
46 |
47 | runner = new SparkSqlRunner
48 | }
49 |
50 |
51 | override def afterAll() {
52 | cleanTestHiveData()
53 | SqlCommand.stop()
54 | super.afterAll()
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/optimizer/CollectValueRuleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.sql.QueryTest
21 | import org.apache.spark.sql.hive.SparkSqlRunner
22 | import org.apache.spark.sql.test.SQLTestData.TestData
23 | import org.apache.spark.sql.test.SQLTestUtils
24 | import org.scalatest.matchers.should.Matchers._
25 |
26 | import org.apache.spark.sql.runner.callback.DataCallBackFactory
27 | import org.apache.spark.sql.runner.command.SqlCommand
28 | import org.apache.spark.sql.runner.container.CollectorContainer
29 |
30 | /**
31 | * @author kun.wan,
32 | * @date 2020-07-28.
33 | */
34 | class CollectValueRuleSuite extends QueryTest with SQLTestUtils {
35 |
36 | override val spark = {
37 | System.setProperty("spark.master", "local[1]")
38 | SparkSqlRunner.sparkSession(Some("CollectValueRuleSuite"))
39 | }
40 |
41 | import spark.implicits._
42 |
43 | override def beforeAll() {
44 | val df = spark.sparkContext.parallelize(
45 | (1 to 100).map(i => TestData(i, i.toString))).toDF()
46 | df.createOrReplaceTempView("testData")
47 | }
48 |
49 | override def afterAll(): Unit = {
50 | spark.close()
51 | }
52 |
53 |
54 | def runPartitionScanLimitRule(testQuery: String): Unit = {
55 | PartitionScanLimitRule(spark).apply(
56 | spark.sql(testQuery).queryExecution.optimizedPlan
57 | )
58 | }
59 |
60 | def runAndConsume(sql: String): Unit = {
61 | DataCallBackFactory.consumeResult(SqlCommand.sparkSqlRunner.run(sql))
62 | }
63 |
64 | test("test collect Hint") {
65 | runAndConsume(
66 | s"""SELECT /*+ COLLECT_VALUE('single_value', 'count_column') */
67 | | /*+ COLLECT_VALUE('max_key', 'keyColumn') */
68 | | count(1) as count_column,
69 | | concat('prefix_', max(key)) as keyColumn
70 | |from testData
71 | |""".stripMargin)
72 | CollectorContainer.get("single_value") should be(100)
73 | CollectorContainer.get("max_key") should be("prefix_100")
74 |
75 | runAndConsume(
76 | s"""SELECT /*+ COLLECT_ARRAY('intArray', 'key') */
77 | | /*+ COLLECT_ARRAY('stringArray', 'value') */
78 | | key, value
79 | |from testData
80 | |""".stripMargin)
81 | CollectorContainer.get("intArray") should be((1 to 100))
82 | CollectorContainer.get("stringArray") should be((1 to 100).map(_.toString))
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/optimizer/ExternalTableRuleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.sql.SQLRunnerSuiteUtils.cleanTestHiveData
21 | import org.apache.spark.sql.QueryTest
22 | import org.apache.spark.sql.hive.SparkSqlRunner
23 | import org.apache.spark.sql.test.SQLTestUtils
24 | import org.apache.spark.sql.util.ConfigUtil
25 | import org.scalatest.matchers.should.Matchers._
26 |
27 | import org.apache.spark.sql.runner.command.SqlCommand
28 | import org.apache.spark.sql.runner.container.ConfigContainer
29 |
30 | /**
31 | * @author kun.wan,
32 | * @date 2020-09-15.
33 | */
34 | class ExternalTableRuleSuite extends QueryTest with SQLTestUtils {
35 |
36 | override val spark = {
37 | System.setProperty("spark.master", "local[1]")
38 | SparkSqlRunner.sparkSession(Some("ExternalTableRuleSuite"))
39 | }
40 |
41 | val testPath = getClass.getResource("/")
42 |
43 | val bootstrapServers = "10.23.177.40:9092"
44 | val schemaRegistryUrl = "http://10.23.177.40:8081"
45 |
46 | override def beforeAll(): Unit = {
47 | cleanTestHiveData()
48 |
49 | ConfigContainer ++ Map(
50 | "mysql.url" -> "jdbc:mysql://localhost:3306/test",
51 | "mysql.username" -> "root",
52 | "mysql.password" -> "password",
53 | )
54 |
55 | spark.sql(s"CREATE TABLE target(id int, name string) LOCATION '$testPath/target'")
56 |
57 | /**
58 | * mysql> desc stu;
59 | * +-------+------------+------+-----+---------+-------+
60 | * | Field | Type | Null | Key | Default | Extra |
61 | * +-------+------------+------+-----+---------+-------+
62 | * | id | int(11) | NO | PRI | NULL | |
63 | * | name | text | YES | | NULL | |
64 | * | sex | varchar(2) | YES | | NULL | |
65 | * | env | char(20) | YES | | NULL | |
66 | * +-------+------------+------+-----+---------+-------+
67 | */
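    // A minimal sketch (an assumption for local setup, not part of this repo) of MySQL DDL
    // that would reproduce the `stu` table described above; these tests expect such a table
    // in the `test` database referenced by mysql.url:
    //
    //   CREATE TABLE stu (
    //     id   INT        NOT NULL PRIMARY KEY,
    //     name TEXT,
    //     sex  VARCHAR(2),
    //     env  CHAR(20)
    //   );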
68 | }
69 |
70 | override def afterAll() {
71 | cleanTestHiveData()
72 | spark.stop()
73 | super.afterAll()
74 | }
75 |
76 | test("query jdbc table") {
77 | ConfigUtil.withConfigs("mysql.stu.numPartitions" -> "3", "mysql.stu.partitionColumn" -> "id") {
78 |
79 | val df = spark.sql(s"""SELECT id, name
80 | |FROM jdbc.mysql.stu
81 | |where id < 10
82 | |""".stripMargin)
83 | df.rdd.partitions.length should equal(3)
84 | df.explain()
85 | df.show()
86 | }
87 | }
88 |
89 | test("query jdbc view") {
90 | ConfigUtil.withConfigs(
91 | "mysql.stu.query" -> "(select * from stu where name !='wankun') as q",
92 | "mysql.stu.numPartitions" -> "3",
93 | "mysql.stu.partitionColumn" -> "id") {
94 |
95 | val df = spark.sql(s"""SELECT id, name
96 | |FROM jdbc.mysql.stu
97 | |""".stripMargin)
98 | df.rdd.partitions.length should equal(3)
99 | df.show()
100 | }
101 | }
102 |
103 | test("write data frame to mysql table") {
104 | ConfigUtil.withConfigs(
105 | "mysql.stu.queryTimeout" -> 100.toString,
106 | "mysql.stu.uniqueKeys" -> "id") {
107 | new SqlCommand(s"""WITH t as (
108 | | SELECT 100 as id, "user_100" as name
109 | | UNION ALL
110 | | SELECT 101 as id, "user_101" as name
111 | |)
112 | |INSERT INTO jdbc.mysql.stu
113 | |SELECT *
114 | |FROM t;
115 | |""".stripMargin).run()
116 | }
117 | }
118 |
119 | test("write json data frame to kafka table") {
120 | ConfigUtil.withConfigs(
121 | "kafka.bootstrap.servers" -> bootstrapServers,
122 | "kafka.stu.recordType" -> "json",
123 | "kafka.stu.kafkaTopic" -> "test_wankun") {
124 | new SqlCommand(s"""WITH t as (
125 | | SELECT 100 as id, "user_100" as name
126 | | UNION ALL
127 | | SELECT 101 as id, "user_101" as name
128 | |)
129 | |INSERT INTO kafka.stu
130 | |SELECT *
131 | |FROM t;
132 | |""".stripMargin).run()
133 | }
134 | }
135 |
136 | test("write avro data frame to kafka using KAFKA_SINK") {
137 | ConfigUtil.withConfigs(
138 | "kafka.bootstrap.servers" -> bootstrapServers,
139 | "kafka.schema.registry.url" -> schemaRegistryUrl,
140 | "kafka.stu.recordType" -> "avro",
141 | "kafka.stu.kafkaTopic" -> "test_wankun2",
142 | // Do not auto-generate the Avro schema from the result DDL; when testing manually, adjust this flag as needed
143 | "kafka.stu.avro.forceCreate" -> "false",
144 | "kafka.stu.avro.name" -> "student",
145 | "kafka.stu.avro.namespace" -> "com.wankun") {
146 | new SqlCommand(s"""INSERT INTO kafka.stu
147 | |SELECT 1 as id1, 'wankun' as name1,
148 | | '男' as sex1, 'PRD' env1, 18 age1;
149 | |""".stripMargin).run()
150 | }
151 | }
152 |
153 | /*
154 | test("send message with EMAIL_SINK") {
155 | ConfigUtil.withConfigs(
156 | // server config
157 | "email.hostname" -> "smtp.exmail.qq.com",
158 | "email.username" -> "test@leyantech.com",
159 | "email.password" -> "",
160 | "email.from" -> "test@leyantech.com",
161 |
162 | // job config
163 | "email.columns" -> "id, name",
164 | "email.columnNames" -> "ID,名称",
165 | "email.subject" -> "测试邮件",
166 | "email.email-to" -> "wankun@apache.org",
167 | "email.email-cc" -> "wankun@apache.org"
168 | ) {
169 | new SqlCommand(
170 | s"""SELECT /*+ EMAIL_SINK(email) */
171 | | 1 as id, 'wankun' as name;
172 | |""".stripMargin).run()
173 | }
174 | }
175 |
176 | test("send message with DINGDING_SINK") {
177 | ConfigUtil.withConfigs(
178 | "dataquality.alert"-> "https://oapi.dingtalk.com/robot/send?access_token=test_token",
179 | "dataquality.alert.title" -> "测试钉钉告警",
180 | "dataquality.alert.pattern" -> "ID是{id},姓名:{name}"
181 | ) {
182 | new SqlCommand(
183 | s"""SELECT /*+ DINGDING_SINK(dataquality.alert) */
184 | | 1 as id, 'wankun' as name;
185 | |""".stripMargin).run()
186 | }
187 | }
188 | */
189 |
190 | }
191 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/optimizer/PartitionScanLimitRuleSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.optimizer
19 |
20 | import org.apache.spark.sql.hive.test.TestHiveSingleton
21 | import org.apache.spark.sql.test.SQLTestUtils
22 | import org.apache.spark.sql.{AnalysisException, QueryTest}
23 |
24 | /**
25 | * @author kun.wan,
26 | * @date 2020-07-28.
27 | */
28 | class PartitionScanLimitRuleSuite extends QueryTest with SQLTestUtils with TestHiveSingleton {
29 |
30 | override def beforeAll(): Unit = {
31 | super.beforeAll()
32 |
33 | Seq("test1", "test2").map { tableName =>
34 | sql(
35 | s"""
36 | |CREATE TABLE $tableName(i int)
37 | |PARTITIONED BY (p STRING)
38 | |STORED AS textfile""".stripMargin)
39 | sql(s"INSERT OVERWRITE TABLE $tableName PARTITION (p='1') select * from range(10)")
40 | }
41 | }
42 |
43 | override def afterAll(): Unit = {
44 | Seq("test1", "test2").map { tableName =>
45 | sql(s"DROP TABLE IF EXISTS $tableName")
46 | }
47 | super.afterAll()
48 | }
49 |
50 | def runPartitionScanLimitRule(testQuery: String): Unit = {
51 | PartitionScanLimitRule(spark).apply(
52 | spark.sql(testQuery).queryExecution.optimizedPlan
53 | )
54 | }
55 |
56 | test("no filters on partition table scan") {
57 | intercept[AnalysisException] {
58 | runPartitionScanLimitRule("SELECT i FROM test1")
59 | }
60 |
61 | runPartitionScanLimitRule("SELECT i FROM test1 where p='1'")
62 | runPartitionScanLimitRule(
63 | s"""
64 | |WITH t as (
65 | | SELECT count(1) as c
66 | | FROM test1
67 | | WHERE p='1'
68 | |)
69 | |SELECT * FROM t
70 | |""".stripMargin)
71 | }
72 |
73 | test("no filters on partition table join") {
74 | intercept[AnalysisException] {
75 | runPartitionScanLimitRule(
76 | s"""
77 | |SELECT *
78 | |FROM (SELECT i FROM test1 where p='1') t1
79 | |JOIN test2 t2
80 | |ON t1.i > t2.i
81 | |""".stripMargin)
82 | }
83 |
84 | runPartitionScanLimitRule(
85 | s"""
86 | |SELECT *
87 | |FROM (SELECT i FROM test1 where p='1') t1
88 | |JOIN test2 t2
89 | |ON t1.i > t2.i
90 | |AND t2.p = '1'
91 | |""".stripMargin)
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/runner/ArgParserSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner
19 |
20 | import org.apache.spark.sql.util.Logging
21 | import JobRunner.dateRangeStep
22 | import org.scalatest.funsuite.AnyFunSuite
23 | import org.scalatest.matchers.should.Matchers._
24 |
25 | import java.time.LocalDateTime
26 | import java.time.temporal.ChronoUnit
27 |
28 | /**
29 | * @author kun.wan,
30 | * @date 2021-02-04.
31 | */
32 | class ArgParserSuite extends AnyFunSuite with Logging {
33 |
34 | test("test time range option") {
35 | val startDate = Some(LocalDateTime.parse("2021-01-01T00:00:00"))
36 | val endDate = Some(LocalDateTime.parse("2021-01-06T00:00:00"))
37 |
38 | val rangeSize = ChronoUnit.DAYS.between(startDate.get, endDate.get)
39 | Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusDays(i)) should
40 | be(Seq(LocalDateTime.parse("2021-01-01T00:00:00"),
41 | LocalDateTime.parse("2021-01-02T00:00:00"),
42 | LocalDateTime.parse("2021-01-03T00:00:00"),
43 | LocalDateTime.parse("2021-01-04T00:00:00"),
44 | LocalDateTime.parse("2021-01-05T00:00:00"),
45 | LocalDateTime.parse("2021-01-06T00:00:00")))
46 |
47 |
48 | dateRangeStep = 2
49 | Range.inclusive(0, rangeSize.toInt, dateRangeStep).map(i => startDate.get.plusDays(i)) should
50 | be(Seq(LocalDateTime.parse("2021-01-01T00:00:00"),
51 | LocalDateTime.parse("2021-01-03T00:00:00"),
52 | LocalDateTime.parse("2021-01-05T00:00:00")))
53 |
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/runner/command/CommandSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.command
19 |
20 | import org.scalatest.funsuite.AnyFunSuite
21 | import org.scalatest.matchers.should.Matchers._
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2021-02-24.
26 | */
27 | class CommandSuite extends AnyFunSuite {
28 |
29 | val textHeader =
30 | s"""/************************************************
31 | |
32 | | author: kun.wan
33 | | period: day
34 | | run_env: PRD & PRE
35 | | describe: policy_store_config store data volume check
36 | | app.id: 303
37 | |
38 | |************************************************/
39 | |""".stripMargin
40 |
41 | test("test parse job text") {
42 | val text =
43 | s"""$textHeader
44 | |-- test a single-line comment
45 | |
46 | |!set mykey=myvalue;
47 | |!set longKey = \"(
48 | |select *
49 | |from tab
50 | |WHERE dates = '{date | yyyy - MM - dd}'
51 | |) as q\";
52 | |
53 | |SELECT id, name
54 | |FROM test_db.test_name
55 | |WHERE id in ('001', '002');
56 | |
57 | |-- test SQL containing quotes
58 | |SELECT 'a;b' as a, "abc;hhh" as b,'a\\'b' as c;
59 | |""".stripMargin
60 |
61 | val commands = CommandFactory.parseCommands(text)
62 |
63 | commands.length should be(7)
64 | }
65 |
66 | test("test parse if command") {
67 | Seq("kun.wan", "King").map { username =>
68 | val text =
69 | s"""$textHeader
70 | |!set user = $username;
71 | |!if (user = 'kun.wan')
72 | | select 'if command';
73 | |!else
74 | | select 'else command';
75 | |!fi
76 | |""".stripMargin
77 |
78 | val commands = CommandFactory.parseCommands(text)
79 |
80 | commands.length should be(3)
81 |
82 | commands.foreach(_.run())
83 | }
84 |
85 | val text =
86 | s"""$textHeader
87 | |
88 | |SELECT /*+ COLLECT_VALUE('row_count', 'c') */ count(1) as c;
89 | |SELECT /*+ COLLECT_VALUE('row_count2', 'd') */ count(1) as d;
90 | |
91 | |!if (row_count = row_count2 and row_count = 1)
92 | | select 'row count is 1';
93 | |!else
94 | | select 'row count is not 1';
95 | |!fi
96 | |""".stripMargin
97 |
98 | val commands = CommandFactory.parseCommands(text)
99 |
100 | commands.length should be(4)
101 |
102 | commands.foreach(_.run())
103 | }
104 |
105 | }
106 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/runner/config/VariableSubstitutionSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.runner.config
19 |
20 | import org.apache.spark.sql.util.SystemVariables
21 | import org.apache.spark.sql.runner.container.ConfigContainer
22 | import org.scalatest.funsuite.AnyFunSuite
23 | import org.scalatest.matchers.should.Matchers._
24 | import java.time.LocalDateTime
25 |
26 | import org.apache.spark.sql.runner.container.{CollectorContainer, ConfigContainer}
27 |
28 | /**
29 | * @author kun.wan,
30 | * @date 2019-12-10.
31 | */
32 | class VariableSubstitutionSuite extends AnyFunSuite {
33 |
34 | test("test time variable") {
35 | CollectorContainer :+ (SystemVariables.BATCH_TIME -> LocalDateTime.parse("2019-08-07T13:25:41"))
36 | val substitution = new VariableSubstitution()
37 |
38 | substitution.dateParameter("${date}") should be("20190807")
39 | substitution.dateParameter("${date + 2d}") should be("20190809")
40 | substitution.dateParameter("${date + 2d |yyyyMMddHH}") should be("2019080913")
41 | substitution.dateParameter("${date + 2d |yyyyMM00}") should be("20190800")
42 | substitution.dateParameter("${date + 2d |yyyy-MM-dd}") should be("2019-08-09")
43 | substitution.dateParameter("${date + 2d |yyyy_MM_dd}") should be("2019_08_09")
44 | substitution.dateParameter("${date-2m|yyyy-MM-dd HH:mm:ss}") should be("2019-08-07 13:23:41")
45 |
46 | substitution.dateParameter("${date+2d}") should be("20190809")
47 | substitution.dateParameter("${date+4y}") should be("20230807")
48 |
49 | substitution.dateParameter("${date+2D}") should be("20190809")
50 | substitution.dateParameter("${date+3M}") should be("20191107")
51 | substitution.dateParameter("${date+4Y}") should be("20230807")
52 |
53 | substitution.dateParameter("${date-2d}") should be("20190805")
54 | substitution.dateParameter("${date-4y}") should be("20150807")
55 |
56 | substitution.dateParameter("${date-2D}") should be("20190805")
57 | substitution.dateParameter("${date-3M}") should be("20190507")
58 |
59 | substitution.dt should be("20190807")
60 | substitution.yesterday should be("20190806")
61 | substitution.tomorrow should be("20190808")
62 | substitution.hour should be("2019080713")
63 | substitution.lastHour should be("2019080712")
64 | substitution.nextHour should be("2019080714")
65 | }
66 |
67 | test("test variable substitution in sql") {
68 | ConfigContainer :+ ("ab_target" -> "after_trade")
69 | CollectorContainer :+ (SystemVariables.BATCH_TIME -> LocalDateTime.parse("2019-08-07T13:25:41"))
70 | val substitution = new VariableSubstitution()
71 |
72 | substitution.substitute(
73 | """
74 | |SELECT count(1)
75 | |FROM tab
76 | |WHERE start_date = '${yesterday}'
77 | |AND end_date = '${dt}'
78 | |AND start_hour = '${date-23H|hh}'
79 | |AND end_hour = '${date - 24h|hh}'
80 | |AND month = '${date - 24h|MM}'
81 | |AND ab_target = '${ab_target}'
82 | |""".stripMargin) should equal(
83 | s"""
84 | |SELECT count(1)
85 | |FROM tab
86 | |WHERE start_date = '20190806'
87 | |AND end_date = '20190807'
88 | |AND start_hour = '02'
89 | |AND end_hour = '01'
90 | |AND month = '08'
91 | |AND ab_target = 'after_trade'
92 | |""".stripMargin)
93 | }
94 |
95 | test("test nested variable substitution in sql") {
96 | ConfigContainer :+ ("report_days" -> "3")
97 | CollectorContainer :+ (SystemVariables.BATCH_TIME -> LocalDateTime.parse("2019-08-07T13:25:41"))
98 | val substitution = new VariableSubstitution()
99 | substitution.substitute("SELECT * FROM tab WHERE dt = ${date-${report_days}d|yyyyMMdd}") should
100 | equal("SELECT * FROM tab WHERE dt = 20190804")
101 | }
102 |
103 | test("test parameters with default value") {
104 | val substitution = new VariableSubstitution()
105 | substitution.substitute("!set key1 = ${key1, 'DEFAULT_VALUE1'};") should
106 | equal("!set key1 = DEFAULT_VALUE1;")
107 |
108 | substitution.substitute("!set key1 = ${key1, \"DEFAULT_VALUE1\"};") should
109 | equal("!set key1 = DEFAULT_VALUE1;")
110 |
111 | ConfigContainer :+ ("key1" -> "value1")
112 |
113 | substitution.substitute("!set key1 = ${key1, 'DEFAULT_VALUE1'};") should
114 | equal("!set key1 = value1;")
115 |
116 | substitution.substitute("!set key1 = ${key1, \"DEFAULT_VALUE1\"};") should
117 | equal("!set key1 = value1;")
118 | }
119 | }
120 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/udf/DateFormatUDFSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.udf
19 |
20 | import org.apache.spark.sql.{Row, SparkSqlRunnerBase}
21 |
22 | /**
23 | * @author kun.wan,
24 | * @date 2020-07-20.
25 | */
26 | class DateFormatUDFSuite extends SparkSqlRunnerBase {
27 |
28 | test("test date_format function") {
29 | val df = spark.sql("select transform_date('20200710','yyyyMMdd','yyyy-MM-dd')")
30 | checkAnswer(df, Seq(Row("2020-07-10")))
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/util/ConfigUtilSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.scalatest.funsuite.AnyFunSuite
21 | import org.scalatest.matchers.should.Matchers._
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2020-02-17.
26 | */
27 | class ConfigUtilSuite extends AnyFunSuite {
28 |
29 |
30 | test("trim config array") {
31 |
32 | val columnName = "\n 日期,店铺id,店铺名,买家付款\n "
33 | val dbColumnName = "\n {dt},{store_id},{store_name},{buyer_payment}," +
34 | "{buyer_prepaid}," +
35 | "{inquiry_tailing},{no_order_try},\n {size_query_succeeded},{applicable_season}," +
36 | "{enable_filter_applicable_season},{chat_expires_at},{r2_expires_at},{audit_expires_at}\n " +
37 | " ";
38 | ConfigUtil.trimConfigValue(columnName) should be("日期,店铺id,店铺名,买家付款")
39 |
40 | ConfigUtil.trimConfigArray(dbColumnName, ",") should be(
41 | "{dt},{store_id},{store_name},{buyer_payment},{buyer_prepaid},{inquiry_tailing}," +
42 | "{no_order_try},{size_query_succeeded},{applicable_season}," +
43 | "{enable_filter_applicable_season},{chat_expires_at},{r2_expires_at},{audit_expires_at}")
44 |
45 | }
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/test/scala/org/apache/spark/sql/util/JobIdUtilSuite.scala:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one or more
3 | * contributor license agreements. See the NOTICE file distributed with
4 | * this work for additional information regarding copyright ownership.
5 | * The ASF licenses this file to You under the Apache License, Version 2.0
6 | * (the "License"); you may not use this file except in compliance with
7 | * the License. You may obtain a copy of the License at
8 | *
9 | * http://www.apache.org/licenses/LICENSE-2.0
10 | *
11 | * Unless required by applicable law or agreed to in writing, software
12 | * distributed under the License is distributed on an "AS IS" BASIS,
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | * See the License for the specific language governing permissions and
15 | * limitations under the License.
16 | */
17 |
18 | package org.apache.spark.sql.util
19 |
20 | import org.scalatest.funsuite.AnyFunSuite
21 | import org.scalatest.matchers.should.Matchers._
22 |
23 | /**
24 | * @author kun.wan,
25 | * @date 2020-03-06.
26 | */
27 | class JobIdUtilSuite extends AnyFunSuite {
28 |
29 | test("test generatorJobId") {
30 | val jobId = JobIdUtil.generatorJobId("conf/marketing/pdd/dwd_payment_reminder_detail.xml")
31 | jobId should fullyMatch regex ("""dwd_payment_reminder_detail-\d{8}_\d{6}""")
32 | }
33 |
34 | }
35 |
--------------------------------------------------------------------------------