├── .gitignore
├── README.md
├── bin
│   ├── sqlalarm_records_log.sql
│   └── start-local.sh
├── docs
│   ├── alarm-console-sink.jpg
│   └── sqlalarm.png
├── pom.xml
├── sa-admin
│   ├── pom.xml
│   └── src
│       └── main
│           └── java
│               └── dt
│                   └── sql
│                       └── alarm
│                           └── web
│                               └── SQLAlarmConsole.scala
└── sa-core
    ├── pom.xml
    └── src
        ├── main
        │   ├── java
        │   │   ├── com
        │   │   │   └── redislabs
        │   │   │       └── provider
        │   │   │           └── redis
        │   │   │               └── ConnectionPool.scala
        │   │   └── dt
        │   │       └── sql
        │   │           └── alarm
        │   │               ├── SQLAlarmBoot.scala
        │   │               ├── conf
        │   │               │   ├── AlarmPolicyConf.scala
        │   │               │   ├── AlarmRuleConf.scala
        │   │               │   ├── Conf.scala
        │   │               │   ├── JdbcConf.scala
        │   │               │   ├── KafkaConf.scala
        │   │               │   └── RedisConf.scala
        │   │               ├── core
        │   │               │   ├── AlarmAlert.scala
        │   │               │   ├── AlarmFlow.scala
        │   │               │   ├── AlarmReduce.scala
        │   │               │   ├── Base.scala
        │   │               │   ├── Constants.scala
        │   │               │   ├── RecordDetail.scala
        │   │               │   ├── Sink.java
        │   │               │   ├── Source.java
        │   │               │   ├── SparkRuntime.scala
        │   │               │   └── WowLog.scala
        │   │               ├── filter
        │   │               │   └── SQLFilter.scala
        │   │               ├── input
        │   │               │   ├── BaseInput.scala
        │   │               │   ├── Constants.scala
        │   │               │   ├── KafkaInput.scala
        │   │               │   └── RedisInput.scala
        │   │               ├── output
        │   │               │   ├── BaseOutput.scala
        │   │               │   ├── ConsoleOutput.scala
        │   │               │   ├── Constants.scala
        │   │               │   ├── JdbcOutput.scala
        │   │               │   └── KafkaOutput.scala
        │   │               └── reduce
        │   │                   ├── PolicyAnalyzeEngine.scala
        │   │                   └── engine
        │   │                       ├── AggWindow.scala
        │   │                       ├── ReduceByNumScale.scala
        │   │                       ├── ReduceByTimeScale.scala
        │   │                       ├── ReduceByWindow.scala
        │   │                       └── Scale.scala
        │   └── resources
        │       ├── application.conf
        │       └── log4j.properties
        └── test
            └── java
                └── dt
                    └── sql
                        └── alarm
                            └── test
                                ├── InputSuite.scala
                                ├── LocalSparkApp.scala
                                ├── RedisOperationsSuite.scala
                                ├── SQLAlarmBootTest.scala
                                └── SparkRedisTest.scala
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by .ignore support plugin (hsz.mobi)
2 | .idea
3 | *.iml
4 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## SQLAlarm
2 | > Big data smart alarm by sql
3 |
4 | SQLAlarm is for event (time-stamped) alarms and is built on Spark Structured Streaming. The system provides the following abilities:
5 | 1. Event filtering through SQL
6 | 2. Noise reduction for alarm records
7 | 3. Dispatch of alarm records to specified channels
8 |
9 | The overall framework is as follows:
10 | ![sqlalarm](docs/sqlalarm.png)
11 |
12 | Introduction of the modules:
13 | 1. sa-admin: web console and REST API for SQLAlarm
14 | 2. sa-core: core module of SQLAlarm (including source/filter/sink (alert))
15 |
16 | ### Developing SQLAlarm
17 | You can use bin/start-local.sh to start a local SQLAlarm server in IntelliJ IDEA. After packaging the jar, we recommend running it in yarn-client or local mode on a Spark cluster.
18 |
19 | Minimal requirements for a SQLAlarm server are:
20 | - Java 1.8 +
21 | - Spark 2.4.x
22 | - Redis (Redis 5.0 if you use Redis Streams)
23 | - Kafka (not needed if you only use Redis Streams for event alerts)
24 |
25 | For example, the following starts a SQLAlarm server that consumes Kafka event messages and runs the alarm flow:
26 | ```bash
27 | spark-submit --class dt.sql.alarm.SQLAlarmBoot \
28 | --driver-memory 2g \
29 | --master local[4] \
30 | --name SQLALARM \
31 | --conf "spark.kryoserializer.buffer=256k" \
32 | --conf "spark.kryoserializer.buffer.max=1024m" \
33 | --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
34 | --conf "spark.redis.host=127.0.0.1" \
35 | --conf "spark.redis.port=6379" \
36 | --conf "spark.redis.db=4" \
37 | sa-core-1.0-SNAPSHOT.jar \
38 | -sqlalarm.name sqlalarm \
39 | -sqlalarm.sources kafka \
40 | -sqlalarm.input.kafka.topic sqlalarm_event \
41 | -sqlalarm.input.kafka.subscribe.topic.pattern 1 \
42 | -sqlalarm.input.kafka.bootstrap.servers "127.0.0.1:9092" \
43 | -sqlalarm.sinks console
44 | ```
45 | > notes: the simple example above takes Kafka as the message center, filters alarm events and outputs them to the console.
46 |
47 | ### Quick Start
48 | 1. Package the core jar: sa-core-1.0-SNAPSHOT.jar.
49 | 2. Deploy the jar package on the Spark cluster.
50 | 3. Add an alarm rule (put it in Redis); an optional noise-reduction policy can be registered the same way (see the sketch at the end of this section):
51 | ```bash
52 | # hset key uuid value
53 | # key: sqlalarm_rule:${sourceType}:${topic}
54 |
55 | HSET "sqlalarm_rule:kafka:sqlalarm_event" "uuid00000001"
56 | {
57 | "item_id":"uuid00000001",
58 | "platform":"alarm",
59 | "title":"sql alarm test",
60 | "source":{
61 | "type":"kafka",
62 | "topic":"sqlalarm_event"
63 | },
64 | "filter":{
65 | "table":"fail_job",
66 | "structure":[
67 | {
68 | "name":"job_name",
69 | "type":"string",
70 | "xpath":"$.job_name"
71 | },
72 | {
73 | "name":"job_owner",
74 | "type":"string",
75 | "xpath":"$.job_owner"
76 | },
77 | {
78 | "name":"job_stat",
79 | "type":"string",
80 | "xpath":"$.job_stat"
81 | },
82 | {
83 | "name":"job_time",
84 | "type":"string",
85 | "xpath":"$.job_time"
86 | }
87 | ],
88 | "sql":"select job_name as job_id,job_stat,job_time as event_time,'job failed' as message, map('job_owner',job_owner) as context from fail_job where job_stat='Fail'"
89 | }
90 | }
91 | ```
92 | 4. Wait for the event center (Kafka or Redis) to produce alarm events, or produce them manually:
93 | > 1. Create the topic if it does not exist:
94 | ```bash
95 | kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic sqlalarm_event
96 | ```
97 | > 2. Produce events:
98 | ```bash
99 | kafka-console-producer.sh --broker-list localhost:9092 --topic sqlalarm_event
100 |
101 | {
102 | "job_name":"sqlalarm_job_000",
103 | "job_owner":"bebee4java",
104 | "job_stat":"Succeed",
105 | "job_time":"2019-12-26 12:00:00"
106 | }
107 |
108 | {
109 | "job_name":"sqlalarm_job_001",
110 | "job_owner":"bebee4java",
111 | "job_stat":"Fail",
112 | "job_time":"2019-12-26 12:00:00"
113 | }
114 | ```
115 | 5. If you use the console sink, you will see output like the following in the console (observe that the failed event is filtered out and the succeeded event is ignored):
116 | ![alarm-console-sink](docs/alarm-console-sink.jpg)
117 |
118 | > **notes:** the order of steps 2 & 3 does not matter, and an alarm rule is not required when starting the SQLAlarm server.
119 |
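Optionally, a noise-reduction policy can be registered for the rule added in step 3. The sketch below is purely illustrative and not required for the quick start: it assumes the Redis instance configured above (db 4), the hash key layout produced by `AlarmPolicyConf.getRkey` (`sqlalarm_policy:${sourceType}:${topic}`, with the rule's `item_id` as the hash field), and the JSON template documented in the comment at the bottom of `AlarmPolicyConf.scala`. It uses the Jedis client, which sa-core already pulls in via spark-redis; the concrete values (10-minute time window, alert on first record) are assumptions for the example.

```scala
import redis.clients.jedis.Jedis

// Minimal sketch (illustrative values): register a 10-minute time-window policy
// for the rule "uuid00000001" defined in step 3.
object RegisterPolicy {
  def main(args: Array[String]): Unit = {
    val policyJson =
      """{
        |  "item_id": "uuid00000001",
        |  "window": {"type": "time", "value": 10, "unit": "m", "count": 0},
        |  "policy": {"type": "absolute", "unit": "number", "value": 0, "first_alert": 1}
        |}""".stripMargin

    val jedis = new Jedis("127.0.0.1", 6379)
    try {
      jedis.select(4) // same db as spark.redis.db in the submit command
      // key layout from AlarmPolicyConf.getRkey: sqlalarm_policy:${sourceType}:${topic}
      jedis.hset("sqlalarm_policy:kafka:sqlalarm_event", "uuid00000001", policyJson)
    } finally {
      jedis.close()
    }
  }
}
```

With a policy in place, `AlarmFlow` looks it up by `item_id` and hands matching records to `AlarmReduce` for merging; without one, every filtered record is pushed individually (see `SparkRuntime.parseProcessAndSink`).
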
120 | ### Features
121 | 1. Supports multiple data sources as the event center (Kafka or a Redis stream-enabled source); it is extensible, so you can add a custom data source simply by extending the class [BaseInput](sa-core/src/main/java/dt/sql/alarm/input/BaseInput.scala)
122 | 2. Supports multiple data topics with inconsistent structures
123 | 3. Supports output of alarm events to multiple sinks (kafka/jdbc/es etc.); it is extensible, so you can add a custom sink simply by extending the class [BaseOutput](sa-core/src/main/java/dt/sql/alarm/output/BaseOutput.scala), as sketched below this list
124 | 4. Supports alarm filtering for events through SQL
125 | 5. Supports multiple policies (time merge / time window + N counts merge) for alarm noise reduction
126 | 6. Supports alarm rules and policies that take effect dynamically without restarting the server
127 | 7. Supports adding data source topics dynamically (if your subscription mode is `subscribePattern`)
128 | 8. Supports sending alarm records through different specified channels
129 |
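As an illustration of point 3, here is a rough sketch of what a custom sink could look like. `BaseOutput` itself is not included in this listing, so the exact members to override are an assumption: the sketch assumes a single `process(data: Dataset[RecordDetail])` method, mirroring how `SparkRuntime` invokes the configured sinks, and uses the `@Sink` annotation from sa-core so that `SinkInfo` can discover the sink by name.

```scala
package dt.sql.alarm.output

import dt.sql.alarm.core.{RecordDetail, Sink}
import org.apache.spark.sql.Dataset

// Hypothetical sketch: the real BaseOutput contract may differ from the
// single process(...) method assumed here.
@Sink(name = "log")
class LogOutput extends BaseOutput {
  // Print every alarm record reaching the sink; a real sink would write to
  // its external system (HTTP endpoint, Elasticsearch, etc.) instead.
  override def process(data: Dataset[RecordDetail]): Unit = {
    data.collect().foreach(record => println(s"[LogOutput] $record"))
  }
}
```

If `SinkInfo` resolves sinks by the annotation name (as `SparkRuntime.getSinks` suggests), such a sink could then be enabled with `-sqlalarm.sinks log`.
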
130 | ### Collectors
131 | SQLAlarm doesn't generate metrics events itself; it only obtains metrics events from the message center and analyzes them.
132 | However, you can collect and report metrics events with another project called [metrics-exporter](https://github.com/bebee4java/metrics-exporter),
133 | which fills this gap well.
134 |
135 | **In this way, a complete alarm process looks like:
136 | [metrics-exporter](https://github.com/bebee4java/metrics-exporter) —> [sqlalarm](https://github.com/bebee4java/sqlalarm) —> alarm-pigeon**
137 |
138 | ### Documentation
139 | The documentation of SQLAlarm is located on the issues page: [SQLAlarm issues](https://github.com/bebee4java/sqlalarm/issues).
140 | It contains a lot of information such as [configuration](https://github.com/bebee4java/sqlalarm/issues/2) and usage tutorials. If you have any questions, please feel free to open an issue.
141 |
142 | ### Fork and Contribute
143 | This is an active open-source project. We are always open to people who want to use the system or contribute to it. Contact us if you are looking for implementation tasks that fit your skills.
144 |
--------------------------------------------------------------------------------
/bin/sqlalarm_records_log.sql:
--------------------------------------------------------------------------------
1 | DROP TABLE IF EXISTS `sqlalarm_records_log`;
2 |
3 | -- Alarm record detail log table
4 | CREATE TABLE IF NOT EXISTS `sqlalarm_records_log` (
5 | `id` int(20) unsigned NOT NULL AUTO_INCREMENT,
6 | `job_id` varchar(128) NOT NULL COMMENT 'job id',
7 | `job_stat` varchar(128) NOT NULL COMMENT 'job status',
8 | `event_time` timestamp NOT NULL COMMENT 'job event time',
9 | `message` varchar(2000) NOT NULL COMMENT 'job alarm message',
10 | `context` varchar(2000) NOT NULL COMMENT 'job context parameters',
11 | `title` varchar(128) NOT NULL COMMENT 'alarm title',
12 | `platform` varchar(128) NOT NULL COMMENT 'alarm platform',
13 | `item_id` varchar(128) NOT NULL COMMENT 'alarm item id',
14 | `source` varchar(128) NOT NULL COMMENT 'alarm record data source',
15 | `topic` varchar(128) NOT NULL COMMENT 'alarm record topic',
16 | `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'creation time',
17 | PRIMARY KEY (`id`),
18 | KEY `job_index` (`job_id`, `job_stat`),
19 | KEY `alarm_item_index` (`platform`, `item_id`)
20 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT 'Alarm record detail log table';
21 |
--------------------------------------------------------------------------------
/bin/start-local.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #set -x
4 |
5 | for env in SPARK_HOME ; do
6 | if [[ -z "${!env}" ]]; then
7 | echo "$env must be set to run this script"
8 | exit 1
9 | else
10 | echo ${env}=${!env}
11 | fi
12 | done
13 |
14 | if [[ -z "${SQLALARM_HOME}" ]]; then
15 | export SQLALARM_HOME="$(cd "`dirname "$0"`"/../; pwd)"
16 | fi
17 |
18 | echo "SQLALARM_HOME=$SQLALARM_HOME"
19 |
20 | MAIN_JAR=$(find ${SQLALARM_HOME}/*/target -type f -name "*.jar" \
21 | | grep 'sa-core' |grep -v "sources" | grep -v "original" | grep -v "javadoc")
22 |
23 | echo "MAIN_JAR=$MAIN_JAR"
24 |
25 | export DRIVER_MEMORY=${DRIVER_MEMORY:-2g}
26 | ${SPARK_HOME}/bin/spark-submit --class dt.sql.alarm.SQLAlarmBoot \
27 | --driver-memory ${DRIVER_MEMORY} \
28 | --master "local[*]" \
29 | --name SQLALARM \
30 | --conf "spark.driver.extraJavaOptions"="-DREALTIME_LOG_HOME=$SQLALARM_HOME/logs" \
31 | --conf "spark.sql.hive.thriftServer.singleSession=true" \
32 | --conf "spark.kryoserializer.buffer=256k" \
33 | --conf "spark.kryoserializer.buffer.max=1024m" \
34 | --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
35 | --conf "spark.scheduler.mode=FAIR" \
36 | --conf "spark.redis.host=127.0.0.1" \
37 | --conf "spark.redis.port=6379" \
38 | --conf "spark.redis.db=4" \
39 | ${MAIN_JAR} \
40 | -sqlalarm.name sqlalarm \
41 | -sqlalarm.sources kafka \
42 | -sqlalarm.input.kafka.topic sqlalarm_event \
43 | -sqlalarm.input.kafka.subscribe.topic.pattern 1 \
44 | -sqlalarm.input.kafka.bootstrap.servers "127.0.0.1:9092" \
45 | -sqlalarm.checkpointLocation checkpoint \
46 | -sqlalarm.sinks console
47 |
48 |
--------------------------------------------------------------------------------
/docs/alarm-console-sink.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bebee4java/sqlalarm/76dc595b2f57ffe121bdd0c125d26d1ac4d4b547/docs/alarm-console-sink.jpg
--------------------------------------------------------------------------------
/docs/sqlalarm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bebee4java/sqlalarm/76dc595b2f57ffe121bdd0c125d26d1ac4d4b547/docs/sqlalarm.png
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 | 4.0.0
6 |
7 | dt.sql.alarm
8 | sqlalarm
9 | pom
10 | 1.0-SNAPSHOT
11 |
12 | sa-core
13 | sa-admin
14 |
15 |
16 | SQLAlarm Parent POM
17 | https://github.com/bebee4java/sqlalarm
18 | Big data smart alarm by sql.
19 |
20 |
21 |
22 | Apache 2.0 License
23 | http://www.apache.org/licenses/LICENSE-2.0.html
24 | repo
25 |
26 |
27 |
28 |
29 | bebee4java
30 | songgongru
31 | grsong.cn@gmail.com
32 |
33 |
34 |
35 |
36 |
37 | UTF-8
38 | UTF-8
39 | 1.8
40 | 1.8
41 | 1.8
42 |
43 | 2.11.12
44 | 2.11
45 | provided
46 | 3.1.3
47 |
48 | 2.2.1.RELEASE
49 | 2.9.2
50 |
51 | 2.4.3
52 | 2.4
53 | 1.0.3
54 |
55 |
56 |
57 |
58 |
59 | org.scala-lang
60 | scala-library
61 | ${scala.version}
62 |
63 |
64 | org.scalatest
65 | scalatest_${scala.compat.version}
66 | 3.0.0
67 | test
68 |
69 |
70 | tech.sqlclub
71 | common-utils_${scala.compat.version}
72 | ${common-utils-version}
73 |
74 |
75 |
86 |
87 |
92 |
93 |
94 |
95 |
96 |
97 |
98 | org.scala-tools
99 | maven-scala-plugin
100 | 2.15.2
101 |
102 |
103 |
104 | -g:vars
105 |
106 |
107 | true
108 |
109 |
110 |
111 | scala-compile-first
112 | process-resources
113 |
114 | compile
115 |
116 |
117 |
118 | scala-test-compile
119 | process-test-resources
120 |
121 | testCompile
122 |
123 |
124 |
125 |
126 |
127 |
128 | org.apache.maven.plugins
129 | maven-compiler-plugin
130 | 2.3.2
131 |
132 |
133 | -g
134 | true
135 | 1.8
136 | 1.8
137 |
138 |
139 |
140 |
141 |
142 | maven-source-plugin
143 | 2.1
144 |
145 | true
146 |
147 |
148 |
149 | compile
150 |
151 | jar
152 |
153 |
154 |
155 |
156 |
157 | org.apache.maven.plugins
158 | maven-javadoc-plugin
159 | 3.0.1
160 |
161 |
162 | attach-javadocs
163 |
164 | jar
165 |
166 |
167 |
168 |
169 |
170 | org.apache.maven.plugins
171 | maven-jar-plugin
172 | 2.6
173 |
174 |
175 |
176 |
177 |
--------------------------------------------------------------------------------
/sa-admin/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | sqlalarm
7 | dt.sql.alarm
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | sa-admin
13 |
14 |
15 |
--------------------------------------------------------------------------------
/sa-admin/src/main/java/dt/sql/alarm/web/SQLAlarmConsole.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.web
2 |
3 | /**
4 | *
5 | * Created by songgr on 2019/12/26.
6 | */
7 | class SQLAlarmConsole {
8 |
9 | def main(args: Array[String]): Unit = {
10 |
11 | }
12 |
13 | }
14 |
--------------------------------------------------------------------------------
/sa-core/pom.xml:
--------------------------------------------------------------------------------
1 |
2 |
5 |
6 | sqlalarm
7 | dt.sql.alarm
8 | 1.0-SNAPSHOT
9 |
10 | 4.0.0
11 |
12 | sa-core
13 |
14 |
15 | provided
16 |
17 |
18 |
19 |
20 |
21 | org.reflections
22 | reflections
23 | 0.9.11
24 |
30 |
31 |
32 |
33 | org.apache.spark
34 | spark-sql_${scala.compat.version}
35 | ${spark.version}
36 | ${scope}
37 |
38 |
39 | com.fasterxml.jackson.module
40 | jackson-module-scala_${scala.compat.version}
41 |
42 |
43 | jackson-core
44 | com.fasterxml.jackson.core
45 |
46 |
47 | jackson-annotations
48 | com.fasterxml.jackson.core
49 |
50 |
51 | scala-xml_2.11
52 | org.scala-lang.modules
53 |
54 |
55 | jackson-databind
56 | com.fasterxml.jackson.core
57 |
58 |
59 | commons-lang3
60 | org.apache.commons
61 |
62 |
63 | slf4j-api
64 | org.slf4j
65 |
66 |
67 | slf4j-log4j12
68 | org.slf4j
69 |
70 |
71 | guava
72 | com.google.guava
73 |
74 |
75 | jersey-guava
76 | org.glassfish.jersey.bundles.repackaged
77 |
78 |
79 | avro
80 | org.apache.avro
81 |
82 |
83 | activation
84 | javax.activation
85 |
86 |
87 | scala-reflect
88 | org.scala-lang
89 |
90 |
91 | scala-parser-combinators_2.11
92 | org.scala-lang.modules
93 |
94 |
95 | commons-codec
96 | commons-codec
97 |
98 |
99 |
100 |
101 |
102 | org.apache.spark
103 | spark-sql-kafka-0-10_${scala.compat.version}
104 | ${spark.version}
105 |
106 |
107 |
108 |
109 | com.redislabs
110 | spark-redis_${scala.compat.version}
111 | 2.4.2
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 | org.apache.maven.plugins
120 | maven-shade-plugin
121 | 2.4.3
122 |
123 |
124 | package
125 |
126 | shade
127 |
128 |
129 |
130 |
131 | com.google.common.collect
132 | shade.com.google.common.collect
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 | *:*
142 |
143 | META-INF/*.SF
144 | META-INF/*.DSA
145 | META-INF/*.RSA
146 |
147 |
148 |
149 |
150 |
151 |
152 |
153 |
154 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/com/redislabs/provider/redis/ConnectionPool.scala:
--------------------------------------------------------------------------------
1 | package com.redislabs.provider.redis
2 |
3 | import java.util.concurrent.ConcurrentHashMap
4 |
5 | import dt.sql.alarm.core.Constants._
6 | import org.apache.spark.SparkEnv
7 | import redis.clients.jedis._
8 | import redis.clients.jedis.exceptions.JedisConnectionException
9 |
10 | import scala.collection.JavaConversions._
11 |
12 |
13 | object ConnectionPool {
14 | @transient private lazy val pools: ConcurrentHashMap[RedisEndpoint, JedisPoolAbstract] =
15 | new ConcurrentHashMap[RedisEndpoint, JedisPoolAbstract]()
16 |
17 | private lazy val sparkConf = SparkEnv.get.conf
18 |
19 | def connect(re: RedisEndpoint): Jedis = {
20 | val pool = pools.getOrElseUpdate(re,
21 | {
22 | val poolConfig: JedisPoolConfig = new JedisPoolConfig()
23 | poolConfig.setMaxTotal(250)
24 | poolConfig.setMaxIdle(32)
25 | poolConfig.setTestOnBorrow(false)
26 | poolConfig.setTestOnReturn(false)
27 | poolConfig.setTestWhileIdle(false)
28 | poolConfig.setMinEvictableIdleTimeMillis(60000)
29 | poolConfig.setTimeBetweenEvictionRunsMillis(30000)
30 | poolConfig.setNumTestsPerEvictionRun(-1)
31 | poolConfig.setMaxWaitMillis(10000)
32 |
33 | if (SPARK_REDIS_SENTINEL_MODE.equalsIgnoreCase(sparkConf.get(SPARK_REDIS_MODE, SPARK_REDIS_SENTINEL_MODE))){
34 | // sentinel mode
35 | val master = sparkConf.get(SPARK_REDIS_MASTER, SPARK_REDIS_MASTER_DEFAULT)
36 | val sentinels = new java.util.HashSet[String]()
37 | re.host.split(",").filter(_.nonEmpty).foreach(add => sentinels.add(add))
38 |
39 | new JedisSentinelPool(master, sentinels, poolConfig, re.timeout, re.auth, re.dbNum)
40 | } else {
41 | new JedisPool(poolConfig, re.host, re.port, re.timeout, re.auth, re.dbNum)
42 | }
43 | }
44 | )
45 | var sleepTime: Int = 4
46 | var conn: Jedis = null
47 | while (conn == null) {
48 | try {
49 | conn = pool.getResource
50 | }
51 | catch {
52 | case e: JedisConnectionException if e.getCause.toString.
53 | contains("ERR max number of clients reached") => {
54 | if (sleepTime < 500) sleepTime *= 2
55 | Thread.sleep(sleepTime)
56 | }
57 | case e: Exception => throw e
58 | }
59 | }
60 | conn
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/SQLAlarmBoot.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm
2 |
3 | import dt.sql.alarm.core._
4 | import core.Constants._
5 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils, ParamsUtils}
6 |
7 | object SQLAlarmBoot {
8 |
9 | // 5 min
10 | val daemonCleanInterval = 5*60*1000L
11 |
12 | def main(args: Array[String]): Unit = {
13 |
14 | val params = new ParamsUtils(args)
15 | ConfigUtils.configBuilder(params.getParamsMap)
16 | ConfigUtils.showConf()
17 | // require(ConfigUtils.hasConfig(appName), "Application name must be set")
18 | require(ConfigUtils.hasConfig(checkpoint), s"SQLAlarm stream $checkpoint must be set")
19 | require(ConfigUtils.hasConfig(SQLALARM_SOURCES), s"SQLAlarm stream $SQLALARM_SOURCES must be set")
20 | require(ConfigUtils.hasConfig(INPUT_PREFIX), s"SQLAlarm stream $INPUT_PREFIX must be set")
21 | // require(ConfigUtils.hasConfig(SQLALARM_SINKS), s"SQLAlarm stream $SQLALARM_SINKS must be set")
22 | // require(ConfigUtils.hasConfig(OUTPUT_PREFIX), s"SQLAlarm stream $OUTPUT_PREFIX must be set")
23 |
24 | require(ConfigUtils.hasConfig(SQLALARM_SINKS) || ConfigUtils.hasConfig(SQLALARM_ALERT),
25 | s"SQLAlarm stream $SQLALARM_SINKS or $SQLALARM_ALERT must be set at least one of them")
26 |
27 | val spark = SparkRuntime.getSparkSession
28 |
29 | SparkRuntime.parseProcessAndSink(spark)
30 |
31 | var completed = false
32 | if (ConfigUtils.hasConfig(SQLALARM_ALERT)) {
33 | val partitionNum = SparkRuntime.sparkConfMap.getOrElse(Constants.redisCacheDataPartitionNum,
34 | ConfigUtils.getStringValue(Constants.redisCacheDataPartitionNum, "3")).toInt
35 |
36 | def launchCleaner = {
37 | // start the alarm cache background cleaner
38 | WowLog.logInfo("SQLAlarm cache daemon cleaner start......")
39 | var batchId:Long = 1L
40 | while ( SparkRuntime.streamingQuery != null && SparkRuntime.streamingQuery.isActive ) {
41 | spark.sparkContext.setJobGroup("SQLAlarm cache clean group", s"cache-clean-batch-$batchId", true)
42 | val rdd = RedisOperations.getListCache(ALARM_CACHE + "*", partitionNum)
43 | if (rdd.count() > 0) {
44 | import spark.implicits._
45 | val cacheRecords = rdd.map{
46 | row =>
47 | JacksonUtils.fromJson[RecordDetail](row, classOf[RecordDetail])
48 | }.toDS
49 |
50 | val results = AlarmReduce.cacheReduce(cacheRecords)
51 | AlarmAlert.push(results, true) // Force clean cache after sending
52 | }
53 | batchId = batchId + 1
54 | spark.sparkContext.clearJobGroup()
55 | Thread.sleep(daemonCleanInterval)
56 | }
57 | if ( !SparkRuntime.streamingQuery.isActive ) completed = true
58 | }
59 |
60 | new Thread("launch-cache-cleaner-in-spark-job") {
61 | setDaemon(true)
62 | override def run(): Unit = {
63 | while ( !completed ) {
64 | try {
65 | launchCleaner
66 | }catch {
67 | case e:Exception =>
68 | e.printStackTrace()
69 | }
70 | WowLog.logInfo("SQLAlarm cache daemon cleaner exited, restarted after 60 seconds!")
71 | if (!completed) Thread.sleep(60000)
72 | }
73 |
74 | }
75 | }.start()
76 |
77 | }
78 |
79 | if ( SparkRuntime.streamingQuery != null )
80 | SparkRuntime.streamingQuery.awaitTermination()
81 |
82 | // set the completed flag to true
83 | completed = true
84 |
85 | if (!spark.sparkContext.isStopped) spark.sparkContext.stop()
86 |
87 | if (spark.sparkContext.isStopped) AlarmFlow.destroy
88 |
89 | }
90 |
91 | }
92 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/AlarmPolicyConf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 |
4 | import dt.sql.alarm.core.Constants.ALARM_POLICY
5 | import dt.sql.alarm.core.Constants.ALARM_CACHE
6 | import tech.sqlclub.common.utils.JacksonUtils
7 |
8 | case class AlarmPolicyConf(item_id:String, window:Window, policy:Policy)
9 | case class Window(`type`: String, value:Int, unit:String, count:Int){
10 | def getTimeWindowSec = {
11 | import WindowUnit._
12 | val u = unit.unit match {
13 | case WindowUnit.m => 60
14 | case WindowUnit.h => 3600
15 | case WindowUnit.d => 86400
16 | }
17 | value * u
18 | }
19 |
20 | }
21 | case class Policy(`type`:String, unit:String, value: Double, first_alert:Int){
22 | def alertFirst = 1 == first_alert
23 |
24 | import PolicyUnit._
25 | def getValue = if (unit.isPercent) norm(value / 100.0d) else value
26 |
27 | def norm(d:Double) = {
28 | d match {
29 | case x if x>=1 => 1.0d
30 | case x if x<=0 => 0.0d
31 | case _ => d
32 | }
33 | }
34 | }
35 |
36 |
37 | object WindowType extends Enumeration{
38 | implicit class WindowTypeString(s:String){
39 | def windowType:Value = WindowType.withName(s)
40 | def isTime:Boolean = time == windowType
41 | def isNumber:Boolean = number == windowType
42 | def isTimeCount:Boolean = timeCount == windowType
43 | }
44 | type Type = Value
45 | val time,number,timeCount = Value
46 | }
47 |
48 | object WindowUnit extends Enumeration{
49 | implicit class WindowUnitString(s:String){
50 | def unit:Value = WindowUnit.withName(s)
51 | }
52 | type Type = Value
53 | val m,h,d,n = Value
54 | }
55 |
56 | object PolicyType extends Enumeration{
57 | implicit class PolicyTypeString(s:String){
58 | def policyType:Value = PolicyType.withName(s)
59 | def isAbsolute:Boolean = absolute == policyType
60 | def isScale:Boolean = scale == policyType
61 | }
62 | type Type = Value
63 | val absolute,scale = Value
64 | }
65 |
66 | object PolicyUnit extends Enumeration{
67 | type Type = Value
68 | implicit class PolicyUnitString(s:String){
69 | def unit:Value = PolicyUnit.withName(s)
70 | def isPercent:Boolean = percent == unit
71 | }
72 | val number,percent = Value
73 | }
74 |
75 |
76 | object AlarmPolicyConf {
77 |
78 | def getRkey(source:String, topic:String) = List(ALARM_POLICY, source, topic).mkString(":")
79 |
80 | def getCacheKey(itemId:String) = List(ALARM_CACHE,itemId).mkString(":")
81 |
82 | def getCacheKey(itemId:String, jobId:String) = List(ALARM_CACHE,itemId,jobId).mkString(":")
83 |
84 | def getCacheKey(itemId:String, jobId:String, jobStat:String) = List(ALARM_CACHE, itemId, jobId, jobStat).mkString(":")
85 |
86 | def formJson(json:String) = JacksonUtils.fromJson[AlarmPolicyConf](json, classOf[AlarmPolicyConf])
87 |
88 | def prettyString(policyConf: AlarmPolicyConf): String = JacksonUtils.prettyPrint(policyConf)
89 |
90 |
91 | def main(args: Array[String]): Unit = {
92 |
93 | val d = Policy("", "number", 30, 1).getValue
94 |
95 | val s =
96 | """
97 | |{
98 | | "item_id" : "1222",
99 | | "window": {
100 | | "type": "time",
101 | | "value": 10,
102 | | "unit": "m"
103 | | },
104 | | "policy":{
105 | | "type":"absolute"
106 | | }
107 | |}
108 | """.stripMargin
109 |
110 | println(s)
111 |
112 | val policy = JacksonUtils.fromJson(s, classOf[AlarmPolicyConf])
113 |
114 | println(policy.window.`type`)
115 |
116 | println(policy)
117 | }
118 |
119 | /*
120 |
121 | {
122 | "item_id":"1222",
123 | "window":{
124 | "type":"time/number/timeCount",
125 | "value":10,
126 | "unit":"m/h/d/n",
127 | "count":0
128 | },
129 | "policy":{
130 | "type":"absolute/scale",
131 | "unit":"number/percent",
132 | "value":0.9/100,
133 | "first_alert": 0
134 | }
135 | }
136 |
137 | */
138 | }
139 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/AlarmRuleConf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 | import dt.sql.alarm.core.Constants.ALARM_RULE
4 | import tech.sqlclub.common.utils.JacksonUtils
5 |
6 | case class AlarmRuleConf(item_id:String, platform:String, title:String, source:Source, filter:Filter)
7 | case class Source(`type`:String, topic:String)
8 | case class Filter(table:String, structure:Array[Field], sql:String)
9 | case class Field(name:String, `type`:String, xpath:String)
10 |
11 | object AlarmRuleConf {
12 | def getRkey(source:String, topic:String) = List(ALARM_RULE, source, topic).mkString(":")
13 |
14 | def toJson(ruleConf: AlarmRuleConf) = JacksonUtils.toJson(ruleConf)
15 |
16 | def formJson(json:String) = JacksonUtils.fromJson[AlarmRuleConf](json, classOf[AlarmRuleConf])
17 |
18 | def prettyString(ruleConf: AlarmRuleConf): String = JacksonUtils.prettyPrint(ruleConf)
19 |
20 |
21 | def main(args: Array[String]): Unit = {
22 | println(prettyString(AlarmRuleConf("1222","alarm","sql alarm",
23 | Source("kafka", " sqlalarm_event"),
24 | Filter("error_job",
25 | Array(Field("job_id","string","$.jobid")),
26 | "select jobid from sqlalarm_event"
27 | )
28 | )
29 | ))
30 |
31 | }
32 |
33 | /*
34 | {
35 | "item_id" : "1222",
36 | "platform" : "alarm",
37 | "title" : "sql alarm",
38 | "source" : {
39 | "type" : "kafka",
40 | "topic" : " sqlalarm_event"
41 | },
42 | "filter" : {
43 | "table" : "error_job",
44 | "structure" : [ {
45 | "name" : "job_id",
46 | "type" : "string",
47 | "xpath" : "$.jobid"
48 | } ],
49 | "sql" : "select jobid from sqlalarm_event"
50 | }
51 | }
52 | */
53 | }
54 |
55 |
56 |
57 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/Conf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 | /**
4 | * Configuration interface
5 | * Created by songgr on 2019/12/20.
6 | */
7 | trait Conf
8 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/JdbcConf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 | /**
4 | *
5 | * Created by songgr on 2020/01/07.
6 | */
7 | case class JdbcConf(
8 | url:String, // jdbc url
9 | driver:String, // jdbc driver class
10 | user:String, // jdbc username
11 | password:String, // jdbc password
12 | var dbtable:String = "sqlalarm_records_log", // jdbc table name
13 | var numPartitions:Int = 8, // maximum number of partitions usable for parallel table writes
14 | var batchsize:Int = 1000, // JDBC batch size, which determines how many rows are inserted per round trip; this can help improve JDBC driver performance
15 | var mode:String = "append" // jdbc table write mode
16 | ) extends Conf
17 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/KafkaConf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 | import dt.sql.alarm.input.Constants.SubscribeType.SubscribeType
4 |
5 | /**
6 | *
7 | * Created by songgr on 2019/12/25.
8 | */
9 | case class KafkaConf(
10 | subscribeType:SubscribeType,
11 | topic:String,
12 | servers:String,
13 | group:String) extends Conf
14 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/conf/RedisConf.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.conf
2 |
3 | /**
4 | *
5 | * Created by songgr on 2020/01/09.
6 | */
7 | case class RedisConf(
8 | keys:String, // redis stream keys, multiple keys separated by commas
9 | start_offsets:String, // redis stream start offsets
10 | group:String, // redis stream consumer group
11 | consumer_prefix:String, // redis stream consumer name prefix
12 | var parallelism:Int = 4, // redis stream processing parallelism
13 | var batch_size:Int = 200, // redis stream batch size
14 | var read_block_msec:Long = 1000L // redis stream batch block time in milliseconds
15 | ) extends Conf
16 |
17 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/AlarmAlert.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import dt.sql.alarm.reduce.EngineResult
5 | import tech.sqlclub.common.log.Logging
6 |
7 | object AlarmAlert extends Logging {
8 |
9 | def push(results:Array[EngineResult], forceCleanCache:Boolean = false) : Unit = {
10 | results.filter(_.hasWarning).foreach {
11 | result =>
12 | val recordDetail = result.lastAlarmRecord
13 | val firstEventTime = result.firstAlarmRecord.event_time
14 | val count = result.reduceCount
15 | WowLog.logInfo(s"this moment the record has warning! Agg count: $count")
16 | // the forceCleanCache flag handles alarm records that expired without alerting on the first occurrence; the cache is force-deleted only when a single record of this kind exists
17 | if ( send(AlarmRecord.as(recordDetail), firstEventTime, count) && (count >1 || forceCleanCache) ) {
18 | val key = AlarmPolicyConf.getCacheKey(recordDetail.item_id, recordDetail.job_id, recordDetail.job_stat)
19 | RedisOperations.delCache(key)
20 | WowLog.logInfo(s"agg over, del the cache! key: $key")
21 | }
22 | }
23 |
24 | }
25 |
26 | def send(alarmRecord: AlarmRecord, firstTime:String, count:Int):Boolean = {
27 | logInfo("Alarm record call send api...")
28 | true
29 | }
30 |
31 | case class AlarmRecord(
32 | job_id:String,
33 | job_stat:String,
34 | event_time:String,
35 | message:String,
36 | context:String, // map string
37 | title:String,
38 | platform:String,
39 | item_id:String
40 | )
41 |
42 | object AlarmRecord {
43 | def as(recordDetail: RecordDetail) = AlarmRecord(
44 | recordDetail.job_id,
45 | recordDetail.job_stat,
46 | recordDetail.event_time,
47 | recordDetail.message,
48 | recordDetail.context,
49 | recordDetail.title,
50 | recordDetail.platform,
51 | recordDetail.item_id
52 | )
53 | }
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/AlarmFlow.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import java.util.concurrent._
4 | import java.util
5 | import java.util.UUID
6 |
7 | import Constants._
8 | import dt.sql.alarm.conf.{AlarmPolicyConf, AlarmRuleConf}
9 | import dt.sql.alarm.core.Constants.SQLALARM_ALERT
10 | import tech.sqlclub.common.log.Logging
11 | import tech.sqlclub.common.utils.ConfigUtils
12 | import org.apache.spark.sql.{Dataset, Row, SparkSession}
13 | import tech.sqlclub.common.exception.SQLClubException
14 |
15 | object AlarmFlow extends Logging {
16 |
17 | def taskNum:Int = SparkRuntime.sparkConfMap.getOrElse( futureTasksThreadPoolSize,
18 | ConfigUtils.getStringValue(futureTasksThreadPoolSize, "2")).toInt
19 |
20 | lazy private val executors = Executors.newFixedThreadPool(taskNum)
21 | lazy private val taskList = new util.ArrayList[Future[Unit]](taskNum)
22 | lazy private val taskTimeOut = SparkRuntime.sparkConfMap.getOrElse(futureTaskTimeOut,
23 | ConfigUtils.getStringValue(futureTaskTimeOut, "300000")).toLong // Default timeout 5 min
24 |
25 | def run(batchId:Long, data:Dataset[Row])
26 | (filterFunc: (Dataset[Row], AlarmRuleConf, AlarmPolicyConf) => Dataset[RecordDetail])
27 | (sinkFunc: Dataset[RecordDetail] => Unit)
28 | (alertFunc: (Dataset[RecordDetail], AlarmPolicyConf) => Unit)
29 | (implicit spark:SparkSession = data.sparkSession):Unit = {
30 |
31 | WowLog.logInfo("Alarm flow start....")
32 |
33 | val groupId = nextGroupId
34 | val jobName = s"SQLAlarm-batch-$batchId"
35 | spark.sparkContext.setJobGroup(groupId, jobName, true)
36 |
37 | import spark.implicits._
38 | val tableIds = data.groupBy(s"${RecordDetail.source}", s"${RecordDetail.topic}").count().map{
39 | row =>
40 | (row.getAs[String](s"${RecordDetail.source}"), row.getAs[String](s"${RecordDetail.topic}"), row.getAs[Long]("count"))
41 | }.collect()
42 |
43 | WowLog.logInfo(s"batch info (source, topic, count):\n${tableIds.mkString("\n")}")
44 |
45 | if (tableIds.isEmpty) {
46 | WowLog.logInfo("batch tableIds is empty return directly!")
47 | return
48 | }
49 |
50 | val rulesWithItemId:Array[(String,AlarmRuleConf)] = tableIds.flatMap{
51 | case (source, topic, _) =>
52 | val key = AlarmRuleConf.getRkey(source, topic) // rule redis key
53 | RedisOperations.getTableCache(Array(key)).collect() // get rules
54 | }.map{
55 | case (ruleConfId, ruleConf) =>
56 | (ruleConfId, AlarmRuleConf.formJson(ruleConf))
57 | }
58 |
59 | if (rulesWithItemId.isEmpty){
60 | WowLog.logInfo("alarm rule confs is empty return directly!")
61 | return
62 | }
63 |
64 | rulesWithItemId.filter(null != _._2).foreach{
65 | item =>
66 | val rule = item._2 // alarm rule
67 | val policyConf = RedisOperations.getTableCache(AlarmPolicyConf.getRkey(rule.source.`type`, rule.source.topic), rule.item_id)
68 | val policy = if(policyConf != null && policyConf.nonEmpty) AlarmPolicyConf.formJson(policyConf) else null // alarm policy
69 |
70 | try {
71 | // sql filter
72 | WowLog.logInfo("AlarmFlow table filter...")
73 | val filterTable = filterFunc(data, rule, policy)
74 | WowLog.logInfo("AlarmFlow table filter pass!")
75 |
76 |
77 | sinkAndAlert(filterTable, sinkFunc, alertFunc){
78 | () =>
79 | val tasks = taskList.iterator()
80 | WowLog.logInfo(s"We will run ${taskList.size()} tasks...")
81 | while (tasks.hasNext){
82 | val task = tasks.next()
83 | val result = runTask(task)
84 | if (result._1) {
85 | tasks.remove()
86 | } else {
87 | killBatchJob(spark, groupId, jobName)
88 | throw result._2.get
89 | }
90 | }
91 | WowLog.logInfo(s"All task completed! Current task list number is: ${taskList.size()}.")
92 | }(rule, policy)
93 | } catch {
94 | case e:SQLClubException =>
95 | logError(e.getMessage, e)
96 | }
97 | }
98 | WowLog.logInfo("Alarm flow end!")
99 | }
100 |
101 | def killBatchJob(spark:SparkSession, groupId:String, jobName: String) = {
102 | logInfo(s"Try to kill batch job: $groupId, job name: $jobName.")
103 | spark.sparkContext.cancelJobGroup(groupId)
104 | logInfo(s"Batch job: $groupId killed! Job name: $jobName.")
105 | }
106 |
107 | def nextGroupId = UUID.randomUUID().toString
108 |
109 | def sinkAndAlert(filterTable:Dataset[RecordDetail],
110 | sinkFunc:Dataset[RecordDetail]=>Unit,
111 | alertFunc:(Dataset[RecordDetail],AlarmPolicyConf)=>Unit)(run:()=>Unit)
112 | (implicit ruleConf: AlarmRuleConf, policyConf: AlarmPolicyConf): Unit ={
113 | try {
114 | filterTable.persist()
115 | if (filterTable.count() == 0) {
116 | WowLog.logInfo("filterTable is empty, don't need to run sink and alert functions return directly!")
117 | return
118 | }
119 |
120 | // alarm data sink
121 | if (ConfigUtils.hasConfig(SQLALARM_SINKS)) {
122 | val sinkTask = executors.submit(new Callable[Unit] {
123 | override def call(): Unit ={
124 | WowLog.logInfo("AlarmFlow table sink...")
125 | sinkFunc(filterTable)
126 | WowLog.logInfo("AlarmFlow table sink task will be executed in the future!")
127 | }
128 | })
129 | taskList.add(sinkTask)
130 | }
131 |
132 | // alarm record alert
133 | if (ConfigUtils.hasConfig(SQLALARM_ALERT)){
134 |
135 | val alertTask = executors.submit(new Callable[Unit] {
136 | override def call(): Unit ={
137 | WowLog.logInfo("AlarmFlow table alert...")
138 | alertFunc(filterTable, policyConf)
139 | WowLog.logInfo("AlarmFlow table alert task will be executed in the future!")
140 | }
141 | })
142 | taskList.add(alertTask)
143 | }
144 | run()
145 | }finally {
146 | filterTable.unpersist()
147 | }
148 | }
149 |
150 | def runTask( task:Future[Unit] ): (Boolean, Option[SQLClubException]) = {
151 | if (task != null && !task.isDone) {
152 | try {
153 | task.get(taskTimeOut, TimeUnit.MILLISECONDS)
154 | } catch {
155 | case e if e.isInstanceOf[InterruptedException] || e.isInstanceOf[ExecutionException] =>
156 | logError(e.getMessage, e)
157 | case e: TimeoutException =>
158 | logWarning(e.getMessage, e)
159 | return (false, Some(new SQLClubException(e.getMessage, e)))
160 | }
161 | }
162 | (true, None)
163 | }
164 |
165 | def destroy = {
166 | if (executors != null) {
167 | import scala.collection.JavaConverters._
168 | val unfinishedTasks = taskList.asScala.filterNot(_.isDone).asJava
169 | WowLog.logInfo(s"There are ${unfinishedTasks.size} outstanding tasks to be executed...")
170 | val tasks = unfinishedTasks.iterator()
171 | while (tasks.hasNext){
172 | val task = tasks.next()
173 | val result = runTask(task)
174 | if (result._1) {
175 | tasks.remove()
176 | } else {
177 | throw result._2.get
178 | }
179 | }
180 | WowLog.logInfo(s"All task completed! Current task list number is: ${unfinishedTasks.size()}.")
181 | if (!executors.isShutdown) executors.shutdownNow()
182 | }
183 | }
184 |
185 | }
186 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/AlarmReduce.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import tech.sqlclub.common.log.Logging
5 | import org.apache.spark.sql.Dataset
6 | import dt.sql.alarm.reduce.PolicyAnalyzeEngine
7 | import dt.sql.alarm.reduce.engine._
8 | import tech.sqlclub.common.utils.JacksonUtils
9 | import org.apache.spark.sql.functions._
10 | import dt.sql.alarm.core.Constants._
11 | import dt.sql.alarm.reduce.EngineResult
12 | import RecordDetail.{item_id, job_id, _}
13 | import org.apache.spark.sql.expressions.Window
14 | import dt.sql.alarm.conf._
15 | import dt.sql.alarm.conf.PolicyType._
16 | import dt.sql.alarm.conf.WindowType._
17 | import dt.sql.alarm.conf.PolicyUnit._
18 | import tech.sqlclub.common.exception.SQLClubException
19 |
20 | /**
21 | *
22 | * Created by songgr on 2019/12/25.
23 | */
24 | object AlarmReduce extends Logging {
25 |
26 | // RecordDetail all fields
27 | lazy val fields = RecordDetail.getAllFieldName.flatMap(field=> List(lit(field), col(field)) )
28 |
29 | def reduce(data:Dataset[RecordDetail], policy: AlarmPolicyConf): Array[EngineResult] = {
30 | val spark = data.sparkSession
31 | val engine = getPolicyAnalyzeEngine(policy.policy.`type`, policy.window.`type`, policy.policy.unit)
32 | import spark.implicits._
33 | // get the info of the relevant keys
34 | val keyInfos = data.groupBy(item_id, job_id).count().map {
35 | row =>
36 | (row.getAs[String](item_id), row.getAs[String](job_id))
37 | }.collect()
38 |
39 | WowLog.logInfo("Alarm reduce starting. Dim key info: " + keyInfos.mkString("\n"))
40 |
41 | // get redis cache
42 | val cacheRdd = keyInfos.map {
43 | case (item_id, job_id) =>
44 | RedisOperations.getListCache(AlarmPolicyConf.getCacheKey(item_id, job_id) + "*")
45 | }.reduce(_ union _)
46 |
47 | val cacheRecord = cacheRdd.map{
48 | row =>
49 | JacksonUtils.fromJson[RecordDetail](row, classOf[RecordDetail])
50 | }.toDS.withColumn(SQL_FIELD_DATAFROM_NAME, lit(SQL_FIELD_CACHE_NAME)) // add dataFrom col
51 |
52 | val streamRecord = data.withColumn(SQL_FIELD_DATAFROM_NAME, lit(SQL_FIELD_STREAM_NAME)) // add dataFrom col
53 | .selectExpr(cacheRecord.columns :_*) // to guard against inconsistent field order
54 |
55 | // scale-based aggregation does not distinguish job_stat, it groups only by the object
56 | val jobStatus = if (policy.policy.`type`.isScale) {
57 | lit("_")
58 | } else {
59 | col(job_stat)
60 | }
61 |
62 | /*
63 | root
64 | |-- job_id: string (nullable = true)
65 | |-- job_stat: string (nullable = false)
66 | |-- event_time: string (nullable = true)
67 | |-- message: string (nullable = true)
68 | |-- context: string (nullable = true)
69 | |-- title: string (nullable = true)
70 | |-- platform: string (nullable = true)
71 | |-- item_id: string (nullable = true)
72 | |-- source: string (nullable = true)
73 | |-- topic: string (nullable = true)
74 | |-- alarm: integer (nullable = false)
75 | |-- dataFrom: string (nullable = false)
76 | |-- value: string (nullable = true)
77 | */
78 |
79 | val table = streamRecord // stream data union cache data
80 | .union(cacheRecord)
81 | .withColumn(job_stat, jobStatus)
82 | .withColumn(SQL_FIELD_VALUE_NAME, to_json(map(fields: _*))) // add all fields value field
83 |
84 | // logInfo("AlarmReduce streamData.union(cacheData) schema: ")
85 | // table.printSchema()
86 |
87 | val result = engine.analyse(policy, table)
88 |
89 | val warningResults = result.filter(_.hasWarning)
90 |
91 | if (warningResults.length > 0) {
92 | WowLog.logInfo("Policy Engine Analyze hasWarning result is :")
93 | logInfo(result.filter(_.hasWarning).mkString("\n"))
94 | } else {
95 | WowLog.logInfo("Policy Engine Analyze done. Has no Warning result!")
96 | }
97 |
98 | result
99 | }
100 |
101 | def cacheReduce(data:Dataset[RecordDetail]): Array[EngineResult] = {
102 | val table = data.withColumn(SQL_FIELD_VALUE_NAME, to_json(map(fields: _*))) // add all fields value field
103 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value
104 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
105 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value
106 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
107 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time
108 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
109 | .withColumn(SQL_FIELD_EARLIEST_EVENT_TIME_NAME, last(event_time) // first event time
110 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
111 | .withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value
112 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
113 | .withColumn(SQL_FIELD_COUNT_NAME, count(lit(1)) // record count
114 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
115 |
116 |
117 | val pendingRecords = table.filter(col(SQL_FIELD_RANK_NAME) === 1).
118 | select(item_id, job_id, job_stat, SQL_FIELD_CURRENT_EVENT_TIME_NAME,SQL_FIELD_CURRENT_RECORD_NAME,
119 | SQL_FIELD_EARLIEST_EVENT_TIME_NAME,SQL_FIELD_EARLIEST_RECORD_NAME,SQL_FIELD_COUNT_NAME)
120 | // cache duration field
121 | .withColumn(SQL_FIELD_CACHE_DURATION,
122 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)))
123 | // cache add interval
124 | .withColumn(SQL_FIELD_CACHE_ADD_INTERVAL,
125 | (unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)))/col(SQL_FIELD_COUNT_NAME)
126 | )
127 | // cache util time
128 | .withColumn(SQL_FIELD_CACHE_UNTIL_TIME,
129 | unix_timestamp() - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME))
130 | )
131 |
132 | val policies = RedisOperations.getTableCache(ALARM_POLICY + "*")
133 | val policyMap = policies.map(item => (item._1, AlarmPolicyConf.formJson(item._2))).collect().toMap
134 | pendingRecords.collect().map {
135 | row =>
136 | val itemId = row.getAs[String](item_id)
137 | val jobId = row.getAs[String](job_id)
138 | val jobStat = row.getAs[String](job_stat)
139 | val untilTime = row.getAs[Long](SQL_FIELD_CACHE_UNTIL_TIME)
140 | val cacheAddInterval = row.getAs[Double](SQL_FIELD_CACHE_ADD_INTERVAL)
141 | val count = row.getAs[Long](SQL_FIELD_COUNT_NAME)
142 | val key = AlarmPolicyConf.getCacheKey(itemId, jobId, jobStat)
143 | val policyConf = policyMap.get(itemId)
144 | if (policyConf.isDefined) {
145 | val policy = policyConf.get
146 | val windowType = policy.window.`type`.windowType
147 | val policyType = policy.policy.`type`.policyType
148 | val overWindow = windowType match {
149 | case WindowType.time | WindowType.timeCount =>
150 | untilTime > policy.window.getTimeWindowSec * 1.2 // multiply by 1.2 to stay offset from the main flow; there is still a chance of overlapping with it
151 | case WindowType.number =>
152 | untilTime > cacheAddInterval * count * 1.2
153 |
154 | }
155 | if (overWindow) {
156 | (policyType, windowType) match {
157 | // scale aggregation and time+count aggregation: once the window is exceeded, clear the cache directly, no push needed
158 | case (PolicyType.scale, _) =>
159 | WowLog.logInfo(s"the cache has not been merged for a long time, the cache is useless, del it! key: $key")
160 | RedisOperations.delCache(key)
161 | EngineResult(false, null, null, -1)
162 | case (PolicyType.absolute, WindowType.timeCount) =>
163 | if (count >= policy.window.count) {
164 | WowLog.logInfo(s"the record cache has warning and merged by daemon clean server. Agg count: $count, key: $key.")
165 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
166 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail])
167 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue())
168 | } else {
169 | WowLog.logInfo(s"the cache has not been merged for a long time, the cache is useless, del it! key: $key")
170 | RedisOperations.delCache(key)
171 | EngineResult(false, null, null, -1)
172 | }
173 | // time aggregation and count aggregation: once the window is exceeded, aggregate the history and push it
174 | case (PolicyType.absolute, WindowType.time) | (PolicyType.absolute, WindowType.number) =>
175 | if (count == 1 && policy.policy.alertFirst) {
176 | // the cache has only one record and the first occurrence was already alerted; just clean it up, no push needed
177 | WowLog.logInfo(s"this alarm record has been pushed, del it! key:$key")
178 | RedisOperations.delCache(key)
179 | EngineResult(false, null, null, -1)
180 | } else {
181 | WowLog.logInfo(s"the record cache has warning and merged by daemon clean server. Agg count: $count, key: $key.")
182 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
183 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail])
184 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue())
185 | }
186 | }
187 | } else {
188 | WowLog.logInfo(s"the record cache is under window, ignore it! key: $key.")
189 | // the window has not been exceeded, do not aggregate or alert
190 | EngineResult(false, null, null, -1)
191 | }
192 | } else {
193 | // no matching aggregation policy, delete the key
194 | logWarning(s"has no policy, ignore it! del the key: $key.")
195 | RedisOperations.delCache(key)
196 | EngineResult(false, null, null, -1)
197 | }
198 | }
199 |
200 | }
201 |
202 | def getPolicyAnalyzeEngine(policyType:String, windowType:String, policyUnit: String):PolicyAnalyzeEngine = {
203 | (policyType.policyType, windowType.windowType) match {
204 | case (PolicyType.absolute, windowType) => {
205 | val window = windowType match {
206 | case WindowType.number => NumberWindow
207 | case WindowType.time => TimeWindow
208 | case WindowType.timeCount => TimeCountWindow
209 | }
210 | new ReduceByWindow(window)
211 | }
212 | case (PolicyType.scale, WindowType.number) => {
213 | if (policyUnit.isPercent) {
214 | new ReduceByNumScale(Percent)
215 | } else {
216 | new ReduceByNumScale(Number)
217 | }
218 | }
219 | case (PolicyType.scale, WindowType.time) => {
220 | if (policyUnit.isPercent) {
221 | new ReduceByTimeScale(Percent)
222 | } else {
223 | new ReduceByTimeScale(Number)
224 | }
225 | }
226 | case _ =>
227 | throw new SQLClubException(s"Unsupported policyAnalyzeEngine type! windowType:$windowType, policyType:$policyType, policyUnit:$policyUnit.")
228 | }
229 | }
230 |
231 | }
232 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/Base.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import dt.sql.alarm.conf.Conf
4 | import org.apache.spark.sql.SparkSession
5 |
6 | trait Base {
7 | /**
8 | * Configuration check
9 | */
10 | protected[this] def checkConfig:Option[Conf]
11 |
12 | /**
13 | * Data processing
14 | * @param session SparkSession
15 | */
16 | protected[this] def process(session: SparkSession)
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/Constants.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | object Constants {
4 |
5 | val appName = "sqlalarm.name"
6 |
7 | val master = "sqlalarm.master"
8 |
9 | val checkpoint = "sqlalarm.checkpointLocation"
10 |
11 | val trigger = "spark.streaming.trigger.time.interval.msec"
12 | val futureTaskTimeOut = "spark.streaming.future.task.timeout.msec"
13 | val futureTasksThreadPoolSize = "spark.streaming.future.tasks.threadPool.size"
14 | val redisCacheDataPartitionNum = "spark.redis.cache.data.partition.num"
15 |
16 | val SQLALARM_SOURCES = "sqlalarm.sources"
17 | val SQLALARM_SINKS = "sqlalarm.sinks"
18 | val SQLALARM_ALERT = "sqlalarm.alert"
19 |
20 | val INPUT_PREFIX = "sqlalarm.input"
21 | val OUTPUT_PREFIX = "sqlalarm.output"
22 |
23 | val ALARM_RULE = "sqlalarm_rule"
24 | val ALARM_CACHE = "sqlalarm_cache"
25 | val ALARM_POLICY = "sqlalarm_policy"
26 |
27 |
28 | // SQL field name
29 | val SQL_FIELD_TOPIC_NAME = "topic"
30 | val SQL_FIELD_SOURCE_NAME = "source"
31 | val SQL_FIELD_VALUE_NAME = "value"
32 | val SQL_FIELD_EARLIEST_RECORD_NAME = "earliest_record"
33 | val SQL_FIELD_CURRENT_RECORD_NAME = "current_record"
34 | val SQL_FIELD_EARLIEST_EVENT_TIME_NAME = "earliest_event_time"
35 | val SQL_FIELD_CURRENT_EVENT_TIME_NAME = "current_event_time"
36 | val SQL_FIELD_DATAFROM_NAME = "dataFrom"
37 | val SQL_FIELD_CACHE_NAME = "cache"
38 | val SQL_FIELD_STREAM_NAME = "stream"
39 | val SQL_FIELD_RANK_NAME = "rank"
40 | val SQL_FIELD_MAXRANK_NAME = "maxRank"
41 | val SQL_FIELD_COUNT_NAME = "count"
42 | val SQL_FIELD_TOTAL_COUNT_NAME = "total_count"
43 | val SQL_FIELD_ALARM_COUNT_NAME = "alarm_count"
44 | val SQL_FIELD_ALARM_PERCENT_NAME = "alarm_percent"
45 | val SQL_FIELD_EVENT_TIME_DURATION_NAME = "event_time_duration"
46 |
47 | val SQL_FIELD_CACHE_DURATION = "cache_duration"
48 | val SQL_FIELD_CACHE_ADD_INTERVAL = "cache_add_interval"
49 | val SQL_FIELD_CACHE_UNTIL_TIME = "cache_until_time"
50 |
51 | val SPARK_REDIS_MODE = "spark.redis.mode"
52 | val SPARK_REDIS_MASTER = "spark.redis.master"
53 | val SPARK_REDIS_MASTER_DEFAULT = "mymaster"
54 | val SPARK_REDIS_SENTINEL_MODE = "sentinel"
55 | val SPARK_REDIS_SINGLE_MODE = "single"
56 | }
57 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/RecordDetail.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import org.apache.spark.sql.types._
4 | /**
5 | *
6 | * Created by songgr on 2019/12/25.
7 | */
8 | case class RecordDetail(
9 | job_id:String,
10 | job_stat:String,
11 | event_time:String,
12 | message:String,
13 | context:String, // map string
14 | title:String,
15 | platform:String,
16 | item_id:String,
17 | source:String,
18 | topic:String,
19 | alarm:Int // is alarm
20 | )
21 |
22 | object RecordDetail {
23 | val job_id = "job_id"
24 | val job_stat = "job_stat"
25 | val event_time = "event_time"
26 | val message = "message"
27 | val context = "context"
28 | val title = "title"
29 | val platform = "platform"
30 | val item_id = "item_id"
31 | val source = "source"
32 | val topic = "topic"
33 | val alarm = "alarm"
34 |
35 | // required SQL fields
36 | def getAllSQLFieldName = Seq[String](job_id, job_stat, event_time, message, context)
37 |
38 | // fields added automatically by the backend
39 | def getAllBackFieldName = Seq[String](title, platform, item_id, source, topic, alarm)
40 |
41 | def getAllFieldName = getAllSQLFieldName ++ getAllBackFieldName
42 |
43 | def getAllFieldSchema = StructType(Seq(
44 | StructField(job_id, StringType),
45 | StructField(job_stat, StringType),
46 | StructField(event_time, StringType),
47 | StructField(message, StringType),
48 | StructField(context, StringType),
49 | StructField(title, StringType),
50 | StructField(platform, StringType),
51 | StructField(item_id, StringType),
52 | StructField(source, StringType),
53 | StructField(topic, StringType),
54 | StructField(alarm, IntegerType)
55 | ))
56 | }
57 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/Sink.java:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core;
2 |
3 | /**
4 | * Created by songgr on 2019/12/25.
5 | */
6 | import java.lang.annotation.ElementType;
7 | import java.lang.annotation.Retention;
8 | import java.lang.annotation.RetentionPolicy;
9 | import java.lang.annotation.Target;
10 |
11 | @Target(ElementType.TYPE)
12 | @Retention(RetentionPolicy.RUNTIME)
13 | public @interface Sink {
14 | String name();
15 | }
16 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/Source.java:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core;
2 |
3 | /**
4 | * Created by songgr on 2019/12/23.
5 | */
6 |
7 | import java.lang.annotation.ElementType;
8 | import java.lang.annotation.Retention;
9 | import java.lang.annotation.RetentionPolicy;
10 | import java.lang.annotation.Target;
11 |
12 | @Target(ElementType.TYPE)
13 | @Retention(RetentionPolicy.RUNTIME)
14 | public @interface Source {
15 | String name();
16 | }
17 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/SparkRuntime.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import org.apache.spark.{SparkConf, SparkContext, SparkEnv}
4 | import dt.sql.alarm.input.SourceInfo
5 | import Constants._
6 | import dt.sql.alarm.filter.SQLFilter
7 | import dt.sql.alarm.output.SinkInfo
8 | import dt.sql.alarm.reduce.EngineResult
9 | import org.apache.spark.rdd.RDD
10 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession}
11 | import tech.sqlclub.common.log.Logging
12 | import tech.sqlclub.common.utils.ConfigUtils
13 | import org.apache.spark.sql.streaming.{StreamingQuery, Trigger}
14 | import tech.sqlclub.common.exception.SQLClubException
15 | import scala.collection.JavaConverters._
16 |
17 | object SparkRuntime extends Logging {
18 | private var sparkSession :SparkSession = null
19 | var sparkConfMap:Map[String,String] = null
20 | var streamingQuery:StreamingQuery = null
21 |
22 | def getSparkSession:SparkSession = {
23 | if (sparkSession == null) {
24 | this.synchronized {
25 | if (sparkSession == null) {
26 | WowLog.logInfo("create Spark Runtime....")
27 | val params = ConfigUtils.toStringMap
28 | val conf = new SparkConf()
29 | params.filter(f =>
30 | f._1.startsWith("spark.") ||
31 | f._1.startsWith("hive.")
32 | ).foreach { f =>
33 | conf.set(f._1, f._2)
34 | }
35 | if (ConfigUtils.hasConfig(appName)) {
36 | conf.setAppName(ConfigUtils.getStringValue(appName))
37 | }
38 | if (ConfigUtils.hasConfig(master)) {
39 | conf.setMaster(ConfigUtils.getStringValue(master))
40 | }
41 | sparkSession = SparkSession.builder().config(conf).getOrCreate()
42 | sparkConfMap = sparkSession.conf.getAll
43 | WowLog.logInfo("Spark Runtime created!!!")
44 | }
45 | }
46 | }
47 | sparkSession
48 | }
49 |
50 | def parseProcessAndSink(spark:SparkSession) = {
51 | WowLog.logInfo("spark parse process and sink start...")
52 | val sources = getSourceTable(spark)
53 |     WowLog.logInfo("spark stream got all source tables successfully!")
54 | logInfo("All source data schema: ")
55 | sources.printSchema()
56 | val dStreamWriter = sources.writeStream.foreachBatch{
57 | (batchTable, batchId) =>
58 | WowLog.logInfo(s"start processing batch: $batchId")
59 | val start = System.nanoTime()
60 | AlarmFlow.run(batchId, batchTable){
61 | // filterFunc
62 | (table, rule, policy) =>
63 | val filterTable = SQLFilter.process(table, rule, policy)
64 | import spark.implicits._
65 | filterTable.as[RecordDetail]
66 | }{
67 | // sinkFunc
68 | table =>
69 | sinks.foreach(_ process table.filter(_.alarm == 1) )
70 | }{
71 | // alertFunc
72 | (table, policy)=>
73 | val alarmRecords = if (null != policy) {
74 | AlarmReduce.reduce(table, policy) // alarm noise reduction
75 | } else {
76 |               // no policy configured: push every record
77 | table.collect().map{
78 | record =>
79 | EngineResult(true, record, record, 1)
80 | }
81 | }
82 | AlarmAlert.push(alarmRecords) // alarm alert
83 | }
84 | val end = System.nanoTime()
85 |         WowLog.logInfo(s"batch $batchId processing is done. Total time consumed: ${(end-start)/1000000} ms.")
86 | }
87 |
88 | streamingQuery = dStreamWriter
89 | .queryName(ConfigUtils.getStringValue(appName))
90 | .option("checkpointLocation", ConfigUtils.getStringValue(checkpoint))
91 | .trigger(Trigger.ProcessingTime(sparkConfMap.getOrElse(trigger,
92 |         ConfigUtils.getStringValue(trigger, "3000")).toLong)) // defaults to 3000 ms (3s)
93 | .start()
94 | }
95 |
96 | private lazy val sinks = getSinks
97 |
98 | def getSinks = {
99 | val sinks = ConfigUtils.getStringValue(SQLALARM_SINKS)
100 | val sinkNames = sinks.split(",").filterNot(_.isEmpty)
101 |
102 | assert(sinkNames.filterNot(SinkInfo.sinkExist(_)).size == 0,
103 |       s"Check the sink configuration, currently supported sinks: ${SinkInfo.getAllSink}"
104 | )
105 | sinkNames.map(SinkInfo.getSink(_))
106 | }
107 |
108 | def getSourceTable(spark:SparkSession) = {
109 | val sources_ = ConfigUtils.getStringValue(SQLALARM_SOURCES)
110 |
111 | val sourceNames = sources_.split(",").filterNot(_.isEmpty)
112 |
113 | assert(sourceNames.filterNot(SourceInfo.sourceExist(_)).size == 0,
114 |       s"Check the sources configuration, currently supported sources: ${SourceInfo.getAllSource}"
115 | )
116 |
117 | val sources = sourceNames.map {
118 | sourceName =>
119 | logInfo(s"spark stream create source $sourceName!")
120 | SourceInfo.getSource(sourceName).getDataSetStream(spark)
121 | }
122 | /*
123 | root
124 | |-- source: string (nullable = false)
125 | |-- topic: string (nullable = false)
126 | |-- value: string (nullable = false)
127 | */
128 | sources.filter(_ != null).reduce(_ union _)
129 | }
130 | }
131 |
132 | object RedisOperations {
133 | import redis.clients.jedis.Jedis
134 | import com.redislabs.provider.redis._
135 | import redis.clients.jedis.ScanParams
136 | import com.redislabs.provider.redis.util.ConnectionUtils
137 |
138 | lazy private val spark = SparkRuntime.getSparkSession
139 | def sc:SparkContext = spark.sparkContext
140 |
141 | lazy private val redisEndpoint = RedisConfig.fromSparkConf(SparkEnv.get.conf).initialHost
142 | lazy private val readWriteConfig = ReadWriteConfig.fromSparkConf(SparkEnv.get.conf)
143 |
144 | def IncorrectMsg = s"RedisOperations keysOrKeyPattern should be String or Array[String]"
145 |
146 | def getTableCache[T](keysOrKeyPattern: T, partitionNum:Int):RDD[(String, String)] = {
147 | keysOrKeyPattern match {
148 | case keyPattern: String => sc.fromRedisHash(keyPattern.asInstanceOf[String], partitionNum)
149 | case keys: Array[String] => sc.fromRedisHash(keys.asInstanceOf[Array[String]], partitionNum)
150 | case _ => throw new SQLClubException(IncorrectMsg)
151 | }
152 | }
153 |
154 | def getTableCache[T](keysOrKeyPattern: T):RDD[(String, String)] = getTableCache(keysOrKeyPattern, 3)
155 |
156 | def getTableCache(key: String, field:String)
157 | (implicit conn:Jedis = redisEndpoint.connect()):String = {
158 | ConnectionUtils.withConnection[String](conn) {
159 | conn =>
160 | conn.hget(key, field)
161 | }
162 | }
163 |
164 |
165 | def addTableCache(key: String, field: String, value: String)
166 | (implicit conn:Jedis = redisEndpoint.connect()): Long = {
167 | ConnectionUtils.withConnection[Long](conn) {
168 | conn =>
169 | conn.hset(key, field, value)
170 | }
171 | }
172 |
173 |
174 | def getListCache[T](keysOrKeyPattern:T, partitionNum:Int=3):RDD[String] = {
175 | keysOrKeyPattern match {
176 | case keyPattern: String => sc.fromRedisList(keyPattern.asInstanceOf[String], partitionNum)
177 | case keys: Array[String] => sc.fromRedisList(keys.asInstanceOf[Array[String]], partitionNum)
178 | case _ => throw new SQLClubException(IncorrectMsg)
179 | }
180 |
181 | }
182 |
183 | def scanListCacheKeys(keyPattern:String)
184 | (implicit conn:Jedis = redisEndpoint.connect(), config:ReadWriteConfig = readWriteConfig):Seq[String]= {
185 | ConnectionUtils.withConnection[Seq[String]](conn) {
186 | conn =>
187 | val keys = new java.util.ArrayList[String]
188 | val params = new ScanParams().`match`(keyPattern).count(config.scanCount)
189 | var cursor = "0"
190 | do {
191 | val scan = conn.scan(cursor, params)
192 | keys.addAll(scan.getResult)
193 | cursor = scan.getCursor
194 | } while (cursor != "0")
195 | keys.asScala
196 | }
197 | }
198 |
199 | def setListCache[T](key:String, data:T, saveMode: SaveMode, ttl:Int=0) = {
200 | if (SaveMode.Overwrite == saveMode) {
201 | val conn = redisEndpoint.connect()
202 | ConnectionUtils.withConnection[Long](conn) {
203 | conn =>
204 | conn.del(key)
205 | }
206 | }
207 | import spark.implicits._
208 | val rdd = data match {
209 | case rdd:RDD[String] => rdd.filter(s => s != null && s.nonEmpty).map(s => s.toString)
210 | case df:DataFrame => df.filter(_ != null).map(row => row.getAs[String](0)).rdd
211 | }
212 |
213 | sc.toRedisLIST(rdd, key, ttl)
214 | }
215 |
216 | def delCache(keys:String*)
217 | (implicit conn:Jedis = redisEndpoint.connect()): Long = {
218 | ConnectionUtils.withConnection[Long](conn) {
219 | conn =>
220 | conn.del(keys:_*)
221 | }
222 | }
223 |
224 | }
225 |
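Usage note — a quick sketch (not part of this file) of the cache helpers above; it assumes a running redis, `spark.redis.*` configuration, and the key layout used in RedisOperationsSuite (`sqlalarm_rule:<source>:<topic>` hashes and `sqlalarm_cache:<item_id>:<job_id>:<job_stat>` lists):

```scala
import dt.sql.alarm.core.Constants.{appName, master}
import dt.sql.alarm.core.{RedisOperations, SparkRuntime}
import tech.sqlclub.common.utils.ConfigUtils

object RedisOperationsSketch extends App {
  // configure and start the runtime first: the redis endpoint is derived from the Spark conf
  ConfigUtils.configBuilder(Map(
    appName -> "RedisOperationsSketch", master -> "local[2]",
    "spark.redis.host" -> "127.0.0.1", "spark.redis.port" -> "6379", "spark.redis.db" -> "4"))
  SparkRuntime.getSparkSession

  // rule/policy definitions live in redis hashes
  RedisOperations.addTableCache("sqlalarm_rule:kafka:sqlalarm_event", "uuid00000001", "{}")
  val rule = RedisOperations.getTableCache("sqlalarm_rule:kafka:sqlalarm_event", "uuid00000001")

  // per-job alarm records live in redis lists, keyed sqlalarm_cache:<item_id>:<job_id>:<job_stat>
  val keys   = RedisOperations.scanListCacheKeys("sqlalarm_cache:uuid00000001:*")
  val cached = RedisOperations.getListCache("sqlalarm_cache:uuid00000001:*") // RDD[String]
  println(s"rule=$rule keys=$keys cachedCount=${cached.count()}")
}
```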
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/core/WowLog.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.core
2 |
3 | import tech.sqlclub.common.log.Logging
4 |
5 | object WowLog extends Logging {
6 |
7 | override def logInfo(msg: => String): Unit = {
8 | val info = s""" ###### $msg ###### """
9 | super.logInfo(info)
10 | }
11 |
12 | override def logInfo(msg: => String, throwable: Throwable): Unit = {
13 | val info = s""" ###### $msg ###### """
14 | super.logInfo(info, throwable)
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/filter/SQLFilter.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.filter
2 |
3 | import dt.sql.alarm.conf.{AlarmPolicyConf, AlarmRuleConf}
4 | import dt.sql.alarm.core.RecordDetail._
5 | import org.apache.spark.sql.functions._
6 | import org.apache.spark.sql.{DataFrame, Dataset, Row}
7 | import tech.sqlclub.common.exception.SQLClubException
8 | import tech.sqlclub.common.log.Logging
9 | import org.apache.spark.sql.types.{MapType, StringType}
10 | import dt.sql.alarm.core.Constants.SQL_FIELD_VALUE_NAME
11 | import dt.sql.alarm.core.{RedisOperations, WowLog}
12 | import org.apache.spark.sql.catalyst.plans.logical.{Project, Union}
13 |
14 | object SQLFilter extends Logging {
15 |
16 | lazy private val requireCols = getAllSQLFieldName
17 | lazy private val requireSchema = getAllFieldSchema.map(f => (f.name, f.dataType)).toMap
18 |
19 | def process(df:Dataset[Row], ruleConf:AlarmRuleConf, policy:AlarmPolicyConf):DataFrame = {
20 | val spark = df.sparkSession
21 |
22 | val source_ = ruleConf.source
23 | val structures = ruleConf.filter.structure
24 | val tableName = ruleConf.filter.table
25 | val sql = ruleConf.filter.sql.trim
26 |
27 | val fields = structures.map{
28 | field =>
29 | s"cast(get_json_object($SQL_FIELD_VALUE_NAME, '${field.xpath}') as ${field.`type`}) as ${field.name}"
30 | }
31 |
32 | val table = try {
33 | df.filter( col(source) === source_.`type` and col(topic) === source_.topic ).selectExpr(fields :_*)
34 | } catch {
35 | case e:Exception => throw new SQLClubException(e.getMessage, e)
36 | }
37 |
38 | logInfo(s"rule item_id: ${ruleConf.item_id}, the SQLFilter SQL table [ $tableName ] schema: ")
39 | table.printSchema()
40 |
41 | table.createOrReplaceTempView(tableName)
42 |
43 | def checkSQLSyntax(sql: String): (Boolean, String) = {
44 | try {
45 |         // this only checks the SQL syntax
46 | val logicalPlan = spark.sessionState.sqlParser.parsePlan(sql)
47 | if (!logicalPlan.resolved) {
48 |           // analysis here validates against the actual table schema
49 | spark.sessionState.executePlan(logicalPlan).assertAnalyzed()
50 | (true, "")
51 | } else {
52 | (true, "")
53 | }
54 | } catch {
55 | case e:Exception =>
56 | (false, e.getMessage)
57 | }
58 | }
59 |
60 | val ck = checkSQLSyntax(sql)
61 |     if (!ck._1) throw new SQLClubException(s"input filter sql error! item_id: ${ruleConf.item_id}" + ". sql:\n" + sql + " .\n\n" + ck._2)
62 |
63 |     logInfo(s"input ruleConf:[source:${source_.`type`}, topic:${source_.topic}, tableName:$tableName]. item_id: ${ruleConf.item_id}, exec SQL: $sql")
64 |
65 | val sqlPlan = spark.sql(sql).queryExecution.analyzed
66 |
67 | val sqlCols = sqlPlan.output.map{att => att.name.toLowerCase}
68 |
69 | val b = (true /: requireCols){(x,y) => x && sqlCols.contains(y)}
70 |
71 | if(!b){
72 | logError(s"rule item_id: ${ruleConf.item_id}, exec sql output cols must contains col list: " + requireCols)
73 | throw new SQLClubException("exec sql output cols error! find cols: [" + sqlCols.mkString(",") + "],requires: [" + requireCols.mkString(",") + "]!")
74 | }
75 |
76 | /*
77 | root
78 | |-- job_id: string (nullable = true)
79 | |-- job_stat: string (nullable = true)
80 | |-- event_time: string (nullable = true)
81 | |-- message: string (nullable = true)
82 | |-- context: string (nullable = true)
83 | |-- title: string (nullable = false)
84 | |-- platform: string (nullable = false)
85 | |-- item_id: string (nullable = false)
86 | |-- source: string (nullable = false)
87 | |-- topic: string (nullable = false)
88 | |-- alarm: integer (nullable = false)
89 | */
90 | val filtertab = spark.sql(sql).selectExpr(requireCols :_* ).selectExpr("*" ,
91 | s"'${ruleConf.title}' as $title",
92 | s"'${ruleConf.platform}' as $platform",
93 | s"'${ruleConf.item_id}' as $item_id",
94 | s"'${source_.`type`}' as $source",
95 | s"'${source_.topic}' as $topic"
96 | ).withColumn(context, to_json(col(context)))
97 | .withColumn(alarm, lit(1))
98 |
99 | // logInfo("SQLFilter SQL table filter result schema: ")
100 | // filtertab.printSchema()
101 |
102 | import dt.sql.alarm.conf.PolicyType._
103 | val result = if (policy != null && policy.policy.`type`.isScale){
104 |
105 |       // currently the filter sql only supports a single simple statement (UNION is allowed)
106 | val project = sqlPlan match {
107 | case p if p.isInstanceOf[Union] => p.children.head.asInstanceOf[Project]
108 | case p if p.isInstanceOf[Project] => p.asInstanceOf[Project]
109 | case _ => null
110 | }
111 |
112 | if (project == null) throw new SQLClubException(s"Only supports simple SQL! item_id: ${ruleConf.item_id}"+ ". sql:\n" + sql + " .")
113 |
114 | val output = project.projectList.map(_.sql).mkString(",")
115 | val ssql = s"SELECT $output FROM $tableName"
116 |
117 | logInfo(s"rule item_id: ${ruleConf.item_id}, the simplified SQL: \n" + ssql)
118 |       val sck = checkSQLSyntax(ssql); if (!sck._1) throw new SQLClubException(s"Simplified sql error! item_id: ${ruleConf.item_id}" + ". Simplified sql:\n" + ssql + " .\n\n" + sck._2)
119 |
120 | val table = spark.sql(ssql)
121 | .withColumn(item_id, lit(ruleConf.item_id))
122 | .withColumn(context, to_json(col(context)))
123 |
124 |       // Fetch the job data already cached in redis: scale policies also need the normal records, so even if every record in the current stream is normal it still has to be put into the related cache
125 | val redisCacheKeys = RedisOperations.scanListCacheKeys(AlarmPolicyConf.getCacheKey(policy.item_id) + "*")
126 |       WowLog.logInfo(s"Under rule id: ${policy.item_id}, existing redis cache keys: [" + redisCacheKeys.mkString(", ") + "]")
127 | import spark.implicits._
128 | val cacheKeys = redisCacheKeys.map{
129 | key =>
130 | val its = key.split(":")
131 | if (its.size >= 3) {
132 | (its(1), its(2))
133 | } else null
134 | }.toDF(item_id, job_id)
135 |
136 | val dimTab = filtertab.select(item_id,job_id).union(cacheKeys).groupBy(item_id, job_id).count()
137 |
138 |       // Join the dimension table to drop records unrelated to the current batch
139 | val pendingTab = table.join(dimTab, Seq(item_id, job_id), "inner")
140 | .join(filtertab, getAllSQLFieldName :+ item_id, "left_outer")
141 | .withColumn(alarm, when(isnull(col(alarm)), 0).otherwise(1))
142 |
143 | pendingTab.selectExpr(getAllFieldName :_*)
144 |
145 | } else {
146 | filtertab
147 | }
148 |
149 | val schema = result.schema.map{
150 | structField =>
151 | val name = structField.name
152 | val dataType = if(structField.dataType.isInstanceOf[MapType]) MapType(StringType,StringType) else structField.dataType
153 | (name, dataType)
154 | }.toMap
155 |
156 | if ( !requireSchema.equals(schema) ){
157 |       throw new SQLClubException(s"the filter sql exec result schema error! item_id: ${ruleConf.item_id}, schema: ${result.schema}")
158 | }
159 |
160 |     // filter out dirty data where job_id or event_time is null
161 | result.filter(not(isnull(col(job_id))) and not(isnull(col(event_time)))).distinct()
162 | }
163 | }
164 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/input/BaseInput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.input
2 |
3 |
4 | import dt.sql.alarm.core.{Base, Source}
5 | import org.apache.spark.sql.{Dataset, Row, SparkSession}
6 | import org.reflections.Reflections
7 |
8 |
9 | abstract class BaseInput extends Base {
10 |
11 | def getDataSetStream(spark:SparkSession):Dataset[Row]
12 |
13 | def fullFormat: String
14 |
15 | def shortFormat: String
16 |
17 | }
18 |
19 |
20 | object SourceInfo {
21 |
22 | import scala.collection.JavaConverters._
23 | private val inputWithAnnotation = new Reflections(this.getClass.getPackage.getName)
24 | .getTypesAnnotatedWith(classOf[Source])
25 |
26 | private val sourceMapping = inputWithAnnotation.asScala.map{subclass =>
27 | val name = subclass.getAnnotation(classOf[Source]).name()
28 | (name, subclass)
29 | }.toMap[String, Class[_]]
30 |
31 |
32 | def getSource(name:String):BaseInput = sourceMapping(name).newInstance().asInstanceOf[BaseInput]
33 |
34 | def getAllSource = sourceMapping.keySet
35 |
36 | def sourceExist(name:String) = sourceMapping.contains(name)
37 |
38 | }
39 |
40 |
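Usage note — a minimal sketch (not part of this file) of the annotation-driven registry above: `SourceInfo` scans this package for classes annotated with `@Source` and instantiates them by name, which is how the names configured in `sqlalarm.sources` are resolved:

```scala
import dt.sql.alarm.input.SourceInfo

object SourceInfoSketch extends App {
  println(SourceInfo.getAllSource)           // e.g. Set(kafka, redis)
  println(SourceInfo.sourceExist("kafka"))   // true for the built-in KafkaInput

  val kafka = SourceInfo.getSource("kafka")  // a fresh BaseInput instance
  // kafka.getDataSetStream(spark)           // needs a SparkSession and sqlalarm.input.kafka.* config
  println(kafka.shortFormat)                 // "kafka"
}
```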
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/input/Constants.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.input
2 |
3 | object Constants {
4 | val KAFKA_TOPIC = "kafka.topic"
5 | val KAFKA_SUBSCRIBE_TOPIC_PATTERN = "kafka.subscribe.topic.pattern"
6 |
7 | object SubscribeType extends Enumeration{
8 | type SubscribeType = Value
9 | val assign = Value(0, "assign")
10 | val subscribe = Value(1, "subscribe")
11 | val subscribePattern = Value(2,"subscribePattern")
12 |
13 | override def toString(): String = {
14 | s"{0:$assign, 1:$subscribe, 2:$subscribePattern}"
15 | }
16 | }
17 |
18 | val KAFKA_SERVERS = "kafka.bootstrap.servers"
19 | val KAFKA_GROUP = "kafka.group"
20 | val KAFKA_DEFAULT_GROUP = "sqlalarm_kafka_group"
21 |
22 |
23 | val REDIS_KEYS = "redis.keys"
24 | val REDIS_GROUP = "redis.group"
25 | val REDIS_DEFAULT_GROUP = "sqlalarm_redis_group"
26 | val REDIS_START_OFFSETS = "redis.start.offsets"
27 | val REDIS_CONSUMER_PREFIX = "redis.consumer.prefix"
28 | val REDIS_STREAM_PARALLELISM = "redis.stream.parallelism"
29 | val REDIS_STREAM_BATCH_SIZE = "redis.stream.batch.size"
30 | val REDIS_STREAM_READ_BLOCK_MSEC = "redis.stream.read.block.msec"
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/input/KafkaInput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.input
2 | import org.apache.commons.lang3.StringUtils
3 | import org.apache.spark.sql.{Dataset, Row, SparkSession}
4 | import Constants._
5 | import dt.sql.alarm.conf.KafkaConf
6 | import dt.sql.alarm.core.{Source, WowLog}
7 | import tech.sqlclub.common.exception.SQLClubException
8 | import tech.sqlclub.common.log.Logging
9 | import tech.sqlclub.common.utils.ConfigUtils
10 | import dt.sql.alarm.core.Constants._
11 |
12 | /**
13 |  * Kafka message input
14 | * Created by songgr on 2019/12/20.
15 | */
16 |
17 | @Source(name = "kafka")
18 | class KafkaInput extends BaseInput with Logging {
19 | @transient private var dStream:Dataset[Row] = _
20 | val max_poll_records = 1000
21 | val startingOffsets = "latest"
22 |
23 | override def getDataSetStream(spark: SparkSession): Dataset[Row] = {
24 | process(spark)
25 | dStream
26 | }
27 |
28 | override protected[this] def checkConfig: Option[KafkaConf] = {
29 | val topic = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_TOPIC")
30 | val subscribeTypeIndex = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$KAFKA_SUBSCRIBE_TOPIC_PATTERN", 2)
31 | val servers = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_SERVERS")
32 | val group = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_GROUP", KAFKA_DEFAULT_GROUP)
33 |
34 | val isValid = StringUtils.isNoneBlank(topic) &&
35 | StringUtils.isNoneBlank(servers) &&
36 | StringUtils.isNoneBlank(group)
37 |
38 | if (!isValid) {
39 |       throw new SQLClubException(s"$KAFKA_TOPIC and $KAFKA_SERVERS are needed in kafka input conf and can't be empty!")
40 | }
41 |
42 |     if (subscribeTypeIndex < 0 || subscribeTypeIndex > 2)
43 |       throw new SQLClubException(s"$KAFKA_SUBSCRIBE_TOPIC_PATTERN must be between 0 and 2. Reference: $SubscribeType")
44 |
45 | Some(KafkaConf(SubscribeType(subscribeTypeIndex), topic, servers, group))
46 | }
47 |
48 | override protected[this] def process(session: SparkSession) = {
49 | WowLog.logInfo("Alarm kafka source process....")
50 | val conf = checkConfig
51 | if (conf.isDefined) {
52 | val kafkaConf = conf.get
53 | var options = Map("kafka.bootstrap.servers" -> kafkaConf.servers,
54 | s"${kafkaConf.subscribeType}" -> kafkaConf.topic,
55 | "group.id" -> kafkaConf.group
56 | )
57 |       // default options
58 | options += ("startingOffsets" -> startingOffsets, "max.poll.records" -> max_poll_records.toString, "failOnDataLoss" -> "false")
59 | val lines = session.readStream
60 | .format(fullFormat)
61 | .options(options)
62 | .load()
63 |
64 | dStream = lines.selectExpr(s"'${shortFormat}' as ${SQL_FIELD_SOURCE_NAME}", s"${SQL_FIELD_TOPIC_NAME}", s"CAST(value AS STRING) as ${SQL_FIELD_VALUE_NAME}")
65 | WowLog.logInfo("Alarm kafka source process over!")
66 | }
67 |
68 | }
69 |
70 | override def fullFormat: String = shortFormat
71 |
72 | override def shortFormat: String = "kafka"
73 | }
74 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/input/RedisInput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.input
2 |
3 | import dt.sql.alarm.conf.RedisConf
4 | import dt.sql.alarm.core.{Source, WowLog}
5 | import org.apache.spark.sql.{Dataset, Row, SparkSession}
6 | import dt.sql.alarm.input.Constants._
7 | import org.apache.commons.lang3.StringUtils
8 | import org.apache.spark.sql.types.{StringType, StructField, StructType}
9 | import tech.sqlclub.common.exception.SQLClubException
10 | import tech.sqlclub.common.log.Logging
11 | import tech.sqlclub.common.utils.ConfigUtils
12 | import dt.sql.alarm.core.Constants._
13 |
14 | /**
15 | *
16 | * Created by songgr on 2019/12/20.
17 | */
18 |
19 | @Source(name = "redis")
20 | class RedisInput extends BaseInput with Logging {
21 | @transient private var dStream:Dataset[Row] = _
22 |
23 | override def getDataSetStream(spark: SparkSession): Dataset[Row] = {
24 | process(spark)
25 | dStream
26 | }
27 |
28 | /**
29 |    * Configuration check
30 | */
31 | override protected[this] def checkConfig: Option[RedisConf] = {
32 | val keys = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_KEYS")
33 | val group = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_GROUP", REDIS_DEFAULT_GROUP)
34 | val offsets = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_START_OFFSETS")
35 | val consumer_prefix = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_CONSUMER_PREFIX")
36 | val parallelism = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$REDIS_STREAM_PARALLELISM")
37 | val batch_size = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$REDIS_STREAM_BATCH_SIZE")
38 | val block_msec = ConfigUtils.getLongValue(s"$INPUT_PREFIX.$REDIS_STREAM_READ_BLOCK_MSEC")
39 |
40 | val isValid = StringUtils.isNoneBlank(keys)
41 |
42 |     if (!isValid) throw new SQLClubException(s"$REDIS_KEYS is needed in redis input conf and can't be empty!")
43 |
44 | val conf = RedisConf(keys,offsets,group,consumer_prefix)
45 | if (parallelism > 0) conf.parallelism = parallelism
46 | if (batch_size > 0) conf.batch_size = batch_size
47 | if (block_msec > 0) conf.read_block_msec = block_msec
48 |
49 | Some(conf)
50 | }
51 |
52 | /**
53 |    * Data processing
54 | *
55 | * @param session SparkSession
56 | */
57 | override protected[this] def process(session: SparkSession): Unit = {
58 | WowLog.logInfo("Alarm redis source process....")
59 | val conf = checkConfig
60 | if (conf.isDefined) {
61 | val redisConf = conf.get
62 |
63 | var options = Map("stream.keys" -> redisConf.keys,
64 | "stream.group.name" -> redisConf.group,
65 | "stream.parallelism" -> redisConf.parallelism,
66 | "stream.read.batch.size" -> redisConf.batch_size,
67 | "stream.read.block" -> redisConf.read_block_msec
68 | )
69 |
70 | if (redisConf.consumer_prefix != null && redisConf.consumer_prefix.nonEmpty)
71 | options += ("stream.consumer.prefix" -> redisConf.consumer_prefix)
72 |
73 | if (redisConf.start_offsets != null && redisConf.start_offsets.nonEmpty)
74 | options += ("stream.offsets" -> redisConf.start_offsets)
75 |
76 | val lines = session.readStream
77 | .format(fullFormat)
78 | .options(options.map(kv => (kv._1, kv._2.toString)))
79 | .schema(StructType(Array( // stream fields
80 | StructField("_id", StringType),
81 | StructField("key", StringType),
82 | StructField("value", StringType)
83 | )))
84 | .load()
85 |
86 | dStream = lines.selectExpr(s"'${shortFormat}' as ${SQL_FIELD_SOURCE_NAME}", s"CAST(key AS STRING) as ${SQL_FIELD_TOPIC_NAME}", s"CAST(value AS STRING) as ${SQL_FIELD_VALUE_NAME}")
87 | WowLog.logInfo("Alarm redis source process over!")
88 | }
89 | }
90 |
91 | override def fullFormat: String = shortFormat
92 |
93 | override def shortFormat: String = "redis"
94 | }
95 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/output/BaseOutput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.output
2 |
3 | import dt.sql.alarm.core.{RecordDetail, Base, Sink}
4 | import org.apache.spark.sql.Dataset
5 | import org.reflections.Reflections
6 |
7 | /**
8 | *
9 | * Created by songgr on 2019/12/25.
10 | */
11 | abstract class BaseOutput extends Base {
12 |
13 | def process(data:Dataset[RecordDetail])
14 |
15 | def fullFormat: String
16 |
17 | def shortFormat: String
18 |
19 | }
20 |
21 | object SinkInfo {
22 |
23 | import scala.collection.JavaConverters._
24 | private val outputwithAnnotation = new Reflections(this.getClass.getPackage.getName)
25 | .getTypesAnnotatedWith(classOf[Sink])
26 |
27 | private val sinkMapping = outputwithAnnotation.asScala.map{ subclass =>
28 | val name = subclass.getAnnotation(classOf[Sink]).name()
29 | (name, subclass)
30 | }.toMap[String, Class[_]]
31 |
32 |
33 | def getSink(name:String):BaseOutput = sinkMapping(name).newInstance().asInstanceOf[BaseOutput]
34 |
35 | def getAllSink = sinkMapping.keySet
36 |
37 | def sinkExist(name:String) = sinkMapping.contains(name)
38 |
39 | }
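Usage note — a sketch (a hypothetical helper, not part of the project) showing how the `SinkInfo` registry above pairs with the `sqlalarm.sinks` setting, mirroring what `SparkRuntime.getSinks` does before fanning alarm records out to each sink:

```scala
import dt.sql.alarm.core.RecordDetail
import dt.sql.alarm.output.SinkInfo
import org.apache.spark.sql.Dataset

object SinkDispatchSketch {
  // resolve configured sink names and send the alarm records to every sink
  def dispatch(sinkNames: Seq[String], records: Dataset[RecordDetail]): Unit = {
    val unknown = sinkNames.filterNot(SinkInfo.sinkExist)
    require(unknown.isEmpty, s"unsupported sinks: ${unknown.mkString(",")}, available: ${SinkInfo.getAllSink}")
    sinkNames.map(SinkInfo.getSink).foreach(_.process(records))
  }
}
```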
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/output/ConsoleOutput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.output
2 | import java.util.concurrent.atomic.AtomicBoolean
3 |
4 | import dt.sql.alarm.conf.Conf
5 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog}
6 | import tech.sqlclub.common.log.Logging
7 | import tech.sqlclub.common.utils.ConfigUtils
8 | import org.apache.spark.sql.{Dataset, SparkSession}
9 |
10 |
11 | @Sink(name = "console")
12 | class ConsoleOutput extends BaseOutput with Logging {
13 | var runtimeConfig:Map[String,String] = _
14 | var numRows = 20
15 | var truncate = true
16 | var flag = new AtomicBoolean(false)
17 | WowLog.logInfo("Console sink initialization......")
18 |
19 | override protected[this] def checkConfig: Option[Conf] = None
20 |
21 |
22 | override protected[this] def process(session: SparkSession): Unit = {
23 | if (!flag.get) {
24 | flag.synchronized {
25 | if (!flag.get) {
26 | runtimeConfig = session.conf.getAll
27 | numRows = runtimeConfig.getOrElse(Constants.showNumRows,
28 | ConfigUtils.getStringValue(Constants.showNumRows, "20")).toInt
29 | truncate = runtimeConfig.getOrElse(Constants.showTruncate,
30 | ConfigUtils.getStringValue(Constants.showTruncate, "true")).toBoolean
31 | flag.set(true)
32 | }
33 | }
34 | }
35 | }
36 |
37 | override def process(data: Dataset[RecordDetail]): Unit = {
38 | process(data.sparkSession)
39 | WowLog.logInfo("Alarm console sink process....")
40 | data.show(numRows, truncate)
41 | WowLog.logInfo("Alarm console sink process over!")
42 | }
43 |
44 | override def fullFormat: String = shortFormat
45 |
46 | override def shortFormat: String = "console"
47 | }
48 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/output/Constants.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.output
2 |
3 | object Constants {
4 |
5 | val showNumRows = "spark.show.table.numRows"
6 | val showTruncate = "spark.show.table.truncate"
7 |
8 | val jdbcUrl = "jdbc.url"
9 | val jdbcDriver = "jdbc.driver"
10 | val jdbcUser = "jdbc.user"
11 | val jdbcPassword = "jdbc.password"
12 | val jdbcTable = "jdbc.table"
13 | val jdbcImplClass = "jdbc.implClass"
14 | val jdbcNumPartitions = "jdbc.numPartitions"
15 | val jdbcBatchsize = "jdbc.batchsize"
16 | val jdbcMode = "jdbc.mode"
17 |
18 |
19 | val kafkaImplClass = "kafka.implClass"
20 | val KAFKA_ACKS = "kafka.acks"
21 | val KAFKA_KEY_SERIALIZER_CLASS = "key.serializer.class"
22 | val KAFKA_VALUE_SERIALIZER_CLASS = "value.serializer.class"
23 | val KAFKA_TOPIC = dt.sql.alarm.input.Constants.KAFKA_TOPIC
24 | val KAFKA_SERVERS = dt.sql.alarm.input.Constants.KAFKA_SERVERS
25 | }
26 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/output/JdbcOutput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.output
2 |
3 | import java.util.concurrent.atomic.AtomicBoolean
4 |
5 | import dt.sql.alarm.conf.JdbcConf
6 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog}
7 | import org.apache.spark.sql.{Dataset, SparkSession}
8 | import tech.sqlclub.common.log.Logging
9 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils}
10 | import dt.sql.alarm.core.Constants._
11 | import dt.sql.alarm.output.Constants._
12 | import org.apache.commons.lang3.StringUtils
13 | import tech.sqlclub.common.exception.SQLClubException
14 |
15 | /**
16 | * jdbc sink
17 | * Created by songgr on 2020/01/06.
18 | */
19 | @Sink(name = "jdbc")
20 | class JdbcOutput extends BaseOutput with Logging {
21 | var jdbcConf:JdbcConf = _
22 | var flag = new AtomicBoolean(false)
23 | WowLog.logInfo("JDBC sink initialization......")
24 |
25 | override def fullFormat: String = shortFormat
26 |
27 | override def shortFormat: String = "jdbc"
28 |
29 | override def process(data: Dataset[RecordDetail]): Unit = {
30 | process(data.sparkSession)
31 | WowLog.logInfo("Alarm JDBC sink process....")
32 |
33 | val format = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcImplClass", fullFormat)
34 |
35 | val json = JacksonUtils.toJson(jdbcConf)
36 | val options = JacksonUtils.fromJson(json, classOf[Map[String,AnyRef]]).map(kv => (kv._1, kv._2.toString))
37 |
38 | data.drop(RecordDetail.alarm).write.format(format).options(options).mode(jdbcConf.mode).save(jdbcConf.dbtable)
39 |
40 | WowLog.logInfo("Alarm JDBC sink process over!")
41 |
42 | }
43 |
44 | /**
45 |    * Configuration check
46 | */
47 | override protected[this] def checkConfig: Option[JdbcConf] = {
48 | val url = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcUrl")
49 | val driver = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcDriver")
50 | val user = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcUser")
51 | val password = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcPassword")
52 | val table = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcTable")
53 | val numPartitions = ConfigUtils.getIntValue(s"$OUTPUT_PREFIX.$jdbcNumPartitions")
54 | val batchsize = ConfigUtils.getIntValue(s"$OUTPUT_PREFIX.$jdbcBatchsize")
55 | val mode = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcMode")
56 |
57 | val isValid = StringUtils.isNoneBlank(url) &&
58 | StringUtils.isNoneBlank(driver) &&
59 | StringUtils.isNoneBlank(user)
60 |
61 | if (!isValid) {
62 |       throw new SQLClubException(s"$jdbcUrl, $jdbcDriver and $jdbcUser are needed in jdbc sink conf and can't be empty!")
63 | }
64 |
65 | val conf = JdbcConf(url, driver, user, password)
66 | if (StringUtils.isNoneBlank(table))
67 | conf.dbtable = table
68 | if (numPartitions > 0)
69 | conf.numPartitions = numPartitions
70 | if (batchsize > 0)
71 | conf.batchsize = batchsize
72 | if (StringUtils.isNoneBlank(mode))
73 | conf.mode = mode
74 |
75 | Some(conf)
76 | }
77 |
78 | /**
79 |    * Data processing
80 | *
81 | * @param session SparkSession
82 | */
83 | override protected[this] def process(session:SparkSession): Unit = {
84 | if (!flag.get) {
85 | flag.synchronized {
86 | if (!flag.get) {
87 | jdbcConf = checkConfig.get
88 | flag.set(true)
89 | }
90 | }
91 | }
92 | }
93 | }
94 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/output/KafkaOutput.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.output
2 |
3 | import java.util.concurrent.atomic.AtomicBoolean
4 |
5 | import dt.sql.alarm.conf.KafkaConf
6 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog}
7 | import org.apache.spark.sql.{Dataset, SparkSession}
8 | import tech.sqlclub.common.log.Logging
9 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils}
10 | import dt.sql.alarm.core.Constants.OUTPUT_PREFIX
11 | import dt.sql.alarm.output.Constants._
12 | import org.apache.commons.lang3.StringUtils
13 | import org.apache.kafka.clients.producer.ProducerConfig
14 | import org.apache.kafka.common.serialization.StringSerializer
15 | import tech.sqlclub.common.exception.SQLClubException
16 |
17 | /**
18 | * kafka sink
19 | * Created by songgr on 2020/01/08.
20 | */
21 |
22 | @Sink(name = "kafka")
23 | class KafkaOutput extends BaseOutput with Logging {
24 | val KAFKA_KEY_ATTRIBUTE_NAME = "key"
25 | val KAFKA_VALUE_ATTRIBUTE_NAME = "value"
26 | val KAFKA_BOOTSTRAP_SERVERS_NAME = "kafka.bootstrap.servers"
27 | val KAFKA_TOPIC_NAME = "topic"
28 |
29 | var kafkaConf:KafkaConf = _
30 | var flag = new AtomicBoolean(false)
31 | WowLog.logInfo("Kafka sink initialization......")
32 |
33 | override def process(data: Dataset[RecordDetail]): Unit = {
34 | val spark = data.sparkSession
35 | process(spark)
36 | WowLog.logInfo("Alarm Kafka sink process....")
37 |
38 | val format = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$kafkaImplClass", fullFormat)
39 | var options = Map(KAFKA_BOOTSTRAP_SERVERS_NAME -> kafkaConf.servers,
40 | KAFKA_TOPIC_NAME -> kafkaConf.topic
41 | )
42 | options += (ProducerConfig.ACKS_CONFIG -> ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_ACKS", "-1"))
43 | options += (ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG ->
44 | ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_KEY_SERIALIZER_CLASS", classOf[StringSerializer].getName))
45 | options += (ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG ->
46 | ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_VALUE_SERIALIZER_CLASS", classOf[StringSerializer].getName))
47 |
48 | import spark.implicits._
49 | data.map{
50 | record =>
51 | (StringUtils.join(Array(record.job_id,record.job_stat), ":")
52 | , JacksonUtils.toJson(record)
53 | )
54 | }.toDF(KAFKA_KEY_ATTRIBUTE_NAME,KAFKA_VALUE_ATTRIBUTE_NAME).write
55 | .format(format).options(options).mode("append").save()
56 |
57 | WowLog.logInfo("Alarm Kafka sink process over!")
58 | }
59 |
60 | override def fullFormat: String = shortFormat
61 |
62 | override def shortFormat: String = "kafka"
63 |
64 | /**
65 |    * Configuration check
66 | */
67 | override protected[this] def checkConfig: Option[KafkaConf] = {
68 | val topic = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_TOPIC")
69 | val servers = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_SERVERS")
70 |
71 | val isValid = StringUtils.isNoneBlank(topic) &&
72 | StringUtils.isNoneBlank(servers)
73 |
74 | if (!isValid) {
75 |       throw new SQLClubException(s"$KAFKA_TOPIC and $KAFKA_SERVERS are needed in kafka sink conf and can't be empty!")
76 | }
77 |
78 | Some(KafkaConf(null, topic, servers, null))
79 | }
80 |
81 | /**
82 |    * Data processing
83 | *
84 | * @param session SparkSession
85 | */
86 | override protected[this] def process(session: SparkSession): Unit = {
87 | if (!flag.get) {
88 | flag.synchronized {
89 | if (!flag.get) {
90 | kafkaConf = checkConfig.get
91 | flag.set(true)
92 | }
93 | }
94 | }
95 | }
96 | }
97 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/PolicyAnalyzeEngine.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import dt.sql.alarm.core.Constants.SQL_FIELD_VALUE_NAME
5 | import dt.sql.alarm.core.{RecordDetail, RedisOperations, WowLog}
6 | import dt.sql.alarm.core.RecordDetail.{event_time, item_id, job_id, job_stat}
7 | import org.apache.spark.sql.functions.col
8 | import org.apache.spark.sql.{Dataset, Row, SaveMode}
9 |
10 | /**
11 |  * Noise reduction policy analysis engine
12 | * Created by songgr on 2020/01/09.
13 | */
14 | abstract class PolicyAnalyzeEngine {
15 |
16 | def analyse(policy: AlarmPolicyConf, records:Dataset[Row]):Array[EngineResult]
17 |
18 | def addCache(cacheDf: Dataset[Row], mode:SaveMode):Unit = {
19 | WowLog.logInfo("Add alarm records into redis cache...")
20 | cacheDf.persist()
21 | try {
22 | if (cacheDf.count() > 0) {
23 | val jobInfos = cacheDf.groupBy(item_id, job_id, job_stat).count().collect().map{
24 | row =>
25 | (row.getAs[String](item_id), row.getAs[String](job_id), row.getAs[String](job_stat))
26 | }
27 |         WowLog.logInfo(s"cache entries:\n ${jobInfos.mkString("\n")}")
28 | jobInfos.foreach{
29 | jobInfo =>
30 | val cache = cacheDf.filter(col(item_id) === jobInfo._1 and col(job_id) === jobInfo._2 and col(job_stat) === jobInfo._3)
31 | .select(col(SQL_FIELD_VALUE_NAME)).orderBy(col(event_time))
32 |               .repartition(1) // repartition to a single partition so the write stays ordered
33 |
34 | val key = AlarmPolicyConf.getCacheKey(jobInfo._1, jobInfo._2, jobInfo._3)
35 | RedisOperations.setListCache(key, cache, mode)
36 | WowLog.logInfo(s"add cache records, key: $key, mode: ${mode.name}")
37 | }
38 | }
39 | } finally {
40 | cacheDf.unpersist()
41 | }
42 | }
43 |
44 | }
45 |
46 |
47 | case class EngineResult(hasWarning:Boolean,
48 | lastAlarmRecord:RecordDetail,
49 | firstAlarmRecord:RecordDetail,
50 | reduceCount:Int
51 | )
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/engine/AggWindow.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce.engine
2 |
3 | // aggregation window
4 | trait AggWindow
5 | // time window
6 | object TimeWindow extends AggWindow
7 | // time + count window
8 | object TimeCountWindow extends AggWindow
9 | // count window
10 | object NumberWindow extends AggWindow
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByNumScale.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce.engine
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import dt.sql.alarm.core.Constants._
5 | import dt.sql.alarm.core.RecordDetail._
6 | import dt.sql.alarm.core.{RecordDetail, WowLog}
7 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine}
8 | import org.apache.spark.sql.expressions.Window
9 | import org.apache.spark.sql.functions._
10 | import org.apache.spark.sql.{Dataset, Row, SaveMode}
11 | import tech.sqlclub.common.utils.JacksonUtils
12 |
13 | /**
14 | *
15 | * Created by songgr on 2020/03/10.
16 | */
17 | class ReduceByNumScale(scale: Scale) extends PolicyAnalyzeEngine{
18 |
19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]): Array[EngineResult] = {
20 | WowLog.logInfo("Noise Reduction Policy: ReduceByNumScale analyzing....")
21 |
22 | val table_rank = records.withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value
23 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) )
24 |       .filter(col(SQL_FIELD_RANK_NAME) <= policy.window.value) // keep the latest n records for analysis
25 |
26 | table_rank.persist()
27 |
28 | try {
29 | val alarmEndpoints = table_rank
30 | .filter(col(alarm) === 1)
31 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value
32 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) )
33 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value
34 | over( Window.partitionBy(item_id, job_id) ) )
35 | .groupBy(item_id, job_id)
36 | .agg(
37 |           first(SQL_FIELD_CURRENT_RECORD_NAME).alias(SQL_FIELD_CURRENT_RECORD_NAME), // current (latest) alarm record
38 |           first(SQL_FIELD_EARLIEST_RECORD_NAME).alias(SQL_FIELD_EARLIEST_RECORD_NAME) // earliest alarm record in history
39 | )
40 |
41 | val pendingRecords = table_rank.groupBy(item_id, job_id)
42 | .agg(
43 |           count(alarm).alias(SQL_FIELD_TOTAL_COUNT_NAME), // total record count
44 |           sum(alarm).alias(SQL_FIELD_ALARM_COUNT_NAME), // alarm record count
45 |           (sum(alarm) / count(alarm)).alias(SQL_FIELD_ALARM_PERCENT_NAME) // alarm record ratio
46 | )
47 |
48 | val alarmRecords =
49 | scale match {
50 | case Number =>
51 | pendingRecords.filter(col(SQL_FIELD_ALARM_COUNT_NAME) > policy.policy.getValue)
52 | case Percent =>
53 |             pendingRecords.filter(col(SQL_FIELD_TOTAL_COUNT_NAME) >= policy.window.value and // the total count must reach the required number
54 | col(SQL_FIELD_ALARM_PERCENT_NAME) > policy.policy.getValue)
55 | }
56 |
57 | val result = alarmRecords.join(alarmEndpoints, Seq(item_id,job_id), "left_outer").collect().map{
58 | row =>
59 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
60 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail])
61 | val count = row.getAs[Long](SQL_FIELD_ALARM_COUNT_NAME)
62 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue())
63 | }
64 |
65 |       // records that did not trigger an alarm still need to go into the cache
66 | val cacheDF = table_rank.join(alarmRecords, Seq(item_id,job_id) , "left_outer")
67 | .filter(isnull(alarmRecords(SQL_FIELD_ALARM_PERCENT_NAME)))
68 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME))
69 |
70 | addCache(cacheDF, SaveMode.Overwrite)
71 |
72 | result
73 |
74 | } finally {
75 | table_rank.unpersist()
76 | }
77 |
78 | }
79 | }
80 |
81 |
82 |
83 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByTimeScale.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce.engine
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import dt.sql.alarm.core.Constants._
5 | import dt.sql.alarm.core.RecordDetail._
6 | import dt.sql.alarm.core.{RecordDetail, WowLog}
7 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine}
8 | import org.apache.spark.sql.expressions.Window
9 | import org.apache.spark.sql.functions._
10 | import org.apache.spark.sql.{Dataset, Row, SaveMode}
11 | import tech.sqlclub.common.utils.JacksonUtils
12 |
13 | /**
14 | *
15 | * Created by songgr on 2020/03/11.
16 | */
17 | class ReduceByTimeScale(scale: Scale) extends PolicyAnalyzeEngine{
18 |
19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]): Array[EngineResult] = {
20 | WowLog.logInfo("Noise Reduction Policy: ReduceByTimeScale analyzing....")
21 |
22 | val table = records
23 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time
24 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) )
25 |       // keep the most recent time window T for analysis
26 | .filter(unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) -
27 | unix_timestamp(col(event_time)) <= policy.window.getTimeWindowSec
28 | )
29 |
30 | table.persist()
31 |
32 | try {
33 | val alarmEndpoints = table
34 | .filter(col(alarm) === 1)
35 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value
36 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) )
37 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value
38 | over( Window.partitionBy(item_id, job_id) ) )
39 | .groupBy(item_id, job_id)
40 | .agg(
41 |           first(SQL_FIELD_CURRENT_RECORD_NAME).alias(SQL_FIELD_CURRENT_RECORD_NAME), // current (latest) alarm record
42 |           first(SQL_FIELD_EARLIEST_RECORD_NAME).alias(SQL_FIELD_EARLIEST_RECORD_NAME) // earliest alarm record in history
43 | )
44 |
45 |
46 | val pendingRecords = table.groupBy(item_id, job_id)
47 | .agg(
48 |           (unix_timestamp(max(event_time)) - unix_timestamp(min(event_time))).alias(SQL_FIELD_EVENT_TIME_DURATION_NAME), // event time span
49 |           count(alarm).alias(SQL_FIELD_TOTAL_COUNT_NAME), // total record count
50 |           sum(alarm).alias(SQL_FIELD_ALARM_COUNT_NAME), // alarm record count
51 |           (sum(alarm) / count(alarm)).alias(SQL_FIELD_ALARM_PERCENT_NAME) // alarm record ratio
52 | )
53 |
54 |
55 | val alarmRecords =
56 | scale match {
57 | case Number =>
58 |             pendingRecords.filter(col(SQL_FIELD_ALARM_COUNT_NAME) > policy.policy.getValue) // the alarm count reaches the threshold
59 | case Percent =>
60 |             pendingRecords.filter(col(SQL_FIELD_EVENT_TIME_DURATION_NAME) >= (policy.window.getTimeWindowSec * policy.policy.getValue) and // the time span reaches the window
61 | col(SQL_FIELD_ALARM_PERCENT_NAME) > policy.policy.getValue)
62 | }
63 |
64 | val result = alarmRecords.join(alarmEndpoints, Seq(item_id,job_id), "left_outer").collect().map{
65 | row =>
66 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
67 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail])
68 | val count = row.getAs[Long](SQL_FIELD_ALARM_COUNT_NAME)
69 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue())
70 | }
71 |
72 |       // records that did not trigger an alarm still need to go into the cache
73 | val cacheDF = table.join(alarmRecords, Seq(item_id,job_id) , "left_outer")
74 | .filter(isnull(alarmRecords(SQL_FIELD_ALARM_PERCENT_NAME)))
75 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME))
76 |
77 | addCache(cacheDF, SaveMode.Overwrite)
78 |
79 | result
80 |
81 | } finally {
82 | table.unpersist()
83 | }
84 |
85 | }
86 |
87 | }
88 |
89 |
90 |
91 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByWindow.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce.engine
2 |
3 | import dt.sql.alarm.conf.AlarmPolicyConf
4 | import dt.sql.alarm.core.{RecordDetail, WowLog}
5 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine}
6 | import org.apache.spark.sql.expressions.Window
7 | import org.apache.spark.sql.{Dataset, Row, SaveMode}
8 | import org.apache.spark.sql.functions._
9 | import dt.sql.alarm.core.Constants._
10 | import tech.sqlclub.common.utils.JacksonUtils
11 | import dt.sql.alarm.core.RecordDetail._
12 |
13 | /**
14 | *
15 | * Created by songgr on 2020/01/09.
16 | */
17 | class ReduceByWindow(window: AggWindow) extends PolicyAnalyzeEngine {
18 |
19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]):Array[EngineResult] = {
20 | WowLog.logInfo("Noise Reduction Policy: ReduceByWindow analyzing....")
21 |
22 | // filter alarm records
23 | val table = records.filter(col(alarm) === 1)
24 |
25 | // group by job_id,job_stat order by event_time desc
26 | val table_rank = table
27 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value
28 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
29 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value
30 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
31 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time
32 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
33 | .withColumn(SQL_FIELD_EARLIEST_EVENT_TIME_NAME, last(event_time) // first event time
34 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
35 | .withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value
36 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) )
37 |         .withColumn(SQL_FIELD_DATAFROM_NAME, min(SQL_FIELD_DATAFROM_NAME) // datafrom becomes 'cache' if any record in the group came from the redis cache
38 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
39 | .withColumn(SQL_FIELD_COUNT_NAME, count(lit(1)) // record count
40 | over( Window.partitionBy(item_id, job_id, job_stat) ) )
41 |
42 | val pendingRecords = table_rank.filter(col(SQL_FIELD_RANK_NAME) === 1).
43 | select(item_id, job_id, job_stat, SQL_FIELD_CURRENT_EVENT_TIME_NAME,SQL_FIELD_CURRENT_RECORD_NAME,
44 | SQL_FIELD_EARLIEST_EVENT_TIME_NAME,SQL_FIELD_EARLIEST_RECORD_NAME,SQL_FIELD_DATAFROM_NAME,SQL_FIELD_COUNT_NAME)
45 |
46 | pendingRecords.persist()
47 |
48 | try {
49 | // first alarm
50 | val firstAlarmRecords = if (policy.policy.alertFirst) {
51 | val firstAlarmRecords = pendingRecords.filter(
52 | col(SQL_FIELD_DATAFROM_NAME) === SQL_FIELD_STREAM_NAME and // only from stream
53 | col(SQL_FIELD_COUNT_NAME) >= 1 // and count>=1
54 | )
55 |
56 | firstAlarmRecords.collect().map {
57 | row=>
58 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
59 | EngineResult(true, firstAlarmRecord, firstAlarmRecord, 1)
60 | }
61 |
62 | } else {
63 | Array(EngineResult(false, null, null, -1))
64 | }
65 |
66 | val alarmRecords = window match {
67 | case NumberWindow =>
68 | pendingRecords.filter(col(SQL_FIELD_COUNT_NAME) >= policy.window.value )
69 | case TimeWindow =>
70 | pendingRecords.filter(
71 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) -
72 | unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)) >= policy.window.getTimeWindowSec
73 | )
74 |           // n records within the latest time window T
75 | case TimeCountWindow =>
76 | pendingRecords.filter(
77 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) -
78 | unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)) <= policy.window.getTimeWindowSec
79 | and
80 | col(SQL_FIELD_COUNT_NAME) >= policy.window.count
81 | )
82 | }
83 |
84 | val streamAlarmRecords = alarmRecords.collect().map{
85 | row =>
86 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail])
87 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail])
88 | val count = row.getAs[Long](SQL_FIELD_COUNT_NAME)
89 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue())
90 | }
91 |
92 | WowLog.logInfo(s"Noise Reduction Policy: ReduceByWindow analysis completed! windowType:${policy.window.`type`}, alarm records size:${streamAlarmRecords.length}")
93 |
94 |       // records that did not trigger an alarm still need to go into the cache
95 | val cacheDF = table.join(alarmRecords, Seq(item_id,job_id,job_stat) , "left_outer")
96 |         .filter(isnull(alarmRecords(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) and table(SQL_FIELD_DATAFROM_NAME) === SQL_FIELD_STREAM_NAME) // only cache records coming from the stream
97 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME))
98 |
99 | addCache(cacheDF, SaveMode.Append)
100 |
101 | firstAlarmRecords ++ streamAlarmRecords
102 | } finally {
103 | pendingRecords.unpersist()
104 | }
105 | }
106 | }
107 |
108 |
109 |
--------------------------------------------------------------------------------
/sa-core/src/main/java/dt/sql/alarm/reduce/engine/Scale.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.reduce.engine
2 |
3 | // scale (threshold unit)
4 | trait Scale
5 | // percentage
6 | object Percent extends Scale
7 | // count
8 | object Number extends Scale
9 |
10 |
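Usage note — a short sketch (an assumption about the wiring; the actual mapping from AlarmPolicyConf to an engine lives in AlarmReduce, not shown here) of how `Scale` and `AggWindow` parameterize the reduce engines defined in this package:

```scala
import dt.sql.alarm.reduce.PolicyAnalyzeEngine
import dt.sql.alarm.reduce.engine._

object ReduceEngineSketch {
  // scale-based engines: Number thresholds on alarm count, Percent on alarm ratio
  val byCountInWindow: PolicyAnalyzeEngine = new ReduceByNumScale(Number)
  val byRatioInTime:   PolicyAnalyzeEngine = new ReduceByTimeScale(Percent)

  // window-based engine: TimeWindow, NumberWindow or TimeCountWindow
  val byWindow: PolicyAnalyzeEngine = new ReduceByWindow(TimeCountWindow)

  // every engine exposes analyse(policy: AlarmPolicyConf, records: Dataset[Row]): Array[EngineResult]
}
```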
--------------------------------------------------------------------------------
/sa-core/src/main/resources/application.conf:
--------------------------------------------------------------------------------
1 | //// spark conf
2 | //spark {
3 | // streaming.trigger.time.interval.msec = 1000
4 | // streaming.future.task.timeout.msec = 300000
5 | // show.table.numRows = 100
6 | // show.table.truncate = true
7 | // redis.cache.data.partition.num = 8
8 | //
9 | // redis.host = 127.0.0.1
10 | // redis.port = 6379
11 | // redis.db = 4
12 | //// redis.auth =
13 | //// redis.timeout =
14 | //// redis.max.pipeline.size =
15 | //// redis.scan.count =
16 | //}
17 | //
18 | //
19 | //sqlalarm {
20 | // // event sources, can more than one
21 | // sources = "kafka,redis"
22 | //
23 | // // alarm event input source conf
24 | // input {
25 | // kafka {
26 | // topic = "sqlalarm_event"
27 | // subscribe.topic.pattern = 1
28 | // bootstrap.servers = "127.0.0.1:9092"
29 | // group = "sqlalarm_group"
30 | // }
31 | // redis {
32 | // keys = "sqlalarm_redis_event"
33 | // group = "sqlalarm_redis_group"
34 | // batch.size = 100
35 | // }
36 | // }
37 | //
38 | // // alarm sink, can more than one
39 | // sinks = "console,kafka,jdbc"
40 | //
41 | // // alarm record sink canal conf
42 | // output {
43 | // kafka {
44 | //
45 | // }
46 | // jdbc {
47 | // url = "jdbc:mysql://127.0.0.1:3306/test?characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false"
48 | // driver = "com.mysql.jdbc.Driver"
49 | // user = "xxx"
50 | // password = "xxx"
51 | // }
52 | // }
53 | //
54 | // checkpointLocation = "checkpoint"
55 | //
56 | // // alarm alert conf, use rest api usually
57 | // alert {
58 | // pigeonApi = "https://dt.sqlclub/api/pigeon"
59 | // }
60 | //}
--------------------------------------------------------------------------------
/sa-core/src/main/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # This is the configuration for logging displayed in the Application Server
2 | #log4j.rootCategory=info, stdout, Rolling, debug
3 | log4j.rootCategory=info, stdout
4 |
5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
7 | log4j.appender.stdout.layout.ConversionPattern=[sql-alarm] %d{yyyy-MM-dd HH:mm:ss,SSS} %p [%t] %c{1}.%M(%L) | %m%n
8 |
9 | log4j.appender.Rolling=org.apache.log4j.RollingFileAppender
10 | log4j.appender.Rolling.Encoding=UTF-8
11 | log4j.appender.Rolling.File=log/sql-alarm.log
12 | log4j.appender.Rolling.MaxFileSize=5120KB
13 | log4j.appender.Rolling.MaxBackupIndex=10
14 | log4j.appender.Rolling.layout=org.apache.log4j.PatternLayout
15 | log4j.appender.Rolling.layout.ConversionPattern=[sql-alarm] %d{yyyy-MM-dd HH:mm:ss,SSS} %p [%t] %c{1}.%M(%L) | %m%n
16 |
17 | log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN
18 | log4j.logger.org.apache.spark.executor.Executor=WARN
19 | log4j.logger.org.apache.spark.ContextCleaner=WARN
20 | log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN
21 | log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN
22 | log4j.logger.org.apache.spark.sql.execution.columnar.InMemoryTableScanExec=WARN
23 | log4j.logger.org.apache.spark.storage.BlockManager=WARN
24 | log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN
25 | log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN
--------------------------------------------------------------------------------
/sa-core/src/test/java/dt/sql/alarm/test/InputSuite.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.test
2 |
3 | import dt.sql.alarm.input.{KafkaInput, RedisInput}
4 | import org.scalatest.FunSuite
5 |
6 |
7 | class InputSuite extends FunSuite with LocalSparkApp {
8 |
9 | test("kafka input test") {
10 | val session = spark
11 | val ds = new KafkaInput().getDataSetStream(session)
12 | assert(ds != null)
13 | }
14 |
15 | test("redis stream input") {
16 | val session = spark
17 | val ds = new RedisInput().getDataSetStream(session)
18 | assert(ds != null)
19 | }
20 |
21 | }
22 |
--------------------------------------------------------------------------------
/sa-core/src/test/java/dt/sql/alarm/test/LocalSparkApp.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.test
2 |
3 | import org.apache.spark.sql.SparkSession
4 |
5 | trait LocalSparkApp {
6 |
7 | def spark = {
8 | SparkSession.builder()
9 | .appName("LocalSparkApp")
10 | .master("local[*]")
11 | .getOrCreate()
12 | }
13 |
14 | }
15 |
--------------------------------------------------------------------------------
/sa-core/src/test/java/dt/sql/alarm/test/RedisOperationsSuite.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.test
2 |
3 | import dt.sql.alarm.core.Constants.{ALARM_CACHE, appName, master}
4 | import dt.sql.alarm.core.{RecordDetail, RedisOperations, SparkRuntime}
5 | import org.apache.spark.sql.SaveMode
6 | import org.scalatest.FunSuite
7 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils}
8 |
9 | /**
10 | *
11 | * Created by songgr on 2020/01/13.
12 | */
13 | class RedisOperationsSuite extends FunSuite {
14 |
15 | test("rule") {
16 | ConfigUtils.configBuilder(Map(
17 | appName -> "RedisOperationsSuite",
18 | master -> "local[2]",
19 | "spark.redis.host" -> "127.0.0.1",
20 | "spark.redis.port" -> "6379",
21 | "spark.redis.db" -> "4"
22 | ))
23 |
24 | val key = "sqlalarm_rule:kafka:sqlalarm_event"
25 | val field = "uuid00000001"
26 |
27 | val value =
28 | """
29 | |{
30 | | "item_id":"uuid00000001",
31 | | "platform":"alarm",
32 | | "title":"sql alarm test",
33 | | "source":{
34 | | "type":"kafka",
35 | | "topic":"sqlalarm_event"
36 | | },
37 | | "filter":{
38 | | "table":"fail_job",
39 | | "structure":[
40 | | {
41 | | "name":"job_name",
42 | | "type":"string",
43 | | "xpath":"$.job_name"
44 | | },
45 | | {
46 | | "name":"job_owner",
47 | | "type":"string",
48 | | "xpath":"$.job_owner"
49 | | },
50 | | {
51 | | "name":"job_stat",
52 | | "type":"string",
53 | | "xpath":"$.job_stat"
54 | | },
55 | | {
56 | | "name":"job_time",
57 | | "type":"string",
58 | | "xpath":"$.job_time"
59 | | }
60 | | ],
61 | | "sql":"select job_name as job_id,job_stat,job_time as event_time, job_stat as message, map('job_owner',job_owner) as context from fail_job where job_stat='Fail'"
62 | | }
63 | |}
64 | """.stripMargin
65 |
66 |
67 | RedisOperations.addTableCache(key, field, value)
68 |
69 |
70 | }
71 |
72 |
73 | test("policy") {
74 | ConfigUtils.configBuilder(Map(
75 | appName -> "RedisOperationsSuite",
76 | master -> "local[2]",
77 | "spark.redis.host" -> "127.0.0.1",
78 | "spark.redis.port" -> "6379",
79 | "spark.redis.db" -> "4"
80 | ))
81 |
82 | val key = "sqlalarm_policy:kafka:sqlalarm_event"
83 | val field = "uuid00000001"
84 |
85 | val value =
86 | """
87 | |{
88 | | "item_id" : "uuid00000001",
89 | | "window": {
90 | | "type": "time",
91 | | "value": 10,
92 | | "unit": "m"
93 | | },
94 | | "policy":{
95 | | "type":"scale",
96 | | "agg":"count",
97 | | "value":100,
98 | | "first_alert": 1
99 | | }
100 | |}
101 | """.stripMargin
102 |
103 | val value1 =
104 | """
105 | |{
106 | | "item_id" : "uuid00000001",
107 | | "window": {
108 | | "type": "time",
109 | | "value": 10,
110 | | "unit": "m"
111 | | },
112 | | "policy":{
113 | | "type":"absolute"
114 | | }
115 | |}
116 | """.stripMargin
117 |
118 | val value2 =
119 | """
120 | |{
121 | | "item_id" : "uuid00000001",
122 | | "window": {
123 | | "type": "number",
124 | | "value": 4,
125 | | "unit": "n"
126 | | },
127 | | "policy":{
128 | | "type":"scale",
129 | | "unit":"number",
130 | | "value":2,
131 | | "first_alert": 1
132 | | }
133 | |}
134 | """.stripMargin
135 |
136 | val value3 =
137 | """
138 | |{
139 | | "item_id" : "uuid00000001",
140 | | "window": {
141 | | "type": "time",
142 | | "value": 10,
143 | | "unit": "m"
144 | | },
145 | | "policy":{
146 | | "type":"scale",
147 | | "unit":"number",
148 | | "value":2,
149 | | "first_alert": 1
150 | | }
151 | |}
152 | """.stripMargin
153 |
154 |     // value, value1 and value2 are alternative policy payloads; only value3 is written below.
155 | RedisOperations.addTableCache(key, field, value3)
156 |
157 |
158 | }
159 |
160 |
161 | test("cache") {
162 | ConfigUtils.configBuilder(Map(
163 | appName -> "RedisOperationsSuite",
164 | master -> "local[2]",
165 | "spark.redis.host" -> "127.0.0.1",
166 | "spark.redis.port" -> "6379",
167 | "spark.redis.db" -> "4"
168 | ))
169 |
170 | val spark = SparkRuntime.getSparkSession
171 |
172 | val key = "sqlalarm_cache:uuid00000001:sqlalarm_job_001:Fail"
173 |
174 | val json = JacksonUtils.prettyPrint[RecordDetail](RecordDetail(
175 | "jobid",
176 | "fail",
177 | "2019",
178 | "sss",
179 | "cont",
180 | "title",
181 | "ppp",
182 | "001",
183 | "sss",
184 | "tt",
185 | 1
186 | ))
187 |
188 | val rdd = spark.sparkContext.parallelize(Seq(json), 1)
189 |
190 |
191 | RedisOperations.setListCache(key, rdd, SaveMode.Overwrite)
192 |
193 |
194 | }
195 |
196 | test("ops") {
197 | ConfigUtils.configBuilder(Map(
198 | appName -> "RedisOperationsSuite",
199 | master -> "local[2]",
200 | "spark.redis.host" -> "127.0.0.1",
201 | "spark.redis.port" -> "6379",
202 | "spark.redis.db" -> "4"
203 | ))
204 | val spark = SparkRuntime.getSparkSession
205 | val rdd = RedisOperations.getListCache("test:111*")
206 |
207 | val map = RedisOperations.getTableCache("sqlalarm_policy*")
208 |     val conf = map.collect().toMap   // all policy entries currently stored under sqlalarm_policy*
209 |     println(conf)
210 |
211 | import spark.implicits._
212 | val ds = rdd.toDS()
213 |
214 | ds.printSchema()
215 |
216 | ds.show()
217 |
218 | println(ds.count())
219 |
220 | val tb = Seq("a","b","c").toDS()
221 |
222 | tb.printSchema()
223 | tb.show()
224 | println(tb.count())
225 |
226 | val c = tb.union(ds).count()
227 |
228 | println(c)
229 |
230 | }
231 |
232 | }
233 |
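The suite only writes; to see what actually landed in redis db 4, a small check with plain Jedis could be used (assumptions: jedis comes in transitively with spark-redis, and addTableCache/setListCache appear to map to a hash and a list respectively):

```scala
import redis.clients.jedis.Jedis

// Hypothetical verification of the entries written by the "rule", "policy" and "cache" tests.
object RedisCacheCheck {
  def main(args: Array[String]): Unit = {
    val jedis = new Jedis("127.0.0.1", 6379)
    jedis.select(4) // the suite configures spark.redis.db = 4

    // rule/policy payloads appear to be stored as hash fields keyed by item_id
    println(jedis.hget("sqlalarm_rule:kafka:sqlalarm_event", "uuid00000001"))
    println(jedis.hget("sqlalarm_policy:kafka:sqlalarm_event", "uuid00000001"))

    // the alarm cache appears to be a list of RecordDetail JSON strings
    println(jedis.lrange("sqlalarm_cache:uuid00000001:sqlalarm_job_001:Fail", 0, -1))

    jedis.close()
  }
}
```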
--------------------------------------------------------------------------------
/sa-core/src/test/java/dt/sql/alarm/test/SQLAlarmBootTest.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.test
2 |
3 | import dt.sql.alarm.SQLAlarmBoot
4 |
5 | object SQLAlarmBootTest {
6 |
7 | def main(args: Array[String]): Unit = {
8 | SQLAlarmBoot.main(
9 | Array(
10 | "-sqlalarm.master", "local[*]",
11 | "-sqlalarm.name", "sqlalarm",
12 | "-spark.redis.host", "127.0.0.1",
13 | "-spark.redis.port", "6379",
14 | "-spark.redis.db", "4",
15 | "-sqlalarm.sources", "kafka",
16 | "-sqlalarm.input.kafka.topic", "sqlalarm_event",
17 | "-sqlalarm.input.kafka.subscribe.topic.pattern", "1",
18 | "-sqlalarm.input.kafka.bootstrap.servers", "127.0.0.1:9092",
19 | "-sqlalarm.sinks", "console",
20 | "-sqlalarm.output.kafka.topic", "sqlalarm_output",
21 | "-sqlalarm.output.kafka.bootstrap.servers", "127.0.0.1:9092",
22 | "-sqlalarm.checkpointLocation", "checkpoint",
23 |         "-sqlalarm.alert.pigeonApi", "https://dt.sqlclub/api/pigeon"
24 |
25 | )
26 | )
27 | }
28 |
29 | }
30 |
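For this local run to produce an alarm, an event matching the rule registered in RedisOperationsSuite has to reach the `sqlalarm_event` topic. A hypothetical producer sketch (assuming kafka-clients is on the classpath; the field values are sample data shaped after the rule's job_name/job_owner/job_stat/job_time structure):

```scala
import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

// Hypothetical helper: pushes one failed-job event into the input topic.
object SampleEventProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.put("bootstrap.servers", "127.0.0.1:9092")
    props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")

    val producer = new KafkaProducer[String, String](props)
    val event =
      """{"job_name":"sqlalarm_job_001","job_owner":"sqlclub","job_stat":"Fail","job_time":"2020-01-13 12:00:00"}"""
    producer.send(new ProducerRecord[String, String]("sqlalarm_event", event))
    producer.flush()
    producer.close()
  }
}
```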
--------------------------------------------------------------------------------
/sa-core/src/test/java/dt/sql/alarm/test/SparkRedisTest.scala:
--------------------------------------------------------------------------------
1 | package dt.sql.alarm.test
2 |
3 | import com.redislabs.provider.redis._
4 | import com.redislabs.provider.redis.util.ConnectionUtils
5 | import org.apache.spark.sql.SparkSession
6 |
7 |
8 | object SparkRedisTest {
9 |
10 | def main(args: Array[String]): Unit = {
11 |
12 | val spark = SparkSession.builder()
13 | .appName("SparkRedisTest")
14 | .master("local[4]")
15 | .config("spark.redis.host", "127.0.0.1")
16 | .config("spark.redis.port", "6379")
17 | .getOrCreate()
18 |
19 | val sc = spark.sparkContext
20 |
21 |     val keysRDD = sc.fromRedisKeyPattern()        // scans keys matching the default pattern "*"
22 |
23 |     val stringRDD = sc.fromRedisKV(Array("test")) // (key, value) pairs for the string key "test"
24 | val strs = stringRDD.collect()
25 |
26 | val keys = keysRDD.collect()
27 |
28 | import spark.implicits._
29 |     val df = stringRDD.toDF()   // two columns: _1 = key, _2 = value
30 |     df.show()
31 |
32 | println(keys.mkString(","))
33 |
34 |
35 | val listRDD = sc.fromRedisList(Array("list1"))
36 |
37 | val table = listRDD.toDF()
38 | table.printSchema()
39 | table.show()
40 | val tb = spark.read.json(listRDD.toDS())
41 | tb.printSchema()
42 | tb.show()
43 |
44 |     val conn = RedisConfig.fromSparkConf(sc.getConf).initialHost.connect()  // raw Jedis connection to the initial redis host
45 |
46 | val str = new RedisEndpoint(sc.getConf).connect().rpop("list1")
47 |
48 | println(str)
49 |
50 | val s = ConnectionUtils.withConnection[String](conn){
51 | conn =>
52 | conn.get("test")
53 | }
54 |
55 | println(s)
56 |     spark.stop()
57 | }
58 |
59 |
60 | }
61 |
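The keys this scratch test reads (`test` and `list1`) are assumed to exist already; a hypothetical seeding job using the spark-redis RDD write API (toRedisKV / toRedisLIST), so the reads above return data, might look like:

```scala
import com.redislabs.provider.redis._
import org.apache.spark.sql.SparkSession

// Hypothetical seeding step; writes to the default db 0, matching the
// test's SparkSession config, which does not set spark.redis.db.
object SeedSparkRedisTest {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("SeedSparkRedisTest")
      .master("local[2]")
      .config("spark.redis.host", "127.0.0.1")
      .config("spark.redis.port", "6379")
      .getOrCreate()
    val sc = spark.sparkContext

    sc.toRedisKV(sc.parallelize(Seq("test" -> "hello sqlalarm")))                     // read back by fromRedisKV
    sc.toRedisLIST(sc.parallelize(Seq("""{"k":"v1"}""", """{"k":"v2"}""")), "list1")  // read back by fromRedisList

    spark.stop()
  }
}
```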
--------------------------------------------------------------------------------