├── .gitignore
├── README.md
├── bin
├── sqlalarm_records_log.sql
└── start-local.sh
├── docs
├── alarm-console-sink.jpg
└── sqlalarm.png
├── pom.xml
├── sa-admin
├── pom.xml
└── src
│ └── main
│ └── java
│ └── dt
│ └── sql
│ └── alarm
│ └── web
│ └── SQLAlarmConsole.scala
└── sa-core
├── pom.xml
└── src
├── main
├── java
│ ├── com
│ │ └── redislabs
│ │ │ └── provider
│ │ │ └── redis
│ │ │ └── ConnectionPool.scala
│ └── dt
│ │ └── sql
│ │ └── alarm
│ │ ├── SQLAlarmBoot.scala
│ │ ├── conf
│ │ ├── AlarmPolicyConf.scala
│ │ ├── AlarmRuleConf.scala
│ │ ├── Conf.scala
│ │ ├── JdbcConf.scala
│ │ ├── KafkaConf.scala
│ │ └── RedisConf.scala
│ │ ├── core
│ │ ├── AlarmAlert.scala
│ │ ├── AlarmFlow.scala
│ │ ├── AlarmReduce.scala
│ │ ├── Base.scala
│ │ ├── Constants.scala
│ │ ├── RecordDetail.scala
│ │ ├── Sink.java
│ │ ├── Source.java
│ │ ├── SparkRuntime.scala
│ │ └── WowLog.scala
│ │ ├── filter
│ │ └── SQLFilter.scala
│ │ ├── input
│ │ ├── BaseInput.scala
│ │ ├── Constants.scala
│ │ ├── KafkaInput.scala
│ │ └── RedisInput.scala
│ │ ├── output
│ │ ├── BaseOutput.scala
│ │ ├── ConsoleOutput.scala
│ │ ├── Constants.scala
│ │ ├── JdbcOutput.scala
│ │ └── KafkaOutput.scala
│ │ └── reduce
│ │ ├── PolicyAnalyzeEngine.scala
│ │ └── engine
│ │ ├── AggWindow.scala
│ │ ├── ReduceByNumScale.scala
│ │ ├── ReduceByTimeScale.scala
│ │ ├── ReduceByWindow.scala
│ │ └── Scale.scala
└── resources
│ ├── application.conf
│ └── log4j.properties
└── test
└── java
└── dt
└── sql
└── alarm
└── test
├── InputSuite.scala
├── LocalSparkApp.scala
├── RedisOperationsSuite.scala
├── SQLAlarmBootTest.scala
└── SparkRedisTest.scala

/.gitignore:
--------------------------------------------------------------------------------
# Created by .ignore support plugin (hsz.mobi)
.idea
*.iml
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## SQLAlarm
> Big data smart alarm by SQL

SQLAlarm is an alarm system for time-stamped events, built on Spark Structured Streaming. It provides the following capabilities:
1. Event filtering through SQL
2. Noise reduction for alarm records
3. Dispatch of alarm records to specified channels

The overall architecture is shown below:
![sqlalarm](docs/sqlalarm.png)

Modules:
1. sa-admin: web console and REST API for SQLAlarm
2. sa-core: core module of SQLAlarm (source/filter/sink(alert))

### Developing SQLAlarm
You can use bin/start-local.sh to start a local SQLAlarm server from IntelliJ IDEA. After packaging the jar, we recommend running it on a Spark cluster in yarn-client or local mode.
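
For reference, a typical way to produce that jar with the project's Maven build (a sketch assuming a standard local Maven + JDK 1.8 setup; the exact command is not prescribed by this repository):

```bash
# Build all modules and skip tests; the shaded core jar is produced by sa-core's
# maven-shade-plugin and ends up under sa-core/target/.
mvn clean package -DskipTests
ls sa-core/target/sa-core-1.0-SNAPSHOT.jar
```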

Minimal requirements for a SQLAlarm server are:
- Java 1.8+
- Spark 2.4.x
- Redis (Redis 5.0, if you use Redis Streams)
- Kafka (not needed if you only use Redis Streams for event alerts)

For example, the following starts a SQLAlarm server that consumes Kafka event messages and runs the alarm flow:
```bash
spark-submit --class dt.sql.alarm.SQLAlarmBoot \
    --driver-memory 2g \
    --master local[4] \
    --name SQLALARM \
    --conf "spark.kryoserializer.buffer=256k" \
    --conf "spark.kryoserializer.buffer.max=1024m" \
    --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
    --conf "spark.redis.host=127.0.0.1" \
    --conf "spark.redis.port=6379" \
    --conf "spark.redis.db=4" \
    sa-core-1.0-SNAPSHOT.jar \
    -sqlalarm.name sqlalarm \
    -sqlalarm.sources kafka \
    -sqlalarm.input.kafka.topic sqlalarm_event \
    -sqlalarm.input.kafka.subscribe.topic.pattern 1 \
    -sqlalarm.input.kafka.bootstrap.servers "127.0.0.1:9092" \
    -sqlalarm.checkpointLocation checkpoint \
    -sqlalarm.sinks console
```
> notes: the simple example above takes Kafka as the message center, filters alarm events, and outputs them to the console. As in bin/start-local.sh, a checkpoint location is passed explicitly via `-sqlalarm.checkpointLocation`.

### Quick Start
1. Package the core jar: sa-core-1.0-SNAPSHOT.jar.
2. Deploy the jar package to a Spark cluster.
3. Add an alarm rule (put it into Redis); a noise-reduction policy can be registered the same way, see the example at the end of this section:
```bash
# hset key uuid value
# key: sqlalarm_rule:${sourceType}:${topic}

HSET "sqlalarm_rule:kafka:sqlalarm_event" "uuid00000001"
{
    "item_id":"uuid00000001",
    "platform":"alarm",
    "title":"sql alarm test",
    "source":{
        "type":"kafka",
        "topic":"sqlalarm_event"
    },
    "filter":{
        "table":"fail_job",
        "structure":[
            {
                "name":"job_name",
                "type":"string",
                "xpath":"$.job_name"
            },
            {
                "name":"job_owner",
                "type":"string",
                "xpath":"$.job_owner"
            },
            {
                "name":"job_stat",
                "type":"string",
                "xpath":"$.job_stat"
            },
            {
                "name":"job_time",
                "type":"string",
                "xpath":"$.job_time"
            }
        ],
        "sql":"select job_name as job_id,job_stat,job_time as event_time,'job failed' as message, map('job_owner',job_owner) as context from fail_job where job_stat='Fail'"
    }
}
```
4. Wait for the event center (Kafka or Redis) to produce alarm events, or produce them manually:
> 1. Create the topic if it does not exist:
```bash
kafka-topics.sh --create --bootstrap-server localhost:9092 --replication-factor 1 --partitions 1 --topic sqlalarm_event
```
> 2. Produce events:
```bash
kafka-console-producer.sh --broker-list localhost:9092 --topic sqlalarm_event

{
    "job_name":"sqlalarm_job_000",
    "job_owner":"bebee4java",
    "job_stat":"Succeed",
    "job_time":"2019-12-26 12:00:00"
}

{
    "job_name":"sqlalarm_job_001",
    "job_owner":"bebee4java",
    "job_stat":"Fail",
    "job_time":"2019-12-26 12:00:00"
}
```
5. If you use the console sink, you will see output like the following in the console (note that the Fail events are filtered out and the Succeed events are ignored):
![alarm-console-sink](docs/alarm-console-sink.jpg)

> **notes:** steps 2 and 3 can be done in either order, and alarm rules do not have to exist before the SQLAlarm server is started.
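
Optionally, you can attach a noise-reduction policy to the same alarm item. Policies are stored in Redis next to the rules; the key layout and the JSON fields below follow the template documented in `AlarmPolicyConf.scala`, while the concrete values are only illustrative:
```bash
# hset key uuid value
# key: sqlalarm_policy:${sourceType}:${topic}
# window: merge alarms of this item within a 10-minute time window
# policy: absolute counting; first_alert=1 pushes the first record immediately

HSET "sqlalarm_policy:kafka:sqlalarm_event" "uuid00000001"
{
    "item_id":"uuid00000001",
    "window":{
        "type":"time",
        "value":10,
        "unit":"m",
        "count":0
    },
    "policy":{
        "type":"absolute",
        "unit":"number",
        "value":1,
        "first_alert":1
    }
}
```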

### Features
1. Supports multiple data sources as the event center (Kafka or a Redis source with Streams enabled), and it is extensible: you can add a custom data source simply by extending the class [BaseInput](sa-core/src/main/java/dt/sql/alarm/input/BaseInput.scala)
2. Supports multiple data topics with inconsistent structures
3. Supports output of alarm events to multiple sinks (kafka/jdbc/es etc.), and it is extensible: you can add a custom sink by extending the class [BaseOutput](sa-core/src/main/java/dt/sql/alarm/output/BaseOutput.scala); see the JDBC sink sketch at the end of this README
4. Supports alarm filtering for events through SQL
5. Supports multiple policies (time merge / time window + N counts merge) for alarm noise reduction
6. Supports alarm rules and policies that take effect dynamically without restarting the server
7. Supports adding data source topics dynamically (if your subscription mode is `subscribePattern`)
8. Supports dispatching alarm records to different specified channels

### Collectors
SQLAlarm does not generate metrics events itself; it only obtains metrics events from the message center and analyzes them.
However, you can collect and report metrics events with another project, [metrics-exporter](https://github.com/bebee4java/metrics-exporter),
which fills this gap well.

**In this way, a complete alarm process looks like:
[metrics-exporter](https://github.com/bebee4java/metrics-exporter) —> [sqlalarm](https://github.com/bebee4java/sqlalarm) —> alarm-pigeon**

### Documentation
The documentation of SQLAlarm is located on the issues page: [SQLAlarm issues](https://github.com/bebee4java/sqlalarm/issues).
It contains a lot of information, such as the [configuration](https://github.com/bebee4java/sqlalarm/issues/2) and usage tutorials. If you have any questions, please feel free to open an issue.

### Fork and Contribute
This is an active open-source project. We are always open to people who want to use the system or contribute to it. Contact us if you are looking for implementation tasks that fit your skills.
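
### JDBC sink (sketch)
The table used to persist alarm records ships as [bin/sqlalarm_records_log.sql](bin/sqlalarm_records_log.sql), and `JdbcConf` defaults `dbtable` to `sqlalarm_records_log`. The argument names below are assumptions inferred from the `sqlalarm.output` prefix in `Constants.scala` and the fields of `JdbcConf` (url, driver, user, password, dbtable); treat this as a sketch, not verified switches, and check the [configuration](https://github.com/bebee4java/sqlalarm/issues/2) issue for the authoritative names.
```bash
# Sketch: replace "-sqlalarm.sinks console" in the spark-submit example above with
# the arguments below. Key names are assumed, not taken from the project docs.
-sqlalarm.sinks "console,jdbc" \
-sqlalarm.output.jdbc.url "jdbc:mysql://127.0.0.1:3306/sqlalarm" \
-sqlalarm.output.jdbc.driver "com.mysql.jdbc.Driver" \
-sqlalarm.output.jdbc.user "root" \
-sqlalarm.output.jdbc.password "******" \
-sqlalarm.output.jdbc.dbtable "sqlalarm_records_log"
```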

--------------------------------------------------------------------------------
/bin/sqlalarm_records_log.sql:
--------------------------------------------------------------------------------
DROP TABLE IF EXISTS `sqlalarm_records_log`;

-- Alarm record detail log table
CREATE TABLE IF NOT EXISTS `sqlalarm_records_log` (
  `id` int(20) unsigned NOT NULL AUTO_INCREMENT,
  `job_id` varchar(128) NOT NULL COMMENT 'job id',
  `job_stat` varchar(128) NOT NULL COMMENT 'job status',
  `event_time` timestamp NOT NULL COMMENT 'job event time',
  `message` varchar(2000) NOT NULL COMMENT 'job alarm message',
  `context` varchar(2000) NOT NULL COMMENT 'job context parameters',
  `title` varchar(128) NOT NULL COMMENT 'alarm title',
  `platform` varchar(128) NOT NULL COMMENT 'alarm platform',
  `item_id` varchar(128) NOT NULL COMMENT 'alarm item id',
  `source` varchar(128) NOT NULL COMMENT 'alarm record data source',
  `topic` varchar(128) NOT NULL COMMENT 'alarm record topic',
  `create_time` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP COMMENT 'creation time',
  PRIMARY KEY (`id`),
  KEY `job_index` (`job_id`, `job_stat`),
  KEY `alarm_item_index` (`platform`, `item_id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT 'Alarm record detail log table';
--------------------------------------------------------------------------------
/bin/start-local.sh:
--------------------------------------------------------------------------------
#!/bin/bash

#set -x

for env in SPARK_HOME ; do
  if [[ -z "${!env}" ]]; then
    echo "$env must be set to run this script"
    exit 1
  else
    echo ${env}=${!env}
  fi
done

if [[ -z "${SQLALARM_HOME}" ]]; then
  export SQLALARM_HOME="$(cd "`dirname "$0"`"/../; pwd)"
fi

echo "SQLALARM_HOME=$SQLALARM_HOME"

MAIN_JAR=$(find ${SQLALARM_HOME}/*/target -type f -name "*.jar" \
  | grep 'sa-core' | grep -v "sources" | grep -v "original" | grep -v "javadoc")

echo "MAIN_JAR=$MAIN_JAR"

export DRIVER_MEMORY=${DRIVER_MEMORY:-2g}
${SPARK_HOME}/bin/spark-submit --class dt.sql.alarm.SQLAlarmBoot \
    --driver-memory ${DRIVER_MEMORY} \
    --master "local[*]" \
    --name SQLALARM \
    --conf "spark.driver.extraJavaOptions"="-DREALTIME_LOG_HOME=$SQLALARM_HOME/logs" \
    --conf "spark.sql.hive.thriftServer.singleSession=true" \
    --conf "spark.kryoserializer.buffer=256k" \
    --conf "spark.kryoserializer.buffer.max=1024m" \
    --conf "spark.serializer=org.apache.spark.serializer.KryoSerializer" \
    --conf "spark.scheduler.mode=FAIR" \
    --conf "spark.redis.host=127.0.0.1" \
    --conf "spark.redis.port=6379" \
    --conf "spark.redis.db=4" \
    ${MAIN_JAR} \
    -sqlalarm.name sqlalarm \
    -sqlalarm.sources kafka \
    -sqlalarm.input.kafka.topic sqlalarm_event \
    -sqlalarm.input.kafka.subscribe.topic.pattern 1 \
    -sqlalarm.input.kafka.bootstrap.servers "127.0.0.1:9092" \
    -sqlalarm.checkpointLocation checkpoint \
    -sqlalarm.sinks console

--------------------------------------------------------------------------------
/docs/alarm-console-sink.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bebee4java/sqlalarm/76dc595b2f57ffe121bdd0c125d26d1ac4d4b547/docs/alarm-console-sink.jpg
--------------------------------------------------------------------------------
/docs/sqlalarm.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/bebee4java/sqlalarm/76dc595b2f57ffe121bdd0c125d26d1ac4d4b547/docs/sqlalarm.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | dt.sql.alarm 8 | sqlalarm 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | sa-core 13 | sa-admin 14 | 15 | 16 | SQLAlarm Parent POM 17 | https://github.com/bebee4java/sqlalarm 18 | Big data smart alarm by sql. 19 | 20 | 21 | 22 | Apache 2.0 License 23 | http://www.apache.org/licenses/LICENSE-2.0.html 24 | repo 25 | 26 | 27 | 28 | 29 | bebee4java 30 | songgongru 31 | grsong.cn@gmail.com 32 | 33 | 34 | 35 | 36 | 37 | UTF-8 38 | UTF-8 39 | 1.8 40 | 1.8 41 | 1.8 42 | 43 | 2.11.12 44 | 2.11 45 | provided 46 | 3.1.3 47 | 48 | 2.2.1.RELEASE 49 | 2.9.2 50 | 51 | 2.4.3 52 | 2.4 53 | 1.0.3 54 | 55 | 56 | 57 | 58 | 59 | org.scala-lang 60 | scala-library 61 | ${scala.version} 62 | 63 | 64 | org.scalatest 65 | scalatest_${scala.compat.version} 66 | 3.0.0 67 | test 68 | 69 | 70 | tech.sqlclub 71 | common-utils_${scala.compat.version} 72 | ${common-utils-version} 73 | 74 | 75 | 86 | 87 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | org.scala-tools 99 | maven-scala-plugin 100 | 2.15.2 101 | 102 | 103 | 104 | -g:vars 105 | 106 | 107 | true 108 | 109 | 110 | 111 | scala-compile-first 112 | process-resources 113 | 114 | compile 115 | 116 | 117 | 118 | scala-test-compile 119 | process-test-resources 120 | 121 | testCompile 122 | 123 | 124 | 125 | 126 | 127 | 128 | org.apache.maven.plugins 129 | maven-compiler-plugin 130 | 2.3.2 131 | 132 | 133 | -g 134 | true 135 | 1.8 136 | 1.8 137 | 138 | 139 | 140 | 141 | 142 | maven-source-plugin 143 | 2.1 144 | 145 | true 146 | 147 | 148 | 149 | compile 150 | 151 | jar 152 | 153 | 154 | 155 | 156 | 157 | org.apache.maven.plugins 158 | maven-javadoc-plugin 159 | 3.0.1 160 | 161 | 162 | attach-javadocs 163 | 164 | jar 165 | 166 | 167 | 168 | 169 | 170 | org.apache.maven.plugins 171 | maven-jar-plugin 172 | 2.6 173 | 174 | 175 | 176 | 177 | -------------------------------------------------------------------------------- /sa-admin/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | sqlalarm 7 | dt.sql.alarm 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | sa-admin 13 | 14 | 15 | -------------------------------------------------------------------------------- /sa-admin/src/main/java/dt/sql/alarm/web/SQLAlarmConsole.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.web 2 | 3 | /** 4 | * 5 | * Created by songgr on 2019/12/26. 
6 | */ 7 | class SQLAlarmConsole { 8 | 9 | def main(args: Array[String]): Unit = { 10 | 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /sa-core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | sqlalarm 7 | dt.sql.alarm 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | sa-core 13 | 14 | 15 | provided 16 | 17 | 18 | 19 | 20 | 21 | org.reflections 22 | reflections 23 | 0.9.11 24 | 30 | 31 | 32 | 33 | org.apache.spark 34 | spark-sql_${scala.compat.version} 35 | ${spark.version} 36 | ${scope} 37 | 38 | 39 | com.fasterxml.jackson.module 40 | jackson-module-scala_${scala.compat.version} 41 | 42 | 43 | jackson-core 44 | com.fasterxml.jackson.core 45 | 46 | 47 | jackson-annotations 48 | com.fasterxml.jackson.core 49 | 50 | 51 | scala-xml_2.11 52 | org.scala-lang.modules 53 | 54 | 55 | jackson-databind 56 | com.fasterxml.jackson.core 57 | 58 | 59 | commons-lang3 60 | org.apache.commons 61 | 62 | 63 | slf4j-api 64 | org.slf4j 65 | 66 | 67 | slf4j-log4j12 68 | org.slf4j 69 | 70 | 71 | guava 72 | com.google.guava 73 | 74 | 75 | jersey-guava 76 | org.glassfish.jersey.bundles.repackaged 77 | 78 | 79 | avro 80 | org.apache.avro 81 | 82 | 83 | activation 84 | javax.activation 85 | 86 | 87 | scala-reflect 88 | org.scala-lang 89 | 90 | 91 | scala-parser-combinators_2.11 92 | org.scala-lang.modules 93 | 94 | 95 | commons-codec 96 | commons-codec 97 | 98 | 99 | 100 | 101 | 102 | org.apache.spark 103 | spark-sql-kafka-0-10_${scala.compat.version} 104 | ${spark.version} 105 | 106 | 107 | 108 | 109 | com.redislabs 110 | spark-redis_${scala.compat.version} 111 | 2.4.2 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | org.apache.maven.plugins 120 | maven-shade-plugin 121 | 2.4.3 122 | 123 | 124 | package 125 | 126 | shade 127 | 128 | 129 | 130 | 131 | com.google.common.collect 132 | shade.com.google.common.collect 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | *:* 142 | 143 | META-INF/*.SF 144 | META-INF/*.DSA 145 | META-INF/*.RSA 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | -------------------------------------------------------------------------------- /sa-core/src/main/java/com/redislabs/provider/redis/ConnectionPool.scala: -------------------------------------------------------------------------------- 1 | package com.redislabs.provider.redis 2 | 3 | import java.util.concurrent.ConcurrentHashMap 4 | 5 | import dt.sql.alarm.core.Constants._ 6 | import org.apache.spark.SparkEnv 7 | import redis.clients.jedis._ 8 | import redis.clients.jedis.exceptions.JedisConnectionException 9 | 10 | import scala.collection.JavaConversions._ 11 | 12 | 13 | object ConnectionPool { 14 | @transient private lazy val pools: ConcurrentHashMap[RedisEndpoint, JedisPoolAbstract] = 15 | new ConcurrentHashMap[RedisEndpoint, JedisPoolAbstract]() 16 | 17 | private lazy val sparkConf = SparkEnv.get.conf 18 | 19 | def connect(re: RedisEndpoint): Jedis = { 20 | val pool = pools.getOrElseUpdate(re, 21 | { 22 | val poolConfig: JedisPoolConfig = new JedisPoolConfig() 23 | poolConfig.setMaxTotal(250) 24 | poolConfig.setMaxIdle(32) 25 | poolConfig.setTestOnBorrow(false) 26 | poolConfig.setTestOnReturn(false) 27 | poolConfig.setTestWhileIdle(false) 28 | poolConfig.setMinEvictableIdleTimeMillis(60000) 29 | poolConfig.setTimeBetweenEvictionRunsMillis(30000) 30 | poolConfig.setNumTestsPerEvictionRun(-1) 31 | poolConfig.setMaxWaitMillis(10000) 32 | 33 | if (SPARK_REDIS_SENTINEL_MODE.equalsIgnoreCase(sparkConf.get(SPARK_REDIS_MODE, 
SPARK_REDIS_SENTINEL_MODE))){ 34 | // 哨兵模式 35 | val master = sparkConf.get(SPARK_REDIS_MASTER, SPARK_REDIS_MASTER_DEFAULT) 36 | val sentinels = new java.util.HashSet[String]() 37 | re.host.split(",").filter(_.nonEmpty).foreach(add => sentinels.add(add)) 38 | 39 | new JedisSentinelPool(master, sentinels, poolConfig, re.timeout, re.auth, re.dbNum) 40 | } else { 41 | new JedisPool(poolConfig, re.host, re.port, re.timeout, re.auth, re.dbNum) 42 | } 43 | } 44 | ) 45 | var sleepTime: Int = 4 46 | var conn: Jedis = null 47 | while (conn == null) { 48 | try { 49 | conn = pool.getResource 50 | } 51 | catch { 52 | case e: JedisConnectionException if e.getCause.toString. 53 | contains("ERR max number of clients reached") => { 54 | if (sleepTime < 500) sleepTime *= 2 55 | Thread.sleep(sleepTime) 56 | } 57 | case e: Exception => throw e 58 | } 59 | } 60 | conn 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/SQLAlarmBoot.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm 2 | 3 | import dt.sql.alarm.core._ 4 | import core.Constants._ 5 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils, ParamsUtils} 6 | 7 | object SQLAlarmBoot { 8 | 9 | // 5 min 10 | val daemonCleanInterval = 5*60*1000L 11 | 12 | def main(args: Array[String]): Unit = { 13 | 14 | val params = new ParamsUtils(args) 15 | ConfigUtils.configBuilder(params.getParamsMap) 16 | ConfigUtils.showConf() 17 | // require(ConfigUtils.hasConfig(appName), "Application name must be set") 18 | require(ConfigUtils.hasConfig(checkpoint), s"SQLAlarm stream $checkpoint must be set") 19 | require(ConfigUtils.hasConfig(SQLALARM_SOURCES), s"SQLAlarm stream $SQLALARM_SOURCES must be set") 20 | require(ConfigUtils.hasConfig(INPUT_PREFIX), s"SQLAlarm stream $INPUT_PREFIX must be set") 21 | // require(ConfigUtils.hasConfig(SQLALARM_SINKS), s"SQLAlarm stream $SQLALARM_SINKS must be set") 22 | // require(ConfigUtils.hasConfig(OUTPUT_PREFIX), s"SQLAlarm stream $OUTPUT_PREFIX must be set") 23 | 24 | require(ConfigUtils.hasConfig(SQLALARM_SINKS) || ConfigUtils.hasConfig(SQLALARM_ALERT), 25 | s"SQLAlarm stream $SQLALARM_SINKS or $SQLALARM_ALERT must be set at least one of them") 26 | 27 | val spark = SparkRuntime.getSparkSession 28 | 29 | SparkRuntime.parseProcessAndSink(spark) 30 | 31 | var completed = false 32 | if (ConfigUtils.hasConfig(SQLALARM_ALERT)) { 33 | val partitionNum = SparkRuntime.sparkConfMap.getOrElse(Constants.redisCacheDataPartitionNum, 34 | ConfigUtils.getStringValue(Constants.redisCacheDataPartitionNum, "3")).toInt 35 | 36 | def launchCleaner = { 37 | // 启动alarm cache后台清理 38 | WowLog.logInfo("SQLAlarm cache daemon cleaner start......") 39 | var batchId:Long = 1L 40 | while ( SparkRuntime.streamingQuery != null && SparkRuntime.streamingQuery.isActive ) { 41 | spark.sparkContext.setJobGroup("SQLAlarm cache clean group", s"cache-clean-batch-$batchId", true) 42 | val rdd = RedisOperations.getListCache(ALARM_CACHE + "*", partitionNum) 43 | if (rdd.count() > 0) { 44 | import spark.implicits._ 45 | val cacheRecords = rdd.map{ 46 | row => 47 | JacksonUtils.fromJson[RecordDetail](row, classOf[RecordDetail]) 48 | }.toDS 49 | 50 | val results = AlarmReduce.cacheReduce(cacheRecords) 51 | AlarmAlert.push(results, true) // Force clean cache after sending 52 | } 53 | batchId = batchId + 1 54 | spark.sparkContext.clearJobGroup() 55 | Thread.sleep(daemonCleanInterval) 56 | } 57 | if ( 
!SparkRuntime.streamingQuery.isActive ) completed = true 58 | } 59 | 60 | new Thread("launch-cache-cleaner-in-spark-job") { 61 | setDaemon(true) 62 | override def run(): Unit = { 63 | while ( !completed ) { 64 | try { 65 | launchCleaner 66 | }catch { 67 | case e:Exception => 68 | e.printStackTrace() 69 | } 70 | WowLog.logInfo("SQLAlarm cache daemon cleaner exited, restarted after 60 seconds!") 71 | if (!completed) Thread.sleep(60000) 72 | } 73 | 74 | } 75 | }.start() 76 | 77 | } 78 | 79 | if ( SparkRuntime.streamingQuery != null ) 80 | SparkRuntime.streamingQuery.awaitTermination() 81 | 82 | // 设置completed标志为true 83 | completed = true 84 | 85 | if (!spark.sparkContext.isStopped) spark.sparkContext.stop() 86 | 87 | if (spark.sparkContext.isStopped) AlarmFlow.destroy 88 | 89 | } 90 | 91 | } 92 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/AlarmPolicyConf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | 4 | import dt.sql.alarm.core.Constants.ALARM_POLICY 5 | import dt.sql.alarm.core.Constants.ALARM_CACHE 6 | import tech.sqlclub.common.utils.JacksonUtils 7 | 8 | case class AlarmPolicyConf(item_id:String, window:Window, policy:Policy) 9 | case class Window(`type`: String, value:Int, unit:String, count:Int){ 10 | def getTimeWindowSec = { 11 | import WindowUnit._ 12 | val u = unit.unit match { 13 | case WindowUnit.m => 60 14 | case WindowUnit.h => 3600 15 | case WindowUnit.d => 86400 16 | } 17 | value * u 18 | } 19 | 20 | } 21 | case class Policy(`type`:String, unit:String, value: Double, first_alert:Int){ 22 | def alertFirst = 1 == first_alert 23 | 24 | import PolicyUnit._ 25 | def getValue = if (unit.isPercent) norm(value / 100.0d) else value 26 | 27 | def norm(d:Double) = { 28 | d match { 29 | case x if x>=1 => 1.0d 30 | case x if x<=0 => 0.0d 31 | case _ => d 32 | } 33 | } 34 | } 35 | 36 | 37 | object WindowType extends Enumeration{ 38 | implicit class WindowTypeString(s:String){ 39 | def windowType:Value = WindowType.withName(s) 40 | def isTime:Boolean = time == windowType 41 | def isNumber:Boolean = number == windowType 42 | def isTimeCount:Boolean = timeCount == windowType 43 | } 44 | type Type = Value 45 | val time,number,timeCount = Value 46 | } 47 | 48 | object WindowUnit extends Enumeration{ 49 | implicit class WindowUnitString(s:String){ 50 | def unit:Value = WindowUnit.withName(s) 51 | } 52 | type Type = Value 53 | val m,h,d,n = Value 54 | } 55 | 56 | object PolicyType extends Enumeration{ 57 | implicit class PolicyTypeString(s:String){ 58 | def policyType:Value = PolicyType.withName(s) 59 | def isAbsolute:Boolean = absolute == policyType 60 | def isScale:Boolean = scale == policyType 61 | } 62 | type Type = Value 63 | val absolute,scale = Value 64 | } 65 | 66 | object PolicyUnit extends Enumeration{ 67 | type Type = Value 68 | implicit class PolicyUnitString(s:String){ 69 | def unit:Value = PolicyUnit.withName(s) 70 | def isPercent:Boolean = percent == unit 71 | } 72 | val number,percent = Value 73 | } 74 | 75 | 76 | object AlarmPolicyConf { 77 | 78 | def getRkey(source:String, topic:String) = List(ALARM_POLICY, source, topic).mkString(":") 79 | 80 | def getCacheKey(itemId:String) = List(ALARM_CACHE,itemId).mkString(":") 81 | 82 | def getCacheKey(itemId:String, jobId:String) = List(ALARM_CACHE,itemId,jobId).mkString(":") 83 | 84 | def getCacheKey(itemId:String, jobId:String, jobStat:String) = List(ALARM_CACHE, itemId, jobId, 
jobStat).mkString(":") 85 | 86 | def formJson(json:String) = JacksonUtils.fromJson[AlarmPolicyConf](json, classOf[AlarmPolicyConf]) 87 | 88 | def prettyString(policyConf: AlarmPolicyConf): String = JacksonUtils.prettyPrint(policyConf) 89 | 90 | 91 | def main(args: Array[String]): Unit = { 92 | 93 | val d = Policy("", "number", 30, 1).getValue 94 | 95 | val s = 96 | """ 97 | |{ 98 | | "item_id" : "1222", 99 | | "window": { 100 | | "type": "time", 101 | | "value": 10, 102 | | "unit": "m" 103 | | }, 104 | | "policy":{ 105 | | "type":"absolute" 106 | | } 107 | |} 108 | """.stripMargin 109 | 110 | println(s) 111 | 112 | val policy = JacksonUtils.fromJson(s, classOf[AlarmPolicyConf]) 113 | 114 | println(policy.window.`type`) 115 | 116 | println(policy) 117 | } 118 | 119 | /* 120 | 121 | { 122 | "item_id":"1222", 123 | "window":{ 124 | "type":"time/number/timeCount", 125 | "value":10, 126 | "unit":"m/h/d/n", 127 | "count":0 128 | }, 129 | "policy":{ 130 | "type":"absolute/scale", 131 | "unit":"number/percent", 132 | "value":0.9/100, 133 | "first_alert": 0 134 | } 135 | } 136 | 137 | */ 138 | } 139 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/AlarmRuleConf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | import dt.sql.alarm.core.Constants.ALARM_RULE 4 | import tech.sqlclub.common.utils.JacksonUtils 5 | 6 | case class AlarmRuleConf(item_id:String, platform:String, title:String, source:Source, filter:Filter) 7 | case class Source(`type`:String, topic:String) 8 | case class Filter(table:String, structure:Array[Field], sql:String) 9 | case class Field(name:String, `type`:String, xpath:String) 10 | 11 | object AlarmRuleConf { 12 | def getRkey(source:String, topic:String) = List(ALARM_RULE, source, topic).mkString(":") 13 | 14 | def toJson(ruleConf: AlarmRuleConf) = JacksonUtils.toJson(ruleConf) 15 | 16 | def formJson(json:String) = JacksonUtils.fromJson[AlarmRuleConf](json, classOf[AlarmRuleConf]) 17 | 18 | def prettyString(ruleConf: AlarmRuleConf): String = JacksonUtils.prettyPrint(ruleConf) 19 | 20 | 21 | def main(args: Array[String]): Unit = { 22 | println(prettyString(AlarmRuleConf("1222","alarm","sql alarm", 23 | Source("kafka", " sqlalarm_event"), 24 | Filter("error_job", 25 | Array(Field("job_id","string","$.jobid")), 26 | "select jobid from sqlalarm_event" 27 | ) 28 | ) 29 | )) 30 | 31 | } 32 | 33 | /* 34 | { 35 | "item_id" : "1222", 36 | "platform" : "alarm", 37 | "title" : "sql alarm", 38 | "source" : { 39 | "type" : "kafka", 40 | "topic" : " sqlalarm_event" 41 | }, 42 | "filter" : { 43 | "table" : "error_job", 44 | "structure" : [ { 45 | "name" : "job_id", 46 | "type" : "string", 47 | "xpath" : "$.jobid" 48 | } ], 49 | "sql" : "select jobid from sqlalarm_event" 50 | } 51 | } 52 | */ 53 | } 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/Conf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | /** 4 | * 配置接口 5 | * Created by songgr on 2019/12/20. 6 | */ 7 | trait Conf 8 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/JdbcConf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | /** 4 | * 5 | * Created by songgr on 2020/01/07. 
6 | */ 7 | case class JdbcConf( 8 | url:String, // jdbc url 9 | driver:String, // jdbc 驱动类 10 | user:String, // jdbc 用户名 11 | password:String, // jdbc 密码 12 | var dbtable:String = "sqlalarm_records_log", // jdbc 表名 13 | var numPartitions:Int = 8, // 表写入可用于并行处理的最大分区数 14 | var batchsize:Int = 1000, // JDBC批处理大小,它确定每次往返要插入多少行。这可以帮助提高JDBC驱动程序的性能 15 | var mode:String = "append" // jdbc 表写入模式 16 | ) extends Conf 17 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/KafkaConf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | import dt.sql.alarm.input.Constants.SubscribeType.SubscribeType 4 | 5 | /** 6 | * 7 | * Created by songgr on 2019/12/25. 8 | */ 9 | case class KafkaConf( 10 | subscribeType:SubscribeType, 11 | topic:String, 12 | servers:String, 13 | group:String) extends Conf 14 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/conf/RedisConf.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.conf 2 | 3 | /** 4 | * 5 | * Created by songgr on 2020/01/09. 6 | */ 7 | case class RedisConf( 8 | keys:String, // redis流的key值,多个逗号分隔 9 | start_offsets:String, // redis流的起始offset 10 | group:String, // redis流消费组 11 | consumer_prefix:String, // redis流消费者前缀 12 | var parallelism:Int = 4, // redis流处理并行度 13 | var batch_size:Int = 200, // redis流批次数据量大小 14 | var read_block_msec:Long = 1000L // redis流批次等待时间毫秒值 15 | ) extends Conf 16 | 17 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/AlarmAlert.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import dt.sql.alarm.reduce.EngineResult 5 | import tech.sqlclub.common.log.Logging 6 | 7 | object AlarmAlert extends Logging { 8 | 9 | def push(results:Array[EngineResult], forceCleanCache:Boolean = false) : Unit = { 10 | results.filter(_.hasWarning).foreach { 11 | result => 12 | val recordDetail = result.lastAlarmRecord 13 | val firstEventTime = result.firstAlarmRecord.event_time 14 | val count = result.reduceCount 15 | WowLog.logInfo(s"this moment the record has warning! Agg count: $count") 16 | // forceCleanCache 参数为了处理 首次不告警但过期的告警记录 仅当存在一条这种情况的时候强制删除缓存 17 | if ( send(AlarmRecord.as(recordDetail), firstEventTime, count) && (count >1 || forceCleanCache) ) { 18 | val key = AlarmPolicyConf.getCacheKey(recordDetail.item_id, recordDetail.job_id, recordDetail.job_stat) 19 | RedisOperations.delCache(key) 20 | WowLog.logInfo(s"agg over, del the cache! 
key: $key") 21 | } 22 | } 23 | 24 | } 25 | 26 | def send(alarmRecord: AlarmRecord, firstTime:String, count:Int):Boolean = { 27 | logInfo("Alarm record call send api...") 28 | true 29 | } 30 | 31 | case class AlarmRecord( 32 | job_id:String, 33 | job_stat:String, 34 | event_time:String, 35 | message:String, 36 | context:String, // map string 37 | title:String, 38 | platform:String, 39 | item_id:String 40 | ) 41 | 42 | object AlarmRecord { 43 | def as(recordDetail: RecordDetail) = AlarmRecord( 44 | recordDetail.job_id, 45 | recordDetail.job_stat, 46 | recordDetail.event_time, 47 | recordDetail.message, 48 | recordDetail.context, 49 | recordDetail.title, 50 | recordDetail.platform, 51 | recordDetail.item_id 52 | ) 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/AlarmFlow.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import java.util.concurrent._ 4 | import java.util 5 | import java.util.UUID 6 | 7 | import Constants._ 8 | import dt.sql.alarm.conf.{AlarmPolicyConf, AlarmRuleConf} 9 | import dt.sql.alarm.core.Constants.SQLALARM_ALERT 10 | import tech.sqlclub.common.log.Logging 11 | import tech.sqlclub.common.utils.ConfigUtils 12 | import org.apache.spark.sql.{Dataset, Row, SparkSession} 13 | import tech.sqlclub.common.exception.SQLClubException 14 | 15 | object AlarmFlow extends Logging { 16 | 17 | def taskNum:Int = SparkRuntime.sparkConfMap.getOrElse( futureTasksThreadPoolSize, 18 | ConfigUtils.getStringValue(futureTasksThreadPoolSize, "2")).toInt 19 | 20 | lazy private val executors = Executors.newFixedThreadPool(taskNum) 21 | lazy private val taskList = new util.ArrayList[Future[Unit]](taskNum) 22 | lazy private val taskTimeOut = SparkRuntime.sparkConfMap.getOrElse(futureTaskTimeOut, 23 | ConfigUtils.getStringValue(futureTaskTimeOut, "300000")).toLong // Default timeout 5 min 24 | 25 | def run(batchId:Long, data:Dataset[Row]) 26 | (filterFunc: (Dataset[Row], AlarmRuleConf, AlarmPolicyConf) => Dataset[RecordDetail]) 27 | (sinkFunc: Dataset[RecordDetail] => Unit) 28 | (alertFunc: (Dataset[RecordDetail], AlarmPolicyConf) => Unit) 29 | (implicit spark:SparkSession = data.sparkSession):Unit = { 30 | 31 | WowLog.logInfo("Alarm flow start....") 32 | 33 | val groupId = nextGroupId 34 | val jobName = s"SQLAlarm-batch-$batchId" 35 | spark.sparkContext.setJobGroup(groupId, jobName, true) 36 | 37 | import spark.implicits._ 38 | val tableIds = data.groupBy(s"${RecordDetail.source}", s"${RecordDetail.topic}").count().map{ 39 | row => 40 | (row.getAs[String](s"${RecordDetail.source}"), row.getAs[String](s"${RecordDetail.topic}"), row.getAs[Long]("count")) 41 | }.collect() 42 | 43 | WowLog.logInfo(s"batch info (source, topic, count):\n${tableIds.mkString("\n")}") 44 | 45 | if (tableIds.isEmpty) { 46 | WowLog.logInfo("batch tableIds is empty return directly!") 47 | return 48 | } 49 | 50 | val rulesWithItemId:Array[(String,AlarmRuleConf)] = tableIds.flatMap{ 51 | case (source, topic, _) => 52 | val key = AlarmRuleConf.getRkey(source, topic) // rule redis key 53 | RedisOperations.getTableCache(Array(key)).collect() // get rules 54 | }.map{ 55 | case (ruleConfId, ruleConf) => 56 | (ruleConfId, AlarmRuleConf.formJson(ruleConf)) 57 | } 58 | 59 | if (rulesWithItemId.isEmpty){ 60 | WowLog.logInfo("alarm rule confs is empty return directly!") 61 | return 62 | } 63 | 64 | rulesWithItemId.filter(null != _._2).foreach{ 65 | item => 66 | val 
rule = item._2 // 告警规则 67 | val policyConf = RedisOperations.getTableCache(AlarmPolicyConf.getRkey(rule.source.`type`, rule.source.topic), rule.item_id) 68 | val policy = if(policyConf != null && policyConf.nonEmpty) AlarmPolicyConf.formJson(policyConf) else null //告警策略 69 | 70 | try { 71 | // sql filter 72 | WowLog.logInfo("AlarmFlow table filter...") 73 | val filterTable = filterFunc(data, rule, policy) 74 | WowLog.logInfo("AlarmFlow table filter pass!") 75 | 76 | 77 | sinkAndAlert(filterTable, sinkFunc, alertFunc){ 78 | () => 79 | val tasks = taskList.iterator() 80 | WowLog.logInfo(s"We will run ${taskList.size()} tasks...") 81 | while (tasks.hasNext){ 82 | val task = tasks.next() 83 | val result = runTask(task) 84 | if (result._1) { 85 | tasks.remove() 86 | } else { 87 | killBatchJob(spark, groupId, jobName) 88 | throw result._2.get 89 | } 90 | } 91 | WowLog.logInfo(s"All task completed! Current task list number is: ${taskList.size()}.") 92 | }(rule, policy) 93 | } catch { 94 | case e:SQLClubException => 95 | logError(e.getMessage, e) 96 | } 97 | } 98 | WowLog.logInfo("Alarm flow end!") 99 | } 100 | 101 | def killBatchJob(spark:SparkSession, groupId:String, jobName: String) = { 102 | logInfo(s"Try to kill batch job: $groupId, job name: $jobName.") 103 | spark.sparkContext.cancelJobGroup(groupId) 104 | logInfo(s"Batch job: $groupId killed! Job name: $jobName.") 105 | } 106 | 107 | def nextGroupId = UUID.randomUUID().toString 108 | 109 | def sinkAndAlert(filterTable:Dataset[RecordDetail], 110 | sinkFunc:Dataset[RecordDetail]=>Unit, 111 | alertFunc:(Dataset[RecordDetail],AlarmPolicyConf)=>Unit)(run:()=>Unit) 112 | (implicit ruleConf: AlarmRuleConf, policyConf: AlarmPolicyConf): Unit ={ 113 | try { 114 | filterTable.persist() 115 | if (filterTable.count() == 0) { 116 | WowLog.logInfo("filterTable is empty, don't need to run sink and alert functions return directly!") 117 | return 118 | } 119 | 120 | // alarm data sink 121 | if (ConfigUtils.hasConfig(SQLALARM_SINKS)) { 122 | val sinkTask = executors.submit(new Callable[Unit] { 123 | override def call(): Unit ={ 124 | WowLog.logInfo("AlarmFlow table sink...") 125 | sinkFunc(filterTable) 126 | WowLog.logInfo("AlarmFlow table sink task will be executed in the future!") 127 | } 128 | }) 129 | taskList.add(sinkTask) 130 | } 131 | 132 | // alarm record alert 133 | if (ConfigUtils.hasConfig(SQLALARM_ALERT)){ 134 | 135 | val alertTask = executors.submit(new Callable[Unit] { 136 | override def call(): Unit ={ 137 | WowLog.logInfo("AlarmFlow table alert...") 138 | alertFunc(filterTable, policyConf) 139 | WowLog.logInfo("AlarmFlow table alert task will be executed in the future!") 140 | } 141 | }) 142 | taskList.add(alertTask) 143 | } 144 | run() 145 | }finally { 146 | filterTable.unpersist() 147 | } 148 | } 149 | 150 | def runTask( task:Future[Unit] ): (Boolean, Option[SQLClubException]) = { 151 | if (task != null && !task.isDone) { 152 | try { 153 | task.get(taskTimeOut, TimeUnit.MILLISECONDS) 154 | } catch { 155 | case e if e.isInstanceOf[InterruptedException] || e.isInstanceOf[ExecutionException] => 156 | logError(e.getMessage, e) 157 | case e: TimeoutException => 158 | logWarning(e.getMessage, e) 159 | return (false, Some(new SQLClubException(e.getMessage, e))) 160 | } 161 | } 162 | (true, None) 163 | } 164 | 165 | def destroy = { 166 | if (executors != null) { 167 | import scala.collection.JavaConverters._ 168 | val unfinishedTasks = taskList.asScala.filterNot(_.isDone).asJava 169 | WowLog.logInfo(s"There are ${unfinishedTasks.size} outstanding 
tasks to be executed...") 170 | val tasks = unfinishedTasks.iterator() 171 | while (tasks.hasNext){ 172 | val task = tasks.next() 173 | val result = runTask(task) 174 | if (result._1) { 175 | tasks.remove() 176 | } else { 177 | throw result._2.get 178 | } 179 | } 180 | WowLog.logInfo(s"All task completed! Current task list number is: ${unfinishedTasks.size()}.") 181 | if (!executors.isShutdown) executors.shutdownNow() 182 | } 183 | } 184 | 185 | } 186 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/AlarmReduce.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import tech.sqlclub.common.log.Logging 5 | import org.apache.spark.sql.Dataset 6 | import dt.sql.alarm.reduce.PolicyAnalyzeEngine 7 | import dt.sql.alarm.reduce.engine._ 8 | import tech.sqlclub.common.utils.JacksonUtils 9 | import org.apache.spark.sql.functions._ 10 | import dt.sql.alarm.core.Constants._ 11 | import dt.sql.alarm.reduce.EngineResult 12 | import RecordDetail.{item_id, job_id, _} 13 | import org.apache.spark.sql.expressions.Window 14 | import dt.sql.alarm.conf._ 15 | import dt.sql.alarm.conf.PolicyType._ 16 | import dt.sql.alarm.conf.WindowType._ 17 | import dt.sql.alarm.conf.PolicyUnit._ 18 | import tech.sqlclub.common.exception.SQLClubException 19 | 20 | /** 21 | * 22 | * Created by songgr on 2019/12/25. 23 | */ 24 | object AlarmReduce extends Logging { 25 | 26 | // RecordDetail all fields 27 | lazy val fields = RecordDetail.getAllFieldName.flatMap(field=> List(lit(field), col(field)) ) 28 | 29 | def reduce(data:Dataset[RecordDetail], policy: AlarmPolicyConf): Array[EngineResult] = { 30 | val spark = data.sparkSession 31 | val engine = getPolicyAnalyzeEngine(policy.policy.`type`, policy.window.`type`, policy.policy.unit) 32 | import spark.implicits._ 33 | // 获取相关key的信息 34 | val keyInfos = data.groupBy(item_id, job_id).count().map { 35 | row => 36 | (row.getAs[String](item_id), row.getAs[String](job_id)) 37 | }.collect() 38 | 39 | WowLog.logInfo("Alarm reduce starting. 
Dim key info: " + keyInfos.mkString("\n")) 40 | 41 | // get redis cache 42 | val cacheRdd = keyInfos.map { 43 | case (item_id, job_id) => 44 | RedisOperations.getListCache(AlarmPolicyConf.getCacheKey(item_id, job_id) + "*") 45 | }.reduce(_ union _) 46 | 47 | val cacheRecord = cacheRdd.map{ 48 | row => 49 | JacksonUtils.fromJson[RecordDetail](row, classOf[RecordDetail]) 50 | }.toDS.withColumn(SQL_FIELD_DATAFROM_NAME, lit(SQL_FIELD_CACHE_NAME)) // add dataFrom col 51 | 52 | val streamRecord = data.withColumn(SQL_FIELD_DATAFROM_NAME, lit(SQL_FIELD_STREAM_NAME)) // add dataFrom col 53 | .selectExpr(cacheRecord.columns :_*) //为了防止字段顺序不一致 54 | 55 | // 按比例聚合 不区分job_stat 只按对象分组 56 | val jobStatus = if (policy.policy.`type`.isScale) { 57 | lit("_") 58 | } else { 59 | col(job_stat) 60 | } 61 | 62 | /* 63 | root 64 | |-- job_id: string (nullable = true) 65 | |-- job_stat: string (nullable = false) 66 | |-- event_time: string (nullable = true) 67 | |-- message: string (nullable = true) 68 | |-- context: string (nullable = true) 69 | |-- title: string (nullable = true) 70 | |-- platform: string (nullable = true) 71 | |-- item_id: string (nullable = true) 72 | |-- source: string (nullable = true) 73 | |-- topic: string (nullable = true) 74 | |-- alarm: integer (nullable = false) 75 | |-- dataFrom: string (nullable = false) 76 | |-- value: string (nullable = true) 77 | */ 78 | 79 | val table = streamRecord // stream data union cache data 80 | .union(cacheRecord) 81 | .withColumn(job_stat, jobStatus) 82 | .withColumn(SQL_FIELD_VALUE_NAME, to_json(map(fields: _*))) // add all fields value field 83 | 84 | // logInfo("AlarmReduce streamData.union(cacheData) schema: ") 85 | // table.printSchema() 86 | 87 | val result = engine.analyse(policy, table) 88 | 89 | val warningResults = result.filter(_.hasWarning) 90 | 91 | if (warningResults.length > 0) { 92 | WowLog.logInfo("Policy Engine Analyze hasWarning result is :") 93 | logInfo(result.filter(_.hasWarning).mkString("\n")) 94 | } else { 95 | WowLog.logInfo("Policy Engine Analyze done. Has no Warning result!") 96 | } 97 | 98 | result 99 | } 100 | 101 | def cacheReduce(data:Dataset[RecordDetail]): Array[EngineResult] = { 102 | val table = data.withColumn(SQL_FIELD_VALUE_NAME, to_json(map(fields: _*))) // add all fields value field 103 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value 104 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 105 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value 106 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 107 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time 108 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 109 | .withColumn(SQL_FIELD_EARLIEST_EVENT_TIME_NAME, last(event_time) // first event time 110 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 111 | .withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value 112 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 113 | .withColumn(SQL_FIELD_COUNT_NAME, count(lit(1)) // record count 114 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 115 | 116 | 117 | val pendingRecords = table.filter(col(SQL_FIELD_RANK_NAME) === 1). 
118 | select(item_id, job_id, job_stat, SQL_FIELD_CURRENT_EVENT_TIME_NAME,SQL_FIELD_CURRENT_RECORD_NAME, 119 | SQL_FIELD_EARLIEST_EVENT_TIME_NAME,SQL_FIELD_EARLIEST_RECORD_NAME,SQL_FIELD_COUNT_NAME) 120 | // cache duration field 121 | .withColumn(SQL_FIELD_CACHE_DURATION, 122 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME))) 123 | // cache add interval 124 | .withColumn(SQL_FIELD_CACHE_ADD_INTERVAL, 125 | (unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)))/col(SQL_FIELD_COUNT_NAME) 126 | ) 127 | // cache util time 128 | .withColumn(SQL_FIELD_CACHE_UNTIL_TIME, 129 | unix_timestamp() - unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)) 130 | ) 131 | 132 | val policies = RedisOperations.getTableCache(ALARM_POLICY + "*") 133 | val policyMap = policies.map(item => (item._1, AlarmPolicyConf.formJson(item._2))).collect().toMap 134 | pendingRecords.collect().map { 135 | row => 136 | val itemId = row.getAs[String](item_id) 137 | val jobId = row.getAs[String](job_id) 138 | val jobStat = row.getAs[String](job_stat) 139 | val untilTime = row.getAs[Long](SQL_FIELD_CACHE_UNTIL_TIME) 140 | val cacheAddInterval = row.getAs[Double](SQL_FIELD_CACHE_ADD_INTERVAL) 141 | val count = row.getAs[Long](SQL_FIELD_COUNT_NAME) 142 | val key = AlarmPolicyConf.getCacheKey(itemId, jobId, jobStat) 143 | val policyConf = policyMap.get(itemId) 144 | if (policyConf.isDefined) { 145 | val policy = policyConf.get 146 | val windowType = policy.window.`type`.windowType 147 | val policyType = policy.policy.`type`.policyType 148 | val overWindow = windowType match { 149 | case WindowType.time | WindowType.timeCount => 150 | untilTime > policy.window.getTimeWindowSec * 1.2 // 乘1.2 为了和主线岔开, 有几率和主线相交 151 | case WindowType.number => 152 | untilTime > cacheAddInterval * count * 1.2 153 | 154 | } 155 | if (overWindow) { 156 | (policyType, windowType) match { 157 | // 按比例聚合 时间+次数聚合 这两种超出窗口了直接清除不需要push 158 | case (PolicyType.scale, _) => 159 | WowLog.logInfo(s"the cache has not been merged for a long time, the cache is useless, del it! key: $key") 160 | RedisOperations.delCache(key) 161 | EngineResult(false, null, null, -1) 162 | case (PolicyType.absolute, WindowType.timeCount) => 163 | if (count >= policy.window.count) { 164 | WowLog.logInfo(s"the record cache has warning and merged by daemon clean server. Agg count: $count, key: $key.") 165 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 166 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail]) 167 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue()) 168 | } else { 169 | WowLog.logInfo(s"the cache has not been merged for a long time, the cache is useless, del it! key: $key") 170 | RedisOperations.delCache(key) 171 | EngineResult(false, null, null, -1) 172 | } 173 | // 按时间聚合 次数聚合 这两种超出窗口需要把历史聚合后push 174 | case (PolicyType.absolute, WindowType.time) | (PolicyType.absolute, WindowType.number) => 175 | if (count == 1 && policy.policy.alertFirst) { 176 | // 缓存仅有一条 且 第一次已告警 直接清理不需要push 177 | WowLog.logInfo(s"this alarm record has been pushed, del it! key:$key") 178 | RedisOperations.delCache(key) 179 | EngineResult(false, null, null, -1) 180 | } else { 181 | WowLog.logInfo(s"the record cache has warning and merged by daemon clean server. 
Agg count: $count, key: $key.") 182 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 183 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail]) 184 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue()) 185 | } 186 | } 187 | } else { 188 | WowLog.logInfo(s"the record cache is under window, ignore it! key: $key.") 189 | // 没超过窗口 不聚合告警 190 | EngineResult(false, null, null, -1) 191 | } 192 | } else { 193 | // 没有匹配的聚合策略 删除key 194 | logWarning(s"has no policy, ignore it! del the key: $key.") 195 | RedisOperations.delCache(key) 196 | EngineResult(false, null, null, -1) 197 | } 198 | } 199 | 200 | } 201 | 202 | def getPolicyAnalyzeEngine(policyType:String, windowType:String, policyUnit: String):PolicyAnalyzeEngine = { 203 | (policyType.policyType, windowType.windowType) match { 204 | case (PolicyType.absolute, windowType) => { 205 | val window = windowType match { 206 | case WindowType.number => NumberWindow 207 | case WindowType.time => TimeWindow 208 | case WindowType.timeCount => TimeCountWindow 209 | } 210 | new ReduceByWindow(window) 211 | } 212 | case (PolicyType.scale, WindowType.number) => { 213 | if (policyUnit.isPercent) { 214 | new ReduceByNumScale(Percent) 215 | } else { 216 | new ReduceByNumScale(Number) 217 | } 218 | } 219 | case (PolicyType.scale, WindowType.time) => { 220 | if (policyUnit.isPercent) { 221 | new ReduceByTimeScale(Percent) 222 | } else { 223 | new ReduceByTimeScale(Number) 224 | } 225 | } 226 | case _ => 227 | throw new SQLClubException(s"Unsupported policyAnalyzeEngine type! windowType:$windowType, policyType:$policyType, policyUnit:$policyUnit.") 228 | } 229 | } 230 | 231 | } 232 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/Base.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import dt.sql.alarm.conf.Conf 4 | import org.apache.spark.sql.SparkSession 5 | 6 | trait Base { 7 | /** 8 | * 配置检查 9 | */ 10 | protected[this] def checkConfig:Option[Conf] 11 | 12 | /** 13 | * 数据处理 14 | * @param session SparkSession 15 | */ 16 | protected[this] def process(session: SparkSession) 17 | 18 | } 19 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/Constants.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | object Constants { 4 | 5 | val appName = "sqlalarm.name" 6 | 7 | val master = "sqlalarm.master" 8 | 9 | val checkpoint = "sqlalarm.checkpointLocation" 10 | 11 | val trigger = "spark.streaming.trigger.time.interval.msec" 12 | val futureTaskTimeOut = "spark.streaming.future.task.timeout.msec" 13 | val futureTasksThreadPoolSize = "spark.streaming.future.tasks.threadPool.size" 14 | val redisCacheDataPartitionNum = "spark.redis.cache.data.partition.num" 15 | 16 | val SQLALARM_SOURCES = "sqlalarm.sources" 17 | val SQLALARM_SINKS = "sqlalarm.sinks" 18 | val SQLALARM_ALERT = "sqlalarm.alert" 19 | 20 | val INPUT_PREFIX = "sqlalarm.input" 21 | val OUTPUT_PREFIX = "sqlalarm.output" 22 | 23 | val ALARM_RULE = "sqlalarm_rule" 24 | val ALARM_CACHE = "sqlalarm_cache" 25 | val ALARM_POLICY = "sqlalarm_policy" 26 | 27 | 28 | // SQL field name 29 | val SQL_FIELD_TOPIC_NAME = "topic" 30 | val SQL_FIELD_SOURCE_NAME = "source" 31 | val 
SQL_FIELD_VALUE_NAME = "value" 32 | val SQL_FIELD_EARLIEST_RECORD_NAME = "earliest_record" 33 | val SQL_FIELD_CURRENT_RECORD_NAME = "current_record" 34 | val SQL_FIELD_EARLIEST_EVENT_TIME_NAME = "earliest_event_time" 35 | val SQL_FIELD_CURRENT_EVENT_TIME_NAME = "current_event_time" 36 | val SQL_FIELD_DATAFROM_NAME = "dataFrom" 37 | val SQL_FIELD_CACHE_NAME = "cache" 38 | val SQL_FIELD_STREAM_NAME = "stream" 39 | val SQL_FIELD_RANK_NAME = "rank" 40 | val SQL_FIELD_MAXRANK_NAME = "maxRank" 41 | val SQL_FIELD_COUNT_NAME = "count" 42 | val SQL_FIELD_TOTAL_COUNT_NAME = "total_count" 43 | val SQL_FIELD_ALARM_COUNT_NAME = "alarm_count" 44 | val SQL_FIELD_ALARM_PERCENT_NAME = "alarm_percent" 45 | val SQL_FIELD_EVENT_TIME_DURATION_NAME = "event_time_duration" 46 | 47 | val SQL_FIELD_CACHE_DURATION = "cache_duration" 48 | val SQL_FIELD_CACHE_ADD_INTERVAL = "cache_add_interval" 49 | val SQL_FIELD_CACHE_UNTIL_TIME = "cache_until_time" 50 | 51 | val SPARK_REDIS_MODE = "spark.redis.mode" 52 | val SPARK_REDIS_MASTER = "spark.redis.master" 53 | val SPARK_REDIS_MASTER_DEFAULT = "mymaster" 54 | val SPARK_REDIS_SENTINEL_MODE = "sentinel" 55 | val SPARK_REDIS_SINGLE_MODE = "single" 56 | } 57 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/RecordDetail.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import org.apache.spark.sql.types._ 4 | /** 5 | * 6 | * Created by songgr on 2019/12/25. 7 | */ 8 | case class RecordDetail( 9 | job_id:String, 10 | job_stat:String, 11 | event_time:String, 12 | message:String, 13 | context:String, // map string 14 | title:String, 15 | platform:String, 16 | item_id:String, 17 | source:String, 18 | topic:String, 19 | alarm:Int // is alarm 20 | ) 21 | 22 | object RecordDetail { 23 | val job_id = "job_id" 24 | val job_stat = "job_stat" 25 | val event_time = "event_time" 26 | val message = "message" 27 | val context = "context" 28 | val title = "title" 29 | val platform = "platform" 30 | val item_id = "item_id" 31 | val source = "source" 32 | val topic = "topic" 33 | val alarm = "alarm" 34 | 35 | // sql必须字段 36 | def getAllSQLFieldName = Seq[String](job_id, job_stat, event_time, message, context) 37 | 38 | // 后台自动加入的字段 39 | def getAllBackFieldName = Seq[String](title, platform, item_id, source, topic, alarm) 40 | 41 | def getAllFieldName = getAllSQLFieldName ++ getAllBackFieldName 42 | 43 | def getAllFieldSchema = StructType(Seq( 44 | StructField(job_id, StringType), 45 | StructField(job_stat, StringType), 46 | StructField(event_time, StringType), 47 | StructField(message, StringType), 48 | StructField(context, StringType), 49 | StructField(title, StringType), 50 | StructField(platform, StringType), 51 | StructField(item_id, StringType), 52 | StructField(source, StringType), 53 | StructField(topic, StringType), 54 | StructField(alarm, IntegerType) 55 | )) 56 | } 57 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/Sink.java: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core; 2 | 3 | /** 4 | * Created by songgr on 2019/12/25. 
5 | */ 6 | import java.lang.annotation.ElementType; 7 | import java.lang.annotation.Retention; 8 | import java.lang.annotation.RetentionPolicy; 9 | import java.lang.annotation.Target; 10 | 11 | @Target(ElementType.TYPE) 12 | @Retention(RetentionPolicy.RUNTIME) 13 | public @interface Sink { 14 | String name(); 15 | } 16 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/Source.java: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core; 2 | 3 | /** 4 | * Created by songgr on 2019/12/23. 5 | */ 6 | 7 | import java.lang.annotation.ElementType; 8 | import java.lang.annotation.Retention; 9 | import java.lang.annotation.RetentionPolicy; 10 | import java.lang.annotation.Target; 11 | 12 | @Target(ElementType.TYPE) 13 | @Retention(RetentionPolicy.RUNTIME) 14 | public @interface Source { 15 | String name(); 16 | } 17 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/SparkRuntime.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import org.apache.spark.{SparkConf, SparkContext, SparkEnv} 4 | import dt.sql.alarm.input.SourceInfo 5 | import Constants._ 6 | import dt.sql.alarm.filter.SQLFilter 7 | import dt.sql.alarm.output.SinkInfo 8 | import dt.sql.alarm.reduce.EngineResult 9 | import org.apache.spark.rdd.RDD 10 | import org.apache.spark.sql.{DataFrame, SaveMode, SparkSession} 11 | import tech.sqlclub.common.log.Logging 12 | import tech.sqlclub.common.utils.ConfigUtils 13 | import org.apache.spark.sql.streaming.{StreamingQuery, Trigger} 14 | import tech.sqlclub.common.exception.SQLClubException 15 | import scala.collection.JavaConverters._ 16 | 17 | object SparkRuntime extends Logging { 18 | private var sparkSession :SparkSession = null 19 | var sparkConfMap:Map[String,String] = null 20 | var streamingQuery:StreamingQuery = null 21 | 22 | def getSparkSession:SparkSession = { 23 | if (sparkSession == null) { 24 | this.synchronized { 25 | if (sparkSession == null) { 26 | WowLog.logInfo("create Spark Runtime....") 27 | val params = ConfigUtils.toStringMap 28 | val conf = new SparkConf() 29 | params.filter(f => 30 | f._1.startsWith("spark.") || 31 | f._1.startsWith("hive.") 32 | ).foreach { f => 33 | conf.set(f._1, f._2) 34 | } 35 | if (ConfigUtils.hasConfig(appName)) { 36 | conf.setAppName(ConfigUtils.getStringValue(appName)) 37 | } 38 | if (ConfigUtils.hasConfig(master)) { 39 | conf.setMaster(ConfigUtils.getStringValue(master)) 40 | } 41 | sparkSession = SparkSession.builder().config(conf).getOrCreate() 42 | sparkConfMap = sparkSession.conf.getAll 43 | WowLog.logInfo("Spark Runtime created!!!") 44 | } 45 | } 46 | } 47 | sparkSession 48 | } 49 | 50 | def parseProcessAndSink(spark:SparkSession) = { 51 | WowLog.logInfo("spark parse process and sink start...") 52 | val sources = getSourceTable(spark) 53 | WowLog.logInfo("spark stream get all source table succeed!") 54 | logInfo("All source data schema: ") 55 | sources.printSchema() 56 | val dStreamWriter = sources.writeStream.foreachBatch{ 57 | (batchTable, batchId) => 58 | WowLog.logInfo(s"start processing batch: $batchId") 59 | val start = System.nanoTime() 60 | AlarmFlow.run(batchId, batchTable){ 61 | // filterFunc 62 | (table, rule, policy) => 63 | val filterTable = SQLFilter.process(table, rule, policy) 64 | import spark.implicits._ 65 | filterTable.as[RecordDetail] 66 | }{ 67 
| // sinkFunc 68 | table => 69 | sinks.foreach(_ process table.filter(_.alarm == 1) ) 70 | }{ 71 | // alertFunc 72 | (table, policy)=> 73 | val alarmRecords = if (null != policy) { 74 | AlarmReduce.reduce(table, policy) // alarm noise reduction 75 | } else { 76 | // no policy configured: push every record 77 | table.collect().map{ 78 | record => 79 | EngineResult(true, record, record, 1) 80 | } 81 | } 82 | AlarmAlert.push(alarmRecords) // alarm alert 83 | } 84 | val end = System.nanoTime() 85 | WowLog.logInfo(s"batch $batchId processing is done. Total time consumed: ${(end-start)/1000000} ms.") 86 | } 87 | 88 | streamingQuery = dStreamWriter 89 | .queryName(ConfigUtils.getStringValue(appName)) 90 | .option("checkpointLocation", ConfigUtils.getStringValue(checkpoint)) 91 | .trigger(Trigger.ProcessingTime(sparkConfMap.getOrElse(trigger, 92 | ConfigUtils.getStringValue(trigger, "3000")).toLong)) // default: 3s 93 | .start() 94 | } 95 | 96 | private lazy val sinks = getSinks 97 | 98 | def getSinks = { 99 | val sinks = ConfigUtils.getStringValue(SQLALARM_SINKS) 100 | val sinkNames = sinks.split(",").filterNot(_.isEmpty) 101 | 102 | assert(sinkNames.filterNot(SinkInfo.sinkExist(_)).size == 0, 103 | s"Check the configuration of sink, at present only supported: ${SinkInfo.getAllSink}" 104 | ) 105 | sinkNames.map(SinkInfo.getSink(_)) 106 | } 107 | 108 | def getSourceTable(spark:SparkSession) = { 109 | val sources_ = ConfigUtils.getStringValue(SQLALARM_SOURCES) 110 | 111 | val sourceNames = sources_.split(",").filterNot(_.isEmpty) 112 | 113 | assert(sourceNames.filterNot(SourceInfo.sourceExist(_)).size == 0, 114 | s"Check the configuration of sources, at present only supported: ${SourceInfo.getAllSource}" 115 | ) 116 | 117 | val sources = sourceNames.map { 118 | sourceName => 119 | logInfo(s"spark stream create source $sourceName!") 120 | SourceInfo.getSource(sourceName).getDataSetStream(spark) 121 | } 122 | /* 123 | root 124 | |-- source: string (nullable = false) 125 | |-- topic: string (nullable = false) 126 | |-- value: string (nullable = false) 127 | */ 128 | sources.filter(_ != null).reduce(_ union _) 129 | } 130 | } 131 | 132 | object RedisOperations { 133 | import redis.clients.jedis.Jedis 134 | import com.redislabs.provider.redis._ 135 | import redis.clients.jedis.ScanParams 136 | import com.redislabs.provider.redis.util.ConnectionUtils 137 | 138 | lazy private val spark = SparkRuntime.getSparkSession 139 | def sc:SparkContext = spark.sparkContext 140 | 141 | lazy private val redisEndpoint = RedisConfig.fromSparkConf(SparkEnv.get.conf).initialHost 142 | lazy private val readWriteConfig = ReadWriteConfig.fromSparkConf(SparkEnv.get.conf) 143 | 144 | def IncorrectMsg = s"RedisOperations keysOrKeyPattern should be String or Array[String]" 145 | 146 | def getTableCache[T](keysOrKeyPattern: T, partitionNum:Int):RDD[(String, String)] = { 147 | keysOrKeyPattern match { 148 | case keyPattern: String => sc.fromRedisHash(keyPattern.asInstanceOf[String], partitionNum) 149 | case keys: Array[String] => sc.fromRedisHash(keys.asInstanceOf[Array[String]], partitionNum) 150 | case _ => throw new SQLClubException(IncorrectMsg) 151 | } 152 | } 153 | 154 | def getTableCache[T](keysOrKeyPattern: T):RDD[(String, String)] = getTableCache(keysOrKeyPattern, 3) 155 | 156 | def getTableCache(key: String, field:String) 157 | (implicit conn:Jedis = redisEndpoint.connect()):String = { 158 | ConnectionUtils.withConnection[String](conn) { 159 | conn => 160 | conn.hget(key, field) 161 | } 162 | } 163 | 164 | 165 | def addTableCache(key: String, field: 
String, value: String) 166 | (implicit conn:Jedis = redisEndpoint.connect()): Long = { 167 | ConnectionUtils.withConnection[Long](conn) { 168 | conn => 169 | conn.hset(key, field, value) 170 | } 171 | } 172 | 173 | 174 | def getListCache[T](keysOrKeyPattern:T, partitionNum:Int=3):RDD[String] = { 175 | keysOrKeyPattern match { 176 | case keyPattern: String => sc.fromRedisList(keyPattern.asInstanceOf[String], partitionNum) 177 | case keys: Array[String] => sc.fromRedisList(keys.asInstanceOf[Array[String]], partitionNum) 178 | case _ => throw new SQLClubException(IncorrectMsg) 179 | } 180 | 181 | } 182 | 183 | def scanListCacheKeys(keyPattern:String) 184 | (implicit conn:Jedis = redisEndpoint.connect(), config:ReadWriteConfig = readWriteConfig):Seq[String]= { 185 | ConnectionUtils.withConnection[Seq[String]](conn) { 186 | conn => 187 | val keys = new java.util.ArrayList[String] 188 | val params = new ScanParams().`match`(keyPattern).count(config.scanCount) 189 | var cursor = "0" 190 | do { 191 | val scan = conn.scan(cursor, params) 192 | keys.addAll(scan.getResult) 193 | cursor = scan.getCursor 194 | } while (cursor != "0") 195 | keys.asScala 196 | } 197 | } 198 | 199 | def setListCache[T](key:String, data:T, saveMode: SaveMode, ttl:Int=0) = { 200 | if (SaveMode.Overwrite == saveMode) { 201 | val conn = redisEndpoint.connect() 202 | ConnectionUtils.withConnection[Long](conn) { 203 | conn => 204 | conn.del(key) 205 | } 206 | } 207 | import spark.implicits._ 208 | val rdd = data match { 209 | case rdd:RDD[String] => rdd.filter(s => s != null && s.nonEmpty).map(s => s.toString) 210 | case df:DataFrame => df.filter(_ != null).map(row => row.getAs[String](0)).rdd 211 | } 212 | 213 | sc.toRedisLIST(rdd, key, ttl) 214 | } 215 | 216 | def delCache(keys:String*) 217 | (implicit conn:Jedis = redisEndpoint.connect()): Long = { 218 | ConnectionUtils.withConnection[Long](conn) { 219 | conn => 220 | conn.del(keys:_*) 221 | } 222 | } 223 | 224 | } 225 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/core/WowLog.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.core 2 | 3 | import tech.sqlclub.common.log.Logging 4 | 5 | object WowLog extends Logging { 6 | 7 | override def logInfo(msg: => String): Unit = { 8 | val info = s""" ###### $msg ###### """ 9 | super.logInfo(info) 10 | } 11 | 12 | override def logInfo(msg: => String, throwable: Throwable): Unit = { 13 | val info = s""" ###### $msg ###### """ 14 | super.logInfo(info, throwable) 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/filter/SQLFilter.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.filter 2 | 3 | import dt.sql.alarm.conf.{AlarmPolicyConf, AlarmRuleConf} 4 | import dt.sql.alarm.core.RecordDetail._ 5 | import org.apache.spark.sql.functions._ 6 | import org.apache.spark.sql.{DataFrame, Dataset, Row} 7 | import tech.sqlclub.common.exception.SQLClubException 8 | import tech.sqlclub.common.log.Logging 9 | import org.apache.spark.sql.types.{MapType, StringType} 10 | import dt.sql.alarm.core.Constants.SQL_FIELD_VALUE_NAME 11 | import dt.sql.alarm.core.{RedisOperations, WowLog} 12 | import org.apache.spark.sql.catalyst.plans.logical.{Project, Union} 13 | 14 | object SQLFilter extends Logging { 15 | 16 | lazy private val requireCols = getAllSQLFieldName 17 | lazy private val 
requireSchema = getAllFieldSchema.map(f => (f.name, f.dataType)).toMap 18 | 19 | def process(df:Dataset[Row], ruleConf:AlarmRuleConf, policy:AlarmPolicyConf):DataFrame = { 20 | val spark = df.sparkSession 21 | 22 | val source_ = ruleConf.source 23 | val structures = ruleConf.filter.structure 24 | val tableName = ruleConf.filter.table 25 | val sql = ruleConf.filter.sql.trim 26 | 27 | val fields = structures.map{ 28 | field => 29 | s"cast(get_json_object($SQL_FIELD_VALUE_NAME, '${field.xpath}') as ${field.`type`}) as ${field.name}" 30 | } 31 | 32 | val table = try { 33 | df.filter( col(source) === source_.`type` and col(topic) === source_.topic ).selectExpr(fields :_*) 34 | } catch { 35 | case e:Exception => throw new SQLClubException(e.getMessage, e) 36 | } 37 | 38 | logInfo(s"rule item_id: ${ruleConf.item_id}, the SQLFilter SQL table [ $tableName ] schema: ") 39 | table.printSchema() 40 | 41 | table.createOrReplaceTempView(tableName) 42 | 43 | def checkSQLSyntax(sql: String): (Boolean, String) = { 44 | try { 45 | // this only validates the SQL syntax 46 | val logicalPlan = spark.sessionState.sqlParser.parsePlan(sql) 47 | if (!logicalPlan.resolved) { 48 | // only here is the plan analyzed against the table schema 49 | spark.sessionState.executePlan(logicalPlan).assertAnalyzed() 50 | (true, "") 51 | } else { 52 | (true, "") 53 | } 54 | } catch { 55 | case e:Exception => 56 | (false, e.getMessage) 57 | } 58 | } 59 | 60 | val ck = checkSQLSyntax(sql) 61 | if (!ck._1) throw new SQLClubException(s"input filter sql error! item_id: ${ruleConf.item_id}"+ ".sql:\n" + sql + " .\n\n" + ck._2) 62 | 63 | logInfo(s"input ruleConf:[source:${source_.`type`}, topic:$topic, tableName:$tableName]. item_id: ${ruleConf.item_id}, exec SQL: $sql") 64 | 65 | val sqlPlan = spark.sql(sql).queryExecution.analyzed 66 | 67 | val sqlCols = sqlPlan.output.map{att => att.name.toLowerCase} 68 | 69 | val b = (true /: requireCols){(x,y) => x && sqlCols.contains(y)} 70 | 71 | if(!b){ 72 | logError(s"rule item_id: ${ruleConf.item_id}, exec sql output cols must contain the col list: " + requireCols) 73 | throw new SQLClubException("exec sql output cols error! 
find cols: [" + sqlCols.mkString(",") + "],requires: [" + requireCols.mkString(",") + "]!") 74 | } 75 | 76 | /* 77 | root 78 | |-- job_id: string (nullable = true) 79 | |-- job_stat: string (nullable = true) 80 | |-- event_time: string (nullable = true) 81 | |-- message: string (nullable = true) 82 | |-- context: string (nullable = true) 83 | |-- title: string (nullable = false) 84 | |-- platform: string (nullable = false) 85 | |-- item_id: string (nullable = false) 86 | |-- source: string (nullable = false) 87 | |-- topic: string (nullable = false) 88 | |-- alarm: integer (nullable = false) 89 | */ 90 | val filtertab = spark.sql(sql).selectExpr(requireCols :_* ).selectExpr("*" , 91 | s"'${ruleConf.title}' as $title", 92 | s"'${ruleConf.platform}' as $platform", 93 | s"'${ruleConf.item_id}' as $item_id", 94 | s"'${source_.`type`}' as $source", 95 | s"'${source_.topic}' as $topic" 96 | ).withColumn(context, to_json(col(context))) 97 | .withColumn(alarm, lit(1)) 98 | 99 | // logInfo("SQLFilter SQL table filter result schema: ") 100 | // filtertab.printSchema() 101 | 102 | import dt.sql.alarm.conf.PolicyType._ 103 | val result = if (policy != null && policy.policy.`type`.isScale){ 104 | 105 | // currently the filter sql only supports a single simple SQL statement (union is allowed) 106 | val project = sqlPlan match { 107 | case p if p.isInstanceOf[Union] => p.children.head.asInstanceOf[Project] 108 | case p if p.isInstanceOf[Project] => p.asInstanceOf[Project] 109 | case _ => null 110 | } 111 | 112 | if (project == null) throw new SQLClubException(s"Only supports simple SQL! item_id: ${ruleConf.item_id}"+ ". sql:\n" + sql + " .") 113 | 114 | val output = project.projectList.map(_.sql).mkString(",") 115 | val ssql = s"SELECT $output FROM $tableName" 116 | 117 | logInfo(s"rule item_id: ${ruleConf.item_id}, the simplified SQL: \n" + ssql) 118 | if (!checkSQLSyntax(ssql)._1) throw new SQLClubException(s"Simplified sql error! item_id: ${ruleConf.item_id}"+ ". 
Simplified sql:\n" + ssql + " .\n\n" + ck._2) 119 | 120 | val table = spark.sql(ssql) 121 | .withColumn(item_id, lit(ruleConf.item_id)) 122 | .withColumn(context, to_json(col(context))) 123 | 124 | // we need to pull the job data already cached in redis, because the scale policy must also take normal records into account; even if every record in the current stream is normal it still has to go into the corresponding cache 125 | val redisCacheKeys = RedisOperations.scanListCacheKeys(AlarmPolicyConf.getCacheKey(policy.item_id) + "*") 126 | WowLog.logInfo(s"Under the rule id: ${policy.item_id}, existing redis cache keys: [" + redisCacheKeys.mkString(", ") + "]") 127 | import spark.implicits._ 128 | val cacheKeys = redisCacheKeys.map{ 129 | key => 130 | val its = key.split(":") 131 | if (its.size >= 3) { 132 | (its(1), its(2)) 133 | } else null 134 | }.toDF(item_id, job_id) 135 | 136 | val dimTab = filtertab.select(item_id,job_id).union(cacheKeys).groupBy(item_id, job_id).count() 137 | 138 | // join the dimension table to drop records unrelated to the current batch 139 | val pendingTab = table.join(dimTab, Seq(item_id, job_id), "inner") 140 | .join(filtertab, getAllSQLFieldName :+ item_id, "left_outer") 141 | .withColumn(alarm, when(isnull(col(alarm)), 0).otherwise(1)) 142 | 143 | pendingTab.selectExpr(getAllFieldName :_*) 144 | 145 | } else { 146 | filtertab 147 | } 148 | 149 | val schema = result.schema.map{ 150 | structField => 151 | val name = structField.name 152 | val dataType = if(structField.dataType.isInstanceOf[MapType]) MapType(StringType,StringType) else structField.dataType 153 | (name, dataType) 154 | }.toMap 155 | 156 | if ( !requireSchema.equals(schema) ){ 157 | throw new SQLClubException(s"the filter sql exec result schema error! item_id: ${ruleConf.item_id}, schema: ${filtertab.schema}") 158 | } 159 | 160 | // filter out dirty data where job_id or event_time is null 161 | result.filter(not(isnull(col(job_id))) and not(isnull(col(event_time)))).distinct() 162 | } 163 | } 164 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/input/BaseInput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.input 2 | 3 | 4 | import dt.sql.alarm.core.{Base, Source} 5 | import org.apache.spark.sql.{Dataset, Row, SparkSession} 6 | import org.reflections.Reflections 7 | 8 | 9 | abstract class BaseInput extends Base { 10 | 11 | def getDataSetStream(spark:SparkSession):Dataset[Row] 12 | 13 | def fullFormat: String 14 | 15 | def shortFormat: String 16 | 17 | } 18 | 19 | 20 | object SourceInfo { 21 | 22 | import scala.collection.JavaConverters._ 23 | private val inputWithAnnotation = new Reflections(this.getClass.getPackage.getName) 24 | .getTypesAnnotatedWith(classOf[Source]) 25 | 26 | private val sourceMapping = inputWithAnnotation.asScala.map{subclass => 27 | val name = subclass.getAnnotation(classOf[Source]).name() 28 | (name, subclass) 29 | }.toMap[String, Class[_]] 30 | 31 | 32 | def getSource(name:String):BaseInput = sourceMapping(name).newInstance().asInstanceOf[BaseInput] 33 | 34 | def getAllSource = sourceMapping.keySet 35 | 36 | def sourceExist(name:String) = sourceMapping.contains(name) 37 | 38 | } 39 | 40 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/input/Constants.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.input 2 | 3 | object Constants { 4 | val KAFKA_TOPIC = "kafka.topic" 5 | val KAFKA_SUBSCRIBE_TOPIC_PATTERN = "kafka.subscribe.topic.pattern" 6 | 7 | object SubscribeType extends Enumeration{ 8 | type SubscribeType = Value 9 | val assign = Value(0, 
"assign") 10 | val subscribe = Value(1, "subscribe") 11 | val subscribePattern = Value(2,"subscribePattern") 12 | 13 | override def toString(): String = { 14 | s"{0:$assign, 1:$subscribe, 2:$subscribePattern}" 15 | } 16 | } 17 | 18 | val KAFKA_SERVERS = "kafka.bootstrap.servers" 19 | val KAFKA_GROUP = "kafka.group" 20 | val KAFKA_DEFAULT_GROUP = "sqlalarm_kafka_group" 21 | 22 | 23 | val REDIS_KEYS = "redis.keys" 24 | val REDIS_GROUP = "redis.group" 25 | val REDIS_DEFAULT_GROUP = "sqlalarm_redis_group" 26 | val REDIS_START_OFFSETS = "redis.start.offsets" 27 | val REDIS_CONSUMER_PREFIX = "redis.consumer.prefix" 28 | val REDIS_STREAM_PARALLELISM = "redis.stream.parallelism" 29 | val REDIS_STREAM_BATCH_SIZE = "redis.stream.batch.size" 30 | val REDIS_STREAM_READ_BLOCK_MSEC = "redis.stream.read.block.msec" 31 | 32 | } 33 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/input/KafkaInput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.input 2 | import org.apache.commons.lang3.StringUtils 3 | import org.apache.spark.sql.{Dataset, Row, SparkSession} 4 | import Constants._ 5 | import dt.sql.alarm.conf.KafkaConf 6 | import dt.sql.alarm.core.{Source, WowLog} 7 | import tech.sqlclub.common.exception.SQLClubException 8 | import tech.sqlclub.common.log.Logging 9 | import tech.sqlclub.common.utils.ConfigUtils 10 | import dt.sql.alarm.core.Constants._ 11 | 12 | /** 13 | * kafka消息输入 14 | * Created by songgr on 2019/12/20. 15 | */ 16 | 17 | @Source(name = "kafka") 18 | class KafkaInput extends BaseInput with Logging { 19 | @transient private var dStream:Dataset[Row] = _ 20 | val max_poll_records = 1000 21 | val startingOffsets = "latest" 22 | 23 | override def getDataSetStream(spark: SparkSession): Dataset[Row] = { 24 | process(spark) 25 | dStream 26 | } 27 | 28 | override protected[this] def checkConfig: Option[KafkaConf] = { 29 | val topic = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_TOPIC") 30 | val subscribeTypeIndex = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$KAFKA_SUBSCRIBE_TOPIC_PATTERN", 2) 31 | val servers = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_SERVERS") 32 | val group = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$KAFKA_GROUP", KAFKA_DEFAULT_GROUP) 33 | 34 | val isValid = StringUtils.isNoneBlank(topic) && 35 | StringUtils.isNoneBlank(servers) && 36 | StringUtils.isNoneBlank(group) 37 | 38 | if (!isValid) { 39 | throw new SQLClubException(s"$KAFKA_TOPIC and $KAFKA_SERVERS are needed in kafka input conf and cant be empty!") 40 | } 41 | 42 | if (subscribeTypeIndex <0 || subscribeTypeIndex >2) 43 | throw new SQLClubException(s"$KAFKA_SUBSCRIBE_TOPIC_PATTERN must between 0 and 2. 
Reference:$SubscribeType") 44 | 45 | Some(KafkaConf(SubscribeType(subscribeTypeIndex), topic, servers, group)) 46 | } 47 | 48 | override protected[this] def process(session: SparkSession) = { 49 | WowLog.logInfo("Alarm kafka source process....") 50 | val conf = checkConfig 51 | if (conf.isDefined) { 52 | val kafkaConf = conf.get 53 | var options = Map("kafka.bootstrap.servers" -> kafkaConf.servers, 54 | s"${kafkaConf.subscribeType}" -> kafkaConf.topic, 55 | "group.id" -> kafkaConf.group 56 | ) 57 | // 默认配置 58 | options += ("startingOffsets" -> startingOffsets, "max.poll.records" -> max_poll_records.toString, "failOnDataLoss" -> "false") 59 | val lines = session.readStream 60 | .format(fullFormat) 61 | .options(options) 62 | .load() 63 | 64 | dStream = lines.selectExpr(s"'${shortFormat}' as ${SQL_FIELD_SOURCE_NAME}", s"${SQL_FIELD_TOPIC_NAME}", s"CAST(value AS STRING) as ${SQL_FIELD_VALUE_NAME}") 65 | WowLog.logInfo("Alarm kafka source process over!") 66 | } 67 | 68 | } 69 | 70 | override def fullFormat: String = shortFormat 71 | 72 | override def shortFormat: String = "kafka" 73 | } 74 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/input/RedisInput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.input 2 | 3 | import dt.sql.alarm.conf.RedisConf 4 | import dt.sql.alarm.core.{Source, WowLog} 5 | import org.apache.spark.sql.{Dataset, Row, SparkSession} 6 | import dt.sql.alarm.input.Constants._ 7 | import org.apache.commons.lang3.StringUtils 8 | import org.apache.spark.sql.types.{StringType, StructField, StructType} 9 | import tech.sqlclub.common.exception.SQLClubException 10 | import tech.sqlclub.common.log.Logging 11 | import tech.sqlclub.common.utils.ConfigUtils 12 | import dt.sql.alarm.core.Constants._ 13 | 14 | /** 15 | * 16 | * Created by songgr on 2019/12/20. 
17 | */ 18 | 19 | @Source(name = "redis") 20 | class RedisInput extends BaseInput with Logging { 21 | @transient private var dStream:Dataset[Row] = _ 22 | 23 | override def getDataSetStream(spark: SparkSession): Dataset[Row] = { 24 | process(spark) 25 | dStream 26 | } 27 | 28 | /** 29 | * 配置检查 30 | */ 31 | override protected[this] def checkConfig: Option[RedisConf] = { 32 | val keys = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_KEYS") 33 | val group = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_GROUP", REDIS_DEFAULT_GROUP) 34 | val offsets = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_START_OFFSETS") 35 | val consumer_prefix = ConfigUtils.getStringValue(s"$INPUT_PREFIX.$REDIS_CONSUMER_PREFIX") 36 | val parallelism = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$REDIS_STREAM_PARALLELISM") 37 | val batch_size = ConfigUtils.getIntValue(s"$INPUT_PREFIX.$REDIS_STREAM_BATCH_SIZE") 38 | val block_msec = ConfigUtils.getLongValue(s"$INPUT_PREFIX.$REDIS_STREAM_READ_BLOCK_MSEC") 39 | 40 | val isValid = StringUtils.isNoneBlank(keys) 41 | 42 | if (!isValid) throw new SQLClubException(s"$REDIS_KEYS is needed in redis input conf and cant be empty!") 43 | 44 | val conf = RedisConf(keys,offsets,group,consumer_prefix) 45 | if (parallelism > 0) conf.parallelism = parallelism 46 | if (batch_size > 0) conf.batch_size = batch_size 47 | if (block_msec > 0) conf.read_block_msec = block_msec 48 | 49 | Some(conf) 50 | } 51 | 52 | /** 53 | * 数据处理 54 | * 55 | * @param session SparkSession 56 | */ 57 | override protected[this] def process(session: SparkSession): Unit = { 58 | WowLog.logInfo("Alarm redis source process....") 59 | val conf = checkConfig 60 | if (conf.isDefined) { 61 | val redisConf = conf.get 62 | 63 | var options = Map("stream.keys" -> redisConf.keys, 64 | "stream.group.name" -> redisConf.group, 65 | "stream.parallelism" -> redisConf.parallelism, 66 | "stream.read.batch.size" -> redisConf.batch_size, 67 | "stream.read.block" -> redisConf.read_block_msec 68 | ) 69 | 70 | if (redisConf.consumer_prefix != null && redisConf.consumer_prefix.nonEmpty) 71 | options += ("stream.consumer.prefix" -> redisConf.consumer_prefix) 72 | 73 | if (redisConf.start_offsets != null && redisConf.start_offsets.nonEmpty) 74 | options += ("stream.offsets" -> redisConf.start_offsets) 75 | 76 | val lines = session.readStream 77 | .format(fullFormat) 78 | .options(options.map(kv => (kv._1, kv._2.toString))) 79 | .schema(StructType(Array( // stream fields 80 | StructField("_id", StringType), 81 | StructField("key", StringType), 82 | StructField("value", StringType) 83 | ))) 84 | .load() 85 | 86 | dStream = lines.selectExpr(s"'${shortFormat}' as ${SQL_FIELD_SOURCE_NAME}", s"CAST(key AS STRING) as ${SQL_FIELD_TOPIC_NAME}", s"CAST(value AS STRING) as ${SQL_FIELD_VALUE_NAME}") 87 | WowLog.logInfo("Alarm redis source process over!") 88 | } 89 | } 90 | 91 | override def fullFormat: String = shortFormat 92 | 93 | override def shortFormat: String = "redis" 94 | } 95 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/output/BaseOutput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.output 2 | 3 | import dt.sql.alarm.core.{RecordDetail, Base, Sink} 4 | import org.apache.spark.sql.Dataset 5 | import org.reflections.Reflections 6 | 7 | /** 8 | * 9 | * Created by songgr on 2019/12/25. 
10 | */ 11 | abstract class BaseOutput extends Base { 12 | 13 | def process(data:Dataset[RecordDetail]) 14 | 15 | def fullFormat: String 16 | 17 | def shortFormat: String 18 | 19 | } 20 | 21 | object SinkInfo { 22 | 23 | import scala.collection.JavaConverters._ 24 | private val outputwithAnnotation = new Reflections(this.getClass.getPackage.getName) 25 | .getTypesAnnotatedWith(classOf[Sink]) 26 | 27 | private val sinkMapping = outputwithAnnotation.asScala.map{ subclass => 28 | val name = subclass.getAnnotation(classOf[Sink]).name() 29 | (name, subclass) 30 | }.toMap[String, Class[_]] 31 | 32 | 33 | def getSink(name:String):BaseOutput = sinkMapping(name).newInstance().asInstanceOf[BaseOutput] 34 | 35 | def getAllSink = sinkMapping.keySet 36 | 37 | def sinkExist(name:String) = sinkMapping.contains(name) 38 | 39 | } -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/output/ConsoleOutput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.output 2 | import java.util.concurrent.atomic.AtomicBoolean 3 | 4 | import dt.sql.alarm.conf.Conf 5 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog} 6 | import tech.sqlclub.common.log.Logging 7 | import tech.sqlclub.common.utils.ConfigUtils 8 | import org.apache.spark.sql.{Dataset, SparkSession} 9 | 10 | 11 | @Sink(name = "console") 12 | class ConsoleOutput extends BaseOutput with Logging { 13 | var runtimeConfig:Map[String,String] = _ 14 | var numRows = 20 15 | var truncate = true 16 | var flag = new AtomicBoolean(false) 17 | WowLog.logInfo("Console sink initialization......") 18 | 19 | override protected[this] def checkConfig: Option[Conf] = None 20 | 21 | 22 | override protected[this] def process(session: SparkSession): Unit = { 23 | if (!flag.get) { 24 | flag.synchronized { 25 | if (!flag.get) { 26 | runtimeConfig = session.conf.getAll 27 | numRows = runtimeConfig.getOrElse(Constants.showNumRows, 28 | ConfigUtils.getStringValue(Constants.showNumRows, "20")).toInt 29 | truncate = runtimeConfig.getOrElse(Constants.showTruncate, 30 | ConfigUtils.getStringValue(Constants.showTruncate, "true")).toBoolean 31 | flag.set(true) 32 | } 33 | } 34 | } 35 | } 36 | 37 | override def process(data: Dataset[RecordDetail]): Unit = { 38 | process(data.sparkSession) 39 | WowLog.logInfo("Alarm console sink process....") 40 | data.show(numRows, truncate) 41 | WowLog.logInfo("Alarm console sink process over!") 42 | } 43 | 44 | override def fullFormat: String = shortFormat 45 | 46 | override def shortFormat: String = "console" 47 | } 48 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/output/Constants.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.output 2 | 3 | object Constants { 4 | 5 | val showNumRows = "spark.show.table.numRows" 6 | val showTruncate = "spark.show.table.truncate" 7 | 8 | val jdbcUrl = "jdbc.url" 9 | val jdbcDriver = "jdbc.driver" 10 | val jdbcUser = "jdbc.user" 11 | val jdbcPassword = "jdbc.password" 12 | val jdbcTable = "jdbc.table" 13 | val jdbcImplClass = "jdbc.implClass" 14 | val jdbcNumPartitions = "jdbc.numPartitions" 15 | val jdbcBatchsize = "jdbc.batchsize" 16 | val jdbcMode = "jdbc.mode" 17 | 18 | 19 | val kafkaImplClass = "kafka.implClass" 20 | val KAFKA_ACKS = "kafka.acks" 21 | val KAFKA_KEY_SERIALIZER_CLASS = "key.serializer.class" 22 | val KAFKA_VALUE_SERIALIZER_CLASS = 
"value.serializer.class" 23 | val KAFKA_TOPIC = dt.sql.alarm.input.Constants.KAFKA_TOPIC 24 | val KAFKA_SERVERS = dt.sql.alarm.input.Constants.KAFKA_SERVERS 25 | } 26 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/output/JdbcOutput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.output 2 | 3 | import java.util.concurrent.atomic.AtomicBoolean 4 | 5 | import dt.sql.alarm.conf.JdbcConf 6 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog} 7 | import org.apache.spark.sql.{Dataset, SparkSession} 8 | import tech.sqlclub.common.log.Logging 9 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils} 10 | import dt.sql.alarm.core.Constants._ 11 | import dt.sql.alarm.output.Constants._ 12 | import org.apache.commons.lang3.StringUtils 13 | import tech.sqlclub.common.exception.SQLClubException 14 | 15 | /** 16 | * jdbc sink 17 | * Created by songgr on 2020/01/06. 18 | */ 19 | @Sink(name = "jdbc") 20 | class JdbcOutput extends BaseOutput with Logging { 21 | var jdbcConf:JdbcConf = _ 22 | var flag = new AtomicBoolean(false) 23 | WowLog.logInfo("JDBC sink initialization......") 24 | 25 | override def fullFormat: String = shortFormat 26 | 27 | override def shortFormat: String = "jdbc" 28 | 29 | override def process(data: Dataset[RecordDetail]): Unit = { 30 | process(data.sparkSession) 31 | WowLog.logInfo("Alarm JDBC sink process....") 32 | 33 | val format = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcImplClass", fullFormat) 34 | 35 | val json = JacksonUtils.toJson(jdbcConf) 36 | val options = JacksonUtils.fromJson(json, classOf[Map[String,AnyRef]]).map(kv => (kv._1, kv._2.toString)) 37 | 38 | data.drop(RecordDetail.alarm).write.format(format).options(options).mode(jdbcConf.mode).save(jdbcConf.dbtable) 39 | 40 | WowLog.logInfo("Alarm JDBC sink process over!") 41 | 42 | } 43 | 44 | /** 45 | * 配置检查 46 | */ 47 | override protected[this] def checkConfig: Option[JdbcConf] = { 48 | val url = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcUrl") 49 | val driver = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcDriver") 50 | val user = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcUser") 51 | val password = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcPassword") 52 | val table = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcTable") 53 | val numPartitions = ConfigUtils.getIntValue(s"$OUTPUT_PREFIX.$jdbcNumPartitions") 54 | val batchsize = ConfigUtils.getIntValue(s"$OUTPUT_PREFIX.$jdbcBatchsize") 55 | val mode = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$jdbcMode") 56 | 57 | val isValid = StringUtils.isNoneBlank(url) && 58 | StringUtils.isNoneBlank(driver) && 59 | StringUtils.isNoneBlank(user) 60 | 61 | if (!isValid) { 62 | throw new SQLClubException(s"$jdbcUrl and $jdbcDriver and $jdbcUser are needed in jdbc sink conf and cant be empty!") 63 | } 64 | 65 | val conf = JdbcConf(url, driver, user, password) 66 | if (StringUtils.isNoneBlank(table)) 67 | conf.dbtable = table 68 | if (numPartitions > 0) 69 | conf.numPartitions = numPartitions 70 | if (batchsize > 0) 71 | conf.batchsize = batchsize 72 | if (StringUtils.isNoneBlank(mode)) 73 | conf.mode = mode 74 | 75 | Some(conf) 76 | } 77 | 78 | /** 79 | * 数据处理 80 | * 81 | * @param session SparkSession 82 | */ 83 | override protected[this] def process(session:SparkSession): Unit = { 84 | if (!flag.get) { 85 | flag.synchronized { 86 | if (!flag.get) { 87 | jdbcConf = checkConfig.get 88 | flag.set(true) 89 | } 
90 | } 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/output/KafkaOutput.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.output 2 | 3 | import java.util.concurrent.atomic.AtomicBoolean 4 | 5 | import dt.sql.alarm.conf.KafkaConf 6 | import dt.sql.alarm.core.{RecordDetail, Sink, WowLog} 7 | import org.apache.spark.sql.{Dataset, SparkSession} 8 | import tech.sqlclub.common.log.Logging 9 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils} 10 | import dt.sql.alarm.core.Constants.OUTPUT_PREFIX 11 | import dt.sql.alarm.output.Constants._ 12 | import org.apache.commons.lang3.StringUtils 13 | import org.apache.kafka.clients.producer.ProducerConfig 14 | import org.apache.kafka.common.serialization.StringSerializer 15 | import tech.sqlclub.common.exception.SQLClubException 16 | 17 | /** 18 | * kafka sink 19 | * Created by songgr on 2020/01/08. 20 | */ 21 | 22 | @Sink(name = "kafka") 23 | class KafkaOutput extends BaseOutput with Logging { 24 | val KAFKA_KEY_ATTRIBUTE_NAME = "key" 25 | val KAFKA_VALUE_ATTRIBUTE_NAME = "value" 26 | val KAFKA_BOOTSTRAP_SERVERS_NAME = "kafka.bootstrap.servers" 27 | val KAFKA_TOPIC_NAME = "topic" 28 | 29 | var kafkaConf:KafkaConf = _ 30 | var flag = new AtomicBoolean(false) 31 | WowLog.logInfo("Kafka sink initialization......") 32 | 33 | override def process(data: Dataset[RecordDetail]): Unit = { 34 | val spark = data.sparkSession 35 | process(spark) 36 | WowLog.logInfo("Alarm Kafka sink process....") 37 | 38 | val format = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$kafkaImplClass", fullFormat) 39 | var options = Map(KAFKA_BOOTSTRAP_SERVERS_NAME -> kafkaConf.servers, 40 | KAFKA_TOPIC_NAME -> kafkaConf.topic 41 | ) 42 | options += (ProducerConfig.ACKS_CONFIG -> ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_ACKS", "-1")) 43 | options += (ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG -> 44 | ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_KEY_SERIALIZER_CLASS", classOf[StringSerializer].getName)) 45 | options += (ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG -> 46 | ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_VALUE_SERIALIZER_CLASS", classOf[StringSerializer].getName)) 47 | 48 | import spark.implicits._ 49 | data.map{ 50 | record => 51 | (StringUtils.join(Array(record.job_id,record.job_stat), ":") 52 | , JacksonUtils.toJson(record) 53 | ) 54 | }.toDF(KAFKA_KEY_ATTRIBUTE_NAME,KAFKA_VALUE_ATTRIBUTE_NAME).write 55 | .format(format).options(options).mode("append").save() 56 | 57 | WowLog.logInfo("Alarm Kafka sink process over!") 58 | } 59 | 60 | override def fullFormat: String = shortFormat 61 | 62 | override def shortFormat: String = "kafka" 63 | 64 | /** 65 | * 配置检查 66 | */ 67 | override protected[this] def checkConfig: Option[KafkaConf] = { 68 | val topic = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_TOPIC") 69 | val servers = ConfigUtils.getStringValue(s"$OUTPUT_PREFIX.$KAFKA_SERVERS") 70 | 71 | val isValid = StringUtils.isNoneBlank(topic) && 72 | StringUtils.isNoneBlank(servers) 73 | 74 | if (!isValid) { 75 | throw new SQLClubException(s"$KAFKA_TOPIC and $KAFKA_SERVERS are needed in kafka sink conf and cant be empty!") 76 | } 77 | 78 | Some(KafkaConf(null, topic, servers, null)) 79 | } 80 | 81 | /** 82 | * 数据处理 83 | * 84 | * @param session SparkSession 85 | */ 86 | override protected[this] def process(session: SparkSession): Unit = { 87 | if (!flag.get) { 88 | flag.synchronized { 89 | 
if (!flag.get) { 90 | kafkaConf = checkConfig.get 91 | flag.set(true) 92 | } 93 | } 94 | } 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/PolicyAnalyzeEngine.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import dt.sql.alarm.core.Constants.SQL_FIELD_VALUE_NAME 5 | import dt.sql.alarm.core.{RecordDetail, RedisOperations, WowLog} 6 | import dt.sql.alarm.core.RecordDetail.{event_time, item_id, job_id, job_stat} 7 | import org.apache.spark.sql.functions.col 8 | import org.apache.spark.sql.{Dataset, Row, SaveMode} 9 | 10 | /** 11 | * Noise reduction policy analysis engine 12 | * Created by songgr on 2020/01/09. 13 | */ 14 | abstract class PolicyAnalyzeEngine { 15 | 16 | def analyse(policy: AlarmPolicyConf, records:Dataset[Row]):Array[EngineResult] 17 | 18 | def addCache(cacheDf: Dataset[Row], mode:SaveMode):Unit = { 19 | WowLog.logInfo("Add alarm records into redis cache...") 20 | cacheDf.persist() 21 | try { 22 | if (cacheDf.count() > 0) { 23 | val jobInfos = cacheDf.groupBy(item_id, job_id, job_stat).count().collect().map{ 24 | row => 25 | (row.getAs[String](item_id), row.getAs[String](job_id), row.getAs[String](job_stat)) 26 | } 27 | WowLog.logInfo(s"cache infos:\n ${jobInfos.mkString("\n")}") 28 | jobInfos.foreach{ 29 | jobInfo => 30 | val cache = cacheDf.filter(col(item_id) === jobInfo._1 and col(job_id) === jobInfo._2 and col(job_stat) === jobInfo._3) 31 | .select(col(SQL_FIELD_VALUE_NAME)).orderBy(col(event_time)) 32 | .repartition(1) // repartition to a single partition so the write stays ordered 33 | 34 | val key = AlarmPolicyConf.getCacheKey(jobInfo._1, jobInfo._2, jobInfo._3) 35 | RedisOperations.setListCache(key, cache, mode) 36 | WowLog.logInfo(s"add cache records, key: $key, mode: ${mode.name}") 37 | } 38 | } 39 | } finally { 40 | cacheDf.unpersist() 41 | } 42 | } 43 | 44 | } 45 | 46 | 47 | case class EngineResult(hasWarning:Boolean, 48 | lastAlarmRecord:RecordDetail, 49 | firstAlarmRecord:RecordDetail, 50 | reduceCount:Int 51 | ) -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/engine/AggWindow.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce.engine 2 | 3 | // aggregation window 4 | trait AggWindow 5 | // time window 6 | object TimeWindow extends AggWindow 7 | // time + count window 8 | object TimeCountWindow extends AggWindow 9 | // number window 10 | object NumberWindow extends AggWindow -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByNumScale.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce.engine 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import dt.sql.alarm.core.Constants._ 5 | import dt.sql.alarm.core.RecordDetail._ 6 | import dt.sql.alarm.core.{RecordDetail, WowLog} 7 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine} 8 | import org.apache.spark.sql.expressions.Window 9 | import org.apache.spark.sql.functions._ 10 | import org.apache.spark.sql.{Dataset, Row, SaveMode} 11 | import tech.sqlclub.common.utils.JacksonUtils 12 | 13 | /** 14 | * 15 | * Created by songgr on 2020/03/10. 
16 | */ 17 | class ReduceByNumScale(scale: Scale) extends PolicyAnalyzeEngine{ 18 | 19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]): Array[EngineResult] = { 20 | WowLog.logInfo("Noise Reduction Policy: ReduceByNumScale analyzing....") 21 | 22 | val table_rank = records.withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value 23 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) ) 24 | .filter(col(SQL_FIELD_RANK_NAME) <= policy.window.value) // analyze only the most recent n records 25 | 26 | table_rank.persist() 27 | 28 | try { 29 | val alarmEndpoints = table_rank 30 | .filter(col(alarm) === 1) 31 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value 32 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) ) 33 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value 34 | over( Window.partitionBy(item_id, job_id) ) ) 35 | .groupBy(item_id, job_id) 36 | .agg( 37 | first(SQL_FIELD_CURRENT_RECORD_NAME).alias(SQL_FIELD_CURRENT_RECORD_NAME), // current alarm record 38 | first(SQL_FIELD_EARLIEST_RECORD_NAME).alias(SQL_FIELD_EARLIEST_RECORD_NAME) // earliest historical alarm record 39 | ) 40 | 41 | val pendingRecords = table_rank.groupBy(item_id, job_id) 42 | .agg( 43 | count(alarm).alias(SQL_FIELD_TOTAL_COUNT_NAME), // total record count 44 | sum(alarm).alias(SQL_FIELD_ALARM_COUNT_NAME), // alarm record count 45 | (sum(alarm) / count(alarm)).alias(SQL_FIELD_ALARM_PERCENT_NAME) // alarm record ratio 46 | ) 47 | 48 | val alarmRecords = 49 | scale match { 50 | case Number => 51 | pendingRecords.filter(col(SQL_FIELD_ALARM_COUNT_NAME) > policy.policy.getValue) 52 | case Percent => 53 | pendingRecords.filter(col(SQL_FIELD_TOTAL_COUNT_NAME) >= policy.window.value and // the total count must reach the required number 54 | col(SQL_FIELD_ALARM_PERCENT_NAME) > policy.policy.getValue) 55 | } 56 | 57 | val result = alarmRecords.join(alarmEndpoints, Seq(item_id,job_id), "left_outer").collect().map{ 58 | row => 59 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 60 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail]) 61 | val count = row.getAs[Long](SQL_FIELD_ALARM_COUNT_NAME) 62 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue()) 63 | } 64 | 65 | // records that did not trigger an alarm still need to go into the cache 66 | val cacheDF = table_rank.join(alarmRecords, Seq(item_id,job_id) , "left_outer") 67 | .filter(isnull(alarmRecords(SQL_FIELD_ALARM_PERCENT_NAME))) 68 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME)) 69 | 70 | addCache(cacheDF, SaveMode.Overwrite) 71 | 72 | result 73 | 74 | } finally { 75 | table_rank.unpersist() 76 | } 77 | 78 | } 79 | } 80 | 81 | 82 | 83 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByTimeScale.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce.engine 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import dt.sql.alarm.core.Constants._ 5 | import dt.sql.alarm.core.RecordDetail._ 6 | import dt.sql.alarm.core.{RecordDetail, WowLog} 7 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine} 8 | import org.apache.spark.sql.expressions.Window 9 | import org.apache.spark.sql.functions._ 10 | import org.apache.spark.sql.{Dataset, Row, SaveMode} 11 | import tech.sqlclub.common.utils.JacksonUtils 12 | 13 | /** 14 | * 15 | * Created by songgr on 
2020/03/11. 16 | */ 17 | class ReduceByTimeScale(scale: Scale) extends PolicyAnalyzeEngine{ 18 | 19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]): Array[EngineResult] = { 20 | WowLog.logInfo("Noise Reduction Policy: ReduceByTimeScale analyzing....") 21 | 22 | val table = records 23 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time 24 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) ) 25 | // analyze only the records within the last time window T 26 | .filter(unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - 27 | unix_timestamp(col(event_time)) <= policy.window.getTimeWindowSec 28 | ) 29 | 30 | table.persist() 31 | 32 | try { 33 | val alarmEndpoints = table 34 | .filter(col(alarm) === 1) 35 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value 36 | over( Window.partitionBy(item_id, job_id) orderBy col(event_time).desc ) ) 37 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value 38 | over( Window.partitionBy(item_id, job_id) ) ) 39 | .groupBy(item_id, job_id) 40 | .agg( 41 | first(SQL_FIELD_CURRENT_RECORD_NAME).alias(SQL_FIELD_CURRENT_RECORD_NAME), // current alarm record 42 | first(SQL_FIELD_EARLIEST_RECORD_NAME).alias(SQL_FIELD_EARLIEST_RECORD_NAME) // earliest historical alarm record 43 | ) 44 | 45 | 46 | val pendingRecords = table.groupBy(item_id, job_id) 47 | .agg( 48 | (unix_timestamp(max(event_time)) - unix_timestamp(min(event_time))).alias(SQL_FIELD_EVENT_TIME_DURATION_NAME), // event time span 49 | count(alarm).alias(SQL_FIELD_TOTAL_COUNT_NAME), // total record count 50 | sum(alarm).alias(SQL_FIELD_ALARM_COUNT_NAME), // alarm record count 51 | (sum(alarm) / count(alarm)).alias(SQL_FIELD_ALARM_PERCENT_NAME) // alarm record ratio 52 | ) 53 | 54 | 55 | val alarmRecords = 56 | scale match { 57 | case Number => 58 | pendingRecords.filter(col(SQL_FIELD_ALARM_COUNT_NAME) > policy.policy.getValue) // alarm count reaches the threshold 59 | case Percent => 60 | pendingRecords.filter(col(SQL_FIELD_EVENT_TIME_DURATION_NAME) >= (policy.window.getTimeWindowSec * policy.policy.getValue) and // the time span must cover the window 61 | col(SQL_FIELD_ALARM_PERCENT_NAME) > policy.policy.getValue) 62 | } 63 | 64 | val result = alarmRecords.join(alarmEndpoints, Seq(item_id,job_id), "left_outer").collect().map{ 65 | row => 66 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 67 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail]) 68 | val count = row.getAs[Long](SQL_FIELD_ALARM_COUNT_NAME) 69 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue()) 70 | } 71 | 72 | // records that did not trigger an alarm still need to go into the cache 73 | val cacheDF = table.join(alarmRecords, Seq(item_id,job_id) , "left_outer") 74 | .filter(isnull(alarmRecords(SQL_FIELD_ALARM_PERCENT_NAME))) 75 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME)) 76 | 77 | addCache(cacheDF, SaveMode.Overwrite) 78 | 79 | result 80 | 81 | } finally { 82 | table.unpersist() 83 | } 84 | 85 | } 86 | 87 | } 88 | 89 | 90 | 91 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/engine/ReduceByWindow.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce.engine 2 | 3 | import dt.sql.alarm.conf.AlarmPolicyConf 4 | import dt.sql.alarm.core.{RecordDetail, WowLog} 5 | import dt.sql.alarm.reduce.{EngineResult, PolicyAnalyzeEngine} 6 | import 
org.apache.spark.sql.expressions.Window 7 | import org.apache.spark.sql.{Dataset, Row, SaveMode} 8 | import org.apache.spark.sql.functions._ 9 | import dt.sql.alarm.core.Constants._ 10 | import tech.sqlclub.common.utils.JacksonUtils 11 | import dt.sql.alarm.core.RecordDetail._ 12 | 13 | /** 14 | * 15 | * Created by songgr on 2020/01/09. 16 | */ 17 | class ReduceByWindow(window: AggWindow) extends PolicyAnalyzeEngine { 18 | 19 | override def analyse(policy: AlarmPolicyConf, records: Dataset[Row]):Array[EngineResult] = { 20 | WowLog.logInfo("Noise Reduction Policy: ReduceByWindow analyzing....") 21 | 22 | // filter alarm records 23 | val table = records.filter(col(alarm) === 1) 24 | 25 | // group by job_id,job_stat order by event_time desc 26 | val table_rank = table 27 | .withColumn(SQL_FIELD_CURRENT_RECORD_NAME, first(SQL_FIELD_VALUE_NAME) // current record value 28 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 29 | .withColumn(SQL_FIELD_EARLIEST_RECORD_NAME, last(SQL_FIELD_VALUE_NAME) // first record value 30 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 31 | .withColumn(SQL_FIELD_CURRENT_EVENT_TIME_NAME, first(event_time) // current event time 32 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 33 | .withColumn(SQL_FIELD_EARLIEST_EVENT_TIME_NAME, last(event_time) // first event time 34 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 35 | .withColumn(SQL_FIELD_RANK_NAME, row_number() // rank value 36 | over( Window.partitionBy(item_id, job_id, job_stat) orderBy col(event_time).desc ) ) 37 | .withColumn(SQL_FIELD_DATAFROM_NAME, min(SQL_FIELD_DATAFROM_NAME) // datafrom value is cache if has record which from redis cache 38 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 39 | .withColumn(SQL_FIELD_COUNT_NAME, count(lit(1)) // record count 40 | over( Window.partitionBy(item_id, job_id, job_stat) ) ) 41 | 42 | val pendingRecords = table_rank.filter(col(SQL_FIELD_RANK_NAME) === 1). 
43 | select(item_id, job_id, job_stat, SQL_FIELD_CURRENT_EVENT_TIME_NAME,SQL_FIELD_CURRENT_RECORD_NAME, 44 | SQL_FIELD_EARLIEST_EVENT_TIME_NAME,SQL_FIELD_EARLIEST_RECORD_NAME,SQL_FIELD_DATAFROM_NAME,SQL_FIELD_COUNT_NAME) 45 | 46 | pendingRecords.persist() 47 | 48 | try { 49 | // first alarm 50 | val firstAlarmRecords = if (policy.policy.alertFirst) { 51 | val firstAlarmRecords = pendingRecords.filter( 52 | col(SQL_FIELD_DATAFROM_NAME) === SQL_FIELD_STREAM_NAME and // only from stream 53 | col(SQL_FIELD_COUNT_NAME) >= 1 // and count>=1 54 | ) 55 | 56 | firstAlarmRecords.collect().map { 57 | row=> 58 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 59 | EngineResult(true, firstAlarmRecord, firstAlarmRecord, 1) 60 | } 61 | 62 | } else { 63 | Array(EngineResult(false, null, null, -1)) 64 | } 65 | 66 | val alarmRecords = window match { 67 | case NumberWindow => 68 | pendingRecords.filter(col(SQL_FIELD_COUNT_NAME) >= policy.window.value ) 69 | case TimeWindow => 70 | pendingRecords.filter( 71 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - 72 | unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)) >= policy.window.getTimeWindowSec 73 | ) 74 | // 近T时间达到n条 75 | case TimeCountWindow => 76 | pendingRecords.filter( 77 | unix_timestamp(col(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) - 78 | unix_timestamp(col(SQL_FIELD_EARLIEST_EVENT_TIME_NAME)) <= policy.window.getTimeWindowSec 79 | and 80 | col(SQL_FIELD_COUNT_NAME) >= policy.window.count 81 | ) 82 | } 83 | 84 | val streamAlarmRecords = alarmRecords.collect().map{ 85 | row => 86 | val lastAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_CURRENT_RECORD_NAME), classOf[RecordDetail]) 87 | val firstAlarmRecord = JacksonUtils.fromJson(row.getAs[String](SQL_FIELD_EARLIEST_RECORD_NAME), classOf[RecordDetail]) 88 | val count = row.getAs[Long](SQL_FIELD_COUNT_NAME) 89 | EngineResult(true, lastAlarmRecord, firstAlarmRecord, count.intValue()) 90 | } 91 | 92 | WowLog.logInfo(s"Noise Reduction Policy: ReduceByWindow analysis completed! 
windowType:${policy.window.`type`}, alarm records size:${streamAlarmRecords.length}") 93 | 94 | // 没有产生告警的记录需要入cache 95 | val cacheDF = table.join(alarmRecords, Seq(item_id,job_id,job_stat) , "left_outer") 96 | .filter(isnull(alarmRecords(SQL_FIELD_CURRENT_EVENT_TIME_NAME)) and table(SQL_FIELD_DATAFROM_NAME) === SQL_FIELD_STREAM_NAME) // 只加流记录 97 | .select(col(item_id), col(job_id), col(job_stat), col(event_time), col(SQL_FIELD_VALUE_NAME)) 98 | 99 | addCache(cacheDF, SaveMode.Append) 100 | 101 | firstAlarmRecords ++ streamAlarmRecords 102 | } finally { 103 | pendingRecords.unpersist() 104 | } 105 | } 106 | } 107 | 108 | 109 | -------------------------------------------------------------------------------- /sa-core/src/main/java/dt/sql/alarm/reduce/engine/Scale.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.reduce.engine 2 | 3 | // 刻度 4 | trait Scale 5 | // 百分比 6 | object Percent extends Scale 7 | // 次数 8 | object Number extends Scale 9 | 10 | -------------------------------------------------------------------------------- /sa-core/src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | //// spark conf 2 | //spark { 3 | // streaming.trigger.time.interval.msec = 1000 4 | // streaming.future.task.timeout.msec = 300000 5 | // show.table.numRows = 100 6 | // show.table.truncate = true 7 | // redis.cache.data.partition.num = 8 8 | // 9 | // redis.host = 127.0.0.1 10 | // redis.port = 6379 11 | // redis.db = 4 12 | //// redis.auth = 13 | //// redis.timeout = 14 | //// redis.max.pipeline.size = 15 | //// redis.scan.count = 16 | //} 17 | // 18 | // 19 | //sqlalarm { 20 | // // event sources, can more than one 21 | // sources = "kafka,redis" 22 | // 23 | // // alarm event input source conf 24 | // input { 25 | // kafka { 26 | // topic = "sqlalarm_event" 27 | // subscribe.topic.pattern = 1 28 | // bootstrap.servers = "127.0.0.1:9092" 29 | // group = "sqlalarm_group" 30 | // } 31 | // redis { 32 | // keys = "sqlalarm_redis_event" 33 | // group = "sqlalarm_redis_group" 34 | // batch.size = 100 35 | // } 36 | // } 37 | // 38 | // // alarm sink, can more than one 39 | // sinks = "console,kafka,jdbc" 40 | // 41 | // // alarm record sink canal conf 42 | // output { 43 | // kafka { 44 | // 45 | // } 46 | // jdbc { 47 | // url = "jdbc:mysql://127.0.0.1:3306/test?characterEncoding=utf8&zeroDateTimeBehavior=convertToNull&tinyInt1isBit=false" 48 | // driver = "com.mysql.jdbc.Driver" 49 | // user = "xxx" 50 | // password = "xxx" 51 | // } 52 | // } 53 | // 54 | // checkpointLocation = "checkpoint" 55 | // 56 | // // alarm alert conf, use rest api usually 57 | // alert { 58 | // pigeonApi = "https://dt.sqlclub/api/pigeon" 59 | // } 60 | //} -------------------------------------------------------------------------------- /sa-core/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # This is the configuring for logging displayed in the Application Server 2 | #log4j.rootCategory=info, stdout, Rolling, debug 3 | log4j.rootCategory=info, stdout 4 | 5 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 6 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 7 | log4j.appender.stdout.layout.ConversionPattern=[sql-alarm] %d{yyyy-MM-dd HH:mm:ss,SSS} %p [%t] %c{1}.%M(%L) | %m%n 8 | 9 | log4j.appender.Rolling=org.apache.log4j.RollingFileAppender 10 | log4j.appender.Rolling.Encoding=UTF-8 11 | 
log4j.appender.Rolling.File=log/sql-alarm.log 12 | log4j.appender.Rolling.MaxFileSize=5120KB 13 | log4j.appender.Rolling.MaxBackupIndex=10 14 | log4j.appender.Rolling.layout=org.apache.log4j.PatternLayout 15 | log4j.appender.Rolling.layout.ConversionPattern=[sql-alarm] %d{yyyy-MM-dd HH:mm:ss,SSS} %p [%t] %c{1}.%M(%L) | %m%n 16 | 17 | log4j.logger.org.apache.spark.storage.ShuffleBlockFetcherIterator=WARN 18 | log4j.logger.org.apache.spark.executor.Executor=WARN 19 | log4j.logger.org.apache.spark.ContextCleaner=WARN 20 | log4j.logger.org.apache.spark.scheduler.TaskSetManager=WARN 21 | log4j.logger.org.apache.spark.scheduler.DAGScheduler=WARN 22 | log4j.logger.org.apache.spark.sql.execution.columnar.InMemoryTableScanExec=WARN 23 | log4j.logger.org.apache.spark.storage.BlockManager=WARN 24 | log4j.logger.org.apache.spark.storage.BlockManagerInfo=WARN 25 | log4j.logger.org.apache.spark.storage.memory.MemoryStore=WARN -------------------------------------------------------------------------------- /sa-core/src/test/java/dt/sql/alarm/test/InputSuite.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.test 2 | 3 | import dt.sql.alarm.input.{KafkaInput, RedisInput} 4 | import org.scalatest.FunSuite 5 | 6 | 7 | class InputSuite extends FunSuite with LocalSparkApp { 8 | 9 | test("kafka input test") { 10 | val session = spark 11 | val ds = new KafkaInput().getDataSetStream(session) 12 | assert(ds != null) 13 | } 14 | 15 | test("redis stream input") { 16 | val session = spark 17 | val ds = new RedisInput().getDataSetStream(session) 18 | assert(ds != null) 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /sa-core/src/test/java/dt/sql/alarm/test/LocalSparkApp.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.test 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | trait LocalSparkApp { 6 | 7 | def spark = { 8 | SparkSession.builder() 9 | .appName("LocalSparkApp") 10 | .master("local[*]") 11 | .getOrCreate() 12 | } 13 | 14 | } 15 | -------------------------------------------------------------------------------- /sa-core/src/test/java/dt/sql/alarm/test/RedisOperationsSuite.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.test 2 | 3 | import dt.sql.alarm.core.Constants.{ALARM_CACHE, appName, master} 4 | import dt.sql.alarm.core.{RecordDetail, RedisOperations, SparkRuntime} 5 | import org.apache.spark.sql.SaveMode 6 | import org.scalatest.FunSuite 7 | import tech.sqlclub.common.utils.{ConfigUtils, JacksonUtils} 8 | 9 | /** 10 | * 11 | * Created by songgr on 2020/01/13. 
12 | */ 13 | class RedisOperationsSuite extends FunSuite { 14 | 15 | test("rule") { 16 | ConfigUtils.configBuilder(Map( 17 | appName -> "RedisOperationsSuite", 18 | master -> "local[2]", 19 | "spark.redis.host" -> "127.0.0.1", 20 | "spark.redis.port" -> "6379", 21 | "spark.redis.db" -> "4" 22 | )) 23 | 24 | val key = "sqlalarm_rule:kafka:sqlalarm_event" 25 | val field = "uuid00000001" 26 | 27 | val value = 28 | """ 29 | |{ 30 | | "item_id":"uuid00000001", 31 | | "platform":"alarm", 32 | | "title":"sql alarm test", 33 | | "source":{ 34 | | "type":"kafka", 35 | | "topic":"sqlalarm_event" 36 | | }, 37 | | "filter":{ 38 | | "table":"fail_job", 39 | | "structure":[ 40 | | { 41 | | "name":"job_name", 42 | | "type":"string", 43 | | "xpath":"$.job_name" 44 | | }, 45 | | { 46 | | "name":"job_owner", 47 | | "type":"string", 48 | | "xpath":"$.job_owner" 49 | | }, 50 | | { 51 | | "name":"job_stat", 52 | | "type":"string", 53 | | "xpath":"$.job_stat" 54 | | }, 55 | | { 56 | | "name":"job_time", 57 | | "type":"string", 58 | | "xpath":"$.job_time" 59 | | } 60 | | ], 61 | | "sql":"select job_name as job_id,job_stat,job_time as event_time, job_stat as message, map('job_owner',job_owner) as context from fail_job where job_stat='Fail'" 62 | | } 63 | |} 64 | """.stripMargin 65 | 66 | 67 | RedisOperations.addTableCache(key, field, value) 68 | 69 | 70 | } 71 | 72 | 73 | test("policy") { 74 | ConfigUtils.configBuilder(Map( 75 | appName -> "RedisOperationsSuite", 76 | master -> "local[2]", 77 | "spark.redis.host" -> "127.0.0.1", 78 | "spark.redis.port" -> "6379", 79 | "spark.redis.db" -> "4" 80 | )) 81 | 82 | val key = "sqlalarm_policy:kafka:sqlalarm_event" 83 | val field = "uuid00000001" 84 | 85 | val value = 86 | """ 87 | |{ 88 | | "item_id" : "uuid00000001", 89 | | "window": { 90 | | "type": "time", 91 | | "value": 10, 92 | | "unit": "m" 93 | | }, 94 | | "policy":{ 95 | | "type":"scale", 96 | | "agg":"count", 97 | | "value":100, 98 | | "first_alert": 1 99 | | } 100 | |} 101 | """.stripMargin 102 | 103 | val value1 = 104 | """ 105 | |{ 106 | | "item_id" : "uuid00000001", 107 | | "window": { 108 | | "type": "time", 109 | | "value": 10, 110 | | "unit": "m" 111 | | }, 112 | | "policy":{ 113 | | "type":"absolute" 114 | | } 115 | |} 116 | """.stripMargin 117 | 118 | val value2 = 119 | """ 120 | |{ 121 | | "item_id" : "uuid00000001", 122 | | "window": { 123 | | "type": "number", 124 | | "value": 4, 125 | | "unit": "n" 126 | | }, 127 | | "policy":{ 128 | | "type":"scale", 129 | | "unit":"number", 130 | | "value":2, 131 | | "first_alert": 1 132 | | } 133 | |} 134 | """.stripMargin 135 | 136 | val value3 = 137 | """ 138 | |{ 139 | | "item_id" : "uuid00000001", 140 | | "window": { 141 | | "type": "time", 142 | | "value": 10, 143 | | "unit": "m" 144 | | }, 145 | | "policy":{ 146 | | "type":"scale", 147 | | "unit":"number", 148 | | "value":2, 149 | | "first_alert": 1 150 | | } 151 | |} 152 | """.stripMargin 153 | 154 | 155 | RedisOperations.addTableCache(key, field, value3) 156 | 157 | 158 | } 159 | 160 | 161 | test("cache") { 162 | ConfigUtils.configBuilder(Map( 163 | appName -> "RedisOperationsSuite", 164 | master -> "local[2]", 165 | "spark.redis.host" -> "127.0.0.1", 166 | "spark.redis.port" -> "6379", 167 | "spark.redis.db" -> "4" 168 | )) 169 | 170 | val spark = SparkRuntime.getSparkSession 171 | 172 | val key = "sqlalarm_cache:uuid00000001:sqlalarm_job_001:Fail" 173 | 174 | val json = JacksonUtils.prettyPrint[RecordDetail](RecordDetail( 175 | "jobid", 176 | "fail", 177 | "2019", 178 | "sss", 179 | "cont", 180 | 
"title", 181 | "ppp", 182 | "001", 183 | "sss", 184 | "tt", 185 | 1 186 | )) 187 | 188 | val rdd = spark.sparkContext.parallelize(Seq(json), 1) 189 | 190 | 191 | RedisOperations.setListCache(key, rdd, SaveMode.Overwrite) 192 | 193 | 194 | } 195 | 196 | test("ops") { 197 | ConfigUtils.configBuilder(Map( 198 | appName -> "RedisOperationsSuite", 199 | master -> "local[2]", 200 | "spark.redis.host" -> "127.0.0.1", 201 | "spark.redis.port" -> "6379", 202 | "spark.redis.db" -> "4" 203 | )) 204 | val spark = SparkRuntime.getSparkSession 205 | val rdd = RedisOperations.getListCache("test:111*") 206 | 207 | val map = RedisOperations.getTableCache("sqlalarm_policy*") 208 | val conf = map.collect().toMap 209 | 210 | 211 | import spark.implicits._ 212 | val ds = rdd.toDS() 213 | 214 | ds.printSchema() 215 | 216 | ds.show() 217 | 218 | println(ds.count()) 219 | 220 | val tb = Seq("a","b","c").toDS() 221 | 222 | tb.printSchema() 223 | tb.show() 224 | println(tb.count()) 225 | 226 | val c = tb.union(ds).count() 227 | 228 | println(c) 229 | 230 | } 231 | 232 | } 233 | -------------------------------------------------------------------------------- /sa-core/src/test/java/dt/sql/alarm/test/SQLAlarmBootTest.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.test 2 | 3 | import dt.sql.alarm.SQLAlarmBoot 4 | 5 | object SQLAlarmBootTest { 6 | 7 | def main(args: Array[String]): Unit = { 8 | SQLAlarmBoot.main( 9 | Array( 10 | "-sqlalarm.master", "local[*]", 11 | "-sqlalarm.name", "sqlalarm", 12 | "-spark.redis.host", "127.0.0.1", 13 | "-spark.redis.port", "6379", 14 | "-spark.redis.db", "4", 15 | "-sqlalarm.sources", "kafka", 16 | "-sqlalarm.input.kafka.topic", "sqlalarm_event", 17 | "-sqlalarm.input.kafka.subscribe.topic.pattern", "1", 18 | "-sqlalarm.input.kafka.bootstrap.servers", "127.0.0.1:9092", 19 | "-sqlalarm.sinks", "console", 20 | "-sqlalarm.output.kafka.topic", "sqlalarm_output", 21 | "-sqlalarm.output.kafka.bootstrap.servers", "127.0.0.1:9092", 22 | "-sqlalarm.checkpointLocation", "checkpoint", 23 | "sqlalarm.alert.pigeonApi", "https://dt.sqlclub/api/pigeon" 24 | 25 | ) 26 | ) 27 | } 28 | 29 | } 30 | -------------------------------------------------------------------------------- /sa-core/src/test/java/dt/sql/alarm/test/SparkRedisTest.scala: -------------------------------------------------------------------------------- 1 | package dt.sql.alarm.test 2 | 3 | import com.redislabs.provider.redis._ 4 | import com.redislabs.provider.redis.util.ConnectionUtils 5 | import org.apache.spark.sql.SparkSession 6 | 7 | 8 | object SparkRedisTest { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | val spark = SparkSession.builder() 13 | .appName("SparkRedisTest") 14 | .master("local[4]") 15 | .config("spark.redis.host", "127.0.0.1") 16 | .config("spark.redis.port", "6379") 17 | .getOrCreate() 18 | 19 | val sc = spark.sparkContext 20 | 21 | val keysRDD = sc.fromRedisKeyPattern() 22 | 23 | val stringRDD = sc.fromRedisKV(Array("test")) 24 | val strs = stringRDD.collect() 25 | 26 | val keys = keysRDD.collect() 27 | 28 | import spark.implicits._ 29 | val df = stringRDD.toDF() 30 | 31 | 32 | println(keys.mkString(",")) 33 | 34 | 35 | val listRDD = sc.fromRedisList(Array("list1")) 36 | 37 | val table = listRDD.toDF() 38 | table.printSchema() 39 | table.show() 40 | val tb = spark.read.json(listRDD.toDS()) 41 | tb.printSchema() 42 | tb.show() 43 | 44 | val conn = RedisConfig.fromSparkConf(sc.getConf).initialHost.connect() 45 | 46 | val str = new 
RedisEndpoint(sc.getConf).connect().rpop("list1") 47 | 48 | println(str) 49 | 50 | val s = ConnectionUtils.withConnection[String](conn){ 51 | conn => 52 | conn.get("test") 53 | } 54 | 55 | println(s) 56 | 57 | } 58 | 59 | 60 | } 61 | --------------------------------------------------------------------------------