├── data-collector ├── pom.xml └── src │ └── main │ ├── java │ └── cn │ │ ├── doitedu │ │ └── datacollect │ │ │ ├── doris │ │ │ ├── DorisConnectorTest.java │ │ │ ├── MysqlFlinkcdc2Doris.java │ │ │ └── Stu.java │ │ │ └── flume │ │ │ ├── DesensitizationInterceptor.java │ │ │ ├── TestOrderTimestampExtractInterceptor.java │ │ │ └── TimestampExtractInterceptor.java │ │ └── dotiedu │ │ └── datacollect │ │ └── cdc │ │ ├── FlinkCdcTest.java │ │ └── TestOrderSync2Kafka.java │ └── resources │ └── log4j.properties ├── data-etl ├── pom.xml └── src │ ├── main │ ├── java │ │ └── cn │ │ │ └── doitedu │ │ │ ├── etl │ │ │ ├── AppTrafficFactTableBuilder.scala │ │ │ ├── GeoHashDimTableBuilder.scala │ │ │ ├── MallAppUserActionBitMapBuilder.scala │ │ │ ├── MallApplogOds2DwdStep3.scala │ │ │ ├── MallEventAttribute.scala │ │ │ ├── MallShotStatistic.scala │ │ │ ├── MallUserRetentionRPT_A.scala │ │ │ ├── Test.scala │ │ │ └── TreeTest.scala │ │ │ ├── profile │ │ │ ├── BulkloadDemo.scala │ │ │ ├── LoadUserIds.scala │ │ │ └── UserProfileBulkLoadTest.scala │ │ │ └── utils │ │ │ ├── EsJavaClient.java │ │ │ ├── EventAttrUtil.scala │ │ │ ├── Functions.scala │ │ │ ├── GaodeGpsUtil.java │ │ │ ├── PageContributeUtil.scala │ │ │ └── PropertiesHolder.java │ ├── resources │ │ ├── hive-site.xml │ │ └── user_profile_tags_bulkload.properties │ └── scala │ │ └── akka │ │ ├── demo1 │ │ └── Demo.scala │ │ └── demo2 │ │ ├── Client.scala │ │ ├── Message.scala │ │ └── Server.scala │ └── test │ └── java │ └── RoaringBitmapTest.scala ├── data-export ├── pom.xml └── src │ └── main │ └── java │ └── cn │ └── doitedu │ └── profile │ └── export │ ├── EsSpark.scala │ └── HiveTags2Es.scala ├── lib ├── flink-doris-connector-1.14_2.12-1.0.3.jar └── flink-sql-connector-mysql-cdc-2.3-SNAPSHOT.jar ├── pom.xml ├── realtime-dw ├── pom.xml └── src │ ├── main │ ├── java │ │ └── cn │ │ │ └── doitedu │ │ │ └── rtdw │ │ │ ├── etl │ │ │ ├── AdShowClickPatternRecognize.java │ │ │ ├── MallAppEventsPreprocess.java │ │ │ ├── 
MallAppTrafficDwsEtl.java │ │ │ ├── MallAppTrafficReport1.java │ │ │ ├── MallAppTrafficReport2.java │ │ │ ├── MallAppTrafficReport3.java │ │ │ ├── MallOrderBrandTopnHour.java │ │ │ ├── MallOrderItemCdcdDwsEtl.java │ │ │ ├── functions │ │ │ │ ├── EventsDataFilterFunction.java │ │ │ │ ├── GeoHashAreaQueryFunction.java │ │ │ │ ├── GuidGenerateFunction.java │ │ │ │ ├── JsonToEventBeanMapFunction.java │ │ │ │ └── TrafficAnalyseFunc.java │ │ │ └── pojo │ │ │ │ ├── DeviceAccountBindInfo.java │ │ │ │ ├── EventBean.java │ │ │ │ └── TrafficBean.java │ │ │ └── utils │ │ │ ├── GuidUtils.java │ │ │ └── SqlHolder.java │ └── resources │ │ └── log4j.properties │ └── test │ └── java │ ├── CepTest.java │ └── Test.java ├── realtime-marketing-common ├── pom.xml └── src │ └── main │ └── java │ └── cn │ └── doitedu │ └── rtmk │ └── common │ ├── interfaces │ ├── RuleCalculator.java │ └── TimerRuleCalculator.java │ ├── pojo │ ├── ActionSeqParam.java │ ├── AttributeParam.java │ ├── EventParam.java │ └── UserEvent.java │ └── utils │ └── UserEventComparator.java ├── realtime-marketing-engine ├── pom.xml └── src │ └── main │ ├── java │ └── cn │ │ └── doitedu │ │ └── rtmk │ │ ├── engine │ │ ├── functions │ │ │ ├── Json2UserEventMapFunction.java │ │ │ ├── Row2RuleMetaBeanMapFunction.java │ │ │ ├── RuleMatchProcessFunction.java │ │ │ └── RuleMatchProcessFunctionOld.java │ │ ├── main │ │ │ └── RuleEngine.java │ │ ├── pojo │ │ │ ├── RuleMatchResult.java │ │ │ └── RuleMetaBean.java │ │ └── utils │ │ │ └── FlinkStateDescriptors.java │ │ └── tech_test │ │ ├── bitmap_inject │ │ ├── _01_RulePublisher.java │ │ ├── _02_BitmapFromMySqlBytes.java │ │ ├── _03_FlinkCdcBitmapAndCall.java │ │ └── _04_FlinkInjectRuleBimtapProcessEvents.java │ │ ├── enjoy_test │ │ ├── ConditionCalcTest.java │ │ ├── ConditionCalculator.groovy │ │ ├── EnjoyHello.java │ │ ├── EventAttributeParam.java │ │ ├── EventBean.java │ │ └── IConditionCalculator.java │ │ ├── groovytest │ │ ├── groovy │ │ │ ├── Caculator.groovy │ │ │ └── 
HelloWorld.groovy │ │ └── java │ │ │ ├── CallGroovy.java │ │ │ ├── DynamicCallGroovy.java │ │ │ ├── DynamicCallGroovy2.java │ │ │ └── Person.java │ │ └── whole_test │ │ ├── pojo │ │ ├── EventCountParam.java │ │ ├── PropertyParam.java │ │ └── RuleInfo.java │ │ └── publisher │ │ └── SimpleRulePulishMoni.java │ └── resources │ └── log4j.properties ├── realtime-marketing-manager ├── pom.xml ├── src │ ├── main │ │ ├── java │ │ │ └── cn │ │ │ │ └── doitedu │ │ │ │ └── rulemgmt │ │ │ │ ├── RealtimeMarketingManagerApplication.java │ │ │ │ ├── controller │ │ │ │ └── RuleManagementController.java │ │ │ │ ├── dao │ │ │ │ ├── DorisQueryDao.java │ │ │ │ ├── DorisQueryDaoImpl.java │ │ │ │ ├── RuleSystemMetaDao.java │ │ │ │ └── RuleSystemMetaDaoImpl.java │ │ │ │ ├── pojo │ │ │ │ └── ActionAttributeParam.java │ │ │ │ └── service │ │ │ │ ├── ActionConditionQueryService.java │ │ │ │ ├── ActionConditionQueryServiceImpl.java │ │ │ │ ├── ProfileConditionQueryService.java │ │ │ │ ├── ProfileConditionQueryServiceImpl.java │ │ │ │ ├── RuleSystemMetaService.java │ │ │ │ └── RuleSystemMetaServiceImpl.java │ │ └── resources │ │ │ └── application.properties │ └── test │ │ └── java │ │ └── cn │ │ └── doitedu │ │ └── rulemgmt │ │ ├── EnjoyHelloWorld.java │ │ └── SeqMatchTest.java └── 说明文档 │ ├── 测试说明.md │ └── 规则_模型_1的参数结构.json ├── rule_model_resources ├── pom.xml ├── src │ ├── main │ │ ├── java │ │ │ └── cn │ │ │ │ └── doitedu │ │ │ │ └── rtmk │ │ │ │ └── rulemodel │ │ │ │ ├── caculator │ │ │ │ └── groovy │ │ │ │ │ ├── RuleModel_01_Calculator_Groovy.groovy │ │ │ │ │ ├── RuleModel_02_Calculator_Groovy.groovy │ │ │ │ │ ├── RuleModel_03_Calculator_Groovy.groovy │ │ │ │ │ └── TestSplit.groovy │ │ │ │ └── template_test │ │ │ │ ├── Test_EventSeqQueryTemplate.java │ │ │ │ ├── Test_Rulemodel_01_calculatorTemplate.java │ │ │ │ └── Test_Rulemodel_02_calculatorTemplate.java │ │ └── resources │ │ │ └── log4j.properties │ └── test │ │ └── java │ │ ├── groovy │ │ └── A.groovy │ │ └── template │ │ └── test │ │ 
└── EventCountConditionCalculatorTest.groovy └── templates │ ├── doirs_sql │ ├── action_seq_condition_query.sql │ ├── action_seq_condition_query.sql.enjoy │ └── event_count_condition_query.sql.enjoy │ ├── rule_calculator │ ├── rulemodel_01_caculator.enjoy │ ├── rulemodel_01_caculator_old.template │ ├── rulemodel_02_caculator.enjoy │ └── rulemodel_03_caculator.enjoy │ └── rule_param_json │ ├── rulemodel_01_param.json │ ├── rulemodel_02_param.json │ └── rulemodel_03_param.json ├── sqls └── doris明细数据表建表.sql ├── tech-test ├── pom.xml └── src │ └── main │ ├── java │ └── cn │ │ └── doitedu │ │ ├── ActionSeqCalcFlinkTest.java │ │ ├── ActionSeqCalcTest.java │ │ ├── Event.java │ │ ├── GroovyHello.java │ │ ├── GroovyTest.java │ │ ├── GroovyUtil.java │ │ ├── HugeBitmapTest.java │ │ ├── IActionRuleCalc.java │ │ ├── JudgeStringIsNumeric.java │ │ ├── ProfileInjectTest.java │ │ ├── dynamic │ │ ├── Calculator.java │ │ └── DynamicCallTest.java │ │ ├── groovy │ │ ├── ActionRuleCalc.groovy │ │ ├── HelloWorld.groovy │ │ └── Person.groovy │ │ └── utils │ │ ├── BitmapSchema.java │ │ ├── KafkaBitmapSerializer.java │ │ ├── ProduceBitmapUtil.java │ │ └── Utils.java │ └── resources │ └── log4j.properties └── x.json /data-collector/src/main/java/cn/doitedu/datacollect/doris/DorisConnectorTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.doris; 2 | 3 | import org.apache.flink.api.common.functions.MapFunction; 4 | import org.apache.flink.streaming.api.CheckpointingMode; 5 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 6 | import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 7 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 9 | 10 | public class DorisConnectorTest { 11 | 12 | public static void main(String[] args) throws Exception { 13 | 14 | StreamExecutionEnvironment 
env = StreamExecutionEnvironment.getExecutionEnvironment(); 15 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); 16 | env.getCheckpointConfig().setCheckpointStorage("file:/d:/checkpoint"); 17 | 18 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 19 | 20 | // 1,18,ZS 21 | DataStreamSource ds = env.socketTextStream("localhost", 4444); 22 | SingleOutputStreamOperator stuDs = ds.map(new MapFunction() { 23 | @Override 24 | public Stu map(String value) throws Exception { 25 | String[] arr = value.split(","); 26 | return new Stu(Integer.parseInt(arr[0]), Byte.parseByte(arr[1]), arr[2]); 27 | } 28 | }); 29 | 30 | tenv.createTemporaryView("tmp",stuDs); 31 | 32 | /*tenv.executeSql("select * from tmp").print();*/ 33 | 34 | // 创建doris连接器表 35 | tenv.executeSql( 36 | " CREATE TABLE flink_doris_sink ( " + 37 | " id INT , " + 38 | " age TINYINT, " + 39 | " name STRING " + 40 | " ) " + 41 | " WITH " + 42 | " ( " + 43 | " 'connector' = 'doris', " + 44 | " 'fenodes' = 'doitedu:8030', " + 45 | " 'table.identifier' = 'doit31.stu', " + 46 | " 'username' = 'root', " + 47 | " 'password' = '', " + 48 | " 'sink.label-prefix' = 'flink_doris_stu' " + 49 | " ) " 50 | ); 51 | 52 | 53 | // 从socket数据表中,select数据 ,insert到doris连接器表 54 | tenv.executeSql("insert into flink_doris_sink select id,age,name from tmp"); 55 | 56 | 57 | 58 | env.execute(); 59 | 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/doitedu/datacollect/doris/MysqlFlinkcdc2Doris.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.doris; 2 | 3 | import org.apache.flink.streaming.api.CheckpointingMode; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | /** 8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 
2022/8/8 12 | * @Desc: 从mysql同步变更数据到doris的测试程序 13 | * 14 | * -- doris中的目标表: 15 | * create table stu_score( 16 | * id int not null comment "学员id" 17 | * ,name string 18 | * ,gender string 19 | * ,score float 20 | * ) 21 | * unique key(id) 22 | * distributed by hash(id) buckets 2 23 | * properties( 24 | * "replication_num"="1" 25 | * ); 26 | * 27 | * 28 | * 29 | **/ 30 | public class MysqlFlinkcdc2Doris { 31 | public static void main(String[] args) { 32 | 33 | 34 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 35 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); 36 | env.getCheckpointConfig().setCheckpointStorage("file:/d:/checkpoint"); 37 | 38 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 39 | 40 | // 创建一个mysql的cdc连接器表 41 | tenv.executeSql("CREATE TABLE flink_mysql_cdc_stuscore ( " + 42 | " id INT, " + 43 | " name STRING, " + 44 | " gender STRING, " + 45 | " score FLOAT, " + 46 | " PRIMARY KEY (id) NOT ENFORCED " + 47 | " ) WITH ( " + 48 | " 'connector' = 'mysql-cdc', " + 49 | " 'hostname' = 'doitedu' , " + 50 | " 'port' = '3306' , " + 51 | " 'username' = 'root' , " + 52 | " 'password' = 'root' , " + 53 | " 'database-name' = 'flinktest', " + 54 | " 'table-name' = 'flink_score' " + 55 | ")"); 56 | 57 | /*tenv.executeSql("select * from flink_mysql_cdc_stuscore").print();*/ 58 | 59 | 60 | // 创建一个 doris的连接器表 61 | tenv.executeSql( 62 | " CREATE TABLE flink_doris_sink_stuscore ( " + 63 | " id INT, " + 64 | " name STRING, " + 65 | " gender STRING, " + 66 | " score FLOAT, " + 67 | " PRIMARY KEY (id) NOT ENFORCED " + 68 | " ) " + 69 | " WITH " + 70 | " ( " + 71 | " 'connector' = 'doris', " + 72 | " 'fenodes' = 'doitedu:8030', " + 73 | " 'table.identifier' = 'doit31.stu_score', " + 74 | " 'username' = 'root', " + 75 | " 'password' = '', " + 76 | " 'sink.label-prefix' = 'flink_stu_score' " + 77 | " ) " 78 | ); 79 | 80 | // insert into .. select .. 
81 | tenv.executeSql("insert into flink_doris_sink_stuscore select * from flink_mysql_cdc_stuscore"); 82 | 83 | 84 | 85 | 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/doitedu/datacollect/doris/Stu.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.doris; 2 | 3 | import java.io.Serializable; 4 | 5 | public class Stu implements Serializable { 6 | private int id; 7 | private byte age; 8 | private String name; 9 | 10 | public Stu() { 11 | } 12 | 13 | public Stu(int id, byte age, String name) { 14 | this.id = id; 15 | this.age = age; 16 | this.name = name; 17 | } 18 | 19 | public int getId() { 20 | return id; 21 | } 22 | 23 | public void setId(int id) { 24 | this.id = id; 25 | } 26 | 27 | public byte getAge() { 28 | return age; 29 | } 30 | 31 | public void setAge(byte age) { 32 | this.age = age; 33 | } 34 | 35 | public String getName() { 36 | return name; 37 | } 38 | 39 | public void setName(String name) { 40 | this.name = name; 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/doitedu/datacollect/flume/DesensitizationInterceptor.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.flume; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.commons.codec.digest.DigestUtils; 6 | import org.apache.flume.Context; 7 | import org.apache.flume.Event; 8 | import org.apache.flume.interceptor.Interceptor; 9 | 10 | import java.util.List; 11 | 12 | /** 13 | * 字段脱敏拦截器 14 | * 本拦截器,要脱敏哪个字段,不是写死的,需要在采集配置中通过参数来指定,如下: 15 | * desensitive.field = account 16 | */ 17 | public class DesensitizationInterceptor implements Interceptor { 18 | String desField; 19 | 20 | public DesensitizationInterceptor(String desField) { 21 | 
this.desField = desField; 22 | } 23 | 24 | @Override 25 | public void initialize() { 26 | 27 | } 28 | 29 | @Override 30 | public Event intercept(Event event) { 31 | try { 32 | byte[] dataBytes = event.getBody(); 33 | String json = new String(dataBytes); 34 | 35 | JSONObject jsonObject = JSON.parseObject(json); 36 | 37 | // 取到待脱敏字段的原初值 38 | String originValue = jsonObject.getString(desField); 39 | 40 | // 加密,得到脱敏的密文 41 | String desensitiveValue = DigestUtils.md5Hex(originValue); 42 | 43 | // 将密文,覆盖掉json中的原初值 44 | jsonObject.put(desField, desensitiveValue); 45 | 46 | // 把jsonObject变回json字符串 47 | String desensitiveJson = jsonObject.toJSONString(); 48 | 49 | // 将处理好的数据json字符串,替换掉event中原来的数据 50 | event.setBody(desensitiveJson.getBytes()); 51 | 52 | } catch (Exception e) { 53 | e.printStackTrace(); 54 | } 55 | return event; 56 | } 57 | 58 | @Override 59 | public List intercept(List list) { 60 | 61 | for (Event event : list) { 62 | intercept(event); 63 | } 64 | 65 | return list; 66 | } 67 | 68 | @Override 69 | public void close() { 70 | 71 | } 72 | 73 | 74 | public static class DesensitizationInterceptorBuilder implements Interceptor.Builder { 75 | String desField; 76 | 77 | @Override 78 | public Interceptor build() { 79 | 80 | return new DesensitizationInterceptor(desField); 81 | } 82 | 83 | @Override 84 | public void configure(Context context) { 85 | desField = context.getString("desensitive.field"); 86 | 87 | } 88 | } 89 | 90 | 91 | } 92 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/doitedu/datacollect/flume/TestOrderTimestampExtractInterceptor.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.flume; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.commons.lang.math.RandomUtils; 6 | import org.apache.flume.Context; 7 | import org.apache.flume.Event; 8 | import 
org.apache.flume.interceptor.Interceptor; 9 | 10 | import java.util.List; 11 | 12 | public class TestOrderTimestampExtractInterceptor implements Interceptor { 13 | 14 | String timeField; 15 | 16 | public TestOrderTimestampExtractInterceptor(String timeField) { 17 | this.timeField = timeField; 18 | } 19 | 20 | 21 | /** 22 | * 初始化方法:当拦截器类被实例化后,会调用一次的方法 23 | */ 24 | @Override 25 | public void initialize() { 26 | // 比如,创建一个mysql连接 27 | } 28 | 29 | /** 30 | * 拦截器的核心功能方法 31 | * 逐条拦截 32 | * 33 | * @param event 从source得到的一条数据 34 | * @return 处理过后的数据 35 | */ 36 | @Override 37 | public Event intercept(Event event) { 38 | // 还多放入一个header 数据(用来支撑下游的 channel selector 进行负载均衡) 39 | event.getHeaders().put("cs", RandomUtils.nextInt(2)+""); 40 | 41 | try { 42 | // 要从event中拿到日志json字符串 43 | byte[] bodyBytes = event.getBody(); 44 | String json = new String(bodyBytes); 45 | 46 | // 从json字符串中,根据配置参数中的 timeField (时间字段名) ,去抽取时间戳 47 | JSONObject jsonObject = JSON.parseObject(json); 48 | Long eventTime = jsonObject.getLong(timeField); 49 | 50 | // 将时间戳,放入event的 headers中 51 | event.getHeaders().put("timestamp", eventTime + ""); 52 | 53 | 54 | 55 | // 返回 event 56 | return event; 57 | 58 | } catch (Exception e) { 59 | e.printStackTrace(); 60 | 61 | event.getHeaders().put("timestamp","0"); 62 | return event; 63 | } 64 | 65 | } 66 | 67 | /** 68 | * 拦截器的核心功能方法 69 | * 批次拦截 70 | * 71 | * @param list 72 | * @return 73 | */ 74 | @Override 75 | public List intercept(List list) { 76 | for (Event event : list) { 77 | intercept(event); 78 | } 79 | 80 | return list; 81 | } 82 | 83 | /** 84 | * 做一些退出之前的资源清理工作 85 | */ 86 | @Override 87 | public void close() { 88 | // 比如,关闭数据库连接、关闭文件流 89 | } 90 | 91 | public static class TimeExtractInterceptorBuilder implements Builder { 92 | 93 | String timeField; 94 | 95 | /** 96 | * builder的功能所在:帮助构建拦截器类的实例对象 97 | * 98 | * @return 99 | */ 100 | @Override 101 | public Interceptor build() { 102 | 103 | return new TestOrderTimestampExtractInterceptor(timeField); 104 | } 105 | 
106 | /** 107 | * 配置功能 108 | * 它会接收到flume agent传入的 上下文对象:context 109 | * 而 context中就包含这采集配置文件中的所有参数 110 | *

111 | * 如: 112 | * 配置文件中,会配置参数: 113 | * time.filed = timeStamp 114 | * 115 | * @param context 116 | */ 117 | @Override 118 | public void configure(Context context) { 119 | timeField = context.getString("time.field"); 120 | 121 | } 122 | } 123 | 124 | 125 | } 126 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/doitedu/datacollect/flume/TimestampExtractInterceptor.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.datacollect.flume; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.commons.codec.digest.DigestUtils; 6 | import org.apache.commons.lang.math.RandomUtils; 7 | import org.apache.flume.Context; 8 | import org.apache.flume.Event; 9 | import org.apache.flume.interceptor.Interceptor; 10 | 11 | import java.util.List; 12 | 13 | public class TimestampExtractInterceptor implements Interceptor { 14 | 15 | String timeField; 16 | 17 | public TimestampExtractInterceptor(String timeField) { 18 | this.timeField = timeField; 19 | } 20 | 21 | 22 | /** 23 | * 初始化方法:当拦截器类被实例化后,会调用一次的方法 24 | */ 25 | @Override 26 | public void initialize() { 27 | // 比如,创建一个mysql连接 28 | } 29 | 30 | /** 31 | * 拦截器的核心功能方法 32 | * 逐条拦截 33 | * 34 | * @param event 从source得到的一条数据 35 | * @return 处理过后的数据 36 | */ 37 | @Override 38 | public Event intercept(Event event) { 39 | // 还多放入一个header 数据(用来支撑下游的 channel selector 进行负载均衡) 40 | event.getHeaders().put("cs", RandomUtils.nextInt(2)+""); 41 | 42 | try { 43 | // 要从event中拿到日志json字符串 44 | byte[] bodyBytes = event.getBody(); 45 | String json = new String(bodyBytes); 46 | 47 | // 从json字符串中,根据配置参数中的 timeField (时间字段名) ,去抽取时间戳 48 | JSONObject jsonObject = JSON.parseObject(json); 49 | Long eventTime = jsonObject.getLong(timeField); 50 | 51 | // 将时间戳,放入event的 headers中 52 | event.getHeaders().put("timestamp", eventTime + ""); 53 | 54 | 55 | 56 | // 返回 event 57 | return event; 58 | 59 
| } catch (Exception e) { 60 | e.printStackTrace(); 61 | 62 | event.getHeaders().put("timestamp","0"); 63 | return event; 64 | } 65 | 66 | } 67 | 68 | /** 69 | * 拦截器的核心功能方法 70 | * 批次拦截 71 | * 72 | * @param list 73 | * @return 74 | */ 75 | @Override 76 | public List intercept(List list) { 77 | for (Event event : list) { 78 | intercept(event); 79 | } 80 | 81 | return list; 82 | } 83 | 84 | /** 85 | * 做一些退出之前的资源清理工作 86 | */ 87 | @Override 88 | public void close() { 89 | // 比如,关闭数据库连接、关闭文件流 90 | } 91 | 92 | public static class TimeExtractInterceptorBuilder implements Interceptor.Builder { 93 | 94 | String timeField; 95 | 96 | /** 97 | * builder的功能所在:帮助构建拦截器类的实例对象 98 | * 99 | * @return 100 | */ 101 | @Override 102 | public Interceptor build() { 103 | 104 | return new TimestampExtractInterceptor(timeField); 105 | } 106 | 107 | /** 108 | * 配置功能 109 | * 它会接收到flume agent传入的 上下文对象:context 110 | * 而 context中就包含这采集配置文件中的所有参数 111 | *

112 | * 如: 113 | * 配置文件中,会配置参数: 114 | * time.filed = timeStamp 115 | * 116 | * @param context 117 | */ 118 | @Override 119 | public void configure(Context context) { 120 | timeField = context.getString("time.field"); 121 | 122 | } 123 | } 124 | 125 | 126 | } 127 | -------------------------------------------------------------------------------- /data-collector/src/main/java/cn/dotiedu/datacollect/cdc/FlinkCdcTest.java: -------------------------------------------------------------------------------- 1 | package cn.dotiedu.datacollect.cdc; 2 | 3 | import org.apache.flink.streaming.api.CheckpointingMode; 4 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 5 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 6 | 7 | /** 8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 2022/7/31 12 | * @Desc: flink-cdc 捕获 mysql变更数据测试代码 13 | **/ 14 | public class FlinkCdcTest { 15 | 16 | public static void main(String[] args) { 17 | 18 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 19 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); 20 | env.getCheckpointConfig().setCheckpointStorage("file:/d:/checkpoint"); 21 | 22 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 23 | 24 | // 建cdc连接器源表 25 | tableEnv.executeSql("CREATE TABLE flink_score ( " + 26 | " id INT, " + 27 | " name string, " + 28 | " gender string, " + 29 | " score double, " + 30 | " tname string metadata from 'table_name', " + 31 | " dbname string metadata from 'database_name', " + 32 | " PRIMARY KEY (id) NOT ENFORCED " + 33 | " ) WITH ( " + 34 | " 'connector' = 'mysql-cdc', " + 35 | " 'hostname' = 'doitedu' , " + 36 | " 'port' = '3306' , " + 37 | " 'username' = 'root' , " + 38 | " 'password' = 'root' , " + 39 | " 'database-name' = 'flinktest', " + 40 | " 'table-name' = 'flink_score' " + 41 | ")"); 42 | 43 | // 从上面定义的表中,读取数据,本质上,就是通过表定义中的连接器,去抓取数据 44 | 
tableEnv.executeSql("select * from flink_score")/*.print()*/; 45 | 46 | 47 | // 实时报表统计: 查询每种性别中,成绩最高的前2个同学 48 | tableEnv.executeSql( 49 | " select "+ 50 | " id,name,gender,score "+ 51 | " from "+ 52 | " ( "+ 53 | " select "+ 54 | " id, "+ 55 | " name, "+ 56 | " gender, "+ 57 | " score, "+ 58 | " row_number() over(partition by gender order by score desc) as rn "+ 59 | " from flink_score ) o "+ 60 | " where rn<=2 " 61 | ).print(); 62 | 63 | 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /data-collector/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/GeoHashDimTableBuilder.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import cn.doitedu.utils.Functions.gps2GeoHashcode 4 | import org.apache.spark.sql.{SaveMode, SparkSession} 5 | 6 | import java.util.Properties 7 | 8 | object GeoHashDimTableBuilder { 9 | 10 | def main(args: Array[String]): Unit = { 11 | 12 | val spark = SparkSession.builder() 13 | .master("local[*]") 14 | .appName("geohash码地域维表构建任务") 15 | .config("spark.sql.shuffle.partitions", 2) 16 | .enableHiveSupport() 17 | .getOrCreate() 18 | 19 | // 加载mysql中的原始地理位置信息数据表 20 | val props = new Properties() 21 | props.setProperty("user","root") 22 | props.setProperty("password","root") 23 | 24 | val df = spark.read.jdbc("jdbc:mysql://doitedu:3306/realtimedw", "t_md_areas", props) 25 | df.createTempView("t") 26 | 27 | spark.udf.register("geo",gps2GeoHashcode) 28 | 29 | spark.sql( 30 | """ 31 | |insert overwrite table dim.geohash_area 32 | |select 33 | | geohash, 34 | | province, 35 | | city, 36 | | region 37 | |from( 38 | |select 39 | | geohash, 40 | | province, 41 | | city, 42 | | region, 43 | | row_number() over(partition by geohash order by province) as rn 44 | |from 45 | |( 46 | | SELECT 47 | | geo(lv4.BD09_LAT, lv4.BD09_LNG) as geohash, 48 | | lv1.AREANAME as province, 49 | | lv2.AREANAME as city, 50 | | lv3.AREANAME as region 51 | | from t lv4 52 | | join t lv3 on lv4.`LEVEL`=4 and lv4.bd09_lat is not null and lv4.bd09_lng is not null and 
lv4.PARENTID = lv3.ID 53 | | join t lv2 on lv3.PARENTID = lv2.ID 54 | | join t lv1 on lv2.PARENTID = lv1.ID 55 | |) o1 56 | |)o2 57 | | 58 | |where rn=1 59 | | 60 | |""".stripMargin) 61 | 62 | // res.write.format("hive").mode(SaveMode.Append).saveAsTable("dim.geohash_area") 63 | 64 | spark.close() 65 | 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/MallAppUserActionBitMapBuilder.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import org.apache.spark.sql.SparkSession 4 | import org.roaringbitmap.RoaringBitmap 5 | 6 | import java.io.{ByteArrayInputStream, ByteArrayOutputStream, DataInputStream, DataOutputStream} 7 | 8 | /** 9 | * @Author: deep as the sea 10 | * @Site: 多易教育 11 | * @QQ: 657270652 12 | * @Date: 2022/7/26 13 | * @Desc: 14 | * 将数仓中已经存在的用户活跃情况,生成一个活跃bitmap记录表的初始状态 15 | * -- 从 活跃区间记录表 来处理 16 | * 17 | * --先得到每一个活跃区间的bitmap 18 | * g01,2022-07-01,2022-07-10 -> bm 19 | * g01,2022-07-15,2022-07-17 -> bm 20 | * g02,2022-07-18,9999-12-31 21 | * 22 | * --然后,将相同用户分组,把他的所有 bm 收集到一个数组中,然后进行合并,得到最终的bm 23 | * 24 | * -- 目标结果 25 | * g01,2022-07-01, [00000000000000000000000001010000100000000] 26 | * g02,2022-07-18, [00000000000000000000000101000000000000000] 27 | * */ 28 | object MallAppUserActionBitMapBuilder { 29 | 30 | def main(args: Array[String]): Unit = { 31 | 32 | val spark = SparkSession.builder() 33 | .master("local") 34 | .enableHiveSupport() 35 | .config("spark.sql.shuffle.partitions", "2") 36 | .appName("用户活跃bitmap模型表初始构建") 37 | .getOrCreate() 38 | 39 | val genBitMap = (start: Int, end: Int) => { 40 | val bm = RoaringBitmap.bitmapOf(start.to(end).toArray: _*) 41 | val baout = new ByteArrayOutputStream() 42 | val dout = new DataOutputStream(baout) 43 | bm.serialize(dout) 44 | 45 | baout.toByteArray 46 | } 47 | 48 | val orMergeBitMap = (bmArr: Array[Array[Byte]]) => { 49 | 50 | val bm = 
RoaringBitmap.bitmapOf() 51 | for (bmBytes <- bmArr) { 52 | 53 | // 反序列化本次遍历到的bitmap的序列化字节 54 | val bmTmp = RoaringBitmap.bitmapOf(); 55 | val bin = new ByteArrayInputStream(bmBytes) 56 | val din = new DataInputStream(bin) 57 | 58 | bmTmp.deserialize(din) 59 | 60 | // 合并 61 | bm.or(bmTmp) 62 | } 63 | 64 | // 将合并好的bitmap,序列化成字节返回 65 | val baout = new ByteArrayOutputStream() 66 | val dout = new DataOutputStream(baout) 67 | bm.serialize(dout) 68 | 69 | baout.toByteArray 70 | } 71 | 72 | 73 | spark.udf.register("gen_bitmap", genBitMap) 74 | spark.udf.register("or_merge_bitmap", orMergeBitMap) 75 | 76 | spark.sql( 77 | """ 78 | |insert into table dws.doitedu_mall_app_user_active_bm partition(dt='2022-07-16') 79 | |select 80 | | o1.guid 81 | | ,o2.first_login_dt 82 | | ,o1.bm 83 | |from 84 | |( 85 | | select 86 | | guid 87 | | ,or_merge_bitmap( collect_list(bm) ) as bm 88 | | from ( 89 | | select 90 | | guid 91 | | ,gen_bitmap(datediff(range_start_dt,'2000-01-01'),datediff(range_end_dt,'2000-01-01')) as bm 92 | | from dws.doitedu_mall_app_user_actrang 93 | | where dt='2022-07-16' 94 | | ) t 95 | | group by guid 96 | |) o1 97 | | 98 | |join 99 | |( 100 | | select 101 | | guid 102 | | ,min(range_start_dt) as first_login_dt 103 | | from dws.doitedu_mall_app_user_actrang 104 | | where dt='2022-07-16' 105 | | group by guid 106 | |) o2 107 | |on o1.guid = o2.guid 108 | | 109 | | 110 | |""".stripMargin) 111 | 112 | 113 | spark.close() 114 | } 115 | 116 | } 117 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/MallApplogOds2DwdStep3.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import cn.doitedu.utils.Functions 4 | import org.apache.spark.sql.{SaveMode, SparkSession} 5 | 6 | object MallApplogOds2DwdStep3 { 7 | 8 | def main(args: Array[String]): Unit = { 9 | var dt = "2022-07-16" 10 | 11 | if(args.length>0){ 12 | dt = args(0) 13 | } 
14 | 15 | val spark = SparkSession.builder() 16 | .appName("MallApplogOds2DwdStep3") 17 | .config("spark.sql.shuffle.partitions","1") 18 | .master("local") 19 | .enableHiveSupport() 20 | .getOrCreate() 21 | 22 | 23 | 24 | spark.udf.register("geo",Functions.gps2GeoHashcode) 25 | 26 | val joined = spark.sql( 27 | """ 28 | |select 29 | | a.*, 30 | | b.province, 31 | | b.city, 32 | | b.region 33 | |from 34 | | tmp.app_log_ods2dwd_step2 a -- 日志表 35 | |left join 36 | | dim.geohash_area b -- 地域维表 37 | |on geo(a.latitude,a.longitude)=b.geohash 38 | | 39 | |""".stripMargin) 40 | 41 | joined.createTempView("joined") 42 | 43 | // 主输出:就是把关联处理后的日志输出,插入到dwd日志明细表中区 44 | spark.sql( 45 | s""" 46 | |insert overwrite table dwd.doitedu_mall_app_events partition(dt='${dt}') 47 | |select * from joined 48 | | 49 | |""".stripMargin) 50 | 51 | 52 | // 从查询结果中挑出关联地域信息失败的gps座标,进行侧输出 53 | // 以便于后续可以用异步任务去对这些gps座标请求高德来得到地域信息 54 | spark.sql( 55 | """ 56 | |select 57 | | concat_ws(',',latitude,longitude) 58 | |from joined 59 | |where province is null 60 | |group by latitude,longitude 61 | | 62 | |""".stripMargin) 63 | .write.mode(SaveMode.Overwrite).text(s"hdfs://doitedu:8020/unknown-gps/${dt}") 64 | 65 | spark.close() 66 | } 67 | 68 | } 69 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/MallEventAttribute.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import cn.doitedu.utils.EventAttrUtil 4 | import org.apache.spark.sql.SparkSession 5 | import scala.collection.mutable.ListBuffer 6 | 7 | object MallEventAttribute { 8 | 9 | def main(args: Array[String]): Unit = { 10 | 11 | val spark = SparkSession.builder() 12 | .appName("商城app用户事件归因主题事实表计算任务") 13 | .enableHiveSupport() 14 | .master("local") 15 | .config("spark.sql.shuffle.partitions", "1") 16 | .config("spark.sql.hive.convertMetastoreParquet","false") 17 | 
.config("spark.sql.hive.convertMetastoreOrc","false") 18 | .getOrCreate() 19 | 20 | import spark.implicits._ 21 | 22 | 23 | 24 | val df = spark.sql( 25 | """ 26 | | 27 | |with tmp as ( 28 | |select 29 | | guid 30 | | ,event_id 31 | | ,event_time 32 | | ,if(event_id = 'e1',1,0) as flag 33 | |from test.doitedu_app_funnel_test 34 | |where dt='2022-07-16'and 35 | | ( 36 | | (event_id='e1' and properties['p1']='v1') OR 37 | | (event_id='e2') OR 38 | | (event_id='e3') OR 39 | | (event_id='e4') 40 | | ) 41 | |) 42 | | 43 | |SELECT 44 | | guid 45 | | ,collect_list(event_id) as event_seq 46 | |from 47 | |( 48 | |SELECT 49 | | guid 50 | | ,event_id 51 | | ,event_time 52 | | ,flag 53 | | ,sum(flag) over(partition by guid order by event_time rows between unbounded preceding and current row) - flag as flag2 54 | |from tmp 55 | |) o 56 | |group by guid,flag2 57 | |having array_contains(collect_list(event_id),'e1') 58 | | 59 | |""".stripMargin) 60 | 61 | val resultRdd = df.rdd.flatMap(row => { 62 | val guid = row.getAs[Long]("guid") 63 | val eventSeq = row.getSeq[String](1).toArray 64 | // (1,WrappedArray(e3, e2, e3, e4, e2, e1)) 65 | 66 | // 创建一个收集结果的list 67 | val resultList = new ListBuffer[(Long, String, String, Double)] 68 | 69 | 70 | // 根据用户的行为序列,用 首次触点归因,计算一次结果,结果是一行 71 | // (guid,算法,待归因事件,归因权重) 72 | // (1,首次触点,e3,100%) 73 | val tupleFirst = EventAttrUtil.firstTouchAttr(eventSeq) 74 | resultList += ((guid, "首次触点", tupleFirst._1, tupleFirst._2)) 75 | 76 | 77 | // 根据用户的行为序列,用 末次触点归因,计算一次结果,结果是一行 78 | // (guid,算法,待归因事件,归因权重) 79 | // (1,末次触点,e2,100%) 80 | val tupleLast = EventAttrUtil.lastTouchAttr(eventSeq) 81 | resultList += ((guid, "末次触点", tupleLast._1, tupleLast._2)) 82 | 83 | 84 | // 根据用户的行为序列,用 线性归因,计算一次结果,结果是多行 85 | // (guid,算法,待归因事件,归因权重) 86 | // (1,线性归因,e3,40%) 87 | // (1,线性归因,e2,40%) 88 | // (1,线性归因,e4,20%) 89 | val linearResultTuples = EventAttrUtil.linearAttr(eventSeq) 90 | for (tuple <- linearResultTuples) { 91 | resultList += ((guid, "线性归因", tuple._1, tuple._2)) 
92 | } 93 | 94 | resultList 95 | 96 | }) 97 | 98 | 99 | resultRdd.toDF("guid","attr_method","attr_event","attr_weight") 100 | .show(100,false) 101 | 102 | spark.close() 103 | } 104 | 105 | } 106 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/MallShotStatistic.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import org.apache.spark.sql.SparkSession 4 | 5 | object MallShotStatistic { 6 | 7 | def main(args: Array[String]): Unit = { 8 | 9 | val spark = SparkSession.builder() 10 | .appName("打靶") 11 | .enableHiveSupport() 12 | .config("hive.metastore.uris","thrift://doitedu:9083") 13 | .getOrCreate() 14 | 15 | val dt = args(0) 16 | 17 | spark.sql( 18 | s""" 19 | | 20 | |insert into table default.dol_test2 partition(dt='${dt}') 21 | |select 22 | | gender, 23 | | count(1) as shot_cnt, 24 | | avg(score) as avg_score, 25 | | max(score) as max_score, 26 | | min(score) as min_score 27 | |from default.dol_test1 28 | |group by gender 29 | | 30 | |""".stripMargin) 31 | 32 | 33 | spark.close() 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/Test.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import org.apache.spark.SparkContext 4 | import org.apache.spark.sql.SparkSession 5 | 6 | case class DNS(ip:String,domain:String,hour:String) 7 | object Test { 8 | def main(args: Array[String]): Unit = { 9 | 10 | val sc = new SparkContext() 11 | val rdd = sc.textFile("/inpath") 12 | rdd.map(s => { 13 | // 192.168.1.2|www.baidu.com|2022-01-12 09:35:40 14 | val split = s.split("\\|") 15 | val hour = split(2).split(":")(0) 16 | ((split(0), split(1),hour),1) 17 | }).reduceByKey(_+_) 18 | .map(tp=>(tp._1._1,tp._1._3,tp._1._2,tp._2)) // ip,hour,domain,cnt 19 | .groupBy(tp=>(tp._1,tp._2)) 
20 | .flatMap(tp=>{ 21 | tp._2.toList.sortBy(tp=> -tp._4).slice(0,100) 22 | }).saveAsTextFile("/outpath") 23 | 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/etl/TreeTest.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.etl 2 | 3 | import scala.collection.mutable 4 | import scala.collection.mutable.ListBuffer 5 | 6 | case class Node(val pageId: String, val children: ListBuffer[Node]) 7 | 8 | object TreeTest { 9 | def main(args: Array[String]): Unit = { 10 | 11 | val ints = new mutable.PriorityQueue[Int]() 12 | 13 | /* 14 | a -- | 15 | |---b---|--- d 16 | |--- e 17 | |---c---| 18 | |---a--| 19 | |---x 20 | 21 | */ 22 | val str = "a,1|b,a|d,b|e,b|c,a|a,c|x,a" 23 | 24 | val pairs = str.split("\\|") 25 | var node: Node = null 26 | for (pair <- pairs) { 27 | val split = pair.split(",") 28 | val pageId = split(0) 29 | val refId = split(1) 30 | if (node == null) { 31 | node = Node(pageId, ListBuffer.empty[Node]) 32 | } else { 33 | findAndAppend(node, pageId, refId) 34 | } 35 | } 36 | 37 | val tmp = ListBuffer.empty[(String, Int)] 38 | calcNodeContribute(node, tmp) 39 | println(tmp) 40 | 41 | val tuples = calcNodeContribute2(node) 42 | println("---------") 43 | println(tuples) 44 | 45 | 46 | } 47 | 48 | // 挂载节点到树 49 | def findAndAppend(node: Node, pageId: String, refId: String): Boolean = { 50 | for (childNode <- node.children.reverse) { // 反转遍历,是为了先找右子树 51 | val flag = findAndAppend(childNode, pageId, refId) 52 | if (flag) return flag 53 | } 54 | 55 | if (node.pageId.equals(refId)) { 56 | node.children += Node(pageId, ListBuffer.empty[Node]) 57 | true 58 | } else { 59 | false 60 | } 61 | 62 | 63 | } 64 | 65 | 66 | // 计算总贡献量 67 | def calcNodeContribute(node: Node, tmp: ListBuffer[(String, Int)]): Int = { 68 | var pv = 0 69 | pv += node.children.size 70 | for (cnode <- node.children) { 71 | pv += calcNodeContribute(cnode, tmp) 
72 | } 73 | tmp += ((node.pageId, pv)) 74 | pv 75 | } 76 | 77 | 78 | // 计算直接贡献量 79 | def calcNodeContribute2(node: Node): ListBuffer[(String, Int)] = { 80 | val lst = ListBuffer.empty[(String, Int)] 81 | for (cnode <- node.children) { 82 | lst ++= calcNodeContribute2(cnode) 83 | } 84 | lst += ((node.pageId, node.children.size)) 85 | 86 | } 87 | 88 | } 89 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/profile/BulkloadDemo.scala: -------------------------------------------------------------------------------- 1 | //package cn.doitedu.profile 2 | // 3 | //import org.apache.hadoop.fs.Path 4 | //import org.apache.hadoop.hbase.{HBaseConfiguration, KeyValue, TableName} 5 | //import org.apache.hadoop.hbase.client.{Connection, ConnectionFactory, RegionLocator, Table} 6 | //import org.apache.hadoop.hbase.io.ImmutableBytesWritable 7 | //import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2 8 | //import org.apache.hadoop.hbase.tool.BulkLoadHFiles 9 | //import org.apache.hadoop.hbase.util.Bytes 10 | //import org.apache.hadoop.mapreduce.Job 11 | //import org.apache.spark.sql.{Row, SparkSession} 12 | // 13 | //object BulkloadDemo { 14 | // def main(args: Array[String]): Unit = { 15 | // 16 | // val spark = SparkSession.builder() 17 | // .appName("") 18 | // .enableHiveSupport() 19 | // .master("local") 20 | // .config("spark.default.parallelism",1) 21 | // .getOrCreate() 22 | // 23 | // // t.geohash | t.province | t.city | t.region | 24 | // val df = spark.read.table("dim.geohash_area").where("province is not null").distinct() 25 | // val rdd = df.rdd.map({ 26 | // case Row(geohash: String, province: String, city: String, region: String) 27 | // => (geohash, "f", "q", province + "," + city + "," + region) 28 | // }).sortBy(tp=>(tp._1,tp._2,tp._3)) 29 | // .map(tp=>{ 30 | // (new ImmutableBytesWritable(tp._1.getBytes()),new KeyValue(tp._1.getBytes(),tp._2.getBytes(),tp._3.getBytes(),tp._4.getBytes())) 31 
| // }).coalesce(1,false) 32 | // 33 | // 34 | // val conf = HBaseConfiguration.create() 35 | // conf.set("fs.defaultFS","hdfs://doitedu:8020/") 36 | // conf.set("hbase.zookeeper.quorum","doitedu:2181") 37 | // val job: Job = Job.getInstance(conf) 38 | // 39 | // // 构造一个hbase的客户端 40 | // val conn: Connection = ConnectionFactory.createConnection(conf) 41 | // val tableName: TableName = TableName.valueOf("dim_geo_area") 42 | // val table: Table = conn.getTable(tableName) 43 | // val locator: RegionLocator = conn.getRegionLocator(tableName) 44 | // 45 | // // HfileOutputFormat的参数配置 46 | // HFileOutputFormat2.configureIncrementalLoad(job,table,locator) 47 | // 48 | // 49 | // // 将rdd数据输出成Hfile文件 50 | // rdd.saveAsNewAPIHadoopFile("hdfs://doitedu:8020/tmp/geohash",classOf[ImmutableBytesWritable],classOf[KeyValue],classOf[HFileOutputFormat2],job.getConfiguration) 51 | // 52 | // // 将生成好的hfile导入到 hbase 53 | // val loader = BulkLoadHFiles.create(conf) 54 | // loader.bulkLoad(tableName,new Path("hdfs://doitedu:8020/tmp/geohash")) 55 | // 56 | // // deprecated api 57 | // //val loader = new LoadIncrementalHFiles(conf) 58 | // //loader.doBulkLoad(new Path("hdfs://doitedu:8020/tmp/tags"), conn.getAdmin, table, conn.getRegionLocator(tableName)) 59 | // 60 | // table.close() 61 | // conn.close() 62 | // spark.close() 63 | // 64 | // 65 | // } 66 | // 67 | //} 68 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/profile/LoadUserIds.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.profile 2 | 3 | import com.alibaba.fastjson.JSON 4 | import org.apache.commons.lang3.{RandomUtils, StringUtils} 5 | import org.apache.spark.sql.{SaveMode, SparkSession} 6 | 7 | import java.util.Properties 8 | 9 | 10 | object LoadUserIds { 11 | def main(args: Array[String]): Unit = { 12 | 13 | val spark = SparkSession.builder() 14 | .appName("") 15 | .master("local") 16 | 
.config("spark.shuffle.partitions", 1) 17 | .getOrCreate() 18 | 19 | import spark.implicits._ 20 | 21 | val ds = spark.read.textFile("hdfs://doitedu:8020/userid/") 22 | val df = ds.rdd.map(s => { 23 | val nObject = JSON.parseObject(s) 24 | val account = nObject.getString("account") 25 | 26 | val day = StringUtils.leftPad(RandomUtils.nextInt(1, 11) + "", 2, "0") 27 | val hour = StringUtils.leftPad(RandomUtils.nextInt(1, 13) + "", 2, "0") 28 | val minute = StringUtils.leftPad(RandomUtils.nextInt(1, 60) + "", 2, "0") 29 | val second = StringUtils.leftPad(RandomUtils.nextInt(1, 60) + "", 2, "0") 30 | 31 | (account,s"2022-08-${day} ${hour}:${minute}:${second}") 32 | }).toDF("account","register_time") 33 | 34 | df.distinct().createTempView("tv") 35 | 36 | val res = spark.sql("select row_number() over(order by account) as id,account,register_time from tv ") 37 | 38 | val props = new Properties() 39 | props.setProperty("user","root") 40 | props.setProperty("password","root") 41 | res.write.mode(SaveMode.Append).jdbc("jdbc:mysql://doitedu:3306/rtmk","ums_member",props) 42 | 43 | 44 | spark.close() 45 | 46 | 47 | 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/EsJavaClient.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | 4 | import org.apache.http.HttpHost; 5 | import org.elasticsearch.action.get.GetRequest; 6 | import org.elasticsearch.action.get.GetResponse; 7 | import org.elasticsearch.action.search.SearchRequest; 8 | import org.elasticsearch.action.search.SearchResponse; 9 | import org.elasticsearch.client.RequestOptions; 10 | import org.elasticsearch.client.RestClient; 11 | import org.elasticsearch.client.RestHighLevelClient; 12 | import org.elasticsearch.index.query.BoolQueryBuilder; 13 | import org.elasticsearch.index.query.MatchQueryBuilder; 14 | import 
org.elasticsearch.index.query.QueryBuilders; 15 | import org.elasticsearch.search.SearchHit; 16 | import org.elasticsearch.search.SearchHits; 17 | import org.elasticsearch.search.builder.SearchSourceBuilder; 18 | 19 | import java.io.IOException; 20 | 21 | public class EsJavaClient { 22 | 23 | public static void main(String[] args) throws IOException { 24 | 25 | RestHighLevelClient client = new RestHighLevelClient(RestClient.builder(new HttpHost("doitedu", 9200, "http"))); 26 | 27 | //------------------根据docid获取文档------------------ 28 | //GetRequest request = new GetRequest().index("docs").id("1"); 29 | //客户端发送请求,获取响应对象 30 | //GetResponse response = client.get(request, RequestOptions.DEFAULT); 31 | //System.out.println("index:" + response.getIndex()); 32 | //System.out.println("type:" + response.getType()); 33 | //System.out.println("id:" + response.getId()); 34 | //System.out.println("source:" + response.getSourceAsString()); 35 | 36 | // -----------搜索条件查询------- 37 | SearchRequest request = new SearchRequest("docs"); 38 | 39 | // 精确查询 40 | //request.source(new SearchSourceBuilder().query(QueryBuilders.termQuery("tg04", "幼儿园"))); 41 | 42 | // 全文检索,或精确查询(基本类型值) 43 | //request.source(new SearchSourceBuilder().query(QueryBuilders.matchQuery("tg04", "幼儿园"))); 44 | 45 | // 范围查询 46 | //request.source(new SearchSourceBuilder().query(QueryBuilders.rangeQuery("tg01").gt(4))); 47 | 48 | // 多条件查询 49 | MatchQueryBuilder matchQueryBuilder1 = QueryBuilders.matchQuery("tg04", "幼儿园"); 50 | MatchQueryBuilder matchQueryBuilder2 = QueryBuilders.matchQuery("tg04", "城市"); 51 | BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery(); 52 | boolQueryBuilder.must(matchQueryBuilder1).should(matchQueryBuilder2); 53 | request.source(new SearchSourceBuilder().query(boolQueryBuilder)); 54 | 55 | 56 | SearchResponse response2 = client.search(request, RequestOptions.DEFAULT); 57 | SearchHits hits = response2.getHits(); 58 | 59 | System.out.println("耗时:" + response2.getTook()); 60 | 
System.out.println("命中条数:" + hits.getTotalHits()); 61 | 62 | for (SearchHit hit : hits) { 63 | System.out.println("------------------"); 64 | System.out.println(hit.getSourceAsString()); 65 | } 66 | 67 | // 关闭ES客户端 68 | client.close(); 69 | 70 | } 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/EventAttrUtil.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils 2 | 3 | object EventAttrUtil { 4 | 5 | /** 6 | * 首次触点归因算法 7 | * eventSeq ==> ["e3","e2","e3","e2","e1"] 8 | * eventSeq ==> ["e1"] 9 | */ 10 | def firstTouchAttr(eventSeq:Array[String]):(String,Double) ={ 11 | if(eventSeq.length > 1) { 12 | (eventSeq(0),1.0) 13 | }else{ 14 | (null,0.0) 15 | } 16 | } 17 | 18 | 19 | 20 | /** 21 | * 末次触点归因算法 22 | * eventSeq ==> ["e3","e2","e3","e2","e1"] 23 | * eventSeq ==> ["e1"] 24 | */ 25 | def lastTouchAttr(eventSeq:Array[String]):(String,Double) ={ 26 | if(eventSeq.length > 1) { 27 | (eventSeq(eventSeq.length-2),1.0) // [e3,e1] 28 | }else{ 29 | (null,0.0) 30 | } 31 | } 32 | 33 | 34 | 35 | /** 36 | * 线性归因算法 37 | * eventSeq ==> ["e3","e2","e3","e2","e1"] 38 | */ 39 | def linearAttr(eventSeq:Array[String]):List[(String,Double)] ={ 40 | if(eventSeq.length > 1) { 41 | 42 | // ["e3","e2","e3","e2","e1"] => [(e3,0.2),(e2,0.2),(e3,0.2),(e2,0.2),(e1,0.2)] 43 | val res: Map[String, Double] = eventSeq 44 | .reverse 45 | .tail 46 | .reverse 47 | .map(e => (e, 1.0 / (eventSeq.length - 1))) 48 | .groupBy(tp => tp._1) 49 | .mapValues(arr => arr.map(tp => tp._2).sum) 50 | 51 | res.toList 52 | 53 | }else{ 54 | List((null,0.0)) 55 | } 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/Functions.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils 2 | 3 | import 
ch.hsr.geohash.GeoHash 4 | 5 | object Functions { 6 | 7 | val gps2GeoHashcode = (lat:Double, lng:Double)=> GeoHash.geoHashStringWithCharacterPrecision(lat,lng,5) 8 | 9 | } 10 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/GaodeGpsUtil.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import ch.hsr.geohash.GeoHash; 4 | import com.alibaba.fastjson.JSON; 5 | import com.alibaba.fastjson.JSONObject; 6 | import org.apache.commons.io.FileUtils; 7 | import org.apache.commons.lang3.StringUtils; 8 | import org.apache.hadoop.conf.Configuration; 9 | import org.apache.hadoop.fs.*; 10 | import org.apache.http.HttpEntity; 11 | import org.apache.http.client.methods.CloseableHttpResponse; 12 | import org.apache.http.client.methods.HttpGet; 13 | import org.apache.http.impl.client.CloseableHttpClient; 14 | import org.apache.http.impl.client.HttpClientBuilder; 15 | import org.apache.http.util.EntityUtils; 16 | 17 | import java.io.*; 18 | 19 | public class GaodeGpsUtil { 20 | 21 | public static void main(String[] args) throws IOException { 22 | 23 | // 构造hdfs的客户端 24 | Configuration conf = new Configuration(); 25 | conf.set("fs.defaultFS", "hdfs://doitedu:8020/"); 26 | FileSystem fs = FileSystem.get(conf); 27 | 28 | // 创建一个http请求客户端 29 | CloseableHttpClient httpClient = HttpClientBuilder.create().build(); 30 | String requestUrl = "https://restapi.amap.com/v3/geocode/regeo?key=565bbb9a75ad9f030c51b4e42fde3373&location="; 31 | 32 | // 创建一个写出结果数据的hdfs的文件及输出流 33 | FSDataOutputStream output = fs.create(new Path("/unknown-gps-know/2022-07-16/res.txt")); 34 | BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(output)); 35 | 36 | // 列出 日期目录下的 gps待请求清单文件 37 | RemoteIterator filesIterator = fs.listFiles(new Path("/unknown-gps/2022-07-16/"), false); 38 | while (filesIterator.hasNext()) { 39 | LocatedFileStatus file = 
filesIterator.next(); 40 | if (file.getPath().getName().contains("_SUCCESS")) continue; 41 | 42 | System.out.println("找到一个待处理文件: " + file.getPath()); 43 | // 打开文件得到读取的输入流 44 | FSDataInputStream inputStream = fs.open(file.getPath()); 45 | BufferedReader br = new BufferedReader(new InputStreamReader(inputStream)); 46 | String line = null; 47 | while ((line = br.readLine()) != null) { 48 | 49 | try { 50 | // 取到一个gps经纬度座标 51 | String[] gps = line.split(","); 52 | System.out.println("拿到一个gps座标: " + line); 53 | String geohash = GeoHash.geoHashStringWithCharacterPrecision(Double.parseDouble(gps[0]), Double.parseDouble(gps[1]), 5); 54 | HttpGet get = new HttpGet(requestUrl + gps[1] + "," + gps[0]); 55 | 56 | // 发出请求 57 | CloseableHttpResponse response = httpClient.execute(get); 58 | 59 | // 从响应中提取出结果json串 60 | HttpEntity entity = response.getEntity(); 61 | String resJson = EntityUtils.toString(entity); 62 | JSONObject jsonObject = JSON.parseObject(resJson); 63 | 64 | // 判断请求状态是否成功 65 | Integer status = jsonObject.getInteger("status"); 66 | if (status == 1) { 67 | // 从响应json中解析出 省、市、区 68 | JSONObject regeocodes = jsonObject.getJSONObject("regeocode"); 69 | JSONObject addressComponent = regeocodes.getJSONObject("addressComponent"); 70 | String province = addressComponent.getString("province"); 71 | String city = addressComponent.getString("city"); 72 | String district = addressComponent.getString("district"); 73 | 74 | // 只要得到了省市区,就开始输出结果 75 | if (StringUtils.isNotBlank(province)) { 76 | System.out.println(geohash + "," + province + "," + city + "," + district); 77 | // geohash,province,city,region 78 | bw.write(geohash + "," + province + "," + city + "," + district); 79 | bw.newLine(); 80 | } 81 | } 82 | } catch (Exception e) { 83 | //e.printStackTrace(); 84 | } 85 | 86 | } 87 | 88 | br.close(); 89 | inputStream.close(); 90 | 91 | } 92 | 93 | bw.close(); 94 | output.close(); 95 | httpClient.close(); 96 | 97 | } 98 | } 99 | 
-------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/PageContributeUtil.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils 2 | 3 | import scala.collection.mutable.ListBuffer 4 | 5 | case class TreeNode(pageId: String, children: ListBuffer[TreeNode]) 6 | 7 | object PageContributeUtil { 8 | 9 | def main(args: Array[String]): Unit = { 10 | 11 | val recordsStr = "a,1|b,a|d,b|e,b|c,a|a,c|x,a" 12 | val records = recordsStr.split("\\|") 13 | 14 | var node: TreeNode = null 15 | // 遍历每一次页面访问记录,形成树节点,并挂载到树的正确位置上去 16 | for (record <- records) { 17 | val splits = record.split(",") 18 | val pageId = splits(0) 19 | val referPageId = splits(1) 20 | 21 | if (node == null) { 22 | node = TreeNode(pageId, ListBuffer.empty) 23 | } else { 24 | findAndAppend(node, pageId, referPageId) 25 | } 26 | } 27 | 28 | // 打印树 29 | println(node) 30 | 31 | println("------------------------") 32 | 33 | // 打印每个节点的总贡献量 34 | val lst1 = ListBuffer.empty[(String, Int)] 35 | calcWholeContributePv(node,lst1) 36 | println(lst1) 37 | 38 | println("------------------------") 39 | 40 | // 打印每个节点的总贡献量 41 | val lst2 = ListBuffer.empty[(String, Int)] 42 | calcDirectContributePv(node,lst2) 43 | println(lst2) 44 | 45 | 46 | 47 | } 48 | 49 | // 将一条页面访问记录,挂载到一棵树上的计算逻辑 50 | def findAndAppend(node: TreeNode, pageId: String, referPageId: String): Boolean = { 51 | 52 | // 先去节点的所有子节点中去寻找目标挂载点 53 | for (childNode <- node.children.reverse) { 54 | val find = findAndAppend(childNode, pageId, referPageId) 55 | // 如果在某一个子节点上找到了目标挂载点,则返回 56 | if (find) return true 57 | } 58 | 59 | // 如果在上面的过程中没有返回,说明整个for循环遍历的每一个子节点上都没找到目标挂载点 60 | // 则判断我自己是不是目标挂载点 61 | 62 | if (node.pageId.equals(referPageId)) { 63 | node.children += TreeNode(pageId, ListBuffer.empty) 64 | true 65 | } else { 66 | false 67 | } 68 | } 69 | 70 | 71 | // 计算一棵树上每个节点(页面)的总贡献量 72 | def calcWholeContributePv(node:TreeNode, 
lst:ListBuffer[(String,Int)]):Int = { 73 | 74 | var contributePv = 0 75 | 76 | // 本节点的总贡献量 = 本节点子节点个数 + 每个子节点的总贡献量 77 | 78 | // 先加上子节点个数 79 | contributePv += node.children.size 80 | 81 | for (childNode <- node.children) { 82 | // 然后去加 每个子节点的总贡献量 83 | val childContributePv = calcWholeContributePv(childNode, lst) 84 | contributePv += childContributePv 85 | } 86 | 87 | // 把本节点算出来的总贡献量,放入结果list中去 88 | lst += ((node.pageId,contributePv)) 89 | 90 | contributePv 91 | } 92 | 93 | 94 | // 计算一棵树上每个节点(页面)的直接贡献量 95 | def calcDirectContributePv(node:TreeNode, lst:ListBuffer[(String,Int)]):Unit = { 96 | 97 | // 本节点的直接贡献量 = 本节点子节点个数 98 | // 先加上子节点个数 99 | //var contributePv = node.children.size 100 | 101 | for (childNode <- node.children) { 102 | // 然后去加 每个子节点的总贡献量 103 | calcDirectContributePv(childNode, lst) 104 | } 105 | 106 | // 把本节点算出来的总贡献量,放入结果list中去 107 | lst += ((node.pageId,node.children.size)) 108 | } 109 | 110 | 111 | 112 | 113 | 114 | } 115 | -------------------------------------------------------------------------------- /data-etl/src/main/java/cn/doitedu/utils/PropertiesHolder.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import java.io.IOException; 4 | import java.util.Properties; 5 | 6 | public class PropertiesHolder { 7 | 8 | private static Properties props = null; 9 | 10 | public static String getProperty(String key) throws IOException { 11 | if(props == null) { 12 | props = new Properties(); 13 | props.load(PropertiesHolder.class.getClassLoader().getResourceAsStream("user_profile_tags_bulkload.properties")); 14 | } 15 | 16 | return props.getProperty(key); 17 | 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /data-etl/src/main/resources/hive-site.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | hive.metastore.uris 6 | thrift://doitedu:9083 7 | 8 | 9 | 
-------------------------------------------------------------------------------- /data-etl/src/main/resources/user_profile_tags_bulkload.properties: -------------------------------------------------------------------------------- 1 | hive.table.name=test_bulk_profile2 2 | hive.table.field.names=tag010,tag011,tag012,tag013,tag014,tag015,tag016 -------------------------------------------------------------------------------- /data-etl/src/main/scala/akka/demo1/Demo.scala: -------------------------------------------------------------------------------- 1 | package akka.demo1 2 | 3 | import akka.actor.{Actor, ActorSystem, Props} 4 | 5 | class HelloActor extends Actor { 6 | override def receive: Receive = { 7 | case "haha" => { 8 | println(sender() + " haha") 9 | sender() ! "你好" 10 | } 11 | case "名字" => { 12 | println(sender() + " 名字") 13 | sender() ! "深似海" 14 | } 15 | case "好了" => { 16 | println(sender() + "好了") 17 | sender() ! "马上关闭" 18 | context.stop(self) 19 | } 20 | } 21 | } 22 | 23 | 24 | class HeiheiActor extends Actor { 25 | override def receive: Receive = { 26 | case "你好" => { 27 | println(sender() + " 你好") 28 | sender() ! "名字" 29 | } 30 | case "深似海" => { 31 | println(sender() + " 深似海") 32 | sender() ! 
"好了" 33 | } 34 | case "马上关闭" => { 35 | println(sender() + " 马上关闭") 36 | context.stop(self) 37 | context.system.terminate() 38 | } 39 | } 40 | } 41 | 42 | object Demo { 43 | 44 | private val actorSystem = ActorSystem("demo") 45 | 46 | def main(args: Array[String]): Unit = { 47 | val helloActorRef = actorSystem.actorOf(Props(classOf[HelloActor]), "haha") 48 | val heiheiActorRef = actorSystem.actorOf(Props(classOf[HeiheiActor]), "heihei") 49 | 50 | helloActorRef.tell("haha",heiheiActorRef) 51 | 52 | 53 | } 54 | 55 | 56 | } 57 | -------------------------------------------------------------------------------- /data-etl/src/main/scala/akka/demo2/Client.scala: -------------------------------------------------------------------------------- 1 | package akka.demo2 2 | 3 | import akka.actor.{Actor, ActorSelection, ActorSystem, Props} 4 | import com.typesafe.config.ConfigFactory 5 | 6 | 7 | class ClientActor(host:String,port:Int) extends Actor{ 8 | 9 | var serverActorRef: ActorSelection = _ // 服务端的代理对象 10 | 11 | override def preStart(): Unit = { 12 | // akka.tcp://Server@127.0.0.1:8088 13 | serverActorRef = context.actorSelection(s"akka://Server@${host}:${port}/user/Miao~miao") 14 | } 15 | 16 | override def receive: Receive = { 17 | case "start" => 18 | serverActorRef ! Message("握手") 19 | case Message(msg:String)=> 20 | println(s"客户端收到$msg") 21 | serverActorRef ! Message("你好") 22 | case _ => 23 | serverActorRef ! 
Message("听不懂") 24 | } 25 | 26 | 27 | } 28 | 29 | 30 | object Client { 31 | 32 | def main(args: Array[String]): Unit = { 33 | 34 | 35 | //指定客户端的IP和端口 36 | val host = "127.0.0.1" 37 | val port = 8089 38 | 39 | //指定服务端的IP和端口 40 | val serverHost = "127.0.0.1" 41 | val serverPort = 8088 42 | 43 | /** 44 | * 使用ConfigFactory的parseString方法解析字符串,指定客户端IP和端口 45 | */ 46 | val config = ConfigFactory.parseString( 47 | s""" 48 | |akka.actor.provider="akka.remote.RemoteActorRefProvider" 49 | |akka.remote.artery.enable="on" 50 | |akka.remote.artery.canonical.hostname=$host 51 | |akka.remote.artery.canonical.port=$port 52 | |akka.actor.allow-java-serialization=true 53 | """.stripMargin) 54 | 55 | /** 56 | * 将config对象传递给ActorSystem并起名为"Server",为了是创建客户端工厂对象(clientActorSystem)。 57 | */ 58 | val clientActorSystem = ActorSystem("client", config) 59 | 60 | // 创建dispatch | mailbox 61 | val clientActorRef = clientActorSystem.actorOf(Props(new ClientActor(serverHost, serverPort)), "Client") 62 | clientActorRef ! "start" // 自己给自己发送了一条消息 到自己的mailbox => receive 63 | 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /data-etl/src/main/scala/akka/demo2/Message.scala: -------------------------------------------------------------------------------- 1 | package akka.demo2 2 | 3 | case class Message(msg:String) 4 | -------------------------------------------------------------------------------- /data-etl/src/main/scala/akka/demo2/Server.scala: -------------------------------------------------------------------------------- 1 | package akka.demo2 2 | 3 | import akka.actor.{Actor, ActorSystem, Props} 4 | import com.typesafe.config.ConfigFactory 5 | 6 | 7 | class ServerActor extends Actor{ 8 | 9 | override def receive: Receive = { 10 | 11 | case Message(msg)=> 12 | println("服务端收到消息:" + msg) 13 | sender() ! Message("服务端来了") 14 | case _ => 15 | println("服务端没听懂") 16 | sender() ! 
"没听懂" 17 | } 18 | } 19 | 20 | object Server { 21 | def main(args: Array[String]): Unit = { 22 | //定义服务端的ip和端口 23 | val host = "127.0.0.1" 24 | val port = 8088 25 | /** 26 | * 使用ConfigFactory的parseString方法解析字符串,指定服务端IP和端口 27 | */ 28 | val config = ConfigFactory.parseString( 29 | s""" 30 | |akka.actor.provider="akka.remote.RemoteActorRefProvider" 31 | |akka.remote.artery.enable="on" 32 | |akka.remote.artery.canonical.hostname=$host 33 | |akka.remote.artery.canonical.port=$port 34 | |akka.actor.allow-java-serialization=true 35 | """.stripMargin) 36 | /** 37 | * 将config对象传递给ActorSystem并起名为"Server",为了是创建服务端工厂对象(ServerActorSystem)。 38 | */ 39 | val ServerActorSystem = ActorSystem("Server", config) 40 | /** 41 | * 通过工厂对象创建服务端的ActorRef 42 | */ 43 | val serverActorRef = ServerActorSystem.actorOf(Props[ServerActor], "Miao~miao") 44 | 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /data-etl/src/test/java/RoaringBitmapTest.scala: -------------------------------------------------------------------------------- 1 | import cn.hutool.core.date.{DateUnit, DateUtil} 2 | import jodd.datetime.TimeZoneUtil 3 | import org.apache.commons.lang3.time.DateUtils 4 | import org.roaringbitmap.RoaringBitmap 5 | 6 | import java.time.ZonedDateTime 7 | import java.util.{Calendar, TimeZone} 8 | 9 | object RoaringBitmapTest { 10 | 11 | 12 | def main(args: Array[String]): Unit = { 13 | 14 | // 创建一个bitmap 15 | 16 | val bitmap: RoaringBitmap = RoaringBitmap.bitmapOf(1, 3, 4,5, 6, 7) 17 | bitmap.add(9) 18 | 19 | println(bitmap.contains(3,6)) 20 | println(bitmap.contains(9)) 21 | 22 | // 我们要记录 是: "2022-07-01" , "2022-07-03" , "2022-07-04" , "2022-07-05" 23 | // bitmap中的角标: 日期- 固定起始日(“2010-01-01”) 24 | val initDate = DateUtils.parseDate("2010-01-01", "yyyy-MM-dd") 25 | val date1= DateUtils.parseDate("2022-07-01", "yyyy-MM-dd") 26 | val date3 = DateUtils.parseDate("2022-07-03", "yyyy-MM-dd") 27 | val date4 = DateUtils.parseDate("2022-07-04", 
"yyyy-MM-dd") 28 | val date5 = DateUtils.parseDate("2022-07-05", "yyyy-MM-dd") 29 | val date6 = DateUtils.parseDate("2022-07-06", "yyyy-MM-dd") 30 | 31 | 32 | /** 33 | * 构造活跃记录 34 | */ 35 | val bitmap2 = RoaringBitmap.bitmapOf(DateUtil.between(date1, initDate, DateUnit.DAY).toInt) 36 | bitmap2.add(DateUtil.between(date3, initDate, DateUnit.DAY).toInt) 37 | bitmap2.add(DateUtil.between(date4, initDate, DateUnit.DAY).toInt) 38 | bitmap2.add(DateUtil.between(date5, initDate, DateUnit.DAY).toInt) 39 | 40 | 41 | 42 | // 查询 2022-07-03 是否有活跃 43 | println(bitmap2.contains(DateUtil.between(date3, initDate, DateUnit.DAY).toInt)) 44 | 45 | // 查询 2022-07-03 ~ 2022-07-05 是否连续活跃 46 | val res = bitmap2.contains(DateUtil.between(date3, initDate, DateUnit.DAY).toInt, DateUtil.between(date5, initDate, DateUnit.DAY).toInt) 47 | println(res) 48 | 49 | 50 | 51 | } 52 | 53 | 54 | } 55 | -------------------------------------------------------------------------------- /data-export/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | doe-data 7 | cn.doitedu 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | data-export 13 | 14 | 15 | 16 | 8 17 | 8 18 | 2.11.12 19 | 2.3.0 20 | 2.11 21 | 22 | 23 | 24 | 25 | 26 | 27 | org.scala-lang 28 | scala-library 29 | ${scala.version} 30 | 31 | 32 | 33 | 34 | org.apache.spark 35 | spark-sql_${spark.scala} 36 | ${spark.version} 37 | 38 | 39 | 40 | 41 | 42 | org.apache.spark 43 | spark-hive_${spark.scala} 44 | ${spark.version} 45 | 46 | 47 | 48 | 49 | 50 | mysql 51 | mysql-connector-java 52 | 8.0.29 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | org.elasticsearch 62 | elasticsearch-hadoop 63 | 7.17.5 64 | 65 | 66 | 67 | org.elasticsearch 68 | elasticsearch 69 | 7.17.5 70 | 71 | 72 | 73 | org.elasticsearch.client 74 | elasticsearch-rest-high-level-client 75 | 7.17.5 76 | 77 | 78 | 79 | 80 | org.projectlombok 81 | lombok 82 | 1.18.24 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | net.alchim31.maven 92 | 
scala-maven-plugin 93 | 4.3.1 94 | 95 | 96 | 97 | compile 98 | testCompile 99 | 100 | 101 | 102 | -dependencyfile 103 | ${project.build.directory}/.scala_dependencies 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /data-export/src/main/java/cn/doitedu/profile/export/EsSpark.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.profile.`export` 2 | 3 | import org.apache.spark.{SparkConf, SparkContext} 4 | import org.elasticsearch.spark._ 5 | 6 | /** 7 | * @Author: deep as the sea 8 | * @Site: 多易教育 9 | * @QQ: 657270652 10 | * @Date: 2022/8/13 11 | * @Desc: spark写入数据到es,简单示例 12 | 13 | 14 | curl -X GET "localhost:9200/docs/_search?from=0&size=20&pretty" -H 'Content-Type: application/json' -d' 15 | { 16 | "query": { 17 | "match": { 18 | "tg04": "服装" 19 | } 20 | } 21 | } 22 | ' 23 | 24 | * 25 | * 26 | * */ 27 | object EsSpark { 28 | 29 | def main(args: Array[String]): Unit = { 30 | 31 | val conf = new SparkConf() 32 | conf.setMaster("local") 33 | conf.setAppName("") 34 | conf.set("es.index.auto.create", "true") 35 | conf.set("es.nodes", "doitedu") 36 | .set("es.port", "9200") 37 | .set("es.nodes.wan.only", "true") 38 | // .set("es.net.http.auth.user", "elxxxxastic") 39 | // .set("es.net.http.auth.pass", "xxxx") 40 | val sc = new SparkContext(conf) 41 | 42 | val rdd = sc.makeRDD(Seq( 43 | Map("guid"->1,"tg01" -> 5, "tg02" -> 10, "tg03" -> "高富帅", "tg04" -> List("高端家具","汽车保养", "小罐咖啡")), 44 | Map("guid"->2,"tg01" -> 4, "tg02" -> 20, "tg03" -> "白富美", "tg04" -> List("兰蔻精华液","特仑苏牛奶", "香奈儿","高尔夫球场","高尔夫运动服饰","汽车内饰"),"tg05"->"女"), 45 | Map("guid"->3,"tg01" -> 3, "tg02" -> 15, "tg03" -> "全职妈妈", "tg04" -> List("惠氏奶粉牛奶", "宝宝润肤露","运动健身计划")), 46 | Map("guid"->4,"tg01" -> 3, "tg02" -> 14, "tg03" -> "职场人士", "tg04" -> List("兰蔻小黑瓶", "宝宝润肤露","家用汽车购置攻略"),"tg05"->"男") 47 | )) 48 | 49 | 
rdd.saveToEs("doeusers/",Map("es.mapping.id" -> "guid")) 50 | 51 | sc.stop() 52 | 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /data-export/src/main/java/cn/doitedu/profile/export/HiveTags2Es.scala: -------------------------------------------------------------------------------- 1 | package cn.doitedu.profile.`export` 2 | 3 | import org.apache.spark.SparkConf 4 | import org.apache.spark.sql.SparkSession 5 | 6 | object HiveTags2Es { 7 | 8 | def main(args: Array[String]): Unit = { 9 | 10 | val conf = new SparkConf() 11 | conf.set("es.index.auto.create", "true") 12 | conf.set("es.nodes", "doitedu") 13 | .set("es.port", "9200") 14 | .set("es.nodes.wan.only", "true") 15 | 16 | val spark = SparkSession.builder() 17 | .config(conf) 18 | .master("local") 19 | .appName("hive标签数据导入es") 20 | .enableHiveSupport() 21 | .getOrCreate() 22 | 23 | val df = spark.read.table("test.user_profile_test").where("dt='2022-08-17'") 24 | 25 | val tagsRdd = df.rdd.map(row => { 26 | Map("guid" -> row.getAs[Int]("guid"), 27 | "tg01" -> row.getAs[Int]("tg01"), 28 | "tg02" -> row.getAs[Int]("tg02"), 29 | "tg03" -> row.getAs[String]("tg03"), 30 | "tg04" -> row.getAs[Array[String]]("tg04") 31 | ) 32 | }) 33 | 34 | 35 | import org.elasticsearch.spark._ 36 | tagsRdd.saveToEs("doeusers/",Map("es.mapping.id" -> "guid")) 37 | 38 | 39 | spark.close() 40 | 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /lib/flink-doris-connector-1.14_2.12-1.0.3.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coderblack/doe-data/8c1d7a306bf52f3f9121f0ea87e5fc4713306191/lib/flink-doris-connector-1.14_2.12-1.0.3.jar -------------------------------------------------------------------------------- /lib/flink-sql-connector-mysql-cdc-2.3-SNAPSHOT.jar: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/coderblack/doe-data/8c1d7a306bf52f3f9121f0ea87e5fc4713306191/lib/flink-sql-connector-mysql-cdc-2.3-SNAPSHOT.jar -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | cn.doitedu 8 | doe-data 9 | pom 10 | 1.0-SNAPSHOT 11 | 12 | data-collector 13 | data-etl 14 | tech-test 15 | realtime-dw 16 | data-export 17 | rule_model_resources 18 | realtime-marketing-common 19 | realtime-marketing-engine 20 | 21 | 22 | 23 | 8 24 | 8 25 | 26 | 27 | 28 | 29 | 30 | 31 | com.alibaba 32 | fastjson 33 | 1.2.83 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | nexus-aliyun 44 | Nexus aliyun 45 | default 46 | https://maven.aliyun.com/nexus/content/groups/public 47 | 48 | false 49 | never 50 | 51 | 52 | true 53 | never 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | ali-plugin 62 | https://maven.aliyun.com/nexus/content/groups/public/ 63 | 64 | false 65 | never 66 | 67 | 68 | true 69 | never 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-assembly-plugin 81 | 2.6 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | jar-with-dependencies 90 | 91 | 92 | 93 | 94 | 95 | make-assemble 96 | package 97 | 98 | single 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/MallAppTrafficDwsEtl.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl; 2 | 3 | import cn.doitedu.rtdw.etl.functions.TrafficAnalyseFunc; 4 | import cn.doitedu.rtdw.etl.pojo.EventBean; 5 | import cn.doitedu.rtdw.etl.pojo.TrafficBean; 6 | import com.alibaba.fastjson.JSON; 7 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 8 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 9 | import 
org.apache.flink.api.java.functions.KeySelector; 10 | import org.apache.flink.api.java.tuple.Tuple2; 11 | import org.apache.flink.connector.base.DeliveryGuarantee; 12 | import org.apache.flink.connector.kafka.sink.KafkaRecordSerializationSchema; 13 | import org.apache.flink.connector.kafka.sink.KafkaSink; 14 | import org.apache.flink.connector.kafka.source.KafkaSource; 15 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 16 | import org.apache.flink.streaming.api.datastream.DataStream; 17 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 18 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 19 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 20 | import org.apache.kafka.clients.consumer.OffsetResetStrategy; 21 | 22 | public class MallAppTrafficDwsEtl { 23 | 24 | public static void main(String[] args) throws Exception { 25 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 26 | env.setParallelism(1); 27 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 28 | 29 | // 构造一个 kafka 的source 30 | KafkaSource kafkaSource = KafkaSource.builder() 31 | .setTopics("dwd-applog-detail2") 32 | .setBootstrapServers("doitedu:9092") 33 | .setValueOnlyDeserializer(new SimpleStringSchema()) 34 | .setGroupId("tr") 35 | .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST)) 36 | .build(); 37 | 38 | // 从kafka的dwd明细topic读取数据 39 | DataStreamSource sourceStream = env.fromSource(kafkaSource, 40 | WatermarkStrategy.noWatermarks(), 41 | "dwd-applog"); 42 | 43 | // 带上事件时间语义和watermark生成策略的 bean对象数据流 44 | DataStream beanStream = sourceStream.map(json -> JSON.parseObject(json, EventBean.class)); 45 | 46 | DataStream trafficStream = beanStream 47 | .keyBy(new KeySelector>() { 48 | @Override 49 | public Tuple2 getKey(EventBean value) throws Exception { 50 | return Tuple2.of(value.getGuid(), 
value.getSessionid()); 51 | } 52 | }) 53 | .process(new TrafficAnalyseFunc()); 54 | 55 | 56 | // 构造用于输出到kafka的sink算子 57 | KafkaSink resultSink = KafkaSink.builder() 58 | .setBootstrapServers("doitedu:9092") 59 | .setRecordSerializer(KafkaRecordSerializationSchema.builder() 60 | .setValueSerializationSchema(new SimpleStringSchema()) 61 | .setTopic("dws-traffic-analyse") 62 | .build()) 63 | .setDeliverGuarantee(DeliveryGuarantee.AT_LEAST_ONCE) 64 | .build(); 65 | 66 | // 将结果数据,转成json输出到kafka 67 | trafficStream.map(JSON::toJSONString) 68 | .sinkTo(resultSink); 69 | 70 | env.execute(); 71 | 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/functions/EventsDataFilterFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.functions; 2 | 3 | import cn.doitedu.rtdw.etl.pojo.EventBean; 4 | import org.apache.commons.lang3.StringUtils; 5 | import org.apache.flink.api.common.functions.FilterFunction; 6 | 7 | public class EventsDataFilterFunction implements FilterFunction { 8 | @Override 9 | public boolean filter(EventBean bean) throws Exception { 10 | return bean!=null && StringUtils.isNotBlank(bean.getDeviceid()) && StringUtils.isNotBlank(bean.getSessionid()); 11 | } 12 | } 13 | -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/functions/GeoHashAreaQueryFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.functions; 2 | 3 | import cn.doitedu.rtdw.etl.pojo.EventBean; 4 | import org.apache.commons.lang3.StringUtils; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.apache.flink.configuration.Configuration; 7 | import org.apache.flink.streaming.api.functions.KeyedProcessFunction; 8 | import org.apache.flink.util.Collector; 9 | 
import org.apache.flink.util.OutputTag; 10 | import org.apache.hadoop.hbase.HBaseConfiguration; 11 | import org.apache.hadoop.hbase.TableName; 12 | import org.apache.hadoop.hbase.client.*; 13 | import org.apache.hadoop.hbase.util.Bytes; 14 | 15 | public class GeoHashAreaQueryFunction extends KeyedProcessFunction { 16 | Connection hbaseConn; 17 | Table geoTable; 18 | 19 | @Override 20 | public void open(Configuration parameters) throws Exception { 21 | 22 | org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create(); 23 | conf.set("hbase.zookeeper.quorum", "doitedu:2181"); 24 | hbaseConn = ConnectionFactory.createConnection(conf); 25 | geoTable = hbaseConn.getTable(TableName.valueOf("dim_geo_area")); 26 | 27 | 28 | } 29 | 30 | @Override 31 | public void processElement(EventBean bean, KeyedProcessFunction.Context ctx, Collector out) throws Exception { 32 | 33 | String geoHashCode = bean.getGeoHashCode(); 34 | String province = "未知"; 35 | String city = "未知"; 36 | String region = "未知"; 37 | 38 | boolean flag = false; 39 | 40 | if (StringUtils.isNotBlank(geoHashCode)) { 41 | // hbase中地理位置信息的数据结构: geohash码 -> f:q -> "江西省,南昌市,鄱阳湖区" 42 | Result result = geoTable.get(new Get(Bytes.toBytes(geoHashCode))); 43 | byte[] value = result.getValue(Bytes.toBytes("f"), Bytes.toBytes("q")); 44 | 45 | if (value != null) { 46 | String[] split = new String(value).split(","); 47 | if (split.length == 3) { 48 | province = split[0]; 49 | city = split[1]; 50 | region = split[2]; 51 | 52 | flag = true; 53 | 54 | } 55 | } 56 | } 57 | 58 | bean.setProvince(province); 59 | bean.setCity(city); 60 | bean.setRegion(region); 61 | 62 | 63 | out.collect(bean); 64 | 65 | // 如果地理位置解析失败,则将本条数据的gps座标,输出到测流 66 | if(!flag ) { 67 | ctx.output(new OutputTag("unknown_gps", TypeInformation.of(String.class)), bean.getLatitude() + "," + bean.getLongitude()); 68 | } 69 | } 70 | 71 | 72 | } 73 | -------------------------------------------------------------------------------- 
/realtime-dw/src/main/java/cn/doitedu/rtdw/etl/functions/JsonToEventBeanMapFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.functions; 2 | 3 | import cn.doitedu.rtdw.etl.pojo.EventBean; 4 | import com.alibaba.fastjson.JSON; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | 7 | public class JsonToEventBeanMapFunction implements MapFunction { 8 | 9 | @Override 10 | public EventBean map(String jsonLine) throws Exception { 11 | 12 | EventBean eventBean = null; 13 | 14 | try{ 15 | eventBean = JSON.parseObject(jsonLine,EventBean.class); 16 | }catch (Exception e){ 17 | e.printStackTrace(); 18 | } 19 | 20 | return eventBean; 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/pojo/DeviceAccountBindInfo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | public class DeviceAccountBindInfo { 11 | 12 | private String deviceId; 13 | private String account; 14 | private Float weight; 15 | private Long userId; 16 | private Long registerTime; 17 | } 18 | -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/pojo/EventBean.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.pojo; 2 | 3 | /** 4 | * Copyright 2022 bejson.com 5 | */ 6 | 7 | import lombok.*; 8 | 9 | import java.util.Map; 10 | 11 | 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | @Getter 15 | @Setter 16 | @ToString 17 | public class EventBean { 18 | 19 | private String account; 20 | private String appid; 21 | private String appversion; 22 | private 
String carrier; 23 | private String deviceid; 24 | private String devicetype; 25 | private String eventid; 26 | private String ip; 27 | private double latitude; 28 | private double longitude; 29 | private String nettype; 30 | private String osname; 31 | private String osversion; 32 | private Map properties; 33 | private String releasechannel; 34 | private String resolution; 35 | private String sessionid; 36 | private long timestamp; 37 | private long guid; 38 | // 如果是注册用户,则这里表示注册的时间 39 | private long registerTime; 40 | // 如果是非注册用户,则这里表示首次到访时间 41 | private long firstAccessTime; 42 | 43 | // 新老访客属性 44 | private int isNew; 45 | 46 | // geohash码 47 | private String geoHashCode; 48 | 49 | // 省市区维度字段 50 | private String province; 51 | private String city; 52 | private String region; 53 | 54 | // properties的json格式字段 55 | // 本字段,是为了方便将处理后的明细日志数据写入doris 56 | // doris并不支持Map类型 57 | private String propsJson; 58 | 59 | } -------------------------------------------------------------------------------- /realtime-dw/src/main/java/cn/doitedu/rtdw/etl/pojo/TrafficBean.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtdw.etl.pojo; 2 | 3 | import lombok.*; 4 | 5 | import java.io.Serializable; 6 | 7 | /*** 8 | * @author hunter.d 9 | * @qq 657270652 10 | * @wx haitao-duan 11 | * @date 2022/4/6 12 | **/ 13 | @NoArgsConstructor 14 | @AllArgsConstructor 15 | @Getter 16 | @Setter 17 | @ToString 18 | public class TrafficBean implements Serializable { 19 | private long guid; 20 | private String sessionId; 21 | private String splitSessionId; 22 | private String eventId; 23 | private long ts; 24 | private String pageId; 25 | private long pageLoadTime; 26 | private String province; 27 | private String city; 28 | private String region; 29 | private String deviceType; 30 | private int isNew; 31 | private String releaseChannel; 32 | } 33 | -------------------------------------------------------------------------------- 
/realtime-dw/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 23 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 24 | 25 | 26 | 27 | -------------------------------------------------------------------------------- /realtime-dw/src/test/java/CepTest.java: -------------------------------------------------------------------------------- 1 | import org.apache.flink.streaming.api.CheckpointingMode; 2 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 3 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 4 | 5 | public class CepTest { 6 | public static void main(String[] args) throws Exception { 7 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 8 | env.enableCheckpointing(2000, CheckpointingMode.EXACTLY_ONCE); 9 | env.getCheckpointConfig().setCheckpointStorage("file:/d:/checkpoint"); 10 | env.setParallelism(1); 11 | 12 | StreamTableEnvironment tableEnv = StreamTableEnvironment.create(env); 13 | 14 | tableEnv.executeSql( 15 | " CREATE TABLE kafka_ad ( " + 16 | " gid INT, " + 17 | " adid INT, " + 18 | " ts BIGINT, " + 19 | " eid STRING , " + 20 | " rt as to_timestamp_ltz(ts,3) ,"+ 21 | " watermark for rt as rt "+ 22 | " ) WITH ( " + 23 | " 'connector' = 'kafka', " + 24 | " 'topic' = 'test-ad', " + 25 | " 'properties.bootstrap.servers' = 'doitedu:9092', " + 26 | " 'properties.group.id' = 'testGroup', " + 27 | " 'scan.startup.mode' = 'latest-offset', " + 28 | " 'format' = 'csv' " + 29 | " ) "); 30 | 31 | tableEnv.executeSql("select * from kafka_ad")/*.print()*/; 32 | 33 | tableEnv.executeSql("SELECT *\n" + 34 | "FROM kafka_ad\n" + 35 | " MATCH_RECOGNIZE(\n" + 36 | " PARTITION BY gid,adid\n" + 37 | " ORDER BY rt\n" + 38 | " MEASURES\n" + 39 | " A.eid 
AS view_eid,\n" + 40 | " A.ts AS view_ts,\n" + 41 | " B.eid AS click_eid,\n" + 42 | " B.ts AS click_ts \n" + 43 | " ONE ROW PER MATCH\n" + 44 | " AFTER MATCH SKIP TO NEXT ROW \n" + 45 | " PATTERN (A C* B) \n" + 46 | " DEFINE\n" + 47 | " A AS A.eid='view',\n" + 48 | " B AS B.eid='click',\n" + 49 | " C AS NOT C.eid='view' AND NOT C.eid='click' \n" + 50 | ") AS T").print(); 51 | 52 | env.execute(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /realtime-dw/src/test/java/Test.java: -------------------------------------------------------------------------------- 1 | import org.apache.commons.lang3.StringUtils; 2 | 3 | public class Test { 4 | 5 | public static void main(String[] args) { 6 | 7 | 8 | System.out.println(StringUtils.isNumeric("13")); 9 | System.out.println(StringUtils.isNumeric("13.2385")); 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /realtime-marketing-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | doe-data 7 | cn.doitedu 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | realtime-marketing-common 13 | 14 | 15 | 8 16 | 8 17 | 18 | 19 | 20 | 21 | redis.clients 22 | jedis 23 | 4.2.3 24 | 25 | 26 | 27 | org.projectlombok 28 | lombok 29 | 1.18.24 30 | 31 | 32 | 33 | org.roaringbitmap 34 | RoaringBitmap 35 | 0.9.31 36 | 37 | 38 | 39 | org.apache.flink 40 | flink-core 41 | 1.14.4 42 | provided 43 | 44 | 45 | 46 | org.apache.flink 47 | flink-streaming-java_2.12 48 | 1.14.4 49 | provided 50 | 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/interfaces/RuleCalculator.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.interfaces; 2 | 3 | import cn.doitedu.rtmk.common.pojo.UserEvent; 4 | import 
com.alibaba.fastjson.JSONObject; 5 | import org.roaringbitmap.RoaringBitmap; 6 | 7 | import java.util.List; 8 | 9 | /** 10 | * @Author: deep as the sea 11 | * @Site: 多易教育 12 | * @QQ: 657270652 13 | * @Date: 2022/8/19 14 | * @Desc: 规则运算机的统一接口 15 | **/ 16 | public interface RuleCalculator { 17 | 18 | 19 | /** 20 | * 规则运算机的初始化方法 21 | * @param ruleDefineParamJsonObject 规则定义参数整体json 22 | * @param profileUserBitmap 人群画像bitmap 23 | */ 24 | void init(JSONObject ruleDefineParamJsonObject, RoaringBitmap profileUserBitmap); 25 | 26 | 27 | /** 28 | * 对输入事件进行规则处理的入口方法 29 | * @param userEvent 输入的用户行为事件 30 | */ 31 | List process(UserEvent userEvent); 32 | 33 | 34 | /** 35 | * 规则条件运算逻辑 36 | * @param userEvent 用户事件 37 | */ 38 | void calc(UserEvent userEvent); 39 | 40 | /** 41 | * 规则条件是否满足的判断逻辑 42 | * @param guid 用户标识 43 | * @return 是否满足 44 | */ 45 | boolean isMatch(int guid); 46 | } 47 | -------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/interfaces/TimerRuleCalculator.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.interfaces; 2 | 3 | import cn.doitedu.rtmk.common.pojo.UserEvent; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.apache.flink.api.common.state.MapState; 6 | import org.apache.flink.streaming.api.TimerService; 7 | 8 | import java.util.List; 9 | 10 | /** 11 | * 需要用到定时器功能规则模型的运算机接口 12 | */ 13 | public abstract class TimerRuleCalculator implements RuleCalculator { 14 | 15 | public abstract List onTimer(long timestamp, int guid,MapState timerState,TimerService timerService); 16 | 17 | public abstract List process(UserEvent userEvent,MapState timerState,TimerService timerService); 18 | 19 | @Override 20 | public void calc(UserEvent userEvent) { 21 | 22 | } 23 | 24 | @Override 25 | public boolean isMatch(int guid) { 26 | return false; 27 | } 28 | } 29 | 
-------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/pojo/ActionSeqParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.pojo; 2 | 3 | /* 4 | * 5 | { 6 | "eventParams": [ 7 | { 8 | "eventId": "e1", 9 | "attributeParams": [ 10 | { 11 | "attributeName": "pageId", 12 | "compareType": "=", 13 | "compareValue": "page001" 14 | } 15 | ] 16 | } 17 | ], 18 | "windowStart": "2022-08-01 12:00:00", 19 | "windowEnd": "2022-08-30 12:00:00", 20 | "conditionId": 3, 21 | "dorisQueryTemplate": "action_seq", 22 | "seqCount": 2 23 | } 24 | */ 25 | 26 | import lombok.AllArgsConstructor; 27 | import lombok.Data; 28 | import lombok.NoArgsConstructor; 29 | 30 | import java.util.List; 31 | 32 | @NoArgsConstructor 33 | @AllArgsConstructor 34 | @Data 35 | public class ActionSeqParam { 36 | 37 | private String windowStart; 38 | private String windowEnd; 39 | private int conditionId; 40 | private String dorisQueryTemplate; 41 | private int seqCount; 42 | 43 | private List eventParams; 44 | } 45 | -------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/pojo/AttributeParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | "attributeName": "pageId", 9 | "compareType": "=", 10 | "compareValue": "page001" 11 | */ 12 | @NoArgsConstructor 13 | @AllArgsConstructor 14 | @Data 15 | public class AttributeParam { 16 | private String attributeName; 17 | private String compareType ; 18 | private String compareValue ; 19 | 20 | } 21 | -------------------------------------------------------------------------------- 
/realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/pojo/EventParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | import java.util.List; 8 | 9 | @NoArgsConstructor 10 | @AllArgsConstructor 11 | @Data 12 | public class EventParam { 13 | 14 | private String eventId; 15 | private List attributeParams; 16 | 17 | 18 | } 19 | -------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/pojo/UserEvent.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | import java.util.Map; 8 | 9 | @Data 10 | @NoArgsConstructor 11 | @AllArgsConstructor 12 | public class UserEvent { 13 | private int guid; 14 | private String eventId; 15 | private Map properties; 16 | private long eventTime; 17 | 18 | } 19 | -------------------------------------------------------------------------------- /realtime-marketing-common/src/main/java/cn/doitedu/rtmk/common/utils/UserEventComparator.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.common.utils; 2 | 3 | import cn.doitedu.rtmk.common.pojo.UserEvent; 4 | import com.alibaba.fastjson.JSONArray; 5 | import com.alibaba.fastjson.JSONObject; 6 | import lombok.extern.slf4j.Slf4j; 7 | 8 | @Slf4j 9 | public class UserEventComparator { 10 | 11 | public static boolean userEventIsEqualParam(UserEvent userEvent, JSONObject eventParam){ 12 | String eventIdParam = eventParam.getString("eventId"); 13 | JSONArray attributeParams = eventParam.getJSONArray("attributeParams"); 14 | 15 | if(eventIdParam.equals(userEvent.getEventId())) { 16 
| // 对每一个属性条件进行判断 17 | for (int j = 0; j < attributeParams.size(); j++) { 18 | // 取出一个属性参数 19 | JSONObject attributeParam = attributeParams.getJSONObject(j); 20 | 21 | String paramAttributeName = attributeParam.getString("attributeName"); 22 | String paramCompareType = attributeParam.getString("compareType"); 23 | String paramValue = attributeParam.getString("compareValue"); 24 | String eventAttributeValue = userEvent.getProperties().get(paramAttributeName); 25 | log.info("比较事件是否匹配条件参数,paramAttributeName:{} , paramCompareType:{} , paramValue:{},eventAttributeValue:{}",paramAttributeName,paramCompareType,paramValue,eventAttributeValue); 26 | 27 | if(eventAttributeValue!=null) { 28 | if ("=".equals(paramCompareType) && !(paramValue.compareTo(eventAttributeValue) == 0)) { 29 | return false; 30 | } 31 | 32 | if (">".equals(paramCompareType) && !(paramValue.compareTo(eventAttributeValue) > 0)) { 33 | return false; 34 | } 35 | 36 | if ("<".equals(paramCompareType) && !(paramValue.compareTo(eventAttributeValue) < 0)) { 37 | return false; 38 | } 39 | 40 | if ("<=".equals(paramCompareType) && !(paramValue.compareTo(eventAttributeValue) <= 0)) { 41 | return false; 42 | } 43 | 44 | if (">=".equals(paramCompareType) && !(paramValue.compareTo(eventAttributeValue) >= 0)) { 45 | return false; 46 | } 47 | } 48 | } 49 | return true; 50 | } 51 | 52 | return false; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/engine/functions/Json2UserEventMapFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.engine.functions; 2 | 3 | import cn.doitedu.rtmk.common.pojo.UserEvent; 4 | import com.alibaba.fastjson.JSON; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | 7 | public class Json2UserEventMapFunction implements MapFunction { 8 | @Override 9 | public UserEvent map(String eventJson) throws 
Exception { 10 | UserEvent userEvent = JSON.parseObject(eventJson, UserEvent.class); 11 | return userEvent; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/engine/functions/Row2RuleMetaBeanMapFunction.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.engine.functions; 2 | 3 | import cn.doitedu.rtmk.common.pojo.UserEvent; 4 | import cn.doitedu.rtmk.engine.pojo.RuleMetaBean; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.types.Row; 7 | import org.apache.flink.types.RowKind; 8 | import org.roaringbitmap.RoaringBitmap; 9 | 10 | import java.io.IOException; 11 | import java.nio.ByteBuffer; 12 | 13 | /** 14 | * @Author: deep as the sea 15 | * @Site: 多易教育 16 | * @QQ: 657270652 17 | * @Date: 2022/8/20 18 | * @Desc: 19 | * 20 | * `id` int(11) NOT NULL AUTO_INCREMENT, 21 | * `rule_id` varchar(50) DEFAULT NULL, 22 | * `rule_model_id` int(11) DEFAULT NULL, 23 | * `rule_profile_user_bitmap` binary(255) DEFAULT NULL, 24 | * `caculator_groovy_code` text, 25 | * `creator_name` varchar(255) DEFAULT NULL, 26 | * `rule_status` int(11) DEFAULT NULL, 27 | * `create_time` datetime DEFAULT NULL, 28 | * `update_time` datetime DEFAULT NULL, 29 | * 30 | **/ 31 | public class Row2RuleMetaBeanMapFunction implements MapFunction { 32 | @Override 33 | public RuleMetaBean map(Row row) throws Exception { 34 | 35 | RuleMetaBean ruleMetaBean = new RuleMetaBean(); 36 | 37 | if(row.getKind() == RowKind.DELETE){ 38 | ruleMetaBean.setOperateType("DELETE"); 39 | 40 | String ruleId = row.getFieldAs("rule_id"); 41 | ruleMetaBean.setRuleId(ruleId); 42 | 43 | }else if(row.getKind() == RowKind.UPDATE_AFTER){ 44 | ruleMetaBean.setOperateType("UPDATE"); 45 | setRuleMetaBeanAttributes(ruleMetaBean,row); 46 | 47 | } else if (row.getKind() == RowKind.INSERT) { 48 | 
ruleMetaBean.setOperateType("INSERT"); 49 | setRuleMetaBeanAttributes(ruleMetaBean,row); 50 | }else{ 51 | return null; 52 | } 53 | 54 | return ruleMetaBean; 55 | } 56 | 57 | 58 | public void setRuleMetaBeanAttributes(RuleMetaBean ruleMetaBean,Row row) throws IOException { 59 | String ruleId = row.getFieldAs("rule_id"); 60 | int ruleModelId = row.getFieldAs("rule_model_id"); 61 | byte[] bitmapBytes = row.getFieldAs("rule_profile_user_bitmap"); 62 | 63 | RoaringBitmap bitmap = null; 64 | 65 | if(bitmapBytes != null) { 66 | bitmap = RoaringBitmap.bitmapOf(); 67 | bitmap.deserialize(ByteBuffer.wrap(bitmapBytes)); 68 | } 69 | String caculatorGroovyCode = row.getFieldAs("caculator_groovy_code"); 70 | String ruleParamJson = row.getFieldAs("rule_param_json"); 71 | 72 | 73 | String creatorName = row.getFieldAs("creator_name"); 74 | int ruleStatus = row.getFieldAs("rule_status"); 75 | 76 | ruleMetaBean.setRuleId(ruleId); 77 | ruleMetaBean.setRuleModelId(ruleModelId); 78 | ruleMetaBean.setProfileUserBitmap(bitmap); 79 | ruleMetaBean.setCaculatorGroovyCode(caculatorGroovyCode); 80 | ruleMetaBean.setRuleParamJson(ruleParamJson); 81 | ruleMetaBean.setCreatorName(creatorName); 82 | ruleMetaBean.setRuleStatus(ruleStatus); 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/engine/pojo/RuleMatchResult.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.engine.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | /** 8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 2022/8/20 12 | * @Desc: 封装规则触达结果的javabean 13 | **/ 14 | 15 | @Data 16 | @NoArgsConstructor 17 | @AllArgsConstructor 18 | public class RuleMatchResult { 19 | 20 | private int guid; 21 | private String ruleId; 22 | private long matchTime; 23 | 24 | } 25 | 
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/engine/pojo/RuleMetaBean.java: --------------------------------------------------------------------------------
package cn.doitedu.rtmk.engine.pojo;

import cn.doitedu.rtmk.common.interfaces.RuleCalculator;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
import org.roaringbitmap.RoaringBitmap;

/**
 * @Author: deep as the sea
 * @Site: 多易教育
 * @QQ: 657270652
 * @Date: 2022/8/20
 * @Desc: In-memory representation of one published rule instance.
 *
 * CREATE TABLE `rule_instance_definition` (
 *   `id` int(11) NOT NULL AUTO_INCREMENT,
 *   `rule_id` varchar(50) DEFAULT NULL,
 *   `rule_model_id` int(11) DEFAULT NULL,
 *   `rule_profile_user_bitmap` binary(255) DEFAULT NULL,
 *   `caculator_groovy_code` text,
 *   `creator_name` varchar(255) DEFAULT NULL,
 *   `rule_status` int(11) DEFAULT NULL,
 *   `create_time` datetime DEFAULT NULL,
 *   `update_time` datetime DEFAULT NULL,
 *   PRIMARY KEY (`id`)
 * ) ENGINE=InnoDB AUTO_INCREMENT=2 DEFAULT CHARSET=latin1;
 **/
@Data
@NoArgsConstructor
@AllArgsConstructor
public class RuleMetaBean {

    private String operateType;            // rule management operation type: INSERT / DELETE / UPDATE
    private String ruleId;
    private int ruleModelId;
    private RoaringBitmap profileUserBitmap;  // pre-selected audience (guids) for the rule
    private String caculatorGroovyCode;       // groovy source of the rule's calculator
    private String ruleParamJson;
    private String creatorName;
    private int ruleStatus;

    private RuleCalculator ruleCalculator;    // compiled calculator instance (filled at runtime)

}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/engine/utils/FlinkStateDescriptors.java: --------------------------------------------------------------------------------
package cn.doitedu.rtmk.engine.utils;

import cn.doitedu.rtmk.engine.pojo.RuleMetaBean;
import org.apache.flink.api.common.state.MapStateDescriptor;

/**
 * Shared Flink state descriptors for the rule engine.
 * FIX: restored the stripped type parameters — the descriptor maps ruleId -> RuleMetaBean,
 * as established by the (String.class, RuleMetaBean.class) constructor arguments.
 */
public class FlinkStateDescriptors {

    public static MapStateDescriptor<String, RuleMetaBean> ruleMetaBeanMapStateDescriptor =
            new MapStateDescriptor<>("rule-meta-state", String.class, RuleMetaBean.class);
}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/bitmap_inject/_01_RulePublisher.java: --------------------------------------------------------------------------------
package cn.doitedu.rtmk.tech_test.bitmap_inject;

import org.roaringbitmap.RoaringBitmap;

import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.sql.*;

/**
 * @Author: deep as the sea
 * @Site: 多易教育
 * @QQ: 657270652
 * @Date: 2022/8/16
 * @Desc: Simulates the rule-publishing platform; tests generating an audience
 * bitmap and publishing it to the rule metadata database.
 *
 * FIX: JDBC and stream resources are now released via try-with-resources
 * (the original never closed the connection or statement).
 **/
public class _01_RulePublisher {

    public static void main(String[] args) throws IOException, SQLException {

        //String ruleId = "g01_rule01";
        //String ruleId = "g01_rule02";
        String ruleId = "g01_rule03";

        // Audience guids as if they were selected from ES by the rule's profile conditions.
        //int[] ruleProfileUsers = {1,3,5,7,101,201};
        //int[] ruleProfileUsers = {11,3,5,7,301,202,666};
        int[] ruleProfileUsers = {55, 3, 5, 7};

        // Turn the guid list into a bitmap.
        RoaringBitmap bitmap = RoaringBitmap.bitmapOf(ruleProfileUsers);

        // Serialize the bitmap into a byte array.
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        try (DataOutputStream dout = new DataOutputStream(bout)) {
            bitmap.serialize(dout);
        }
        byte[] bitmapBytes = bout.toByteArray();

        // Publish the bitmap (with the rule id) to the rule platform's metadata DB.
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root");
             PreparedStatement statement = conn.prepareStatement("insert into rtmk_rule_def values(?,?)")) {

            statement.setString(1, ruleId);
            statement.setBytes(2, bitmapBytes);

            statement.execute();
        }
    }
}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/bitmap_inject/_02_BitmapFromMySqlBytes.java: --------------------------------------------------------------------------------
package cn.doitedu.rtmk.tech_test.bitmap_inject;

import org.roaringbitmap.RoaringBitmap;

import java.nio.ByteBuffer;
import java.sql.*;

/**
 * Reads the serialized audience bitmap of rule g01_rule01 back from MySQL and
 * verifies the round trip by probing a few guids.
 *
 * FIX: resources are closed via try-with-resources, and resultSet.next() is
 * checked instead of being assumed to succeed.
 */
public class _02_BitmapFromMySqlBytes {

    public static void main(String[] args) throws Exception {

        byte[] bitmapBytes;
        try (Connection conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root");
             PreparedStatement statement = conn.prepareStatement(
                     "select rule_id,profile_users_bitmap from rtmk_rule_def where rule_id = ? ")) {

            statement.setString(1, "g01_rule01");

            try (ResultSet resultSet = statement.executeQuery()) {
                if (!resultSet.next()) {
                    throw new IllegalStateException("rule g01_rule01 not found in rtmk_rule_def");
                }
                bitmapBytes = resultSet.getBytes("profile_users_bitmap");
            }
        }

        // Deserialize the bitmap bytes.
        RoaringBitmap bitmap = RoaringBitmap.bitmapOf();
        bitmap.deserialize(ByteBuffer.wrap(bitmapBytes));

        // The originally published bitmap contained guids: 1,3,5,7,101,201
        System.out.println(bitmap.contains(201));
        System.out.println(bitmap.contains(101));
        System.out.println(bitmap.contains(7));
        System.out.println(bitmap.contains(77));
        System.out.println(bitmap.contains(88));

    }

}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/bitmap_inject/_03_FlinkCdcBitmapAndCall.java:
-------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.bitmap_inject; 2 | 3 | import org.apache.flink.streaming.api.CheckpointingMode; 4 | import org.apache.flink.streaming.api.datastream.DataStream; 5 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 6 | import org.apache.flink.streaming.api.functions.ProcessFunction; 7 | import org.apache.flink.table.api.Table; 8 | import org.apache.flink.table.api.bridge.java.StreamTableEnvironment; 9 | import org.apache.flink.types.Row; 10 | import org.apache.flink.types.RowKind; 11 | import org.apache.flink.util.Collector; 12 | import org.roaringbitmap.RoaringBitmap; 13 | 14 | import java.nio.ByteBuffer; 15 | 16 | public class _03_FlinkCdcBitmapAndCall { 17 | 18 | public static void main(String[] args) throws Exception { 19 | 20 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 21 | env.enableCheckpointing(5000, CheckpointingMode.EXACTLY_ONCE); 22 | env.getCheckpointConfig().setCheckpointStorage("file:/d:/checkpoint/"); 23 | 24 | StreamTableEnvironment tenv = StreamTableEnvironment.create(env); 25 | 26 | 27 | // 创建cdc连接器表,去读mysql中的规则定义表的binlog 28 | tenv.executeSql("CREATE TABLE rtmk_rule_define ( " + 29 | " rule_id STRING PRIMARY KEY NOT ENFORCED, " + 30 | " profile_users_bitmap BINARY " + 31 | " ) WITH ( " + 32 | " 'connector' = 'mysql-cdc', " + 33 | " 'hostname' = 'doitedu' , " + 34 | " 'port' = '3306' , " + 35 | " 'username' = 'root' , " + 36 | " 'password' = 'root' , " + 37 | " 'database-name' = 'rtmk', " + 38 | " 'table-name' = 'rtmk_rule_def' " + 39 | ")"); 40 | 41 | Table table = tenv.sqlQuery("select rule_id,profile_users_bitmap from rtmk_rule_define"); 42 | 43 | DataStream rowDataStream = tenv.toChangelogStream(table); 44 | 45 | rowDataStream.process(new ProcessFunction() { 46 | @Override 47 | public void processElement(Row row, ProcessFunction.Context ctx, Collector out) throws 
Exception { 48 | RowKind kind = row.getKind(); 49 | if(kind == RowKind.INSERT){ 50 | String rule_id = row.getFieldAs("rule_id"); 51 | byte[] bitmapBytes = row.getFieldAs("profile_users_bitmap"); 52 | 53 | // 反序列化本次拿到的规则的bitmap 54 | RoaringBitmap bitmap = RoaringBitmap.bitmapOf(); 55 | bitmap.deserialize(ByteBuffer.wrap(bitmapBytes)); 56 | 57 | // 判断201,101,77 用户是否在其中 58 | boolean res201 = bitmap.contains(201); 59 | boolean res101 = bitmap.contains(101); 60 | boolean res77 = bitmap.contains(77); 61 | 62 | out.collect(String.format("规则:%s, 用户:201, 存在于规则人群否: %s ",rule_id,res201)); 63 | out.collect(String.format("规则:%s, 用户:101, 存在于规则人群否: %s ",rule_id,res101)); 64 | out.collect(String.format("规则:%s, 用户:77 , 存在于规则人群否: %s ",rule_id,res77)); 65 | } 66 | 67 | } 68 | }).print(); 69 | 70 | 71 | env.execute(); 72 | 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/ConditionCalcTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.enjoy_test; 2 | 3 | import com.alibaba.fastjson.JSON; 4 | import com.alibaba.fastjson.JSONObject; 5 | import com.jfinal.template.Engine; 6 | import com.jfinal.template.Template; 7 | import groovy.lang.GroovyClassLoader; 8 | import org.apache.commons.io.FileUtils; 9 | 10 | import java.io.File; 11 | import java.io.IOException; 12 | import java.util.ArrayList; 13 | import java.util.HashMap; 14 | 15 | public class ConditionCalcTest { 16 | public static void main(String[] args) throws IOException, InstantiationException, IllegalAccessException { 17 | 18 | 19 | String conditionJson = FileUtils.readFileToString(new File("param_jsons/simpleCondition.json"),"utf-8"); 20 | Template template = Engine.use().getTemplateByString(FileUtils.readFileToString(new File("groovy_templates/ConditionCalculator.template"),"utf-8")); 21 | 22 | JSONObject jsonObject = 
JSON.parseObject(conditionJson);
        int eventParams = jsonObject.getJSONArray("eventParams").size();

        // One slot per event-attribute condition; indexes feed the template's #for loop.
        int[] conditions = new int[eventParams];
        HashMap<String, Object> data = new HashMap<>();

        data.put("conditions", conditions);
        data.put("exp", "res0 && ( res1 || res2 )");

        String groovyCode = template.renderToString(data);
        //System.out.println(groovyCode);

        Class<?> aClass = new GroovyClassLoader().parseClass(groovyCode);
        IConditionCalculator caculator = (IConditionCalculator) aClass.newInstance();
        caculator.init(conditionJson);

        HashMap<String, String> props = new HashMap<>();
        props.put("p1", "2");
        EventBean eventBean = new EventBean("e4", props);
        boolean res = caculator.calc(eventBean);

        System.out.println(res);

    }

}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/ConditionCalculator.groovy: --------------------------------------------------------------------------------
// NOTE(review): package says groovytest.groovy but the file lives under enjoy_test —
// confirm which is intended.
package cn.doitedu.rtmk.tech_test.groovytest.groovy

import cn.doitedu.rtmk.tech_test.enjoy_test.EventBean
import cn.doitedu.rtmk.tech_test.enjoy_test.IConditionCalculator
import com.alibaba.fastjson.JSON
import com.alibaba.fastjson.JSONArray
import com.alibaba.fastjson.JSONObject

/**
 * Template-generated condition calculator: evaluates three event-attribute
 * conditions (res0..res2) and combines them with the configured expression.
 *
 * FIX: the "lt" (less-than) branches compared with `>` — corrected to `<` in
 * all three places. NOTE(review): operands are Strings, so `<` compares
 * lexicographically; confirm whether numeric comparison is wanted.
 */
class ConditionCalculator implements IConditionCalculator {
    JSONObject conditionObject;
    JSONArray eventParams;

    public void init(String condition) {
        conditionObject = JSON.parseObject(condition)
        eventParams = conditionObject.getJSONArray("eventParams")
    }

    @Override
    boolean calc(EventBean eventBean) {

        JSONObject param0 = eventParams.getJSONObject(0)
        String eventId0 = param0.getString("eventId")
        String attributeName0 = param0.getString("attributeName")
        String operatorType0 = param0.getString("operatorType")
        String value0 = param0.getString("value")

        boolean res0 = false;

        if (eventBean.getEventId() == eventId0) {
            //println( "json " + eventId0 + "," + eventBean.getEventId())
            println("oper: " + operatorType0)
            if (operatorType0.equals("eq")) {
                res0 = eventBean.getProperties().get(attributeName0) == value0;
            } else if (operatorType0.equals("lt")) {
                // FIX: "lt" means less-than; original used `>`.
                res0 = eventBean.getProperties().get(attributeName0) < value0;
            }
        }

        println(res0)
        JSONObject param1 = eventParams.getJSONObject(1)
        String eventId1 = param1.getString("eventId")
        String attributeName1 = param1.getString("attributeName")
        String operatorType1 = param1.getString("operatorType")
        String value1 = param1.getString("value")

        boolean res1 = false;

        if (eventBean.getEventId() == eventId1) {
            //println( "json " + eventId1 + "," + eventBean.getEventId())
            println("oper: " + operatorType1)
            if (operatorType1.equals("eq")) {
                res1 = eventBean.getProperties().get(attributeName1) == value1;
            } else if (operatorType1.equals("lt")) {
                // FIX: "lt" means less-than; original used `>`.
                res1 = eventBean.getProperties().get(attributeName1) < value1;
            }
        }

        println(res1)
        JSONObject param2 = eventParams.getJSONObject(2)
        String eventId2 = param2.getString("eventId")
        String attributeName2 = param2.getString("attributeName")
        String operatorType2 = param2.getString("operatorType")
        String value2 = param2.getString("value")

        boolean res2 = false;

        if (eventBean.getEventId() == eventId2) {
            //println( "json " + eventId2 + "," + eventBean.getEventId())
            println("oper: " + operatorType2)
            if (operatorType2.equals("eq")) {
                res2 = eventBean.getProperties().get(attributeName2) == value2;
            } else if (operatorType2.equals("lt")) {
                // FIX: "lt" means less-than; original used `>`.
                res2 = eventBean.getProperties().get(attributeName2) < value2;
            }
        }

        println(res2)

        // Combine the per-condition results with the rule's boolean expression.
        boolean res = res0 && ( res1 || res2 )

        return res
    }
}
-------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/EnjoyHello.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.enjoy_test; 2 | 3 | import com.jfinal.template.Engine; 4 | import com.jfinal.template.Template; 5 | import lombok.AllArgsConstructor; 6 | import lombok.Data; 7 | import lombok.NoArgsConstructor; 8 | 9 | import java.util.ArrayList; 10 | import java.util.Arrays; 11 | import java.util.HashMap; 12 | 13 | public class EnjoyHello { 14 | public static void main(String[] args) { 15 | /*Template template = Engine.use().getTemplateByString("a b c #(id)"); 16 | HashMap data = new HashMap<>(); 17 | data.put("id","1"); 18 | String res = template.renderToString(data); 19 | System.out.println(res);*/ 20 | 21 | 22 | Template template = Engine.use().getTemplateByString("#for(x:conditions) \n" + 23 | "boolean res1 = calc(#(x)); \n" + 24 | "#end\n" + 25 | "\n" + 26 | "boolean res = #(exp)"); 27 | 28 | HashMap data = new HashMap<>(); 29 | ArrayList conditions = new ArrayList<>(); 30 | conditions.add("a"); 31 | conditions.add("b"); 32 | conditions.add("c"); 33 | conditions.add("d"); 34 | 35 | data.put("conditions",conditions); 36 | data.put("exp","res1 && ( res2 || res3 ) && res4"); 37 | 38 | String res = template.renderToString(data); 39 | System.out.println(res); 40 | 41 | System.out.println("----------------------"); 42 | Template template2 = Engine.use().getTemplateByString("SELECT\n" + 43 | " guid,\n" + 44 | " count(1) as cnt\n" + 45 | "FROM mall_app_events_detail\n" + 46 | "WHERE 1=1 \n" + 47 | "#if(windowStart != null)\n" + 48 | "AND envet_time>='#(windowStart)' \n" + 49 | "#end\n" + 50 | "\n" + 51 | "#if(windowEnd != null)\n" + 52 | "AND envet_time<=\"#(windowEnd)\"\n" + 53 | "#end\n" + 54 | "AND event_id = #(eventId) \n" + 55 | "#for(x : attributeParams)\n" + 56 | "AND 
get_json_string(propJson,'$.#(x.attributeName)') #(x.compareType) '#(x.attributeValue)'\n" + 57 | "#end\n" + 58 | "GROUP BY guid"); 59 | HashMap data2 = new HashMap<>(); 60 | data2.put("windowStart","2022-08-01 00:00:00"); 61 | data2.put("windowEnd","2022-08-31 12:00:00"); 62 | data2.put("eventId","addcart"); 63 | data2.put("attributeParams", Arrays.asList(new EventAttributeParam("p1","=","v1"),new EventAttributeParam("p2",">",3))); 64 | 65 | System.out.println(template2.renderToString(data2)); 66 | 67 | } 68 | } 69 | 70 | 71 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/EventAttributeParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.enjoy_test; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | public class EventAttributeParam { 11 | private String attributeName; 12 | private String compareType; 13 | private Object attributeValue; 14 | } 15 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/EventBean.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.enjoy_test; 2 | 3 | import java.util.Map; 4 | 5 | public class EventBean{ 6 | private String eventId; 7 | private Map properties; 8 | 9 | public EventBean() { 10 | } 11 | 12 | public EventBean(String eventId, Map properties) { 13 | this.eventId = eventId; 14 | this.properties = properties; 15 | } 16 | 17 | public String getEventId() { 18 | return eventId; 19 | } 20 | 21 | public void setEventId(String eventId) { 22 | this.eventId = eventId; 23 | } 24 | 25 | public Map getProperties() { 26 | return properties; 27 | } 28 | 29 | 
public void setProperties(Map properties) { 30 | this.properties = properties; 31 | } 32 | } -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/enjoy_test/IConditionCalculator.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.enjoy_test; 2 | 3 | public interface IConditionCalculator { 4 | void init(String condition); 5 | boolean calc(EventBean eventBean); 6 | } 7 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/groovy/Caculator.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.groovy 2 | 3 | class Caculator { 4 | def int add(int a ,int b ){ 5 | return a+b; 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/groovy/HelloWorld.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.groovy 2 | 3 | class HelloWorld { 4 | 5 | static void main(String[] args) { 6 | 7 | println("hello groovy") 8 | 9 | // 调用另外一个groovy类 10 | def caculator = new Caculator() 11 | int res = caculator.add(10, 20) 12 | 13 | println("工具调用的结果为: " + res) 14 | 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/java/CallGroovy.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.java; 2 | 3 | import cn.doitedu.rtmk.tech_test.groovytest.groovy.Caculator; 4 | 5 | public class CallGroovy { 6 | 7 | public static void main(String[] 
args) { 8 | 9 | Caculator groovyCaculator = new Caculator(); 10 | 11 | int res = groovyCaculator.add(20, 30); 12 | 13 | System.out.println("调用groovy类add方法的结果: " + res); 14 | 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/java/DynamicCallGroovy.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.java; 2 | 3 | import groovy.lang.GroovyClassLoader; 4 | import groovy.lang.GroovyObject; 5 | 6 | import java.sql.*; 7 | 8 | public class DynamicCallGroovy { 9 | public static void main(String[] args) throws SQLException, InstantiationException, IllegalAccessException { 10 | 11 | Connection conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root"); 12 | 13 | 14 | Statement statement = conn.createStatement(); 15 | ResultSet resultSet = statement.executeQuery("select groovy_code from t_dynamic_code"); 16 | 17 | resultSet.next(); 18 | String groovyCodeStr = resultSet.getString("groovy_code"); 19 | 20 | // 21 | GroovyClassLoader classLoader = new GroovyClassLoader(); 22 | 23 | // 解析源代码,编译成class 24 | Class groovyClass = classLoader.parseClass(groovyCodeStr); 25 | GroovyObject person = (GroovyObject) groovyClass.newInstance(); 26 | 27 | String param1 = "taoge"; 28 | String result1 = (String) person.invokeMethod("saySomeThing", param1); 29 | 30 | System.out.println("在java中代码中,打印 groovy代码调用后的返回值: " + result1); 31 | 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/java/DynamicCallGroovy2.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.java; 2 | 3 | import groovy.lang.GroovyClassLoader; 4 | 5 | import java.sql.*; 6 | 7 | /** 
8 | * @Author: deep as the sea 9 | * @Site: 多易教育 10 | * @QQ: 657270652 11 | * @Date: 2022/8/16 12 | * @Desc: groovy代码动态调用示例2 13 | **/ 14 | public class DynamicCallGroovy2 { 15 | public static void main(String[] args) throws SQLException, InstantiationException, IllegalAccessException { 16 | 17 | Connection conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root"); 18 | Statement statement = conn.createStatement(); 19 | ResultSet resultSet = statement.executeQuery("select groovy_code from t_dynamic_code"); 20 | 21 | while(resultSet.next()) { 22 | String groovyCodeStr = resultSet.getString("groovy_code"); 23 | GroovyClassLoader classLoader = new GroovyClassLoader(); 24 | // 解析源代码,编译成class 25 | Class groovyClass = classLoader.parseClass(groovyCodeStr); 26 | 27 | // 对加载好的class,反射对象 28 | Person person = (Person) groovyClass.newInstance(); 29 | 30 | // 调用对象方法 31 | String result = person.saySomeThing("涛哥"); 32 | 33 | System.out.println("在java中代码中,打印 groovy代码调用后的返回值: " + result); 34 | } 35 | 36 | resultSet.close(); 37 | conn.close(); 38 | 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/groovytest/java/Person.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.groovytest.java; 2 | 3 | public interface Person { 4 | 5 | String saySomeThing(String name); 6 | } 7 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/whole_test/pojo/EventCountParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.whole_test.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | import lombok.ToString; 7 | 8 | import java.util.Base64; 9 | import 
java.util.List; 10 | 11 | 12 | @Data 13 | @ToString 14 | public class EventCountParam { 15 | 16 | private String eventId; 17 | private int count; 18 | private List propertyParams; 19 | private String windowStart; 20 | private String windowEnd; 21 | private String paramId; 22 | 23 | } 24 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/whole_test/pojo/PropertyParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.whole_test.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | public class PropertyParam { 11 | 12 | private String propName; 13 | private String compareType; 14 | private String compareValue; 15 | 16 | } 17 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/whole_test/pojo/RuleInfo.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.whole_test.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | import org.roaringbitmap.RoaringBitmap; 7 | 8 | import java.util.List; 9 | 10 | @Data 11 | @NoArgsConstructor 12 | @AllArgsConstructor 13 | public class RuleInfo { 14 | 15 | // 规则id 16 | private String ruleId; 17 | 18 | // 触发条件 19 | EventCountParam triggerEventCondition; 20 | 21 | 22 | // 人群圈选条件 23 | private List profileCondition; 24 | 25 | // 事件次数条件 26 | private EventCountParam eventCountCondition; 27 | 28 | // 人群圈选bitmap 29 | private RoaringBitmap profileUsersBitmap; 30 | 31 | // 规则条件运算groovy代码 32 | private String ruleCaculatorCode; 33 | 34 | 35 | } 36 | -------------------------------------------------------------------------------- 
/realtime-marketing-engine/src/main/java/cn/doitedu/rtmk/tech_test/whole_test/publisher/SimpleRulePulishMoni.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.tech_test.whole_test.publisher; 2 | 3 | import cn.doitedu.rtmk.tech_test.whole_test.pojo.EventCountParam; 4 | import cn.doitedu.rtmk.tech_test.whole_test.pojo.PropertyParam; 5 | import cn.doitedu.rtmk.tech_test.whole_test.pojo.RuleInfo; 6 | 7 | import java.util.Arrays; 8 | import java.util.Collections; 9 | 10 | /** 11 | * @Author: deep as the sea 12 | * @Site: 多易教育 13 | * @QQ: 657270652 14 | * @Date: 2022/8/16 15 | * @Desc: 模拟测试规则发布平台的工作流程 16 | **/ 17 | public class SimpleRulePulishMoni { 18 | public static void main(String[] args) { 19 | 20 | /** 21 | * 1. 从前端接收到了营销人员定义的规则的条件、参数信息 22 | */ 23 | RuleInfo ruleInfo = new RuleInfo(); 24 | 25 | // 前端传入的规则名称 26 | ruleInfo.setRuleId("rule_001"); 27 | 28 | 29 | // 前端传入的触发条件 30 | EventCountParam triggerEventCondition = new EventCountParam(); 31 | triggerEventCondition.setEventId("sumitOrder"); 32 | ruleInfo.setTriggerEventCondition(triggerEventCondition); 33 | 34 | // 前端传入的画像条件 35 | PropertyParam tagParam1 = new PropertyParam("tag01", "eq", "C"); 36 | PropertyParam tagParam2 = new PropertyParam("tag03", "lt", "5"); 37 | ruleInfo.setProfileCondition(Arrays.asList(tagParam1,tagParam2)); 38 | 39 | // 前端传入的事件次数条件 40 | EventCountParam eventCountParam = new EventCountParam(); 41 | eventCountParam.setCount(3); 42 | eventCountParam.setEventId("addcart"); 43 | PropertyParam propParam = new PropertyParam("itemId", "eq", "item01"); 44 | eventCountParam.setPropertyParams(Collections.singletonList(propParam)); 45 | eventCountParam.setWindowStart("2022-08-01 00:00:00"); 46 | eventCountParam.setWindowEnd("2022-08-28 12:00:00"); 47 | eventCountParam.setParamId("1"); 48 | ruleInfo.setEventCountCondition(eventCountParam); 49 | 50 | 51 | /** 52 | * 2 . 
根据前端传入的规则的画像条件,去es中圈选人群,并生成bitmap,并填充到 ruleInfo对象中 53 | */ 54 | 55 | 56 | 57 | 58 | /** 59 | * 3.找到本规则模板对应的groovy运算模型,填充到 ruleInfo对象中 60 | */ 61 | 62 | 63 | 64 | 65 | /** 66 | * 4. 根据前端传入的规则的事件行为次数受众条件,去doris中查询统计所有用户的结果,并将结果按groovy模板中的数据结构要求,写入redis 67 | */ 68 | 69 | 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /realtime-marketing-engine/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console 20 | 21 | log4j.logger.cn=INFO,console 22 | log4j.logger.org.apache.hadoop = ERROR,console 23 | log4j.additivity.cn=false 24 | 25 | log4j.appender.console=org.apache.log4j.ConsoleAppender 26 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 27 | #log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 28 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d --> %m %x %n 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /realtime-marketing-manager/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4 | 4.0.0 5 | 6 | org.springframework.boot 7 | spring-boot-starter-parent 8 | 2.7.2 9 | 10 | 11 | cn.doitedu 12 | realtime-marketing-manager 13 | 0.0.1-SNAPSHOT 14 | realtime-marketing-manager 15 | realtime-marketing-manager 16 | 17 | 1.8 18 | 19 | 20 | 21 | org.springframework.boot 22 | spring-boot-starter-web 23 | 24 | 25 | 26 | org.springframework.boot 27 | spring-boot-devtools 28 | runtime 29 | true 30 | 31 | 32 | 33 | org.springframework.boot 34 | spring-boot-configuration-processor 35 | true 36 | 37 | 38 | 39 | org.projectlombok 40 | lombok 41 | true 42 | 43 | 44 | 45 | org.springframework.boot 46 | spring-boot-starter-test 47 | test 48 | 49 | 50 | 51 | 52 | com.alibaba 53 | fastjson 54 | 1.2.83 55 | 56 | 57 | 58 | 59 | org.roaringbitmap 60 | RoaringBitmap 61 | 0.9.31 62 | 63 | 64 | 65 | 66 | 67 | org.elasticsearch.client 68 | elasticsearch-rest-high-level-client 69 | 7.17.5 70 | 71 | 72 | 73 | com.jfinal 74 | enjoy 75 | 5.0.0 76 | 77 | 78 | 79 | mysql 80 | mysql-connector-java 81 | 8.0.29 82 | 83 | 84 | 85 | 86 | redis.clients 87 | jedis 88 | 4.2.3 89 | 90 | 91 | 92 | org.apache.commons 93 | commons-lang3 94 | 95 | 96 | 97 | cn.doitedu 98 | realtime-marketing-common 99 | 1.0-SNAPSHOT 100 | 
101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | org.springframework.boot 109 | spring-boot-maven-plugin 110 | 111 | 112 | 113 | org.projectlombok 114 | lombok 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/RealtimeMarketingManagerApplication.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt; 2 | 3 | import org.springframework.boot.SpringApplication; 4 | import org.springframework.boot.autoconfigure.SpringBootApplication; 5 | 6 | @SpringBootApplication 7 | public class RealtimeMarketingManagerApplication { 8 | 9 | public static void main(String[] args) { 10 | SpringApplication.run(RealtimeMarketingManagerApplication.class, args); 11 | } 12 | 13 | } 14 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/dao/DorisQueryDao.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.dao; 2 | 3 | import cn.doitedu.rtmk.common.pojo.ActionSeqParam; 4 | import org.roaringbitmap.RoaringBitmap; 5 | 6 | import java.sql.SQLException; 7 | 8 | public interface DorisQueryDao { 9 | // 根据给定的sql来查询行为次数 10 | void queryActionCount(String sql, String ruleId, String conditionId, RoaringBitmap profileBitmap) throws SQLException; 11 | 12 | void queryActionSeq(String sql, String ruleId, ActionSeqParam actionSeqParam, RoaringBitmap profileBitmap) throws SQLException; 13 | } 14 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/dao/RuleSystemMetaDao.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.dao; 2 | 3 | import java.sql.SQLException; 4 | import 
java.sql.Timestamp; 5 | 6 | public interface RuleSystemMetaDao { 7 | String getSqlTemplateByTemplateName(String conditionTemplateName) throws SQLException; 8 | 9 | String queryGroovyTemplateByModelId(int ruleModelId) throws SQLException; 10 | 11 | boolean insertRuleInfo(String rule_id, 12 | int rule_model_id, 13 | String creator_name, 14 | int rule_status, 15 | Timestamp create_time, 16 | Timestamp update_time, 17 | byte[] bitmapBytes, 18 | String ruleDefineParamsJson, 19 | String ruleModelCaculatorGroovyCode) throws SQLException; 20 | } 21 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/dao/RuleSystemMetaDaoImpl.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.dao; 2 | 3 | import org.springframework.stereotype.Repository; 4 | 5 | import java.sql.*; 6 | 7 | @Repository 8 | public class RuleSystemMetaDaoImpl implements RuleSystemMetaDao { 9 | 10 | Connection conn; 11 | public RuleSystemMetaDaoImpl() throws SQLException { 12 | conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk?useUnicode=true&characterEncoding=utf8", "root", "root"); 13 | } 14 | 15 | @Override 16 | public String getSqlTemplateByTemplateName(String conditionTemplateName) throws SQLException { 17 | PreparedStatement preparedStatement = conn.prepareStatement("select template_sql from condition_doris_sql_template where template_name = ?"); 18 | preparedStatement.setString(1,conditionTemplateName); 19 | 20 | ResultSet resultSet = preparedStatement.executeQuery(); 21 | String template_sql = null; 22 | while(resultSet.next()){ 23 | template_sql = resultSet.getString("template_sql"); 24 | } 25 | 26 | return template_sql; 27 | } 28 | 29 | @Override 30 | public String queryGroovyTemplateByModelId(int ruleModelId) throws SQLException { 31 | PreparedStatement preparedStatement = conn.prepareStatement("select 
caculator_groovy_template from rulemodel_calculator_templates where rule_model_id = ? and status=1"); 32 | preparedStatement.setInt(1,ruleModelId); 33 | 34 | ResultSet resultSet = preparedStatement.executeQuery(); 35 | String groovyTemplate = null; 36 | while(resultSet.next()){ 37 | groovyTemplate = resultSet.getString("caculator_groovy_template"); 38 | } 39 | 40 | return groovyTemplate; 41 | } 42 | 43 | 44 | /** 45 | * CREATE TABLE `rule_instance_definition` ( 46 | * `id` int(11) NOT NULL AUTO_INCREMENT, 47 | * `rule_id` int(11) DEFAULT NULL, 48 | * `rule_model_id` int(11) DEFAULT NULL, 49 | * `rule_profile_user_bitmap` binary(255) DEFAULT NULL, 50 | * `caculator_groovy_code` text, 51 | * `creator_name` varchar(255) DEFAULT NULL, 52 | * `rule_status` int(11) DEFAULT NULL, 53 | * `create_time` datetime DEFAULT NULL, 54 | * `update_time` datetime DEFAULT NULL, 55 | * PRIMARY KEY (`id`) 56 | * ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 57 | * @param bitmapBytes 58 | * @param ruleDefineParamsJson 59 | * @param ruleModelCaculatorGroovyCode 60 | */ 61 | @Override 62 | public boolean insertRuleInfo(String rule_id, 63 | int rule_model_id, 64 | String creator_name, 65 | int rule_status, 66 | Timestamp create_time, 67 | Timestamp update_time, 68 | byte[] bitmapBytes, 69 | String ruleDefineParamsJson, 70 | String ruleModelCaculatorGroovyCode) throws SQLException { 71 | 72 | PreparedStatement preparedStatement = conn.prepareStatement( 73 | "insert into rule_instance_definition (rule_id,rule_model_id,rule_profile_user_bitmap,caculator_groovy_code,rule_param_json,creator_name,rule_status,create_time,update_time) values (?,?,?,?,?,?,?,?,?)"); 74 | 75 | preparedStatement.setString(1,rule_id); 76 | preparedStatement.setInt(2,rule_model_id); 77 | preparedStatement.setBytes(3,bitmapBytes); 78 | preparedStatement.setString(4,ruleModelCaculatorGroovyCode); 79 | preparedStatement.setString(5,ruleDefineParamsJson); 80 | preparedStatement.setString(6,creator_name); 81 | 
preparedStatement.setInt(7,rule_status); 82 | preparedStatement.setTimestamp(8,create_time); 83 | preparedStatement.setTimestamp(9,update_time); 84 | 85 | 86 | boolean execute = preparedStatement.execute(); 87 | return execute; 88 | } 89 | 90 | 91 | } 92 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/pojo/ActionAttributeParam.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.pojo; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @NoArgsConstructor 9 | @AllArgsConstructor 10 | public class ActionAttributeParam { 11 | 12 | private String attributeName; 13 | private String compareType; 14 | private Object compareValue; 15 | } 16 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/service/ActionConditionQueryService.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.service; 2 | 3 | import cn.doitedu.rtmk.common.pojo.ActionSeqParam; 4 | import com.alibaba.fastjson.JSONObject; 5 | import org.roaringbitmap.RoaringBitmap; 6 | 7 | import java.sql.SQLException; 8 | 9 | public interface ActionConditionQueryService { 10 | void processActionCountCondition(JSONObject eventParamJsonObject, String ruleId, RoaringBitmap profileBitmap) throws SQLException; 11 | 12 | void processActionSeqCondition(ActionSeqParam actionSeqParam, String ruleId, RoaringBitmap profileBitmap) throws SQLException; 13 | } 14 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/service/ProfileConditionQueryService.java: -------------------------------------------------------------------------------- 1 | package 
cn.doitedu.rulemgmt.service; 2 | 3 | import com.alibaba.fastjson.JSONArray; 4 | import org.roaringbitmap.RoaringBitmap; 5 | 6 | import java.io.IOException; 7 | 8 | public interface ProfileConditionQueryService { 9 | // 接口文档: 10 | // [{"tagId":"tg01","compareType":"eq","compareValue":"3"},{"tagId":"tg04","compareType":"match","compareValue":"运动"}] 11 | RoaringBitmap queryProfileUsers(JSONArray jsonArray) throws IOException; 12 | } 13 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/service/RuleSystemMetaService.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.service; 2 | 3 | import org.roaringbitmap.RoaringBitmap; 4 | 5 | import java.io.IOException; 6 | import java.sql.SQLException; 7 | 8 | public interface RuleSystemMetaService { 9 | String findRuleModelGroovyTemplate(int ruleModelId) throws SQLException; 10 | 11 | void publishRuleInstance( 12 | String rule_id, 13 | int rule_model_id, 14 | String creator_name, 15 | int rule_status, 16 | RoaringBitmap profileUserBitmap, 17 | String ruleDefineParamsJson, 18 | String ruleModelCaculatorGroovyCode 19 | ) throws IOException, SQLException; 20 | } 21 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/java/cn/doitedu/rulemgmt/service/RuleSystemMetaServiceImpl.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt.service; 2 | 3 | import cn.doitedu.rulemgmt.dao.RuleSystemMetaDao; 4 | import org.roaringbitmap.RoaringBitmap; 5 | import org.springframework.beans.factory.annotation.Autowired; 6 | import org.springframework.stereotype.Service; 7 | 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.DataOutputStream; 10 | import java.io.IOException; 11 | import java.sql.SQLException; 12 | import java.sql.Timestamp; 13 | 14 | 
@Service 15 | public class RuleSystemMetaServiceImpl implements RuleSystemMetaService { 16 | RuleSystemMetaDao ruleSystemMetaDao; 17 | 18 | @Autowired 19 | public RuleSystemMetaServiceImpl(RuleSystemMetaDao ruleSystemMetaDao){ 20 | this.ruleSystemMetaDao = ruleSystemMetaDao; 21 | } 22 | 23 | /** 24 | * 根据规则模型的id,查询规则模型的运算代码模板 25 | * @param ruleModelId 26 | * @return 27 | * @throws SQLException 28 | */ 29 | @Override 30 | public String findRuleModelGroovyTemplate(int ruleModelId) throws SQLException { 31 | 32 | String template = ruleSystemMetaDao.queryGroovyTemplateByModelId(ruleModelId); 33 | 34 | return template; 35 | } 36 | 37 | 38 | /** 39 | * 发布新规则到元数据库中 40 | */ 41 | @Override 42 | public void publishRuleInstance( 43 | String rule_id, 44 | int rule_model_id, 45 | String creator_name, 46 | int rule_status, 47 | RoaringBitmap profileUserBitmap, 48 | String ruleDefineParamsJson, 49 | String ruleModelCaculatorGroovyCode 50 | ) throws IOException, SQLException { 51 | 52 | byte[] bitmapBytes = null; 53 | if(profileUserBitmap !=null ){ 54 | ByteArrayOutputStream bao = new ByteArrayOutputStream(); 55 | DataOutputStream dao = new DataOutputStream(bao); 56 | profileUserBitmap.serialize(dao); 57 | bitmapBytes = bao.toByteArray(); 58 | } 59 | 60 | Timestamp timestamp = new Timestamp(System.currentTimeMillis()); 61 | 62 | 63 | ruleSystemMetaDao.insertRuleInfo(rule_id,rule_model_id,creator_name,rule_status,timestamp,timestamp,bitmapBytes, 64 | ruleDefineParamsJson,ruleModelCaculatorGroovyCode); 65 | 66 | } 67 | 68 | 69 | } 70 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/main/resources/application.properties: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/test/java/cn/doitedu/rulemgmt/EnjoyHelloWorld.java: 
-------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt; 2 | 3 | import cn.doitedu.rulemgmt.pojo.ActionAttributeParam; 4 | import com.jfinal.kit.Kv; 5 | import com.jfinal.template.Engine; 6 | import com.jfinal.template.Template; 7 | 8 | import java.util.Arrays; 9 | import java.util.HashMap; 10 | 11 | public class EnjoyHelloWorld { 12 | 13 | public static void main(String[] args) { 14 | 15 | 16 | // demo示例1 17 | /*Template template = Engine.use().getTemplateByString("i am #(name)"); 18 | Kv data = Kv.by("name", "taoge"); 19 | String s = template.renderToString(data); 20 | System.out.println(s);*/ 21 | 22 | 23 | // demo示例2 24 | /*Template template = Engine.use().getTemplateByString("i am #if(name.equals(\"taoge\")) #(name) 本尊 #else #(name) 小可爱 #end"); 25 | Kv data = Kv.by("name", "幺妹"); 26 | String s = template.renderToString(data); 27 | System.out.println(s);*/ 28 | 29 | // demo示例3 30 | /*String templateStr = "我的学生有:\n" + 31 | "#for(stu: students)\n" + 32 | " 学生#(for.count):#(stu)\n" + 33 | "#end "; 34 | Template template = Engine.use().getTemplateByString(templateStr); 35 | Kv data = Kv.by("students", Arrays.asList("阳哥","宇妹","飞哥","幺妹","敏妹")); 36 | String s = template.renderToString(data); 37 | System.out.println(s);*/ 38 | /** 39 | * 我的学生有: 40 | * 学生1:阳哥 41 | * 学生2:宇妹 42 | * 学生3:飞哥 43 | * 学生4:幺妹 44 | * 学生5:敏妹 45 | */ 46 | 47 | // demo 示例4 48 | String sqlTemplateStr = "SELECT\n" + 49 | " guid,\n" + 50 | " count(1) as cnt\n" + 51 | "FROM mall_app_events_detail\n" + 52 | "WHERE 1=1 \n" + 53 | "#if( windowStart != null )\n" + 54 | "AND event_time>='#(windowStart)' \n" + 55 | "#end\n" + 56 | "#if( windowEnd != null )\n" + 57 | "AND event_time<='#(windowEnd)'\n" + 58 | "#end\n" + 59 | "#if(eventId != null)\n" + 60 | "AND event_id = '#(eventId)'\n" + 61 | "#end\n" + 62 | "#for(attrParam: attrParamList)\n" + 63 | "AND get_json_string(propJson,'$.#(attrParam.attributeName)') #(attrParam.compareType) 
'#(attrParam.compareValue)'\n" + 64 | "#end\n" + 65 | "GROUP BY guid"; 66 | 67 | ActionAttributeParam p1 = new ActionAttributeParam("pageId", "=", "page002"); 68 | ActionAttributeParam p2 = new ActionAttributeParam("itemId", "=", "item002"); 69 | ActionAttributeParam p3 = new ActionAttributeParam("attr2", ">", 3); 70 | 71 | HashMap data2 = new HashMap<>(); 72 | data2.put("eventId","e1"); 73 | data2.put("windowStart","2022-08-01 10:00:00"); 74 | data2.put("windowEnd","2022-08-31 12:00:00"); 75 | data2.put("attrParamList",Arrays.asList(p1)); 76 | 77 | Template template2 = Engine.use().getTemplateByString(sqlTemplateStr); 78 | String sql = template2.renderToString(data2); 79 | 80 | System.out.println(sql); 81 | 82 | 83 | } 84 | 85 | } 86 | -------------------------------------------------------------------------------- /realtime-marketing-manager/src/test/java/cn/doitedu/rulemgmt/SeqMatchTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rulemgmt; 2 | 3 | import java.util.Arrays; 4 | import java.util.List; 5 | 6 | public class SeqMatchTest { 7 | 8 | public static void main(String[] args) { 9 | 10 | 11 | String[] split = {"a","a","b","a","c","c","b","a","b"}; 12 | List eventParams = Arrays.asList("a", "b", "c"); 13 | 14 | 15 | // 条件序列中的比较位置 16 | int k = 0; 17 | int matchCount = 0; 18 | 19 | // 遍历查询出来的行为序列 20 | for (int i = 0; i < split.length; i++) { 21 | if (split[i].split("_")[0].equals(eventParams.get(k))) { 22 | k++; 23 | if (k == eventParams.size()) { 24 | k = 0; 25 | matchCount++; 26 | } 27 | } 28 | } 29 | 30 | System.out.println("matchCount: " +matchCount); 31 | System.out.println("k : " + k); 32 | 33 | 34 | } 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /realtime-marketing-manager/说明文档/测试说明.md: -------------------------------------------------------------------------------- 1 | # 测试用的doris中行为明细建表 2 | ```sql 3 | DROP TABLE IF EXISTS 
test.mall_app_events_detail; 4 | CREATE TABLE IF NOT EXISTS test.mall_app_events_detail 5 | ( 6 | `guid` INT NOT NULL COMMENT "用户id", 7 | `event_id` VARCHAR(20) NOT NULL , 8 | `event_time` DATETIME NOT NULL , 9 | `propJson` VARCHAR(100) NOT NULL 10 | ) 11 | DUPLICATE KEY(`guid`, `event_id`, `event_time`) 12 | DISTRIBUTED BY HASH(`guid`) BUCKETS 1 13 | PROPERTIES ( 14 | "replication_num" = "1", 15 | "replication_allocation" = "tag.location.default: 1" 16 | ); 17 | ``` 18 | 19 | # 测试用的doris中行为明细表导入测试数据 20 | 21 | ## 数据,放到一个文本文件中 /root/rtmk_events.txt 22 | ```text 23 | 1|e1|2022-08-01 14:30:35|{"pageId":"page001"} 24 | 1|e1|2022-08-01 14:31:35|{"pageId":"page002","itemId":"item001"} 25 | 1|e2|2022-08-01 14:32:35|{"pageId":"page001","itemId":"item002"} 26 | 1|e3|2022-08-02 15:33:35|{"pageId":"page002","itemId":"item003"} 27 | 1|e2|2022-08-02 15:30:35|{"pageId":"page001","itemId":"item003"} 28 | 2|e1|2022-08-01 14:30:35|{"pageId":"page002"} 29 | 2|e1|2022-08-01 14:31:35|{"pageId":"page002","itemId":"item001"} 30 | 2|e2|2022-08-01 14:32:35|{"pageId":"page001","itemId":"item002"} 31 | 2|e3|2022-08-02 15:33:35|{"pageId":"page002","itemId":"item002"} 32 | 2|e2|2022-08-02 15:30:35|{"pageId":"page001","itemId":"item002"} 33 | 3|e1|2022-08-01 14:30:35|{"pageId":"page001"} 34 | 3|e3|2022-08-01 14:31:35|{"pageId":"page001","itemId":"item001"} 35 | 3|e2|2022-08-01 14:32:35|{"pageId":"page002","itemId":"item003"} 36 | 3|e2|2022-08-02 15:33:35|{"pageId":"page002","itemId":"item002"} 37 | 3|e2|2022-08-02 15:30:35|{"pageId":"page002","itemId":"item001"} 38 | 3|e3|2022-08-02 15:36:35|{"pageId":"page002","itemId":"item002"} 39 | 4|e1|2022-08-01 14:30:35|{"pageId":"page001"} 40 | 4|e2|2022-08-01 14:31:35|{"pageId":"page001","itemId":"item001"} 41 | 4|e2|2022-08-01 14:32:35|{"pageId":"page002","itemId":"item003"} 42 | 4|e1|2022-08-02 15:33:35|{"pageId":"page002","itemId":"item002"} 43 | 4|e2|2022-08-02 15:30:35|{"pageId":"page002","itemId":"item001"} 44 | 4|e3|2022-08-02 
15:30:35|{"pageId":"page001","itemId":"item002"} 45 | 4|e3|2022-08-02 15:30:35|{"pageId":"page001","itemId":"item002"} 46 | 5|e1|2022-08-01 14:30:35|{"pageId":"page001"} 47 | 5|e2|2022-08-01 14:31:35|{"pageId":"page001","itemId":"item001"} 48 | 5|e2|2022-08-01 14:32:35|{"pageId":"page002","itemId":"item003"} 49 | 5|e1|2022-08-02 15:33:35|{"pageId":"page002","itemId":"item002"} 50 | 5|e2|2022-08-02 15:30:35|{"pageId":"page002","itemId":"item002"} 51 | 5|e3|2022-08-02 15:30:35|{"pageId":"page001","itemId":"item002"} 52 | 5|e3|2022-08-02 15:30:35|{"pageId":"page001","itemId":"item002"} 53 | ``` 54 | 55 | ## 导入命令 56 | ```shell 57 | curl --location-trusted -u root \ 58 | -H "label:rtmk_events_20220817_2" \ 59 | -H "column_separator:|" \ 60 | -T /root/rtmk_events.txt \ 61 | http://doitedu:8030/api/test/mall_app_events_detail/_stream_load 62 | 63 | ``` 64 | 65 | # 测试用的mysql中的条件查询sql模板表建表及导入数据 66 | ```sql 67 | -- ---------------------------- 68 | -- Table structure for condition_doris_sql_template 69 | -- ---------------------------- 70 | DROP TABLE IF EXISTS `condition_doris_sql_template`; 71 | CREATE TABLE `condition_doris_sql_template` ( 72 | `template_name` varchar(50) NOT NULL, 73 | `template_sql` varchar(10240) DEFAULT NULL, 74 | PRIMARY KEY (`template_name`) 75 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 76 | 77 | -- ---------------------------- 78 | -- Records of condition_doris_sql_template 79 | -- ---------------------------- 80 | INSERT INTO `condition_doris_sql_template` VALUES ('action_count', 'SELECT\n guid,\n count(1) as cnt\nFROM mall_app_events_detail\nWHERE 1=1 \n#if( windowStart != null )\nAND event_time>=\'#(windowStart)\' \n#end\n#if( windowEnd != null )\nAND event_time<=\'#(windowEnd)\'\n#end\n#if(eventId != null)\nAND event_id = \'#(eventId)\'\n#end\n#for(attrParam: attrParamList)\nAND get_json_string(propJson,\'$.#(attrParam.attributeName)\') #(attrParam.compareType) \'#(attrParam.compareValue)\'\n#end\nGROUP BY guid'); 81 | 82 | ``` 83 | ## 导入数据 84 | 
-------------------------------------------------------------------------------- /realtime-marketing-manager/说明文档/规则_模型_1的参数结构.json: -------------------------------------------------------------------------------- 1 | { 2 | "ruleModelId": 1, 3 | "ruleId": "rule001", 4 | "//": "**规则画像条件参数", 5 | "profileCondition": [ 6 | { 7 | "tagId": "tg01", 8 | "compareType": "eq", 9 | "compareValue": "3" 10 | }, 11 | { 12 | "tagId": "tg04", 13 | "compareType": "match", 14 | "compareValue": "运动" 15 | } 16 | ], 17 | "//": "**规则行为次数条件参数", 18 | "actionCountCondition": { 19 | "eventParams": [ 20 | { 21 | "eventId": "e1", 22 | "attributeParams": [ 23 | { 24 | "attributeName": "pageId", 25 | "compareType": "=", 26 | "compareValue": "page001" 27 | } 28 | ], 29 | "windowStart": "2022-08-01 12:00:00", 30 | "windowEnd": "2022-08-30 12:00:00", 31 | "eventCount": 3, 32 | "conditionId": 1, 33 | "dorisQueryTemplate": "action_count" 34 | }, 35 | { 36 | "eventId": "e2", 37 | "attributeParams": [ 38 | { 39 | "attributeName": "itemId", 40 | "compareType": "=", 41 | "compareValue": "item002" 42 | }, 43 | { 44 | "attributeName": "pageId", 45 | "compareType": "=", 46 | "compareValue": "page001" 47 | } 48 | ], 49 | "windowStart": "2022-08-01 12:00:00", 50 | "windowEnd": "2022-08-30 12:00:00", 51 | "eventCount": 1, 52 | "conditionId": 2, 53 | "dorisQueryTemplate": "action_count" 54 | }, 55 | { 56 | "eventId": "e3", 57 | "attributeParams": [ 58 | { 59 | "attributeName": "pageId", 60 | "compareType": "=", 61 | "compareValue": "page002" 62 | } 63 | ], 64 | "windowStart": "2022-08-01 12:00:00", 65 | "windowEnd": "2022-08-30 12:00:00", 66 | "eventCount": 1, 67 | "conditionId": 3, 68 | "dorisQueryTemplate": "action_count" 69 | } 70 | ], 71 | "combineExpr": " res0 && (res1 || res2) " 72 | } 73 | } -------------------------------------------------------------------------------- /rule_model_resources/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 
doe-data 7 | cn.doitedu 8 | 1.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | rule_model_resources 13 | 14 | 15 | 8 16 | 8 17 | 18 | 19 | 20 | 21 | org.codehaus.groovy 22 | groovy-all 23 | 3.0.12 24 | pom 25 | 26 | 27 | 28 | 29 | redis.clients 30 | jedis 31 | 4.2.3 32 | 33 | 34 | 35 | cn.doitedu 36 | realtime-marketing-common 37 | 1.0-SNAPSHOT 38 | 39 | 40 | 41 | com.jfinal 42 | enjoy 43 | 5.0.0 44 | 45 | 46 | 47 | 48 | org.roaringbitmap 49 | RoaringBitmap 50 | 0.9.31 51 | 52 | 53 | 54 | cn.doitedu 55 | realtime-marketing-common 56 | 1.0-SNAPSHOT 57 | 58 | 59 | 60 | org.apache.flink 61 | flink-core 62 | 1.14.4 63 | provided 64 | 65 | 66 | 67 | org.apache.flink 68 | flink-streaming-java_2.12 69 | 1.14.4 70 | provided 71 | 72 | 73 | 74 | 75 | org.apache.commons 76 | commons-lang3 77 | 3.12.0 78 | 79 | 80 | 81 | 82 | org.slf4j 83 | slf4j-log4j12 84 | 1.7.30 85 | 86 | 87 | 88 | log4j 89 | log4j 90 | 1.2.16 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | -------------------------------------------------------------------------------- /rule_model_resources/src/main/java/cn/doitedu/rtmk/rulemodel/caculator/groovy/TestSplit.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.rulemodel.caculator.groovy 2 | 3 | class TestSplit { 4 | static void main(String[] args) { 5 | 6 | println("m3-r01:od006".split(":")[1]) 7 | 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /rule_model_resources/src/main/java/cn/doitedu/rtmk/rulemodel/template_test/Test_EventSeqQueryTemplate.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.rulemodel.template_test; 2 | 3 | import cn.doitedu.rtmk.common.pojo.ActionSeqParam; 4 | import cn.doitedu.rtmk.common.pojo.AttributeParam; 5 | import cn.doitedu.rtmk.common.pojo.EventParam; 6 | import com.jfinal.template.Engine; 7 | import com.jfinal.template.Template; 8 | 9 | import java.util.Arrays; 10 | 
import java.util.HashMap; 11 | 12 | public class Test_EventSeqQueryTemplate { 13 | 14 | public static void main(String[] args) { 15 | 16 | 17 | Template template = Engine.use().getTemplate("D:\\IdeaProjects\\doe-data\\rule_model_resources\\templates\\doirs_sql\\action_seq_condition_query.sql.enjoy"); 18 | 19 | AttributeParam p11 = new AttributeParam("pageId", "=", "page001"); 20 | AttributeParam p12 = new AttributeParam("itemId", "=", "item003"); 21 | EventParam e1 = new EventParam("e1", Arrays.asList(p11, p12)); 22 | 23 | 24 | AttributeParam p21 = new AttributeParam("pageId", "=", "page002"); 25 | AttributeParam p22 = new AttributeParam("itemId", "=", "item003"); 26 | EventParam e2 = new EventParam("e2", Arrays.asList(p21, p22)); 27 | 28 | 29 | AttributeParam p31 = new AttributeParam("itemId", "=", "item005"); 30 | EventParam e3 = new EventParam("e3", Arrays.asList(p31)); 31 | 32 | ActionSeqParam seqParam = new ActionSeqParam("2022-08-01 00:00:00", "2022-08-31:00:00:00", 3, "xxx", 2, Arrays.asList(e1, e2, e3)); 33 | 34 | HashMap data = new HashMap<>(); 35 | data.put("windowStart",seqParam.getWindowStart()); 36 | data.put("windowEnd",seqParam.getWindowEnd()); 37 | data.put("eventParams",seqParam.getEventParams()); 38 | 39 | System.out.println(template.renderToString(data)); 40 | 41 | 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /rule_model_resources/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. 
The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console 20 | 21 | log4j.logger.cn=INFO,console 22 | log4j.logger.org.apache.hadoop = ERROR,console 23 | log4j.additivity.cn=false 24 | 25 | log4j.appender.console=org.apache.log4j.ConsoleAppender 26 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 27 | #log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 28 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d --> %m %x %n 29 | 30 | 31 | 32 | -------------------------------------------------------------------------------- /rule_model_resources/src/test/java/template/test/EventCountConditionCalculatorTest.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.rulemodel.caculator.groovy 2 | 3 | import cn.doitedu.rtmk.common.interfaces.RuleCalculator 4 | import cn.doitedu.rtmk.common.pojo.UserEvent 5 | import com.alibaba.fastjson.JSONArray 6 | import com.alibaba.fastjson.JSONObject 7 | import redis.clients.jedis.Jedis 8 | 9 | class EventCountCalculatorGroovy implements RuleCalculator { 10 | 11 | private Jedis jedis; 12 | private JSONObject ruleDefineParamJsonObject; 13 | private JSONObject eventCountConditionParam; 14 | private String ruleId; 15 | 16 | @Override 17 | public 
void init(Jedis jedis, JSONObject ruleDefineParamJsonObject) { 18 | this.jedis = jedis; 19 | this.ruleDefineParamJsonObject = ruleDefineParamJsonObject; 20 | 21 | ruleId = ruleDefineParamJsonObject.getString("ruleId"); 22 | 23 | this.eventCountConditionParam = ruleDefineParamJsonObject.getJSONObject("actionCountCondition"); 24 | 25 | } 26 | 27 | @Override 28 | public void calc(UserEvent userEvent) { 29 | 30 | JSONArray eventParams = eventCountConditionParam.getJSONArray("eventParams"); 31 | int size = eventParams.size(); 32 | 33 | for (int i = 0; i < size; i++) { 34 | 35 | JSONObject eventParam = eventParams.getJSONObject(i); 36 | 37 | Integer conditionId = eventParam.getInteger("conditionId"); 38 | 39 | if (userEvent.getEventId().equals(eventParam.getString("eventId"))) { 40 | 41 | JSONArray attributeParams = eventParam.getJSONArray("attributeParams"); 42 | boolean b = judgeEventAttribute(userEvent, attributeParams); 43 | 44 | if (b) { 45 | jedis.hincrBy(ruleId + ":" + conditionId, userEvent.getGuid() + "", 1); 46 | } 47 | } 48 | } 49 | 50 | 51 | } 52 | 53 | 54 | @Override 55 | public boolean isMatch(int guid) { 56 | JSONArray eventParams = eventCountConditionParam.getJSONArray("eventParams"); 57 | 58 | 59 | JSONObject eventParam_0 = eventParams.getJSONObject(0); 60 | Integer conditionId_0 = eventParam_0.getInteger("conditionId"); 61 | 62 | Integer eventCountParam_0 = eventParam_0.getInteger("eventCount"); 63 | 64 | String realCountStr_0 = jedis.hget(ruleId + ":" + conditionId_0, guid + ""); 65 | int realCount_0 = Integer.parseInt(realCountStr_0 == null ? 
"0" : realCountStr_0); 66 | 67 | boolean res_0 = realCount_0 >= eventCountParam_0 ; 68 | 69 | JSONObject eventParam_1 = eventParams.getJSONObject(1); 70 | Integer conditionId_1 = eventParam_1.getInteger("conditionId"); 71 | 72 | Integer eventCountParam_1 = eventParam_1.getInteger("eventCount"); 73 | 74 | String realCountStr_1 = jedis.hget(ruleId + ":" + conditionId_1, guid + ""); 75 | int realCount_1 = Integer.parseInt(realCountStr_1 == null ? "0" : realCountStr_1); 76 | 77 | boolean res_1 = realCount_1 >= eventCountParam_1 ; 78 | 79 | JSONObject eventParam_2 = eventParams.getJSONObject(2); 80 | Integer conditionId_2 = eventParam_2.getInteger("conditionId"); 81 | 82 | Integer eventCountParam_2 = eventParam_2.getInteger("eventCount"); 83 | 84 | String realCountStr_2 = jedis.hget(ruleId + ":" + conditionId_2, guid + ""); 85 | int realCount_2 = Integer.parseInt(realCountStr_2 == null ? "0" : realCountStr_2); 86 | 87 | boolean res_2 = realCount_2 >= eventCountParam_2 ; 88 | 89 | 90 | 91 | 92 | return res_0 && (res_1 || res_2) ; 93 | } 94 | 95 | private boolean judgeEventAttribute(UserEvent userEvent, JSONArray attributeParams) { 96 | for (int j = 0; j < attributeParams.size(); j++) { 97 | JSONObject attributeParam = attributeParams.getJSONObject(j); 98 | 99 | String paramAttributeName = attributeParam.getString("attributeName"); 100 | String paramCompareType = attributeParam.getString("compareType"); 101 | String paramValue = attributeParam.getString("compareValue"); 102 | 103 | String eventAttributeValue = userEvent.getProperties().get(paramAttributeName); 104 | 105 | if ("=" == paramCompareType && !(paramValue == eventAttributeValue)) { 106 | return false; 107 | } 108 | 109 | if (">" == paramCompareType && !(paramValue > eventAttributeValue)) { 110 | return false; 111 | } 112 | 113 | if ("<" == paramCompareType && !(paramValue < eventAttributeValue)) { 114 | return false; 115 | } 116 | 117 | if ("<=" == paramCompareType && !(paramValue <= eventAttributeValue)) { 118 
| return false; 119 | } 120 | 121 | if (">=" == paramCompareType && !(paramValue >= eventAttributeValue)) { 122 | return false; 123 | } 124 | 125 | } 126 | return true; 127 | } 128 | 129 | 130 | } -------------------------------------------------------------------------------- /rule_model_resources/templates/doirs_sql/action_seq_condition_query.sql: -------------------------------------------------------------------------------- 1 | select 2 | guid, 3 | group_concat(concat_ws('_',event_id,event_time)) 4 | from mall_app_events_detail 5 | where event_time >= '2022-08-01 12:00:00' 6 | and event_time <= '2022-08-30 12:00:00' 7 | and ( 8 | (event_id = 'e1' and get_json_string(propJson,'$.pageId')='page001') 9 | OR 10 | (event_id = 'e2' and get_json_string(propJson,'$.itemId')='item002') 11 | OR 12 | (event_id = 'e3' and get_json_string(propJson,'$.itemId')='item003') 13 | ) 14 | GROUP BY guid -------------------------------------------------------------------------------- /rule_model_resources/templates/doirs_sql/action_seq_condition_query.sql.enjoy: -------------------------------------------------------------------------------- 1 | select 2 | guid, 3 | group_concat(concat_ws('_',event_id,event_time),'^') 4 | from mall_app_events_detail 5 | where event_time >= '#(windowStart)' 6 | and event_time <= '#(windowEnd)' 7 | and 8 | ( 9 | #for(eventParam: eventParams) 10 | (event_id = '#(eventParam.eventId)' #for(attrParam: eventParam.attributeParams) and get_json_string(propJson,'$.#(attrParam.attributeName)') #(attrParam.compareType) '#(attrParam.compareValue)' #end) 11 | #if(for.last) #else OR #end 12 | #end 13 | ) 14 | GROUP BY guid -------------------------------------------------------------------------------- /rule_model_resources/templates/doirs_sql/event_count_condition_query.sql.enjoy: -------------------------------------------------------------------------------- 1 | SELECT 2 | guid, 3 | count(1) as cnt 4 | FROM mall_app_events_detail 5 | WHERE 1=1 6 | #if( 
windowStart != null ) 7 | AND event_time>='#(windowStart)' 8 | #end 9 | #if( windowEnd != null ) 10 | AND event_time<='#(windowEnd)' 11 | #end 12 | #if(eventId != null) 13 | AND event_id = '#(eventId)' 14 | #end 15 | #for(attrParam: attrParamList) 16 | AND get_json_string(propJson,'$.#(attrParam.attributeName)') #(attrParam.compareType) '#(attrParam.compareValue)' 17 | #end 18 | GROUP BY guid -------------------------------------------------------------------------------- /rule_model_resources/templates/rule_calculator/rulemodel_01_caculator_old.template: -------------------------------------------------------------------------------- 1 | package cn.doitedu.rtmk.rulemodel.caculator.groovy 2 | 3 | import cn.doitedu.rtmk.common.interfaces.RuleCalculator 4 | import cn.doitedu.rtmk.common.pojo.UserEvent 5 | import com.alibaba.fastjson.JSONArray 6 | import com.alibaba.fastjson.JSONObject 7 | import redis.clients.jedis.Jedis 8 | 9 | class EventCountConditionCalculatorGroovy implements RuleConditionCalculator { 10 | 11 | private Jedis jedis; 12 | private JSONObject ruleDefineParamJsonObject; 13 | private JSONObject eventCountConditionParam; 14 | private String ruleId; 15 | 16 | @Override 17 | public void init(Jedis jedis, JSONObject ruleDefineParamJsonObject) { 18 | this.jedis = jedis; 19 | this.ruleDefineParamJsonObject = ruleDefineParamJsonObject; 20 | 21 | ruleId = ruleDefineParamJsonObject.getString("ruleId"); 22 | 23 | this.eventCountConditionParam = ruleDefineParamJsonObject.getJSONObject("actionCountCondition"); 24 | 25 | } 26 | 27 | @Override 28 | public void calc(UserEvent userEvent) { 29 | 30 | JSONArray eventParams = eventCountConditionParam.getJSONArray("eventParams"); 31 | int size = eventParams.size(); 32 | 33 | for (int i = 0; i < size; i++) { 34 | 35 | JSONObject eventParam = eventParams.getJSONObject(i); 36 | 37 | Integer conditionId = eventParam.getInteger("conditionId"); 38 | 39 | if (userEvent.getEventId().equals(eventParam.getString("eventId"))) { 40 
| 41 | JSONArray attributeParams = eventParam.getJSONArray("attributeParams"); 42 | boolean b = judgeEventAttribute(userEvent, attributeParams); 43 | 44 | if (b) { 45 | jedis.hincrBy(ruleId + ":" + conditionId, userEvent.getGuid() + "", 1); 46 | } 47 | } 48 | } 49 | 50 | 51 | } 52 | 53 | 54 | @Override 55 | public boolean isMatch(int guid) { 56 | JSONArray eventParams = eventCountConditionParam.getJSONArray("eventParams"); 57 | 58 | 59 | #for(eventParam : eventParams) 60 | JSONObject eventParam_#(for.index) = eventParams.getJSONObject(#(for.index)); 61 | Integer conditionId_#(for.index) = eventParam_#(for.index).getInteger("conditionId"); 62 | 63 | Integer eventCountParam_#(for.index) = eventParam_#(for.index).getInteger("eventCount"); 64 | 65 | String realCountStr_#(for.index) = jedis.hget(ruleId + ":" + conditionId_#(for.index), guid + ""); 66 | int realCount_#(for.index) = Integer.parseInt(realCountStr_#(for.index) == null ? "0" : realCountStr_#(for.index)); 67 | 68 | boolean res_#(for.index) = realCount_#(for.index) >= eventCountParam_#(for.index) ; 69 | 70 | #end 71 | 72 | 73 | 74 | return #(combineExpr); 75 | } 76 | 77 | private boolean judgeEventAttribute(UserEvent userEvent, JSONArray attributeParams) { 78 | for (int j = 0; j < attributeParams.size(); j++) { 79 | JSONObject attributeParam = attributeParams.getJSONObject(j); 80 | 81 | String paramAttributeName = attributeParam.getString("attributeName"); 82 | String paramCompareType = attributeParam.getString("compareType"); 83 | String paramValue = attributeParam.getString("compareValue"); 84 | 85 | String eventAttributeValue = userEvent.getProperties().get(paramAttributeName); 86 | 87 | if ("=" == paramCompareType && !(paramValue == eventAttributeValue)) { 88 | return false; 89 | } 90 | 91 | if (">" == paramCompareType && !(paramValue > eventAttributeValue)) { 92 | return false; 93 | } 94 | 95 | if ("<" == paramCompareType && !(paramValue < eventAttributeValue)) { 96 | return false; 97 | } 98 | 99 | if 
("<=" == paramCompareType && !(paramValue <= eventAttributeValue)) { 100 | return false; 101 | } 102 | 103 | if (">=" == paramCompareType && !(paramValue >= eventAttributeValue)) { 104 | return false; 105 | } 106 | 107 | } 108 | return true; 109 | } 110 | 111 | 112 | } -------------------------------------------------------------------------------- /rule_model_resources/templates/rule_param_json/rulemodel_01_param.json: -------------------------------------------------------------------------------- 1 | { 2 | "ruleModelId": "1", 3 | "ruleId": "m1-r01", 4 | "ruleTrigEvent": { 5 | "eventId": "e5", 6 | "attributeParams": [ 7 | { 8 | "attributeName": "pageId", 9 | "compareType": "=", 10 | "compareValue": "page001" 11 | } 12 | ], 13 | "windowStart": "2022-08-01 12:00:00", 14 | "windowEnd": "2022-08-30 12:00:00" 15 | }, 16 | "profileCondition": [ 17 | { 18 | "tagId": "tg01", 19 | "compareType": "gt", 20 | "compareValue": "2" 21 | }, 22 | { 23 | "tagId": "tg04", 24 | "compareType": "match", 25 | "compareValue": "汽车" 26 | } 27 | ], 28 | "actionCountCondition": { 29 | "eventParams": [ 30 | { 31 | "eventId": "e1", 32 | "attributeParams": [ 33 | { 34 | "attributeName": "pageId", 35 | "compareType": "=", 36 | "compareValue": "page001" 37 | } 38 | ], 39 | "windowStart": "2022-08-01 12:00:00", 40 | "windowEnd": "2022-08-30 12:00:00", 41 | "eventCount": 3, 42 | "conditionId": 1, 43 | "dorisQueryTemplate": "action_count" 44 | }, 45 | { 46 | "eventId": "e3", 47 | "attributeParams": [ 48 | { 49 | "attributeName": "pageId", 50 | "compareType": "=", 51 | "compareValue": "page002" 52 | }, 53 | { 54 | "attributeName": "itemId", 55 | "compareType": "=", 56 | "compareValue": "item003" 57 | } 58 | ], 59 | "windowStart": "2022-08-01 12:00:00", 60 | "windowEnd": "2022-08-30 12:00:00", 61 | "eventCount": 1, 62 | "conditionId": 2, 63 | "dorisQueryTemplate": "action_count" 64 | }, 65 | { 66 | "eventId": "e2", 67 | "attributeParams": [ 68 | { 69 | "attributeName": "pageId", 70 | "compareType": 
"=", 71 | "compareValue": "page001" 72 | } 73 | ], 74 | "windowStart": "2022-08-01 12:00:00", 75 | "windowEnd": "2022-08-30 12:00:00", 76 | "eventCount": 2, 77 | "conditionId": 3, 78 | "dorisQueryTemplate": "action_count" 79 | } 80 | ], 81 | "combineExpr": " res_0 && res_1 && res_2 " 82 | } 83 | } -------------------------------------------------------------------------------- /rule_model_resources/templates/rule_param_json/rulemodel_02_param.json: -------------------------------------------------------------------------------- 1 | { 2 | "ruleModelId": "2", 3 | "ruleId": "m2-r01", 4 | "ruleTrigEvent": { 5 | "eventId": "e5", 6 | "attributeParams": [ 7 | { 8 | "attributeName": "pageId", 9 | "compareType": "=", 10 | "compareValue": "page001" 11 | } 12 | ], 13 | "windowStart": "2022-08-01 12:00:00", 14 | "windowEnd": "2022-08-30 12:00:00" 15 | }, 16 | "profileCondition": [ 17 | { 18 | "tagId": "tg01", 19 | "compareType": "gt", 20 | "compareValue": "2" 21 | }, 22 | { 23 | "tagId": "tg04", 24 | "compareType": "match", 25 | "compareValue": "汽车" 26 | } 27 | ], 28 | "actionCountCondition": { 29 | "eventParams": [ 30 | { 31 | "eventId": "e1", 32 | "attributeParams": [ 33 | { 34 | "attributeName": "pageId", 35 | "compareType": "=", 36 | "compareValue": "page001" 37 | } 38 | ], 39 | "windowStart": "2022-08-01 12:00:00", 40 | "windowEnd": "2022-08-30 12:00:00", 41 | "eventCount": 3, 42 | "conditionId": 1, 43 | "dorisQueryTemplate": "action_count" 44 | }, 45 | { 46 | "eventId": "e3", 47 | "attributeParams": [ 48 | { 49 | "attributeName": "pageId", 50 | "compareType": "=", 51 | "compareValue": "page002" 52 | }, 53 | { 54 | "attributeName": "itemId", 55 | "compareType": "=", 56 | "compareValue": "item003" 57 | } 58 | ], 59 | "windowStart": "2022-08-01 12:00:00", 60 | "windowEnd": "2022-08-30 12:00:00", 61 | "eventCount": 1, 62 | "conditionId": 2, 63 | "dorisQueryTemplate": "action_count" 64 | }, 65 | { 66 | "eventId": "e2", 67 | "attributeParams": [ 68 | { 69 | "attributeName": 
"pageId", 70 | "compareType": "=", 71 | "compareValue": "page001" 72 | } 73 | ], 74 | "windowStart": "2022-08-01 12:00:00", 75 | "windowEnd": "2022-08-30 12:00:00", 76 | "eventCount": 2, 77 | "conditionId": 3, 78 | "dorisQueryTemplate": "action_count" 79 | } 80 | ], 81 | "combineExpr": " res_0 && res_1 && res_2 " 82 | }, 83 | "actionSeqCondition": { 84 | "eventParams": [ 85 | { 86 | "eventId": "e1", 87 | "attributeParams": [ 88 | { 89 | "attributeName": "pageId", 90 | "compareType": "=", 91 | "compareValue": "page001" 92 | } 93 | ] 94 | }, 95 | { 96 | "eventId": "e3", 97 | "attributeParams": [ 98 | { 99 | "attributeName": "pageId", 100 | "compareType": "=", 101 | "compareValue": "page002" 102 | }, 103 | { 104 | "attributeName": "itemId", 105 | "compareType": "=", 106 | "compareValue": "item003" 107 | } 108 | ] 109 | }, 110 | { 111 | "eventId": "e2", 112 | "attributeParams": [ 113 | { 114 | "attributeName": "pageId", 115 | "compareType": "=", 116 | "compareValue": "page001" 117 | } 118 | ] 119 | } 120 | ], 121 | "windowStart": "2022-08-01 12:00:00", 122 | "windowEnd": "2022-08-30 12:00:00", 123 | "conditionId": 4, 124 | "dorisQueryTemplate": "action_seq", 125 | "seqCount": 2 126 | }, 127 | "rule_match_count": 2 , 128 | "combineExpr" : "res_0 && res_1" 129 | } -------------------------------------------------------------------------------- /rule_model_resources/templates/rule_param_json/rulemodel_03_param.json: -------------------------------------------------------------------------------- 1 | { 2 | "ruleModelId": "3", 3 | "ruleId": "m3-r01", 4 | "ruleTrigEvent": { 5 | "eventId": "submitOrder", 6 | "attributeParams": [ 7 | { 8 | "attributeName": "pageId", 9 | "compareType": "=", 10 | "compareValue": "page001" 11 | } 12 | ], 13 | "windowStart": "", 14 | "windowEnd": "2022-08-30 12:00:00" 15 | }, 16 | "interval_time": "10000", 17 | "checkEvent": { 18 | "eventId": "payOrder", 19 | "eventAttribute": "orderId" 20 | }, 21 | "rule_match_count": 2 22 | } 
-------------------------------------------------------------------------------- /sqls/doris明细数据表建表.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE dwd.app_log_detail (  -- DWD-layer app event detail table (Apache Doris) 2 | guid BIGINT , 3 | eventid varchar(20) , 4 | releasechannel String , 5 | account String , 6 | appid String , 7 | appversion String , 8 | carrier String , 9 | deviceid String , 10 | devicetype String , 11 | ip String , 12 | latitude double , 13 | longitude double , 14 | nettype String , 15 | osname String , 16 | osversion String , 17 | resolution String , 18 | sessionid String , 19 | `timestamp` BIGINT , 20 | registerTime BIGINT , 21 | firstAccessTime BIGINT , 22 | isNew int , 23 | geoHashCode String , 24 | province String , 25 | city String , 26 | region String , 27 | propsJson String , 28 | dw_date date 29 | ) 30 | DUPLICATE KEY(`guid`, `eventid`)  -- duplicate-key model: keeps every row, sorted by (guid, eventid) 31 | PARTITION BY RANGE(`dw_date`)  -- one partition per DW date 32 | ( 33 | PARTITION p20220809 VALUES LESS THAN ("2022-08-10"), 34 | PARTITION p20220810 VALUES LESS THAN ("2022-08-11"), 35 | PARTITION p20220811 VALUES LESS THAN ("2022-08-12") 36 | ) 37 | DISTRIBUTED BY HASH(`guid`) BUCKETS 4  -- hash-bucket on guid so one user's events colocate 38 | PROPERTIES ( 39 | "replication_num" = "1"  -- single replica: presumably a dev/test cluster — TODO confirm before production use 40 | ); 41 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/ActionSeqCalcFlinkTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import cn.doitedu.utils.Utils; 4 | import com.alibaba.fastjson.JSONObject; 5 | import groovy.lang.GroovyClassLoader; 6 | import lombok.AllArgsConstructor; 7 | import lombok.Data; 8 | import lombok.NoArgsConstructor; 9 | import org.apache.commons.io.FileUtils; 10 | import org.apache.flink.api.common.functions.MapFunction; 11 | import org.apache.flink.configuration.Configuration; 12 | import org.apache.flink.streaming.api.datastream.DataStream; 13 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 14 | import 
org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; 15 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 16 | import org.apache.flink.streaming.api.functions.ProcessFunction; 17 | import org.apache.flink.util.Collector; 18 | import redis.clients.jedis.Jedis; 19 | 20 | import java.io.File; 21 | import java.util.HashMap; 22 | 23 | 24 | 25 | public class ActionSeqCalcFlinkTest { 26 | 27 | public static void main(String[] args) throws Exception { 28 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 29 | DataStreamSource ds = env.socketTextStream("localhost", 4444); 30 | DataStream eventDs = Utils.getEventDataStream(ds); 31 | 32 | 33 | SingleOutputStreamOperator res = eventDs.process(new ProcessFunction() { 34 | IActionRuleCalc calculator; 35 | JSONObject ruleParam; 36 | 37 | @Override 38 | public void open(Configuration parameters) throws Exception { 39 | String scriptText = FileUtils.readFileToString(new File("tech-test/src/main/java/cn/doitedu/groovy/ActionRuleCalc.groovy")); 40 | 41 | // 解析加载字符串形式的groovy状态机类 42 | GroovyClassLoader classLoader = new GroovyClassLoader(); 43 | Class groovyClass = classLoader.parseClass(scriptText); 44 | calculator = (IActionRuleCalc) groovyClass.newInstance(); 45 | 46 | // 构造redis客户端 47 | Jedis jedis = new Jedis("doitedu", 6379); 48 | 49 | // 准备规则条件参数 50 | // {"ruleId":"r01","conditionId":"c01","eventSeq":["A","E","C"],"minCount":1,"maxCount":3} 51 | String ruleParamStr = "{\"ruleId\":\"r01\",\"conditionId\":\"c01\",\"eventSeq\":[\"A\",\"E\",\"C\"],\"minCount\":3,\"maxCount\":3}"; 52 | ruleParam = JSONObject.parseObject(ruleParamStr); 53 | 54 | // 模拟flink的mapState 55 | HashMap flinkMapState = new HashMap<>(); 56 | 57 | // 初始化规则条件计算状态机 58 | calculator.init(ruleParam, jedis, flinkMapState); 59 | 60 | 61 | } 62 | 63 | @Override 64 | public void processElement(Event event, ProcessFunction.Context ctx, Collector out) throws Exception { 65 | boolean res = 
calculator.calc(event.getUserId()+"", event.getEventId()); 66 | if (res) 67 | out.collect(String.format("用户:%s ,规则:%s ,条件:%s ,最小次数:%d ,已满足 ", event.getUserId(), ruleParam.getString("ruleId"), ruleParam.getString("conditionId"), ruleParam.getInteger("minCount"))); 68 | } 69 | }); 70 | 71 | res.print(); 72 | 73 | 74 | env.execute(); 75 | 76 | } 77 | 78 | 79 | } 80 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/ActionSeqCalcTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import groovy.lang.GroovyClassLoader; 5 | import org.apache.commons.io.FileUtils; 6 | import redis.clients.jedis.Jedis; 7 | 8 | import java.io.File; 9 | import java.util.Arrays; 10 | import java.util.HashMap; 11 | import java.util.List; 12 | 13 | public class ActionSeqCalcTest { 14 | public static void main(String[] args) throws Exception { 15 | String scriptText = FileUtils.readFileToString(new File("tech-test/src/main/java/cn/doitedu/groovy/ActionRuleCalc.groovy")); 16 | 17 | // 解析加载字符串形式的groovy状态机类 18 | GroovyClassLoader classLoader = new GroovyClassLoader(); 19 | Class groovyClass = classLoader.parseClass(scriptText); 20 | IActionRuleCalc calculator = (IActionRuleCalc) groovyClass.newInstance(); 21 | 22 | // 构造redis客户端 23 | Jedis jedis = new Jedis("doitedu", 6379); 24 | 25 | // 准备规则条件参数 26 | // {"ruleId":"r01","conditionId":"c01","eventSeq":["A","E","C"],"minCount":1,"maxCount":3} 27 | String ruleParamStr = "{\"ruleId\":\"r01\",\"conditionId\":\"c01\",\"eventSeq\":[\"A\",\"E\",\"C\"],\"minCount\":3,\"maxCount\":3}"; 28 | JSONObject ruleParam = JSONObject.parseObject(ruleParamStr); 29 | 30 | // 模拟flink的mapState 31 | HashMap flinkMapState = new HashMap<>(); 32 | 33 | 34 | // 初始化规则条件计算状态机 35 | calculator.init(ruleParam,jedis,flinkMapState); 36 | 37 | // 模拟用户事件流片段,片段中刚好包含一次完整条件序列 38 | List strings = 
Arrays.asList("X", "Y", "A", "X", "Y","X", "Y", "E", "E", "X", "Y","A", "C"); 39 | 40 | // Replay the mock event stream 10000 times through the state machine to measure rule-calc throughput 41 | boolean res = false; 42 | long start = System.currentTimeMillis(); 43 | for(int i=0;i<10000;i++) { 44 | for (String event : strings) { 45 | res = calculator.calc("u01", event); 46 | } 47 | } 48 | long end = System.currentTimeMillis(); 49 | 50 | // Print total elapsed time in milliseconds 51 | System.out.println(end-start); 52 | 53 | // Final match result after the last replay 54 | System.out.println(res); 55 | 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/Event.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | // Minimal user-event POJO (userId + eventId) parsed from "userId,eventId" lines by Utils.getEventDataStream 8 | @Data 9 | @AllArgsConstructor 10 | @NoArgsConstructor 11 | public class Event{ 12 | private int userId; 13 | private String eventId; 14 | } -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/GroovyHello.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import cn.doitedu.groovy.Person; 4 | 5 | // Smoke test: invokes methods of a Groovy class (cn.doitedu.groovy.Person) directly from Java 6 | public class GroovyHello { 7 | public static void main(String[] args) { 8 | Person person = new Person(); 9 | System.out.println(person.sayName1("taoge")); 10 | System.out.println(person.sayName2("涛哥", " 深似海男人")); 11 | System.out.println(person.add(3, 5)); 12 | 13 | } 14 | } -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/GroovyTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import groovy.lang.GroovyClassLoader; 4 | import groovy.lang.GroovyObject; 5 | import org.apache.commons.io.FileUtils; 6 | 7 | import java.io.File; 8 | 9 | public class GroovyTest { 10 | public static void 
main(String[] args) throws Exception { 11 | 12 | //String s = FileUtils.readFileToString(new File("tech-test/src/main/java/cn/doitedu/groovy/Person.groovy")); 13 | 14 | String s = "package cn.doitedu.groovy\n" + 15 | "\n" + 16 | "import groovy.util.logging.Slf4j\n" + 17 | "\n" + 18 | "@Slf4j\n" + 19 | "class Person {\n" + 20 | " def String sayName1(String name) {\n" + 21 | " log.error(\"hahahaha\")\n" + 22 | " return \"hello \" + name\n" + 23 | " }\n" + 24 | "\n" + 25 | " def String sayName2(String name, String suffix) {\n" + 26 | " return name + \" \" + suffix\n" + 27 | " }\n" + 28 | "\n" + 29 | "\n" + 30 | " def int add(int a, int b) {\n" + 31 | " return a + b\n" + 32 | " }\n" + 33 | "}\n"; 34 | 35 | 36 | GroovyClassLoader classLoader = new GroovyClassLoader(); 37 | 38 | Class groovyClass = classLoader.parseClass(s); 39 | GroovyObject groovyObject = (GroovyObject) groovyClass.newInstance(); 40 | 41 | String param1 = "taoge"; 42 | String[] param2 = {"taoge", "总裁"}; 43 | Integer[] param3 = {8, 7}; 44 | 45 | Object result1 = groovyObject.invokeMethod("sayName1", param1); 46 | Object result2 = groovyObject.invokeMethod("sayName2", param2); 47 | Object result3 = groovyObject.invokeMethod("add", param3); 48 | System.out.println(result1); 49 | System.out.println(result2); 50 | System.out.println(result3); 51 | 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/GroovyUtil.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import groovy.lang.GroovyClassLoader; 4 | import groovy.lang.GroovyCodeSource; 5 | import groovy.lang.GroovyObject; 6 | 7 | import java.io.File; 8 | 9 | public class GroovyUtil { 10 | /** 11 | * 加载Groovy文件,返回GroovyObject对象 12 | */ 13 | public static GroovyObject getGroovyObjectBy(File file) { 14 | GroovyObject groovyObject = null; 15 | try { 16 | GroovyClassLoader classLoader = new 
GroovyClassLoader(Thread.currentThread().getContextClassLoader()); 17 | Class clazz = classLoader.parseClass(new GroovyCodeSource(file)); 18 | groovyObject = (GroovyObject)clazz.newInstance(); 19 | } catch (Exception e) {} 20 | return groovyObject; 21 | } 22 | 23 | 24 | public static GroovyObject getGroovyObjectByText(String text) { 25 | GroovyObject groovyObject = null; 26 | try { 27 | GroovyClassLoader classLoader = new GroovyClassLoader(Thread.currentThread().getContextClassLoader()); 28 | Class clazz = classLoader.parseClass(text); 29 | groovyObject = (GroovyObject)clazz.newInstance(); 30 | } catch (Exception e) {} 31 | return groovyObject; 32 | } 33 | 34 | 35 | public static GroovyObject getGroovyObject(String filePath) { 36 | return getGroovyObjectBy(new File(filePath)); 37 | } 38 | 39 | /** 40 | * 执行GroovyObject对象的方法 41 | */ 42 | static Object invokeMethod(GroovyObject groovyObject, String name, Object args) { 43 | return groovyObject.invokeMethod(name, args); 44 | } 45 | } -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/HugeBitmapTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import org.roaringbitmap.RoaringBitmap; 4 | 5 | import java.io.*; 6 | import java.sql.Connection; 7 | import java.sql.DriverManager; 8 | import java.sql.PreparedStatement; 9 | import java.sql.SQLException; 10 | 11 | public class HugeBitmapTest { 12 | 13 | public static void main(String[] args) throws Exception { 14 | // 存10亿个用户id到 bitmap 15 | 16 | RoaringBitmap bm = RoaringBitmap.bitmapOf(); 17 | 18 | for(int i=0;i<100000000;i++){ 19 | bm.add(i); 20 | } 21 | 22 | // 12513208 23 | System.out.println(bm.serializedSizeInBytes()); 24 | 25 | FileOutputStream fout = new FileOutputStream(new File("d:/bitmap.dat")); 26 | ByteArrayOutputStream bout = new ByteArrayOutputStream(); 27 | DataOutputStream dout = new DataOutputStream(bout); 28 | 29 | 
bm.serialize(dout); 30 | 31 | 32 | Connection connection = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root"); 33 | PreparedStatement stmt = connection.prepareStatement("insert into bm_test (id,bm) values(?,?)"); 34 | stmt.setInt(1,1); 35 | stmt.setBytes(2,bout.toByteArray()); 36 | stmt.execute(); 37 | 38 | dout.close(); 39 | bout.close(); 40 | fout.close(); // NOTE(review): fout is opened earlier but never written to — remove it or serialize the bitmap into it; also consider try-with-resources so streams/connection close on exception — verify intent 41 | 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/IActionRuleCalc.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import com.alibaba.fastjson.JSONObject; 4 | import redis.clients.jedis.Jedis; 5 | 6 | import java.util.HashMap; 7 | 8 | public interface IActionRuleCalc { 9 | // Initializes the rule state machine with the rule params, a Redis client, and a (mocked) Flink map state 10 | void init(JSONObject ruleParam, Jedis jedis , HashMap flinkMapState); 11 | 12 | // Runs the rule calculation for one incoming event of one user; returns true once the condition is satisfied 13 | boolean calc(String userId ,String event); 14 | } 15 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/JudgeStringIsNumeric.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | // Demonstrates checking whether a string is a (decimal) number via regex 4 | public class JudgeStringIsNumeric { 5 | public static void main(String[] args) { 6 | 7 | // The dot must be escaped: an unescaped '.' matches ANY character, so e.g. "13a5" would wrongly pass 8 | System.out.println("13.5".matches("\\d+(\\.\\d+)?")); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/ProfileInjectTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu; 2 | 3 | import cn.doitedu.utils.Utils; 4 | import lombok.extern.slf4j.Slf4j; 5 | import org.apache.flink.api.common.state.BroadcastState; 6 | import org.apache.flink.api.common.state.MapStateDescriptor; 7 | import org.apache.flink.api.common.typeinfo.TypeInformation; 8 | import 
org.apache.flink.streaming.api.datastream.BroadcastStream; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.flink.streaming.api.functions.co.KeyedBroadcastProcessFunction; 13 | import org.apache.flink.util.Collector; 14 | import org.roaringbitmap.RoaringBitmap; 15 | 16 | @Slf4j 17 | public class ProfileInjectTest { 18 | 19 | public static void main(String[] args) throws Exception { 20 | 21 | StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); 22 | env.setParallelism(1); 23 | DataStreamSource ds = env.socketTextStream("doitedu", 4444); 24 | 25 | DataStream eventDataStream = Utils.getEventDataStream(ds); 26 | 27 | DataStream kafkaBitmapStream = Utils.getKafkaBitmap(env); 28 | MapStateDescriptor broadcastStateDesc = new MapStateDescriptor<>("broadcastStateDesc", TypeInformation.of(String.class), TypeInformation.of(RoaringBitmap.class)); 29 | BroadcastStream broadcastStream = kafkaBitmapStream.broadcast(broadcastStateDesc); 30 | 31 | eventDataStream 32 | .keyBy(Event::getUserId) 33 | .connect(broadcastStream) 34 | .process(new KeyedBroadcastProcessFunction() { 35 | @Override 36 | public void processElement(Event value, KeyedBroadcastProcessFunction.ReadOnlyContext ctx, Collector out) throws Exception { 37 | // 从广播状态取到人群bitmap 38 | RoaringBitmap bitmap = ctx.getBroadcastState(broadcastStateDesc).get("rule-1"); 39 | 40 | if(bitmap != null && bitmap.contains(value.getUserId())){ 41 | out.collect("用户: "+value.getUserId()+" 存在于画像人群中"); 42 | }else if( bitmap == null){ 43 | out.collect("bitmap为null"); 44 | }else { 45 | out.collect("用户: "+value.getUserId()+ " 不存在"); 46 | } 47 | } 48 | 49 | @Override 50 | public void processBroadcastElement(RoaringBitmap bitmap, KeyedBroadcastProcessFunction.Context ctx, Collector out) throws Exception { 51 | 
log.error("收到广播变量"); 52 | BroadcastState broadcastState = ctx.getBroadcastState(broadcastStateDesc); 53 | broadcastState.put("rule-1",bitmap); 54 | } 55 | }) 56 | .print(); 57 | 58 | env.execute(); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/dynamic/Calculator.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.dynamic; 2 | 3 | public interface Calculator { 4 | public int add(int a, int b); 5 | } 6 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/dynamic/DynamicCallTest.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.dynamic; 2 | 3 | import java.sql.*; 4 | 5 | public class DynamicCallTest { 6 | 7 | 8 | public static void main(String[] args) throws InterruptedException, SQLException, InstantiationException, IllegalAccessException, ClassNotFoundException { 9 | 10 | System.out.println("我在工作中....."); 11 | 12 | System.out.println("我在工作中....."); 13 | 14 | 15 | System.out.println("我要调一个工具来做加法"); 16 | 17 | 18 | Connection conn = DriverManager.getConnection("jdbc:mysql://doitedu:3306/rtmk", "root", "root"); 19 | Statement stmt = conn.createStatement(); 20 | 21 | 22 | while(true) { 23 | 24 | ResultSet rs = stmt.executeQuery("select class_name,java_code from t_dynamic_code"); 25 | while(rs.next()) { 26 | String class_name = rs.getString("class_name"); 27 | String java_code = rs.getString("java_code"); 28 | 29 | // 利用编译工具,对源代码进行编译 30 | 31 | // 编译完成后,加载class 32 | 33 | // 反射调用 34 | Class aClass = Class.forName(class_name); // 加载class(编译好的东西) ,不是 java源代码 35 | Calculator calculator = (Calculator) aClass.newInstance(); 36 | int res = calculator.add(10, 20); 37 | 38 | System.out.println("调用工具完毕,得到了结果:" + res); 39 | Thread.sleep(2000); 40 | } 41 | } 42 | 43 | } 44 | 45 | 46 | } 47 | 
-------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/groovy/ActionRuleCalc.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.groovy 2 | 3 | import cn.doitedu.IActionRuleCalc 4 | import com.alibaba.fastjson.JSON 5 | import com.alibaba.fastjson.JSONObject 6 | import redis.clients.jedis.Jedis 7 | 8 | class ActionRuleCalc implements IActionRuleCalc { 9 | 10 | JSONObject ruleParam 11 | Jedis jedis 12 | HashMap flinkMapState 13 | 14 | String ruleId 15 | String conditionId 16 | List eventSeq 17 | int minCount 18 | int maxCount 19 | 20 | 21 | @Override 22 | void init(JSONObject ruleParam, Jedis jedis, HashMap flinkMapState) { 23 | this.ruleParam = ruleParam 24 | this.jedis = jedis 25 | this.flinkMapState = flinkMapState 26 | 27 | this.ruleId = ruleParam.getString("ruleId") 28 | this.conditionId = ruleParam.getString("conditionId") 29 | this.eventSeq = ruleParam.getJSONArray("eventSeq").toJavaList(String.class) 30 | this.minCount = ruleParam.getInteger("minCount") 31 | this.maxCount = ruleParam.getInteger("maxCount") 32 | 33 | } 34 | 35 | /** 36 | * 滚动聚合,中间状态数据结构: 37 | * {"seq":"AE","cnt":1,"cflag":0} 38 | */ 39 | boolean calc(String userId, String event) { 40 | 41 | if (flinkMapState.get(ruleId + ":" + conditionId + ":" + userId) == 1) return true 42 | 43 | boolean res = false 44 | String oldSeq = "" 45 | int oldCnt = 0 46 | 47 | // 先判断,输入事件是否是条件序列所包含的事件 48 | if (eventSeq.contains(event)) { 49 | String json = jedis.hget(ruleId + ":" + userId, conditionId) 50 | JSONObject redisJsonObject 51 | 52 | // 如果redis中已有中间聚合状态(有可能是上线后计算所得,也有可能是发布时历史数据的查询结果) 53 | if (json != null) { 54 | redisJsonObject = JSON.parseObject(json) 55 | oldSeq = redisJsonObject.getString("seq") 56 | oldCnt = redisJsonObject.getInteger("cnt") 57 | int cflag = redisJsonObject.getInteger("cflag") 58 | 59 | // 如果该用户该条件在redis中的查询结果是已经满足,则直接返回 60 | // 
主要是为了job重启后且flink内部状态有丢失的情况下,可以快速重建已完成状态 61 | if(cflag==1) { 62 | flinkMapState.put(ruleId + ":" + conditionId + ":" + userId,1) 63 | return true 64 | } 65 | } 66 | // 如果redis中没有中间聚合状态,则初始化一个聚合状态 67 | else { 68 | redisJsonObject = new JSONObject() 69 | redisJsonObject.put("seq", oldSeq) 70 | redisJsonObject.put("cnt", oldCnt) 71 | redisJsonObject.put("cflag", 0) 72 | } 73 | 74 | // 如果输入事件,是序列中的下一个目标事件 75 | if (event == eventSeq.get(oldSeq.length())) { 76 | 77 | // 如果是序列中欠缺的最后一个事件 78 | if (oldSeq.length() == eventSeq.size() - 1) { 79 | 80 | // 则发生次数+1 , 序列清空 81 | redisJsonObject.put("cnt", oldCnt + 1) 82 | redisJsonObject.put("seq", "") 83 | 84 | res = (oldCnt + 1 >= minCount) 85 | 86 | // 已经完全满足条件,则将该用户的满足状态直接放入flink state中,以避免无谓的redis查询 87 | if (res) { 88 | flinkMapState.put(ruleId + ":" + conditionId + ":" + userId, 1) 89 | redisJsonObject.put("cflag",1) 90 | } 91 | } 92 | 93 | // 如果是中间事件,则序列追加此事件 94 | else { 95 | redisJsonObject.put("seq", oldSeq + event) 96 | } 97 | 98 | // 将本次结果写入redis 99 | jedis.hset(ruleId + ":" + userId, conditionId, redisJsonObject.toJSONString()) 100 | } 101 | 102 | } 103 | 104 | return res 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/groovy/HelloWorld.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.groovy 2 | 3 | class HelloWorld { 4 | static void main(String[] args) { 5 | println("hello world") 6 | } 7 | } 8 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/groovy/Person.groovy: -------------------------------------------------------------------------------- 1 | package cn.doitedu.groovy 2 | 3 | import groovy.util.logging.Slf4j 4 | 5 | @Slf4j 6 | class Person { 7 | def String sayName1(String name) { 8 | log.error("hahahaha") 9 | return "hello " + name 10 | } 11 | 12 | def String sayName2(String name, String 
suffix) { 13 | return name + " " + suffix 14 | } 15 | 16 | 17 | def int add(int a, int b) { 18 | return a + b 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/utils/BitmapSchema.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import org.apache.flink.api.common.serialization.DeserializationSchema; 4 | import org.apache.flink.api.common.serialization.SerializationSchema; 5 | import org.apache.flink.api.common.typeinfo.TypeInformation; 6 | import org.roaringbitmap.RoaringBitmap; 7 | 8 | import java.io.*; 9 | 10 | /** 11 | * @Author: deep as the sea 12 | * @Site: 多易教育 13 | * @QQ: 657270652 14 | * @Date: 2022/8/12 15 | * @Desc: 用户flink-kafka连接器的bitmap序列化器 16 | **/ 17 | public class BitmapSchema implements DeserializationSchema, SerializationSchema { 18 | @Override 19 | public void open(DeserializationSchema.InitializationContext context) throws Exception { 20 | DeserializationSchema.super.open(context); 21 | } 22 | 23 | @Override 24 | public RoaringBitmap deserialize(byte[] message) throws IOException { 25 | ByteArrayInputStream bi = new ByteArrayInputStream(message); 26 | DataInputStream din = new DataInputStream(bi); 27 | RoaringBitmap bm = RoaringBitmap.bitmapOf(); 28 | 29 | bm.deserialize(din); 30 | din.close(); 31 | return bm; 32 | } 33 | 34 | @Override 35 | public boolean isEndOfStream(RoaringBitmap nextElement) { 36 | return false; 37 | } 38 | 39 | @Override 40 | public void open(SerializationSchema.InitializationContext context) throws Exception { 41 | SerializationSchema.super.open(context); 42 | } 43 | 44 | @Override 45 | public byte[] serialize(RoaringBitmap bm) { 46 | 47 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 48 | DataOutputStream dos = new DataOutputStream(bos); 49 | byte[] bytes ; 50 | try { 51 | bm.serialize(dos); 52 | bytes = bos.toByteArray(); 53 | } catch (IOException e) 
{ 54 | throw new RuntimeException(e); 55 | } 56 | 57 | return bytes; 58 | } 59 | 60 | @Override 61 | public TypeInformation getProducedType() { 62 | return TypeInformation.of(RoaringBitmap.class); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/utils/KafkaBitmapSerializer.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import org.apache.kafka.common.header.Headers; 4 | import org.apache.kafka.common.serialization.Serializer; 5 | import org.roaringbitmap.RoaringBitmap; 6 | 7 | import java.io.ByteArrayOutputStream; 8 | import java.io.DataOutputStream; 9 | import java.io.IOException; 10 | import java.util.Map; 11 | 12 | public class KafkaBitmapSerializer implements Serializer { 13 | @Override 14 | public void configure(Map configs, boolean isKey) { 15 | Serializer.super.configure(configs, isKey); 16 | } 17 | 18 | @Override 19 | public byte[] serialize(String topic, RoaringBitmap bm) { 20 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 21 | DataOutputStream dos = new DataOutputStream(bos); 22 | byte[] bytes = new byte[0]; 23 | if(bm != null) { 24 | try { 25 | bm.serialize(dos); 26 | bytes = bos.toByteArray(); 27 | } catch (IOException e) { 28 | 29 | throw new RuntimeException(e); 30 | } 31 | } 32 | return bytes; 33 | } 34 | 35 | @Override 36 | public byte[] serialize(String topic, Headers headers, RoaringBitmap data) { 37 | return Serializer.super.serialize(topic, headers, data); 38 | } 39 | 40 | @Override 41 | public void close() { 42 | Serializer.super.close(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/utils/ProduceBitmapUtil.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import 
org.apache.kafka.clients.producer.KafkaProducer; 4 | import org.apache.kafka.clients.producer.ProducerConfig; 5 | import org.apache.kafka.clients.producer.ProducerRecord; 6 | import org.apache.kafka.common.serialization.StringSerializer; 7 | import org.roaringbitmap.RoaringBitmap; 8 | 9 | import java.util.Properties; 10 | 11 | public class ProduceBitmapUtil { 12 | 13 | public static void main(String[] args) { 14 | 15 | Properties props = new Properties(); 16 | //设置kafka集群的地址 17 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "doitedu:9092"); 18 | 19 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName()); 20 | // 指定自定义的roaringbitmap序列化器 21 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, KafkaBitmapSerializer.class.getName()); 22 | 23 | 24 | props.put(ProducerConfig.ACKS_CONFIG, "all"); 25 | props.put("retries", 3); 26 | props.put("batch.size", 1); 27 | props.put(ProducerConfig.MAX_REQUEST_SIZE_CONFIG,204800); 28 | props.put("linger.ms", 10); 29 | props.put("buffer.memory", 102400000); // 默认32M 30 | 31 | // 构造一个生产者实例对象 32 | KafkaProducer producer = new KafkaProducer<>(props); 33 | 34 | 35 | // 构造一个测试用的bitmap 36 | RoaringBitmap bm = RoaringBitmap.bitmapOf(); 37 | //bm.add(1,3,4,5,6,7,8,11); 38 | bm.add(1,6,7); 39 | 40 | // 构造producer消息对象 41 | ProducerRecord record = new ProducerRecord<>("bm-test", bm); 42 | // 发送消息 43 | producer.send(record); 44 | 45 | // 关闭生产者 46 | producer.flush(); 47 | producer.close(); 48 | 49 | } 50 | 51 | 52 | } 53 | -------------------------------------------------------------------------------- /tech-test/src/main/java/cn/doitedu/utils/Utils.java: -------------------------------------------------------------------------------- 1 | package cn.doitedu.utils; 2 | 3 | import cn.doitedu.Event; 4 | import org.apache.flink.api.common.eventtime.WatermarkStrategy; 5 | import org.apache.flink.api.common.functions.MapFunction; 6 | import org.apache.flink.api.common.serialization.SimpleStringSchema; 7 | 
import org.apache.flink.connector.kafka.source.KafkaSource; 8 | import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer; 9 | import org.apache.flink.streaming.api.datastream.DataStream; 10 | import org.apache.flink.streaming.api.datastream.DataStreamSource; 11 | import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; 12 | import org.apache.kafka.clients.consumer.OffsetResetStrategy; 13 | import org.roaringbitmap.RoaringBitmap; 14 | 15 | public class Utils { 16 | 17 | 18 | public static DataStream getEventDataStream(DataStreamSource ds) { 19 | DataStream eventDs = ds.map(new MapFunction() { 20 | @Override 21 | public Event map(String value) throws Exception { 22 | String[] split = value.split(","); 23 | return new Event(Integer.parseInt(split[0]), split[1]); 24 | } 25 | }); 26 | 27 | return eventDs; 28 | } 29 | 30 | 31 | 32 | public static DataStream getKafkaBitmap( StreamExecutionEnvironment env){ 33 | 34 | // 构造一个 kafka 的source 35 | KafkaSource kafkaSource = KafkaSource.builder() 36 | .setTopics("bm-test") 37 | .setBootstrapServers("doitedu:9092") 38 | .setValueOnlyDeserializer(new BitmapSchema()) 39 | .setGroupId("tr") 40 | .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST)) 41 | .build(); 42 | 43 | // 人群画像bitmap topic获取数据 44 | DataStreamSource ds = env.fromSource(kafkaSource,WatermarkStrategy.noWatermarks(),"bm"); 45 | 46 | return ds; 47 | } 48 | 49 | public static DataStream getKafkaBitmapBase64(StreamExecutionEnvironment env) { 50 | // 构造一个 kafka 的source 51 | KafkaSource kafkaSource = KafkaSource.builder() 52 | .setTopics("bm-test-base64") 53 | .setBootstrapServers("doitedu:9092") 54 | .setValueOnlyDeserializer(new SimpleStringSchema()) 55 | .setGroupId("tr") 56 | .setStartingOffsets(OffsetsInitializer.committedOffsets(OffsetResetStrategy.LATEST)) 57 | .build(); 58 | 59 | // 人群画像bitmap topic获取数据 60 | DataStreamSource ds = 
env.fromSource(kafkaSource,WatermarkStrategy.noWatermarks(),"bmbase64"); 61 | 62 | return ds; 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /tech-test/src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, software 13 | # distributed under the License is distributed on an "AS IS" BASIS, 14 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | # See the License for the specific language governing permissions and 16 | # limitations under the License. 
17 | ################################################################################ 18 | 19 | log4j.rootLogger = INFO, console 20 | 21 | log4j.appender.console=org.apache.log4j.ConsoleAppender 22 | log4j.appender.console.layout = org.apache.log4j.PatternLayout 23 | #log4j.appender.console.layout.ConversionPattern = [%-5p] %d(%r) --> [%t] %l: %m %x %n 24 | log4j.appender.console.layout.ConversionPattern = [%-5p] %d %l: %m %x %n 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /x.json: -------------------------------------------------------------------------------- 1 | { 2 | "ruleModelId": "1", 3 | "ruleId": "rule001", 4 | "profileCondition": [ 5 | { 6 | "tagId": "tg01", 7 | "compareType": "gt", 8 | "compareValue": "3" 9 | }, 10 | { 11 | "tagId": "tg04", 12 | "compareType": "match", 13 | "compareValue": "汽车" 14 | } 15 | ], 16 | "actionCountCondition": { 17 | "eventParams": [ 18 | { 19 | "eventId": "e1", 20 | "attributeParams": [ 21 | { 22 | "attributeName": "pageId", 23 | "compareType": "=", 24 | "compareValue": "page001" 25 | } 26 | ], 27 | "windowStart": "2022-08-01 12:00:00", 28 | "windowEnd": "2022-08-30 12:00:00", 29 | "eventCount": 3, 30 | "conditionId": 1, 31 | "dorisQueryTemplate": "action_count" 32 | }, 33 | { 34 | "eventId": "e3", 35 | "attributeParams": [ 36 | { 37 | "attributeName": "pageId", 38 | "compareType": "=", 39 | "compareValue": "page002" 40 | }, 41 | { 42 | "attributeName": "itemId", 43 | "compareType": "=", 44 | "compareValue": "item003" 45 | } 46 | ], 47 | "windowStart": "2022-08-01 12:00:00", 48 | "windowEnd": "2022-08-30 12:00:00", 49 | "eventCount": 1, 50 | "conditionId": 2, 51 | "dorisQueryTemplate": "action_count" 52 | }, 53 | { 54 | "eventId": "e2", 55 | "attributeParams": [ 56 | { 57 | "attributeName": "pageId", 58 | "compareType": "=", 59 | "compareValue": "page001" 60 | } 61 | ], 62 | "windowStart": "2022-08-01 12:00:00", 63 | "windowEnd": "2022-08-30 12:00:00", 64 | "eventCount": 
2, 65 | "conditionId": 3, 66 | "dorisQueryTemplate": "action_count" 67 | } 68 | ], 69 | "combineExpr": " res0 && (res1 || res2) " 70 | } 71 | } --------------------------------------------------------------------------------