├── .env
├── scripts
│   ├── ksql-config-file.txt
│   ├── python
│   │   ├── credentials.py
│   │   └── kafka_notifier.py
│   ├── read_power
│   ├── power_20180805.json
│   └── do_tests
├── docs
│   ├── notif.png
│   ├── smarts.png
│   ├── notification.md
│   ├── java.md
│   └── notes.md
├── ext
│   └── ksql-udf-iot.jar
├── .gitignore
├── docker
│   └── Dockerfile
├── java
│   ├── src
│   │   └── main
│   │       └── java
│   │           └── com
│   │               └── vsimon
│   │                   └── kafka
│   │                       └── streams
│   │                           └── AnomolyPower.java
│   └── pom.xml
├── docker-compose.yml
├── README.md
└── LICENSE

/.env:
--------------------------------------------------------------------------------
CONF_VER=5.0.0
LAND_VER=0.9.4
--------------------------------------------------------------------------------
/scripts/ksql-config-file.txt:
--------------------------------------------------------------------------------
ksql.streams.auto.offset.reset=earliest
--------------------------------------------------------------------------------
/docs/notif.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saubury/stream-smarts/HEAD/docs/notif.png
--------------------------------------------------------------------------------
/docs/smarts.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saubury/stream-smarts/HEAD/docs/smarts.png
--------------------------------------------------------------------------------
/ext/ksql-udf-iot.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/saubury/stream-smarts/HEAD/ext/ksql-udf-iot.jar
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.swp
docker-compose-ui.yml
*/__pycache__/*
*.pyc
credentials.py
scripts/python/credentials.py
.vscode/*
java/target/*
--------------------------------------------------------------------------------
/scripts/python/credentials.py:
--------------------------------------------------------------------------------
# Push Bullet API Token Here
# https://www.pushbullet.com/#settings/account

login = {
    'pushbullet_api_token' : 'ITSASECRET',
    'hassio_password' : 'ITSASECRET'
}
--------------------------------------------------------------------------------
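
Because `credentials.py` is ignored by git (see `.gitignore` above), a fresh checkout can end up without any credentials at all. One option is to fall back to environment variables; this is only a sketch, not part of this repo, and the variable names `PUSHBULLET_API_TOKEN` and `HASSIO_PASSWORD` are illustrative rather than anything the project defines:

```
# Hypothetical drop-in replacement for scripts/python/credentials.py
# (illustrative only) that reads the secrets from the environment.
import os

login = {
    'pushbullet_api_token': os.environ.get('PUSHBULLET_API_TOKEN', 'ITSASECRET'),
    'hassio_password': os.environ.get('HASSIO_PASSWORD', 'ITSASECRET')
}
```

As long as the `login` dictionary keeps the same keys, `kafka_notifier.py` can import it unchanged.
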
/scripts/read_power:
--------------------------------------------------------------------------------
#!/bin/bash

export BROKER_LIST=localhost:9092

if [ ! -z ${1} ]; then
    export BROKER_LIST=${1}
fi

echo "Sending to broker ${BROKER_LIST}"


kafka-avro-console-producer \
    --broker-list ${BROKER_LIST} \
    --topic raw_power \
    --property value.schema='
{
  "type": "record",
  "name": "myrecord",
  "fields": [
      {
        "name": "hour",
        "type": "float"
      },
      {
        "name": "kwh",
        "type": "float"
      }
  ]
}'
--------------------------------------------------------------------------------
/docker/Dockerfile:
--------------------------------------------------------------------------------
FROM python:2.7

LABEL version="1.0"
LABEL description="Extending 2.7 image adding a few more pip packages"

# Install confluent-kafka and pushbullet packages
# Bypass enterprise ssl interception by ignoring validation - which is really bad
RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --index-url=https://pypi.org/simple/ confluent-kafka
RUN pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --index-url=https://pypi.org/simple/ pushbullet.py

# I know you are not supposed to do this, it's not very docker-like
ENTRYPOINT ["tail", "-f", "/dev/null"]
--------------------------------------------------------------------------------
/scripts/power_20180805.json:
--------------------------------------------------------------------------------
{"hour": 0, "kwh": 136}
{"hour": 1, "kwh": 129}
{"hour": 2, "kwh": 149}
{"hour": 3, "kwh": 128}
{"hour": 4, "kwh": 126}
{"hour": 5, "kwh": 144}
{"hour": 6, "kwh": 298}
{"hour": 7, "kwh": 276}
{"hour": 8, "kwh": 160}
{"hour": 9, "kwh": 175}
{"hour": 10, "kwh": 152}
{"hour": 11, "kwh": 271}
{"hour": 12, "kwh": 146}
{"hour": 13, "kwh": 289}
{"hour": 14, "kwh": 137}
{"hour": 15, "kwh": 153}
{"hour": 16, "kwh": 285}
{"hour": 17, "kwh": 221}
{"hour": 18, "kwh": 264}
{"hour": 19, "kwh": 308}
{"hour": 20, "kwh": 434}
{"hour": 21, "kwh": 687}
{"hour": 22, "kwh": 1890}
{"hour": 23, "kwh": 189}
--------------------------------------------------------------------------------
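
`read_power` wraps `kafka-avro-console-producer`; the same `{hour, kwh}` records can also be produced from Python with the `confluent-kafka` package that the notifier image installs. A rough sketch, not part of this repo, assuming the broker and Schema Registry are reachable from the host on `localhost:9092` and `http://localhost:8081`:

```
# Produce one {hour, kwh} Avro record to the raw_power topic from Python
from confluent_kafka import avro
from confluent_kafka.avro import AvroProducer

# The same Avro schema that scripts/read_power passes to kafka-avro-console-producer
value_schema = avro.loads('''
{
  "type": "record",
  "name": "myrecord",
  "fields": [
    {"name": "hour", "type": "float"},
    {"name": "kwh",  "type": "float"}
  ]
}
''')

producer = AvroProducer(
    {'bootstrap.servers': 'localhost:9092',
     'schema.registry.url': 'http://localhost:8081'},
    default_value_schema=value_schema)

# The same demonstration reading the README loads via read_power
producer.produce(topic='raw_power', value={'hour': 9, 'kwh': 1500})
producer.flush()
```

Piping `power_20180805.json` through `read_power` as the docs describe achieves the same result; this is just the programmatic equivalent.
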
/scripts/do_tests:
--------------------------------------------------------------------------------
#!/bin/bash

echo '{"hour": 4, "kwh": 1500}' | docker-compose exec -T schema-registry /scripts/read_power kafka:29092

echo
echo
echo

cat << EOF

**************************************************************
create stream raw_power_stream with (kafka_topic='raw_power', value_format='avro');

create stream power_stream_rekeyed as select rowtime, hour, kwh, anomoly_power(hour, kwh) as fn from raw_power_stream partition by rowtime;

create stream anomoly_power with (value_format='JSON') as select rowtime as event_ts, hour, kwh, fn from power_stream_rekeyed where fn>1.0;

select * from anomoly_power limit 1;
**************************************************************

EOF


docker-compose exec ksql-cli ksql "http://ksql-server:8088" --config-file /scripts/ksql-config-file.txt
--------------------------------------------------------------------------------
/docs/notification.md:
--------------------------------------------------------------------------------
# Stream Smarts - Personal Data Anomaly Detection



| [Overview](/README.md) | [Java](/docs/java.md) | Notification |[Random Notes](/docs/notes.md) |
|---|----|----|-----|




## Setup
- Follow the setup in the [readme](/README.md)
- The stream `anomoly_power` should be running
- By _subscribing_ to the `anomoly_power` topic we can build a notification for significant events

## Configure Pushbullet API

- Get a Push Bullet account
- Install the app on your phone
- Get an API token at
  https://www.pushbullet.com/#settings/account
- Set this entry in `credentials.py`

```
login = {
  'pushbullet_api_token' : 'ITISASECRET'
}
```

## Run Python Subscriber

```
docker-compose exec kafka-notifier python /scripts/python/kafka_notifier.py
```

![Notification](/docs/notif.png)

--------------------------------------------------------------------------------
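
Before wiring the token into Kafka, it can be worth a standalone check that Pushbullet accepts it; `pushbullet.py` is already installed in the `kafka-notifier` image. A minimal sketch, not part of this repo, run from the same directory as `credentials.py`:

```
# Quick Pushbullet credential check, mirroring how kafka_notifier.py connects
from pushbullet import Pushbullet

import credentials   # the same un-committed scripts/python/credentials.py

pb = Pushbullet(credentials.login['pushbullet_api_token'])
push = pb.push_note('Stream Smarts test', 'Pushbullet token is working')
print(push)
```

If a notification arrives on the phone, the subscriber described above has everything it needs.
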
/docs/java.md:
--------------------------------------------------------------------------------
# Stream Smarts - Personal Data Anomaly Detection



| [Overview](/README.md) | Java | [Notification](/docs/notification.md) |[Random Notes](/docs/notes.md) |
|---|----|-----|-----|



# Java Overview

# Build and deploy KSQL User Defined Anomaly Functions

## Compile Code to Create Anomaly Functions
```
cd java
mvn clean package
ls target/ksql-udf-iot-1.0.jar
```

## Deploy KSQL User Defined Functions

```
# build ksql-udf-iot.jar as above
ls -l ext/ksql-udf-iot.jar

# Stop KSQL server
docker-compose stop ksql-server

# Replace the UDF ksql-udf-iot.jar
rm ext/ksql-udf-iot.jar
cp java/target/ksql-udf-iot-1.0.jar ext/ksql-udf-iot.jar

# Restart KSQL server
docker-compose start ksql-server
```


## Check KSQL User Defined Functions Are Available

Start the `ksql` client and verify:

```
ksql> list functions;

 Function Name           | Type
-------------------------------------
 . . .
 ANOMOLY_LOCATION        | SCALAR
 ANOMOLY_POWER           | SCALAR      <--- You need this one
 ANOMOLY_WATER           | SCALAR
```




--------------------------------------------------------------------------------
/scripts/python/kafka_notifier.py:
--------------------------------------------------------------------------------
#!/bin/python
# Heavily borrowing from https://www.confluent.io/blog/real-time-syslog-processing-with-apache-kafka-and-ksql-part-2-event-driven-alerting-with-slack/
# rmoff / 05 Apr 2018

from confluent_kafka import Consumer, KafkaError
from pushbullet import Pushbullet
import json
import requests


# API keys held in a non-committed file
import credentials

# Subscribe to the ANOMOLY_POWER topic
settings = {
    'bootstrap.servers': 'kafka:29092',
    'group.id': 'python_pushbullet',
    'default.topic.config': {'auto.offset.reset': 'largest'}
}
c = Consumer(settings)
c.subscribe(['ANOMOLY_POWER'])

# Connect to the Pushbullet service
pb = Pushbullet(credentials.login['pushbullet_api_token'])

# Poll for messages; extract the JSON payload and call Pushbullet for each one
while True:
    msg = c.poll()
    if msg is None:
        # Nothing delivered on this poll; try again
        continue
    if msg.error():
        if msg.error().code() == KafkaError._PARTITION_EOF:
            continue
        else:
            print(msg.error())
            break

    app_json_msg = json.loads(msg.value().decode('utf-8'))

    # Send a push notification to the phone via Pushbullet
    push = pb.push_note('Unusual power usage of {:.0f} Wh at {:.0f}:00. Please check!'.format(
        app_json_msg['KWH'], app_json_msg['HOUR']), 'Full message: {}'.format(app_json_msg))

    # Notify Google Home via Hass.io - Home Assistant
    # url = 'http://192.168.1.195:8123/api/services/tts/google_say?api_password={}'.format(credentials.login['hassio_password'])
    # data = '{"entity_id": "media_player.office_speaker", "message": "Warning. The power usage is more than expected"}'
    # response = requests.post(url, data=data)


c.close()
--------------------------------------------------------------------------------
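
To exercise the notifier without waiting for a real anomaly, a record shaped like the output of the `anomoly_power` stream can be written straight to the `ANOMOLY_POWER` topic (the notifier reads the upper-cased `KWH` and `HOUR` fields). A rough sketch, not part of this repo, assuming the broker is reachable on `localhost:9092`:

```
# Push a hand-crafted "anomaly" JSON record so kafka_notifier.py fires
import json
from confluent_kafka import Producer

p = Producer({'bootstrap.servers': 'localhost:9092'})

# Field names mirror what kafka_notifier.py reads; the values are arbitrary
fake_event = {'EVENT_TS': 1536382309000, 'HOUR': 4.0, 'KWH': 1500.0, 'FN': 5.46}
p.produce('ANOMOLY_POWER', value=json.dumps(fake_event))
p.flush()
```
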
/java/src/main/java/com/vsimon/kafka/streams/AnomolyPower.java:
--------------------------------------------------------------------------------
package com.vsimon.kafka.streams;

import io.confluent.ksql.function.udf.Udf;
import io.confluent.ksql.function.udf.UdfDescription;

@UdfDescription(name = "anomoly_power", description = "Return anomaly score from 0..1 from expected value")

public class AnomolyPower {


    @Udf(description = "Return anomaly score from 0..1 from expected value")
    public double anomoly_power(final double hr, final double mwh) {

        double ret = -1;

        if (hr==0)  { ret = (mwh-110) / (464-110); }
        if (hr==1)  { ret = (mwh-112) / (346-112); }
        if (hr==2)  { ret = (mwh-109) / (390-109); }
        if (hr==3)  { ret = (mwh-114) / (361-114); }
        if (hr==4)  { ret = (mwh-108) / (363-108); }
        if (hr==5)  { ret = (mwh-110) / (348-110); }
        if (hr==6)  { ret = (mwh-111) / (1954-111); }
        if (hr==7)  { ret = (mwh-108) / (2689-108); }
        if (hr==8)  { ret = (mwh-107) / (2607-107); }
        if (hr==9)  { ret = (mwh-111) / (2120-111); }
        if (hr==10) { ret = (mwh-106) / (2032-106); }
        if (hr==11) { ret = (mwh-107) / (2272-107); }
        if (hr==12) { ret = (mwh-108) / (1639-108); }
        if (hr==13) { ret = (mwh-108) / (1333-108); }
        if (hr==14) { ret = (mwh-109) / (1555-109); }
        if (hr==15) { ret = (mwh-108) / (1458-108); }
        if (hr==16) { ret = (mwh-108) / (2371-108); }
        if (hr==17) { ret = (mwh-108) / (2716-108); }
        if (hr==18) { ret = (mwh-114) / (3296-114); }
        if (hr==19) { ret = (mwh-109) / (3380-109); }
        if (hr==20) { ret = (mwh-108) / (3745-108); }
        if (hr==21) { ret = (mwh-110) / (3381-110); }
        if (hr==22) { ret = (mwh-107) / (2463-107); }
        if (hr==23) { ret = (mwh-112) / (566-112); }

        return ret;
    }
}
--------------------------------------------------------------------------------
/java/pom.xml:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.vsimon</groupId>
    <artifactId>ksql-udf-iot</artifactId>
    <version>1.0</version>

    <repositories>
        <repository>
            <id>confluent</id>
            <url>http://packages.confluent.io/maven/</url>
        </repository>
    </repositories>

    <properties>
        <exec.mainClass>com.vsimon.kafka.streams.AnomolyPower</exec.mainClass>
        <java.version>1.8</java.version>
        <kafka.version>2.0.0</kafka.version>
        <kafka.scala.version>2.11</kafka.scala.version>
        <scala.version>${kafka.scala.version}.8</scala.version>
        <confluent.version>5.0.0</confluent.version>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <dependency>
            <groupId>io.confluent.ksql</groupId>
            <artifactId>ksql-udf</artifactId>
            <version>5.0.0</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.6.1</version>
                <configuration>
                    <source>1.8</source>
                    <target>1.8</target>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>2.5.2</version>
                <configuration>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>${exec.mainClass}</mainClass>
                        </manifest>
                    </archive>
                </configuration>
                <executions>
                    <execution>
                        <id>assemble-all</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
--------------------------------------------------------------------------------
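
A quick desk check of the UDF above needs no Kafka at all: the `hr==4` branch reduces to `(kwh - 108) / (363 - 108)`, which gives roughly 5.46 for 1,500 kWh, the same figure recorded under "Function Results" in the notes below. A throwaway Python check:

```
# Mirrors only the hr==4 branch of AnomolyPower.anomoly_power() as a sanity check
def anomaly_power_hour4(kwh):
    return (kwh - 108.0) / (363.0 - 108.0)

for kwh in (100.0, 1500.0, 3000.0):
    print(kwh, anomaly_power_hour4(kwh))
# Expect roughly -0.031, 5.459 and 11.341, matching docs/notes.md
```
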
/docs/notes.md:
--------------------------------------------------------------------------------
| [Overview](/README.md) | [Java](/docs/java.md) | [Notification](/docs/notification.md) |Random Notes |
|---|----|----|-----|


# Python
```
docker-compose exec schema-registry bash

pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --index-url=https://pypi.org/simple/ confluent-kafka
pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --index-url=https://pypi.org/simple/ pushbullet.py

echo '{"f1": "value1"}' | kafka-console-producer --broker-list localhost:9092 --topic TEST
```


# CLI
```
echo $PS1
\[\033[36m\]\u:\[\033[33;1m\]\W\[\033[m\]\$
```

# KSQL
```
SET 'auto.offset.reset' = 'earliest';
create stream raw_power with (kafka_topic='raw_mwh', value_format='avro');

create stream power_stream_rekeyed as select rowtime, hour, mwh, anomoly_power(hour, mwh) as fn from raw_power partition by rowtime;

select timestamptostring(rowtime, 'yyyy-MM-dd HH:mm:ss'), rowtime as event_ts, hour, mwh, fn from power_stream_rekeyed where anomoly_power(hour, mwh)>1.0;


create stream anomoly_power with (value_format='JSON') as select rowtime as event_ts, hour, mwh, fn from power_stream_rekeyed where anomoly_power(hour, mwh)>1.0;



run script 'ksql_commands.ksql';

```

# Docker build
```
WARNING: Image for service kafka-notifier was built because it did not already exist. To rebuild this image you must use `docker-compose build` or `docker-compose up --build`.
```

# Git - don't check in secrets
```
git update-index --assume-unchanged scripts/python/credentials.py
```

# Function Results

```

2018-09-08 04:51:49 | 4.0 | 100.0 | -0.03137254901960784
2018-09-08 04:51:49 | 4.0 | 1500.0 | 5.458823529411765
2018-09-08 04:51:49 | 4.0 | 3000.0 | 11.341176470588236

2018-09-08 04:51:49 | 9.0 | 100.0 | -0.0054753608760577405
2018-09-08 04:51:49 | 9.0 | 1500.0 | 0.6913887506222001
2018-09-08 04:51:49 | 9.0 | 3000.0 | 1.438028870084619

2018-09-08 04:51:49 | 20.0 | 100.0 | -0.0021996150673632116
2018-09-08 04:51:49 | 20.0 | 1500.0 | 0.3827330217211988
2018-09-08 04:51:49 | 20.0 | 3000.0 | 0.7951608468518009


{"hour": 4, "kwh": 100}
{"hour": 9, "kwh": 100}
{"hour": 20, "kwh": 100}
{"hour": 4, "kwh": 1500}
{"hour": 9, "kwh": 1500}
{"hour": 20, "kwh": 1500}
{"hour": 4, "kwh": 3000}
{"hour": 9, "kwh": 3000}
{"hour": 20, "kwh": 3000}

```


## Other Data Loading Examples

Jump _into_ the `schema-registry` container

```
docker-compose exec schema-registry bash


# Note, you are navigating within the container
cd /scripts


# Load a day's worth of data
cat power_20180805.json | ./read_power kafka:29092

# Or pass in slowly like this
cat power_20180805.json | while read line; do echo $line; sleep 1; done | ./read_power kafka:29092

# Now exit
exit
```
--------------------------------------------------------------------------------
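
The `run script 'ksql_commands.ksql'` note above is one non-interactive option; another is to post the stream definitions to the KSQL server's REST API. A rough sketch, not part of this repo, assuming the server is published on `localhost:8088` as in the compose file below:

```
# Submit the demo CREATE STREAM statements over the KSQL REST API
import json
import requests

statements = """
create stream raw_power_stream with (kafka_topic='raw_power', value_format='avro');
create stream power_stream_rekeyed as select rowtime, hour, kwh, anomoly_power(hour, kwh) as fn from raw_power_stream partition by rowtime;
create stream anomoly_power with (value_format='JSON') as select rowtime as event_ts, hour, kwh, fn from power_stream_rekeyed where fn>1.0;
"""

resp = requests.post('http://localhost:8088/ksql',
                     headers={'Content-Type': 'application/vnd.ksql.v1+json'},
                     data=json.dumps({'ksql': statements,
                                      'streamsProperties': {'ksql.streams.auto.offset.reset': 'earliest'}}))
print(resp.status_code, resp.json())
```
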
/docker-compose.yml:
--------------------------------------------------------------------------------
---
version: '3'
# Example based on https://github.com/confluentinc/demo-scene/blob/master/mysql-debezium-ksql-elasticsearch/docker-compose/docker-compose.yml

services:
  zookeeper:
    image: "confluentinc/cp-zookeeper:${CONF_VER}"
    ports:
      - 2181:2181
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  kafka:
    image: "confluentinc/cp-enterprise-kafka:${CONF_VER}"
    depends_on:
      - zookeeper
    volumes:
      - ./scripts:/scripts
    ports:
      # Exposes 9092 for external connections to the broker
      # Use kafka:29092 for connections internal on the docker network
      # See https://rmoff.net/2018/08/02/kafka-listeners-explained/ for details
      - '9092:9092'
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_GROUP_INITIAL_REBALANCE_DELAY_MS: 100
      CONFLUENT_METRICS_ENABLE: 'false'

  schema-registry:
    image: "confluentinc/cp-schema-registry:${CONF_VER}"
    ports:
      - 8081:8081
    depends_on:
      - zookeeper
      - kafka
    volumes:
      - ./scripts:/scripts
    environment:
      SCHEMA_REGISTRY_HOST_NAME: schema-registry
      SCHEMA_REGISTRY_KAFKASTORE_CONNECTION_URL: zookeeper:2181
      SCHEMA_REGISTRY_LOG4J_ROOT_LOGLEVEL: "WARN"

  # kafka-connect:
  #   image: confluentinc/cp-kafka-connect:${CONF_VER}
  #   depends_on:
  #     - zookeeper
  #     - kafka
  #     - schema-registry
  #   ports:
  #     - '8083:8083'
  #   environment:
  #     CONNECT_BOOTSTRAP_SERVERS: "kafka:29092"
  #     CONNECT_REST_PORT: 8083
  #     CONNECT_GROUP_ID: compose-connect-group
  #     CONNECT_CONFIG_STORAGE_TOPIC: docker-connect-configs
  #     CONNECT_OFFSET_STORAGE_TOPIC: docker-connect-offsets
  #     CONNECT_STATUS_STORAGE_TOPIC: docker-connect-status
  #     CONNECT_KEY_CONVERTER: io.confluent.connect.avro.AvroConverter
  #     CONNECT_KEY_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
  #     CONNECT_VALUE_CONVERTER: io.confluent.connect.avro.AvroConverter
  #     CONNECT_VALUE_CONVERTER_SCHEMA_REGISTRY_URL: 'http://schema-registry:8081'
  #     CONNECT_INTERNAL_KEY_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
  #     CONNECT_INTERNAL_VALUE_CONVERTER: "org.apache.kafka.connect.json.JsonConverter"
  #     CONNECT_REST_ADVERTISED_HOST_NAME: "kafka-connect"




  ksql-server:
    image: confluentinc/cp-ksql-server:${CONF_VER}
    ports:
      - '8088:8088'
    depends_on:
      - kafka
      - schema-registry
    environment:
      KSQL_BOOTSTRAP_SERVERS: kafka:29092
      KSQL_LISTENERS: http://0.0.0.0:8088
      KSQL_KSQL_SCHEMA_REGISTRY_URL: http://schema-registry:8081
      KSQL_KSQL_SERVICE_ID: "cld-ksql-server"
      KSQL_KSQL_EXTENSION_DIR: "/etc/ksql/ext"
    volumes:
      - ./ext:/etc/ksql/ext
      - ./scripts:/scripts

  ksql-cli:
    image: confluentinc/cp-ksql-cli:5.0.0
    depends_on:
      - ksql-server
    volumes:
      - ./scripts:/scripts
    entrypoint: /bin/sh
    tty: true

  kafka-notifier:
    image: 'simonaubury/kafka-notifier:latest'
    build:
      context: docker/
    hostname: kafka-notifier
    volumes:
      - ./scripts:/scripts
    extra_hosts:
      - "moby:127.0.0.1"

--------------------------------------------------------------------------------
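
Once `docker-compose up -d` (described in the README below) has settled, a quick way to confirm the published ports are answering is to poke the Schema Registry and KSQL server from the host. This is only a sketch and assumes their `/subjects` and `/info` REST endpoints respectively:

```
# Minimal "is the stack up?" check against the ports published above
import requests

for name, url in [('schema-registry', 'http://localhost:8081/subjects'),
                  ('ksql-server',     'http://localhost:8088/info')]:
    resp = requests.get(url)
    print(name, resp.status_code, resp.text[:100])
```
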
/README.md:
--------------------------------------------------------------------------------
# Stream Smarts - Personal Data Anomaly Detection



| Overview | [Java](/docs/java.md) | [Notification](/docs/notification.md) |[Random Notes](/docs/notes.md) |
|---|----|----|-----|

## Architectural overview

Continuous home power consumption monitoring, with anomaly detection.

![Architecture](/docs/smarts.png)

Alerting is driven by a user-defined anomaly function for Apache Kafka's KSQL.

# Inspiration
https://github.com/kaiwaehner/ksql-udf-deep-learning-mqtt-iot

## Prerequisites & setup
- clone this repo!
- install docker/docker-compose
- set your Docker maximum memory to something really big, such as 10GB (preferences -> advanced -> memory)


## Startup
```
# This will take a while the first time
docker-compose up -d
```


## Data Loading

Jump _into_ the `schema-registry` container

```
docker-compose exec schema-registry bash

# Note, you are navigating within the container
cd /scripts

# Load demonstration records
echo '{"hour": 9, "kwh": 1500}' | ./read_power kafka:29092

# Now exit
exit
```

## Running KSQL CLI
To connect to the KSQL CLI
```
docker-compose exec ksql-cli ksql "http://ksql-server:8088"

                  ===========================================
                  =        _  __ _____  ____  _             =
                  =       | |/ // ____|/ __ \| |            =
                  =       | ' /| (___ | |  | | |            =
                  =       |  <  \___ \| |  | | |            =
                  =       | . \ ____) | |__| | |____        =
                  =       |_|\_\_____/ \___\_\______|       =
                  =                                         =
                  =  Streaming SQL Engine for Apache Kafka® =
                  ===========================================

Copyright 2017-2018 Confluent Inc.

CLI v5.0.0, Server v5.0.0 located at http://ksql-server:8088
```

And try something like
```
ksql> show topics;
```


# Build and deploy KSQL User Defined Anomaly Functions

- For this quickstart, the pre-built `ksql-udf-iot.jar` is already in the `ext` directory and the ksql-server should have loaded it
- *Optional* : if you want to build and deploy your own `ksql-udf-iot.jar`, follow the [Java Steps](/docs/java.md)
- Run these ksql commands to check you can see the UDF `ANOMOLY_POWER`
```
ksql> list functions;

 Function Name           | Type
-------------------------------------
 . . .
 ANOMOLY_LOCATION        | SCALAR
 ANOMOLY_POWER           | SCALAR      <--- Has been loaded from ksql-udf-iot.jar
 ANOMOLY_WATER           | SCALAR
```




## Setting up streams

- Now we will create the streams
```
SET 'auto.offset.reset' = 'earliest';

create stream raw_power_stream with (kafka_topic='raw_power', value_format='avro');

create stream power_stream_rekeyed as \
  select rowtime, hour, kwh, anomoly_power(hour, kwh) as fn \
  from raw_power_stream partition by rowtime;

select timestamptostring(rowtime, 'yyyy-MM-dd HH:mm:ss'), hour, kwh, fn \
  from power_stream_rekeyed limit 1;

2018-09-08 11:34:37 | 9.0 | 1500.0 | 0.6913887506222001
Limit Reached
Query terminated

create stream anomoly_power with (value_format='JSON') as \
  select rowtime as event_ts, hour, kwh, fn \
  from power_stream_rekeyed where fn>1.0;
```

- The stream `anomoly_power` is now running. It only produces records for significant events, so by _subscribing_ to the `anomoly_power` topic we can build a notification for those events

## View
- In the ksql window, start a query like this looking for anomaly records.
Keep this query running 124 | ``` 125 | select * from anomoly_power; 126 | ``` 127 | 128 | - In *another* session 129 | ``` 130 | docker-compose exec schema-registry bash 131 | 132 | # Note, you are navigating within the container 133 | cd /scripts 134 | 135 | # Load a non-anomoly record 136 | # Note that no records are returned in the other terminal ksql query 137 | echo '{"hour": 9, "kwh": 1500}' | ./read_power kafka:29092 138 | 139 | # Load an anomoly record 140 | # Note that a record should be returned in the other terminal ksql query 141 | echo '{"hour": 4, "kwh": 1500}' | ./read_power kafka:29092 142 | 143 | # Now exit 144 | exit 145 | ``` 146 | 147 | ## What did we see? 148 | - Running a query using the `ANOMOLY_POWER` function allowed a predifined model to be used within a KSQL query 149 | - The stream `anomoly_power` stream created a topic which only had events that breached the limit of the model 150 | - By _subscribing_ to the `anomoly_power` we can build a notification for significant events 151 | 152 | ## Build a notifier 153 | - If you are keen - go on to [Notification](/docs/notification.md) 154 | 155 | ## Shutdown and cleanup 156 | ``` 157 | docker-compose down 158 | ``` 159 | 160 | 161 | 162 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------