├── custom_components
│   └── google_vision
│       ├── __init__.py
│       ├── manifest.json
│       └── image_processing.py
├── development
│   ├── usage.png
│   ├── detail.png
│   ├── family.jpg
│   └── test-image3.jpg
├── .gitignore
├── LICENSE
└── README.md

/custom_components/google_vision/__init__.py:
--------------------------------------------------------------------------------
"""The Google Vision component."""

--------------------------------------------------------------------------------
/development/usage.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robmarkcole/HASS-Google-Vision/HEAD/development/usage.png
--------------------------------------------------------------------------------
/development/detail.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robmarkcole/HASS-Google-Vision/HEAD/development/detail.png
--------------------------------------------------------------------------------
/development/family.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robmarkcole/HASS-Google-Vision/HEAD/development/family.jpg
--------------------------------------------------------------------------------
/development/test-image3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/robmarkcole/HASS-Google-Vision/HEAD/development/test-image3.jpg
--------------------------------------------------------------------------------
/custom_components/google_vision/manifest.json:
--------------------------------------------------------------------------------
{
  "domain": "google_vision",
  "name": "Google Vision object detection",
  "documentation": "https://github.com/robmarkcole/HASS-Google-Vision",
  "requirements": ["simple-google-vision==0.3"],
  "version": "1.0.0",
  "dependencies": [],
  "codeowners": ["@robmarkcole"]
}

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Build and Release Folders
bin-debug/
bin-release/
[Oo]bj/
[Bb]in/

# Other files and folders
.settings/

# Executables
*.swf
*.air
*.ipa
*.apk

# Project files, i.e. `.project`, `.actionScriptProperties` and `.flexProperties`
# should NOT be excluded as they contain compiler settings and other important
# information for Eclipse / Flash Builder.
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2019 Robin

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# HASS-Google-Vision
[Home Assistant](https://www.home-assistant.io/) custom component for image processing with Google Vision.

Detect objects in camera feeds using Google Vision. [Upload a photo to try out the processing here](https://cloud.google.com/vision/) (see the `Objects` tab). Trigger the image processing by calling the `image_processing.scan` service; note that scanning is disabled by default to prevent unintentionally racking up bills. The component adds an image_processing entity whose state is the total number of `target` objects detected above a confidence threshold (default 80%). The time of the last detection of the target object is held in the `last_detection` attribute, and the type and number of objects (of any confidence) are listed in the `summary` attribute.

If `save_file_folder` is configured, on each new detection of the target an annotated image named `google_vision_latest_{target}.jpg` is saved, over-writing any existing file of that name. This image shows the bounding boxes around detected targets and can be displayed on the Home Assistant front end using a local_file camera (see later in this readme). Use the [folder_watcher](https://www.home-assistant.io/integrations/folder_watcher/) integration to detect when this file has changed if you want to include it in notifications.

An event `image_processing.object_detected` is fired for each object detected and can be used to track multiple object types, for example incrementing a [counter](https://www.home-assistant.io/integrations/counter/) or kicking off an automation.

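Because scanning is disabled by default, nothing happens until the `image_processing.scan` service is called, typically from an automation. A minimal sketch; the motion sensor `binary_sensor.hall_motion` is hypothetical, and the image_processing entity id will depend on your configured target and camera:

```yaml
automation:
  - alias: Scan camera on motion
    trigger:
      platform: state
      entity_id: binary_sensor.hall_motion  # hypothetical motion sensor
      to: 'on'
    action:
      service: image_processing.scan
      entity_id: image_processing.google_vision_person_local_file
```
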




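As mentioned above, the `image_processing.object_detected` event (documented towards the end of this readme) can be used to increment a [counter](https://www.home-assistant.io/integrations/counter/). A minimal sketch, assuming the default `person` target; the counter name is arbitrary:

```yaml
counter:
  person_detections:
    initial: 0
    step: 1

automation:
  - alias: Count person detections
    trigger:
      platform: event
      event_type: image_processing.object_detected
      event_data:
        object: person
    action:
      service: counter.increment
      entity_id: counter.person_detections
```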

## Google Vision API key file & API Pricing
Follow the instructions on https://cloud.google.com/docs/authentication/getting-started to download your API key, which is a `.json` file. Place the file in your Home Assistant config folder.

[Read pricing](https://cloud.google.com/vision/pricing). The first 1000 calls per month are free; additional calls are charged. Be sure that you understand how the image processing `scan_interval` works, or you risk running up bills.

## Home Assistant config
Place the `custom_components` folder in your configuration directory (or add its contents to an existing `custom_components` folder). Add to your Home Assistant config:

```yaml
image_processing:
  - platform: google_vision
    api_key_file: /config/Google_API_key.json
    save_file_folder: /config/www/
    source:
      - entity_id: camera.local_file
```

Configuration variables:
- **api_key_file**: The path to your API key file.
- **target**: (Optional) The target object class, default `person`.
- **save_file_folder**: (Optional) The folder to save processed images to. Note that the folder path should be added to [whitelist_external_dirs](https://www.home-assistant.io/docs/configuration/basic/).
- **confidence**: (Optional) The confidence (in %) above which detected targets are counted in the sensor state. Default value: 80.
- **name**: (Optional) A custom name for the entity.
- **source**: Must be a camera.

## Displaying the `google_vision_latest_{target}.jpg` file
It is easy to display the `google_vision_latest_{target}.jpg` image with a [local_file](https://www.home-assistant.io/integrations/local_file) camera. An example configuration is:

```yaml
camera:
  - platform: local_file
    file_path: /config/www/google_vision_latest_person.jpg
    name: google_vision_latest_person
```

## Automation to send the `google_vision_latest_{target}.jpg` file
Configure the [folder_watcher](https://www.home-assistant.io/integrations/folder_watcher/) in `configuration.yaml`, e.g.:

```yaml
folder_watcher:
  - folder: /config/www/
```
Then in `automations.yaml` we send a photo when `google_vision_latest_{target}.jpg` is modified. Note that I [have included](https://community.home-assistant.io/t/limit-automation-triggering/14915) a couple of delays that temporarily disable the automation, since the `folder_watcher` events can fire multiple times during the image saving process:

```yaml
- id: '1527837198169'
  alias: New detection
  trigger:
    platform: event
    event_type: folder_watcher
    event_data:
      file: google_vision_latest_person.jpg
  action:
  - service: automation.turn_off
    entity_id: automation.new_detection
  ## Make sure file is saved
  - delay:
      seconds: 1
  - service: telegram_bot.send_photo
    data:
      file: /config/www/google_vision_latest_person.jpg
  ## Throttle notifications
  - delay:
      seconds: 2
  - service: automation.turn_on
    entity_id: automation.new_detection
```

#### Event `image_processing.object_detected`
An event `image_processing.object_detected` is fired for each object detected above the configured `confidence` threshold. This is the recommended way to check the confidence of detections, and to keep track of objects that are not configured as the `target` (set the logger level to `debug` to observe events in the Home Assistant logs). An example use case for the event is to get an alert when a rarely appearing object is detected, or to increment a [counter](https://www.home-assistant.io/components/counter/). The `image_processing.object_detected` event payload includes:

- `entity_id`: the entity id responsible for the event
- `object`: the object detected
- `confidence`: the confidence of the detection, on the same percentage scale as the configured `confidence` threshold.

An example automation using the `image_processing.object_detected` event is given below:

```yaml
- action:
  - data_template:
      title: "New object detection"
      message: "{{ trigger.event.data.object }} with confidence {{ trigger.event.data.confidence }}"
    service: telegram_bot.send_message
  alias: Object detection automation
  condition: []
  id: '1120092824622'
  trigger:
  - platform: event
    event_type: image_processing.object_detected
    event_data:
      object: person
```
--------------------------------------------------------------------------------
/custom_components/google_vision/image_processing.py:
--------------------------------------------------------------------------------
"""
Perform image processing with Google Vision.
"""
import base64
import json
import logging
import time
import io
import os
from datetime import timedelta
from typing import Union, List, Set, Dict

from PIL import Image, ImageDraw

import voluptuous as vol

import gvision.core as gv

from homeassistant.util.pil import draw_box
import homeassistant.util.dt as dt_util
from homeassistant.const import ATTR_ENTITY_ID
from homeassistant.core import split_entity_id
import homeassistant.helpers.config_validation as cv
from homeassistant.components.image_processing import (
    PLATFORM_SCHEMA,
    ImageProcessingEntity,
    ATTR_CONFIDENCE,
    CONF_SOURCE,
    CONF_ENTITY_ID,
    CONF_NAME,
)


_LOGGER = logging.getLogger(__name__)

SCAN_INTERVAL = timedelta(days=365)  # Effectively disable scan.
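# Note: with SCAN_INTERVAL set to a year, Home Assistant effectively never polls
# the camera for processing automatically; images are only processed when the
# image_processing.scan service is called (e.g. from an automation), which keeps
# Google Vision API usage and cost under the user's control.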

CONF_API_KEY_FILE = "api_key_file"
CONF_SAVE_FILE_FOLDER = "save_file_folder"
CONF_TARGET = "target"
DEFAULT_TARGET = "person"
EVENT_OBJECT_DETECTED = "image_processing.object_detected"
EVENT_FILE_SAVED = "image_processing.file_saved"
FILE = "file"
OBJECT = "object"

PLATFORM_SCHEMA = PLATFORM_SCHEMA.extend(
    {
        vol.Required(CONF_API_KEY_FILE): cv.string,
        vol.Optional(CONF_TARGET, default=DEFAULT_TARGET): cv.string,
        vol.Optional(CONF_SAVE_FILE_FOLDER): cv.isdir,
    }
)


def setup_platform(hass, config, add_devices, discovery_info=None):
    """Set up platform."""

    save_file_folder = config.get(CONF_SAVE_FILE_FOLDER)
    if save_file_folder:
        save_file_folder = os.path.join(save_file_folder, "")  # If no trailing / add it

    entities = []
    for camera in config[CONF_SOURCE]:
        entities.append(
            Gvision(
                config.get(CONF_TARGET),
                config.get(CONF_API_KEY_FILE),
                config.get(ATTR_CONFIDENCE),
                save_file_folder,
                camera[CONF_ENTITY_ID],
                camera.get(CONF_NAME),
            )
        )
    add_devices(entities)


class Gvision(ImageProcessingEntity):
    """Perform object recognition with Google Vision."""

    def __init__(
        self, target, api_key_file, confidence, save_file_folder, camera_entity, name=None
    ):
        """Init with the client."""
        self._target = target
        self._api = gv.Vision(api_key_file)
        self._confidence = confidence  # the confidence threshold
        if name:  # Since name is optional.
            self._name = name
        else:
            entity_name = split_entity_id(camera_entity)[1]
            self._name = "{} {} {}".format("google vision", target, entity_name)
        self._camera_entity = camera_entity
        self._state = None  # The number of instances of interest
        self._summary = {}
        self._last_detection = None
        if save_file_folder:
            self._save_file_folder = save_file_folder

    def process_image(self, image):
        """Process an image."""
        self._state = None
        self._summary = {}

        response = self._api.object_localization(image)
        objects = response.localized_object_annotations

        if not objects:
            return

        self._state = len(gv.get_object_confidences(objects, self._target))
        self._summary = gv.get_objects_summary(objects)
        self.fire_object_detected_events(objects, self._confidence)

        if self._state > 0:
            self._last_detection = dt_util.now()

        if hasattr(self, "_save_file_folder") and self._state > 0:
            self.save_image(image, objects, self._target, self._save_file_folder)

    def save_image(self, image, objects, target, directory):
        """Save the latest image, annotated with bounding boxes around targets."""

        img = Image.open(io.BytesIO(bytearray(image))).convert("RGB")
        draw = ImageDraw.Draw(img)

        for obj in objects:
            obj_confidence = gv.format_confidence(obj.score)
            if obj.name.lower() == target and obj_confidence > self._confidence:
                box = gv.get_box(obj.bounding_poly.normalized_vertices)
                draw_box(draw, box, img.width, img.height)

        latest_save_path = directory + "google_vision_latest_{}.jpg".format(target)
        img.save(latest_save_path)

    def fire_object_detected_events(self, objects, confidence_threshold):
        """Fire an event for each detection above the confidence threshold."""

        for obj in objects:
            obj_confidence = gv.format_confidence(obj.score)
            if obj_confidence > confidence_threshold:
                self.hass.bus.fire(
                    EVENT_OBJECT_DETECTED,
                    {
                        ATTR_ENTITY_ID: self.entity_id,
                        OBJECT: obj.name.lower(),
                        ATTR_CONFIDENCE: obj_confidence,
                    },
                )

    @property
    def camera_entity(self):
        """Return the entity id of the source camera."""
        return self._camera_entity

    @property
    def state(self):
        """Return the state of the entity."""
        return self._state

    @property
    def name(self):
        """Return the name of the sensor."""
        return self._name

    @property
    def unit_of_measurement(self):
        """Return the unit of measurement."""
        target = self._target
        if self._state is not None and self._state > 1:
            target += "s"
        return target

    @property
    def device_state_attributes(self):
        """Return device specific state attributes."""
        attr = {}
        attr["target"] = self._target
        attr["summary"] = self._summary
        if self._last_detection:
            attr[
                "last_{}_detection".format(self._target)
            ] = self._last_detection.strftime("%Y-%m-%d %H:%M:%S")
        return attr

--------------------------------------------------------------------------------