├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── pull_request_template.md └── workflows │ └── publish-comfyui-registry.yml ├── .gitignore ├── README.md ├── __init__.py ├── examples ├── Moondream Gaze Detection Image.json └── Moondream Gaze Detection Video.json ├── modules └── inferencer │ ├── moondream_inferencer.py │ └── pyvips_dll_handler.py ├── nodes.py ├── pyproject.toml └── requirements.txt /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: bug 6 | assignees: 'jhj0517' 7 | --- 8 | 9 | **Which OS are you using?** 10 | - OS: [e.g. Linux or Windows] 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Any feature you want 4 | title: '' 5 | labels: enhancement 6 | assignees: 'jhj0517' 7 | --- 8 | 9 | **Describe feature you want** 10 | 11 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | ## Related issues / PRs. 2 | - # 3 | 4 | ## Summarize Changes 5 | 1. 6 | -------------------------------------------------------------------------------- /.github/workflows/publish-comfyui-registry.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | 5 | push: 6 | branches: 7 | - main 8 | - master 9 | paths: 10 | - "pyproject.toml" 11 | 12 | jobs: 13 | publish-node: 14 | name: Publish Custom Node to registry 15 | runs-on: ubuntu-latest 16 | # if this is a forked repository. Skipping the workflow. 17 | if: github.event.repository.fork == false 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@main 23 | with: 24 | ## Add your own personal access token to your Github Repository secrets and reference it here. 25 | ## Get your personal access token from https://docs.comfy.org/registry/publishing#create-an-api-key-for-publishing 26 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.idea/ 2 | /venv/ 3 | /__pycache__/ 4 | models/ 5 | *.png 6 | *.jpg 7 | *.webp 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI Moondream Gaze Detection 2 | 3 | This is the [ComfyUI](https://github.com/comfyanonymous/ComfyUI) custom node for [Moondream](https://github.com/vikhyat/moondream)'s [gaze detection feature](https://huggingface.co/spaces/moondream/gaze-demo). 4 | 5 | 6 | 7 | https://github.com/user-attachments/assets/58ff74d8-5ab3-4269-b335-fbe3d38062fd 8 | 9 | 10 | 11 | 12 | 13 | ## Installation 14 | 15 | 1. Place this repository into `ComfyUI\custom_nodes\` 16 | ``` 17 | git clone https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection.git 18 | ``` 19 | 20 | 2. Go to `ComfyUI\custom_nodes\ComfyUI-Moondream-Gaze-Detection` and run 21 | ``` 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | If you are using the portable version of ComfyUI, do this: 26 | ``` 27 | python_embeded\python.exe -m pip install -r ComfyUI\custom_nodes\ComfyUI-Moondream-Gaze-Detection\requirements.txt 28 | ``` 29 | 30 | ## Workflows 31 | Example workflows that generate from single image and video can be found in the [examples/](https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection/tree/master/examples) directory. 32 | 33 | ## Models 34 | 35 | Models are automatically downloaded from: 36 | https://huggingface.co/vikhyatk/moondream2/tree/main 37 | 38 | To the path of your `ComfyUI/models/moondream`. 39 | 40 | ### VRAM Usage 41 | Peak VRAM for the model was 6GB on my end. 42 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes import * 2 | 3 | 4 | # Map all your custom nodes classes with the names that will be displayed in the UI. 5 | NODE_CLASS_MAPPINGS = { 6 | "(Down)Load Moondream Model": MoondreamModelLoader, 7 | "Gaze Detection": GazeDetection, 8 | "Gaze Detection Video": GazeDetectionVideo 9 | } 10 | 11 | 12 | __all__ = ['NODE_CLASS_MAPPINGS'] 13 | -------------------------------------------------------------------------------- /examples/Moondream Gaze Detection Image.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 21, 3 | "last_link_id": 14, 4 | "nodes": [ 5 | { 6 | "id": 13, 7 | "type": "(Down)Load Moondream Model", 8 | "pos": [ 9 | -104.5158920288086, 10 | -324.65252685546875 11 | ], 12 | "size": [ 13 | 327.5999755859375, 14 | 58 15 | ], 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "inputs": [], 20 | "outputs": [ 21 | { 22 | "name": "model", 23 | "type": "MOONDREAM_MODEL", 24 | "links": [ 25 | 12 26 | ], 27 | "slot_index": 0 28 | } 29 | ], 30 | "properties": { 31 | "Node name for S&R": "(Down)Load Moondream Model" 32 | }, 33 | "widgets_values": [ 34 | "cuda" 35 | ] 36 | }, 37 | { 38 | "id": 20, 39 | "type": "LoadImage", 40 | "pos": [ 41 | -98.14690399169922, 42 | -209.35580444335938 43 | ], 44 | "size": [ 45 | 315, 46 | 314 47 | ], 48 | "flags": {}, 49 | "order": 1, 50 | "mode": 0, 51 | "inputs": [], 52 | "outputs": [ 53 | { 54 | "name": "IMAGE", 55 | "type": "IMAGE", 56 | "links": [ 57 | 14 58 | ], 59 | "slot_index": 0 60 | }, 61 | { 62 | "name": "MASK", 63 | "type": "MASK", 64 | "links": null 65 | } 66 | ], 67 | "properties": { 68 | "Node name for S&R": "LoadImage" 69 | }, 70 | "widgets_values": [ 71 | "ComfyUI_00448_.png", 72 | "image" 73 | ] 74 | }, 75 | { 76 | "id": 19, 77 | "type": "Gaze Detection", 78 | "pos": [ 79 | 280.18670654296875, 80 | -327.49951171875 81 | ], 82 | "size": [ 83 | 315, 84 | 78 85 | ], 86 | "flags": {}, 87 | "order": 3, 88 | "mode": 0, 89 | "inputs": [ 90 | { 91 | "name": "model", 92 | "type": "MOONDREAM_MODEL", 93 | "link": 12 94 | }, 95 | { 96 | "name": "image", 97 | "type": "IMAGE", 98 | "link": 14 99 | } 100 | ], 101 | "outputs": [ 102 | { 103 | "name": "image", 104 | "type": "IMAGE", 105 | "links": [ 106 | 13 107 | ], 108 | "slot_index": 0 109 | } 110 | ], 111 | "properties": { 112 | "Node name for S&R": "Gaze Detection" 113 | }, 114 | "widgets_values": [ 115 | false 116 | ] 117 | }, 118 | { 119 | "id": 21, 120 | "type": "PreviewImage", 121 | "pos": [ 122 | 655.6904296875, 123 | -326.3870849609375 124 | ], 125 | "size": [ 126 | 210, 127 | 26 128 | ], 129 | "flags": {}, 130 | "order": 4, 131 | "mode": 0, 132 | "inputs": [ 133 | { 134 | "name": "images", 135 | "type": "IMAGE", 136 | "link": 13 137 | } 138 | ], 139 | "outputs": [], 140 | "properties": { 141 | "Node name for S&R": "PreviewImage" 142 | } 143 | }, 144 | { 145 | "id": 9, 146 | "type": "Note", 147 | "pos": [ 148 | -171.00161743164062, 149 | -515.4930419921875 150 | ], 151 | "size": [ 152 | 395.0805969238281, 153 | 121.18170928955078 154 | ], 155 | "flags": {}, 156 | "order": 2, 157 | "mode": 0, 158 | "inputs": [], 159 | "outputs": [], 160 | "properties": {}, 161 | "widgets_values": [ 162 | "(Down)Load Moondream Model node will automatically download model from:\nhttps://huggingface.co/vikhyatk/moondream2/tree/main\n\nTo your \"ComfyUI/models/moondream\" path.\n\nDownloading the model for the first time will take some time. ( about 5 minutes )" 163 | ], 164 | "color": "#432", 165 | "bgcolor": "#653" 166 | } 167 | ], 168 | "links": [ 169 | [ 170 | 12, 171 | 13, 172 | 0, 173 | 19, 174 | 0, 175 | "MOONDREAM_MODEL" 176 | ], 177 | [ 178 | 13, 179 | 19, 180 | 0, 181 | 21, 182 | 0, 183 | "IMAGE" 184 | ], 185 | [ 186 | 14, 187 | 20, 188 | 0, 189 | 19, 190 | 1, 191 | "IMAGE" 192 | ] 193 | ], 194 | "groups": [], 195 | "config": {}, 196 | "extra": { 197 | "ds": { 198 | "scale": 1.0731406124180265, 199 | "offset": [ 200 | 454.0207909586871, 201 | 642.07738521807 202 | ] 203 | }, 204 | "ue_links": [], 205 | "node_versions": { 206 | "ComfyUI-Moondream-Gaze-Detection": "1f57a4c6ad09d318330b0b29b3785f24231a05ee", 207 | "comfy-core": "v0.3.10" 208 | } 209 | }, 210 | "version": 0.4 211 | } -------------------------------------------------------------------------------- /examples/Moondream Gaze Detection Video.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 17, 3 | "last_link_id": 14, 4 | "nodes": [ 5 | { 6 | "id": 1, 7 | "type": "(Down)Load Moondream Model", 8 | "pos": [ 9 | -92.72958374023438, 10 | -321.7299499511719 11 | ], 12 | "size": [ 13 | 327.5999755859375, 14 | 58 15 | ], 16 | "flags": {}, 17 | "order": 0, 18 | "mode": 0, 19 | "inputs": [], 20 | "outputs": [ 21 | { 22 | "name": "model", 23 | "type": "MOONDREAM_MODEL", 24 | "links": [ 25 | 7 26 | ], 27 | "slot_index": 0 28 | } 29 | ], 30 | "properties": { 31 | "Node name for S&R": "(Down)Load Moondream Model" 32 | }, 33 | "widgets_values": [ 34 | "cuda" 35 | ] 36 | }, 37 | { 38 | "id": 12, 39 | "type": "Gaze Detection Video", 40 | "pos": [ 41 | 375.0546569824219, 42 | -323.3594970703125 43 | ], 44 | "size": [ 45 | 252, 46 | 78 47 | ], 48 | "flags": {}, 49 | "order": 3, 50 | "mode": 0, 51 | "inputs": [ 52 | { 53 | "name": "model", 54 | "type": "MOONDREAM_MODEL", 55 | "link": 7 56 | }, 57 | { 58 | "name": "video", 59 | "type": "IMAGE", 60 | "link": 14 61 | } 62 | ], 63 | "outputs": [ 64 | { 65 | "name": "images", 66 | "type": "IMAGE", 67 | "links": [ 68 | 13 69 | ], 70 | "slot_index": 0 71 | } 72 | ], 73 | "properties": { 74 | "Node name for S&R": "Gaze Detection Video" 75 | }, 76 | "widgets_values": [ 77 | false 78 | ] 79 | }, 80 | { 81 | "id": 16, 82 | "type": "VHS_VideoCombine", 83 | "pos": [ 84 | 730.4083862304688, 85 | -329.4408264160156 86 | ], 87 | "size": [ 88 | 214.7587890625, 89 | 334 90 | ], 91 | "flags": {}, 92 | "order": 4, 93 | "mode": 0, 94 | "inputs": [ 95 | { 96 | "name": "images", 97 | "type": "IMAGE", 98 | "link": 13 99 | }, 100 | { 101 | "name": "audio", 102 | "type": "AUDIO", 103 | "link": null, 104 | "shape": 7 105 | }, 106 | { 107 | "name": "meta_batch", 108 | "type": "VHS_BatchManager", 109 | "link": null, 110 | "shape": 7 111 | }, 112 | { 113 | "name": "vae", 114 | "type": "VAE", 115 | "link": null, 116 | "shape": 7 117 | } 118 | ], 119 | "outputs": [ 120 | { 121 | "name": "Filenames", 122 | "type": "VHS_FILENAMES", 123 | "links": null 124 | } 125 | ], 126 | "properties": { 127 | "Node name for S&R": "VHS_VideoCombine" 128 | }, 129 | "widgets_values": { 130 | "frame_rate": 15, 131 | "loop_count": 0, 132 | "filename_prefix": "Moondream", 133 | "format": "video/h264-mp4", 134 | "pix_fmt": "yuv420p", 135 | "crf": 19, 136 | "save_metadata": true, 137 | "trim_to_audio": false, 138 | "pingpong": false, 139 | "save_output": true, 140 | "videopreview": { 141 | "hidden": false, 142 | "paused": false, 143 | "params": {}, 144 | "muted": false 145 | } 146 | } 147 | }, 148 | { 149 | "id": 9, 150 | "type": "Note", 151 | "pos": [ 152 | -163.30043029785156, 153 | -511.2145080566406 154 | ], 155 | "size": [ 156 | 395.0805969238281, 157 | 121.18170928955078 158 | ], 159 | "flags": {}, 160 | "order": 1, 161 | "mode": 0, 162 | "inputs": [], 163 | "outputs": [], 164 | "properties": {}, 165 | "widgets_values": [ 166 | "(Down)Load Moondream Model node will automatically download model from:\nhttps://huggingface.co/vikhyatk/moondream2/tree/main\n\nTo your \"ComfyUI/models/moondream\" path.\n\nDownloading the model for the first time will take some time. ( about 5 minutes )" 167 | ], 168 | "color": "#432", 169 | "bgcolor": "#653" 170 | }, 171 | { 172 | "id": 17, 173 | "type": "VHS_LoadVideo", 174 | "pos": [ 175 | -6.481391429901123, 176 | -191.2500762939453 177 | ], 178 | "size": [ 179 | 247.455078125, 180 | 262 181 | ], 182 | "flags": {}, 183 | "order": 2, 184 | "mode": 0, 185 | "inputs": [ 186 | { 187 | "name": "meta_batch", 188 | "type": "VHS_BatchManager", 189 | "link": null, 190 | "shape": 7 191 | }, 192 | { 193 | "name": "vae", 194 | "type": "VAE", 195 | "link": null, 196 | "shape": 7 197 | } 198 | ], 199 | "outputs": [ 200 | { 201 | "name": "IMAGE", 202 | "type": "IMAGE", 203 | "links": [ 204 | 14 205 | ], 206 | "slot_index": 0 207 | }, 208 | { 209 | "name": "frame_count", 210 | "type": "INT", 211 | "links": null 212 | }, 213 | { 214 | "name": "audio", 215 | "type": "AUDIO", 216 | "links": null 217 | }, 218 | { 219 | "name": "video_info", 220 | "type": "VHS_VIDEOINFO", 221 | "links": null 222 | } 223 | ], 224 | "properties": { 225 | "Node name for S&R": "VHS_LoadVideo" 226 | }, 227 | "widgets_values": { 228 | "force_rate": 0, 229 | "force_size": "Disabled", 230 | "custom_width": 512, 231 | "custom_height": 512, 232 | "frame_load_cap": 0, 233 | "skip_first_frames": 0, 234 | "select_every_nth": 1, 235 | "choose video to upload": "image", 236 | "videopreview": { 237 | "hidden": false, 238 | "paused": false, 239 | "params": { 240 | "force_rate": 0, 241 | "frame_load_cap": 0, 242 | "skip_first_frames": 0, 243 | "select_every_nth": 1 244 | }, 245 | "muted": false 246 | } 247 | } 248 | } 249 | ], 250 | "links": [ 251 | [ 252 | 7, 253 | 1, 254 | 0, 255 | 12, 256 | 0, 257 | "MOONDREAM_MODEL" 258 | ], 259 | [ 260 | 13, 261 | 12, 262 | 0, 263 | 16, 264 | 0, 265 | "IMAGE" 266 | ], 267 | [ 268 | 14, 269 | 17, 270 | 0, 271 | 12, 272 | 1, 273 | "IMAGE" 274 | ] 275 | ], 276 | "groups": [], 277 | "config": {}, 278 | "extra": { 279 | "ds": { 280 | "scale": 0.8868930681140714, 281 | "offset": [ 282 | 674.3723265187537, 283 | 729.820747848737 284 | ] 285 | }, 286 | "ue_links": [], 287 | "VHS_latentpreview": false, 288 | "VHS_latentpreviewrate": 0, 289 | "node_versions": { 290 | "ComfyUI-Moondream-Gaze-Detection": "1f57a4c6ad09d318330b0b29b3785f24231a05ee", 291 | "comfyui-videohelpersuite": "1.4.5" 292 | } 293 | }, 294 | "version": 0.4 295 | } -------------------------------------------------------------------------------- /modules/inferencer/moondream_inferencer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import matplotlib.pyplot as plt 4 | import numpy as np 5 | import matplotlib 6 | import PIL 7 | from PIL import Image 8 | from transformers import AutoModelForCausalLM 9 | from typing import (Union, Tuple, List, Dict, Optional, Any) 10 | import cv2 11 | import io 12 | 13 | from .pyvips_dll_handler import handle_pyvips_dll_error 14 | 15 | 16 | class MoondreamInferencer: 17 | def __init__(self, 18 | model_dir: str): 19 | self.model = None 20 | self.model_dir = model_dir 21 | os.makedirs(self.model_dir, exist_ok=True) 22 | 23 | def load_model(self, 24 | device: str = "cuda"): 25 | self.model = AutoModelForCausalLM.from_pretrained( 26 | "vikhyatk/moondream2", 27 | trust_remote_code=True, 28 | device_map={"": device}, 29 | cache_dir=self.model_dir, 30 | ) 31 | 32 | def process_image( 33 | self, 34 | input_image: Union[np.ndarray, Any], 35 | use_ensemble: bool 36 | ): 37 | if self.model is None: 38 | self.load_model() 39 | 40 | if input_image is None: 41 | return None, "" 42 | 43 | try: 44 | if isinstance(input_image, torch.Tensor): 45 | if input_image.dim() == 4 and input_image.shape[0] == 1: 46 | input_image = input_image[0] # shape now (H, W, 3) 47 | image_np = (input_image.cpu().numpy() * 255).astype(np.uint8) 48 | 49 | pil_image = Image.fromarray(image_np).convert("RGB") 50 | 51 | elif isinstance(input_image, np.ndarray): 52 | pil_image = Image.fromarray(input_image) 53 | 54 | else: 55 | pil_image = input_image 56 | 57 | enc_image = self.model.encode_image(pil_image) 58 | if use_ensemble: 59 | flipped_pil = pil_image.copy().transpose(method=Image.FLIP_LEFT_RIGHT) 60 | flip_enc_image = self.model.encode_image(flipped_pil) 61 | else: 62 | flip_enc_image = None 63 | 64 | faces = self.model.detect(enc_image, "face")["objects"] 65 | faces.sort(key=lambda x: (x["x_min"], x["y_min"])) 66 | 67 | face_boxes = [] 68 | gaze_points = [] 69 | 70 | if faces: 71 | for face in faces: 72 | # Add face bounding box regardless of gaze detection 73 | face_box = ( 74 | face["x_min"] * pil_image.width, 75 | face["y_min"] * pil_image.height, 76 | (face["x_max"] - face["x_min"]) * pil_image.width, 77 | (face["y_max"] - face["y_min"]) * pil_image.height, 78 | ) 79 | face_center = ( 80 | (face["x_min"] + face["x_max"]) / 2, 81 | (face["y_min"] + face["y_max"]) / 2 82 | ) 83 | face_boxes.append(face_box) 84 | 85 | # Try to detect gaze 86 | gaze_settings = { 87 | "prioritize_accuracy": use_ensemble, 88 | "flip_enc_img": flip_enc_image 89 | } 90 | gaze = self.model.detect_gaze(enc_image, face=face, eye=face_center, unstable_settings=gaze_settings)["gaze"] 91 | 92 | if gaze is not None: 93 | gaze_point = ( 94 | gaze["x"] * pil_image.width, 95 | gaze["y"] * pil_image.height, 96 | ) 97 | gaze_points.append(gaze_point) 98 | else: 99 | gaze_points.append(None) 100 | 101 | # Create visualization 102 | image_array = np.array(pil_image) 103 | fig = self.visualize_faces_and_gaze( 104 | face_boxes, gaze_points, image=image_array, show_plot=False 105 | ) 106 | 107 | faces_with_gaze = sum(1 for gp in gaze_points if gp is not None) 108 | status = f"Found {len(faces)} faces. {len(faces) - faces_with_gaze} gazing out of frame." 109 | return fig, status 110 | 111 | except Exception as e: 112 | return None, f"Error processing image: {str(e)}" 113 | 114 | @staticmethod 115 | def visualize_faces_and_gaze(face_boxes, gaze_points=None, image=None, show_plot=True): 116 | """Visualization function that can handle faces without gaze data""" 117 | # Calculate figure size based on image aspect ratio 118 | if image is not None: 119 | height, width = image.shape[:2] 120 | aspect_ratio = width / height 121 | fig_height = 6 # Base height 122 | fig_width = fig_height * aspect_ratio 123 | else: 124 | width, height = 800, 600 125 | fig_width, fig_height = 10, 8 126 | 127 | fig = plt.figure(figsize=(fig_width, fig_height)) 128 | ax = fig.add_subplot(111) 129 | 130 | if image is not None: 131 | ax.imshow(image) 132 | else: 133 | ax.set_facecolor("#1a1a1a") 134 | fig.patch.set_facecolor("#1a1a1a") 135 | 136 | colors = plt.cm.rainbow(np.linspace(0, 1, len(face_boxes))) 137 | 138 | for i, (face_box, color) in enumerate(zip(face_boxes, colors)): 139 | hex_color = "#{:02x}{:02x}{:02x}".format( 140 | int(color[0] * 255), int(color[1] * 255), int(color[2] * 255) 141 | ) 142 | 143 | x, y, width_box, height_box = face_box 144 | face_center_x = x + width_box / 2 145 | face_center_y = y + height_box / 2 146 | 147 | # Draw face bounding box 148 | face_rect = plt.Rectangle( 149 | (x, y), width_box, height_box, fill=False, color=hex_color, linewidth=2 150 | ) 151 | ax.add_patch(face_rect) 152 | 153 | # Draw gaze line if gaze data is available 154 | if gaze_points is not None and i < len(gaze_points) and gaze_points[i] is not None: 155 | gaze_x, gaze_y = gaze_points[i] 156 | 157 | points = 50 158 | alphas = np.linspace(0.8, 0, points) 159 | 160 | x_points = np.linspace(face_center_x, gaze_x, points) 161 | y_points = np.linspace(face_center_y, gaze_y, points) 162 | 163 | for j in range(points - 1): 164 | ax.plot( 165 | [x_points[j], x_points[j + 1]], 166 | [y_points[j], y_points[j + 1]], 167 | color=hex_color, 168 | alpha=alphas[j], 169 | linewidth=4, 170 | ) 171 | 172 | ax.scatter(gaze_x, gaze_y, color=hex_color, s=100, zorder=5) 173 | ax.scatter(gaze_x, gaze_y, color="white", s=50, zorder=6) 174 | 175 | # Set plot limits and remove axes 176 | ax.set_xlim(0, width) 177 | ax.set_ylim(height, 0) 178 | ax.set_aspect("equal") 179 | ax.set_xticks([]) 180 | ax.set_yticks([]) 181 | 182 | # Remove padding around the plot 183 | plt.subplots_adjust(left=0, right=1, bottom=0, top=1) 184 | 185 | return fig 186 | 187 | @staticmethod 188 | def figure_to_tensor(fig) -> torch.Tensor: 189 | """ 190 | Converts a matplotlib Figure into a PyTorch tensor of shape. 191 | """ 192 | buf = io.BytesIO() 193 | fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0) 194 | plt.close(fig) 195 | 196 | buf.seek(0) 197 | pil_img = Image.open(buf).convert("RGB") 198 | 199 | np_img = np.array(pil_img, dtype=np.float32) / 255.0 200 | 201 | tensor_img = torch.from_numpy(np_img).unsqueeze(0) 202 | return tensor_img 203 | -------------------------------------------------------------------------------- /modules/inferencer/pyvips_dll_handler.py: -------------------------------------------------------------------------------- 1 | # To fix : https://github.com/libvips/pyvips/issues/489 2 | # You can manually download binaries from : https://github.com/libvips/build-win64-mxe/releases/tag/v8.16.0 3 | 4 | import platform 5 | import os 6 | import requests 7 | import zipfile 8 | 9 | 10 | PYVIPS_WIN_DLL_URL = { 11 | "AMD64": "https://github.com/libvips/build-win64-mxe/releases/download/v8.16.0/vips-dev-w64-all-8.16.0.zip", 12 | "X86": "https://github.com/libvips/build-win64-mxe/releases/download/v8.16.0/vips-dev-w32-all-8.16.0.zip" 13 | } 14 | 15 | 16 | def handle_pyvips_dll_error(download_dir: str): 17 | """Download Windows dll for pyvips and add the bin directory to the PATH.""" 18 | pyvips_dll_dir = os.path.join(download_dir, "vips-dev-8.16") 19 | os.makedirs(pyvips_dll_dir, exist_ok=True) 20 | 21 | if not os.path.exists(pyvips_dll_dir) or not os.listdir(pyvips_dll_dir): 22 | system = platform.system() 23 | 24 | if system.upper() == "WINDOWS": 25 | print(f"pyvips dll directory not detected. Downloading it to \"{pyvips_dll_dir}\"..") 26 | 27 | arch = os.environ.get("PROCESSOR_ARCHITECTURE", "") 28 | arch = arch.upper() 29 | url = PYVIPS_WIN_DLL_URL.get(arch, PYVIPS_WIN_DLL_URL["AMD64"]) 30 | else: 31 | return 32 | 33 | zip_filename = os.path.join(download_dir, "pyvips_dll.zip") 34 | 35 | response = requests.get(url, stream=True) 36 | response.raise_for_status() 37 | 38 | with open(zip_filename, 'wb') as f: 39 | for chunk in response.iter_content(chunk_size=8192): 40 | f.write(chunk) 41 | 42 | with zipfile.ZipFile(zip_filename, 'r') as zip_ref: 43 | zip_ref.extractall(download_dir) 44 | 45 | os.remove(zip_filename) 46 | 47 | # Add PATH 48 | pyvips_bin_dir = os.path.join(pyvips_dll_dir, "bin") 49 | os.environ['PATH'] = os.pathsep.join((pyvips_bin_dir, os.environ['PATH'])) 50 | 51 | -------------------------------------------------------------------------------- /nodes.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import (Union, BinaryIO, Dict, List, Tuple, Optional, Any) 3 | import torch 4 | import time 5 | 6 | import folder_paths 7 | from comfy.utils import ProgressBar 8 | 9 | from .modules.inferencer.moondream_inferencer import MoondreamInferencer 10 | from .modules.inferencer.pyvips_dll_handler import handle_pyvips_dll_error 11 | 12 | 13 | custom_nodes_script_dir = os.path.dirname(os.path.abspath(__file__)) 14 | custom_nodes_model_dir = os.path.join(folder_paths.models_dir, "moondream") 15 | os.makedirs(custom_nodes_model_dir, exist_ok=True) 16 | 17 | 18 | def get_category_name(): 19 | return "Moondream Gaze Detection" 20 | 21 | 22 | class MoondreamModelLoader: 23 | @classmethod 24 | def INPUT_TYPES(s): 25 | return { 26 | "required": { 27 | "device": (['cuda', 'cpu'],), 28 | }, 29 | } 30 | 31 | RETURN_TYPES = ("MOONDREAM_MODEL",) 32 | RETURN_NAMES = ("model",) 33 | FUNCTION = "load_model" 34 | CATEGORY = get_category_name() 35 | 36 | def load_model(self, 37 | device: str, 38 | ) -> Tuple[MoondreamInferencer]: 39 | handle_pyvips_dll_error(download_dir=custom_nodes_script_dir) 40 | model_inferencer = MoondreamInferencer(model_dir=custom_nodes_model_dir) 41 | model_inferencer.load_model(device=device) 42 | 43 | return (model_inferencer, ) 44 | 45 | 46 | class GazeDetection: 47 | @classmethod 48 | def INPUT_TYPES(s): 49 | return { 50 | "required": { 51 | "model": ("MOONDREAM_MODEL", ), 52 | "image": ("IMAGE", ), 53 | "use_ensemble": ("BOOLEAN", {"default": False}), 54 | }, 55 | } 56 | 57 | RETURN_TYPES = ("IMAGE",) 58 | RETURN_NAMES = ("image",) 59 | FUNCTION = "gaze_detection" 60 | CATEGORY = get_category_name() 61 | 62 | def gaze_detection(self, 63 | model: MoondreamInferencer, 64 | image: Any, 65 | use_ensemble: bool, 66 | ) -> Tuple: 67 | fig, status = model.process_image(image, use_ensemble=use_ensemble) 68 | out_img = model.figure_to_tensor(fig) 69 | 70 | return (out_img, ) 71 | 72 | 73 | class GazeDetectionVideo: 74 | @classmethod 75 | def INPUT_TYPES(s): 76 | return { 77 | "required": { 78 | "model": ("MOONDREAM_MODEL", ), 79 | "video": ("IMAGE", ), 80 | "use_ensemble": ("BOOLEAN", {"default": False}), 81 | }, 82 | } 83 | 84 | RETURN_TYPES = ("IMAGE",) 85 | RETURN_NAMES = ("images",) 86 | FUNCTION = "gaze_detection_video" 87 | CATEGORY = get_category_name() 88 | 89 | def gaze_detection_video(self, 90 | model: MoondreamInferencer, 91 | video: Any, 92 | use_ensemble: bool, 93 | ) -> Tuple: 94 | num_frames = video.shape[0] 95 | height = video.shape[1] 96 | width = video.shape[2] 97 | channels = video.shape[3] 98 | 99 | comfy_pbar = ProgressBar(num_frames) 100 | out_frames = [] 101 | for f in range(num_frames): 102 | frame_tensor = video[f] 103 | fig, status = model.process_image(frame_tensor, use_ensemble=use_ensemble) 104 | out_img = model.figure_to_tensor(fig) 105 | 106 | out_img = out_img.squeeze(0) 107 | out_frames.append(out_img) 108 | 109 | comfy_pbar.update(1) 110 | 111 | out_frames_tensor = torch.stack(out_frames, dim=0) 112 | 113 | return (out_frames_tensor, ) 114 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui-moondream-gaze-detection" 3 | description = "Moondream's gaze detection feature wrapper node." 4 | version = "1.0.4" 5 | license = {file = "LICENSE"} 6 | dependencies = ["matplotlib==3.10.0", "pyvips==2.2.3", "accelerate>=0.32.1", "opencv-python"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection.git" 10 | # Used by Comfy Registry https://comfyregistry.org 11 | 12 | [tool.comfy] 13 | PublisherId = "jhj0517" 14 | DisplayName = "ComfyUI-Moondream-Gaze-Detection" 15 | Icon = "" 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib==3.10.0 2 | pyvips==2.2.3 3 | accelerate>=0.32.1 4 | opencv-python --------------------------------------------------------------------------------