├── .github
    ├── ISSUE_TEMPLATE
    │   ├── bug_report.md
    │   └── feature_request.md
    ├── pull_request_template.md
    └── workflows
    │   └── publish-comfyui-registry.yml
├── .gitignore
├── README.md
├── __init__.py
├── examples
    ├── Moondream Gaze Detection Image.json
    └── Moondream Gaze Detection Video.json
├── modules
    └── inferencer
    │   ├── moondream_inferencer.py
    │   └── pyvips_dll_handler.py
├── nodes.py
├── pyproject.toml
└── requirements.txt


/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Bug report
 3 | about: Create a report to help us improve
 4 | title: ''
 5 | labels: bug
 6 | assignees: 'jhj0517'
 7 | ---
 8 | 
 9 | **Which OS are you using?**
10 |  - OS: [e.g. Linux or Windows]
11 | 


--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: Feature request
 3 | about: Any feature you want
 4 | title: ''
 5 | labels: enhancement
 6 | assignees: 'jhj0517'
 7 | ---
 8 | 
 9 | **Describe the feature you want**
10 | 
11 | 


--------------------------------------------------------------------------------
/.github/pull_request_template.md:
--------------------------------------------------------------------------------
1 | ## Related issues / PRs.
2 | - #
3 | 
4 | ## Summarize Changes
5 | 1. 
6 | 


--------------------------------------------------------------------------------
/.github/workflows/publish-comfyui-registry.yml:
--------------------------------------------------------------------------------
 1 | name: Publish to Comfy registry
 2 | on:
 3 |   workflow_dispatch:
 4 | 
 5 |   push:
 6 |     branches:
 7 |       - main
 8 |       - master
 9 |     paths:
10 |       - "pyproject.toml"
11 | 
12 | jobs:
13 |   publish-node:
14 |     name: Publish Custom Node to registry
15 |     runs-on: ubuntu-latest
16 |     # Skip this workflow when the repository is a fork.
17 |     if: github.event.repository.fork == false 
18 |     steps:
19 |       - name: Check out code
20 |         uses: actions/checkout@v4
21 |       - name: Publish Custom Node
22 |         uses: Comfy-Org/publish-node-action@main
23 |         with:
24 |           ## Add your own personal access token to your Github Repository secrets and reference it here.
25 |           ## Get your personal access token from https://docs.comfy.org/registry/publishing#create-an-api-key-for-publishing
26 |           personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }}
27 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea/
2 | /venv/
3 | /__pycache__/
4 | models/
5 | *.png
6 | *.jpg
7 | *.webp
8 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # ComfyUI Moondream Gaze Detection
 2 | 
 3 | This is the [ComfyUI](https://github.com/comfyanonymous/ComfyUI) custom node for [Moondream](https://github.com/vikhyat/moondream)'s [gaze detection feature](https://huggingface.co/spaces/moondream/gaze-demo).
 4 | 
 5 | 
 6 | 
 7 | https://github.com/user-attachments/assets/58ff74d8-5ab3-4269-b335-fbe3d38062fd
 8 | 
 9 | 
10 | 
11 | 
12 | 
13 | ## Installation
14 | 
15 | 1. Place this repository into `ComfyUI\custom_nodes\`
16 | ```
17 | git clone https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection.git
18 | ```
19 | 
20 | 2. Go to `ComfyUI\custom_nodes\ComfyUI-Moondream-Gaze-Detection` and run
21 | ```
22 | pip install -r requirements.txt
23 | ```
24 | 
25 | If you are using the portable version of ComfyUI, do this:
26 | ```
27 | python_embeded\python.exe -m pip install -r ComfyUI\custom_nodes\ComfyUI-Moondream-Gaze-Detection\requirements.txt
28 | ```
29 | 
30 | ## Workflows
31 | Example workflows for a single image and for a video can be found in the [examples/](https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection/tree/master/examples) directory.
32 | 
33 | ## Models
34 | 
35 | Models are downloaded automatically from:
36 | https://huggingface.co/vikhyatk/moondream2/tree/main
37 | 
38 | They are saved to your `ComfyUI/models/moondream` directory.
39 | 
40 | ### VRAM Usage
41 | Peak VRAM for the model was 6GB on my end.
42 | 


--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
 1 | from .nodes import *
 2 | 
 3 | 
 4 | #  Maps each node identifier (the "type" string stored in the workflows under examples/) to the class that implements it.
 5 | NODE_CLASS_MAPPINGS = {
 6 |     "(Down)Load Moondream Model": MoondreamModelLoader,
 7 |     "Gaze Detection": GazeDetection,
 8 |     "Gaze Detection Video": GazeDetectionVideo
 9 | }
10 | 
11 | 
12 | __all__ = ['NODE_CLASS_MAPPINGS']  # only the mapping is exported from this package
13 | 


--------------------------------------------------------------------------------
/examples/Moondream Gaze Detection Image.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "last_node_id": 21,
  3 |   "last_link_id": 14,
  4 |   "nodes": [
  5 |     {
  6 |       "id": 13,
  7 |       "type": "(Down)Load Moondream Model",
  8 |       "pos": [
  9 |         -104.5158920288086,
 10 |         -324.65252685546875
 11 |       ],
 12 |       "size": [
 13 |         327.5999755859375,
 14 |         58
 15 |       ],
 16 |       "flags": {},
 17 |       "order": 0,
 18 |       "mode": 0,
 19 |       "inputs": [],
 20 |       "outputs": [
 21 |         {
 22 |           "name": "model",
 23 |           "type": "MOONDREAM_MODEL",
 24 |           "links": [
 25 |             12
 26 |           ],
 27 |           "slot_index": 0
 28 |         }
 29 |       ],
 30 |       "properties": {
 31 |         "Node name for S&R": "(Down)Load Moondream Model"
 32 |       },
 33 |       "widgets_values": [
 34 |         "cuda"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "id": 20,
 39 |       "type": "LoadImage",
 40 |       "pos": [
 41 |         -98.14690399169922,
 42 |         -209.35580444335938
 43 |       ],
 44 |       "size": [
 45 |         315,
 46 |         314
 47 |       ],
 48 |       "flags": {},
 49 |       "order": 1,
 50 |       "mode": 0,
 51 |       "inputs": [],
 52 |       "outputs": [
 53 |         {
 54 |           "name": "IMAGE",
 55 |           "type": "IMAGE",
 56 |           "links": [
 57 |             14
 58 |           ],
 59 |           "slot_index": 0
 60 |         },
 61 |         {
 62 |           "name": "MASK",
 63 |           "type": "MASK",
 64 |           "links": null
 65 |         }
 66 |       ],
 67 |       "properties": {
 68 |         "Node name for S&R": "LoadImage"
 69 |       },
 70 |       "widgets_values": [
 71 |         "ComfyUI_00448_.png",
 72 |         "image"
 73 |       ]
 74 |     },
 75 |     {
 76 |       "id": 19,
 77 |       "type": "Gaze Detection",
 78 |       "pos": [
 79 |         280.18670654296875,
 80 |         -327.49951171875
 81 |       ],
 82 |       "size": [
 83 |         315,
 84 |         78
 85 |       ],
 86 |       "flags": {},
 87 |       "order": 3,
 88 |       "mode": 0,
 89 |       "inputs": [
 90 |         {
 91 |           "name": "model",
 92 |           "type": "MOONDREAM_MODEL",
 93 |           "link": 12
 94 |         },
 95 |         {
 96 |           "name": "image",
 97 |           "type": "IMAGE",
 98 |           "link": 14
 99 |         }
100 |       ],
101 |       "outputs": [
102 |         {
103 |           "name": "image",
104 |           "type": "IMAGE",
105 |           "links": [
106 |             13
107 |           ],
108 |           "slot_index": 0
109 |         }
110 |       ],
111 |       "properties": {
112 |         "Node name for S&R": "Gaze Detection"
113 |       },
114 |       "widgets_values": [
115 |         false
116 |       ]
117 |     },
118 |     {
119 |       "id": 21,
120 |       "type": "PreviewImage",
121 |       "pos": [
122 |         655.6904296875,
123 |         -326.3870849609375
124 |       ],
125 |       "size": [
126 |         210,
127 |         26
128 |       ],
129 |       "flags": {},
130 |       "order": 4,
131 |       "mode": 0,
132 |       "inputs": [
133 |         {
134 |           "name": "images",
135 |           "type": "IMAGE",
136 |           "link": 13
137 |         }
138 |       ],
139 |       "outputs": [],
140 |       "properties": {
141 |         "Node name for S&R": "PreviewImage"
142 |       }
143 |     },
144 |     {
145 |       "id": 9,
146 |       "type": "Note",
147 |       "pos": [
148 |         -171.00161743164062,
149 |         -515.4930419921875
150 |       ],
151 |       "size": [
152 |         395.0805969238281,
153 |         121.18170928955078
154 |       ],
155 |       "flags": {},
156 |       "order": 2,
157 |       "mode": 0,
158 |       "inputs": [],
159 |       "outputs": [],
160 |       "properties": {},
161 |       "widgets_values": [
162 |         "(Down)Load Moondream Model node will automatically download model from:\nhttps://huggingface.co/vikhyatk/moondream2/tree/main\n\nTo your \"ComfyUI/models/moondream\" path.\n\nDownloading the model for the first time will take some time. ( about 5 minutes )"
163 |       ],
164 |       "color": "#432",
165 |       "bgcolor": "#653"
166 |     }
167 |   ],
168 |   "links": [
169 |     [
170 |       12,
171 |       13,
172 |       0,
173 |       19,
174 |       0,
175 |       "MOONDREAM_MODEL"
176 |     ],
177 |     [
178 |       13,
179 |       19,
180 |       0,
181 |       21,
182 |       0,
183 |       "IMAGE"
184 |     ],
185 |     [
186 |       14,
187 |       20,
188 |       0,
189 |       19,
190 |       1,
191 |       "IMAGE"
192 |     ]
193 |   ],
194 |   "groups": [],
195 |   "config": {},
196 |   "extra": {
197 |     "ds": {
198 |       "scale": 1.0731406124180265,
199 |       "offset": [
200 |         454.0207909586871,
201 |         642.07738521807
202 |       ]
203 |     },
204 |     "ue_links": [],
205 |     "node_versions": {
206 |       "ComfyUI-Moondream-Gaze-Detection": "1f57a4c6ad09d318330b0b29b3785f24231a05ee",
207 |       "comfy-core": "v0.3.10"
208 |     }
209 |   },
210 |   "version": 0.4
211 | }


--------------------------------------------------------------------------------
/examples/Moondream Gaze Detection Video.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "last_node_id": 17,
  3 |   "last_link_id": 14,
  4 |   "nodes": [
  5 |     {
  6 |       "id": 1,
  7 |       "type": "(Down)Load Moondream Model",
  8 |       "pos": [
  9 |         -92.72958374023438,
 10 |         -321.7299499511719
 11 |       ],
 12 |       "size": [
 13 |         327.5999755859375,
 14 |         58
 15 |       ],
 16 |       "flags": {},
 17 |       "order": 0,
 18 |       "mode": 0,
 19 |       "inputs": [],
 20 |       "outputs": [
 21 |         {
 22 |           "name": "model",
 23 |           "type": "MOONDREAM_MODEL",
 24 |           "links": [
 25 |             7
 26 |           ],
 27 |           "slot_index": 0
 28 |         }
 29 |       ],
 30 |       "properties": {
 31 |         "Node name for S&R": "(Down)Load Moondream Model"
 32 |       },
 33 |       "widgets_values": [
 34 |         "cuda"
 35 |       ]
 36 |     },
 37 |     {
 38 |       "id": 12,
 39 |       "type": "Gaze Detection Video",
 40 |       "pos": [
 41 |         375.0546569824219,
 42 |         -323.3594970703125
 43 |       ],
 44 |       "size": [
 45 |         252,
 46 |         78
 47 |       ],
 48 |       "flags": {},
 49 |       "order": 3,
 50 |       "mode": 0,
 51 |       "inputs": [
 52 |         {
 53 |           "name": "model",
 54 |           "type": "MOONDREAM_MODEL",
 55 |           "link": 7
 56 |         },
 57 |         {
 58 |           "name": "video",
 59 |           "type": "IMAGE",
 60 |           "link": 14
 61 |         }
 62 |       ],
 63 |       "outputs": [
 64 |         {
 65 |           "name": "images",
 66 |           "type": "IMAGE",
 67 |           "links": [
 68 |             13
 69 |           ],
 70 |           "slot_index": 0
 71 |         }
 72 |       ],
 73 |       "properties": {
 74 |         "Node name for S&R": "Gaze Detection Video"
 75 |       },
 76 |       "widgets_values": [
 77 |         false
 78 |       ]
 79 |     },
 80 |     {
 81 |       "id": 16,
 82 |       "type": "VHS_VideoCombine",
 83 |       "pos": [
 84 |         730.4083862304688,
 85 |         -329.4408264160156
 86 |       ],
 87 |       "size": [
 88 |         214.7587890625,
 89 |         334
 90 |       ],
 91 |       "flags": {},
 92 |       "order": 4,
 93 |       "mode": 0,
 94 |       "inputs": [
 95 |         {
 96 |           "name": "images",
 97 |           "type": "IMAGE",
 98 |           "link": 13
 99 |         },
100 |         {
101 |           "name": "audio",
102 |           "type": "AUDIO",
103 |           "link": null,
104 |           "shape": 7
105 |         },
106 |         {
107 |           "name": "meta_batch",
108 |           "type": "VHS_BatchManager",
109 |           "link": null,
110 |           "shape": 7
111 |         },
112 |         {
113 |           "name": "vae",
114 |           "type": "VAE",
115 |           "link": null,
116 |           "shape": 7
117 |         }
118 |       ],
119 |       "outputs": [
120 |         {
121 |           "name": "Filenames",
122 |           "type": "VHS_FILENAMES",
123 |           "links": null
124 |         }
125 |       ],
126 |       "properties": {
127 |         "Node name for S&R": "VHS_VideoCombine"
128 |       },
129 |       "widgets_values": {
130 |         "frame_rate": 15,
131 |         "loop_count": 0,
132 |         "filename_prefix": "Moondream",
133 |         "format": "video/h264-mp4",
134 |         "pix_fmt": "yuv420p",
135 |         "crf": 19,
136 |         "save_metadata": true,
137 |         "trim_to_audio": false,
138 |         "pingpong": false,
139 |         "save_output": true,
140 |         "videopreview": {
141 |           "hidden": false,
142 |           "paused": false,
143 |           "params": {},
144 |           "muted": false
145 |         }
146 |       }
147 |     },
148 |     {
149 |       "id": 9,
150 |       "type": "Note",
151 |       "pos": [
152 |         -163.30043029785156,
153 |         -511.2145080566406
154 |       ],
155 |       "size": [
156 |         395.0805969238281,
157 |         121.18170928955078
158 |       ],
159 |       "flags": {},
160 |       "order": 1,
161 |       "mode": 0,
162 |       "inputs": [],
163 |       "outputs": [],
164 |       "properties": {},
165 |       "widgets_values": [
166 |         "(Down)Load Moondream Model node will automatically download model from:\nhttps://huggingface.co/vikhyatk/moondream2/tree/main\n\nTo your \"ComfyUI/models/moondream\" path.\n\nDownloading the model for the first time will take some time. ( about 5 minutes )"
167 |       ],
168 |       "color": "#432",
169 |       "bgcolor": "#653"
170 |     },
171 |     {
172 |       "id": 17,
173 |       "type": "VHS_LoadVideo",
174 |       "pos": [
175 |         -6.481391429901123,
176 |         -191.2500762939453
177 |       ],
178 |       "size": [
179 |         247.455078125,
180 |         262
181 |       ],
182 |       "flags": {},
183 |       "order": 2,
184 |       "mode": 0,
185 |       "inputs": [
186 |         {
187 |           "name": "meta_batch",
188 |           "type": "VHS_BatchManager",
189 |           "link": null,
190 |           "shape": 7
191 |         },
192 |         {
193 |           "name": "vae",
194 |           "type": "VAE",
195 |           "link": null,
196 |           "shape": 7
197 |         }
198 |       ],
199 |       "outputs": [
200 |         {
201 |           "name": "IMAGE",
202 |           "type": "IMAGE",
203 |           "links": [
204 |             14
205 |           ],
206 |           "slot_index": 0
207 |         },
208 |         {
209 |           "name": "frame_count",
210 |           "type": "INT",
211 |           "links": null
212 |         },
213 |         {
214 |           "name": "audio",
215 |           "type": "AUDIO",
216 |           "links": null
217 |         },
218 |         {
219 |           "name": "video_info",
220 |           "type": "VHS_VIDEOINFO",
221 |           "links": null
222 |         }
223 |       ],
224 |       "properties": {
225 |         "Node name for S&R": "VHS_LoadVideo"
226 |       },
227 |       "widgets_values": {
228 |         "force_rate": 0,
229 |         "force_size": "Disabled",
230 |         "custom_width": 512,
231 |         "custom_height": 512,
232 |         "frame_load_cap": 0,
233 |         "skip_first_frames": 0,
234 |         "select_every_nth": 1,
235 |         "choose video to upload": "image",
236 |         "videopreview": {
237 |           "hidden": false,
238 |           "paused": false,
239 |           "params": {
240 |             "force_rate": 0,
241 |             "frame_load_cap": 0,
242 |             "skip_first_frames": 0,
243 |             "select_every_nth": 1
244 |           },
245 |           "muted": false
246 |         }
247 |       }
248 |     }
249 |   ],
250 |   "links": [
251 |     [
252 |       7,
253 |       1,
254 |       0,
255 |       12,
256 |       0,
257 |       "MOONDREAM_MODEL"
258 |     ],
259 |     [
260 |       13,
261 |       12,
262 |       0,
263 |       16,
264 |       0,
265 |       "IMAGE"
266 |     ],
267 |     [
268 |       14,
269 |       17,
270 |       0,
271 |       12,
272 |       1,
273 |       "IMAGE"
274 |     ]
275 |   ],
276 |   "groups": [],
277 |   "config": {},
278 |   "extra": {
279 |     "ds": {
280 |       "scale": 0.8868930681140714,
281 |       "offset": [
282 |         674.3723265187537,
283 |         729.820747848737
284 |       ]
285 |     },
286 |     "ue_links": [],
287 |     "VHS_latentpreview": false,
288 |     "VHS_latentpreviewrate": 0,
289 |     "node_versions": {
290 |       "ComfyUI-Moondream-Gaze-Detection": "1f57a4c6ad09d318330b0b29b3785f24231a05ee",
291 |       "comfyui-videohelpersuite": "1.4.5"
292 |     }
293 |   },
294 |   "version": 0.4
295 | }


--------------------------------------------------------------------------------
/modules/inferencer/moondream_inferencer.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import torch
  3 | import matplotlib.pyplot as plt
  4 | import numpy as np
  5 | import matplotlib
  6 | import PIL
  7 | from PIL import Image
  8 | from transformers import AutoModelForCausalLM
  9 | from typing import (Union, Tuple, List, Dict, Optional, Any)
 10 | import cv2
 11 | import io
 12 | 
 13 | from .pyvips_dll_handler import handle_pyvips_dll_error
 14 | 
 15 | 
 16 | class MoondreamInferencer:
 17 |     def __init__(self,
 18 |                  model_dir: str):
 19 |         self.model = None
 20 |         self.model_dir = model_dir
 21 |         os.makedirs(self.model_dir, exist_ok=True)
 22 | 
 23 |     def load_model(self,
 24 |                    device: str = "cuda"):
 25 |         self.model = AutoModelForCausalLM.from_pretrained(
 26 |             "vikhyatk/moondream2",
 27 |             trust_remote_code=True,
 28 |             device_map={"": device},
 29 |             cache_dir=self.model_dir,
 30 |         )
 31 | 
 32 |     def process_image(
 33 |         self,
 34 |         input_image: Union[np.ndarray, Any],
 35 |         use_ensemble: bool
 36 |     ):
 37 |         if self.model is None:
 38 |             self.load_model()
 39 | 
 40 |         if input_image is None:
 41 |             return None, ""
 42 | 
 43 |         try:
 44 |             if isinstance(input_image, torch.Tensor):
 45 |                 if input_image.dim() == 4 and input_image.shape[0] == 1:
 46 |                     input_image = input_image[0]  # shape now (H, W, 3)
 47 |                 image_np = (input_image.cpu().numpy() * 255).astype(np.uint8)
 48 | 
 49 |                 pil_image = Image.fromarray(image_np).convert("RGB")
 50 | 
 51 |             elif isinstance(input_image, np.ndarray):
 52 |                 pil_image = Image.fromarray(input_image)
 53 | 
 54 |             else:
 55 |                 pil_image = input_image
 56 | 
 57 |             enc_image = self.model.encode_image(pil_image)
 58 |             if use_ensemble:
 59 |                 flipped_pil = pil_image.copy().transpose(method=Image.FLIP_LEFT_RIGHT)
 60 |                 flip_enc_image = self.model.encode_image(flipped_pil)
 61 |             else:
 62 |                 flip_enc_image = None
 63 | 
 64 |             faces = self.model.detect(enc_image, "face")["objects"]
 65 |             faces.sort(key=lambda x: (x["x_min"], x["y_min"]))
 66 | 
 67 |             face_boxes = []
 68 |             gaze_points = []
 69 | 
 70 |             if faces:
 71 |                 for face in faces:
 72 |                     # Add face bounding box regardless of gaze detection
 73 |                     face_box = (
 74 |                         face["x_min"] * pil_image.width,
 75 |                         face["y_min"] * pil_image.height,
 76 |                         (face["x_max"] - face["x_min"]) * pil_image.width,
 77 |                         (face["y_max"] - face["y_min"]) * pil_image.height,
 78 |                     )
 79 |                     face_center = (
 80 |                         (face["x_min"] + face["x_max"]) / 2,
 81 |                         (face["y_min"] + face["y_max"]) / 2
 82 |                     )
 83 |                     face_boxes.append(face_box)
 84 | 
 85 |                     # Try to detect gaze
 86 |                     gaze_settings = {
 87 |                         "prioritize_accuracy": use_ensemble,
 88 |                         "flip_enc_img": flip_enc_image
 89 |                     }
 90 |                     gaze = self.model.detect_gaze(enc_image, face=face, eye=face_center, unstable_settings=gaze_settings)["gaze"]
 91 | 
 92 |                     if gaze is not None:
 93 |                         gaze_point = (
 94 |                             gaze["x"] * pil_image.width,
 95 |                             gaze["y"] * pil_image.height,
 96 |                         )
 97 |                         gaze_points.append(gaze_point)
 98 |                     else:
 99 |                         gaze_points.append(None)
100 | 
101 |             # Create visualization
102 |             image_array = np.array(pil_image)
103 |             fig = self.visualize_faces_and_gaze(
104 |                 face_boxes, gaze_points, image=image_array, show_plot=False
105 |             )
106 | 
107 |             faces_with_gaze = sum(1 for gp in gaze_points if gp is not None)
108 |             status = f"Found {len(faces)} faces. {len(faces) - faces_with_gaze} gazing out of frame."
109 |             return fig, status
110 | 
111 |         except Exception as e:
112 |             return None, f"Error processing image: {str(e)}"
113 | 
114 |     @staticmethod
115 |     def visualize_faces_and_gaze(face_boxes, gaze_points=None, image=None, show_plot=True):
116 |         """Visualization function that can handle faces without gaze data"""
117 |         # Calculate figure size based on image aspect ratio
118 |         if image is not None:
119 |             height, width = image.shape[:2]
120 |             aspect_ratio = width / height
121 |             fig_height = 6  # Base height
122 |             fig_width = fig_height * aspect_ratio
123 |         else:
124 |             width, height = 800, 600
125 |             fig_width, fig_height = 10, 8
126 | 
127 |         fig = plt.figure(figsize=(fig_width, fig_height))
128 |         ax = fig.add_subplot(111)
129 | 
130 |         if image is not None:
131 |             ax.imshow(image)
132 |         else:
133 |             ax.set_facecolor("#1a1a1a")
134 |             fig.patch.set_facecolor("#1a1a1a")
135 | 
136 |         colors = plt.cm.rainbow(np.linspace(0, 1, len(face_boxes)))
137 | 
138 |         for i, (face_box, color) in enumerate(zip(face_boxes, colors)):
139 |             hex_color = "#{:02x}{:02x}{:02x}".format(
140 |                 int(color[0] * 255), int(color[1] * 255), int(color[2] * 255)
141 |             )
142 | 
143 |             x, y, width_box, height_box = face_box
144 |             face_center_x = x + width_box / 2
145 |             face_center_y = y + height_box / 2
146 | 
147 |             # Draw face bounding box
148 |             face_rect = plt.Rectangle(
149 |                 (x, y), width_box, height_box, fill=False, color=hex_color, linewidth=2
150 |             )
151 |             ax.add_patch(face_rect)
152 | 
153 |             # Draw gaze line if gaze data is available
154 |             if gaze_points is not None and i < len(gaze_points) and gaze_points[i] is not None:
155 |                 gaze_x, gaze_y = gaze_points[i]
156 | 
157 |                 points = 50
158 |                 alphas = np.linspace(0.8, 0, points)
159 | 
160 |                 x_points = np.linspace(face_center_x, gaze_x, points)
161 |                 y_points = np.linspace(face_center_y, gaze_y, points)
162 | 
163 |                 for j in range(points - 1):
164 |                     ax.plot(
165 |                         [x_points[j], x_points[j + 1]],
166 |                         [y_points[j], y_points[j + 1]],
167 |                         color=hex_color,
168 |                         alpha=alphas[j],
169 |                         linewidth=4,
170 |                     )
171 | 
172 |                 ax.scatter(gaze_x, gaze_y, color=hex_color, s=100, zorder=5)
173 |                 ax.scatter(gaze_x, gaze_y, color="white", s=50, zorder=6)
174 | 
175 |         # Set plot limits and remove axes
176 |         ax.set_xlim(0, width)
177 |         ax.set_ylim(height, 0)
178 |         ax.set_aspect("equal")
179 |         ax.set_xticks([])
180 |         ax.set_yticks([])
181 | 
182 |         # Remove padding around the plot
183 |         plt.subplots_adjust(left=0, right=1, bottom=0, top=1)
184 | 
185 |         return fig
186 | 
187 |     @staticmethod
188 |     def figure_to_tensor(fig) -> torch.Tensor:
189 |         """
190 |         Converts a matplotlib Figure into a PyTorch tensor of shape.
191 |         """
192 |         buf = io.BytesIO()
193 |         fig.savefig(buf, format="png", bbox_inches="tight", pad_inches=0)
194 |         plt.close(fig)
195 | 
196 |         buf.seek(0)
197 |         pil_img = Image.open(buf).convert("RGB")
198 | 
199 |         np_img = np.array(pil_img, dtype=np.float32) / 255.0
200 | 
201 |         tensor_img = torch.from_numpy(np_img).unsqueeze(0)
202 |         return tensor_img
203 | 


--------------------------------------------------------------------------------
/modules/inferencer/pyvips_dll_handler.py:
--------------------------------------------------------------------------------
 1 | #  To fix : https://github.com/libvips/pyvips/issues/489
 2 | #  You can manually download binaries from : https://github.com/libvips/build-win64-mxe/releases/tag/v8.16.0
 3 | 
 4 | import platform
 5 | import os
 6 | import requests
 7 | import zipfile
 8 | 
 9 | 
10 | PYVIPS_WIN_DLL_URL = {  # keyed by the upper-cased PROCESSOR_ARCHITECTURE value
11 |     "AMD64": "https://github.com/libvips/build-win64-mxe/releases/download/v8.16.0/vips-dev-w64-all-8.16.0.zip",
12 |     "X86": "https://github.com/libvips/build-win64-mxe/releases/download/v8.16.0/vips-dev-w32-all-8.16.0.zip"
13 | }
14 | 
15 | 
16 | def handle_pyvips_dll_error(download_dir: str):
17 |     """Download Windows dll for pyvips and add the bin directory to the PATH."""
18 |     pyvips_dll_dir = os.path.join(download_dir, "vips-dev-8.16")
19 |     os.makedirs(pyvips_dll_dir, exist_ok=True)
20 | 
21 |     if not os.path.exists(pyvips_dll_dir) or not os.listdir(pyvips_dll_dir):
22 |         system = platform.system()
23 | 
24 |         if system.upper() == "WINDOWS":
25 |             print(f"pyvips dll directory not detected. Downloading it to \"{pyvips_dll_dir}\"..")
26 | 
27 |             arch = os.environ.get("PROCESSOR_ARCHITECTURE", "")
28 |             arch = arch.upper()
29 |             url = PYVIPS_WIN_DLL_URL.get(arch, PYVIPS_WIN_DLL_URL["AMD64"])
30 |         else:
31 |             return
32 | 
33 |         zip_filename = os.path.join(download_dir, "pyvips_dll.zip")
34 | 
35 |         response = requests.get(url, stream=True)
36 |         response.raise_for_status()
37 | 
38 |         with open(zip_filename, 'wb') as f:
39 |             for chunk in response.iter_content(chunk_size=8192):
40 |                 f.write(chunk)
41 | 
42 |         with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
43 |             zip_ref.extractall(download_dir)
44 | 
45 |         os.remove(zip_filename)
46 | 
47 |     # Add PATH
48 |     pyvips_bin_dir = os.path.join(pyvips_dll_dir, "bin")
49 |     os.environ['PATH'] = os.pathsep.join((pyvips_bin_dir, os.environ['PATH']))
50 | 
51 | 


--------------------------------------------------------------------------------
/nodes.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from typing import (Union, BinaryIO, Dict, List, Tuple, Optional, Any)
  3 | import torch
  4 | import time
  5 | 
  6 | import folder_paths
  7 | from comfy.utils import ProgressBar
  8 | 
  9 | from .modules.inferencer.moondream_inferencer import MoondreamInferencer
 10 | from .modules.inferencer.pyvips_dll_handler import handle_pyvips_dll_error
 11 | 
 12 | 
 13 | custom_nodes_script_dir = os.path.dirname(os.path.abspath(__file__))  # directory containing this file; used as the pyvips DLL download dir
 14 | custom_nodes_model_dir = os.path.join(folder_paths.models_dir, "moondream")  # passed to MoondreamInferencer as model_dir
 15 | os.makedirs(custom_nodes_model_dir, exist_ok=True)  # runs at import time
 16 | 
 17 | 
 18 | def get_category_name():  # single source for the CATEGORY attribute of every node class in this file
 19 |     return "Moondream Gaze Detection"
 20 | 
 21 | 
 22 | class MoondreamModelLoader:
 23 |     @classmethod
 24 |     def INPUT_TYPES(s):
 25 |         return {
 26 |             "required": {
 27 |                 "device": (['cuda', 'cpu'],),
 28 |             },
 29 |         }
 30 | 
 31 |     RETURN_TYPES = ("MOONDREAM_MODEL",)
 32 |     RETURN_NAMES = ("model",)
 33 |     FUNCTION = "load_model"
 34 |     CATEGORY = get_category_name()
 35 | 
 36 |     def load_model(self,
 37 |                    device: str,
 38 |                    ) -> Tuple[MoondreamInferencer]:
 39 |         handle_pyvips_dll_error(download_dir=custom_nodes_script_dir)
 40 |         model_inferencer = MoondreamInferencer(model_dir=custom_nodes_model_dir)
 41 |         model_inferencer.load_model(device=device)
 42 | 
 43 |         return (model_inferencer, )
 44 | 
 45 | 
 46 | class GazeDetection:
 47 |     @classmethod
 48 |     def INPUT_TYPES(s):
 49 |         return {
 50 |             "required": {
 51 |                 "model": ("MOONDREAM_MODEL", ),
 52 |                 "image": ("IMAGE", ),
 53 |                 "use_ensemble": ("BOOLEAN", {"default": False}),
 54 |             },
 55 |         }
 56 | 
 57 |     RETURN_TYPES = ("IMAGE",)
 58 |     RETURN_NAMES = ("image",)
 59 |     FUNCTION = "gaze_detection"
 60 |     CATEGORY = get_category_name()
 61 | 
 62 |     def gaze_detection(self,
 63 |                        model: MoondreamInferencer,
 64 |                        image: Any,
 65 |                        use_ensemble: bool,
 66 |                        ) -> Tuple:
 67 |         fig, status = model.process_image(image, use_ensemble=use_ensemble)
 68 |         out_img = model.figure_to_tensor(fig)
 69 | 
 70 |         return (out_img, )
 71 | 
 72 | 
 73 | class GazeDetectionVideo:
 74 |     @classmethod
 75 |     def INPUT_TYPES(s):
 76 |         return {
 77 |             "required": {
 78 |                 "model": ("MOONDREAM_MODEL", ),
 79 |                 "video": ("IMAGE", ),
 80 |                 "use_ensemble": ("BOOLEAN", {"default": False}),
 81 |             },
 82 |         }
 83 | 
 84 |     RETURN_TYPES = ("IMAGE",)
 85 |     RETURN_NAMES = ("images",)
 86 |     FUNCTION = "gaze_detection_video"
 87 |     CATEGORY = get_category_name()
 88 | 
 89 |     def gaze_detection_video(self,
 90 |                              model: MoondreamInferencer,
 91 |                              video: Any,
 92 |                              use_ensemble: bool,
 93 |                              ) -> Tuple:
 94 |         num_frames = video.shape[0]
 95 |         height = video.shape[1]
 96 |         width = video.shape[2]
 97 |         channels = video.shape[3]
 98 | 
 99 |         comfy_pbar = ProgressBar(num_frames)
100 |         out_frames = []
101 |         for f in range(num_frames):
102 |             frame_tensor = video[f]
103 |             fig, status = model.process_image(frame_tensor, use_ensemble=use_ensemble)
104 |             out_img = model.figure_to_tensor(fig)
105 | 
106 |             out_img = out_img.squeeze(0)
107 |             out_frames.append(out_img)
108 | 
109 |             comfy_pbar.update(1)
110 | 
111 |         out_frames_tensor = torch.stack(out_frames, dim=0)
112 | 
113 |         return (out_frames_tensor, )
114 | 


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [project]
 2 | name = "comfyui-moondream-gaze-detection"
 3 | description = "Moondream's gaze detection feature wrapper node."
 4 | version = "1.0.4"
 5 | license = {file = "LICENSE"}
 6 | dependencies = ["matplotlib==3.10.0", "pyvips==2.2.3", "accelerate>=0.32.1", "opencv-python"]
 7 | 
 8 | [project.urls]
 9 | Repository = "https://github.com/jhj0517/ComfyUI-Moondream-Gaze-Detection.git"
10 | #  Used by Comfy Registry https://comfyregistry.org
11 | 
12 | [tool.comfy]
13 | PublisherId = "jhj0517"
14 | DisplayName = "ComfyUI-Moondream-Gaze-Detection"
15 | Icon = ""
16 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib==3.10.0
2 | pyvips==2.2.3
3 | accelerate>=0.32.1
4 | opencv-python


--------------------------------------------------------------------------------