├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── display_text_nodes.py ├── examples ├── AnimateDiff_00002.mp4 ├── Chat_with_multiple_images_workflow_legacy.json ├── Chat_with_multiple_images_workflow_legacy.png ├── Chat_with_multiple_images_workflow_polished.json ├── Chat_with_multiple_images_workflow_polished.png ├── Chat_with_single_image_workflow_legacy.json ├── Chat_with_single_image_workflow_legacy.png ├── Chat_with_single_image_workflow_polished.json ├── Chat_with_single_image_workflow_polished.png ├── Chat_with_text_workflow_legacy.json ├── Chat_with_text_workflow_legacy.png ├── Chat_with_text_workflow_polished.json ├── Chat_with_text_workflow_polished.png ├── Chat_with_video_workflow_legacy.json ├── Chat_with_video_workflow_legacy.png ├── Chat_with_video_workflow_polished.json ├── Chat_with_video_workflow_polished.png ├── ComfyUI_00508_.png ├── ComfyUI_00509_.png └── ComfyUI_00532_.png ├── favicon.ico ├── image_nodes.py ├── nodes_legacy.py ├── nodes_polished.py ├── pyproject.toml ├── requirements.txt ├── util_nodes.py └── web └── js ├── displayText.js ├── multipleImagesInput.js ├── previewVideo.js └── uploadVideo.js /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 162 | #.idea/ 163 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2024 OpenBMB 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ComfyUI_MiniCPM-V-2_6-int4 2 | 3 | This is an implementation of [MiniCPM-V-2_6-int4](https://github.com/OpenBMB/MiniCPM-V) by [ComfyUI](https://github.com/comfyanonymous/ComfyUI), including support for text-based queries, video queries, single-image queries, and multi-image queries to generate captions or responses. 4 | 5 | --- 6 | 7 | ## Recent Updates 8 | 9 | - Added `keep_model_loaded` parameter 10 | 11 | By default, this parameter is set to False, which indicates that the model will be unloaded from GPU memory after each prediction is made. 12 | 13 | However, if set to True, the model will remain loaded in GPU memory. This is particularly useful when multiple predictions with the same model are needed, eliminating the need to reload it between uses. 14 | 15 | - Added `seed` parameter 16 | 17 | This parameter enables the setting of a random seed for the purpose of ensuring reproducibility in results. 18 | 19 | --- 20 | 21 | ## Basic Workflow 22 | 23 | - **Text-based Query**: Users can submit textual queries to request information or generate descriptions. For instance, a user might input a description like "What is the meaning of life?" 24 | 25 | > Chat_with_text_workflow_legacy preview 26 | > ![Chat_with_text_workflow_legacy preview](examples/Chat_with_text_workflow_legacy.png) 27 | > Chat_with_text_workflow_polished preview 28 | > ![Chat_with_text_workflow_polished preview](examples/Chat_with_text_workflow_polished.png) 29 | 30 | - **Video Query**: When a user uploads a video, the system can analyze the content and generate a detailed caption for each frame or a summary of the entire video. For example, "Generate a caption for the given video." 31 | 32 | > Chat_with_video_workflow_legacy preview 33 | > ![Chat_with_video_workflow_legacy preview](examples/Chat_with_video_workflow_legacy.png) 34 | > Chat_with_video_workflow_polished preview 35 | > ![Chat_with_video_workflow_polished preview](examples/Chat_with_video_workflow_polished.png) 36 | 37 | - **Single-Image Query**: This workflow supports generating a caption for an individual image. A user could upload a photo and ask, "What does this image show?" resulting in a caption such as "A majestic lion pride relaxing on the savannah." 38 | 39 | > Chat_with_single_image_workflow_legacy preview 40 | > ![Chat_with_single_image_workflow_legacy preview](examples/Chat_with_single_image_workflow_legacy.png) 41 | > Chat_with_single_image_workflow_polished preview 42 | > ![Chat_with_single_image_workflow_polished preview](examples/Chat_with_single_image_workflow_polished.png) 43 | 44 | - **Multi-Image Query**: For multiple images, the system can provide a collective description or a narrative that ties the images together. For example, "Create a story from the following series of images: one of a couple at a beach, another at a wedding ceremony, and the last one at a baby's christening." 45 | 46 | > Chat_with_multiple_images_workflow_legacy preview 47 | > ![Chat_with_multiple_images_workflow_legacy preview](examples/Chat_with_multiple_images_workflow_legacy.png) 48 | > Chat_with_multiple_images_workflow_polished preview 49 | > ![Chat_with_multiple_images_workflow_polished preview](examples/Chat_with_multiple_images_workflow_polished.png) 50 | 51 | ## Installation 52 | 53 | - Install from [ComfyUI Manager](https://github.com/ltdrdata/ComfyUI-Manager) (search for `minicpm`) 54 | 55 | - Download or git clone this repository into the `ComfyUI\custom_nodes\` directory and run: 56 | 57 | ```python 58 | pip install -r requirements.txt 59 | ``` 60 | 61 | ## Download Models 62 | 63 | All the models will be downloaded automatically when running the workflow if they are not found in the `ComfyUI\models\prompt_generator\` directory. 64 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from .nodes_legacy import MiniCPM_VQA 2 | from .nodes_polished import MiniCPM_VQA_Polished 3 | from .image_nodes import MultipleImagesInput 4 | from .util_nodes import LoadVideo,PreviewVideo 5 | from .display_text_nodes import DisplayText 6 | WEB_DIRECTORY = "./web" 7 | # A dictionary that contains all nodes you want to export with their names 8 | # NOTE: names should be globally unique 9 | NODE_CLASS_MAPPINGS = { 10 | "LoadVideo": LoadVideo, 11 | "PreviewVideo": PreviewVideo, 12 | "MultipleImagesInput": MultipleImagesInput, 13 | "MiniCPM_VQA": MiniCPM_VQA, 14 | "MiniCPM_VQA_Polished": MiniCPM_VQA_Polished, 15 | "DisplayText": DisplayText, 16 | } 17 | 18 | # A dictionary that contains the friendly/humanly readable titles for the nodes 19 | NODE_DISPLAY_NAME_MAPPINGS = { 20 | "LoadVideo": "Load Video", 21 | "PreviewVideo": "Preview Video", 22 | "MultipleImagesInput": "Multiple Images Input", 23 | "MiniCPM_VQA": "MiniCPM VQA", 24 | "MiniCPM_VQA_Polished": "MiniCPM VQA Polished", 25 | "DisplayText": "Display Text", 26 | } -------------------------------------------------------------------------------- /display_text_nodes.py: -------------------------------------------------------------------------------- 1 | class DisplayText: 2 | @classmethod 3 | def INPUT_TYPES(s): 4 | return { 5 | "required": { 6 | "text": ("STRING", {"forceInput": True}), 7 | } 8 | } 9 | 10 | INPUT_IS_LIST = True 11 | RETURN_TYPES = ("STRING",) 12 | OUTPUT_NODE = True 13 | OUTPUT_IS_LIST = (True,) 14 | FUNCTION = "display_text" 15 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 16 | 17 | def display_text(self, text): 18 | return {"ui": {"text": text}, "result": (text,)} 19 | -------------------------------------------------------------------------------- /examples/AnimateDiff_00002.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/AnimateDiff_00002.mp4 -------------------------------------------------------------------------------- /examples/Chat_with_multiple_images_workflow_legacy.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 50, 3 | "last_link_id": 59, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | 415, 10 | -78 11 | ], 12 | "size": [ 13 | 436.56812016891763, 14 | 108.88176616327235 15 | ], 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 3, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 43, 32 | "type": "LoadImage", 33 | "pos": [ 34 | -361, 35 | -193 36 | ], 37 | "size": { 38 | "0": 315, 39 | "1": 314 40 | }, 41 | "flags": {}, 42 | "order": 0, 43 | "mode": 0, 44 | "outputs": [ 45 | { 46 | "name": "IMAGE", 47 | "type": "IMAGE", 48 | "links": [ 49 | 56 50 | ], 51 | "slot_index": 0, 52 | "shape": 3 53 | }, 54 | { 55 | "name": "MASK", 56 | "type": "MASK", 57 | "links": null, 58 | "shape": 3 59 | } 60 | ], 61 | "properties": { 62 | "Node name for S&R": "LoadImage" 63 | }, 64 | "widgets_values": [ 65 | "ComfyUI_00509_.png", 66 | "image" 67 | ] 68 | }, 69 | { 70 | "id": 45, 71 | "type": "LoadImage", 72 | "pos": [ 73 | -691, 74 | 15 75 | ], 76 | "size": { 77 | "0": 315, 78 | "1": 314 79 | }, 80 | "flags": {}, 81 | "order": 1, 82 | "mode": 0, 83 | "outputs": [ 84 | { 85 | "name": "IMAGE", 86 | "type": "IMAGE", 87 | "links": [ 88 | 57 89 | ], 90 | "slot_index": 0, 91 | "shape": 3 92 | }, 93 | { 94 | "name": "MASK", 95 | "type": "MASK", 96 | "links": null, 97 | "shape": 3 98 | } 99 | ], 100 | "properties": { 101 | "Node name for S&R": "LoadImage" 102 | }, 103 | "widgets_values": [ 104 | "ComfyUI_00532_.png", 105 | "image" 106 | ] 107 | }, 108 | { 109 | "id": 47, 110 | "type": "LoadImage", 111 | "pos": [ 112 | -360, 113 | 161 114 | ], 115 | "size": { 116 | "0": 315, 117 | "1": 314 118 | }, 119 | "flags": {}, 120 | "order": 2, 121 | "mode": 0, 122 | "outputs": [ 123 | { 124 | "name": "IMAGE", 125 | "type": "IMAGE", 126 | "links": [ 127 | 58 128 | ], 129 | "slot_index": 0, 130 | "shape": 3 131 | }, 132 | { 133 | "name": "MASK", 134 | "type": "MASK", 135 | "links": null, 136 | "shape": 3 137 | } 138 | ], 139 | "properties": { 140 | "Node name for S&R": "LoadImage" 141 | }, 142 | "widgets_values": [ 143 | "ComfyUI_00508_.png", 144 | "image" 145 | ] 146 | }, 147 | { 148 | "id": 48, 149 | "type": "DisplayText", 150 | "pos": [ 151 | 411, 152 | 79 153 | ], 154 | "size": [ 155 | 451.1885182898909, 156 | 265.19896846818176 157 | ], 158 | "flags": {}, 159 | "order": 5, 160 | "mode": 0, 161 | "inputs": [ 162 | { 163 | "name": "text", 164 | "type": "STRING", 165 | "link": 59, 166 | "widget": { 167 | "name": "text" 168 | } 169 | } 170 | ], 171 | "outputs": [ 172 | { 173 | "name": "STRING", 174 | "type": "STRING", 175 | "links": null, 176 | "shape": 6 177 | } 178 | ], 179 | "properties": { 180 | "Node name for S&R": "DisplayText" 181 | }, 182 | "widgets_values": [ 183 | "", 184 | "In the provided images, there are several differences to note:\n\n1. **Image 1**: This image shows Earth as seen from space, with a focus on the Western Hemisphere (North America and South America). The background is dark space with visible stars.\n\n2. **Image 2**: In this second image, the Earth appears in the center of the frame, surrounded by a starry backdrop with a galaxy-like structure visible in the upper right corner. There is also a smaller celestial body, possibly an asteroid or moon, near the Earth's left side.\n\n3. **Image 3**: The third image depicts Earth with its rings, resembling Saturn's rings, surrounding it. The background remains dark space, similar to Image 1, but the inclusion of the rings adds a distinct astronomical element.\n\nThe primary differences lie in the depiction of celestial bodies around Earth and the complexity of the cosmic environment." 185 | ] 186 | }, 187 | { 188 | "id": 50, 189 | "type": "MiniCPM_VQA", 190 | "pos": [ 191 | -13, 192 | -65 193 | ], 194 | "size": { 195 | "0": 400, 196 | "1": 400 197 | }, 198 | "flags": {}, 199 | "order": 4, 200 | "mode": 0, 201 | "inputs": [ 202 | { 203 | "name": "source_video_path", 204 | "type": "PATH", 205 | "link": null 206 | }, 207 | { 208 | "name": "source_image_path_1st", 209 | "type": "IMAGE", 210 | "link": 56 211 | }, 212 | { 213 | "name": "source_image_path_2nd", 214 | "type": "IMAGE", 215 | "link": 57 216 | }, 217 | { 218 | "name": "source_image_path_3rd", 219 | "type": "IMAGE", 220 | "link": 58 221 | } 222 | ], 223 | "outputs": [ 224 | { 225 | "name": "STRING", 226 | "type": "STRING", 227 | "links": [ 228 | 59 229 | ], 230 | "shape": 3, 231 | "slot_index": 0 232 | } 233 | ], 234 | "properties": { 235 | "Node name for S&R": "MiniCPM_VQA" 236 | }, 237 | "widgets_values": [ 238 | "Compare image 1, image 2 and image 3, tell me about the differences among them.", 239 | "MiniCPM-V-2_6-int4", 240 | true, 241 | 0.8, 242 | 100, 243 | 0.7, 244 | 1.05, 245 | 2048, 246 | 64, 247 | 2, 248 | 576, 249 | "randomize" 250 | ] 251 | } 252 | ], 253 | "links": [ 254 | [ 255 | 56, 256 | 43, 257 | 0, 258 | 50, 259 | 1, 260 | "IMAGE" 261 | ], 262 | [ 263 | 57, 264 | 45, 265 | 0, 266 | 50, 267 | 2, 268 | "IMAGE" 269 | ], 270 | [ 271 | 58, 272 | 47, 273 | 0, 274 | 50, 275 | 3, 276 | "IMAGE" 277 | ], 278 | [ 279 | 59, 280 | 50, 281 | 0, 282 | 48, 283 | 0, 284 | "STRING" 285 | ] 286 | ], 287 | "groups": [], 288 | "config": {}, 289 | "extra": { 290 | "ds": { 291 | "scale": 0.8769226950000022, 292 | "offset": [ 293 | 840.512281646217, 294 | 279.18423579817517 295 | ] 296 | } 297 | }, 298 | "version": 0.4 299 | } -------------------------------------------------------------------------------- /examples/Chat_with_multiple_images_workflow_legacy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_multiple_images_workflow_legacy.png -------------------------------------------------------------------------------- /examples/Chat_with_multiple_images_workflow_polished.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 60, 3 | "last_link_id": 71, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | -986, 10 | -453 11 | ], 12 | "size": { 13 | "0": 717.5083618164062, 14 | "1": 82.10267639160156 15 | }, 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 0, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 51, 32 | "type": "DisplayText", 33 | "pos": [ 34 | -697, 35 | 183 36 | ], 37 | "size": { 38 | "0": 396.3633117675781, 39 | "1": 321.38140869140625 40 | }, 41 | "flags": {}, 42 | "order": 7, 43 | "mode": 0, 44 | "inputs": [ 45 | { 46 | "name": "text", 47 | "type": "STRING", 48 | "link": 71, 49 | "widget": { 50 | "name": "text" 51 | } 52 | } 53 | ], 54 | "outputs": [ 55 | { 56 | "name": "STRING", 57 | "type": "STRING", 58 | "links": null, 59 | "shape": 6 60 | } 61 | ], 62 | "properties": { 63 | "Node name for S&R": "DisplayText" 64 | }, 65 | "widgets_values": [ 66 | "", 67 | "In comparing image 1, image 2 and image 3, the differences are as follows: In image 1, Earth is shown in isolation with no other celestial bodies present. In image 2, a moon has been added to the scene, positioned to the left of Earth. Finally, in image 3, Saturn's rings have been added around Earth, creating a dramatic visual effect against the backdrop of space." 68 | ] 69 | }, 70 | { 71 | "id": 54, 72 | "type": "MultipleImagesInput", 73 | "pos": [ 74 | -986, 75 | -136 76 | ], 77 | "size": { 78 | "0": 210, 79 | "1": 122 80 | }, 81 | "flags": {}, 82 | "order": 4, 83 | "mode": 0, 84 | "inputs": [ 85 | { 86 | "name": "image_1", 87 | "type": "IMAGE", 88 | "link": 65 89 | }, 90 | { 91 | "name": "image_2", 92 | "type": "IMAGE", 93 | "link": 66 94 | }, 95 | { 96 | "name": "image_3", 97 | "type": "IMAGE", 98 | "link": 68 99 | } 100 | ], 101 | "outputs": [ 102 | { 103 | "name": "images", 104 | "type": "IMAGE", 105 | "links": [ 106 | 69, 107 | 70 108 | ], 109 | "slot_index": 0, 110 | "shape": 3 111 | } 112 | ], 113 | "properties": { 114 | "Node name for S&R": "MultipleImagesInput" 115 | }, 116 | "widgets_values": [ 117 | 3, 118 | null 119 | ] 120 | }, 121 | { 122 | "id": 55, 123 | "type": "LoadImage", 124 | "pos": [ 125 | -1232, 126 | -453 127 | ], 128 | "size": [ 129 | 210, 130 | 314 131 | ], 132 | "flags": {}, 133 | "order": 1, 134 | "mode": 0, 135 | "outputs": [ 136 | { 137 | "name": "IMAGE", 138 | "type": "IMAGE", 139 | "links": [ 140 | 65 141 | ], 142 | "shape": 3 143 | }, 144 | { 145 | "name": "MASK", 146 | "type": "MASK", 147 | "links": null, 148 | "shape": 3 149 | } 150 | ], 151 | "properties": { 152 | "Node name for S&R": "LoadImage" 153 | }, 154 | "widgets_values": [ 155 | "ComfyUI_00509_.png", 156 | "image" 157 | ] 158 | }, 159 | { 160 | "id": 56, 161 | "type": "LoadImage", 162 | "pos": [ 163 | -1234, 164 | -122 165 | ], 166 | "size": [ 167 | 214.43836975097656, 168 | 314 169 | ], 170 | "flags": {}, 171 | "order": 2, 172 | "mode": 0, 173 | "outputs": [ 174 | { 175 | "name": "IMAGE", 176 | "type": "IMAGE", 177 | "links": [ 178 | 66 179 | ], 180 | "shape": 3 181 | }, 182 | { 183 | "name": "MASK", 184 | "type": "MASK", 185 | "links": null, 186 | "shape": 3 187 | } 188 | ], 189 | "properties": { 190 | "Node name for S&R": "LoadImage" 191 | }, 192 | "widgets_values": [ 193 | "ComfyUI_00532_.png", 194 | "image" 195 | ] 196 | }, 197 | { 198 | "id": 58, 199 | "type": "LoadImage", 200 | "pos": [ 201 | -1232, 202 | 227 203 | ], 204 | "size": [ 205 | 210, 206 | 314 207 | ], 208 | "flags": {}, 209 | "order": 3, 210 | "mode": 0, 211 | "outputs": [ 212 | { 213 | "name": "IMAGE", 214 | "type": "IMAGE", 215 | "links": [ 216 | 68 217 | ], 218 | "shape": 3 219 | }, 220 | { 221 | "name": "MASK", 222 | "type": "MASK", 223 | "links": null, 224 | "shape": 3 225 | } 226 | ], 227 | "properties": { 228 | "Node name for S&R": "LoadImage" 229 | }, 230 | "widgets_values": [ 231 | "ComfyUI_00508_.png", 232 | "image" 233 | ] 234 | }, 235 | { 236 | "id": 59, 237 | "type": "PreviewImage", 238 | "pos": [ 239 | -247, 240 | -450 241 | ], 242 | "size": { 243 | "0": 321.89825439453125, 244 | "1": 978.513916015625 245 | }, 246 | "flags": {}, 247 | "order": 5, 248 | "mode": 0, 249 | "inputs": [ 250 | { 251 | "name": "images", 252 | "type": "IMAGE", 253 | "link": 69 254 | } 255 | ], 256 | "properties": { 257 | "Node name for S&R": "PreviewImage" 258 | } 259 | }, 260 | { 261 | "id": 60, 262 | "type": "MiniCPM_VQA_Polished", 263 | "pos": [ 264 | -697, 265 | -241 266 | ], 267 | "size": { 268 | "0": 400, 269 | "1": 360 270 | }, 271 | "flags": {}, 272 | "order": 6, 273 | "mode": 0, 274 | "inputs": [ 275 | { 276 | "name": "source_video_path", 277 | "type": "PATH", 278 | "link": null 279 | }, 280 | { 281 | "name": "source_image_path", 282 | "type": "IMAGE", 283 | "link": 70 284 | } 285 | ], 286 | "outputs": [ 287 | { 288 | "name": "STRING", 289 | "type": "STRING", 290 | "links": [ 291 | 71 292 | ], 293 | "shape": 3, 294 | "slot_index": 0 295 | } 296 | ], 297 | "properties": { 298 | "Node name for S&R": "MiniCPM_VQA_Polished" 299 | }, 300 | "widgets_values": [ 301 | "Compare image 1, image 2 and image 3, tell me about the differences among them.", 302 | "MiniCPM-V-2_6-int4", 303 | true, 304 | 0.8, 305 | 100, 306 | 0.7, 307 | 1.05, 308 | 2048, 309 | 64, 310 | 2, 311 | 1293, 312 | "randomize" 313 | ] 314 | } 315 | ], 316 | "links": [ 317 | [ 318 | 65, 319 | 55, 320 | 0, 321 | 54, 322 | 0, 323 | "IMAGE" 324 | ], 325 | [ 326 | 66, 327 | 56, 328 | 0, 329 | 54, 330 | 1, 331 | "IMAGE" 332 | ], 333 | [ 334 | 68, 335 | 58, 336 | 0, 337 | 54, 338 | 2, 339 | "IMAGE" 340 | ], 341 | [ 342 | 69, 343 | 54, 344 | 0, 345 | 59, 346 | 0, 347 | "IMAGE" 348 | ], 349 | [ 350 | 70, 351 | 54, 352 | 0, 353 | 60, 354 | 1, 355 | "IMAGE" 356 | ], 357 | [ 358 | 71, 359 | 60, 360 | 0, 361 | 51, 362 | 0, 363 | "STRING" 364 | ] 365 | ], 366 | "groups": [], 367 | "config": {}, 368 | "extra": { 369 | "ds": { 370 | "scale": 0.7247295000000027, 371 | "offset": [ 372 | 1602.4864516115556, 373 | 507.1347555005483 374 | ] 375 | } 376 | }, 377 | "version": 0.4 378 | } -------------------------------------------------------------------------------- /examples/Chat_with_multiple_images_workflow_polished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_multiple_images_workflow_polished.png -------------------------------------------------------------------------------- /examples/Chat_with_single_image_workflow_legacy.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 55, 3 | "last_link_id": 64, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | 385, 10 | -6 11 | ], 12 | "size": [ 13 | 681.1074433554127, 14 | 92.63203200215605 15 | ], 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 1, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 51, 32 | "type": "LoadImage", 33 | "pos": [ 34 | -363, 35 | -8 36 | ], 37 | "size": [ 38 | 308.78882573073196, 39 | 398.59335191598814 40 | ], 41 | "flags": {}, 42 | "order": 0, 43 | "mode": 0, 44 | "outputs": [ 45 | { 46 | "name": "IMAGE", 47 | "type": "IMAGE", 48 | "links": [ 49 | 63 50 | ], 51 | "slot_index": 0, 52 | "shape": 3 53 | }, 54 | { 55 | "name": "MASK", 56 | "type": "MASK", 57 | "links": null, 58 | "shape": 3 59 | } 60 | ], 61 | "properties": { 62 | "Node name for S&R": "LoadImage" 63 | }, 64 | "widgets_values": [ 65 | "ComfyUI_00509_.png", 66 | "image" 67 | ] 68 | }, 69 | { 70 | "id": 52, 71 | "type": "DisplayText", 72 | "pos": [ 73 | 385, 74 | 128 75 | ], 76 | "size": [ 77 | 682.011017760673, 78 | 262.51556121728197 79 | ], 80 | "flags": {}, 81 | "order": 3, 82 | "mode": 0, 83 | "inputs": [ 84 | { 85 | "name": "text", 86 | "type": "STRING", 87 | "link": 64, 88 | "widget": { 89 | "name": "text" 90 | } 91 | } 92 | ], 93 | "outputs": [ 94 | { 95 | "name": "STRING", 96 | "type": "STRING", 97 | "links": null, 98 | "slot_index": 0, 99 | "shape": 6 100 | } 101 | ], 102 | "properties": { 103 | "Node name for S&R": "DisplayText" 104 | }, 105 | "widgets_values": [ 106 | "", 107 | "The image presents a captivating view of Earth, captured from space. The planet is beautifully illuminated by the sun's rays, casting a warm glow over its surface. The curvature of the Earth is clearly visible, emphasizing the vastness of our home planet.\n\nThe continents and oceans are distinctly outlined in shades of green and brown, respectively, providing a stark contrast against the deep blue of the surrounding space. This color palette not only highlights the natural beauty of our world but also underscores the delicate balance between land and water.\n\nThe atmosphere, depicted in hues of white and gray, appears as swirling clouds that blanket the planet. These clouds, reminiscent of cosmic dust storms, add a sense of dynamism to the otherwise serene scene.\n\nIn the backdrop, a distant star can be seen, serving as a reminder of the infinite universe beyond our own. Its presence adds depth to the image, creating a sense of scale and distance.\n\nOverall, the image provides a comprehensive view of Earth, showcasing its unique characteristics and placing it within the context of the cosmos. It's a testament to the awe-inspiring nature of our planet and the wonders of space exploration." 108 | ] 109 | }, 110 | { 111 | "id": 55, 112 | "type": "MiniCPM_VQA", 113 | "pos": [ 114 | -34, 115 | -9 116 | ], 117 | "size": { 118 | "0": 400, 119 | "1": 400 120 | }, 121 | "flags": {}, 122 | "order": 2, 123 | "mode": 0, 124 | "inputs": [ 125 | { 126 | "name": "source_video_path", 127 | "type": "PATH", 128 | "link": null 129 | }, 130 | { 131 | "name": "source_image_path_1st", 132 | "type": "IMAGE", 133 | "link": 63 134 | }, 135 | { 136 | "name": "source_image_path_2nd", 137 | "type": "IMAGE", 138 | "link": null 139 | }, 140 | { 141 | "name": "source_image_path_3rd", 142 | "type": "IMAGE", 143 | "link": null 144 | } 145 | ], 146 | "outputs": [ 147 | { 148 | "name": "STRING", 149 | "type": "STRING", 150 | "links": [ 151 | 64 152 | ], 153 | "shape": 3, 154 | "slot_index": 0 155 | } 156 | ], 157 | "properties": { 158 | "Node name for S&R": "MiniCPM_VQA" 159 | }, 160 | "widgets_values": [ 161 | "Describe the image in detail", 162 | "MiniCPM-V-2_6-int4", 163 | true, 164 | 0.8, 165 | 100, 166 | 0.7, 167 | 1.05, 168 | 2048, 169 | 64, 170 | 2, 171 | 171, 172 | "randomize" 173 | ] 174 | } 175 | ], 176 | "links": [ 177 | [ 178 | 63, 179 | 51, 180 | 0, 181 | 55, 182 | 1, 183 | "IMAGE" 184 | ], 185 | [ 186 | 64, 187 | 55, 188 | 0, 189 | 52, 190 | 0, 191 | "STRING" 192 | ] 193 | ], 194 | "groups": [], 195 | "config": {}, 196 | "extra": { 197 | "ds": { 198 | "scale": 0.9646149645000015, 199 | "offset": [ 200 | 482.3653697740519, 201 | 191.776875012469 202 | ] 203 | } 204 | }, 205 | "version": 0.4 206 | } -------------------------------------------------------------------------------- /examples/Chat_with_single_image_workflow_legacy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_single_image_workflow_legacy.png -------------------------------------------------------------------------------- /examples/Chat_with_single_image_workflow_polished.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 56, 3 | "last_link_id": 66, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | 385, 10 | -6 11 | ], 12 | "size": [ 13 | 681.1074433554127, 14 | 92.63203200215605 15 | ], 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 1, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 51, 32 | "type": "LoadImage", 33 | "pos": [ 34 | -348, 35 | -2 36 | ], 37 | "size": [ 38 | 293.6592954235593, 39 | 358.0474368401352 40 | ], 41 | "flags": {}, 42 | "order": 0, 43 | "mode": 0, 44 | "outputs": [ 45 | { 46 | "name": "IMAGE", 47 | "type": "IMAGE", 48 | "links": [ 49 | 65 50 | ], 51 | "slot_index": 0, 52 | "shape": 3 53 | }, 54 | { 55 | "name": "MASK", 56 | "type": "MASK", 57 | "links": null, 58 | "shape": 3 59 | } 60 | ], 61 | "properties": { 62 | "Node name for S&R": "LoadImage" 63 | }, 64 | "widgets_values": [ 65 | "ComfyUI_00509_.png", 66 | "image" 67 | ] 68 | }, 69 | { 70 | "id": 52, 71 | "type": "DisplayText", 72 | "pos": [ 73 | 385, 74 | 121 75 | ], 76 | "size": [ 77 | 680.7480526602474, 78 | 233.90070457160425 79 | ], 80 | "flags": {}, 81 | "order": 3, 82 | "mode": 0, 83 | "inputs": [ 84 | { 85 | "name": "text", 86 | "type": "STRING", 87 | "link": 66, 88 | "widget": { 89 | "name": "text" 90 | } 91 | } 92 | ], 93 | "outputs": [ 94 | { 95 | "name": "STRING", 96 | "type": "STRING", 97 | "links": null, 98 | "slot_index": 0, 99 | "shape": 6 100 | } 101 | ], 102 | "properties": { 103 | "Node name for S&R": "DisplayText" 104 | }, 105 | "widgets_values": [ 106 | "", 107 | "The image presents a breathtaking view of Earth from space, showcasing the planet's diverse landscapes and vibrant colors. The curvature of the Earth is clearly visible, emphasizing its spherical shape. The oceans are depicted in shades of blue, while the continents stand out with their distinct green hues.\n\nOne of the most striking features is the swirling patterns in the clouds, which create a mesmerizing effect against the backdrop of the dark cosmos. These patterns seem to dance across the sky, adding a dynamic element to the otherwise static scene.\n\nIn the top right corner, a distant star glows brightly, serving as a reminder of the vastness of space. This celestial body stands out starkly against the black expanse, providing a sense of scale and depth to the image.\n\nOverall, the image captures the awe-inspiring beauty of our home planet and the infinite universe that surrounds it. It's a testament to the wonders of nature and the mysteries of the cosmos." 108 | ] 109 | }, 110 | { 111 | "id": 56, 112 | "type": "MiniCPM_VQA_Polished", 113 | "pos": [ 114 | -34, 115 | -4 116 | ], 117 | "size": { 118 | "0": 400, 119 | "1": 360 120 | }, 121 | "flags": {}, 122 | "order": 2, 123 | "mode": 0, 124 | "inputs": [ 125 | { 126 | "name": "source_video_path", 127 | "type": "PATH", 128 | "link": null 129 | }, 130 | { 131 | "name": "source_image_path", 132 | "type": "IMAGE", 133 | "link": 65 134 | } 135 | ], 136 | "outputs": [ 137 | { 138 | "name": "STRING", 139 | "type": "STRING", 140 | "links": [ 141 | 66 142 | ], 143 | "shape": 3, 144 | "slot_index": 0 145 | } 146 | ], 147 | "properties": { 148 | "Node name for S&R": "MiniCPM_VQA_Polished" 149 | }, 150 | "widgets_values": [ 151 | "Describe the image in detail", 152 | "MiniCPM-V-2_6-int4", 153 | true, 154 | 0.8, 155 | 100, 156 | 0.7, 157 | 1.05, 158 | 2048, 159 | 64, 160 | 2, 161 | 1923, 162 | "randomize" 163 | ] 164 | } 165 | ], 166 | "links": [ 167 | [ 168 | 65, 169 | 51, 170 | 0, 171 | 56, 172 | 1, 173 | "IMAGE" 174 | ], 175 | [ 176 | 66, 177 | 56, 178 | 0, 179 | 52, 180 | 0, 181 | "STRING" 182 | ] 183 | ], 184 | "groups": [], 185 | "config": {}, 186 | "extra": { 187 | "ds": { 188 | "scale": 0.9646149645000016, 189 | "offset": [ 190 | 465.71293314624074, 191 | 190.1005845668893 192 | ] 193 | } 194 | }, 195 | "version": 0.4 196 | } -------------------------------------------------------------------------------- /examples/Chat_with_single_image_workflow_polished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_single_image_workflow_polished.png -------------------------------------------------------------------------------- /examples/Chat_with_text_workflow_legacy.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 51, 3 | "last_link_id": 54, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | 398, 10 | -255 11 | ], 12 | "size": [ 13 | 560.1107513648193, 14 | 103.60144459895338 15 | ], 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 1, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 49, 32 | "type": "DisplayText", 33 | "pos": [ 34 | 395, 35 | -115 36 | ], 37 | "size": [ 38 | 565.5332447212019, 39 | 259.1318214920463 40 | ], 41 | "flags": {}, 42 | "order": 2, 43 | "mode": 0, 44 | "inputs": [ 45 | { 46 | "name": "text", 47 | "type": "STRING", 48 | "link": 54, 49 | "widget": { 50 | "name": "text" 51 | } 52 | } 53 | ], 54 | "outputs": [ 55 | { 56 | "name": "STRING", 57 | "type": "STRING", 58 | "links": null, 59 | "shape": 6 60 | } 61 | ], 62 | "properties": { 63 | "Node name for S&R": "DisplayText" 64 | }, 65 | "widgets_values": [ 66 | "", 67 | "A quantum group is a mathematical object that generalizes the concept of a classical group to the realm of quantum mechanics. In classical mathematics, a group is a set of elements with a binary operation (such as addition or multiplication) that satisfies certain properties, such as associativity and the existence of an identity element.\nIn the context of quantum mechanics, a quantum group is a Hopf algebra that is associated with a classical group. The elements of a quantum group are represented by operators that satisfy the same algebraic rules as the elements of the corresponding classical group. However, the operators in a quantum group also have additional properties related to the uncertainty principle, which is a fundamental principle in quantum mechanics that states that it is impossible to precisely measure both the position and momentum of a particle at the same time.\nOne example of a quantum group is the quantum group associated with the special unitary group SU(2), which plays a central role in the study of quantum spin systems. Another example is the quantum group associated with the orthogonal group SO(3), which is used in the study of quantum mechanics of angular momentum.\nQuantum groups have many applications in physics and other areas of mathematics, including the study of quantum field theory, integrable systems, and knot theory. They are also important tools in the development of quantum computing, where they can be used to construct quantum algorithms for solving problems that are difficult or impossible to solve using classical computers." 68 | ] 69 | }, 70 | { 71 | "id": 51, 72 | "type": "MiniCPM_VQA", 73 | "pos": [ 74 | -21, 75 | -256 76 | ], 77 | "size": { 78 | "0": 400, 79 | "1": 400 80 | }, 81 | "flags": {}, 82 | "order": 0, 83 | "mode": 0, 84 | "inputs": [ 85 | { 86 | "name": "source_video_path", 87 | "type": "PATH", 88 | "link": null 89 | }, 90 | { 91 | "name": "source_image_path_1st", 92 | "type": "IMAGE", 93 | "link": null 94 | }, 95 | { 96 | "name": "source_image_path_2nd", 97 | "type": "IMAGE", 98 | "link": null 99 | }, 100 | { 101 | "name": "source_image_path_3rd", 102 | "type": "IMAGE", 103 | "link": null 104 | } 105 | ], 106 | "outputs": [ 107 | { 108 | "name": "STRING", 109 | "type": "STRING", 110 | "links": [ 111 | 54 112 | ], 113 | "shape": 3, 114 | "slot_index": 0 115 | } 116 | ], 117 | "properties": { 118 | "Node name for S&R": "MiniCPM_VQA" 119 | }, 120 | "widgets_values": [ 121 | "Quantum Group", 122 | "MiniCPM-V-2_6-int4", 123 | true, 124 | 0.8, 125 | 100, 126 | 0.7, 127 | 1.05, 128 | 2048, 129 | 64, 130 | 2, 131 | 1501, 132 | "randomize" 133 | ] 134 | } 135 | ], 136 | "links": [ 137 | [ 138 | 54, 139 | 51, 140 | 0, 141 | 49, 142 | 0, 143 | "STRING" 144 | ] 145 | ], 146 | "groups": [], 147 | "config": {}, 148 | "extra": { 149 | "ds": { 150 | "scale": 1.2839025177495011, 151 | "offset": [ 152 | 174.06698959850874, 153 | 351.1944051783393 154 | ] 155 | } 156 | }, 157 | "version": 0.4 158 | } -------------------------------------------------------------------------------- /examples/Chat_with_text_workflow_legacy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_text_workflow_legacy.png -------------------------------------------------------------------------------- /examples/Chat_with_text_workflow_polished.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 52, 3 | "last_link_id": 55, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | 390, 10 | -254 11 | ], 12 | "size": [ 13 | 443.0672250364122, 14 | 104.75543052432553 15 | ], 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 1, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 49, 32 | "type": "DisplayText", 33 | "pos": [ 34 | 386, 35 | -111 36 | ], 37 | "size": [ 38 | 452.45997097748295, 39 | 212.76987788146022 40 | ], 41 | "flags": {}, 42 | "order": 2, 43 | "mode": 0, 44 | "inputs": [ 45 | { 46 | "name": "text", 47 | "type": "STRING", 48 | "link": 55, 49 | "widget": { 50 | "name": "text" 51 | } 52 | } 53 | ], 54 | "outputs": [ 55 | { 56 | "name": "STRING", 57 | "type": "STRING", 58 | "links": null, 59 | "shape": 6 60 | } 61 | ], 62 | "properties": { 63 | "Node name for S&R": "DisplayText" 64 | }, 65 | "widgets_values": [ 66 | "", 67 | "A quantum group is a mathematical object that generalizes the concept of a group to include elements with quantum numbers. In traditional groups, each element has a unique identity and can be multiplied or divided by other elements in the group. However, in quantum groups, some elements may have multiple identities or may not be able to be multiplied or divided.\nQuantum groups are used in various areas of mathematics and physics, including representation theory, statistical mechanics, and quantum field theory. They are particularly important in the study of quantum systems, where they provide a way to describe the behavior of particles that follow quantum mechanical rules.\nOne example of a quantum group is the quantum group associated with the Lie algebra sl(2), which describes the symmetries of the hyperbolic plane. This group has several interesting properties, such as the fact that it contains both integer and non-integer elements, and that it is not commutative." 68 | ] 69 | }, 70 | { 71 | "id": 52, 72 | "type": "MiniCPM_VQA_Polished", 73 | "pos": [ 74 | -28, 75 | -258 76 | ], 77 | "size": { 78 | "0": 400, 79 | "1": 360 80 | }, 81 | "flags": {}, 82 | "order": 0, 83 | "mode": 0, 84 | "inputs": [ 85 | { 86 | "name": "source_video_path", 87 | "type": "PATH", 88 | "link": null 89 | }, 90 | { 91 | "name": "source_image_path", 92 | "type": "IMAGE", 93 | "link": null 94 | } 95 | ], 96 | "outputs": [ 97 | { 98 | "name": "STRING", 99 | "type": "STRING", 100 | "links": [ 101 | 55 102 | ], 103 | "shape": 3, 104 | "slot_index": 0 105 | } 106 | ], 107 | "properties": { 108 | "Node name for S&R": "MiniCPM_VQA_Polished" 109 | }, 110 | "widgets_values": [ 111 | "Quantum Group", 112 | "MiniCPM-V-2_6-int4", 113 | true, 114 | 0.8, 115 | 100, 116 | 0.7, 117 | 1.05, 118 | 2048, 119 | 64, 120 | 2, 121 | 2021, 122 | "randomize" 123 | ] 124 | } 125 | ], 126 | "links": [ 127 | [ 128 | 55, 129 | 52, 130 | 0, 131 | 49, 132 | 0, 133 | "STRING" 134 | ] 135 | ], 136 | "groups": [], 137 | "config": {}, 138 | "extra": { 139 | "ds": { 140 | "scale": 1.4122927695244516, 141 | "offset": [ 142 | 151.62360770168286, 143 | 363.80059898448 144 | ] 145 | } 146 | }, 147 | "version": 0.4 148 | } -------------------------------------------------------------------------------- /examples/Chat_with_text_workflow_polished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_text_workflow_polished.png -------------------------------------------------------------------------------- /examples/Chat_with_video_workflow_legacy.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 53, 3 | "last_link_id": 63, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | -480, 10 | -284 11 | ], 12 | "size": { 13 | "0": 724.4190673828125, 14 | "1": 79.42505645751953 15 | }, 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 0, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 50, 32 | "type": "LoadVideo", 33 | "pos": [ 34 | -568, 35 | -158 36 | ], 37 | "size": [ 38 | 469.4351950653901, 39 | 397.9642639160156 40 | ], 41 | "flags": {}, 42 | "order": 1, 43 | "mode": 0, 44 | "outputs": [ 45 | { 46 | "name": "PATH", 47 | "type": "PATH", 48 | "links": [ 49 | 62 50 | ], 51 | "slot_index": 0, 52 | "shape": 3 53 | } 54 | ], 55 | "properties": { 56 | "Node name for S&R": "LoadVideo" 57 | }, 58 | "widgets_values": [ 59 | "AnimateDiff_00002.mp4", 60 | "Video", 61 | { 62 | "hidden": false, 63 | "paused": false, 64 | "params": {} 65 | } 66 | ] 67 | }, 68 | { 69 | "id": 51, 70 | "type": "DisplayText", 71 | "pos": [ 72 | -558, 73 | 287 74 | ], 75 | "size": [ 76 | 861.4327343832845, 77 | 107.98898362105666 78 | ], 79 | "flags": {}, 80 | "order": 3, 81 | "mode": 0, 82 | "inputs": [ 83 | { 84 | "name": "text", 85 | "type": "STRING", 86 | "link": 63, 87 | "widget": { 88 | "name": "text" 89 | } 90 | } 91 | ], 92 | "outputs": [ 93 | { 94 | "name": "STRING", 95 | "type": "STRING", 96 | "links": null, 97 | "shape": 6 98 | } 99 | ], 100 | "properties": { 101 | "Node name for S&R": "DisplayText" 102 | }, 103 | "widgets_values": [ 104 | "", 105 | "The video captures a serene sunset scene. The sky is painted with warm hues of orange and red, creating a dramatic backdrop as the sun descends towards the horizon. Silhouetted against this vibrant sky is a solitary tree, its bare branches reaching upwards. A flock of birds can be seen in mid-flight, their dark silhouettes contrasting sharply against the glowing orb of the setting sun. The overall atmosphere is one of tranquility and natural beauty, as the day transitions into night." 106 | ] 107 | }, 108 | { 109 | "id": 53, 110 | "type": "MiniCPM_VQA", 111 | "pos": [ 112 | -89, 113 | -159 114 | ], 115 | "size": { 116 | "0": 400, 117 | "1": 400 118 | }, 119 | "flags": {}, 120 | "order": 2, 121 | "mode": 0, 122 | "inputs": [ 123 | { 124 | "name": "source_video_path", 125 | "type": "PATH", 126 | "link": 62 127 | }, 128 | { 129 | "name": "source_image_path_1st", 130 | "type": "IMAGE", 131 | "link": null 132 | }, 133 | { 134 | "name": "source_image_path_2nd", 135 | "type": "IMAGE", 136 | "link": null 137 | }, 138 | { 139 | "name": "source_image_path_3rd", 140 | "type": "IMAGE", 141 | "link": null 142 | } 143 | ], 144 | "outputs": [ 145 | { 146 | "name": "STRING", 147 | "type": "STRING", 148 | "links": [ 149 | 63 150 | ], 151 | "shape": 3, 152 | "slot_index": 0 153 | } 154 | ], 155 | "properties": { 156 | "Node name for S&R": "MiniCPM_VQA" 157 | }, 158 | "widgets_values": [ 159 | "Describe the video in detail", 160 | "MiniCPM-V-2_6-int4", 161 | false, 162 | 0.8, 163 | 100, 164 | 0.7, 165 | 1.05, 166 | 2048, 167 | 64, 168 | 2, 169 | 1617, 170 | "randomize" 171 | ] 172 | } 173 | ], 174 | "links": [ 175 | [ 176 | 62, 177 | 50, 178 | 0, 179 | 53, 180 | 0, 181 | "PATH" 182 | ], 183 | [ 184 | 63, 185 | 53, 186 | 0, 187 | 51, 188 | 0, 189 | "STRING" 190 | ] 191 | ], 192 | "groups": [], 193 | "config": {}, 194 | "extra": { 195 | "ds": { 196 | "scale": 0.9646149645000006, 197 | "offset": [ 198 | 896.8108299009335, 199 | 348.18733398490764 200 | ] 201 | } 202 | }, 203 | "version": 0.4 204 | } -------------------------------------------------------------------------------- /examples/Chat_with_video_workflow_legacy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_video_workflow_legacy.png -------------------------------------------------------------------------------- /examples/Chat_with_video_workflow_polished.json: -------------------------------------------------------------------------------- 1 | { 2 | "last_node_id": 55, 3 | "last_link_id": 66, 4 | "nodes": [ 5 | { 6 | "id": 7, 7 | "type": "Note", 8 | "pos": [ 9 | -428, 10 | -256 11 | ], 12 | "size": { 13 | "0": 724.4190673828125, 14 | "1": 79.42505645751953 15 | }, 16 | "flags": { 17 | "collapsed": false 18 | }, 19 | "order": 1, 20 | "mode": 0, 21 | "properties": { 22 | "text": "" 23 | }, 24 | "widgets_values": [ 25 | "当 MiniCPM VQA 同时接收到图像和视频信息时,它会仅处理视频信息而忽略图像信息。如果您想处理图像信息,请断开视频信息的输入。\n\nWhen MiniCPM VQA simultaneously receives both image and video information, it processes only the video information while ignoring the image information. If you want to process the image information, please disconnect the input of the video information." 26 | ], 27 | "color": "#432", 28 | "bgcolor": "#653" 29 | }, 30 | { 31 | "id": 51, 32 | "type": "DisplayText", 33 | "pos": [ 34 | -471, 35 | 277 36 | ], 37 | "size": [ 38 | 813.5119048309946, 39 | 141.5117891133587 40 | ], 41 | "flags": {}, 42 | "order": 3, 43 | "mode": 0, 44 | "inputs": [ 45 | { 46 | "name": "text", 47 | "type": "STRING", 48 | "link": 65, 49 | "widget": { 50 | "name": "text" 51 | } 52 | } 53 | ], 54 | "outputs": [ 55 | { 56 | "name": "STRING", 57 | "type": "STRING", 58 | "links": null, 59 | "shape": 6 60 | } 61 | ], 62 | "properties": { 63 | "Node name for S&R": "DisplayText" 64 | }, 65 | "widgets_values": [ 66 | "", 67 | "The video captures a serene sunset scene with the sun descending towards the horizon. The sky is painted in vibrant hues of orange, yellow, and red, creating a warm gradient that transitions from fiery tones near the setting sun to softer shades as it moves upward. Silhouetted against this colorful backdrop is a solitary tree, its bare branches reaching out into the sky, adding a stark contrast to the vivid colors. A flock of birds can be seen flying across the sky, their dark shapes moving dynamically through the air, contributing to the sense of movement within the otherwise tranquil setting. The overall atmosphere is one of peacefulness and natural beauty, emphasizing the fleeting yet mesmerizing moments of twilight." 68 | ] 69 | }, 70 | { 71 | "id": 54, 72 | "type": "MiniCPM_VQA_Polished", 73 | "pos": [ 74 | -57, 75 | -129 76 | ], 77 | "size": { 78 | "0": 400, 79 | "1": 360 80 | }, 81 | "flags": {}, 82 | "order": 2, 83 | "mode": 0, 84 | "inputs": [ 85 | { 86 | "name": "source_video_path", 87 | "type": "PATH", 88 | "link": 66 89 | }, 90 | { 91 | "name": "source_image_path", 92 | "type": "IMAGE", 93 | "link": null 94 | } 95 | ], 96 | "outputs": [ 97 | { 98 | "name": "STRING", 99 | "type": "STRING", 100 | "links": [ 101 | 65 102 | ], 103 | "shape": 3, 104 | "slot_index": 0 105 | } 106 | ], 107 | "properties": { 108 | "Node name for S&R": "MiniCPM_VQA_Polished" 109 | }, 110 | "widgets_values": [ 111 | "Describe the video in detail", 112 | "MiniCPM-V-2_6-int4", 113 | true, 114 | 0.8, 115 | 100, 116 | 0.7, 117 | 1.05, 118 | 2048, 119 | 64, 120 | 2, 121 | 1746, 122 | "randomize" 123 | ] 124 | }, 125 | { 126 | "id": 55, 127 | "type": "LoadVideo", 128 | "pos": [ 129 | -479, 130 | -128 131 | ], 132 | "size": [ 133 | 409.98541024642964, 134 | 358.0214538574219 135 | ], 136 | "flags": {}, 137 | "order": 0, 138 | "mode": 0, 139 | "outputs": [ 140 | { 141 | "name": "PATH", 142 | "type": "PATH", 143 | "links": [ 144 | 66 145 | ], 146 | "shape": 3 147 | } 148 | ], 149 | "properties": { 150 | "Node name for S&R": "LoadVideo" 151 | }, 152 | "widgets_values": [ 153 | "AnimateDiff_00002.mp4", 154 | "Video", 155 | { 156 | "hidden": false, 157 | "paused": false, 158 | "params": {} 159 | } 160 | ] 161 | } 162 | ], 163 | "links": [ 164 | [ 165 | 65, 166 | 54, 167 | 0, 168 | 51, 169 | 0, 170 | "STRING" 171 | ], 172 | [ 173 | 66, 174 | 55, 175 | 0, 176 | 54, 177 | 0, 178 | "PATH" 179 | ] 180 | ], 181 | "groups": [], 182 | "config": {}, 183 | "extra": { 184 | "ds": { 185 | "scale": 0.9646149645000013, 186 | "offset": [ 187 | 829.9398344807752, 188 | 321.3710639555439 189 | ] 190 | } 191 | }, 192 | "version": 0.4 193 | } -------------------------------------------------------------------------------- /examples/Chat_with_video_workflow_polished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/Chat_with_video_workflow_polished.png -------------------------------------------------------------------------------- /examples/ComfyUI_00508_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/ComfyUI_00508_.png -------------------------------------------------------------------------------- /examples/ComfyUI_00509_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/ComfyUI_00509_.png -------------------------------------------------------------------------------- /examples/ComfyUI_00532_.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/examples/ComfyUI_00532_.png -------------------------------------------------------------------------------- /favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4/763f95133a95b611977efaa05ad654ff410670a9/favicon.ico -------------------------------------------------------------------------------- /image_nodes.py: -------------------------------------------------------------------------------- 1 | class MultipleImagesInput: 2 | @classmethod 3 | def INPUT_TYPES(s): 4 | return { 5 | "required": { 6 | "inputcount": ("INT", {"default": 2, "min": 2, "max": 1000, "step": 1}), 7 | "image_1": ("IMAGE",), 8 | "image_2": ("IMAGE",), 9 | }, 10 | } 11 | 12 | RETURN_TYPES = ("IMAGE",) 13 | RETURN_NAMES = ("images",) 14 | FUNCTION = "combine" 15 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 16 | DESCRIPTION = """ 17 | Creates an image batch from multiple images. 18 | You can set how many inputs the node has, 19 | with the **inputcount** and clicking update. 20 | """ 21 | 22 | def combine(self, inputcount, **kwargs): 23 | from nodes import ImageBatch 24 | 25 | image_batch_node = ImageBatch() 26 | image = kwargs["image_1"] 27 | for c in range(1, inputcount): 28 | new_image = kwargs[f"image_{c + 1}"] 29 | (image,) = image_batch_node.batch(image, new_image) 30 | return (image,) -------------------------------------------------------------------------------- /nodes_legacy.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import folder_paths 4 | from transformers import AutoTokenizer, AutoModel 5 | from torchvision.transforms.v2 import ToPILImage 6 | from decord import VideoReader, cpu # pip install decord 7 | from PIL import Image 8 | 9 | 10 | class MiniCPM_VQA: 11 | def __init__(self): 12 | self.model_checkpoint = None 13 | self.tokenizer = None 14 | self.model = None 15 | self.device = ( 16 | torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 17 | ) 18 | self.bf16_support = ( 19 | torch.cuda.is_available() 20 | and torch.cuda.get_device_capability(self.device)[0] >= 8 21 | ) 22 | 23 | @classmethod 24 | def INPUT_TYPES(s): 25 | return { 26 | "required": { 27 | "text": ("STRING", {"default": "", "multiline": True}), 28 | "model": ( 29 | ["MiniCPM-V-2_6-int4", "MiniCPM-Llama3-V-2_5-int4"], 30 | {"default": "MiniCPM-V-2_6-int4"}, 31 | ), 32 | "keep_model_loaded": ("BOOLEAN", {"default": False}), 33 | "top_p": ( 34 | "FLOAT", 35 | { 36 | "default": 0.8, 37 | }, 38 | ), 39 | "top_k": ( 40 | "INT", 41 | { 42 | "default": 100, 43 | }, 44 | ), 45 | "temperature": ( 46 | "FLOAT", 47 | {"default": 0.7, "min": 0, "max": 1, "step": 0.1}, 48 | ), 49 | "repetition_penalty": ( 50 | "FLOAT", 51 | { 52 | "default": 1.05, 53 | }, 54 | ), 55 | "max_new_tokens": ( 56 | "INT", 57 | { 58 | "default": 2048, 59 | }, 60 | ), 61 | "video_max_num_frames": ( 62 | "INT", 63 | { 64 | "default": 64, 65 | }, 66 | ), # if cuda OOM set a smaller number 67 | "video_max_slice_nums": ( 68 | "INT", 69 | { 70 | "default": 2, 71 | }, 72 | ), # use 1 if cuda OOM and video resolution > 448*448 73 | "seed": ("INT", {"default": -1}), # add seed parameter, default is -1 74 | }, 75 | "optional": { 76 | "source_video_path": ("PATH",), 77 | "source_image_path_1st": ("IMAGE",), 78 | "source_image_path_2nd": ("IMAGE",), 79 | "source_image_path_3rd": ("IMAGE",), 80 | }, 81 | } 82 | 83 | RETURN_TYPES = ("STRING",) 84 | FUNCTION = "inference" 85 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 86 | 87 | def encode_video(self, source_video_path, MAX_NUM_FRAMES): 88 | def uniform_sample(l, n): # noqa: E741 89 | gap = len(l) / n 90 | idxs = [int(i * gap + gap / 2) for i in range(n)] 91 | return [l[i] for i in idxs] 92 | 93 | vr = VideoReader(source_video_path, ctx=cpu(0)) 94 | total_frames = len(vr) + 1 95 | print("Total frames:", total_frames) 96 | avg_fps = vr.get_avg_fps() 97 | print("Get average FPS(frame per second):", avg_fps) 98 | sample_fps = round(avg_fps / 1) # FPS 99 | duration = len(vr) / avg_fps 100 | print("Total duration:", duration, "seconds") 101 | width = vr[0].shape[1] 102 | height = vr[0].shape[0] 103 | print("Video resolution(width x height):", width, "x", height) 104 | 105 | frame_idx = [i for i in range(0, len(vr), sample_fps)] 106 | if len(frame_idx) > MAX_NUM_FRAMES: 107 | frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES) 108 | frames = vr.get_batch(frame_idx).asnumpy() 109 | frames = [Image.fromarray(v.astype("uint8")) for v in frames] 110 | print("num frames:", len(frames)) 111 | return frames 112 | 113 | def inference( 114 | self, 115 | text, 116 | model, 117 | keep_model_loaded, 118 | top_p, 119 | top_k, 120 | temperature, 121 | repetition_penalty, 122 | max_new_tokens, 123 | video_max_num_frames, 124 | video_max_slice_nums, 125 | seed, 126 | source_image_path_1st=None, 127 | source_image_path_2nd=None, 128 | source_image_path_3rd=None, 129 | source_video_path=None, 130 | ): 131 | if seed != -1: 132 | torch.manual_seed(seed) 133 | model_id = f"openbmb/{model}" 134 | self.model_checkpoint = os.path.join( 135 | folder_paths.models_dir, "prompt_generator", os.path.basename(model_id) 136 | ) 137 | 138 | if not os.path.exists(self.model_checkpoint): 139 | from huggingface_hub import snapshot_download 140 | 141 | snapshot_download( 142 | repo_id=model_id, 143 | local_dir=self.model_checkpoint, 144 | local_dir_use_symlinks=False, 145 | ) 146 | 147 | if self.tokenizer is None: 148 | self.tokenizer = AutoTokenizer.from_pretrained( 149 | self.model_checkpoint, 150 | trust_remote_code=True, 151 | low_cpu_mem_usage=True, 152 | ) 153 | if self.model is None: 154 | self.model = AutoModel.from_pretrained( 155 | self.model_checkpoint, 156 | trust_remote_code=True, 157 | low_cpu_mem_usage=True, 158 | attn_implementation="sdpa", 159 | torch_dtype=torch.bfloat16 if self.bf16_support else torch.float16, 160 | ) 161 | 162 | with torch.no_grad(): 163 | if source_video_path: 164 | frames = self.encode_video(source_video_path, video_max_num_frames) 165 | msgs = [{"role": "user", "content": frames + [text]}] 166 | elif ( 167 | source_image_path_1st is not None 168 | and source_image_path_2nd is not None 169 | and source_image_path_3rd is not None 170 | ): 171 | image1 = ToPILImage()( 172 | source_image_path_1st.permute([0, 3, 1, 2])[0] 173 | ).convert("RGB") 174 | image2 = ToPILImage()( 175 | source_image_path_2nd.permute([0, 3, 1, 2])[0] 176 | ).convert("RGB") 177 | image3 = ToPILImage()( 178 | source_image_path_3rd.permute([0, 3, 1, 2])[0] 179 | ).convert("RGB") 180 | msgs = [{"role": "user", "content": [image1, image2, image3, text]}] 181 | elif ( 182 | source_image_path_1st is not None 183 | and source_image_path_2nd is not None 184 | and source_image_path_3rd is None 185 | ): 186 | image1 = ToPILImage()( 187 | source_image_path_1st.permute([0, 3, 1, 2])[0] 188 | ).convert("RGB") 189 | image2 = ToPILImage()( 190 | source_image_path_2nd.permute([0, 3, 1, 2])[0] 191 | ).convert("RGB") 192 | msgs = [{"role": "user", "content": [image1, image2, text]}] 193 | elif ( 194 | source_image_path_1st is not None 195 | and source_image_path_2nd is None 196 | and source_image_path_3rd is not None 197 | ): 198 | image1 = ToPILImage()( 199 | source_image_path_1st.permute([0, 3, 1, 2])[0] 200 | ).convert("RGB") 201 | image3 = ToPILImage()( 202 | source_image_path_3rd.permute([0, 3, 1, 2])[0] 203 | ).convert("RGB") 204 | msgs = [{"role": "user", "content": [image1, image3, text]}] 205 | elif ( 206 | source_image_path_1st is None 207 | and source_image_path_2nd is not None 208 | and source_image_path_3rd is not None 209 | ): 210 | image2 = ToPILImage()( 211 | source_image_path_2nd.permute([0, 3, 1, 2])[0] 212 | ).convert("RGB") 213 | image3 = ToPILImage()( 214 | source_image_path_3rd.permute([0, 3, 1, 2])[0] 215 | ).convert("RGB") 216 | msgs = [{"role": "user", "content": [image2, image3, text]}] 217 | elif ( 218 | source_image_path_1st is not None 219 | and source_image_path_2nd is None 220 | and source_image_path_3rd is None 221 | ): 222 | image = ToPILImage()( 223 | source_image_path_1st.permute([0, 3, 1, 2])[0] 224 | ).convert("RGB") 225 | msgs = [{"role": "user", "content": [image, text]}] 226 | elif ( 227 | source_image_path_1st is None 228 | and source_image_path_2nd is not None 229 | and source_image_path_3rd is None 230 | ): 231 | image = ToPILImage()( 232 | source_image_path_2nd.permute([0, 3, 1, 2])[0] 233 | ).convert("RGB") 234 | msgs = [{"role": "user", "content": [image, text]}] 235 | elif ( 236 | source_image_path_1st is None 237 | and source_image_path_2nd is None 238 | and source_image_path_3rd is not None 239 | ): 240 | image = ToPILImage()( 241 | source_image_path_3rd.permute([0, 3, 1, 2])[0] 242 | ).convert("RGB") 243 | msgs = [{"role": "user", "content": [image, text]}] 244 | else: 245 | msgs = [{"role": "user", "content": [text]}] 246 | # raise ValueError("Either image or video must be provided") 247 | 248 | params = {"use_image_id": False, "max_slice_nums": video_max_slice_nums} 249 | 250 | # offload model to CPU 251 | # self.model = self.model.to(torch.device("cpu")) 252 | # self.model.eval() 253 | 254 | result = self.model.chat( 255 | image=None, 256 | msgs=msgs, 257 | tokenizer=self.tokenizer, 258 | sampling=True, 259 | top_k=top_k, 260 | top_p=top_p, 261 | temperature=temperature, 262 | repetition_penalty=repetition_penalty, 263 | max_new_tokens=max_new_tokens, 264 | **params, 265 | ) 266 | 267 | # offload model to GPU 268 | # self.model = self.model.to(torch.device("cpu")) 269 | # self.model.eval() 270 | 271 | if not keep_model_loaded: 272 | del self.tokenizer # release tokenizer memory 273 | del self.model # release model memory 274 | self.tokenizer = None # set tokenizer to None 275 | self.model = None # set model to None 276 | torch.cuda.empty_cache() # release GPU memory 277 | torch.cuda.ipc_collect() 278 | 279 | return (result,) 280 | -------------------------------------------------------------------------------- /nodes_polished.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import folder_paths 4 | from transformers import AutoTokenizer, AutoModel 5 | from torchvision.transforms.v2 import ToPILImage 6 | from decord import VideoReader, cpu # pip install decord 7 | from PIL import Image 8 | 9 | 10 | class MiniCPM_VQA_Polished: 11 | def __init__(self): 12 | self.model_checkpoint = None 13 | self.tokenizer = None 14 | self.model = None 15 | self.device = ( 16 | torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu") 17 | ) 18 | self.bf16_support = ( 19 | torch.cuda.is_available() 20 | and torch.cuda.get_device_capability(self.device)[0] >= 8 21 | ) 22 | 23 | @classmethod 24 | def INPUT_TYPES(s): 25 | return { 26 | "required": { 27 | "text": ("STRING", {"default": "", "multiline": True}), 28 | "model": ( 29 | ["MiniCPM-V-2_6-int4", "MiniCPM-Llama3-V-2_5-int4"], 30 | {"default": "MiniCPM-V-2_6-int4"}, 31 | ), 32 | "keep_model_loaded": ("BOOLEAN", {"default": False}), 33 | "top_p": ( 34 | "FLOAT", 35 | { 36 | "default": 0.8, 37 | }, 38 | ), 39 | "top_k": ( 40 | "INT", 41 | { 42 | "default": 100, 43 | }, 44 | ), 45 | "temperature": ( 46 | "FLOAT", 47 | {"default": 0.7, "min": 0, "max": 1, "step": 0.1}, 48 | ), 49 | "repetition_penalty": ( 50 | "FLOAT", 51 | { 52 | "default": 1.05, 53 | }, 54 | ), 55 | "max_new_tokens": ( 56 | "INT", 57 | { 58 | "default": 2048, 59 | }, 60 | ), 61 | "video_max_num_frames": ( 62 | "INT", 63 | { 64 | "default": 64, 65 | }, 66 | ), # if cuda OOM set a smaller number 67 | "video_max_slice_nums": ( 68 | "INT", 69 | { 70 | "default": 2, 71 | }, 72 | ), # use 1 if cuda OOM and video resolution > 448*448 73 | "seed": ("INT", {"default": -1}), # add seed parameter, default is -1 74 | }, 75 | "optional": { 76 | "source_video_path": ("PATH",), 77 | "source_image_path": ("IMAGE",), 78 | }, 79 | } 80 | 81 | RETURN_TYPES = ("STRING",) 82 | FUNCTION = "inference" 83 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 84 | 85 | def encode_video(self, source_video_path, MAX_NUM_FRAMES): 86 | def uniform_sample(l, n): # noqa: E741 87 | gap = len(l) / n 88 | idxs = [int(i * gap + gap / 2) for i in range(n)] 89 | return [l[i] for i in idxs] 90 | 91 | vr = VideoReader(source_video_path, ctx=cpu(0)) 92 | total_frames = len(vr) + 1 93 | print("Total frames:", total_frames) 94 | avg_fps = vr.get_avg_fps() 95 | print("Get average FPS(frame per second):", avg_fps) 96 | sample_fps = round(avg_fps / 1) # FPS 97 | duration = len(vr) / avg_fps 98 | print("Total duration:", duration, "seconds") 99 | width = vr[0].shape[1] 100 | height = vr[0].shape[0] 101 | print("Video resolution(width x height):", width, "x", height) 102 | 103 | frame_idx = [i for i in range(0, len(vr), sample_fps)] 104 | if len(frame_idx) > MAX_NUM_FRAMES: 105 | frame_idx = uniform_sample(frame_idx, MAX_NUM_FRAMES) 106 | frames = vr.get_batch(frame_idx).asnumpy() 107 | frames = [Image.fromarray(v.astype("uint8")) for v in frames] 108 | print("num frames:", len(frames)) 109 | return frames 110 | 111 | def inference( 112 | self, 113 | text, 114 | model, 115 | keep_model_loaded, 116 | top_p, 117 | top_k, 118 | temperature, 119 | repetition_penalty, 120 | max_new_tokens, 121 | video_max_num_frames, 122 | video_max_slice_nums, 123 | seed, 124 | source_image_path=None, 125 | source_video_path=None, 126 | ): 127 | if seed != -1: 128 | torch.manual_seed(seed) 129 | model_id = f"openbmb/{model}" 130 | self.model_checkpoint = os.path.join( 131 | folder_paths.models_dir, "prompt_generator", os.path.basename(model_id) 132 | ) 133 | 134 | if not os.path.exists(self.model_checkpoint): 135 | from huggingface_hub import snapshot_download 136 | 137 | snapshot_download( 138 | repo_id=model_id, 139 | local_dir=self.model_checkpoint, 140 | local_dir_use_symlinks=False, 141 | ) 142 | 143 | if self.tokenizer is None: 144 | self.tokenizer = AutoTokenizer.from_pretrained( 145 | self.model_checkpoint, 146 | trust_remote_code=True, 147 | low_cpu_mem_usage=True, 148 | ) 149 | 150 | if self.model is None: 151 | self.model = AutoModel.from_pretrained( 152 | self.model_checkpoint, 153 | trust_remote_code=True, 154 | low_cpu_mem_usage=True, 155 | attn_implementation="sdpa", 156 | torch_dtype=torch.bfloat16 if self.bf16_support else torch.float16, 157 | ) 158 | 159 | with torch.no_grad(): 160 | if source_video_path: 161 | print("source_video_path:", source_video_path) 162 | frames = self.encode_video(source_video_path, video_max_num_frames) 163 | msgs = [{"role": "user", "content": frames + [text]}] 164 | elif source_image_path is not None: 165 | images = source_image_path.permute([0, 3, 1, 2]) 166 | images = [ToPILImage()(img).convert("RGB") for img in images] 167 | msgs = [{"role": "user", "content": images + [text]}] 168 | else: 169 | msgs = [{"role": "user", "content": [text]}] 170 | # raise ValueError("Either image or video must be provided") 171 | 172 | params = {"use_image_id": False, "max_slice_nums": video_max_slice_nums} 173 | 174 | # offload model to CPU 175 | # self.model = self.model.to(torch.device("cpu")) 176 | # self.model.eval() 177 | 178 | result = self.model.chat( 179 | image=None, 180 | msgs=msgs, 181 | tokenizer=self.tokenizer, 182 | sampling=True, 183 | top_k=top_k, 184 | top_p=top_p, 185 | temperature=temperature, 186 | repetition_penalty=repetition_penalty, 187 | max_new_tokens=max_new_tokens, 188 | **params, 189 | ) 190 | # offload model to GPU 191 | # self.model = self.model.to(torch.device("cpu")) 192 | # self.model.eval() 193 | if not keep_model_loaded: 194 | del self.tokenizer # release tokenizer memory 195 | del self.model # release model memory 196 | self.tokenizer = None # set tokenizer to None 197 | self.model = None # set model to None 198 | torch.cuda.empty_cache() # release GPU memory 199 | torch.cuda.ipc_collect() 200 | 201 | return (result,) 202 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "ComfyUI_MiniCPM-V-2_6-int4" 3 | description = "This is an implementation of [MiniCPM-V-2_6-int4](https://github.com/OpenBMB/MiniCPM-V) by [ComfyUI](https://github.com/comfyanonymous/ComfyUI), including support for text-based queries, video queries, single-image queries, and multi-image queries to generate captions or responses." 4 | version = "1.0.0" 5 | license = { file = "LICENSE" } 6 | dependencies = ["torch", "torchvision", "numpy", "pillow", "huggingface_hub", "transformers", "decord", "bitsandbytes","accelerate"] 7 | 8 | [project.urls] 9 | Repository = "https://github.com/IuvenisSapiens/ComfyUI_MiniCPM-V-2_6-int4" 10 | 11 | [tool.comfy] 12 | PublisherId = "IuvenisSapiens" 13 | DisplayName = "ComfyUI_MiniCPM-V-2_6-int4" 14 | Icon = "favicon.ico" 15 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | torchvision 3 | torchaudio 4 | numpy 5 | pillow 6 | huggingface_hub 7 | transformers 8 | decord 9 | bitsandbytes 10 | accelerate -------------------------------------------------------------------------------- /util_nodes.py: -------------------------------------------------------------------------------- 1 | import os 2 | import folder_paths 3 | current_dir = os.path.dirname(os.path.abspath(__file__)) 4 | input_dir = folder_paths.get_input_directory() 5 | output_dir = folder_paths.get_output_directory() 6 | 7 | class LoadVideo: 8 | @classmethod 9 | def INPUT_TYPES(s): 10 | files = [f for f in os.listdir(input_dir) if os.path.isfile(os.path.join(input_dir, f)) and f.split('.')[-1] in ["mp4", "mkv", "mov", "avi", "flv", "wmv", "webm", "m4v"]] 11 | return {"required":{ 12 | "video":(files,), 13 | }} 14 | 15 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 16 | DESCRIPTION = "Load Video" 17 | 18 | RETURN_TYPES = ("PATH",) 19 | 20 | OUTPUT_NODE = False 21 | 22 | FUNCTION = "load_video" 23 | 24 | def load_video(self, video): 25 | video_path = os.path.join(input_dir,video) 26 | return (video_path,) 27 | 28 | class PreviewVideo: 29 | @classmethod 30 | def INPUT_TYPES(s): 31 | return {"required":{ 32 | "video":("PATH",), 33 | }} 34 | 35 | CATEGORY = "Comfyui_MiniCPM-V-2_6-int4" 36 | DESCRIPTION = "Load Video" 37 | 38 | RETURN_TYPES = () 39 | 40 | OUTPUT_NODE = True 41 | 42 | FUNCTION = "load_video" 43 | 44 | def load_video(self, video): 45 | video_name = os.path.basename(video) 46 | video_path_name = os.path.basename(os.path.dirname(video)) 47 | return {"ui":{"video":[video_name,video_path_name]}} 48 | -------------------------------------------------------------------------------- /web/js/displayText.js: -------------------------------------------------------------------------------- 1 | const app = window.comfyAPI.app.app; 2 | const ComfyWidgets = window.comfyAPI.widgets.ComfyWidgets; 3 | 4 | app.registerExtension({ 5 | name: "Comfyui_MiniCPM-V-2_6-int4.DisplayTextNode", 6 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 7 | if (nodeData.name === "DisplayText") { 8 | function populate(text) { 9 | if (this.widgets) { 10 | for (let i = 1; i < this.widgets.length; i++) { 11 | this.widgets[i].onRemove?.(); 12 | } 13 | this.widgets.length = 1; 14 | } 15 | 16 | const v = [...text]; 17 | if (!v[0]) { 18 | v.shift(); 19 | } 20 | for (const list of v) { 21 | const w = ComfyWidgets["STRING"](this, "text", ["STRING", { multiline: true }], app).widget; 22 | w.inputEl.readOnly = true; 23 | w.inputEl.style.opacity = 0.6; 24 | w.value = list; 25 | } 26 | 27 | requestAnimationFrame(() => { 28 | const sz = this.computeSize(); 29 | if (sz[0] < this.size[0]) { 30 | sz[0] = this.size[0]; 31 | } 32 | if (sz[1] < this.size[1]) { 33 | sz[1] = this.size[1]; 34 | } 35 | this.onResize?.(sz); 36 | app.graph.setDirtyCanvas(true, false); 37 | }); 38 | } 39 | 40 | const onExecuted = nodeType.prototype.onExecuted; 41 | nodeType.prototype.onExecuted = function (message) { 42 | onExecuted?.apply(this, arguments); 43 | populate.call(this, message.text); 44 | }; 45 | 46 | const onConfigure = nodeType.prototype.onConfigure; 47 | nodeType.prototype.onConfigure = function () { 48 | onConfigure?.apply(this, arguments); 49 | if (this.widgets_values?.length) { 50 | populate.call(this, this.widgets_values.slice(+this.widgets_values.length > 1)); 51 | } 52 | }; 53 | } 54 | }, 55 | }); 56 | -------------------------------------------------------------------------------- /web/js/multipleImagesInput.js: -------------------------------------------------------------------------------- 1 | const app = window.comfyAPI.app.app; 2 | 3 | app.registerExtension({ 4 | name: "Comfyui_MiniCPM-V-2_6-int4.MultipleImagesInput", 5 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 6 | if (!nodeData?.category?.startsWith("Comfyui_MiniCPM-V-2_6-int4")) { 7 | return; 8 | } 9 | switch (nodeData.name) { 10 | case "MultipleImagesInput": 11 | nodeType.prototype.onNodeCreated = function () { 12 | this._type = "IMAGE"; 13 | this.inputs_offset = nodeData.name.includes("selective") ? 1 : 0; 14 | this.addWidget("button", "Update inputs", null, () => { 15 | if (!this.inputs) { 16 | this.inputs = []; 17 | } 18 | const target_number_of_inputs = this.widgets.find( 19 | (w) => w.name === "inputcount" 20 | )["value"]; 21 | if (target_number_of_inputs === this.inputs.length) return; // already set, do nothing 22 | 23 | if (target_number_of_inputs < this.inputs.length) { 24 | for ( 25 | let i = this.inputs.length; 26 | i >= this.inputs_offset + target_number_of_inputs; 27 | i-- 28 | ) 29 | this.removeInput(i); 30 | } else { 31 | for ( 32 | let i = this.inputs.length + 1 - this.inputs_offset; 33 | i <= target_number_of_inputs; 34 | ++i 35 | ) 36 | this.addInput(`image_${i}`, this._type); 37 | } 38 | }); 39 | }; 40 | break; 41 | } 42 | }, 43 | async setup() { 44 | const originalComputeVisibleNodes = 45 | LGraphCanvas.prototype.computeVisibleNodes; 46 | LGraphCanvas.prototype.computeVisibleNodes = function () { 47 | const visibleNodesSet = new Set( 48 | originalComputeVisibleNodes.apply(this, arguments) 49 | ); 50 | for (const node of this.graph._nodes) { 51 | if ( 52 | (node.type === "SetNode" || node.type === "GetNode") && 53 | node.drawConnection 54 | ) { 55 | visibleNodesSet.add(node); 56 | } 57 | } 58 | return Array.from(visibleNodesSet); 59 | }; 60 | }, 61 | }); 62 | -------------------------------------------------------------------------------- /web/js/previewVideo.js: -------------------------------------------------------------------------------- 1 | const app = window.comfyAPI.app.app; 2 | const api = window.comfyAPI.api.api; 3 | 4 | function fitHeight(node) { 5 | node.setSize([node.size[0], node.computeSize([node.size[0], node.size[1]])[1]]) 6 | node?.graph?.setDirtyCanvas(true); 7 | } 8 | function chainCallback(object, property, callback) { 9 | if (object == undefined) { 10 | //This should not happen. 11 | console.error("Tried to add callback to non-existant object") 12 | return; 13 | } 14 | if (property in object) { 15 | const callback_orig = object[property] 16 | object[property] = function () { 17 | const r = callback_orig.apply(this, arguments); 18 | callback.apply(this, arguments); 19 | return r 20 | }; 21 | } else { 22 | object[property] = callback; 23 | } 24 | } 25 | 26 | function addPreviewOptions(nodeType) { 27 | chainCallback(nodeType.prototype, "getExtraMenuOptions", function(_, options) { 28 | // The intended way of appending options is returning a list of extra options, 29 | // but this isn't used in widgetInputs.js and would require 30 | // less generalization of chainCallback 31 | let optNew = [] 32 | try { 33 | const previewWidget = this.widgets.find((w) => w.name === "videopreview"); 34 | 35 | let url = null 36 | if (previewWidget.videoEl?.hidden == false && previewWidget.videoEl.src) { 37 | //Use full quality video 38 | //url = api.apiURL('/view?' + new URLSearchParams(previewWidget.value.params)); 39 | url = previewWidget.videoEl.src 40 | } 41 | if (url) { 42 | optNew.push( 43 | { 44 | content: "Open preview", 45 | callback: () => { 46 | window.open(url, "_blank") 47 | }, 48 | }, 49 | { 50 | content: "Save preview", 51 | callback: () => { 52 | const a = document.createElement("a"); 53 | a.href = url; 54 | a.setAttribute("download", new URLSearchParams(previewWidget.value.params).get("filename")); 55 | document.body.append(a); 56 | a.click(); 57 | requestAnimationFrame(() => a.remove()); 58 | }, 59 | } 60 | ); 61 | } 62 | if(options.length > 0 && options[0] != null && optNew.length > 0) { 63 | optNew.push(null); 64 | } 65 | options.unshift(...optNew); 66 | 67 | } catch (error) { 68 | console.log(error); 69 | } 70 | 71 | }); 72 | } 73 | function previewVideo(node,file,type){ 74 | var element = document.createElement("div"); 75 | const previewNode = node; 76 | var previewWidget = node.addDOMWidget("videopreview", "preview", element, { 77 | serialize: false, 78 | hideOnZoom: false, 79 | getValue() { 80 | return element.value; 81 | }, 82 | setValue(v) { 83 | element.value = v; 84 | }, 85 | }); 86 | previewWidget.computeSize = function(width) { 87 | if (this.aspectRatio && !this.parentEl.hidden) { 88 | let height = (previewNode.size[0]-20)/ this.aspectRatio + 10; 89 | if (!(height > 0)) { 90 | height = 0; 91 | } 92 | this.computedHeight = height + 10; 93 | return [width, height]; 94 | } 95 | return [width, -4];//no loaded src, widget should not display 96 | } 97 | // element.style['pointer-events'] = "none" 98 | previewWidget.value = {hidden: false, paused: false, params: {}} 99 | previewWidget.parentEl = document.createElement("div"); 100 | previewWidget.parentEl.className = "video_preview"; 101 | previewWidget.parentEl.style['width'] = "100%" 102 | element.appendChild(previewWidget.parentEl); 103 | previewWidget.videoEl = document.createElement("video"); 104 | previewWidget.videoEl.controls = true; 105 | previewWidget.videoEl.loop = false; 106 | previewWidget.videoEl.muted = false; 107 | previewWidget.videoEl.style['width'] = "100%" 108 | previewWidget.videoEl.addEventListener("loadedmetadata", () => { 109 | 110 | previewWidget.aspectRatio = previewWidget.videoEl.videoWidth / previewWidget.videoEl.videoHeight; 111 | fitHeight(this); 112 | }); 113 | previewWidget.videoEl.addEventListener("error", () => { 114 | previewWidget.parentEl.hidden = true; 115 | fitHeight(this); 116 | }); 117 | 118 | let params = { 119 | "filename": file, 120 | "type": type, 121 | } 122 | 123 | previewWidget.parentEl.hidden = previewWidget.value.hidden; 124 | previewWidget.videoEl.autoplay = !previewWidget.value.paused && !previewWidget.value.hidden; 125 | let target_width = 256 126 | if (element.style?.width) { 127 | //overscale to allow scrolling. Endpoint won't return higher than native 128 | target_width = element.style.width.slice(0,-2)*2; 129 | } 130 | if (!params.force_size || params.force_size.includes("?") || params.force_size == "Disabled") { 131 | params.force_size = target_width+"x?" 132 | } else { 133 | let size = params.force_size.split("x") 134 | let ar = parseInt(size[0])/parseInt(size[1]) 135 | params.force_size = target_width+"x"+(target_width/ar) 136 | } 137 | 138 | previewWidget.videoEl.src = api.apiURL('/view?' + new URLSearchParams(params)); 139 | 140 | previewWidget.videoEl.hidden = false; 141 | previewWidget.parentEl.appendChild(previewWidget.videoEl) 142 | } 143 | 144 | app.registerExtension({ 145 | name: "Comfyui_MiniCPM-V-2_6-int4.VideoPreviewer", 146 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 147 | if (nodeData?.name == "PreviewVideo") { 148 | nodeType.prototype.onExecuted = function (data) { 149 | previewVideo(this, data.video[0], data.video[1]); 150 | } 151 | } 152 | } 153 | }); 154 | -------------------------------------------------------------------------------- /web/js/uploadVideo.js: -------------------------------------------------------------------------------- 1 | const app = window.comfyAPI.app.app; 2 | const api = window.comfyAPI.api.api; 3 | const ComfyWidgets = window.comfyAPI.widgets.ComfyWidgets; 4 | 5 | function fitHeight(node) { 6 | node.setSize([node.size[0], node.computeSize([node.size[0], node.size[1]])[1]]) 7 | node?.graph?.setDirtyCanvas(true); 8 | } 9 | 10 | function previewVideo(node, file) { 11 | while (node.widgets.length > 2) { 12 | node.widgets.pop() 13 | } 14 | try { 15 | var el = document.getElementById("uploadVideo"); 16 | el.remove(); 17 | } catch (error) { 18 | console.log(error); 19 | } 20 | var element = document.createElement("div"); 21 | element.id = "uploadVideo"; 22 | const previewNode = node; 23 | var previewWidget = node.addDOMWidget("videopreview", "preview", element, { 24 | serialize: false, 25 | hideOnZoom: false, 26 | getValue() { 27 | return element.value; 28 | }, 29 | setValue(v) { 30 | element.value = v; 31 | }, 32 | }); 33 | previewWidget.computeSize = function (width) { 34 | if (this.aspectRatio && !this.parentEl.hidden) { 35 | let height = (previewNode.size[0] - 20) / this.aspectRatio + 10; 36 | if (!(height > 0)) { 37 | height = 0; 38 | } 39 | this.computedHeight = height + 10; 40 | return [width, height]; 41 | } 42 | return [width, -4];//no loaded src, widget should not display 43 | } 44 | // element.style['pointer-events'] = "none" 45 | previewWidget.value = { hidden: false, paused: false, params: {} } 46 | previewWidget.parentEl = document.createElement("div"); 47 | previewWidget.parentEl.className = "video_preview"; 48 | previewWidget.parentEl.style['width'] = "100%" 49 | element.appendChild(previewWidget.parentEl); 50 | previewWidget.videoEl = document.createElement("video"); 51 | previewWidget.videoEl.controls = true; 52 | previewWidget.videoEl.loop = false; 53 | previewWidget.videoEl.muted = false; 54 | previewWidget.videoEl.style['width'] = "100%" 55 | previewWidget.videoEl.addEventListener("loadedmetadata", () => { 56 | 57 | previewWidget.aspectRatio = previewWidget.videoEl.videoWidth / previewWidget.videoEl.videoHeight; 58 | fitHeight(this); 59 | }); 60 | previewWidget.videoEl.addEventListener("error", () => { 61 | previewWidget.parentEl.hidden = true; 62 | fitHeight(this); 63 | }); 64 | 65 | let params = { 66 | "filename": file, 67 | "type": "input", 68 | } 69 | 70 | previewWidget.parentEl.hidden = previewWidget.value.hidden; 71 | previewWidget.videoEl.autoplay = !previewWidget.value.paused && !previewWidget.value.hidden; 72 | let target_width = 256 73 | if (element.style?.width) { 74 | //overscale to allow scrolling. Endpoint won't return higher than native 75 | target_width = element.style.width.slice(0, -2) * 2; 76 | } 77 | if (!params.force_size || params.force_size.includes("?") || params.force_size == "Disabled") { 78 | params.force_size = target_width + "x?" 79 | } else { 80 | let size = params.force_size.split("x") 81 | let ar = parseInt(size[0]) / parseInt(size[1]) 82 | params.force_size = target_width + "x" + (target_width / ar) 83 | } 84 | 85 | previewWidget.videoEl.src = api.apiURL('/view?' + new URLSearchParams(params)); 86 | 87 | previewWidget.videoEl.hidden = false; 88 | previewWidget.parentEl.appendChild(previewWidget.videoEl) 89 | } 90 | 91 | function videoUpload(node, inputName, inputData, app) { 92 | const videoWidget = node.widgets.find((w) => w.name === "video"); 93 | let uploadWidget; 94 | /* 95 | A method that returns the required style for the html 96 | */ 97 | var default_value = videoWidget.value; 98 | Object.defineProperty(videoWidget, "value", { 99 | set: function (value) { 100 | this._real_value = value; 101 | }, 102 | 103 | get: function () { 104 | let value = ""; 105 | if (this._real_value) { 106 | value = this._real_value; 107 | } else { 108 | return default_value; 109 | } 110 | 111 | if (value.filename) { 112 | let real_value = value; 113 | value = ""; 114 | if (real_value.subfolder) { 115 | value = real_value.subfolder + "/"; 116 | } 117 | 118 | value += real_value.filename; 119 | 120 | if (real_value.type && real_value.type !== "input") 121 | value += ` [${real_value.type}]`; 122 | } 123 | return value; 124 | } 125 | }); 126 | async function uploadFile(file, updateNode, pasted = false) { 127 | try { 128 | // Wrap file in formdata so it includes filename 129 | const body = new FormData(); 130 | body.append("image", file); 131 | if (pasted) body.append("subfolder", "pasted"); 132 | const resp = await api.fetchApi("/upload/image", { 133 | method: "POST", 134 | body, 135 | }); 136 | 137 | if (resp.status === 200) { 138 | const data = await resp.json(); 139 | // Add the file to the dropdown list and update the widget value 140 | let path = data.name; 141 | if (data.subfolder) path = data.subfolder + "/" + path; 142 | 143 | if (!videoWidget.options.values.includes(path)) { 144 | videoWidget.options.values.push(path); 145 | } 146 | 147 | if (updateNode) { 148 | videoWidget.value = path; 149 | previewVideo(node, path) 150 | 151 | } 152 | } else { 153 | alert(resp.status + " - " + resp.statusText); 154 | } 155 | } catch (error) { 156 | alert(error); 157 | } 158 | } 159 | 160 | const fileInput = document.createElement("input"); 161 | Object.assign(fileInput, { 162 | type: "file", 163 | accept: "video/mp4,video/mkv,video/mov,video/avi,video/flv,video/wmv,video/webm,video/m4v", 164 | style: "display: none", 165 | onchange: async () => { 166 | if (fileInput.files.length) { 167 | await uploadFile(fileInput.files[0], true); 168 | } 169 | }, 170 | }); 171 | document.body.append(fileInput); 172 | 173 | // Create the button widget for selecting the files 174 | uploadWidget = node.addWidget("button", "choose video file to upload", "Video", () => { 175 | fileInput.click(); 176 | }); 177 | 178 | uploadWidget.serialize = false; 179 | 180 | previewVideo(node, videoWidget.value); 181 | const cb = node.callback; 182 | videoWidget.callback = function () { 183 | previewVideo(node, videoWidget.value); 184 | if (cb) { 185 | return cb.apply(this, arguments); 186 | } 187 | }; 188 | 189 | return { widget: uploadWidget }; 190 | } 191 | 192 | ComfyWidgets.VIDEOPLOAD = videoUpload; 193 | 194 | app.registerExtension({ 195 | name: "Comfyui_MiniCPM-V-2_6-int4.UploadVideo", 196 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 197 | if (nodeData?.name == "LoadVideo") { 198 | nodeData.input.required.upload = ["VIDEOPLOAD"]; 199 | } 200 | }, 201 | }); 202 | 203 | --------------------------------------------------------------------------------