├── .github └── workflows │ └── publish.yml ├── .gitignore ├── ADV_CLIP_emb_encode.py ├── LICENSE ├── README.md ├── __init__.py ├── configs └── model_zoo.json ├── half_json.py ├── hook └── modeling_florence2.py ├── mz_gen_translate.py ├── mz_llama_core_nodes.py ├── mz_llama_cpp.py ├── mz_openaiapi.py ├── mz_prompt_utils.py ├── mz_prompt_webserver.py ├── mz_prompts.py ├── mz_transformers.py ├── pyproject.toml ├── v1 ├── init.py ├── mz_deprecated.py ├── mz_llama3.py ├── mz_llava.py └── mz_phi3.py └── web └── prompt_mz.js /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | permissions: 11 | issues: write 12 | 13 | jobs: 14 | publish-node: 15 | name: Publish Custom Node to registry 16 | runs-on: ubuntu-latest 17 | if: ${{ github.repository_owner == 'MinusZoneAI' }} 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@v1 23 | with: 24 | ## Add your own personal access token to your Github Repository secrets and reference it here. 25 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 
95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | .vscode 162 | 163 | *.bat 164 | exclude.txt -------------------------------------------------------------------------------- /ADV_CLIP_emb_encode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import itertools 4 | from math import gcd 5 | 6 | from comfy import model_management 7 | from comfy.sdxl_clip import SDXLClipModel, SDXLRefinerClipModel, SDXLClipG 8 | 9 | def _grouper(n, iterable): 10 | it = iter(iterable) 11 | while True: 12 | chunk = list(itertools.islice(it, n)) 13 | if not chunk: 14 | return 15 | yield chunk 16 | 17 | def _norm_mag(w, n): 18 | d = w - 1 19 | return 1 + np.sign(d) * np.sqrt(np.abs(d)**2 / n) 20 | #return np.sign(w) * np.sqrt(np.abs(w)**2 / n) 21 | 22 | def divide_length(word_ids, weights): 23 | sums = dict(zip(*np.unique(word_ids, return_counts=True))) 24 | sums[0] = 1 25 | weights = [[_norm_mag(w, sums[id]) if id != 0 else 1.0 26 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 27 | return weights 28 | 29 | def shift_mean_weight(word_ids, weights): 30 | delta = 1 - np.mean([w for x, y in zip(weights, word_ids) for w, id in zip(x,y) if id != 0]) 31 | weights = [[w if id == 0 else w+delta 32 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 33 | return weights 34 | 35 | def scale_to_norm(weights, word_ids, w_max): 36 | top = np.max(weights) 37 | w_max = min(top, w_max) 38 | weights = [[w_max if id == 0 else (w/top) * w_max 39 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 40 | return weights 41 | 42 | def from_zero(weights, base_emb): 43 | weight_tensor = torch.tensor(weights, dtype=base_emb.dtype, device=base_emb.device) 44 | weight_tensor = weight_tensor.reshape(1,-1,1).expand(base_emb.shape) 45 | return base_emb * weight_tensor 46 | 47 | def mask_word_id(tokens, word_ids, target_id, mask_token): 48 | new_tokens = [[mask_token if wid == target_id else t 49 | for t, wid in zip(x,y)] for x,y in zip(tokens, word_ids)] 50 | mask = np.array(word_ids) == target_id 51 | return (new_tokens, mask) 52 | 53 | def batched_clip_encode(tokens, length, encode_func, num_chunks): 54 | embs = [] 55 | for e in _grouper(32, tokens): 56 | enc, pooled = encode_func(e) 57 | enc = enc.reshape((len(e), length, -1)) 58 | embs.append(enc) 59 | embs = torch.cat(embs) 60 | embs = embs.reshape((len(tokens) // num_chunks, length * num_chunks, -1)) 61 | return embs 62 | 63 | def from_masked(tokens, weights, word_ids, base_emb, length, encode_func, m_token=266): 64 | pooled_base = base_emb[0,length-1:length,:] 65 | wids, inds = np.unique(np.array(word_ids).reshape(-1), return_index=True) 66 | weight_dict = dict((id,w) 67 | for id,w in zip(wids ,np.array(weights).reshape(-1)[inds]) 68 | if w != 1.0) 69 | 70 | if len(weight_dict) == 0: 71 | return torch.zeros_like(base_emb), base_emb[0,length-1:length,:] 72 | 73 | weight_tensor = torch.tensor(weights, dtype=base_emb.dtype, device=base_emb.device) 74 | weight_tensor = weight_tensor.reshape(1,-1,1).expand(base_emb.shape) 75 | 76 | #m_token = (clip.tokenizer.end_token, 1.0) if clip.tokenizer.pad_with_end else (0,1.0) 77 | #TODO: find most suitable masking token here 78 | m_token = (m_token, 1.0) 79 | 80 | ws = [] 81 | masked_tokens = [] 82 | masks = [] 83 | 84 | #create prompts 85 | for id, w in weight_dict.items(): 86 | masked, m = mask_word_id(tokens, word_ids, id, m_token) 87 | masked_tokens.extend(masked) 88 | 89 | m = torch.tensor(m, dtype=base_emb.dtype, device=base_emb.device) 90 | m = 
m.reshape(1,-1,1).expand(base_emb.shape) 91 | masks.append(m) 92 | 93 | ws.append(w) 94 | 95 | #batch process prompts 96 | embs = batched_clip_encode(masked_tokens, length, encode_func, len(tokens)) 97 | masks = torch.cat(masks) 98 | 99 | embs = (base_emb.expand(embs.shape) - embs) 100 | pooled = embs[0,length-1:length,:] 101 | 102 | embs *= masks 103 | embs = embs.sum(axis=0, keepdim=True) 104 | 105 | pooled_start = pooled_base.expand(len(ws), -1) 106 | ws = torch.tensor(ws).reshape(-1,1).expand(pooled_start.shape) 107 | pooled = (pooled - pooled_start) * (ws - 1) 108 | pooled = pooled.mean(axis=0, keepdim=True) 109 | 110 | return ((weight_tensor - 1) * embs), pooled_base + pooled 111 | 112 | def mask_inds(tokens, inds, mask_token): 113 | clip_len = len(tokens[0]) 114 | inds_set = set(inds) 115 | new_tokens = [[mask_token if i*clip_len + j in inds_set else t 116 | for j, t in enumerate(x)] for i, x in enumerate(tokens)] 117 | return new_tokens 118 | 119 | def down_weight(tokens, weights, word_ids, base_emb, length, encode_func, m_token=266): 120 | w, w_inv = np.unique(weights,return_inverse=True) 121 | 122 | if np.sum(w < 1) == 0: 123 | return base_emb, tokens, base_emb[0,length-1:length,:] 124 | #m_token = (clip.tokenizer.end_token, 1.0) if clip.tokenizer.pad_with_end else (0,1.0) 125 | #using the comma token as a masking token seems to work better than aos tokens for SD 1.x 126 | m_token = (m_token, 1.0) 127 | 128 | masked_tokens = [] 129 | 130 | masked_current = tokens 131 | for i in range(len(w)): 132 | if w[i] >= 1: 133 | continue 134 | masked_current = mask_inds(masked_current, np.where(w_inv == i)[0], m_token) 135 | masked_tokens.extend(masked_current) 136 | 137 | embs = batched_clip_encode(masked_tokens, length, encode_func, len(tokens)) 138 | embs = torch.cat([base_emb, embs]) 139 | w = w[w<=1.0] 140 | w_mix = np.diff([0] + w.tolist()) 141 | w_mix = torch.tensor(w_mix, dtype=embs.dtype, device=embs.device).reshape((-1,1,1)) 142 | 143 | weighted_emb = (w_mix * embs).sum(axis=0, keepdim=True) 144 | return weighted_emb, masked_current, weighted_emb[0,length-1:length,:] 145 | 146 | def scale_emb_to_mag(base_emb, weighted_emb): 147 | norm_base = torch.linalg.norm(base_emb) 148 | norm_weighted = torch.linalg.norm(weighted_emb) 149 | embeddings_final = (norm_base / norm_weighted) * weighted_emb 150 | return embeddings_final 151 | 152 | def recover_dist(base_emb, weighted_emb): 153 | fixed_std = (base_emb.std() / weighted_emb.std()) * (weighted_emb - weighted_emb.mean()) 154 | embeddings_final = fixed_std + (base_emb.mean() - fixed_std.mean()) 155 | return embeddings_final 156 | 157 | def A1111_renorm(base_emb, weighted_emb): 158 | embeddings_final = (base_emb.mean() / weighted_emb.mean()) * weighted_emb 159 | return embeddings_final 160 | 161 | def advanced_encode_from_tokens(tokenized, token_normalization, weight_interpretation, encode_func, m_token=266, length=77, w_max=1.0, return_pooled=False, apply_to_pooled=False): 162 | tokens = [[t for t,_,_ in x] for x in tokenized] 163 | weights = [[w for _,w,_ in x] for x in tokenized] 164 | word_ids = [[wid for _,_,wid in x] for x in tokenized] 165 | 166 | #weight normalization 167 | #==================== 168 | 169 | #distribute down/up weights over word lengths 170 | if token_normalization.startswith("length"): 171 | weights = divide_length(word_ids, weights) 172 | 173 | #make mean of word tokens 1 174 | if token_normalization.endswith("mean"): 175 | weights = shift_mean_weight(word_ids, weights) 176 | 177 | #weight interpretation 178 | 
#===================== 179 | pooled = None 180 | 181 | if weight_interpretation == "comfy": 182 | weighted_tokens = [[(t,w) for t, w in zip(x, y)] for x, y in zip(tokens, weights)] 183 | weighted_emb, pooled_base = encode_func(weighted_tokens) 184 | pooled = pooled_base 185 | else: 186 | unweighted_tokens = [[(t,1.0) for t, _,_ in x] for x in tokenized] 187 | base_emb, pooled_base = encode_func(unweighted_tokens) 188 | 189 | if weight_interpretation == "A1111": 190 | weighted_emb = from_zero(weights, base_emb) 191 | weighted_emb = A1111_renorm(base_emb, weighted_emb) 192 | pooled = pooled_base 193 | 194 | if weight_interpretation == "compel": 195 | pos_tokens = [[(t,w) if w >= 1.0 else (t,1.0) for t, w in zip(x, y)] for x, y in zip(tokens, weights)] 196 | weighted_emb, _ = encode_func(pos_tokens) 197 | weighted_emb, _, pooled = down_weight(pos_tokens, weights, word_ids, weighted_emb, length, encode_func) 198 | 199 | if weight_interpretation == "comfy++": 200 | weighted_emb, tokens_down, _ = down_weight(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 201 | weights = [[w if w > 1.0 else 1.0 for w in x] for x in weights] 202 | #unweighted_tokens = [[(t,1.0) for t, _,_ in x] for x in tokens_down] 203 | embs, pooled = from_masked(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 204 | weighted_emb += embs 205 | 206 | if weight_interpretation == "down_weight": 207 | weights = scale_to_norm(weights, word_ids, w_max) 208 | weighted_emb, _, pooled = down_weight(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 209 | 210 | if return_pooled: 211 | if apply_to_pooled: 212 | return weighted_emb, pooled 213 | else: 214 | return weighted_emb, pooled_base 215 | return weighted_emb, None 216 | 217 | def encode_token_weights_g(model, token_weight_pairs): 218 | return model.clip_g.encode_token_weights(token_weight_pairs) 219 | 220 | def encode_token_weights_l(model, token_weight_pairs): 221 | l_out, _ = model.clip_l.encode_token_weights(token_weight_pairs) 222 | return l_out, None 223 | 224 | def encode_token_weights(model, token_weight_pairs, encode_func): 225 | if model.layer_idx is not None: 226 | if hasattr(model.cond_stage_model, 'set_clip_options'): 227 | model.cond_stage_model.set_clip_options({"layer": model.layer_idx}) 228 | else: 229 | print(f"[ComfyUI_ADV_CLIP_emb] ComfyUI is outdated.") 230 | model.cond_stage_model.clip_layer(model.layer_idx) 231 | 232 | model_management.load_model_gpu(model.patcher) 233 | return encode_func(model.cond_stage_model, token_weight_pairs) 234 | 235 | def prepareXL(embs_l, embs_g, pooled, clip_balance): 236 | l_w = 1 - max(0, clip_balance - .5) * 2 237 | g_w = 1 - max(0, .5 - clip_balance) * 2 238 | if embs_l is not None: 239 | return torch.cat([embs_l * l_w, embs_g * g_w], dim=-1), pooled 240 | else: 241 | return embs_g, pooled 242 | 243 | def advanced_encode(clip, text, token_normalization, weight_interpretation, w_max=1.0, clip_balance=.5, apply_to_pooled=True): 244 | tokenized = clip.tokenize(text, return_word_ids=True) 245 | if isinstance(clip.cond_stage_model, (SDXLClipModel, SDXLRefinerClipModel, SDXLClipG)): 246 | embs_l = None 247 | embs_g = None 248 | pooled = None 249 | if 'l' in tokenized and isinstance(clip.cond_stage_model, SDXLClipModel): 250 | embs_l, _ = advanced_encode_from_tokens(tokenized['l'], 251 | token_normalization, 252 | weight_interpretation, 253 | lambda x: encode_token_weights(clip, x, encode_token_weights_l), 254 | w_max=w_max, 255 | return_pooled=False) 256 | if 'g' in tokenized: 
257 | embs_g, pooled = advanced_encode_from_tokens(tokenized['g'], 258 | token_normalization, 259 | weight_interpretation, 260 | lambda x: encode_token_weights(clip, x, encode_token_weights_g), 261 | w_max=w_max, 262 | return_pooled=True, 263 | apply_to_pooled=apply_to_pooled) 264 | return prepareXL(embs_l, embs_g, pooled, clip_balance) 265 | else: 266 | return advanced_encode_from_tokens(tokenized['l'], 267 | token_normalization, 268 | weight_interpretation, 269 | lambda x: (clip.encode_from_tokens({'l': x}), None), 270 | w_max=w_max) 271 | def advanced_encode_XL(clip, text1, text2, token_normalization, weight_interpretation, w_max=1.0, clip_balance=.5, apply_to_pooled=True): 272 | tokenized1 = clip.tokenize(text1, return_word_ids=True) 273 | tokenized2 = clip.tokenize(text2, return_word_ids=True) 274 | 275 | embs_l, _ = advanced_encode_from_tokens(tokenized1['l'], 276 | token_normalization, 277 | weight_interpretation, 278 | lambda x: encode_token_weights(clip, x, encode_token_weights_l), 279 | w_max=w_max, 280 | return_pooled=False) 281 | 282 | embs_g, pooled = advanced_encode_from_tokens(tokenized2['g'], 283 | token_normalization, 284 | weight_interpretation, 285 | lambda x: encode_token_weights(clip, x, encode_token_weights_g), 286 | w_max=w_max, 287 | return_pooled=True, 288 | apply_to_pooled=apply_to_pooled) 289 | 290 | gcd_num = gcd(embs_l.shape[1], embs_g.shape[1]) 291 | repeat_l = int((embs_g.shape[1] / gcd_num) * embs_l.shape[1]) 292 | repeat_g = int((embs_l.shape[1] / gcd_num) * embs_g.shape[1]) 293 | 294 | return prepareXL(embs_l.expand((-1,repeat_l,-1)), embs_g.expand((-1,repeat_g,-1)), pooled, clip_balance) 295 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/c5bae957-2c47-4a73-86e0-2949fcf72fd5) 2 | 3 | 4 | # ComfyUI-Prompt-MZ 5 | 基于llama.cpp的一些和提示词相关的节点,目前包括美化提示词和类似clip-interrogator的图片反推 6 | 7 | Use llama.cpp to assist in generating some nodes related to prompt words, including beautifying prompt words and image recognition similar to clip-interrogator 8 | 9 | ## Recent changes 10 | * [2024-06-22] 新增Florence-2-large图片反推模型节点 (Added Florence-2-large image interrogation model node) 11 | * [2024-06-20] 新增选择本机ollama模型的节点 (Added nodes to select local ollama models) 12 | * [2024-06-05] 新增千问2.0预设模型 (Added Qianwen 2.0 preset model) 13 | * [2024-06-05] 可选chat_format,图片反推后处理 (Optional chat_format, post-processing after image interrogation) 14 | * [2024-06-04] 新增了一些预设模型 (Added some preset models) 15 | * [2024-06-04] 新增通用节点,支持手动选择模型 (Add universal node, support manual selection of models) 16 | * [2024-05-30] 添加ImageCaptionerConfig节点来支持批量生成提示词 (Add ImageCaptionerConfig node to support batch generation of prompt words) 17 | * [2024-05-24] 运行后在当前节点显示生成的提示词 (Display the generated prompt words in the current node after running) 18 | * [2024-05-24] 兼容清华智谱API (Compatible with Zhipu API) 19 | * [2024-05-24] 使用A1111权重缩放,感谢ComfyUI_ADV_CLIP_emb (Use A1111 weight scaling, thanks to ComfyUI_ADV_CLIP_emb) 20 | * [2024-05-13] 新增OpenAI API节点 (add OpenAI API node) 21 | * [2024-04-30] 支持自定义指令 (Support for custom instructions) 22 | * [2024-04-30] 添加llava-v1.6-vicuna-13b (add llava-v1.6-vicuna-13b) 23 | * [2024-04-30] 添加翻译 24 | * [2024-04-28] 新增Phi-3-mini节点 (add Phi-3-mini node) 25 | 26 | ## Installation 27 | 1. Clone this repo into `custom_nodes` folder. 28 | 2. Restart ComfyUI. 
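For a typical setup this is just a git clone into ComfyUI's custom nodes folder (a minimal sketch, assuming ComfyUI is installed under `ComfyUI/` and `git` is available):

```bash
# clone this repo into ComfyUI's custom_nodes directory
cd ComfyUI/custom_nodes
git clone https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ.git
# restart ComfyUI afterwards so the new nodes are registered
```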
29 | 30 | ## Nodes 31 | + MZ_Florence2CLIPTextEncode 32 | 33 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/b60bb22c-f42b-4c4f-a0ac-20a6c09c9046) 34 | 35 | 36 | 37 | 38 | 39 | + ModelConfigManualSelect (Ollama) 40 | 41 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/2009e330-0f1f-4f28-9b4c-8446d3cdc519) 42 | 43 | 44 | + CLIPTextEncode (LLamaCPP Universal) 45 | 46 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/1f66ce10-920f-4ada-9287-f86a51782bff) 47 | 48 | 49 | + ModelConfigManualSelect(LLamaCPP) 50 | 51 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/42473248-8902-43d7-a08b-37bb3d20b4aa) 52 | 53 | + ModelConfigDownloaderSelect(LLamaCPP) 54 | 55 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/6a2f561b-deb0-43d3-900f-c9d6b23d0ea4) 56 | 57 | 58 | 59 | + CLIPTextEncode (ImageInterrogator) 60 | 61 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/e76eb5dc-1c6c-4a59-8197-8bd7b56c3889) 62 | 63 | + ModelConfigManualSelect(ImageInterrogator) 64 | 65 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/71a48734-e3f3-4ced-a8d7-cd334340efdb) 66 | 67 | 68 | + ModelConfigDownloaderSelect(ImageInterrogator) 69 | 70 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/bfec7696-1f86-4fe5-9dc3-807b39366524) 71 | 72 | 73 | 74 | + CLIPTextEncode (OpenAI API) 75 | 76 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/14e9a96a-ec1b-481d-8f5a-43cd752ad01b) 77 | 78 | + CLIPTextEncode (Phi-3) 79 | 80 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/c4b97aeb-23c0-4cf1-a6a5-d259fdf83f6e) 81 | 82 | 83 | + CLIPTextEncode (LLama3) 84 | 85 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/40da75ab-46db-4f38-9d8e-b7f9184f77fa) 86 | 87 | 88 | + ImageInterrogator (LLava) 89 | 90 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/f397c432-c2f7-4d48-9b95-2031cfb19e8c) 91 | Enable parameter sd_format 92 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/4d2cf65d-e8a3-4dfa-b735-9d591638028c) 93 | 94 | + ImageCaptionerConfig 95 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/147941a2-cb5f-418f-acd9-8e17ffaf044a) 96 | 97 | 98 | + LLamaCPPOptions 99 | 100 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/256483e0-c3b7-4d04-82f4-f71f7d9584c9) 101 | 102 | + CustomizeInstruct 103 | 104 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/d328ba44-2eab-4f95-bd35-585a9cdc9ec2) 105 | 106 | 107 | + BaseLLamaCPPCLIPTextEncode (可以手动传入模型路径/You can directly pass in the model path) 108 | + BaseLLavaImageInterrogator (可以手动传入模型路径/You can directly pass in the model path) 109 | 110 | ## FAQ 111 | 112 | ### module 'llama_cpp' has no attribute 'LLAMA_SPLIT_MODE_LAYER' 113 | 升级llama_cpp_python的版本到最新版本，前往 https://github.com/abetlen/llama-cpp-python/releases 下载安装 (Upgrade llama_cpp_python to the latest version; download and install it from https://github.com/abetlen/llama-cpp-python/releases) 114 | 115 | ### LLama.dll 无法加载 (Failed to load shared library LLama.dll) 116 | CUDA版本切换到12.1,如果你使用秋叶启动器,高级设置->环境维护->安装PyTorch->选择版本中选择CUDA 12.1的版本 (Switch CUDA to version 12.1; if you use the Aki launcher, go to Advanced Settings -> Environment Maintenance -> Install PyTorch and choose the CUDA 12.1 build) 117 | 118 | 119 | ### ...llama_cpp_python-0.2.63-cp310-cp310-win_amd64.whl returned nonzero exit status 120 | 保持网络畅通,该上魔法上魔法,或者手动安装llama_cpp_python (Keep your network connection stable, use a proxy if necessary, or install llama_cpp_python manually) 121 | 122 | 123 | 124 | 125 | ## Credits 126 | + [https://github.com/comfyanonymous/ComfyUI](https://github.com/comfyanonymous/ComfyUI) 127 | + 
[https://github.com/ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp) 128 | + [https://github.com/BlenderNeko/ComfyUI_ADV_CLIP_emb](https://github.com/BlenderNeko/ComfyUI_ADV_CLIP_emb) 129 | 130 | ## Star History 131 | 132 | 133 | 134 | 135 | 136 | Star History Chart 137 | 138 | 139 | 140 | ## Contact 141 | - 绿泡泡: minrszone 142 | - Bilibili: [minus_zone](https://space.bilibili.com/5950992) 143 | - 小红书: [MinusZoneAI](https://www.xiaohongshu.com/user/profile/5f072e990000000001005472) 144 | - 爱发电: [MinusZoneAI](https://afdian.net/@MinusZoneAI) 145 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import json 4 | import os 5 | import sys 6 | from .mz_prompt_utils import Utils 7 | from nodes import MAX_RESOLUTION 8 | import comfy.utils 9 | import shutil 10 | import comfy.samplers 11 | import folder_paths 12 | 13 | 14 | WEB_DIRECTORY = "./web" 15 | 16 | AUTHOR_NAME = u"MinusZone" 17 | CATEGORY_NAME = f"{AUTHOR_NAME} - Prompt" 18 | 19 | # sys.path.append(os.path.join(os.path.dirname(__file__))) 20 | 21 | import importlib 22 | 23 | from . import mz_prompt_webserver 24 | # mz_prompt_webserver.start_server() 25 | 26 | NODE_CLASS_MAPPINGS = { 27 | } 28 | 29 | 30 | NODE_DISPLAY_NAME_MAPPINGS = { 31 | } 32 | 33 | 34 | from . import mz_llama_cpp 35 | 36 | 37 | def getCommonCLIPTextEncodeInput(): 38 | from . import mz_llama_core_nodes 39 | style_presets = mz_llama_core_nodes.get_style_presets() 40 | CommonCLIPTextEncodeInput = { 41 | "required": { 42 | "style_presets": ( 43 | style_presets, {"default": style_presets[1]} 44 | ), 45 | "text": ("STRING", {"multiline": True, }), 46 | "keep_device": ([False, True], {"default": False}), 47 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 48 | }, 49 | "optional": { 50 | "clip": ("CLIP", ), 51 | "llama_cpp_options": ("LLamaCPPOptions", ), 52 | "customize_instruct": ("CustomizeInstruct", ), 53 | } 54 | } 55 | 56 | return CommonCLIPTextEncodeInput 57 | 58 | 59 | class MZ_OllamaModelConfig_ManualSelect: 60 | @classmethod 61 | def INPUT_TYPES(s): 62 | search_dirs = [ 63 | os.path.join(os.path.expanduser('~'), ".ollama", "models"), 64 | os.path.join(os.environ.get("APPDATA", ""), ".ollama", "models"), 65 | ] 66 | 67 | ollama_models_dir = None 68 | for dir in search_dirs: 69 | if os.path.exists(dir): 70 | ollama_models_dir = dir 71 | break 72 | 73 | ollamas = [] 74 | if ollama_models_dir is not None: 75 | manifests_dir = os.path.join(ollama_models_dir, "manifests") 76 | for root, dirs, files in os.walk(manifests_dir): 77 | for file in files: 78 | ollamas.append(os.path.join(root, file)) 79 | 80 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 81 | return { 82 | "required": { 83 | "ollama": (ollamas,), 84 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 85 | }, 86 | "optional": { 87 | }, 88 | } 89 | 90 | RETURN_TYPES = ("LLamaCPPModelConfig",) 91 | RETURN_NAMES = ("llama_cpp_model_config",) 92 | 93 | FUNCTION = "create" 94 | CATEGORY = f"{CATEGORY_NAME}/others" 95 | 96 | def create(self, **kwargs): 97 | kwargs = kwargs.copy() 98 | 99 | ollama = kwargs.get("ollama", "") 100 | ollama_cpp_model = None 101 | if os.path.exists(ollama): 102 | # 
{"schemaVersion":2,"mediaType":"application/vnd.docker.distribution.manifest.v2+json","config":{"mediaType":"application/vnd.docker.container.image.v1+json","digest":"sha256:887433b89a901c156f7e6944442f3c9e57f3c55d6ed52042cbb7303aea994290","size":483},"layers":[{"mediaType":"application/vnd.ollama.image.model","digest":"sha256:c1864a5eb19305c40519da12cc543519e48a0697ecd30e15d5ac228644957d12","size":1678447520},{"mediaType":"application/vnd.ollama.image.license","digest":"sha256:097a36493f718248845233af1d3fefe7a303f864fae13bc31a3a9704229378ca","size":8433},{"mediaType":"application/vnd.ollama.image.template","digest":"sha256:109037bec39c0becc8221222ae23557559bc594290945a2c4221ab4f303b8871","size":136},{"mediaType":"application/vnd.ollama.image.params","digest":"sha256:22a838ceb7fb22755a3b0ae9b4eadde629d19be1f651f73efb8c6b4e2cd0eea0","size":84}]} 103 | with open(ollama, "r", encoding="utf-8") as f: 104 | data = json.load(f) 105 | if "layers" in data: 106 | for layer in data["layers"]: 107 | if "mediaType" in layer and layer["mediaType"] == "application/vnd.ollama.image.model": 108 | ollama_cpp_model = layer["digest"] 109 | break 110 | 111 | if ollama_cpp_model is None: 112 | raise ValueError("Invalid ollama file") 113 | 114 | if ollama_cpp_model.startswith("sha256:"): 115 | ollama_cpp_model = ollama_cpp_model[7:] 116 | # ollama = C:\Users\admin\.ollama\models\manifests\registry.ollama.ai\library\gemma\2b 117 | models_dir = ollama[:ollama.rfind("manifests")] 118 | ollama_cpp_model = os.path.join( 119 | models_dir, "blobs", f"sha256-{ollama_cpp_model}") 120 | 121 | if not os.path.exists(ollama_cpp_model): 122 | raise ValueError(f"Model not found at: {ollama_cpp_model}") 123 | 124 | llama_cpp_model = ollama_cpp_model 125 | 126 | chat_format = kwargs.get("chat_format", "auto") 127 | if chat_format == "auto": 128 | chat_format = None 129 | return ({ 130 | "type": "ManualSelect", 131 | "model_path": llama_cpp_model, 132 | "chat_format": chat_format, 133 | },) 134 | 135 | 136 | NODE_CLASS_MAPPINGS["MZ_OllamaModelConfig_ManualSelect"] = MZ_OllamaModelConfig_ManualSelect 137 | NODE_DISPLAY_NAME_MAPPINGS[ 138 | "MZ_OllamaModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - ModelConfigManualSelect(OllamaFile)" 139 | 140 | 141 | class MZ_LLamaCPPModelConfig_ManualSelect: 142 | @ classmethod 143 | def INPUT_TYPES(s): 144 | gguf_files = Utils.get_gguf_files() 145 | 146 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 147 | return { 148 | "required": { 149 | "llama_cpp_model": (gguf_files,), 150 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 151 | }, 152 | "optional": { 153 | }, 154 | } 155 | 156 | RETURN_TYPES = ("LLamaCPPModelConfig",) 157 | RETURN_NAMES = ("llama_cpp_model_config",) 158 | 159 | FUNCTION = "create" 160 | CATEGORY = f"{CATEGORY_NAME}/others" 161 | 162 | def create(self, **kwargs): 163 | kwargs = kwargs.copy() 164 | 165 | llama_cpp_model = kwargs.get("llama_cpp_model", "") 166 | if llama_cpp_model != "": 167 | llama_cpp_model = os.path.join( 168 | Utils.get_gguf_models_path(), llama_cpp_model) 169 | 170 | chat_format = kwargs.get("chat_format", "auto") 171 | if chat_format == "auto": 172 | chat_format = None 173 | return ({ 174 | "type": "ManualSelect", 175 | "model_path": llama_cpp_model, 176 | "chat_format": chat_format, 177 | },) 178 | 179 | 180 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPModelConfig_ManualSelect"] = MZ_LLamaCPPModelConfig_ManualSelect 181 | NODE_DISPLAY_NAME_MAPPINGS[ 182 | "MZ_LLamaCPPModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - 
ModelConfigManualSelect(LLamaCPP)" 183 | 184 | 185 | class MZ_LLamaCPPModelConfig_DownloaderSelect: 186 | @classmethod 187 | def INPUT_TYPES(s): 188 | optional_models = Utils.get_model_zoo(tags_filter="llama") 189 | model_names = [ 190 | model["model"] for model in optional_models 191 | ] 192 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 193 | return { 194 | "required": { 195 | "model_name": (model_names,), 196 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 197 | }, 198 | "optional": { 199 | }, 200 | } 201 | 202 | RETURN_TYPES = ("LLamaCPPModelConfig",) 203 | RETURN_NAMES = ("llama_cpp_model_config",) 204 | 205 | FUNCTION = "create" 206 | CATEGORY = f"{CATEGORY_NAME}/others" 207 | 208 | def create(self, **kwargs): 209 | kwargs = kwargs.copy() 210 | 211 | model_name = kwargs.get("model_name", "") 212 | chat_format = kwargs.get("chat_format", "auto") 213 | if chat_format == "auto": 214 | chat_format = None 215 | return ({ 216 | "type": "DownloaderSelect", 217 | "model_name": model_name, 218 | "chat_format": chat_format, 219 | },) 220 | 221 | 222 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPModelConfig_DownloaderSelect"] = MZ_LLamaCPPModelConfig_DownloaderSelect 223 | NODE_DISPLAY_NAME_MAPPINGS[ 224 | "MZ_LLamaCPPModelConfig_DownloaderSelect"] = f"{AUTHOR_NAME} - ModelConfigDownloaderSelect(LLamaCPP)" 225 | 226 | 227 | class MZ_LLamaCPPCLIPTextEncode: 228 | @classmethod 229 | def INPUT_TYPES(s): 230 | importlib.reload(mz_llama_cpp) 231 | 232 | result = { 233 | "required": { 234 | }, 235 | "optional": { 236 | "llama_cpp_model": ("LLamaCPPModelConfig",), 237 | }, 238 | } 239 | 240 | common_input = getCommonCLIPTextEncodeInput() 241 | for key in common_input["required"]: 242 | result["required"][key] = common_input["required"][key] 243 | for key in common_input["optional"]: 244 | result["optional"][key] = common_input["optional"][key] 245 | 246 | return result 247 | 248 | RETURN_TYPES = ("STRING", "CONDITIONING",) 249 | RETURN_NAMES = ("text", "conditioning",) 250 | OUTPUT_NODE = True 251 | FUNCTION = "encode" 252 | CATEGORY = CATEGORY_NAME 253 | 254 | DESCRIPTION = """ 255 | llama_cpp_model不设置时,将使用默认模型: Meta-Llama-3-8B-Instruct.Q4_K_M.gguf 256 | """ 257 | 258 | def encode(self, **kwargs): 259 | kwargs = kwargs.copy() 260 | from . 
import mz_llama_core_nodes 261 | importlib.reload(mz_llama_core_nodes) 262 | 263 | return mz_llama_core_nodes.llama_cpp_node_encode(kwargs) 264 | 265 | 266 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPCLIPTextEncode"] = MZ_LLamaCPPCLIPTextEncode 267 | NODE_DISPLAY_NAME_MAPPINGS[ 268 | "MZ_LLamaCPPCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(LLamaCPP Universal)" 269 | 270 | 271 | class MZ_LLamaCPPOptions: 272 | @classmethod 273 | def INPUT_TYPES(s): 274 | value = mz_llama_cpp.LlamaCppOptions() 275 | result = {} 276 | 277 | for key in value: 278 | if type(value[key]) == bool: 279 | result[key] = ([True, False], {"default": value[key]}) 280 | elif type(value[key]) == int: 281 | result[key] = ("INT", { 282 | "default": value[key], "min": -0xffffffffffffffff, "max": 0xffffffffffffffff}) 283 | elif type(value[key]) == float: 284 | result[key] = ("FLOAT", { 285 | "default": value[key], "min": -0xffffffffffffffff, "max": 0xffffffffffffffff}) 286 | elif type(value[key]) == str: 287 | result[key] = ("STRING", {"default": value[key]}) 288 | elif type(value[key]) == list: 289 | result[key] = (value[key], {"default": value[key][0]}) 290 | else: 291 | raise Exception(f"Unknown type: {type(value[key])}") 292 | 293 | return { 294 | "required": result, 295 | } 296 | 297 | RETURN_TYPES = ("LLamaCPPOptions",) 298 | RETURN_NAMES = ("llama_cpp_options",) 299 | 300 | FUNCTION = "create" 301 | CATEGORY = f"{CATEGORY_NAME}/others" 302 | 303 | def create(self, **kwargs): 304 | kwargs = kwargs.copy() 305 | importlib.reload(mz_llama_cpp) 306 | opt = {} 307 | for key in kwargs: 308 | opt[key] = kwargs[key] 309 | 310 | # if opt.get("chat_format", None) == "auto": 311 | # opt["chat_format"] = None 312 | return (opt,) 313 | 314 | 315 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPOptions"] = MZ_LLamaCPPOptions 316 | NODE_DISPLAY_NAME_MAPPINGS["MZ_LLamaCPPOptions"] = f"{AUTHOR_NAME} - LLamaCPPOptions" 317 | 318 | 319 | class MZ_CustomizeInstruct: 320 | @classmethod 321 | def INPUT_TYPES(s): 322 | from . 
import mz_prompts 323 | 324 | return { 325 | "required": { 326 | "system": ("STRING", {"multiline": True, "default": mz_prompts.Long_prompt}), 327 | "instruct": ("STRING", {"multiline": True, "default": ""}), 328 | }, 329 | } 330 | 331 | RETURN_TYPES = ("CustomizeInstruct",) 332 | RETURN_NAMES = ("customize_instruct",) 333 | FUNCTION = "create" 334 | CATEGORY = f"{CATEGORY_NAME}/others" 335 | 336 | def create(self, **kwargs): 337 | kwargs = kwargs.copy() 338 | 339 | return (kwargs,) 340 | 341 | 342 | NODE_CLASS_MAPPINGS["MZ_CustomizeInstruct"] = MZ_CustomizeInstruct 343 | NODE_DISPLAY_NAME_MAPPINGS["MZ_CustomizeInstruct"] = f"{AUTHOR_NAME} - CustomizeInstruct" 344 | 345 | 346 | class MZ_ImageCaptionerConfig: 347 | @classmethod 348 | def INPUT_TYPES(s): 349 | return { 350 | "required": { 351 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 352 | "caption_suffix": ("STRING", {"default": ".caption"}), 353 | "force_update": ([False, True], {"default": False}), 354 | "retry_keyword": ("STRING", {"default": "not,\",error"}), 355 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 356 | }, 357 | "optional": { 358 | 359 | }, 360 | } 361 | 362 | RETURN_TYPES = ("ImageCaptionerConfig",) 363 | RETURN_NAMES = ("captioner_config", ) 364 | 365 | FUNCTION = "interrogate_batch" 366 | CATEGORY = f"{CATEGORY_NAME}/others" 367 | 368 | def interrogate_batch(self, **kwargs): 369 | kwargs = kwargs.copy() 370 | 371 | return (kwargs, ) 372 | 373 | 374 | NODE_CLASS_MAPPINGS["MZ_ImageCaptionerConfig"] = MZ_ImageCaptionerConfig 375 | NODE_DISPLAY_NAME_MAPPINGS["MZ_ImageCaptionerConfig"] = f"{AUTHOR_NAME} - ImageCaptionerConfig" 376 | 377 | 378 | class MZ_OpenAIApiCLIPTextEncode: 379 | @classmethod 380 | def INPUT_TYPES(s): 381 | importlib.reload(mz_llama_cpp) 382 | 383 | s.openai_config_path = os.path.join( 384 | Utils.get_models_path(), 385 | "openai_config.json", 386 | ) 387 | default_config = { 388 | "base_url": "", 389 | "api_key": "", 390 | "model_name": "gpt-3.5-turbo-1106", 391 | } 392 | if os.path.exists(s.openai_config_path): 393 | try: 394 | with open(s.openai_config_path, "r", encoding="utf-8") as f: 395 | default_config = json.load(f) 396 | except Exception as e: 397 | print(f"Failed to load openai_config.json: {e}") 398 | 399 | default_api_key = default_config.get("api_key", "") 400 | if default_api_key != "": 401 | default_api_key = default_api_key[:4] + "******" 402 | result = { 403 | "required": { 404 | "base_url": ("STRING", {"default": default_config.get("base_url", ""), "placeholder": ""}), 405 | "api_key": ("STRING", {"default": default_api_key, "placeholder": ""}), 406 | "model_name": ("STRING", {"default": default_config.get("model_name", ""), }), 407 | }, 408 | "optional": { 409 | }, 410 | } 411 | 412 | common_input = getCommonCLIPTextEncodeInput() 413 | for key in common_input["required"]: 414 | if key not in ["seed", "keep_device"]: 415 | result["required"][key] = common_input["required"][key] 416 | for key in common_input["optional"]: 417 | if key != "llama_cpp_options": 418 | result["optional"][key] = common_input["optional"][key] 419 | 420 | return result 421 | RETURN_TYPES = ("STRING", "CONDITIONING",) 422 | RETURN_NAMES = ("text", "conditioning",) 423 | OUTPUT_NODE = True 424 | FUNCTION = "encode" 425 | CATEGORY = CATEGORY_NAME 426 | 427 | def encode(self, **kwargs): 428 | kwargs = kwargs.copy() 429 | 430 | from . 
import mz_openaiapi 431 | importlib.reload(mz_openaiapi) 432 | 433 | if kwargs.get("api_key", "").endswith("******"): 434 | kwargs["api_key"] = "" 435 | try: 436 | with open(self.openai_config_path, "r", encoding="utf-8") as f: 437 | config = json.load(f) 438 | kwargs["api_key"] = config.get("api_key", "") 439 | except Exception as e: 440 | print(f"Failed to load openai_config.json: {e}") 441 | 442 | if kwargs.get("api_key", "") != "": 443 | with open(self.openai_config_path, "w", encoding="utf-8") as f: 444 | json.dump({ 445 | "base_url": kwargs.get("base_url", ""), 446 | "api_key": kwargs.get("api_key", ""), 447 | "model_name": kwargs.get("model_name", ""), 448 | }, f, indent=4) 449 | else: 450 | raise ValueError("api_key is required") 451 | 452 | text = mz_openaiapi.query_beautify_prompt_text(kwargs) 453 | conditionings = None 454 | clip = kwargs.get("clip", None) 455 | if clip is not None: 456 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 457 | 458 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 459 | 460 | 461 | NODE_CLASS_MAPPINGS["MZ_OpenAIApiCLIPTextEncode"] = MZ_OpenAIApiCLIPTextEncode 462 | NODE_DISPLAY_NAME_MAPPINGS[ 463 | "MZ_OpenAIApiCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(OpenAIApi)" 464 | 465 | 466 | class MZ_ImageInterrogatorCLIPTextEncode: 467 | @classmethod 468 | def INPUT_TYPES(s): 469 | return { 470 | "required": { 471 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 472 | "post_processing": ([False, True], {"default": True}), 473 | "keep_device": ([False, True], {"default": False}), 474 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 475 | }, 476 | "optional": { 477 | "image_interrogator_model": ("ImageInterrogatorModelConfig", ), 478 | "image": ("IMAGE",), 479 | "clip": ("CLIP", ), 480 | "llama_cpp_options": ("LLamaCPPOptions", ), 481 | "customize_instruct": ("CustomizeInstruct", ), 482 | "captioner_config": ("ImageCaptionerConfig", ), 483 | }, 484 | } 485 | 486 | RETURN_TYPES = ("STRING", "CONDITIONING",) 487 | RETURN_NAMES = ("text", "conditioning",) 488 | OUTPUT_NODE = True 489 | FUNCTION = "encode" 490 | CATEGORY = CATEGORY_NAME 491 | 492 | def encode(self, **kwargs): 493 | kwargs = kwargs.copy() 494 | from . 
import mz_llama_core_nodes 495 | importlib.reload(mz_llama_core_nodes) 496 | 497 | return mz_llama_core_nodes.image_interrogator_node_encode(kwargs) 498 | 499 | 500 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorCLIPTextEncode"] = MZ_ImageInterrogatorCLIPTextEncode 501 | NODE_DISPLAY_NAME_MAPPINGS[ 502 | "MZ_ImageInterrogatorCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(ImageInterrogator)" 503 | 504 | 505 | class MZ_ImageInterrogatorModelConfig_ManualSelect: 506 | @classmethod 507 | def INPUT_TYPES(s): 508 | gguf_files = Utils.get_gguf_files() 509 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 510 | return { 511 | "required": { 512 | "llama_cpp_model": (gguf_files,), 513 | "mmproj_model": (["auto"] + gguf_files,), 514 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 515 | }, 516 | "optional": { 517 | }, 518 | } 519 | 520 | RETURN_TYPES = ("ImageInterrogatorModelConfig",) 521 | RETURN_NAMES = ("image_interrogator_model",) 522 | 523 | FUNCTION = "create" 524 | CATEGORY = f"{CATEGORY_NAME}/others" 525 | 526 | def create(self, **kwargs): 527 | kwargs = kwargs.copy() 528 | 529 | llama_cpp_model = kwargs.get("llama_cpp_model", "") 530 | if llama_cpp_model != "": 531 | llama_cpp_model = os.path.join( 532 | Utils.get_gguf_models_path(), llama_cpp_model) 533 | 534 | mmproj_model = kwargs.get("mmproj_model", "") 535 | if mmproj_model != "": 536 | mmproj_model = os.path.join( 537 | Utils.get_gguf_models_path(), mmproj_model) 538 | 539 | chat_format = kwargs.get("chat_format", "auto") 540 | if chat_format == "auto": 541 | chat_format = None 542 | return ({ 543 | "type": "ManualSelect", 544 | "model_path": llama_cpp_model, 545 | "mmproj_model": mmproj_model, 546 | "chat_format": chat_format, 547 | },) 548 | 549 | 550 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorModelConfig_ManualSelect"] = MZ_ImageInterrogatorModelConfig_ManualSelect 551 | NODE_DISPLAY_NAME_MAPPINGS[ 552 | "MZ_ImageInterrogatorModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - ModelConfigManualSelect(ImageInterrogator)" 553 | 554 | 555 | class MZ_ImageInterrogatorModelConfig_DownloaderSelect: 556 | @classmethod 557 | def INPUT_TYPES(s): 558 | optional_models = Utils.get_model_zoo(tags_filter="llava") 559 | model_names = [ 560 | model["model"] for model in optional_models 561 | ] 562 | 563 | optional_models = Utils.get_model_zoo(tags_filter="mmproj") 564 | mmproj_model_names = [ 565 | model["model"] for model in optional_models 566 | ] 567 | 568 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 569 | return { 570 | "required": { 571 | "model_name": (model_names,), 572 | "mmproj_model_name": (["auto"] + mmproj_model_names,), 573 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 574 | }, 575 | "optional": { 576 | }, 577 | } 578 | 579 | RETURN_TYPES = ("ImageInterrogatorModelConfig",) 580 | RETURN_NAMES = ("image_interrogator_model",) 581 | 582 | FUNCTION = "create" 583 | CATEGORY = f"{CATEGORY_NAME}/others" 584 | 585 | def create(self, **kwargs): 586 | kwargs = kwargs.copy() 587 | model_name = kwargs.get("model_name") 588 | mmproj_model_name = kwargs.get("mmproj_model_name", "auto") 589 | chat_format = kwargs.get("chat_format", "auto") 590 | if chat_format == "auto": 591 | chat_format = None 592 | return ({ 593 | "type": "DownloaderSelect", 594 | "model_name": model_name, 595 | "mmproj_model_name": mmproj_model_name, 596 | "chat_format": chat_format, 597 | },) 598 | 599 | 600 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorModelConfig_DownloaderSelect"] = 
MZ_ImageInterrogatorModelConfig_DownloaderSelect 601 | NODE_DISPLAY_NAME_MAPPINGS[ 602 | "MZ_ImageInterrogatorModelConfig_DownloaderSelect"] = f"{AUTHOR_NAME} - ModelConfigDownloaderSelect(ImageInterrogator)" 603 | 604 | 605 | class MZ_Florence2CLIPTextEncode: 606 | @classmethod 607 | def INPUT_TYPES(s): 608 | return { 609 | "required": { 610 | "model_name": ([ 611 | "Florence-2-large-ft", 612 | "Florence-2-large", 613 | ],), 614 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 615 | "keep_device": ([False, True], {"default": False}), 616 | # "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 617 | }, 618 | "optional": { 619 | "image": ("IMAGE",), 620 | "clip": ("CLIP", ), 621 | }, 622 | } 623 | 624 | RETURN_TYPES = ("STRING", "CONDITIONING",) 625 | RETURN_NAMES = ("text", "conditioning",) 626 | OUTPUT_NODE = True 627 | FUNCTION = "encode" 628 | CATEGORY = CATEGORY_NAME 629 | 630 | def encode(self, **kwargs): 631 | kwargs = kwargs.copy() 632 | from . import mz_transformers 633 | importlib.reload(mz_transformers) 634 | 635 | return mz_transformers.florence2_node_encode(kwargs) 636 | 637 | 638 | NODE_CLASS_MAPPINGS["MZ_Florence2CLIPTextEncode"] = MZ_Florence2CLIPTextEncode 639 | NODE_DISPLAY_NAME_MAPPINGS[ 640 | "MZ_Florence2CLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(Florence-2)" 641 | 642 | 643 | class MZ_Florence2Captioner: 644 | @classmethod 645 | def INPUT_TYPES(s): 646 | return { 647 | "required": { 648 | "model_name": ([ 649 | "Florence-2-large-ft", 650 | "Florence-2-large", 651 | ],), 652 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 653 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 654 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 0xffffffffffffffff}), 655 | "caption_suffix": ("STRING", {"default": ".caption"}), 656 | "force_update": ([False, True], {"default": False}), 657 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 658 | }, 659 | "optional": { 660 | }, 661 | } 662 | 663 | RETURN_TYPES = ("STRING",) 664 | RETURN_NAMES = ("debug",) 665 | OUTPUT_NODE = True 666 | FUNCTION = "encode" 667 | CATEGORY = CATEGORY_NAME 668 | 669 | def encode(self, **kwargs): 670 | kwargs = kwargs.copy() 671 | from . 
import mz_transformers 672 | importlib.reload(mz_transformers) 673 | 674 | kwargs["captioner_config"] = { 675 | "directory": kwargs["directory"], 676 | "resolution": kwargs["resolution"], 677 | "batch_size": kwargs["batch_size"], 678 | "caption_suffix": kwargs["caption_suffix"], 679 | "force_update": kwargs["force_update"], 680 | "prompt_fixed_beginning": kwargs["prompt_fixed_beginning"], 681 | } 682 | 683 | return mz_transformers.florence2_node_encode(kwargs) 684 | 685 | 686 | NODE_CLASS_MAPPINGS["MZ_Florence2Captioner"] = MZ_Florence2Captioner 687 | NODE_DISPLAY_NAME_MAPPINGS[ 688 | "MZ_Florence2Captioner"] = f"{AUTHOR_NAME} - Captioner(Florence-2)" 689 | 690 | 691 | class MZ_PaliGemmaCLIPTextEncode: 692 | @classmethod 693 | def INPUT_TYPES(s): 694 | return { 695 | "required": { 696 | "model_name": ([ 697 | "paligemma-sd3-long-captioner-v2", 698 | "paligemma-sd3-long-captioner", 699 | ],), 700 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 701 | "keep_device": ([False, True], {"default": False}), 702 | # "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 703 | }, 704 | "optional": { 705 | "image": ("IMAGE",), 706 | "clip": ("CLIP", ), 707 | }, 708 | } 709 | 710 | RETURN_TYPES = ("STRING", "CONDITIONING",) 711 | RETURN_NAMES = ("text", "conditioning",) 712 | OUTPUT_NODE = True 713 | FUNCTION = "encode" 714 | CATEGORY = CATEGORY_NAME 715 | 716 | def encode(self, **kwargs): 717 | kwargs = kwargs.copy() 718 | from . import mz_transformers 719 | importlib.reload(mz_transformers) 720 | 721 | return mz_transformers.paligemma_node_encode(kwargs) 722 | 723 | 724 | NODE_CLASS_MAPPINGS["MZ_PaliGemmaCLIPTextEncode"] = MZ_PaliGemmaCLIPTextEncode 725 | NODE_DISPLAY_NAME_MAPPINGS[ 726 | "MZ_PaliGemmaCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(PaliGemma)" 727 | 728 | 729 | class MZ_PaliGemmaCaptioner: 730 | @classmethod 731 | def INPUT_TYPES(s): 732 | return { 733 | "required": { 734 | "model_name": ([ 735 | "paligemma-sd3-long-captioner-v2", 736 | "paligemma-sd3-long-captioner", 737 | ],), 738 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 739 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 740 | "caption_suffix": ("STRING", {"default": ".caption"}), 741 | "force_update": ([False, True], {"default": False}), 742 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 743 | }, 744 | "optional": { 745 | }, 746 | } 747 | 748 | RETURN_TYPES = ("STRING",) 749 | RETURN_NAMES = ("debug",) 750 | OUTPUT_NODE = True 751 | FUNCTION = "encode" 752 | CATEGORY = CATEGORY_NAME 753 | 754 | def encode(self, **kwargs): 755 | kwargs = kwargs.copy() 756 | from . import mz_transformers 757 | importlib.reload(mz_transformers) 758 | kwargs["captioner_config"] = { 759 | "directory": kwargs["directory"], 760 | "resolution": kwargs["resolution"], 761 | "caption_suffix": kwargs["caption_suffix"], 762 | "force_update": kwargs["force_update"], 763 | "prompt_fixed_beginning": kwargs["prompt_fixed_beginning"], 764 | } 765 | return mz_transformers.paligemma_node_encode(kwargs) 766 | 767 | 768 | NODE_CLASS_MAPPINGS["MZ_PaliGemmaCaptioner"] = MZ_PaliGemmaCaptioner 769 | NODE_DISPLAY_NAME_MAPPINGS[ 770 | "MZ_PaliGemmaCaptioner"] = f"{AUTHOR_NAME} - Captioner(PaliGemma)" 771 | 772 | try: 773 | from . 
import mz_gen_translate 774 | mz_gen_translate.gen_translate( 775 | NODE_DISPLAY_NAME_MAPPINGS, NODE_CLASS_MAPPINGS) 776 | except Exception as e: 777 | print(f"Failed to generate translation: {e}") 778 | 779 | 780 | from .v1.init import NODE_CLASS_MAPPINGS as DEPRECATED_NODE_CLASS_MAPPINGS 781 | from .v1.init import NODE_DISPLAY_NAME_MAPPINGS as DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS 782 | 783 | NODE_CLASS_MAPPINGS.update(DEPRECATED_NODE_CLASS_MAPPINGS) 784 | NODE_DISPLAY_NAME_MAPPINGS.update(DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS) 785 | -------------------------------------------------------------------------------- /configs/model_zoo.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "Meta-Llama-3-8B-Instruct.Q4_K_M", 4 | "tags": ["llama"], 5 | "find_path": ["gguf"], 6 | "file_path": "gguf/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", 7 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Meta-Llama-3-8B-Instruct-GGUF%2FMeta-Llama-3-8B-Instruct.Q4_K_M.gguf", 8 | "SHA256": "647a2b64cbcdbe670432d0502ebb2592b36dd364d51a9ef7a1387b7a4365781f" 9 | }, 10 | { 11 | "model": "llama3_if_ai_sdpromptmkr_Q4_K_M", 12 | "tags": ["llama"], 13 | "find_path": ["gguf"], 14 | "file_path": "gguf/llama3_if_ai_sdpromptmkr_Q4_K_M.gguf", 15 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama3_if_ai_sdpromptmkr_gguf%2Fllama3_if_ai_sdpromptmkr_Q4_K_M.gguf", 16 | "SHA256": "8c307d788852a28a00491c005155e030bc1ce76d96327035cecf1df789bb3a0a" 17 | }, 18 | { 19 | "model": "qwen2-7b-instruct-q5_k_m", 20 | "tags": ["llama"], 21 | "find_path": ["gguf"], 22 | "file_path": "gguf/qwen2-7b-instruct-q5_k_m.gguf", 23 | "url": "https://www.modelscope.cn/api/v1/models/qwen/Qwen2-7B-Instruct-GGUF/repo?Revision=master&FilePath=qwen2-7b-instruct-q5_k_m.gguf", 24 | "SHA256": "258dd2fa1bdf98b85327774e1fd36e2268c2a4b68eb9021d71106449ee4ba9d5" 25 | }, 26 | { 27 | "model": "qwen2-0_5b-instruct-q5_k_m", 28 | "tags": ["llama"], 29 | "find_path": ["gguf"], 30 | "file_path": "gguf/qwen2-0_5b-instruct-q5_k_m.gguf", 31 | "url": "https://www.modelscope.cn/api/v1/models/qwen/Qwen2-0.5B-Instruct-GGUF/repo?Revision=master&FilePath=qwen2-0_5b-instruct-q5_k_m.gguf", 32 | "SHA256": "16654d862b4b19f4f92ba14e11f056d0220400f59ee74e7a204cf0bf17e64d32" 33 | }, 34 | { 35 | "model": "omost-llama-3-8b-Q4_K_M", 36 | "tags": ["llama"], 37 | "find_path": ["gguf"], 38 | "file_path": "gguf/omost-llama-3-8b-Q4_K_M.gguf", 39 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=omost-gguf%2Fomost-llama-3-8b-Q4_K_M.gguf", 40 | "SHA256": "f09a3237a3b8ce8b96acdb3c83543a47bf7548764a659f888ead6ec1d8cfb780" 41 | }, 42 | { 43 | "model": "omost-phi-3-mini-128k-Q4_K_M", 44 | "tags": ["llama"], 45 | "find_path": ["gguf"], 46 | "file_path": "gguf/omost-phi-3-mini-128k-Q4_K_M.gguf", 47 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=omost-gguf%2Fomost-phi-3-mini-128k-Q4_K_M.gguf", 48 | "SHA256": "bd42cbddf4cbc00676292ddcbdb45567afe4795b9909482578f58bc2026cb60c" 49 | }, 50 | { 51 | "model": "Meta-Llama-3-8B.Q4_K_M", 52 | "tags": ["llama"], 53 | "find_path": ["gguf"], 54 | "file_path": "gguf/Meta-Llama-3-8B.Q4_K_M.gguf", 55 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Meta-Llama-3-8B%2FMeta-Llama-3-8B.Q4_K_M.gguf", 56 | "SHA256": 
"2a19e7532fb544cfd164c65a1b045bb415e14924890a8abee0ec84644f66f61f" 57 | }, 58 | { 59 | "model": "Phi-3-mini-4k-instruct-q4", 60 | "tags": ["llama"], 61 | "find_path": ["gguf"], 62 | "file_path": "gguf/Phi-3-mini-4k-instruct-q4.gguf", 63 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Phi-3-mini-4k-instruct-gguf%2FPhi-3-mini-4k-instruct-q4.gguf", 64 | "SHA256": "1cd9a9df07350196623f93bf4829cf228959e07ad32f787b8fdd7f5956f5b9de" 65 | }, 66 | { 67 | "model": "llama3-zh.Q4_K_M", 68 | "tags": ["llama"], 69 | "find_path": ["gguf"], 70 | "file_path": "gguf/llama3-zh.Q4_K_M.gguf", 71 | "url": "https://modelscope.cn/api/v1/models/ModelM/Llama-3-8b-zh-gguf/repo?Revision=master&FilePath=llama3-zh.Q4_K_M.gguf", 72 | "SHA256": "1b04ec22e4079af8064a8378d55d2cd79e43eff9faf4bbe8f341f1fd792a53cd" 73 | }, 74 | { 75 | "model": "llama3_8b_instruct_dpo_zh-Q4_K_M", 76 | "tags": ["llama"], 77 | "find_path": ["gguf"], 78 | "file_path": "gguf/llama3_8b_instruct_dpo_zh-Q4_K_M.gguf", 79 | "url": "https://modelscope.cn/api/v1/models/shareAI/llama-3-8b-Instruct-dpo-chinese-loftq-gguf/repo?Revision=master&FilePath=llama3_8b_instruct_dpo_zh-Q4_K_M.gguf", 80 | "SHA256": "5231f5f119e1ef7db058211b8a140b530930a40b9b89c54db8455cc20ae3f699" 81 | }, 82 | { 83 | "model": "qwen1_5-14b-chat-q4_k_m", 84 | "tags": ["llama"], 85 | "find_path": ["gguf"], 86 | "file_path": "gguf/qwen1_5-14b-chat-q4_k_m.gguf", 87 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-14B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-14b-chat-q4_k_m.gguf", 88 | "SHA256": "46fbff2797c39c2d6aa555db0b0b4fe3f41b712a9b45266e438aa9a5047c0563" 89 | }, 90 | { 91 | "model": "qwen1_5-7b-chat-q4_k_m", 92 | "tags": ["llama"], 93 | "find_path": ["gguf"], 94 | "file_path": "gguf/qwen1_5-7b-chat-q4_k_m.gguf", 95 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-7B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-7b-chat-q4_k_m.gguf", 96 | "SHA256": "d7f132b1eff9ce35acf8e83ab96d2bc87eaedb68244e467bbc99e9f46a122a4c" 97 | }, 98 | { 99 | "model": "qwen1_5-4b-chat-q4_k_m", 100 | "tags": ["llama"], 101 | "find_path": ["gguf"], 102 | "file_path": "gguf/qwen1_5-4b-chat-q4_k_m.gguf", 103 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-4B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-4b-chat-q4_k_m.gguf", 104 | "SHA256": "426143ccd3241b9547c2b70c622b4f4ef3436ee07e44991bd69ad84b36cd9b9b" 105 | }, 106 | { 107 | "model": "qwen1_5-1_8b-chat-q4_k_m", 108 | "tags": ["llama"], 109 | "find_path": ["gguf"], 110 | "file_path": "gguf/qwen1_5-1_8b-chat-q4_k_m.gguf", 111 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-1.8B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-1_8b-chat-q4_k_m.gguf", 112 | "SHA256": "702e983c77883426806a2af75d34ab3e462e1b822f9dc23b49e02280c24b2b18" 113 | }, 114 | { 115 | "model": "qwen1_5-0_5b-chat-q4_k_m", 116 | "tags": ["llama"], 117 | "find_path": ["gguf"], 118 | "file_path": "gguf/qwen1_5-0_5b-chat-q4_k_m.gguf", 119 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-0.5B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-0_5b-chat-q4_k_m.gguf", 120 | "SHA256": "92916b71d32f5afea48fb7383e3b48c5b1c111f5a59f0b83c764ea1d07fe1a3a" 121 | }, 122 | { 123 | "model": "llava-phi-3-mini-int4", 124 | "tags": ["llava"], 125 | "find_path": ["gguf"], 126 | "file_path": "gguf/llava-phi-3-mini-int4.gguf", 127 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-phi-3-mini-gguf%2Fllava-phi-3-mini-int4.gguf", 128 | 
"SHA256": "377876be20bac24488716c04824ab3a6978900679b40013b0d2585004555e658" 129 | }, 130 | { 131 | "model": "llava-phi-3-mini-mmproj-f16", 132 | "tags": [ 133 | "mmproj", 134 | "377876be20bac24488716c04824ab3a6978900679b40013b0d2585004555e658" 135 | ], 136 | "find_path": ["gguf"], 137 | "file_path": "gguf/llava-phi-3-mini-mmproj-f16.gguf", 138 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-phi-3-mini-gguf%2Fllava-phi-3-mini-mmproj-f16.gguf", 139 | "SHA256": "004fc09697203296f72321b296a8d48aade2d23e553cbfb1c1e6a0b5157a08d5" 140 | }, 141 | { 142 | "model": "MiniCPM-Llama3-V-2_5-Q4_K_M", 143 | "tags": ["llava-hide"], 144 | "find_path": ["gguf"], 145 | "file_path": "gguf/MiniCPM-Llama3-V-2_5-Q4_K_M.gguf", 146 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=MiniCPM-Llama3-V-2_5-gguf%2FMiniCPM-Llama3-V-2_5-Q4_K_M.gguf", 147 | "SHA256": "010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2" 148 | }, 149 | { 150 | "model": "MiniCPM-Llama3-V-2_5-mmproj-f16", 151 | "tags": [ 152 | "mmproj-hide", 153 | "010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2" 154 | ], 155 | "find_path": ["gguf"], 156 | "file_path": "gguf/MiniCPM-Llama3-V-2_5-mmproj-f16.gguf", 157 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=MiniCPM-Llama3-V-2_5-gguf%2FMiniCPM-Llama3-V-2_5-mmproj-f16.gguf", 158 | "SHA256": "391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e" 159 | }, 160 | { 161 | "model": "llava-llama-3-8b-v1_1-int4", 162 | "tags": ["llava"], 163 | "find_path": ["gguf"], 164 | "file_path": "gguf/llava-llama-3-8b-v1_1-int4.gguf", 165 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-llama-3-8b-v1_1-gguf%2Fllava-llama-3-8b-v1_1-int4.gguf", 166 | "SHA256": "b6e1d703db0da8227fdb7127d8716bbc5049c9bf17ca2bb345be9470d217f3fc" 167 | }, 168 | { 169 | "model": "llava-llama-3-8b-v1_1-mmproj-f16", 170 | "tags": [ 171 | "mmproj", 172 | "b6e1d703db0da8227fdb7127d8716bbc5049c9bf17ca2bb345be9470d217f3fc" 173 | ], 174 | "find_path": ["gguf"], 175 | "file_path": "gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf", 176 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-llama-3-8b-v1_1-gguf%2Fllava-llama-3-8b-v1_1-mmproj-f16.gguf", 177 | "SHA256": "eb569aba7d65cf3da1d0369610eb6869f4a53ee369992a804d5810a80e9fa035" 178 | }, 179 | { 180 | "model": "ggml_llava1_5-7b-q4_k_m", 181 | "tags": ["llava"], 182 | "find_path": ["gguf"], 183 | "file_path": "gguf/ggml_llava1_5-7b-q4_k_m.gguf", 184 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_llava-v1.5-7b%2Fggml-model-q4_k.gguf", 185 | "SHA256": "7ac9c2f7b8d76cc7f3118cdf0953ebab7a7a9b12bad5dbe237219d2ab61765ea" 186 | }, 187 | { 188 | "model": "ggml_llava1_5-7b-mmproj-f16", 189 | "tags": [ 190 | "mmproj", 191 | "7ac9c2f7b8d76cc7f3118cdf0953ebab7a7a9b12bad5dbe237219d2ab61765ea" 192 | ], 193 | "find_path": ["gguf"], 194 | "file_path": "gguf/ggml_llava1_5-7b-mmproj-f16.gguf", 195 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_llava-v1.5-7b%2Fmmproj-model-f16.gguf", 196 | "SHA256": "b7c8ff0f58fca47d28ba92c4443adf8653f3349282cb8d9e6911f22d9b3814fe" 197 | }, 198 | { 199 | "model": "ggml_bakllava-1-q4_k_m", 200 | "tags": ["llava"], 201 | 
"find_path": ["gguf"], 202 | "file_path": "gguf/ggml_bakllava-1-q4_k_m.gguf", 203 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_bakllava-1%2Fggml-model-q5_k.gguf", 204 | "SHA256": "c93de1376be9b6977cc94d252a3d165d6059e07b528de0fa762534d9599b27d6" 205 | }, 206 | { 207 | "model": "ggml_bakllava-1-mmproj-f16", 208 | "tags": [ 209 | "mmproj", 210 | "c93de1376be9b6977cc94d252a3d165d6059e07b528de0fa762534d9599b27d6" 211 | ], 212 | "find_path": ["gguf"], 213 | "file_path": "gguf/ggml_bakllava-1-mmproj-f16.gguf", 214 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_bakllava-1%2Fmmproj-model-f16.gguf", 215 | "SHA256": "2e467eba710002839e0966d5e329942bb836eabd4e787bc713b07eff1d8ea13b" 216 | }, 217 | { 218 | "model": "llava_v1_6_mistral_7b_q5_k_m", 219 | "tags": ["llava"], 220 | "find_path": ["gguf"], 221 | "file_path": "gguf/llava_v1_6_mistral_7b_q5_k_m.gguf", 222 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-1.6-mistral-7b-gguf%2Fllava-v1.6-mistral-7b.Q5_K_M.gguf", 223 | "SHA256": "b1d37fc65ecb80aa8f1ce185bf4d7605bc3c5cc5bcc77a160c3a1b0377631112" 224 | }, 225 | { 226 | "model": "llava_v1_6_mistral_7b_mmproj_f16", 227 | "tags": [ 228 | "mmproj", 229 | "b1d37fc65ecb80aa8f1ce185bf4d7605bc3c5cc5bcc77a160c3a1b0377631112" 230 | ], 231 | "find_path": ["gguf"], 232 | "file_path": "gguf/llava_v1_6_mistral_7b_mmproj_f16.gguf", 233 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-1.6-mistral-7b-gguf%2Fmmproj-model-f16.gguf", 234 | "SHA256": "00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16" 235 | } 236 | ] 237 | -------------------------------------------------------------------------------- /half_json.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import json 3 | from typing import Any, List, NamedTuple, Optional, Tuple 4 | import json.decoder 5 | from json.decoder import JSONDecodeError as PyJSONDecodeError, JSONDecoder, py_scanstring 6 | from json.scanner import py_make_scanner 7 | from typing import Any, Dict, NamedTuple, Optional, Tuple, Union 8 | 9 | 10 | class FixResult(NamedTuple): 11 | success: bool 12 | line: str 13 | origin: bool 14 | 15 | 16 | class JSONFixer: 17 | def __init__(self, max_try: int = 20, max_stack: int = 3, *, js_style: bool = False) -> None: 18 | self._max_try = max_try 19 | self._max_stack = max_stack 20 | self._js_style = js_style 21 | self.last_fix: Optional[bool] = None 22 | self.fix_stack: List[str] = [] 23 | 24 | def fix(self, line: str, *, strict: bool = True) -> FixResult: 25 | try: 26 | json.loads(line, strict=strict) 27 | return FixResult(success=True, line=line, origin=True) 28 | except Exception: 29 | pass 30 | 31 | ok, new_line = self.fixwithtry(line, strict=strict) 32 | return FixResult(success=ok, line=new_line, origin=False) 33 | 34 | def fixwithtry(self, line: str, *, strict: bool = True) -> Tuple[bool, str]: 35 | if self._max_try <= 0: 36 | return False, line 37 | 38 | self.fix_stack = [] 39 | self.last_fix = None 40 | 41 | ok = False 42 | for _ in range(self._max_try): 43 | ok, new_line = self.patch_line(line, strict=strict) 44 | if ok: 45 | return ok, new_line 46 | 47 | self.last_fix = line != new_line 48 | if self.last_fix: 49 | self.fix_stack.insert(0, new_line) 50 | self.fix_stack = self.fix_stack[: self._max_stack] 51 | 52 | 
line = new_line 53 | return ok, line 54 | 55 | def patch_line(self, line: str, *, strict: bool = True) -> Tuple[bool, str]: 56 | result = decode_line(line, strict=strict) 57 | if result.success: 58 | return True, line 59 | 60 | if isinstance(result.exception, ValueError): 61 | return self.patch_value_error(line, result.err_info) 62 | 63 | if isinstance(result.exception, StopIteration): 64 | return self.patch_stop_iteration(line) 65 | 66 | if result.exception is None: 67 | return self.patch_half_parse(line, result.err_info) 68 | 69 | return False, line 70 | 71 | def patch_value_error(self, line: str, err_info: Any) -> Tuple[bool, str]: 72 | if err_info["error"] is None: 73 | return False, line 74 | 75 | error = err_info["error"] 76 | pos = err_info["pos"] 77 | nextchar = line[pos: pos + 1] 78 | lastchar = line[pos - 1: pos] 79 | nextline = line[pos:] 80 | lastline = line[:pos] 81 | 82 | if error == errors.StringUnterminatedString: 83 | return False, insert_line(line, '"', len(line)) 84 | if error == errors.ObjectExceptKey: 85 | if nextchar == "": 86 | return False, insert_line(line, "}", pos) 87 | if nextchar == ":": 88 | return False, insert_line(line, '""', pos) 89 | if lastchar in "{," and nextchar == ",": 90 | return False, remove_line(line, pos, pos + 1) 91 | if lastchar == "," and nextchar == "}": 92 | return False, remove_line(line, pos - 1, pos) 93 | if nextchar in "[{": 94 | return False, insert_line(line, '"":', pos) 95 | if self._js_style: 96 | # find 'abc' 97 | if nextchar == "'": 98 | nextline = remove_line(nextline, 0, 1) 99 | idx = nextline.find(":") 100 | if idx != -1 and idx != 0 and nextline[idx - 1] == "'": 101 | nextline = remove_line(nextline, idx - 1, idx) 102 | 103 | return False, lastline + nextline 104 | # abc:1 --> "aabc":1 105 | idx = nextline.find(":") 106 | if idx != -1: 107 | line = lastline + insert_line(nextline, '"', idx) 108 | return False, insert_line(line, '"', pos) 109 | # TODO process more case " 110 | return False, insert_line(line, '"', pos) 111 | if error == errors.ObjectExceptColon: 112 | return False, insert_line(line, ":", pos) 113 | if error == errors.ObjectExceptObject: 114 | if nextchar == "": 115 | if lastchar == "{": 116 | return False, insert_line(line, "}", pos) 117 | return False, insert_line(line, "null}", pos) 118 | if nextchar == "}": 119 | return False, insert_line(line, "null", pos) 120 | # TODO guess more 121 | return False, insert_line(line, '"', pos) 122 | if error == errors.ObjectExceptComma: 123 | if nextchar == "": 124 | return False, insert_line(line, "}", pos) 125 | return False, insert_line(line, ",", pos) 126 | if error == errors.ArrayExceptObject: 127 | if nextchar == "," and lastchar == "[": 128 | return False, remove_line(line, pos, pos + 1) 129 | if nextchar == ",": 130 | return False, insert_line(line, "null", pos) 131 | if nextchar == "]": 132 | return False, remove_line(line, pos - 1, pos) 133 | if nextchar == "": 134 | if lastchar == "[": 135 | return False, insert_line(line, "]", pos) 136 | return False, insert_line(line, "null]", pos) 137 | # TODO guess more? 138 | return False, insert_line(line, "{", pos) 139 | if error == errors.ArrayExceptComma: 140 | if len(line) == pos: 141 | return False, insert_line(line, "]", pos) 142 | return False, insert_line(line, ",", pos) 143 | # TODO unknonwn 144 | return False, line 145 | 146 | def patch_stop_iteration(self, line: str) -> Tuple[bool, str]: 147 | # TODO clean 148 | # TODO fix 149 | # 1. }] 150 | # 2. ]} 151 | # 3. constans 152 | # 4. 
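        # Added commentary: StopIteration means the scanner reached the end of the
        # input or a character that cannot start a JSON value. The code below first
        # rewrites a bare "-." number to "-0.", then prepends the opening brackets
        # implied by the closers seen before the first "{" or "[" (via
        # patch_lastest_left_object_and_array); only when nothing can be inferred and
        # the previous pass made no change does it fall back to patch_guess_left,
        # which compares the counts of unbalanced braces and brackets.
        # For example, "]}" should become "{[]}" after "{[" is prepended.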
- 153 | # 先 patch 完 {[]} 154 | # TODO: process number 155 | if line.startswith("-."): 156 | new_line = "-0." + line[2:] 157 | return False, new_line 158 | # patch 159 | left = patch_lastest_left_object_and_array(line) 160 | if left == "": 161 | if not self.last_fix: 162 | left = patch_guess_left(line) 163 | 164 | new_line = left + line 165 | return False, new_line 166 | 167 | def patch_half_parse(self, line: str, err_info: Any) -> Tuple[bool, str]: 168 | obj, end = err_info 169 | nextline = line[end:].strip() 170 | nextchar = nextline[:1] 171 | left = patch_lastest_left_object_and_array(nextline) 172 | # ?? 173 | if left == "": 174 | if nextchar == ",": 175 | left = "[" 176 | elif nextchar == ":" and isinstance(obj, str): 177 | left = "{" 178 | else: 179 | if not self.last_fix: 180 | left = patch_guess_left(nextline) 181 | 182 | new_line = left + line[:end] + nextline 183 | return False, new_line 184 | 185 | 186 | # TODO better name 187 | def patch_lastest_left_object_and_array(line: str) -> str: 188 | # '}]{[' --> '[{}]{[' 189 | pairs = {"}": "{", "]": "["} 190 | breaks = "{[" 191 | left = "" 192 | for char in line: 193 | if char in breaks: 194 | break 195 | if char in pairs: 196 | left = pairs[char] + left 197 | 198 | return left 199 | 200 | 201 | # TODO better name 202 | # TODO 改成 lastest 203 | # TODO {}}]]]] --> { not [ 204 | def patch_guess_left(line: str) -> str: 205 | miss_object = line.count("}") - line.count("{") 206 | miss_array = line.count("]") - line.count("[") 207 | if miss_object == miss_array == 0: 208 | if line[-1:] == '"' and line.count('"') == 1: 209 | return '"' 210 | elif miss_object >= miss_array: 211 | return "{" 212 | else: 213 | return "[" 214 | return "" 215 | 216 | 217 | def insert_line(line: str, value: str, pos: int) -> str: 218 | return line[:pos] + value + line[pos:] 219 | 220 | 221 | def remove_line(line: str, start: int, end: int) -> str: 222 | return line[:start] + line[end:] 223 | 224 | 225 | class JSONDecodeError: 226 | def __init__(self, parser, message): 227 | self.message = message 228 | self.parser = parser 229 | 230 | def __eq__(self, err): 231 | return err.parser == self.parser and self.message in err.message 232 | 233 | 234 | class errors: 235 | StringInvalidUXXXXEscape = JSONDecodeError( 236 | "py_scanstring", "Invalid \\uXXXX escape") 237 | # 2 different case 238 | StringUnterminatedString = JSONDecodeError( 239 | "py_scanstring", "Unterminated string starting at") 240 | StringInvalidControlCharacter = JSONDecodeError( 241 | "py_scanstring", "Invalid control character") 242 | StringInvalidEscape = JSONDecodeError("py_scanstring", "Invalid \\escape") 243 | ObjectExceptColon = JSONDecodeError( 244 | "JSONObject", "Expecting ':' delimiter") 245 | ObjectExceptObject = JSONDecodeError("JSONObject", "Expecting value") 246 | # 2 different case 247 | ObjectExceptKey = JSONDecodeError( 248 | "JSONObject", "Expecting property name enclosed in double quotes") 249 | ObjectExceptComma = JSONDecodeError( 250 | "JSONObject", "Expecting ',' delimiter") 251 | ArrayExceptObject = JSONDecodeError("JSONArray", "Expecting value") 252 | ArrayExceptComma = JSONDecodeError("JSONArray", "Expecting ',' delimiter") 253 | 254 | @classmethod 255 | def get_decode_error(cls, parser, message): 256 | err = JSONDecodeError(parser, message) 257 | for _, value in cls.__dict__.items(): 258 | if isinstance(value, JSONDecodeError): 259 | if err == value: 260 | return value 261 | return None 262 | 263 | """ 264 | 01 先不看,不研究 265 | 02 badcase: " --> "" success 266 | 03 控制符 pass 267 | 04 
unicode \\u 的 pass 268 | 05 同上 269 | 06 object 后面没有跟随 " , badcase: {abc":1} --> {"abc":1} 270 | 07 object key 后面没有 : , badcase: {"abc"1} --> {"abc":1} 271 | 08 object 开始检测 Value 收到 StopIteration 272 | 08.1 要么后面没有了 273 | 08.2 要么后面不是 "/{/[/n[ull]/t[rue]/f[alse]/number/NaN/Infinity/-Infinity 开头的东西 274 | -- 08.1 后面补上 null} 275 | -- 08.2 无脑补一个 " 276 | 09 object 解析完一个 pair 后,下一个不是}, 期待一个 ',' 277 | badcase {"k":1"s":2} 278 | 10 在 09 的基础上解析完{"k":1, 发现下一个不是 ", 这个后面再优化(暂时和 06 一致) 279 | badcase {"k":1,x":2} 280 | 11 array 开始检测 Value 收到 StopIteration 281 | 11.1 要么后面没有了,补上] 282 | 11.2 同 08.2,无脑补一个{ 看看 283 | 12 array 解析完前一个 object, 需要一个 , 284 | 这里 nextchar 既不是 ] 也不是, 代表这个 nextchar 的 end 也已经+1 了,所以减 2 285 | """ 286 | 287 | 288 | def errmsg_inv(e: ValueError) -> Dict[str, Any]: 289 | assert isinstance(e, PyJSONDecodeError) 290 | parser = e.__dict__.get("parser", "") 291 | errmsg = e.msg 292 | localerr = errors.get_decode_error(parser, errmsg) 293 | return { 294 | "parsers": e.__dict__.get("parsers", []), 295 | "error": localerr, 296 | "lineno": e.lineno, 297 | "colno": e.colno, 298 | "pos": e.pos, 299 | } 300 | 301 | 302 | def record_parser_name(parser: Any) -> Any: 303 | def new_parser(*args: Any, **kwargs: Any) -> Any: 304 | try: 305 | return parser(*args, **kwargs) 306 | except Exception as e: 307 | if "parser" not in e.__dict__: 308 | e.__dict__["parser"] = parser.__name__ 309 | if "parsers" not in e.__dict__: 310 | e.__dict__["parsers"] = [] 311 | e.__dict__["parsers"].append(parser.__name__) 312 | raise e 313 | 314 | return new_parser 315 | 316 | 317 | def make_decoder(*, strict: bool = True) -> JSONDecoder: 318 | json.decoder.scanstring = record_parser_name(py_scanstring) 319 | 320 | decoder = JSONDecoder(strict=strict) 321 | decoder.parse_object = record_parser_name(decoder.parse_object) 322 | decoder.parse_array = record_parser_name(decoder.parse_array) 323 | decoder.parse_string = record_parser_name(py_scanstring) 324 | decoder.parse_object = record_parser_name(decoder.parse_object) 325 | 326 | decoder.scan_once = py_make_scanner(decoder) 327 | return decoder 328 | 329 | 330 | decoder = make_decoder() 331 | decoder_unstrict = make_decoder(strict=False) 332 | 333 | 334 | class DecodeResult(NamedTuple): 335 | success: bool 336 | exception: Optional[Exception] 337 | err_info: Optional[Union[Dict[str, Any], Tuple[Any, Any]]] 338 | 339 | 340 | def decode_line(line: str, *, strict: bool = True) -> DecodeResult: 341 | try: 342 | obj, end = (decoder if strict else decoder_unstrict).scan_once(line, 0) 343 | ok = end == len(line) 344 | return DecodeResult(success=ok, exception=None, err_info=(obj, end)) 345 | except StopIteration as e: 346 | return DecodeResult(success=False, exception=e, err_info=None) 347 | except ValueError as e: 348 | err_info = errmsg_inv(e) 349 | return DecodeResult(success=False, exception=e, err_info=err_info) 350 | -------------------------------------------------------------------------------- /mz_gen_translate.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import json 5 | import folder_paths 6 | from pathlib import Path 7 | COMFY_PATH = Path(folder_paths.__file__).parent 8 | 9 | 10 | ZH_Replace_Map = { 11 | "mmproj_model_name": "mmproj模型名称", 12 | "model_name": "模型名称", 13 | "llama_cpp_model": "llama.cpp模型", 14 | "mmproj_model": "mmproj模型", 15 | "resolution": "分辨率", 16 | "sd_format": "SD格式化", 17 | "ImageInterrogator": "图像反推", 18 | "image_interrogator_model": "图像反推模型", 19 | "image_interrogator": "图像反推", 20 | "image": 
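    # Added note: gen_translate below writes a ComfyUI_MinusZone.translate.json file
    # into the AIGODLIKE ComfyUI-Translation extension's zh-CN/Nodes directory and
    # uses this map to substring-replace English node titles and widget names with
    # their Chinese labels.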
"图像", 21 | "download_source": "下载源", 22 | "prompt_version": "提示词版本", 23 | "style_presets": "风格预设", 24 | "keep_device": "模型常驻显存", 25 | "llama_cpp_options": "llama.cpp可选配置", 26 | "Options": "可选配置", 27 | "LLamaCPPModelConfigManualSelect": "llama.cpp模型配置手动选择", 28 | "LLamaCPP": "llama.cpp", 29 | "CLIPTextEncode": "CLIP文本编码器", 30 | "clip": "CLIP", 31 | "conditioning": "条件", 32 | "customize_instruct": "自定义指令", 33 | "CustomizeInstruct": "自定义指令", 34 | "deprecated": "已废弃", 35 | "ModelConfigManualSelect": "手动模型选择器", 36 | "ModelConfigDownloaderSelect": "预设模型下载器", 37 | "captioner_config": "打标器配置", 38 | "post_processing": "后处理", 39 | } 40 | 41 | 42 | def gen_translate(NODE_DISPLAY_NAME_MAPPINGS={}, NODE_CLASS_MAPPINGS={}): 43 | translation_dirs = [ 44 | os.path.join(COMFY_PATH, "custom_nodes", 45 | "AIGODLIKE-COMFYUI-TRANSLATION", "zh-CN", "Nodes"), 46 | os.path.join(COMFY_PATH, "custom_nodes", 47 | "AIGODLIKE-ComfyUI-Translation", "zh-CN", "Nodes"), 48 | ] 49 | translation_dir = translation_dirs[0] 50 | for dir in translation_dirs: 51 | if os.path.exists(dir): 52 | translation_dir = dir 53 | break 54 | translation_config = os.path.join( 55 | translation_dir, "ComfyUI_MinusZone.translate.json") 56 | if os.path.exists(translation_dir): 57 | if not os.path.exists(translation_config): 58 | with open(translation_config, "w", encoding="utf-8") as f: 59 | f.write("{}") 60 | 61 | if os.path.exists(translation_config): 62 | translate_config = "{}" 63 | with open(translation_config, "r", encoding="utf-8") as f: 64 | translate_config = f.read() 65 | nodes = json.loads(translate_config) 66 | for key in NODE_DISPLAY_NAME_MAPPINGS: 67 | if key not in nodes: 68 | nodes[key] = {} 69 | 70 | title = NODE_DISPLAY_NAME_MAPPINGS[key] 71 | for k, v in ZH_Replace_Map.items(): 72 | title = title.replace(k, v) 73 | nodes[key]["title"] = title 74 | 75 | if key in NODE_CLASS_MAPPINGS: 76 | node = NODE_CLASS_MAPPINGS[key] 77 | node_INPUT_TYPES = node.INPUT_TYPES() 78 | node_INPUT_TYPES_required = node_INPUT_TYPES.get( 79 | "required", {}) 80 | nodes[key]["widgets"] = {} 81 | for widget_name, _ in node_INPUT_TYPES_required.items(): 82 | widget_name_zh = widget_name 83 | for k, v in ZH_Replace_Map.items(): 84 | widget_name_zh = widget_name_zh.replace(k, v) 85 | nodes[key]["widgets"][widget_name] = widget_name_zh 86 | 87 | node_INPUT_TYPES_optional = node_INPUT_TYPES.get( 88 | "optional", {}) 89 | nodes[key]["inputs"] = {} 90 | for widget_name, _ in node_INPUT_TYPES_optional.items(): 91 | widget_name_zh = widget_name 92 | for k, v in ZH_Replace_Map.items(): 93 | widget_name_zh = widget_name_zh.replace(k, v) 94 | nodes[key]["inputs"][widget_name] = widget_name_zh 95 | 96 | try: 97 | node_RETURN_NAMES = node.RETURN_NAMES 98 | nodes[key]["outputs"] = {} 99 | for widget_name in node_RETURN_NAMES: 100 | widget_name_zh = widget_name 101 | for k, v in ZH_Replace_Map.items(): 102 | widget_name_zh = widget_name_zh.replace(k, v) 103 | nodes[key]["outputs"][widget_name] = widget_name_zh 104 | except: 105 | pass 106 | 107 | with open(translation_config, "w", encoding="utf-8") as f: 108 | f.write(json.dumps(nodes, indent=4, ensure_ascii=False)) 109 | 110 | else: 111 | print("No translation dir found!") 112 | -------------------------------------------------------------------------------- /mz_llama_core_nodes.py: -------------------------------------------------------------------------------- 1 | 2 | import importlib 3 | import json 4 | import os 5 | from . import mz_prompt_utils 6 | from . import mz_llama_cpp 7 | from . 
import mz_prompts 8 | 9 | 10 | def get_schema_base_type(t): 11 | return { 12 | "type": t, 13 | } 14 | 15 | 16 | def get_schema_obj(keys_type={}, required=[]): 17 | item = {} 18 | for key, value in keys_type.items(): 19 | if type(value) == str: 20 | value = get_schema_base_type(value) 21 | item[key] = value 22 | return { 23 | "type": "object", 24 | "properties": item, 25 | "required": required 26 | } 27 | 28 | 29 | def get_schema_array(item_type="string"): 30 | if type(item_type) == str: 31 | item_type = get_schema_base_type(item_type) 32 | return { 33 | "type": "array", 34 | "items": item_type, 35 | } 36 | 37 | 38 | high_quality_prompt = "((high quality:1.4), (best quality:1.4), (masterpiece:1.4), (8K resolution), (2k wallpaper))" 39 | style_presets_prompt = { 40 | "none": "", 41 | "high_quality": high_quality_prompt, 42 | "photography": f"{high_quality_prompt}, (RAW photo, best quality), (realistic, photo-realistic:1.2), (bokeh, cinematic shot, dynamic composition, incredibly detailed, sharpen, details, intricate detail, professional lighting, film lighting, 35mm, anamorphic, lightroom, cinematography, bokeh, lens flare, film grain, HDR10, 8K)", 43 | "illustration": f"{high_quality_prompt}, ((detailed matte painting, intricate detail, splash screen, complementary colors), (detailed),(intricate details),illustration,an extremely delicate and beautiful,ultra-detailed,highres,extremely detailed)", 44 | } 45 | 46 | 47 | def get_style_presets(): 48 | return [ 49 | "none", 50 | "high_quality", 51 | "photography", 52 | "illustration", 53 | ] 54 | 55 | 56 | def llama_cpp_node_encode(args_dict): 57 | importlib.reload(mz_prompts) 58 | importlib.reload(mz_llama_cpp) 59 | # importlib.reload(mz_prompt_utils) 60 | 61 | model_config = args_dict.get("llama_cpp_model", {}) 62 | mz_prompt_utils.Utils.print_log(f"model_config: {model_config}") 63 | 64 | chat_format = model_config.get("chat_format", None) 65 | 66 | select_model_type = model_config.get("type", "ManualSelect") 67 | if select_model_type == "ManualSelect": 68 | model_file = model_config.get("model_path", "auto") 69 | if model_file == "auto": 70 | model_file = mz_prompt_utils.Utils.get_auto_model_fullpath( 71 | "Meta-Llama-3-8B-Instruct.Q4_K_M") 72 | 73 | if "llama-3" in mz_llama_cpp.get_llama_cpp_chat_handlers(): 74 | chat_format = "llama-3" 75 | 76 | elif select_model_type == "DownloaderSelect": 77 | model_name = model_config.get("model_name", "") 78 | model_file = mz_prompt_utils.Utils.get_auto_model_fullpath( 79 | model_name) 80 | else: 81 | raise Exception("Unknown select_model_type") 82 | 83 | mz_prompt_utils.Utils.print_log(f"model_file: {model_file}") 84 | 85 | text = args_dict.get("text", "") 86 | style_presets = args_dict.get("style_presets", "") 87 | options = args_dict.get("llama_cpp_options", {}) 88 | keep_device = args_dict.get("keep_device", False) 89 | seed = args_dict.get("seed", -1) 90 | options["seed"] = seed 91 | options["chat_format"] = chat_format 92 | 93 | customize_instruct = args_dict.get("customize_instruct", None) 94 | mz_prompt_utils.Utils.print_log( 95 | f"customize_instruct: {customize_instruct}") 96 | try: 97 | schema = None 98 | if customize_instruct is None: 99 | schema = get_schema_obj( 100 | keys_type={ 101 | "description": get_schema_base_type("string"), 102 | "long_prompt": get_schema_base_type("string"), 103 | "main_color_word": get_schema_base_type("string"), 104 | "camera_angle_word": get_schema_base_type("string"), 105 | "style_words": get_schema_array("string"), 106 | "subject_words": 
get_schema_array("string"), 107 | "light_words": get_schema_array("string"), 108 | "environment_words": get_schema_array("string"), 109 | }, 110 | required=[ 111 | "description", 112 | "long_prompt", 113 | "main_color_word", 114 | "camera_angle_word", 115 | "style_words", 116 | "subject_words", 117 | "light_words", 118 | "environment_words", 119 | ] 120 | ) 121 | 122 | question = f"IDEA: {style_presets},{text}" 123 | if style_presets == "none": 124 | question = f"IDEA: {text}" 125 | 126 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 127 | 128 | else: 129 | 130 | system_prompt = customize_instruct.get("system", "") 131 | question = customize_instruct.get("instruct", "%text%") 132 | 133 | system_prompt = system_prompt.replace("%text%", text) 134 | question = question.replace("%text%", text) 135 | 136 | mz_prompt_utils.Utils.print_log(f"system_prompt: {system_prompt}") 137 | mz_prompt_utils.Utils.print_log(f"question: {question}") 138 | 139 | if schema is not None: 140 | response_text = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 141 | model_file=model_file, 142 | system=system_prompt, 143 | question=question, 144 | schema=schema, 145 | options=options, 146 | ) 147 | try: 148 | response_json = json.loads(response_text) 149 | except Exception as e: 150 | from . import half_json 151 | print("json.loads failed, try fix response_text: ", response_text) 152 | json_fixer = half_json.JSONFixer() 153 | fix_resp = json_fixer.fix(response_text) 154 | if fix_resp.success: 155 | print("fix success, use fixed response_text: ", fix_resp.line) 156 | response_json = json.loads(fix_resp.line) 157 | else: 158 | raise e 159 | 160 | mz_prompt_utils.Utils.print_log( 161 | f"response_json: {json.dumps(response_json, indent=2)}") 162 | 163 | responses = [] 164 | for key, value in response_json.items(): 165 | if type(value) == list: 166 | # 去除开头.和空格 167 | value = [v.strip().lstrip(".") for v in value] 168 | # 去除空字符串 169 | value = [v for v in value if v != ""] 170 | if len(value) > 0: 171 | responses.append(f"({', '.join(value)})") 172 | 173 | else: 174 | if value != "": 175 | responses.append(f"({value})") 176 | 177 | response = ", ".join(responses) 178 | else: 179 | response = mz_llama_cpp.llama_cpp_simple_interrogator( 180 | model_file=model_file, 181 | system=system_prompt, 182 | question=question, 183 | options=options, 184 | ) 185 | 186 | start_str = customize_instruct.get("start_str", "") 187 | if start_str != "" and response.find(start_str) != -1: 188 | full_response_list = response.split(start_str) 189 | # 删除第一个元素 190 | full_response_list.pop(0) 191 | response = start_str.join(full_response_list) 192 | 193 | end_str = customize_instruct.get("end_str", "") 194 | if end_str != "" and response.find(end_str) != -1: 195 | full_response_list = response.split(end_str) 196 | # 删除最后一个元素 197 | full_response_list.pop() 198 | response = end_str.join(full_response_list) 199 | 200 | if keep_device is False: 201 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 202 | 203 | # 去除换行 204 | while response.find("\n") != -1: 205 | response = response.replace("\n", " ") 206 | 207 | # 句号换成逗号 208 | while response.find(".") != -1: 209 | response = response.replace(".", ",") 210 | 211 | # 去除多余逗号 212 | while response.find(",,") != -1: 213 | response = response.replace(",,", ",") 214 | while response.find(", ,") != -1: 215 | response = response.replace(", ,", ",") 216 | 217 | response = mz_prompt_utils.Utils.prompt_zh_to_en(response) 218 | 219 | style_presets_prompt_text = 
style_presets_prompt.get(style_presets, "") 220 | 221 | if style_presets_prompt_text != "": 222 | response = f"{style_presets_prompt_text}, {response}" 223 | 224 | except Exception as e: 225 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 226 | raise e 227 | 228 | conditionings = None 229 | clip = args_dict.get("clip", None) 230 | if clip is not None: 231 | conditionings = mz_prompt_utils.Utils.a1111_clip_text_encode( 232 | clip, response, ) 233 | 234 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 235 | 236 | 237 | def image_interrogator_captioner(args_dict): 238 | import PIL.Image as Image 239 | captioner_config = args_dict.get("captioner_config", {}) 240 | directory = captioner_config.get("directory", None) 241 | force_update = captioner_config.get("force_update", False) 242 | caption_suffix = captioner_config.get("caption_suffix", "") 243 | retry_keyword = captioner_config.get("retry_keyword", "") 244 | retry_keywords = retry_keyword.split(",") 245 | 246 | retry_keywords = [k.strip() for k in retry_keywords] 247 | retry_keywords = [k for k in retry_keywords if k != ""] 248 | 249 | pre_images = [] 250 | for root, dirs, files in os.walk(directory): 251 | for file in files: 252 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"): 253 | image_path = os.path.join(root, file) 254 | base_file_path = os.path.splitext(image_path)[0] 255 | caption_file = os.path.join( 256 | root, base_file_path + caption_suffix) 257 | if os.path.exists(caption_file) and force_update is False: 258 | continue 259 | 260 | pre_images.append({ 261 | "image_path": image_path, 262 | "caption_path": caption_file 263 | }) 264 | 265 | result = [] 266 | 267 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 268 | for i in range(len(pre_images)): 269 | pre_image = pre_images[i] 270 | image_path = pre_image["image_path"] 271 | caption_file = pre_image["caption_path"] 272 | 273 | onec_args_dict = args_dict.copy() 274 | del onec_args_dict["captioner_config"] 275 | 276 | pil_image = Image.open(image_path) 277 | onec_args_dict["image"] = mz_prompt_utils.Utils.pil2tensor(pil_image) 278 | 279 | if i < len(pre_images) - 1: 280 | onec_args_dict["keep_device"] = True 281 | 282 | pb.update( 283 | i, 284 | len(pre_images), 285 | pil_image.copy(), 286 | ) 287 | 288 | response = image_interrogator_node_encode(onec_args_dict) 289 | response = response.get("result", ())[0] 290 | response = response.strip() 291 | is_retry = response == "" 292 | for k in retry_keywords: 293 | if response.find(k) != -1: 294 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 295 | is_retry = True 296 | break 297 | 298 | mz_prompt_utils.Utils.print_log( 299 | "\n\nonec_args_dict: ", onec_args_dict) 300 | if is_retry: 301 | for retry_n in range(5): 302 | print(f"Retry {retry_n+1}...") 303 | onec_args_dict["seed"] = onec_args_dict["seed"] + 1 304 | response = image_interrogator_node_encode(onec_args_dict) 305 | response = response.get("result", ())[0] 306 | response = response.strip() 307 | is_retry = response == "" 308 | for k in retry_keywords: 309 | if response.find(k) != -1: 310 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 311 | is_retry = True 312 | break 313 | 314 | if is_retry is False: 315 | break 316 | if is_retry: 317 | print(f"重试失败,图片被跳过 ; Retry failed") 318 | response = "" 319 | 320 | if response != "": 321 | with open(caption_file, "w") as f: 322 | prompt_fixed_beginning = captioner_config.get( 323 | "prompt_fixed_beginning", "") 324 | 
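                # Added note: the optional prompt_fixed_beginning from captioner_config
                # is prepended to every caption, and the result is saved as a sidecar
                # text file (image base name + caption_suffix) alongside the image.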
f.write(prompt_fixed_beginning + response) 325 | 326 | result.append(response) 327 | 328 | # mz_prompt_webserver.show_toast_success( 329 | # f"提示词保存成功(prompt saved successfully): {caption_file}", 330 | # 1000, 331 | # ) 332 | 333 | return result 334 | 335 | 336 | def image_interrogator_node_encode(args_dict): 337 | importlib.reload(mz_prompts) 338 | 339 | captioner_config = args_dict.get("captioner_config", None) 340 | if captioner_config is not None: 341 | image_interrogator_captioner(args_dict) 342 | # raise Exception( 343 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 344 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 345 | 346 | model_config = args_dict.get("image_interrogator_model", {}) 347 | 348 | chat_format = model_config.get("chat_format", None) 349 | llama_cpp_model = model_config.get("llama_cpp_model", "auto") 350 | mmproj_model = model_config.get("mmproj_model", "auto") 351 | 352 | select_model_type = model_config.get("type", "ManualSelect") 353 | if select_model_type == "ManualSelect": 354 | llama_cpp_model = model_config.get("model_path", "auto") 355 | if llama_cpp_model == "auto": 356 | llama_cpp_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 357 | "ggml_llava1_5-7b-q4_k_m") 358 | else: 359 | llama_cpp_model = os.path.join( 360 | mz_prompt_utils.Utils.get_gguf_models_path(), llama_cpp_model) 361 | 362 | if mmproj_model.endswith("auto"): 363 | llama_cpp_model_sha256 = mz_prompt_utils.Utils.file_sha256( 364 | llama_cpp_model) 365 | 366 | mmproj_model_name = mz_prompt_utils.Utils.get_model_zoo( 367 | tags_filter=llama_cpp_model_sha256) 368 | if len(mmproj_model_name) == 0: 369 | mmproj_model_name = None 370 | else: 371 | mmproj_model_name = mmproj_model_name[0].get("model", None) 372 | 373 | if mmproj_model_name is None: 374 | mz_prompt_utils.Utils.print_log( 375 | "llama_cpp_model_sha256: ", llama_cpp_model_sha256) 376 | raise Exception( 377 | "未能自动找到对应的mmproj文件 ; Failed to automatically find the corresponding mmproj file.") 378 | else: 379 | pass 380 | 381 | mmproj_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 382 | mmproj_model_name) 383 | else: 384 | # mmproj_model = os.path.join( 385 | # mz_prompt_utils.Utils.get_gguf_models_path(), mmproj_model) 386 | pass 387 | 388 | elif select_model_type == "DownloaderSelect": 389 | model_name = model_config.get("model_name") 390 | llama_cpp_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 391 | model_name) 392 | 393 | mmproj_model = model_config.get("mmproj_model_name", "auto") 394 | 395 | mmproj_model_name = mmproj_model 396 | if mmproj_model == "auto": 397 | llama_cpp_model_sha256 = mz_prompt_utils.Utils.file_sha256( 398 | llama_cpp_model) 399 | 400 | mz_prompt_utils.Utils.print_log( 401 | "llama_cpp_model_sha256: ", llama_cpp_model_sha256) 402 | 403 | mmproj_model_name = mz_prompt_utils.Utils.get_model_zoo( 404 | tags_filter=llama_cpp_model_sha256) 405 | if len(mmproj_model_name) == 0: 406 | mmproj_model_name = None 407 | else: 408 | mmproj_model_name = mmproj_model_name[0].get("model", None) 409 | 410 | if mmproj_model_name is None: 411 | raise Exception( 412 | "未能自动找到对应的mmproj文件 ; Failed to automatically find the corresponding mmproj file") 413 | 414 | mmproj_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 415 | mmproj_model_name) 416 | 417 | else: 418 | raise Exception("Unknown select_model_type") 419 | 420 | image = args_dict.get("image", None) 421 | image = mz_prompt_utils.Utils.tensor2pil(image) 422 | 423 | resolution = 
args_dict.get("resolution", 512) 424 | keep_device = args_dict.get("keep_device", False) 425 | seed = args_dict.get("seed", -1) 426 | options = args_dict.get("llama_cpp_options", {}) 427 | options["seed"] = seed 428 | options["chat_format"] = chat_format 429 | 430 | image = mz_prompt_utils.Utils.resize_max(image, resolution, resolution) 431 | 432 | customize_instruct = args_dict.get("customize_instruct", None) 433 | if customize_instruct is None: 434 | # system_prompt = mz_prompts.GPT4VImageCaptioner_System 435 | # question = mz_prompts.GPT4VImageCaptioner_Prompt 436 | 437 | # system_prompt = mz_prompts.M_ImageCaptioner2_System 438 | # question = mz_prompts.M_ImageCaptioner2_Prompt 439 | 440 | system_prompt = "You are an assistant who perfectly describes images." 441 | question = "Describe this image in detail please." 442 | else: 443 | system_prompt = customize_instruct.get("system", "") 444 | question = customize_instruct.get("instruct", "") 445 | 446 | mz_prompt_utils.Utils.print_log(f"mmproj_model: {mmproj_model}") 447 | response = mz_llama_cpp.llava_cpp_simple_interrogator( 448 | model_file=llama_cpp_model, 449 | mmproj_file=mmproj_model, 450 | image=image, 451 | options=options, 452 | system=system_prompt, 453 | question=question, 454 | ) 455 | response = response.strip() 456 | if response is not None and response != "": 457 | 458 | if args_dict.get("post_processing", False): 459 | 460 | # 双引号换成空格 461 | response = response.replace("\"", " ") 462 | # 中括号换成空格 463 | response = response.replace("[", " ") 464 | response = response.replace("]", " ") 465 | 466 | # 括号换成空格 467 | response = response.replace("(", " ") 468 | response = response.replace(")", " ") 469 | 470 | # 去除多余空格 471 | while response.find(" ") != -1: 472 | response = response.replace(" ", " ") 473 | 474 | # 从第一个为英文字母的地方开始截取 475 | for i in range(len(response)): 476 | if response[i].isalpha(): 477 | response = response[i:] 478 | break 479 | 480 | response = response.strip() 481 | schema = get_schema_obj( 482 | keys_type={ 483 | "short_describes": get_schema_base_type("string"), 484 | "subject_tags": get_schema_array("string"), 485 | "action_tags": get_schema_array("string"), 486 | "light_tags": get_schema_array("string"), 487 | "scene_tags": get_schema_array("string"), 488 | "mood_tags": get_schema_array("string"), 489 | "style_tags": get_schema_array("string"), 490 | "object_tags": get_schema_array("string"), 491 | }, 492 | required=[ 493 | "short_describes", 494 | "subject_tags", 495 | "action_tags", 496 | "lights_tags", 497 | "scenes_tags", 498 | "moods_tags", 499 | "styles_tags", 500 | "objects_tags", 501 | ] 502 | ) 503 | response_json_str = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 504 | model_file=llama_cpp_model, 505 | system=mz_prompts.ImageCaptionerPostProcessing_System, 506 | question=f"Content: {response}", 507 | schema=schema, 508 | options=options, 509 | ) 510 | 511 | try: 512 | response_json = json.loads(response_json_str) 513 | except Exception as e: 514 | from . 
import half_json 515 | print("json.loads failed, try fix response_json_str: ", 516 | response_json_str) 517 | json_fixer = half_json.JSONFixer() 518 | fix_resp = json_fixer.fix(response_json_str) 519 | if fix_resp.success: 520 | print("fix success, use fixed response_json_str: ", 521 | fix_resp.line) 522 | response_json = json.loads(fix_resp.line) 523 | else: 524 | raise e 525 | 526 | responses = [] 527 | 528 | def pure_words(text: str) -> bool: 529 | number_of_spaces = text.count(" ") 530 | if number_of_spaces > 2: 531 | return False 532 | for c in text: 533 | if not c.isalpha() and c != "-" and c != "_" and c != " ": 534 | return False 535 | 536 | return True 537 | 538 | for key, value in response_json.items(): 539 | if type(value) == list: 540 | 541 | # 去除开头.和空格 542 | value = [v.strip().lstrip(".") for v in value] 543 | # 去除空字符串 544 | value = [v for v in value if v != ""] 545 | 546 | # 去除带有空格和标点符号的字符串 547 | value = [ 548 | v for v in value if pure_words(v)] 549 | 550 | # 空格换成下划线 551 | value = [v.replace(" ", "_") for v in value] 552 | 553 | # 首字母小写 554 | value = [v.lower() for v in value] 555 | 556 | if len(value) > 0: 557 | responses.append(f"{', '.join(value)}") 558 | 559 | description = response_json.get("short_describes", "") 560 | if description != "": 561 | responses.append(f"{description}") 562 | 563 | # 对response进行去重 564 | response = ", ".join(responses) 565 | 566 | if keep_device is False: 567 | mz_llama_cpp.freed_gpu_memory(model_file=llama_cpp_model) 568 | 569 | # return response 570 | 571 | conditionings = None 572 | clip = args_dict.get("clip", None) 573 | if clip is not None: 574 | conditionings = mz_prompt_utils.Utils.a1111_clip_text_encode( 575 | clip, response, ) 576 | 577 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 578 | -------------------------------------------------------------------------------- /mz_llama_cpp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import importlib 3 | import json 4 | import os 5 | import shutil 6 | import subprocess 7 | import sys 8 | import torch 9 | try: 10 | from . import mz_prompt_utils 11 | from . import mz_prompt_webserver 12 | except ImportError: 13 | pass 14 | 15 | 16 | def check_llama_cpp_requirements(): 17 | min_version = "0.2.63" 18 | last_version = "0.2.76" 19 | try: 20 | from llama_cpp import Llama 21 | import llama_cpp 22 | if llama_cpp.__version__ < min_version: 23 | raise ImportError("llama_cpp version is too low. (llama_cpp版本过低)") 24 | except ImportError: 25 | py_version = "" 26 | if sys.version_info.major == 3: 27 | if sys.version_info.minor == 10: 28 | py_version = "310" 29 | elif sys.version_info.minor == 11: 30 | py_version = "311" 31 | elif sys.version_info.minor == 12: 32 | py_version = "312" 33 | 34 | if py_version == "": 35 | raise ValueError( 36 | f"Please upgrade python to version 3.10 or above. (找不到对应的python版本) 当前版本:{sys.version_info.major}.{sys.version_info.minor}") 37 | 38 | cuda_version = "" 39 | if torch.cuda.is_available(): 40 | cuda_version = "cu" + torch.version.cuda.replace(".", "") 41 | if cuda_version not in ["cu121", "cu122", "cu123"]: 42 | cuda_version = "cu121" 43 | print( 44 | f"Warning: The current version of cuda is not supported. 
(警告: 当前cuda版本不支持) {torch.version.cuda} (默认使用cu121)") 45 | else: 46 | cuda_version = "cpu" 47 | 48 | # https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.63-cu123/llama_cpp_python-0.2.63-cp310-cp310-linux_x86_64.whl 49 | 50 | system_name = "linux_x86_64" 51 | if sys.platform == "linux": 52 | if sys.maxsize > 2**32: 53 | system_name = "linux_x86_64" 54 | else: 55 | system_name = "linux_i686" 56 | elif sys.platform == "darwin": 57 | # 请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install {whl文件路径}安装 58 | raise ValueError( 59 | "Please download the corresponding whl file from https://github.com/abetlen/llama-cpp-python/releases and install it using pip install {whl file path} (请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install {whl文件路径}安装)") 60 | elif sys.platform == "win32": 61 | system_name = "win_amd64" 62 | else: 63 | raise ValueError( 64 | f"Unsupported platform. (不支持的平台) {sys.platform} (请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install 'whl文件路径' 安装)") 65 | 66 | wheel_name = f"llama_cpp_python-{last_version}-cp{py_version}-cp{py_version}-{system_name}.whl" 67 | if cuda_version == "cpu": 68 | wheel_url = f"https://github.com/abetlen/llama-cpp-python/releases/download/v{last_version}/{wheel_name}" 69 | else: 70 | wheel_url = f"https://github.com/abetlen/llama-cpp-python/releases/download/v{last_version}-{cuda_version}/{wheel_name}" 71 | 72 | print(f"pip install {wheel_url}") 73 | modelscope_url = f"https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama-cpp-python-win%2F{cuda_version}%2F{wheel_name}" 74 | if mz_prompt_utils.Utils.testDownloadSpeed(wheel_url): 75 | ret = subprocess.run([ 76 | sys.executable, "-m", 77 | "pip", "install", wheel_url], check=True) 78 | elif mz_prompt_utils.Utils.testDownloadSpeed(modelscope_url): 79 | import tempfile 80 | whl_download_file = os.path.join( 81 | tempfile.gettempdir(), wheel_name) 82 | mz_prompt_utils.Utils.download_file( 83 | modelscope_url, whl_download_file) 84 | print(f"pip install {whl_download_file}") 85 | ret = subprocess.run([ 86 | sys.executable, "-m", 87 | "pip", "install", whl_download_file], check=True) 88 | else: 89 | 90 | # 兜底方案 91 | modelscope_url = f"https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama-cpp-python-win%2Fcu121%2Fllama_cpp_python-0.2.76-cp310-cp310-win_amd64.whl" 92 | if py_version == "310" and system_name == "win_amd64" and mz_prompt_utils.Utils.testDownloadSpeed(modelscope_url): 93 | import tempfile 94 | whl_download_file = os.path.join( 95 | tempfile.gettempdir(), wheel_name) 96 | mz_prompt_utils.Utils.download_file( 97 | modelscope_url, whl_download_file) 98 | print(f"pip install {whl_download_file}") 99 | ret = subprocess.run([ 100 | sys.executable, "-m", 101 | "pip", "install", whl_download_file], check=True) 102 | else: 103 | ret = subprocess.run([ 104 | sys.executable, "-m", 105 | "pip", "install", wheel_url], check=True) 106 | 107 | if ret.returncode != 0: 108 | raise ValueError("Failed to install llama_cpp. (安装llama_cpp失败)") 109 | else: 110 | print("llama_cpp installed successfully. 
(llama_cpp安装成功)") 111 | 112 | 113 | def get_llama_cpp_chat_handlers(): 114 | check_llama_cpp_requirements() 115 | from llama_cpp import llama_chat_format 116 | chat_handlers = llama_chat_format.LlamaChatCompletionHandlerRegistry()._chat_handlers 117 | chat_handlers = list(chat_handlers.keys()) 118 | 119 | return chat_handlers 120 | 121 | 122 | def LlamaCppOptions(): 123 | # chat_handlers = ["auto"] + get_llama_cpp_chat_handlers() 124 | return { 125 | # "chat_format": chat_handlers, 126 | "n_ctx": 2048, 127 | "n_batch": 2048, 128 | "n_threads": 0, 129 | "n_threads_batch": 0, 130 | "split_mode": ["LLAMA_SPLIT_MODE_NONE", "LLAMA_SPLIT_MODE_LAYER", "LLAMA_SPLIT_MODE_ROW",], 131 | "main_gpu": 0, 132 | "n_gpu_layers": -1, 133 | "max_tokens": 4096, 134 | "temperature": 1.6, 135 | "top_p": 0.95, 136 | "min_p": 0.05, 137 | "typical_p": 1.0, 138 | "stop": "", 139 | "frequency_penalty": 0.0, 140 | "presence_penalty": 0.0, 141 | "repeat_penalty": 1.1, 142 | "top_k": 50, 143 | "tfs_z": 1.0, 144 | "mirostat_mode": ["none", "mirostat", "mirostat_v2"], 145 | "mirostat_tau": 5.0, 146 | "mirostat_eta": 0.1, 147 | } 148 | 149 | 150 | def freed_gpu_memory(model_file): 151 | check_llama_cpp_requirements() 152 | 153 | model_and_opt = mz_prompt_utils.Utils.cache_get( 154 | f"llama_cpp_model_and_opt_{model_file}") 155 | 156 | if model_and_opt is None: 157 | return 0 158 | 159 | model = model_and_opt.get("model") 160 | 161 | del model 162 | torch.cuda.empty_cache() 163 | 164 | mz_prompt_utils.Utils.cache_set( 165 | f"llama_cpp_model_and_opt_{model_file}", None) 166 | 167 | 168 | def llama_cpp_messages(model_file, mmproj_file=None, messages=[], options={}): 169 | if options is None: 170 | options = {} 171 | options = options.copy() 172 | print(f"Find local model file: {model_file}") 173 | init_opts = ["n_ctx", "logits_all", "chat_format", "n_gpu_layers"] 174 | 175 | check_llama_cpp_requirements() 176 | 177 | from llama_cpp import Llama 178 | import llama_cpp 179 | 180 | model_and_opt = mz_prompt_utils.Utils.cache_get( 181 | f"llama_cpp_model_and_opt_{model_file}") 182 | 183 | is_opts_changed = False 184 | 185 | mz_prompt_utils.Utils.print_log( 186 | f"llama_cpp_messages chat_format: {options.get('chat_format', None)}") 187 | 188 | if model_and_opt is not None: 189 | for opt in init_opts: 190 | if model_and_opt.get("options").get(opt) != options.get(opt): 191 | is_opts_changed = True 192 | break 193 | 194 | if model_and_opt is None or is_opts_changed: 195 | print("llama_cpp: loading model...") 196 | verbose = False 197 | if os.environ.get("MZ_DEV", None) is not None: 198 | verbose = True 199 | 200 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_LAYER 201 | if options.get("split_mode", "LLAMA_SPLIT_MODE_LAYER") == "LLAMA_SPLIT_MODE_ROW": 202 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_ROW 203 | elif options.get("split_mode", "LLAMA_SPLIT_MODE_LAYER") == "LLAMA_SPLIT_MODE_NONE": 204 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_NONE 205 | 206 | chat_handler = None 207 | if mmproj_file is not None: 208 | # 显存不释放,暂时全局缓存 209 | chat_handler = mz_prompt_utils.Utils.cache_get( 210 | f"llama_cpp_messages_mmproj_file_{mmproj_file}" 211 | ) 212 | if chat_handler is None: 213 | mz_prompt_utils.Utils.print_log( 214 | f"llama_cpp_messages mmproj_file: {mmproj_file}") 215 | from llama_cpp.llama_chat_format import Llava15ChatHandler 216 | chat_handler = Llava15ChatHandler(clip_model_path=mmproj_file) 217 | mz_prompt_utils.Utils.cache_set( 218 | f"llama_cpp_messages_mmproj_file_{mmproj_file}", chat_handler) 219 | 220 | model = 
Llama( 221 | model_path=model_file, 222 | n_gpu_layers=options.get("n_gpu_layers", -1), 223 | n_ctx=options.get("n_ctx", 2048), 224 | n_batch=options.get("n_batch", 2048), 225 | n_threads=options.get("n_threads", 0) if options.get( 226 | "n_threads", 0) > 0 else None, 227 | n_threads_batch=options.get("n_threads_batch", 0) if options.get( 228 | "n_threads_batch", 0) > 0 else None, 229 | main_gpu=options.get("main_gpu", 0), 230 | split_mode=split_mode_int, 231 | logits_all=options.get("logits_all", False), 232 | chat_handler=chat_handler, 233 | chat_format=options.get("chat_format", None), 234 | seed=options.get("seed", -1), 235 | verbose=verbose, 236 | ) 237 | model_and_opt = { 238 | "model": model, 239 | "chat_handler": chat_handler, 240 | "options": options, 241 | } 242 | mz_prompt_utils.Utils.cache_set( 243 | f"llama_cpp_model_and_opt_{model_file}", model_and_opt) 244 | 245 | model = model_and_opt.get("model") 246 | model.set_seed(options.get("seed", -1)) 247 | model.reset() 248 | 249 | response_format = options.get("response_format", None) 250 | mz_prompt_utils.Utils.print_log( 251 | f"======================================================LLAMA_CPP======================================================") 252 | # mz_utils.Utils.print_log("llama_cpp messages:", messages) 253 | mz_prompt_utils.Utils.print_log( 254 | "llama_cpp response_format:", response_format) 255 | 256 | stop = options.get("stop", "") 257 | if stop == "": 258 | stop = [] 259 | else: 260 | # 所有转译序列 261 | escape_sequence = { 262 | "\\n": "\n", 263 | "\\t": "\t", 264 | "\\r": "\r", 265 | "\\b": "\b", 266 | "\\f": "\f", 267 | } 268 | for key, value in escape_sequence.items(): 269 | stop = stop.replace(key, value) 270 | stop = stop.split(",") 271 | 272 | mirostat_mode = 0 273 | if options.get("mirostat_mode", "none") == "mirostat": 274 | mirostat_mode = 1 275 | elif options.get("mirostat_mode", "none") == "mirostat_v2": 276 | mirostat_mode = 2 277 | 278 | try: 279 | debuf_messages = copy.deepcopy(messages) 280 | for dindex in range(len(debuf_messages)): 281 | if debuf_messages[dindex].get("role") == "user": 282 | debuf_messages_content = debuf_messages[dindex].get( 283 | "content", []) 284 | if type(debuf_messages_content) != list: 285 | continue 286 | for ccindex in range(len(debuf_messages_content)): 287 | if debuf_messages_content[ccindex].get("type") == "image_url": 288 | debuf_messages[dindex]["content"][ccindex]["image_url"] = debuf_messages[ 289 | dindex]["content"][ccindex]["image_url"] = None 290 | 291 | mz_prompt_utils.Utils.print_log( 292 | f"LLAMA_CPP messages: {json.dumps(debuf_messages, indent=4, ensure_ascii=False)}") 293 | except Exception as e: 294 | mz_prompt_utils.Utils.print_log( 295 | f"LLAMA_CPP messages: {messages}") 296 | output = model.create_chat_completion( 297 | messages=messages, 298 | response_format=response_format, 299 | max_tokens=options.get("max_tokens", 4096), 300 | temperature=options.get("temperature", 1.6), 301 | top_p=options.get("top_p", 0.95), 302 | min_p=options.get("min_p", 0.05), 303 | typical_p=options.get("typical_p", 1.0), 304 | stop=stop, 305 | frequency_penalty=options.get("frequency_penalty", 0.0), 306 | presence_penalty=options.get("presence_penalty", 0.0), 307 | repeat_penalty=options.get("repeat_penalty", 1.1), 308 | top_k=options.get("top_k", 50), 309 | tfs_z=options.get("tfs_z", 1.0), 310 | mirostat_mode=mirostat_mode, 311 | mirostat_tau=options.get("mirostat_tau", 5.0), 312 | mirostat_eta=options.get("mirostat_eta", 0.1), 313 | tools=options.get("tools", None), 314 | 
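        # Added note: when llama_cpp_simple_interrogator_to_json puts a
        # response_format of {"type": "json_object", "schema": ...} into options,
        # llama-cpp-python is expected to constrain decoding to that schema (via a
        # generated grammar), so the reply arrives as schema-shaped JSON; the tools
        # and tool_choice entries are simply passed through for OpenAI-style
        # function calling.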
tool_choice=options.get("tool_choice", None), 315 | ) 316 | mz_prompt_utils.Utils.print_log(f"LLAMA_CPP: \n{output}") 317 | choices = output.get("choices", []) 318 | # mz_utils.Utils.print_log(f"LLAMA_CPP choices: \n{choices}") 319 | if len(choices) == 0: 320 | return "" 321 | 322 | result = choices[0].get("message", {}).get("content", "") 323 | return result 324 | 325 | 326 | def llama_cpp_simple_interrogator_to_json(model_file, use_system=True, system=None, question="", schema={}, options={}): 327 | options = options.copy() 328 | if system is None: 329 | system = "" 330 | messages = [ 331 | { 332 | "role": "user", 333 | "content": question 334 | }, 335 | ] 336 | elif use_system: 337 | messages = [ 338 | { 339 | "role": "system", 340 | "content": system 341 | }, 342 | { 343 | "role": "user", 344 | "content": question 345 | }, 346 | ] 347 | else: 348 | messages = [ 349 | { 350 | "role": "user", 351 | "content": f"{system}\nIf you understand what I am saying, please reply 'OK' and do not reply with unnecessary content." 352 | }, 353 | { 354 | "role": "assistant", 355 | "content": "OK" 356 | }, 357 | { 358 | "role": "user", 359 | "content": question 360 | }, 361 | ] 362 | 363 | response_format = { 364 | "type": "json_object", 365 | "schema": schema, 366 | } 367 | 368 | options["response_format"] = response_format 369 | 370 | # if options.get("chat_format", None) is None: 371 | # options["chat_format"] = "llama-2" 372 | 373 | result = llama_cpp_messages(model_file, None, messages, options=options) 374 | result = result.replace("\n", " ") 375 | return result 376 | 377 | 378 | def llama_cpp_simple_interrogator(model_file, use_system=True, system=None, question="", options={}): 379 | if options is None: 380 | options = {} 381 | options = options.copy() 382 | if system is None: 383 | system = "" 384 | messages = [ 385 | { 386 | "role": "user", 387 | "content": question 388 | }, 389 | ] 390 | elif use_system: 391 | messages = [ 392 | { 393 | "role": "system", 394 | "content": system 395 | }, 396 | { 397 | "role": "user", 398 | "content": question 399 | }, 400 | ] 401 | else: 402 | messages = [ 403 | { 404 | "role": "user", 405 | "content": f"{system}\nIf you understand what I am saying, please reply 'OK' and do not reply with unnecessary content." 
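            # Added note: when use_system is False, the system text is delivered as a
            # first user turn followed by a canned "OK" assistant reply, a workaround
            # for chat templates that lack a dedicated system role.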
406 | }, 407 | { 408 | "role": "assistant", 409 | "content": "OK" 410 | }, 411 | { 412 | "role": "user", 413 | "content": question 414 | }, 415 | ] 416 | return llama_cpp_messages(model_file, None, messages, options=options) 417 | 418 | 419 | def llava_cpp_messages(model_file, mmproj_file, messages, options={}): 420 | if options is None: 421 | options = {} 422 | 423 | options = options.copy() 424 | options["logits_all"] = True 425 | options["n_ctx"] = max(4096, options.get("n_ctx", 4096)) 426 | 427 | # if options.get("chat_format", None) is None: 428 | # options["chat_format"] = "llama-2" 429 | return llama_cpp_messages(model_file, mmproj_file, messages, options) 430 | 431 | 432 | def llava_cpp_simple_interrogator( 433 | model_file, mmproj_file, system="You are an assistant who perfectly describes images.", question="Describe this image in detail please.", 434 | image=None, options={}): 435 | if options is None: 436 | options = {} 437 | options = options.copy() 438 | check_llama_cpp_requirements() 439 | 440 | content = [] 441 | if image is not None: 442 | data_uri = mz_prompt_utils.Utils.pil_image_to_base64(image) 443 | content.append({"type": "image_url", "image_url": {"url": data_uri}}) 444 | 445 | content.append({"type": "text", "text": question}) 446 | 447 | check_llama_cpp_requirements() 448 | 449 | return llava_cpp_messages(model_file, mmproj_file, [ 450 | { 451 | "role": "system", 452 | "content": system, 453 | }, 454 | { 455 | "role": "user", 456 | "content": content, 457 | }, 458 | ], options=options) 459 | -------------------------------------------------------------------------------- /mz_openaiapi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import subprocess 5 | 6 | from . import mz_prompt_utils 7 | from . import mz_llama_cpp 8 | from . import mz_llama_core_nodes 9 | from . 
import mz_prompts 10 | 11 | 12 | def zhipu_json_fix(input_data): 13 | if type(input_data) == dict: 14 | if "Items" in input_data: 15 | return input_data["Items"] 16 | else: 17 | for key, value in input_data.items(): 18 | input_data[key] = zhipu_json_fix(value) 19 | return input_data 20 | 21 | elif type(input_data) == list: 22 | for i in range(len(input_data)): 23 | input_data[i] = zhipu_json_fix(input_data[i]) 24 | return input_data 25 | 26 | else: 27 | return input_data 28 | 29 | 30 | def query_beautify_prompt_text(args_dict): 31 | try: 32 | from openai import OpenAI 33 | import openai 34 | except ImportError: 35 | subprocess.check_call( 36 | [sys.executable, "-m", "pip", "install", "openai"]) 37 | from openai import OpenAI 38 | import openai 39 | 40 | api_key = args_dict.get("api_key", None) 41 | base_url = args_dict.get("base_url", None) 42 | 43 | text = args_dict.get("text", "") 44 | style_presets = args_dict.get("style_presets", "") 45 | 46 | if api_key is None: 47 | raise ValueError("api_key is required") 48 | 49 | client = OpenAI( 50 | api_key=api_key, 51 | default_headers={"x-foo": "true"} 52 | ) 53 | 54 | if base_url is not None: 55 | client.base_url = base_url 56 | 57 | model_name = args_dict.get("model_name", "gpt-3.5-turbo") 58 | 59 | options = args_dict.get("options", {}) 60 | 61 | customize_instruct = args_dict.get("customize_instruct", None) 62 | mz_prompt_utils.Utils.print_log( 63 | f"customize_instruct: {customize_instruct}") 64 | 65 | schema = None 66 | if customize_instruct is None: 67 | schema = mz_llama_core_nodes.get_schema_obj( 68 | keys_type={ 69 | "description": mz_llama_core_nodes.get_schema_base_type("string"), 70 | "long_prompt": mz_llama_core_nodes.get_schema_base_type("string"), 71 | "main_color_word": mz_llama_core_nodes.get_schema_base_type("string"), 72 | "camera_angle_word": mz_llama_core_nodes.get_schema_base_type("string"), 73 | "style_words": mz_llama_core_nodes.get_schema_array("string"), 74 | "subject_words": mz_llama_core_nodes.get_schema_array("string"), 75 | "light_words": mz_llama_core_nodes.get_schema_array("string"), 76 | "environment_words": mz_llama_core_nodes.get_schema_array("string"), 77 | }, 78 | required=[ 79 | "description", 80 | "long_prompt", 81 | "main_color_word", 82 | "camera_angle_word", 83 | "style_words", 84 | "subject_words", 85 | "light_words", 86 | "environment_words", 87 | ] 88 | ) 89 | 90 | question = f"IDEA: {style_presets},{text}" 91 | if style_presets == "none": 92 | question = f"IDEA: {text}" 93 | 94 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 95 | 96 | else: 97 | 98 | system_prompt = customize_instruct.get("system", "") 99 | question = customize_instruct.get("instruct", "%text%") 100 | 101 | system_prompt = system_prompt.replace("%text%", text) 102 | question = question.replace("%text%", text) 103 | 104 | mz_prompt_utils.Utils.print_log(f"system_prompt: {system_prompt}") 105 | mz_prompt_utils.Utils.print_log(f"question: {question}") 106 | # print(f"system_prompt: {system_prompt}") 107 | # print(f"question: {question}") 108 | 109 | output = None 110 | if schema is not None: 111 | 112 | output = client.chat.completions.create( 113 | model=model_name, 114 | messages=[ 115 | {"role": "system", "content": system_prompt}, 116 | {"role": "user", "content": f"{question}\ncall beautify_prompt_text function to get the result."}, 117 | ], 118 | tools=[{ 119 | "type": "function", 120 | "function": { 121 | "name": "beautify_prompt_text", 122 | "description": "required Beautify Prompt Text", 123 | 
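                    # Added note: the prompt schema is exposed as a single OpenAI
                    # function tool, and tool_choice below forces the model to call
                    # it, so the beautified prompt comes back as structured JSON
                    # arguments rather than free text; zhipu_json_fix later unwraps
                    # the "Items" wrapper that some providers add around array values.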
"parameters": schema, 124 | } 125 | }], 126 | tool_choice={"type": "function", 127 | "function": {"name": "beautify_prompt_text"}}, 128 | ) 129 | 130 | if type(output) == str: 131 | raise Exception( 132 | f"返回结果格式异常 ; Return result format exception : {output}") 133 | 134 | tool_calls = output.choices[0].message.tool_calls 135 | 136 | functions_args = {} 137 | for tool_call in tool_calls: 138 | function_name = tool_call.function.name 139 | function_args = json.loads(tool_call.function.arguments) 140 | functions_args[function_name] = function_args 141 | beautify_prompt_text_result = functions_args.get( 142 | "beautify_prompt_text", {}) 143 | 144 | mz_prompt_utils.Utils.print_log( 145 | f"beautify_prompt_text_result: {beautify_prompt_text_result}") 146 | 147 | beautify_prompt_text_result = zhipu_json_fix( 148 | beautify_prompt_text_result) 149 | results = [] 150 | for key, value in beautify_prompt_text_result.items(): 151 | if type(value) == list: 152 | value = [item for item in value if item != ""] 153 | value = [mz_prompt_utils.Utils.prompt_zh_to_en(item) 154 | for item in value] 155 | if len(value) == 0: 156 | continue 157 | item_str = ", ".join(value) 158 | results.append(f"({item_str})") 159 | else: 160 | if value == "": 161 | continue 162 | value = mz_prompt_utils.Utils.prompt_zh_to_en(value) 163 | results.append(f"({value})") 164 | 165 | full_response = ", ".join(results) 166 | 167 | else: 168 | output = client.chat.completions.create( 169 | model=model_name, 170 | messages=[ 171 | {"role": "system", "content": system_prompt}, 172 | {"role": "user", "content": question}, 173 | ], 174 | ) 175 | 176 | if type(output) == str: 177 | raise Exception( 178 | f"返回结果格式异常 ; Return result format exception : {output}") 179 | 180 | full_response = output.choices[0].message.content 181 | 182 | mz_prompt_utils.Utils.print_log( 183 | f"OPENAI_OUTPUT: \n{output.model_dump_json()}") 184 | # print(output.model_dump_json()) 185 | 186 | # 去除换行 187 | while full_response.find("\n") != -1: 188 | full_response = full_response.replace("\n", " ") 189 | # 句号换成逗号 190 | while full_response.find(".") != -1: 191 | full_response = full_response.replace(".", ",") 192 | # 去除多余逗号 193 | while full_response.find(",,") != -1: 194 | full_response = full_response.replace(",,", ",") 195 | while full_response.find(", ,") != -1: 196 | full_response = full_response.replace(", ,", ",") 197 | style_presets_prompt_text = mz_llama_core_nodes.style_presets_prompt.get( 198 | style_presets, "") 199 | if style_presets_prompt_text != "": 200 | full_response = f"{style_presets_prompt_text}, {full_response}" 201 | return full_response 202 | -------------------------------------------------------------------------------- /mz_prompt_webserver.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import asyncio 4 | import uuid 5 | from . 
import mz_prompt_utils 6 | 7 | 8 | web_msg_pool = { 9 | 10 | } 11 | 12 | 13 | def show_toast_success(message, duration=2000): 14 | send_message({ 15 | "type": "toast-success", 16 | "message": message, 17 | "duration": duration 18 | }) 19 | 20 | 21 | def send_message(data): 22 | global web_msg_pool 23 | for key in web_msg_pool: 24 | web_msg_pool[key].append(data) 25 | 26 | 27 | def start_server(): 28 | try: 29 | global web_msg_pool 30 | from aiohttp import web 31 | import server 32 | app: web.Application = server.PromptServer.instance.app 33 | 34 | async def message(request): 35 | muuid = uuid.uuid4() 36 | try: 37 | ws = web.WebSocketResponse() 38 | 39 | await ws.prepare(request) 40 | 41 | web_msg_pool[muuid] = [] 42 | async for msg in ws: 43 | if msg.type == web.WSMsgType.text: 44 | if len(web_msg_pool[muuid]) == 0: 45 | continue 46 | else: 47 | await ws.send_json(web_msg_pool[muuid]) 48 | web_msg_pool[muuid] = [] 49 | elif msg.type == web.WSMsgType.close: 50 | break 51 | 52 | del web_msg_pool[muuid] 53 | mz_prompt_utils.Utils.print_log(f"connection {muuid} closed") 54 | return ws 55 | except Exception as e: 56 | mz_prompt_utils.Utils.print_log(e) 57 | del web_msg_pool[muuid] 58 | return ws 59 | 60 | if not any([route.get_info().get("path", "") == "/mz_webapi/message" for route in app.router.routes()]): 61 | mz_prompt_utils.Utils.print_log("add route /mz_webapi/message") 62 | app.router.add_get("/mz_webapi/message", message) 63 | else: 64 | mz_prompt_utils.Utils.print_log( 65 | "route /mz_webapi/message is exist") 66 | 67 | except Exception as e: 68 | print(e) 69 | -------------------------------------------------------------------------------- /mz_prompts.py: -------------------------------------------------------------------------------- 1 | Beautify_Prompt = """ 2 | Stable Diffusion is an AI art generation model similar to DALLE-2. 3 | Below is a list of prompts that can be used to generate images with Stable Diffusion: 4 | - portait of a homer simpson archer shooting arrow at forest monster, front game card, drark, marvel comics, dark, intricate, highly detailed, smooth, artstation, digital illustration by ruan jia and mandy jurgens and artgerm and wayne barlowe and greg rutkowski and zdislav beksinski 5 | - pirate, concept art, deep focus, fantasy, intricate, highly detailed, digital painting, artstation, matte, sharp focus, illustration, art by magali villeneuve, chippy, ryan yee, rk post, clint cearley, daniel ljunggren, zoltan boros, gabor szikszai, howard lyon, steve argyle, winona nelson 6 | - ghost inside a hunted room, art by lois van baarle and loish and ross tran and rossdraws and sam yang and samdoesarts and artgerm, digital art, highly detailed, intricate, sharp focus, Trending on Artstation HQ, deviantart, unreal engine 5, 4K UHD image 7 | - red dead redemption 2, cinematic view, epic sky, detailed, concept art, low angle, high detail, warm lighting, volumetric, godrays, vivid, beautiful, trending on artstation, by jordan grimmer, huge scene, grass, art greg rutkowski 8 | - a fantasy style portrait painting of rachel lane / alison brie hybrid in the style of francois boucher oil painting unreal 5 daz. 
rpg portrait, extremely detailed artgerm greg rutkowski alphonse mucha greg hildebrandt tim hildebrandt 9 | - athena, greek goddess, claudia black, art by artgerm and greg rutkowski and magali villeneuve, bronze greek armor, owl crown, d & d, fantasy, intricate, portrait, highly detailed, headshot, digital painting, trending on artstation, concept art, sharp focus, illustration 10 | - closeup portrait shot of a large strong female biomechanic woman in a scenic scifi environment, intricate, elegant, highly detailed, centered, digital painting, artstation, concept art, smooth, sharp focus, warframe, illustration, thomas kinkade, tomasz alen kopera, peter mohrbacher, donato giancola, leyendecker, boris vallejo 11 | - ultra realistic illustration of steve urkle as the hulk, intricate, elegant, highly detailed, digital painting, artstation, concept art, smooth, sharp focus, illustration, art by artgerm and greg rutkowski and alphonse mucha 12 | I want you to write me a list of detailed prompts exactly about the idea written after IDEA. Follow the structure of the example prompts. This means a very short description of the scene, followed by modifiers divided by commas to alter the mood, style, lighting, and more. 13 | """ 14 | 15 | Long_prompt = "Long prompt version should consist of 3 to 5 sentences. Long prompt version must specify the color, shape, texture or spatial relation of the included objects. DO NOT generate sentences that describe any atmosphere!!! The language of reply is English only!!!" 16 | 17 | Standardize_Prompt = """ 18 | Extract the content about Stable Diffusion style from the following input and combine it into a json array. Note that the output will be directly used in the program. 19 | Please output the standardized json content. 20 | """ 21 | 22 | 23 | GPT4VImageCaptioner_System = """ 24 | As an AI image tagging expert, please provide precise tags for these images to enhance the CLIP model's understanding of the content. 25 | """ 26 | 27 | # From https://github.com/jiayev/GPT4V-Image-Captioner 28 | GPT4VImageCaptioner_Prompt = """ 29 | Employ succinct keywords or phrases or sentences, steering clear of elaborate sentences and extraneous conjunctions. 30 | Prioritize the tags by relevance. 31 | Your tags should capture key elements such as the main subject, setting, artistic style, composition, image quality, color tone, filter, and camera specifications, and any other tags crucial for the image. 32 | When tagging photos of people, include specific details like gender, nationality, attire, actions, pose, expressions, accessories, makeup, composition type, age, etc. 33 | For other image categories, apply appropriate and common descriptive tags as well. 34 | Recognize and tag any celebrities, well-known landmarks or IPs if clearly featured in the image. 35 | Your tags should be accurate, non-duplicative, and within a 20-75 word count range. 36 | These tags will be used for image re-creation, so the closer the resemblance to the original image, the better the tag quality. 37 | Tags should be comma-separated. 38 | """ 39 | 40 | 41 | M_ImageCaptioner_System = """ 42 | Long prompt version should consist of 3 to 5 sentences. Long prompt version must specify the color, shape, texture or spatial relation of the included objects. DO NOT generate sentences that describe any atmosphere!!! 43 | """ 44 | 45 | M_ImageCaptioner_Prompt = """ 46 | Describe this image in detail please. 47 | The language of reply is English only!!! 
48 | Starts with "In the image," 49 | """ 50 | 51 | 52 | M_ImageCaptioner2_System = """ 53 | You are an assistant who perfectly describes images. 54 | """ 55 | 56 | M_ImageCaptioner2_Prompt = """ 57 | Describe this image in detail please. 58 | The language of reply is English only!!! 59 | Starts with "In the image," 60 | """ 61 | 62 | 63 | ImageCaptionerPostProcessing_System = """ 64 | I want you to write me a detailed list of tips for Content. 65 | Write a very short description of the scene and put it in the 'short_describes' field 66 | Write complete [moods, styles, lights, elements, objects] of the word array and put it in the '$_tags' field 67 | Don't include anything that isn't in Content. 68 | The language of reply is English only!!! 69 | """ 70 | -------------------------------------------------------------------------------- /mz_transformers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import subprocess 5 | import traceback 6 | 7 | 8 | def transformers_captioner(args_dict, myfunc): 9 | from . import mz_prompt_utils 10 | import PIL.Image as Image 11 | captioner_config = args_dict.get("captioner_config", {}) 12 | directory = captioner_config.get("directory", None) 13 | force_update = captioner_config.get("force_update", False) 14 | caption_suffix = captioner_config.get("caption_suffix", "") 15 | retry_keyword = captioner_config.get("retry_keyword", "") 16 | batch_size = captioner_config.get("batch_size", 1) 17 | retry_keywords = retry_keyword.split(",") 18 | 19 | retry_keywords = [k.strip() for k in retry_keywords] 20 | retry_keywords = [k for k in retry_keywords if k != ""] 21 | 22 | pre_images = [] 23 | # print("directory:", directory) 24 | for root, dirs, files in os.walk(directory): 25 | for file in files: 26 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png") or file.endswith(".webp"): 27 | image_path = os.path.join(root, file) 28 | base_file_path = os.path.splitext(image_path)[0] 29 | caption_file = os.path.join( 30 | root, base_file_path + caption_suffix) 31 | if os.path.exists(caption_file) and force_update is False: 32 | continue 33 | 34 | pre_images.append({ 35 | "image_path": image_path, 36 | "caption_path": caption_file 37 | }) 38 | 39 | result = [] 40 | 41 | # print(f"Total images: {len(pre_images)} : {json.dumps(pre_images, indent=4)}") 42 | print(f"Total images: {len(pre_images)}") 43 | 44 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 45 | images_batch = [] 46 | for i in range(len(pre_images)): 47 | # print(f"Processing image {i+1}/{len(pre_images)}") 48 | try: 49 | pre_image = pre_images[i] 50 | image_path = pre_image["image_path"] 51 | caption_file = pre_image["caption_path"] 52 | 53 | onec_args_dict = args_dict.copy() 54 | del onec_args_dict["captioner_config"] 55 | 56 | pil_image = Image.open(image_path) 57 | images_batch.append({ 58 | "image_path": image_path, 59 | "pil_image": pil_image 60 | }) 61 | 62 | if len(images_batch) < batch_size: 63 | if i < len(pre_images) - 1: 64 | continue 65 | 66 | if i < len(pre_images) - 1: 67 | onec_args_dict["keep_device"] = True 68 | 69 | pil_images = [] 70 | for j in range(len(images_batch)): 71 | pil_images.append(images_batch[j]["pil_image"]) 72 | 73 | # onec_args_dict["image"] = mz_prompt_utils.Utils.pil2tensor( 74 | # pil_image) 75 | 76 | thumbnail = Image.new( 77 | "RGB", (images_batch[0]["pil_image"].width * batch_size, images_batch[0]["pil_image"].height)) 78 | 79 | for j in 
range(len(images_batch)): 80 | pil_image = images_batch[j]["pil_image"] 81 | thumbnail.paste(pil_image, (j * pil_image.width, 0)) 82 | 83 | pb.update( 84 | i, 85 | len(pre_images), 86 | # 转RGB 87 | thumbnail, 88 | ) 89 | onec_args_dict["images"] = pil_images 90 | onec_args_dict["captioner_mode"] = True 91 | 92 | responses = myfunc(onec_args_dict) 93 | # print(f"responses: {responses}") 94 | for j in range(len(images_batch)): 95 | item = images_batch[j] 96 | image_path = item["image_path"] 97 | caption_file = os.path.join( 98 | os.path.dirname(image_path), os.path.splitext(image_path)[0] + caption_suffix) 99 | response = responses[j] 100 | response = response.strip() 101 | 102 | print(f"==={image_path}===") 103 | print(image_path) 104 | print(response) 105 | print("") 106 | print("") 107 | 108 | if response != "": 109 | with open(caption_file, "w") as f: 110 | prompt_fixed_beginning = captioner_config.get( 111 | "prompt_fixed_beginning", "") 112 | f.write(prompt_fixed_beginning + response) 113 | 114 | result.append(response) 115 | 116 | images_batch = [] 117 | except Exception as e: 118 | print( 119 | f"For image {image_path}, error: {e} , stack: {traceback.format_exc()}") 120 | return result 121 | 122 | 123 | def florence2_node_encode(args_dict): 124 | args_dict = args_dict.copy() 125 | captioner_config = args_dict.get("captioner_config", None) 126 | if captioner_config is not None: 127 | transformers_captioner(args_dict, florence2_node_encode) 128 | # raise Exception( 129 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 130 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 131 | 132 | import torch 133 | import folder_paths 134 | from . import mz_prompt_utils 135 | from .mz_prompt_utils import Utils 136 | 137 | florence2_large_files_map = { 138 | "Florence-2-large": [ 139 | { 140 | "file_path": "pytorch_model.bin", 141 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=pytorch_model.bin" 142 | }, 143 | 144 | # tokenizer.json 145 | { 146 | "file_path": "tokenizer.json", 147 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=tokenizer.json" 148 | }, 149 | # tokenizer_config.json 150 | { 151 | "file_path": "tokenizer_config.json", 152 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=tokenizer_config.json" 153 | }, 154 | # vocab.json 155 | { 156 | "file_path": "vocab.json", 157 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=vocab.json" 158 | } 159 | ], 160 | "Florence-2-large-ft": [ 161 | { 162 | "file_path": "pytorch_model.bin", 163 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=pytorch_model.bin" 164 | }, 165 | # tokenizer.json 166 | { 167 | "file_path": "tokenizer.json", 168 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=tokenizer.json" 169 | }, 170 | # tokenizer_config.json 171 | { 172 | "file_path": "tokenizer_config.json", 173 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=tokenizer_config.json" 174 | }, 175 | # vocab.json 176 | { 177 | "file_path": "vocab.json", 178 | "url": 
"https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=vocab.json" 179 | } 180 | ], 181 | } 182 | 183 | llm_path = os.path.join( 184 | folder_paths.models_dir, 185 | "LLM", 186 | ) 187 | os.makedirs(llm_path, exist_ok=True) 188 | 189 | model_name = args_dict.get("model_name", "Florence-2-large") 190 | 191 | model_path = os.path.join(llm_path, model_name) 192 | 193 | if not os.path.exists(model_path): 194 | # GIT_LFS_SKIP_SMUDGE=1 git clone https://www.modelscope.cn/AI-ModelScope/Florence-2-large.git 195 | original_env = os.environ.get("GIT_LFS_SKIP_SMUDGE") 196 | os.environ["GIT_LFS_SKIP_SMUDGE"] = "1" 197 | subprocess.run( 198 | ["git", "clone", "https://www.modelscope.cn/AI-ModelScope/Florence-2-large.git", model_path]) 199 | if original_env is not None: 200 | os.environ["GIT_LFS_SKIP_SMUDGE"] = original_env 201 | 202 | florence2_large_files = florence2_large_files_map.get(model_name, []) 203 | for file_info in florence2_large_files: 204 | file_path = os.path.join(model_path, file_info["file_path"]) 205 | # 判断文件大小小于1M 206 | if not os.path.exists(file_path) or os.path.getsize(file_path) < 1024 * 1024: 207 | Utils.download_file(file_info["url"], file_path) 208 | 209 | # with open(os.path.join(os.path.dirname(__file__), "hook", "modeling_florence2.py"), "r") as f: 210 | # code = f.read() 211 | shutil.copyfile( 212 | os.path.join(os.path.dirname(__file__), 213 | "hook", "modeling_florence2.py"), 214 | os.path.join(model_path, "modeling_florence2.py") 215 | ) 216 | 217 | from transformers import AutoProcessor, AutoModelForCausalLM 218 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 219 | model = Utils.cache_get(f"florence_model_and_opt_") 220 | if model is None: 221 | model = AutoModelForCausalLM.from_pretrained( 222 | model_path, 223 | local_files_only=True, 224 | trust_remote_code=True, 225 | ) 226 | model.to(device).eval() 227 | Utils.cache_set(f"florence_model_and_opt_", model) 228 | 229 | processor = AutoProcessor.from_pretrained( 230 | model_path, 231 | local_files_only=True, 232 | trust_remote_code=True 233 | ) 234 | 235 | captioner_mode = args_dict.get("captioner_mode", False) 236 | if captioner_mode: 237 | pil_images = args_dict.get("images", None) 238 | _pil_images = [] 239 | for pil_image in pil_images: 240 | resolution = args_dict.get("resolution", 512) 241 | pil_image = Utils.resize_max( 242 | pil_image, resolution, resolution).convert("RGB") 243 | _pil_images.append(pil_image) 244 | pil_images = _pil_images 245 | else: 246 | tensor_image = args_dict.get("image", None) 247 | pil_image = Utils.tensor2pil(tensor_image) 248 | resolution = args_dict.get("resolution", 512) 249 | pil_image = Utils.resize_max( 250 | pil_image, resolution, resolution).convert("RGB") 251 | pil_images = [pil_image] 252 | 253 | prompt = "" 254 | prompts = [prompt for _ in pil_images] 255 | inputs = processor(text=prompts, images=pil_images, return_tensors="pt") 256 | generated_ids = model.generate( 257 | input_ids=inputs["input_ids"].to(device), 258 | pixel_values=inputs["pixel_values"].to(device), 259 | max_new_tokens=1024, 260 | num_beams=3, 261 | do_sample=False 262 | ) 263 | 264 | generated_texts = processor.batch_decode( 265 | generated_ids, skip_special_tokens=True) 266 | 267 | pil_image = pil_images[0] 268 | parsed_answers = [] 269 | for i in range(len(generated_texts)): 270 | generated_text = generated_texts[i] 271 | parsed_answer = processor.post_process_generation( 272 | generated_text, 273 | task=prompt, 274 | 
image_size=(pil_image.width, pil_image.height)) 275 | parsed_answers.append(parsed_answer) 276 | 277 | response = [] 278 | for i in range(len(parsed_answers)): 279 | response.append(parsed_answers[i].get(prompt)) 280 | 281 | keep_device = args_dict.get("keep_device", False) 282 | if not keep_device: 283 | model.cpu() 284 | del model 285 | torch.cuda.empty_cache() 286 | Utils.cache_set(f"florence_model_and_opt_", None) 287 | 288 | if captioner_mode: 289 | return response 290 | else: 291 | response = response[0] 292 | 293 | conditionings = None 294 | clip = args_dict.get("clip", None) 295 | if clip is not None: 296 | conditionings = Utils.a1111_clip_text_encode( 297 | clip, response, ) 298 | 299 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 300 | 301 | 302 | def paligemma_node_encode(args_dict): 303 | args_dict = args_dict.copy() 304 | captioner_config = args_dict.get("captioner_config", None) 305 | if captioner_config is not None: 306 | transformers_captioner(args_dict, paligemma_node_encode) 307 | # raise Exception( 308 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 309 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 310 | 311 | import torch 312 | import folder_paths 313 | from . import mz_prompt_utils 314 | from .mz_prompt_utils import Utils 315 | 316 | paligemma_files_map = { 317 | "common": [ 318 | 319 | { 320 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fadded_tokens.json", 321 | "file_path": "added_tokens.json" 322 | }, 323 | { 324 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fconfig.json", 325 | "file_path": "config.json" 326 | }, 327 | { 328 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fgeneration_config.json", 329 | "file_path": "generation_config.json" 330 | }, 331 | { 332 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fpreprocessor_config.json", 333 | "file_path": "preprocessor_config.json" 334 | }, 335 | { 336 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fspecial_tokens_map.json", 337 | "file_path": "special_tokens_map.json" 338 | }, 339 | { 340 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer.json", 341 | "file_path": "tokenizer.json" 342 | }, 343 | { 344 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer.model", 345 | "file_path": "tokenizer.model" 346 | }, 347 | { 348 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer_config.json", 349 | "file_path": "tokenizer_config.json" 350 | }, 351 | ], 352 | "paligemma-sd3-long-captioner": [ 353 | { 354 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fmodel.safetensors", 355 | "file_path": "model.safetensors" 356 | }, 357 | ], 358 | "paligemma-sd3-long-captioner-v2": [ 359 | { 360 | "url": 
"https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-v2-merge%2Fmodel.safetensors", 361 | "file_path": "model.safetensors" 362 | }, 363 | ], 364 | "paligemma-sdxl-long-captioner": [ 365 | { 366 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sdxl-long-captioner-merge%2Fmodel.safetensors", 367 | "file_path": "model.safetensors" 368 | }, 369 | ], 370 | } 371 | 372 | llm_path = os.path.join( 373 | folder_paths.models_dir, 374 | "LLM", 375 | ) 376 | os.makedirs(llm_path, exist_ok=True) 377 | 378 | model_name = args_dict.get("model_name") 379 | 380 | model_path = os.path.join(llm_path, model_name) 381 | 382 | common_files = paligemma_files_map.get("common", []) 383 | for file_info in common_files: 384 | file_path = os.path.join(model_path, file_info["file_path"]) 385 | if not os.path.exists(file_path): 386 | Utils.download_file(file_info["url"], file_path) 387 | 388 | paligemma_files = paligemma_files_map.get(model_name, []) 389 | for file_info in paligemma_files: 390 | file_path = os.path.join(model_path, file_info["file_path"]) 391 | 392 | if not os.path.exists(file_path): 393 | Utils.download_file(file_info["url"], file_path) 394 | 395 | from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor 396 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 397 | model = Utils.cache_get(f"paligemma_model_and_opt_") 398 | if model is None: 399 | model = PaliGemmaForConditionalGeneration.from_pretrained( 400 | model_path, 401 | local_files_only=True, 402 | torch_dtype=torch.float16, 403 | ) 404 | model.to(device).eval() 405 | Utils.cache_set(f"paligemma_model_and_opt_", model) 406 | 407 | processor = PaliGemmaProcessor.from_pretrained( 408 | model_path, 409 | local_files_only=True, 410 | ) 411 | 412 | 413 | captioner_mode = args_dict.get("captioner_mode", False) 414 | if captioner_mode: 415 | pil_images = args_dict.get("images", None) 416 | _pil_images = [] 417 | for pil_image in pil_images: 418 | resolution = args_dict.get("resolution", 512) 419 | pil_image = Utils.resize_max( 420 | pil_image, resolution, resolution).convert("RGB") 421 | _pil_images.append(pil_image) 422 | pil_images = _pil_images 423 | pil_image = pil_images[0] 424 | else: 425 | tensor_image = args_dict.get("image", None) 426 | pil_image = Utils.tensor2pil(tensor_image) 427 | resolution = args_dict.get("resolution", 512) 428 | pil_image = Utils.resize_max( 429 | pil_image, resolution, resolution).convert("RGB") 430 | pil_images = [pil_image] 431 | pil_image = pil_images[0] 432 | 433 | 434 | # prefix 435 | prompt = "caption en" 436 | model_inputs = processor( 437 | text=prompt, images=pil_image, return_tensors="pt").to('cuda') 438 | input_len = model_inputs["input_ids"].shape[-1] 439 | 440 | def modify_caption(caption: str) -> str: 441 | """ 442 | Removes specific prefixes from captions. 443 | Args: 444 | caption (str): A string containing a caption. 445 | Returns: 446 | str: The caption with the prefix removed if it was present. 
447 | """ 448 | # Define the prefixes to remove 449 | import re 450 | prefix_substrings = [ 451 | ('captured from ', ''), 452 | ('captured at ', '') 453 | ] 454 | 455 | # Create a regex pattern to match any of the prefixes 456 | pattern = '|'.join([re.escape(opening) 457 | for opening, _ in prefix_substrings]) 458 | replacers = {opening: replacer for opening, 459 | replacer in prefix_substrings} 460 | 461 | # Function to replace matched prefix with its corresponding replacement 462 | def replace_fn(match): 463 | return replacers[match.group(0)] 464 | 465 | # Apply the regex to the caption 466 | return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE) 467 | 468 | with torch.inference_mode(): 469 | generation = model.generate( 470 | **model_inputs, max_new_tokens=256, do_sample=False) 471 | generation = generation[0][input_len:] 472 | decoded = processor.decode(generation, skip_special_tokens=True) 473 | 474 | modified_caption = modify_caption(decoded) 475 | # print(modified_caption) 476 | 477 | response = modified_caption 478 | 479 | keep_device = args_dict.get("keep_device", False) 480 | if not keep_device: 481 | model.cpu() 482 | del model 483 | torch.cuda.empty_cache() 484 | Utils.cache_set(f"paligemma_model_and_opt_", None) 485 | 486 | if captioner_mode: 487 | return [response] 488 | else: 489 | pass  # unlike florence2_node_encode, response is already a single caption string here; "response[0]" would keep only its first character 490 | conditionings = None 491 | clip = args_dict.get("clip", None) 492 | if clip is not None: 493 | conditionings = Utils.a1111_clip_text_encode( 494 | clip, response, ) 495 | 496 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 497 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui-prompt-mz" 3 | description = "Use llama.cpp to help generate some nodes for prompt-related work" 4 | version = "1.1.0" 5 | license = "LICENSE" 6 | 7 | [project.urls] 8 | Repository = "https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ" 9 | # Used by Comfy Registry https://comfyregistry.org 10 | 11 | [tool.comfy] 12 | PublisherId = "wailovet" 13 | DisplayName = "ComfyUI-Prompt-MZ" 14 | Icon = "" 15 | -------------------------------------------------------------------------------- /v1/init.py: -------------------------------------------------------------------------------- 1 | from ..mz_prompt_utils import Utils 2 | NODE_CLASS_MAPPINGS = { 3 | } 4 | 5 | 6 | NODE_DISPLAY_NAME_MAPPINGS = { 7 | } 8 | 9 | 10 | import importlib 11 | from . import mz_llama3 12 | from . import mz_phi3 13 | from .. import mz_llama_cpp 14 | from .. import mz_llama_core_nodes 15 | from . import mz_deprecated 16 | from . 
import mz_llava 17 | 18 | 19 | AUTHOR_NAME = u"MinusZone" 20 | CATEGORY_NAME = f"{AUTHOR_NAME} - Prompt/v1" 21 | 22 | 23 | def getCommonCLIPTextEncodeInput(): 24 | style_presets = mz_llama_core_nodes.get_style_presets() 25 | CommonCLIPTextEncodeInput = { 26 | "required": { 27 | "prompt_version": (["v1"], {"default": "v1"}), 28 | "style_presets": ( 29 | style_presets, {"default": style_presets[1]} 30 | ), 31 | "text": ("STRING", {"multiline": True, }), 32 | "keep_device": ([False, True], {"default": False}), 33 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 34 | }, 35 | "optional": { 36 | "clip": ("CLIP", ), 37 | "llama_cpp_options": ("LLamaCPPOptions", ), 38 | "customize_instruct": ("CustomizeInstruct", ), 39 | # "customize_json_schema": ("STRING", ), 40 | } 41 | } 42 | 43 | return CommonCLIPTextEncodeInput 44 | 45 | 46 | class MZ_LLama3CLIPTextEncode: 47 | @classmethod 48 | def INPUT_TYPES(s): 49 | m_models = mz_llama3.llama3_models.copy() 50 | for i in range(len(m_models)): 51 | if mz_llama3.get_exist_model(m_models[i]) is not None: 52 | m_models[i] += "[downloaded]" 53 | 54 | result = { 55 | "required": { 56 | "llama_cpp_model": (m_models, {"default": m_models[0]}), 57 | "download_source": ( 58 | ["none", "modelscope", "hf-mirror.com",], 59 | {"default": "none"} 60 | ), 61 | }, 62 | "optional": {}, 63 | } 64 | 65 | common_input = getCommonCLIPTextEncodeInput() 66 | for key in common_input["required"]: 67 | result["required"][key] = common_input["required"][key] 68 | for key in common_input["optional"]: 69 | result["optional"][key] = common_input["optional"][key] 70 | 71 | return result 72 | 73 | RETURN_TYPES = ("STRING", "CONDITIONING",) 74 | RETURN_NAMES = ("text", "conditioning",) 75 | OUTPUT_NODE = True 76 | FUNCTION = "encode" 77 | CATEGORY = CATEGORY_NAME 78 | 79 | def encode(self, **kwargs): 80 | 81 | kwargs["llama_cpp_model"] = kwargs.get( 82 | "llama_cpp_model", "").replace("[downloaded]", "") 83 | 84 | text = mz_llama3.query_beautify_prompt_text(kwargs) 85 | conditionings = None 86 | clip = kwargs.get("clip", None) 87 | if clip is not None: 88 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 89 | 90 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 91 | 92 | 93 | NODE_CLASS_MAPPINGS["MZ_LLama3CLIPTextEncode"] = MZ_LLama3CLIPTextEncode 94 | NODE_DISPLAY_NAME_MAPPINGS[ 95 | "MZ_LLama3CLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(LLama3)" 96 | 97 | 98 | class MZ_Phi3CLIPTextEncode: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | m_models = mz_phi3.phi3_models.copy() 102 | for i in range(len(m_models)): 103 | if mz_llama3.get_exist_model(m_models[i]) is not None: 104 | m_models[i] += "[downloaded]" 105 | 106 | importlib.reload(mz_phi3) 107 | 108 | result = { 109 | "required": { 110 | "llama_cpp_model": (m_models, {"default": m_models[0]}), 111 | "download_source": ( 112 | ["none", "modelscope", "hf-mirror.com",], 113 | {"default": "none"} 114 | ), 115 | }, 116 | "optional": {}, 117 | } 118 | 119 | common_input = getCommonCLIPTextEncodeInput() 120 | for key in common_input["required"]: 121 | result["required"][key] = common_input["required"][key] 122 | for key in common_input["optional"]: 123 | result["optional"][key] = common_input["optional"][key] 124 | 125 | return result 126 | 127 | RETURN_TYPES = ("STRING", "CONDITIONING",) 128 | RETURN_NAMES = ("text", "conditioning",) 129 | OUTPUT_NODE = True 130 | FUNCTION = "encode" 131 | CATEGORY = CATEGORY_NAME 132 | 133 | def encode(self, 
**kwargs): 134 | kwargs = kwargs.copy() 135 | 136 | importlib.reload(mz_llama3) 137 | 138 | kwargs["llama_cpp_model"] = kwargs.get( 139 | "llama_cpp_model", "").replace("[downloaded]", "") 140 | 141 | text = mz_phi3.query_beautify_prompt_text(kwargs) 142 | conditionings = None 143 | clip = kwargs.get("clip", None) 144 | if clip is not None: 145 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 146 | 147 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 148 | 149 | 150 | NODE_CLASS_MAPPINGS["MZ_Phi3CLIPTextEncode"] = MZ_Phi3CLIPTextEncode 151 | NODE_DISPLAY_NAME_MAPPINGS[ 152 | "MZ_Phi3CLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(Phi3)" 153 | 154 | 155 | class MZ_BaseLLamaCPPCLIPTextEncode: 156 | @classmethod 157 | def INPUT_TYPES(s): 158 | importlib.reload(mz_llama_cpp) 159 | 160 | result = { 161 | "required": { 162 | "llama_cpp_model": ("STRING", {"default": "", "placeholder": "model_path"}), 163 | }, 164 | "optional": { 165 | }, 166 | } 167 | 168 | common_input = getCommonCLIPTextEncodeInput() 169 | for key in common_input["required"]: 170 | result["required"][key] = common_input["required"][key] 171 | for key in common_input["optional"]: 172 | result["optional"][key] = common_input["optional"][key] 173 | 174 | return result 175 | RETURN_TYPES = ("STRING", "CONDITIONING",) 176 | RETURN_NAMES = ("text", "conditioning",) 177 | OUTPUT_NODE = True 178 | FUNCTION = "encode" 179 | CATEGORY = CATEGORY_NAME 180 | 181 | def encode(self, **kwargs): 182 | kwargs = kwargs.copy() 183 | 184 | kwargs["llama_cpp_model"] = kwargs.get( 185 | "llama_cpp_model", "").replace("[downloaded]", "") 186 | text = mz_deprecated.base_query_beautify_prompt_text(kwargs) 187 | conditionings = None 188 | clip = kwargs.get("clip", None) 189 | if clip is not None: 190 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 191 | 192 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 193 | 194 | 195 | NODE_CLASS_MAPPINGS["MZ_BaseLLamaCPPCLIPTextEncode"] = MZ_BaseLLamaCPPCLIPTextEncode 196 | NODE_DISPLAY_NAME_MAPPINGS[ 197 | "MZ_BaseLLamaCPPCLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(BaseLLamaCPP)" 198 | 199 | 200 | class MZ_LLavaImageInterrogator: 201 | @classmethod 202 | def INPUT_TYPES(s): 203 | importlib.reload(mz_llava) 204 | m_llava_models = mz_llava.LLava_models.copy() 205 | for i in range(len(m_llava_models)): 206 | if mz_llava.get_exist_model(m_llava_models[i]) is not None: 207 | m_llava_models[i] += "[downloaded]" 208 | 209 | m_llava_mmproj_models = mz_llava.LLava_mmproj_models.copy() 210 | for i in range(len(m_llava_mmproj_models)): 211 | if mz_llava.get_exist_model(m_llava_mmproj_models[i]) is not None: 212 | m_llava_mmproj_models[i] += "[downloaded]" 213 | 214 | return { 215 | "required": { 216 | "llama_cpp_model": (m_llava_models, {"default": m_llava_models[0]}), 217 | "mmproj_model": (m_llava_mmproj_models, {"default": m_llava_mmproj_models[0]}), 218 | "download_source": ( 219 | [ 220 | "none", 221 | "modelscope", 222 | "hf-mirror.com", 223 | ], 224 | {"default": "none"} 225 | ), 226 | "resolution": ("INT", {"default": 512, "min": 128, "max": 2048}), 227 | "sd_format": (["none", "v1", ], {"default": "none"}), 228 | "keep_device": ([False, True], {"default": False}), 229 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 230 | }, 231 | "optional": { 232 | "image": ("IMAGE",), 233 | "clip": ("CLIP", ), 234 | "llama_cpp_options": ("LLamaCPPOptions", ), 
235 | "customize_instruct": ("CustomizeInstruct", ), 236 | "captioner_config": ("ImageCaptionerConfig", ), 237 | }, 238 | } 239 | RETURN_TYPES = ("STRING", "CONDITIONING",) 240 | RETURN_NAMES = ("text", "conditioning",) 241 | OUTPUT_NODE = True 242 | FUNCTION = "interrogate" 243 | CATEGORY = CATEGORY_NAME 244 | 245 | def interrogate(self, **kwargs): 246 | kwargs = kwargs.copy() 247 | 248 | kwargs["llama_cpp_model"] = kwargs.get( 249 | "llama_cpp_model", "").replace("[downloaded]", "") 250 | kwargs["mmproj_model"] = kwargs.get( 251 | "mmproj_model", "").replace("[downloaded]", "") 252 | 253 | if kwargs.get("image", None) is not None: 254 | kwargs["image"] = Utils.tensor2pil(kwargs["image"]) 255 | else: 256 | kwargs["image"] = None 257 | 258 | text = mz_llava.image_interrogator(kwargs) 259 | conditionings = None 260 | clip = kwargs.get("clip", None) 261 | if clip is not None: 262 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 263 | 264 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 265 | 266 | 267 | NODE_CLASS_MAPPINGS["MZ_LLavaImageInterrogator"] = MZ_LLavaImageInterrogator 268 | NODE_DISPLAY_NAME_MAPPINGS[ 269 | "MZ_LLavaImageInterrogator"] = f"{AUTHOR_NAME} - deprecated - ImageInterrogator(LLava)" 270 | 271 | 272 | class MZ_BaseLLavaImageInterrogator: 273 | @classmethod 274 | def INPUT_TYPES(s): 275 | return { 276 | "required": { 277 | "llama_cpp_model": ("STRING", {"default": ""}), 278 | "mmproj_model": ("STRING", {"default": ""}), 279 | "resolution": ("INT", {"default": 512, "min": 128, "max": 2048}), 280 | "sd_format": (["none", "v1"], {"default": "none"}), 281 | "keep_device": ([False, True], {"default": False}), 282 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 283 | }, 284 | "optional": { 285 | "image": ("IMAGE",), 286 | "clip": ("CLIP", ), 287 | "llama_cpp_options": ("LLamaCPPOptions", ), 288 | "customize_instruct": ("CustomizeInstruct", ), 289 | "captioner_config": ("ImageCaptionerConfig", ), 290 | }, 291 | } 292 | RETURN_TYPES = ("STRING", "CONDITIONING",) 293 | RETURN_NAMES = ("text", "conditioning",) 294 | OUTPUT_NODE = True 295 | FUNCTION = "interrogate" 296 | CATEGORY = CATEGORY_NAME 297 | 298 | def interrogate(self, **kwargs): 299 | kwargs = kwargs.copy() 300 | 301 | importlib.reload(mz_llava) 302 | 303 | if kwargs.get("image", None) is not None: 304 | kwargs["image"] = Utils.tensor2pil(kwargs["image"]) 305 | else: 306 | kwargs["image"] = None 307 | 308 | text = mz_llava.base_image_interrogator(kwargs) 309 | conditionings = None 310 | clip = kwargs.get("clip", None) 311 | if clip is not None: 312 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 313 | 314 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 315 | 316 | 317 | NODE_CLASS_MAPPINGS["MZ_BaseLLavaImageInterrogator"] = MZ_BaseLLavaImageInterrogator 318 | NODE_DISPLAY_NAME_MAPPINGS[ 319 | "MZ_BaseLLavaImageInterrogator"] = f"{AUTHOR_NAME} - deprecated - ImageInterrogator(BaseLLava)" 320 | -------------------------------------------------------------------------------- /v1/mz_deprecated.py: -------------------------------------------------------------------------------- 1 | from ..mz_prompt_utils import Utils 2 | from ..mz_llama_cpp import * 3 | from ..mz_llama_core_nodes import * 4 | from ..mz_prompts import * 5 | 6 | def base_query_beautify_prompt_text(args_dict): 7 | model_file = args_dict.get("llama_cpp_model", "") 8 | text = args_dict.get("text", "") 9 | style_presets = 
args_dict.get("style_presets", "") 10 | options = args_dict.get("llama_cpp_options", {}) 11 | keep_device = args_dict.get("keep_device", False) 12 | seed = args_dict.get("seed", -1) 13 | options["seed"] = seed 14 | 15 | 16 | customize_instruct = args_dict.get("customize_instruct", None) 17 | Utils.print_log( 18 | f"customize_instruct: {customize_instruct}") 19 | try: 20 | schema = None 21 | if customize_instruct is None: 22 | schema = get_schema_obj( 23 | keys_type={ 24 | "description": get_schema_base_type("string"), 25 | "long_prompt": get_schema_base_type("string"), 26 | "main_color_word": get_schema_base_type("string"), 27 | "camera_angle_word": get_schema_base_type("string"), 28 | "style_words": get_schema_array("string"), 29 | "subject_words": get_schema_array("string"), 30 | "light_words": get_schema_array("string"), 31 | "environment_words": get_schema_array("string"), 32 | }, 33 | required=[ 34 | "description", 35 | "long_prompt", 36 | "main_color_word", 37 | "camera_angle_word", 38 | "style_words", 39 | "subject_words", 40 | "light_words", 41 | "environment_words", 42 | ] 43 | ) 44 | 45 | question = f"IDEA: {style_presets},{text}" 46 | if style_presets == "none": 47 | question = f"IDEA: {text}" 48 | 49 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 50 | 51 | else: 52 | 53 | system_prompt = customize_instruct.get("system", "") 54 | question = customize_instruct.get("instruct", "%text%") 55 | 56 | system_prompt = system_prompt.replace("%text%", text) 57 | question = question.replace("%text%", text) 58 | 59 | Utils.print_log(f"system_prompt: {system_prompt}") 60 | Utils.print_log(f"question: {question}") 61 | 62 | if schema is not None: 63 | response_json = llama_cpp_simple_interrogator_to_json( 64 | model_file=model_file, 65 | system=system_prompt, 66 | question=question, 67 | schema=schema, 68 | options=options, 69 | ) 70 | Utils.print_log(f"response_json: {response_json}") 71 | 72 | response = json.loads(response_json) 73 | full_responses = [] 74 | 75 | if response["description"] != "": 76 | full_responses.append(f"({response['description']})") 77 | if response["long_prompt"] != "": 78 | full_responses.append(f"({response['long_prompt']})") 79 | if response["main_color_word"] != "": 80 | full_responses.append(f"({response['main_color_word']})") 81 | if response["camera_angle_word"] != "": 82 | full_responses.append(f"({response['camera_angle_word']})") 83 | 84 | response["style_words"] = [ 85 | x for x in response["style_words"] if x != ""] 86 | if len(response["style_words"]) > 0: 87 | full_responses.append( 88 | f"({', '.join(response['style_words'])})") 89 | 90 | response["subject_words"] = [ 91 | x for x in response["subject_words"] if x != ""] 92 | if len(response["subject_words"]) > 0: 93 | full_responses.append( 94 | f"({', '.join(response['subject_words'])})") 95 | 96 | response["light_words"] = [ 97 | x for x in response["light_words"] if x != ""] 98 | if len(response["light_words"]) > 0: 99 | full_responses.append( 100 | f"({', '.join(response['light_words'])})") 101 | 102 | response["environment_words"] = [ 103 | x for x in response["environment_words"] if x != ""] 104 | if len(response["environment_words"]) > 0: 105 | full_responses.append( 106 | f"({', '.join(response['environment_words'])})") 107 | 108 | full_response = ", ".join(full_responses) 109 | else: 110 | full_response = llama_cpp_simple_interrogator( 111 | model_file=model_file, 112 | system=system_prompt, 113 | question=question, 114 | options=options, 115 | ) 116 | 117 | 
start_str = customize_instruct.get("start_str", "") 118 | if start_str != "" and full_response.find(start_str) != -1: 119 | full_response_list = full_response.split(start_str) 120 | # 删除第一个元素 121 | full_response_list.pop(0) 122 | full_response = start_str.join(full_response_list) 123 | 124 | end_str = customize_instruct.get("end_str", "") 125 | if end_str != "" and full_response.find(end_str) != -1: 126 | full_response_list = full_response.split(end_str) 127 | # 删除最后一个元素 128 | full_response_list.pop() 129 | full_response = end_str.join(full_response_list) 130 | 131 | if keep_device is False: 132 | freed_gpu_memory(model_file=model_file) 133 | 134 | # 去除换行 135 | while full_response.find("\n") != -1: 136 | full_response = full_response.replace("\n", " ") 137 | 138 | # 句号换成逗号 139 | while full_response.find(".") != -1: 140 | full_response = full_response.replace(".", ",") 141 | 142 | # 去除多余逗号 143 | while full_response.find(",,") != -1: 144 | full_response = full_response.replace(",,", ",") 145 | while full_response.find(", ,") != -1: 146 | full_response = full_response.replace(", ,", ",") 147 | 148 | full_response = Utils.prompt_zh_to_en(full_response) 149 | 150 | style_presets_prompt_text = style_presets_prompt.get(style_presets, "") 151 | 152 | if style_presets_prompt_text != "": 153 | full_response = f"{style_presets_prompt_text}, {full_response}" 154 | 155 | return full_response 156 | 157 | except Exception as e: 158 | freed_gpu_memory(model_file=model_file) 159 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 160 | raise e 161 | -------------------------------------------------------------------------------- /v1/mz_llama3.py: -------------------------------------------------------------------------------- 1 | import json 2 | from .. import mz_prompt_utils 3 | from .. import mz_llama_cpp 4 | from . 
import mz_deprecated 5 | import importlib 6 | 7 | 8 | llama3_models = [ 9 | "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", 10 | "Meta-Llama-3-8B-Instruct.Q2_K.gguf", 11 | "Meta-Llama-3-8B-Instruct.Q3_K_L.gguf", 12 | "Meta-Llama-3-8B-Instruct.Q3_K_M.gguf", 13 | "Meta-Llama-3-8B-Instruct.Q3_K_S.gguf", 14 | "Meta-Llama-3-8B-Instruct.Q4_0.gguf", 15 | "Meta-Llama-3-8B-Instruct.Q4_1.gguf", 16 | "Meta-Llama-3-8B-Instruct.Q4_K_S.gguf", 17 | "Meta-Llama-3-8B-Instruct.Q5_0.gguf", 18 | "Meta-Llama-3-8B-Instruct.Q5_1.gguf", 19 | "Meta-Llama-3-8B-Instruct.Q5_K_M.gguf", 20 | "Meta-Llama-3-8B-Instruct.Q5_K_S.gguf", 21 | "Meta-Llama-3-8B-Instruct.Q6_K.gguf", 22 | "Meta-Llama-3-8B-Instruct.Q8_0.gguf", 23 | ] 24 | 25 | 26 | def get_exist_model(model_name): 27 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 28 | model_type="llama3", 29 | model_name=model_name, 30 | only_get_path=True, 31 | ) 32 | 33 | if modelscope_model_path is not None: 34 | return modelscope_model_path 35 | 36 | model_url = f"https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 37 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 38 | model_url, only_get_path=True) 39 | if hf_model_path is not None: 40 | return hf_model_path 41 | 42 | return None 43 | 44 | 45 | def query_beautify_prompt_text(args_dict): 46 | model_name = args_dict.get("llama_cpp_model", "") 47 | download_source = args_dict.get("download_source", None) 48 | 49 | try: 50 | model_file = get_exist_model(model_name) 51 | 52 | if model_file is None: 53 | if download_source == "modelscope": 54 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 55 | model_type="llama3", 56 | model_name=model_name, 57 | ) 58 | else: 59 | model_url = f"https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 60 | if download_source == "hf-mirror.com": 61 | model_url = f"https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 62 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 63 | 64 | args_dict["llama_cpp_model"] = model_file 65 | full_response = mz_deprecated.base_query_beautify_prompt_text( 66 | args_dict=args_dict) 67 | return full_response 68 | 69 | except Exception as e: 70 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 71 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 72 | raise e 73 | -------------------------------------------------------------------------------- /v1/mz_llava.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from .. import mz_prompt_utils 4 | from .. import mz_llama_cpp 5 | from .. import mz_prompts 6 | from .. import mz_llama_core_nodes 7 | from .. 
import mz_prompt_webserver 8 | 9 | import importlib 10 | 11 | 12 | LLava_models = [ 13 | "llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf", 14 | "llava-v1.6-vicuna-13b-gguf/llava-v1.6-vicuna-13b.Q5_K_M.gguf", 15 | "ggml_llava-v1.5-7b/ggml-model-q4_k.gguf", 16 | "ggml_llava-v1.5-7b/ggml-model-q5_k.gguf", 17 | "ggml_llava-v1.5-7b/ggml-model-f16.gguf", 18 | "ggml_bakllava-1/ggml-model-q4_k.gguf", 19 | "ggml_bakllava-1/ggml-model-q5_k.gguf", 20 | "ggml_bakllava-1/ggml-model-f16.gguf", 21 | ] 22 | 23 | LLava_mmproj_models = [ 24 | "llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf", 25 | "llava-v1.6-vicuna-13b-gguf/mmproj-model-f16.gguf", 26 | "ggml_llava-v1.5-7b/mmproj-model-f16.gguf", 27 | "ggml_bakllava-1/mmproj-model-f16.gguf", 28 | ] 29 | 30 | 31 | huggingface_models_map = { 32 | "llava-v1.6-vicuna-13b-gguf": "cjpais", 33 | "llava-1.6-mistral-7b-gguf": "cjpais", 34 | "ggml_llava-v1.5-7b": "mys", 35 | "ggml_llava-v1.5-13b": "mys", 36 | "ggml_bakllava-1": "mys", 37 | } 38 | 39 | 40 | def get_exist_model(model_name): 41 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 42 | model_type="llava", 43 | model_name=model_name, 44 | only_get_path=True, 45 | ) 46 | 47 | if modelscope_model_path is not None: 48 | return modelscope_model_path 49 | 50 | model_name = model_name.split("?")[0] 51 | model_names = model_name.split("/") 52 | 53 | author = huggingface_models_map.get(model_names[0], None) 54 | if author is None: 55 | return False 56 | 57 | model_url = f"https://hf-mirror.com/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 58 | 59 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 60 | model_url, only_get_path=True) 61 | 62 | if hf_model_path is not None: 63 | return hf_model_path 64 | 65 | return None 66 | 67 | 68 | def image_interrogator(args_dict): 69 | args_dict = args_dict.copy() 70 | 71 | captioner_config = args_dict.get("captioner_config", None) 72 | if captioner_config is not None: 73 | import PIL.Image as Image 74 | directory = captioner_config.get("directory", None) 75 | force_update = captioner_config.get("force_update", False) 76 | caption_suffix = captioner_config.get("caption_suffix", "") 77 | retry_keyword = captioner_config.get("retry_keyword", "") 78 | retry_keywords = retry_keyword.split(",") 79 | 80 | retry_keywords = [k.strip() for k in retry_keywords] 81 | retry_keywords = [k for k in retry_keywords if k != ""] 82 | 83 | pre_images = [] 84 | for root, dirs, files in os.walk(directory): 85 | for file in files: 86 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"): 87 | image_path = os.path.join(root, file) 88 | base_file_path = os.path.splitext(image_path)[0] 89 | caption_file = os.path.join( 90 | root, base_file_path + caption_suffix) 91 | if os.path.exists(caption_file) and force_update is False: 92 | continue 93 | 94 | pre_images.append({ 95 | "image_path": image_path, 96 | "caption_path": caption_file 97 | }) 98 | 99 | result = [] 100 | 101 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 102 | for i in range(len(pre_images)): 103 | pre_image = pre_images[i] 104 | image_path = pre_image["image_path"] 105 | caption_file = pre_image["caption_path"] 106 | 107 | onec_args_dict = args_dict.copy() 108 | del onec_args_dict["captioner_config"] 109 | 110 | pil_image = Image.open(image_path) 111 | onec_args_dict["image"] = pil_image 112 | 113 | if i < len(pre_images) - 1: 114 | onec_args_dict["keep_device"] = True 115 | 116 | pb.update( 117 | i, 118 | len(pre_images), 119 | pil_image.copy(), 
120 | ) 121 | 122 | response = image_interrogator(onec_args_dict) 123 | response = response.strip() 124 | is_retry = response == "" 125 | for k in retry_keywords: 126 | if response.find(k) != -1: 127 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 128 | is_retry = True 129 | break 130 | 131 | mz_prompt_utils.Utils.print_log( 132 | "\n\nonec_args_dict: ", onec_args_dict) 133 | if is_retry: 134 | for retry_n in range(5): 135 | print(f"Retry {retry_n+1}...") 136 | onec_args_dict["seed"] = onec_args_dict["seed"] + 1 137 | response = image_interrogator(onec_args_dict) 138 | response = response.strip() 139 | is_retry = response == "" 140 | for k in retry_keywords: 141 | if response.find(k) != -1: 142 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 143 | is_retry = True 144 | break 145 | 146 | if is_retry is False: 147 | break 148 | if is_retry: 149 | print(f"重试失败,图片被跳过 ; Retry failed") 150 | response = "" 151 | 152 | if response != "": 153 | with open(caption_file, "w") as f: 154 | f.write(response) 155 | 156 | result.append(response) 157 | 158 | # mz_prompt_webserver.show_toast_success( 159 | # f"提示词保存成功(prompt saved successfully): {caption_file}", 160 | # 1000, 161 | # ) 162 | 163 | return result 164 | 165 | model_name = args_dict.get("llama_cpp_model", "") 166 | mmproj_name = args_dict.get("mmproj_model", "") 167 | download_source = args_dict.get("download_source", None) 168 | model_file = get_exist_model(model_name) 169 | mmproj_file = get_exist_model(mmproj_name) 170 | 171 | if model_file is None or mmproj_file is None: 172 | if download_source == "modelscope": 173 | if model_file is None: 174 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 175 | model_type="llava", 176 | model_name=model_name, 177 | ) 178 | if mmproj_file is None: 179 | mmproj_file = mz_prompt_utils.Utils.modelscope_download_model( 180 | model_type="llava", 181 | model_name=mmproj_name, 182 | ) 183 | else: 184 | model_name = model_name.split("?")[0] 185 | model_names = model_name.split("/") 186 | 187 | author = huggingface_models_map.get(model_names[0], None) 188 | if author is None: 189 | raise Exception( 190 | f"Model {model_names[0]} is not supported for image_to_text.") 191 | 192 | if download_source == "hf-mirror.com": 193 | model_url = f"https://hf-mirror.com/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 194 | else: 195 | model_url = f"https://huggingface.co/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 196 | 197 | if model_file is None: 198 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 199 | 200 | mmproj_name = mmproj_name.split("?")[0] 201 | mmproj_names = mmproj_name.split("/") 202 | if download_source == "hf-mirror.com": 203 | mmproj_url = f"https://hf-mirror.com/{author}/{mmproj_names[0]}/resolve/main/{mmproj_names[1]}" 204 | else: 205 | mmproj_url = f"https://huggingface.co/{author}/{mmproj_names[0]}/resolve/main/{mmproj_names[1]}" 206 | 207 | if mmproj_file is None: 208 | mmproj_file = mz_prompt_utils.Utils.hf_download_model( 209 | mmproj_url) 210 | 211 | args_dict["llama_cpp_model"] = model_file 212 | args_dict["mmproj_model"] = mmproj_file 213 | response = base_image_interrogator(args_dict=args_dict) 214 | return response 215 | 216 | 217 | def base_image_interrogator(args_dict): 218 | model_file = args_dict.get("llama_cpp_model", "") 219 | mmproj_file = args_dict.get("mmproj_model", "") 220 | image = args_dict.get("image", None) 221 | resolution = args_dict.get("resolution", 512) 222 | keep_device = args_dict.get("keep_device", False) 223 | seed = 
args_dict.get("seed", -1) 224 | options = args_dict.get("llama_cpp_options", {}) 225 | options["seed"] = seed 226 | 227 | mz_prompt_utils.Utils.print_log( 228 | "base_image_interrogator options: ", options) 229 | # input("Press Enter to continue...") 230 | 231 | image = mz_prompt_utils.Utils.resize_max(image, resolution, resolution) 232 | 233 | customize_instruct = args_dict.get("customize_instruct", None) 234 | if customize_instruct is None: 235 | 236 | system_text = mz_prompts.GPT4VImageCaptioner_System 237 | question_text = mz_prompts.GPT4VImageCaptioner_Prompt 238 | 239 | response = mz_llama_cpp.llava_cpp_simple_interrogator( 240 | model_file=model_file, 241 | mmproj_file=mmproj_file, 242 | image=image, 243 | options=options, 244 | # system=system_text, 245 | # question=question_text, 246 | ) 247 | else: 248 | 249 | system_prompt = customize_instruct.get("system", "") 250 | question = customize_instruct.get("instruct", "%text%") 251 | system_prompt = system_prompt.replace("%text%", "") 252 | question = question.replace("%text%", "") 253 | 254 | full_response = mz_llama_cpp.llava_cpp_simple_interrogator( 255 | model_file=model_file, 256 | mmproj_file=mmproj_file, 257 | image=image, 258 | system=system_prompt, 259 | question=question, 260 | options=options, 261 | ) 262 | 263 | start_str = customize_instruct.get("start_str", "") 264 | if start_str != "" and full_response.find(start_str) != -1: 265 | full_response_list = full_response.split(start_str) 266 | # 删除第一个元素 267 | full_response_list.pop(0) 268 | full_response = start_str.join(full_response_list) 269 | end_str = customize_instruct.get("end_str", "") 270 | if end_str != "" and full_response.find(end_str) != -1: 271 | full_response_list = full_response.split(end_str) 272 | # 删除最后一个元素 273 | full_response_list.pop() 274 | full_response = end_str.join(full_response_list) 275 | 276 | response = full_response 277 | 278 | sd_format = args_dict.get("sd_format", "v1") 279 | 280 | if sd_format == "v1" and customize_instruct is None: 281 | mz_prompt_utils.Utils.print_log(f"response v1: {response}") 282 | schema = mz_llama_core_nodes.get_schema_obj( 283 | keys_type={ 284 | "short_describes": mz_llama_core_nodes.get_schema_base_type("string"), 285 | "subject_tags": mz_llama_core_nodes.get_schema_array("string"), 286 | "action_tags": mz_llama_core_nodes.get_schema_array("string"), 287 | "light_tags": mz_llama_core_nodes.get_schema_array("string"), 288 | "scenes_tags": mz_llama_core_nodes.get_schema_array("string"), 289 | "other_tags": mz_llama_core_nodes.get_schema_array("string"), 290 | }, 291 | required=[ 292 | "short_describes", 293 | "subject_tags", 294 | "action_tags", 295 | "light_tags", 296 | "scenes_tags", 297 | "other_tags", 298 | ] 299 | ) 300 | 301 | response = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 302 | model_file=model_file, 303 | system=mz_prompts.Beautify_Prompt, 304 | question=f"IDEA: {response}", 305 | schema=schema, 306 | options=options, 307 | ) 308 | 309 | response_json = json.loads(response) 310 | responses = [] 311 | for key, value in response_json.items(): 312 | if type(value) == list: 313 | # 去除开头.和空格 314 | value = [v.strip().lstrip(".") for v in value] 315 | # 去除空字符串 316 | value = [v for v in value if v != ""] 317 | if len(value) > 0: 318 | responses.append(f"({', '.join(value)})") 319 | 320 | else: 321 | if value != "": 322 | responses.append(f"({value})") 323 | response = ", ".join(responses) 324 | 325 | if keep_device is False: 326 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 327 | return 
response 328 | -------------------------------------------------------------------------------- /v1/mz_phi3.py: -------------------------------------------------------------------------------- 1 | import json 2 | from .. import mz_prompt_utils 3 | from .. import mz_llama_cpp 4 | from . import mz_deprecated 5 | 6 | phi3_models = [ 7 | "Phi-3-mini-4k-instruct-q4.gguf" 8 | ] 9 | 10 | 11 | def get_exist_model(model_name): 12 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 13 | model_type="phi3", 14 | model_name=model_name, 15 | only_get_path=True, 16 | ) 17 | 18 | if modelscope_model_path is not None: 19 | return modelscope_model_path 20 | 21 | model_url = f"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 22 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 23 | model_url, only_get_path=True) 24 | if hf_model_path is not None: 25 | return hf_model_path 26 | 27 | return None 28 | 29 | 30 | def query_beautify_prompt_text(args_dict): 31 | model_name = args_dict.get("llama_cpp_model", "") 32 | download_source = args_dict.get("download_source", None) 33 | 34 | try: 35 | model_file = get_exist_model(model_name) 36 | 37 | if model_file is None: 38 | if download_source == "modelscope": 39 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 40 | model_type="phi3", 41 | model_name=model_name, 42 | ) 43 | else: 44 | model_url = f"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 45 | if download_source == "hf-mirror.com": 46 | model_url = f"https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 47 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 48 | 49 | args_dict["llama_cpp_model"] = model_file 50 | full_response = mz_deprecated.base_query_beautify_prompt_text( 51 | args_dict=args_dict) 52 | return full_response 53 | 54 | except Exception as e: 55 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 56 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 57 | raise e 58 | -------------------------------------------------------------------------------- /web/prompt_mz.js: -------------------------------------------------------------------------------- 1 | import { app } from "../../scripts/app.js"; 2 | import { api } from "../../scripts/api.js"; 3 | import { ComfyWidgets } from "/scripts/widgets.js"; 4 | 5 | async function sleep(ms) { 6 | return new Promise((resolve) => setTimeout(resolve, ms)); 7 | } 8 | 9 | async function showToast(message, duration = 3000) { 10 | const toast = document.createElement("div"); 11 | toast.style.position = "fixed"; 12 | toast.style.top = "20px"; 13 | toast.style.left = "50%"; 14 | toast.style.transform = "translateX(-50%)"; 15 | toast.style.padding = "10px 20px"; 16 | toast.style.backgroundColor = "var(--comfy-menu-bg)"; 17 | toast.style.color = "var(--input-text)"; 18 | toast.style.borderRadius = "10px"; 19 | toast.style.border = "2px solid var(--border-color)"; 20 | toast.style.zIndex = "9999"; 21 | 22 | toast.textContent = message; 23 | document.body.appendChild(toast); 24 | await sleep(duration); 25 | toast.remove(); 26 | } 27 | 28 | async function waitMessage() { 29 | var websocket = new WebSocket( 30 | `ws://${window.location.host}/mz_webapi/message` 31 | ); 32 | websocket.onmessage = async (event) => { 33 | const resp = JSON.parse(event.data); 34 | console.log("Message received", resp); 35 | 36 | for (const data of resp) { 37 | if (data.type === "toast-success") { 38 | await 
showToast(data.message, data?.duration || 3000); 39 | } 40 | } 41 | }; 42 | websocket.onclose = async (event) => { 43 | console.log("Connection closed", event); 44 | }; 45 | 46 | websocket.onerror = async (event) => { 47 | console.log("Connection error", event); 48 | }; 49 | 50 | // for (;;) { 51 | // await sleep(1000); 52 | // try { 53 | // if (websocket.readyState !== WebSocket.OPEN) { 54 | // return; 55 | // } 56 | // websocket.send( 57 | // JSON.stringify({ 58 | // type: "ping", 59 | // }) 60 | // ); 61 | // } catch (error) { 62 | // return; 63 | // } 64 | // } 65 | } 66 | 67 | /** 68 | * @returns {import("./types/comfy").ComfyExtension} extension 69 | */ 70 | const my_ui = { 71 | name: "prompt_mz.ui", 72 | setup() {}, 73 | init: async () => { 74 | console.log("prompt_mz Registering UI extension"); 75 | 76 | waitMessage(); 77 | }, 78 | 79 | /** 80 | * @param {import("./types/comfy").NodeType} nodeType 81 | * @param {import("./types/comfy").NodeDef} nodeData 82 | * @param {import("./types/comfy").App} app 83 | */ 84 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 85 | switch (nodeData.name) { 86 | case "MZ_OpenAIApiCLIPTextEncode": 87 | case "MZ_LLama3CLIPTextEncode": 88 | case "MZ_Phi3CLIPTextEncode": 89 | case "MZ_BaseLLamaCPPCLIPTextEncode": 90 | case "MZ_LLavaImageInterrogator": 91 | case "MZ_BaseLLavaImageInterrogator": 92 | case "MZ_LLamaCPPCLIPTextEncode": 93 | case "MZ_ImageInterrogatorCLIPTextEncode": 94 | case "MZ_Florence2CLIPTextEncode": 95 | case "MZ_PaliGemmaCLIPTextEncode": 96 | // Node Created 97 | const onNodeCreated = nodeType.prototype.onNodeCreated; 98 | nodeType.prototype.onNodeCreated = function () { 99 | const ret = onNodeCreated 100 | ? onNodeCreated.apply(this, arguments) 101 | : undefined; 102 | 103 | console.log("onNodeCreated:", this); 104 | const nodeName = this.name + "_" + "customtext"; 105 | const wi = ComfyWidgets.STRING( 106 | this, 107 | nodeName, 108 | [ 109 | "STRING", 110 | { 111 | default: "", 112 | placeholder: "Text message output...", 113 | multiline: true, 114 | }, 115 | ], 116 | app 117 | ); 118 | wi.widget.inputEl.readOnly = true; 119 | 120 | return ret; 121 | }; 122 | 123 | const outSet = function (texts) { 124 | if (texts.length > 0) { 125 | let widget_id = this?.widgets.findIndex( 126 | (w) => w.name === this.name + "_" + "customtext" 127 | ); 128 | if (Array.isArray(texts)) 129 | texts = texts 130 | .filter((word) => word.trim() !== "") 131 | .map((word) => word.trim()) 132 | .join(" "); 133 | this.widgets[widget_id].value = texts; 134 | app.graph.setDirtyCanvas(true); 135 | } 136 | }; 137 | 138 | // onConfigure 139 | const onConfigure = nodeType.prototype.onConfigure; 140 | nodeType.prototype.onConfigure = function (w) { 141 | onConfigure?.apply(this, arguments); 142 | 143 | // outSet.call(this, a?.string); 144 | }; 145 | 146 | // onExecuted 147 | const onExecuted = nodeType.prototype.onExecuted; 148 | nodeType.prototype.onExecuted = function (a, b) { 149 | // console.log("onExecuted:", arguments); 150 | onExecuted?.apply(this, arguments); 151 | 152 | outSet.call(this, a?.string); 153 | }; 154 | } 155 | }, 156 | }; 157 | 158 | app.registerExtension(my_ui); 159 | --------------------------------------------------------------------------------