├── .github └── workflows │ └── publish.yml ├── .gitignore ├── ADV_CLIP_emb_encode.py ├── LICENSE ├── README.md ├── __init__.py ├── configs └── model_zoo.json ├── half_json.py ├── hook └── modeling_florence2.py ├── mz_gen_translate.py ├── mz_llama_core_nodes.py ├── mz_llama_cpp.py ├── mz_openaiapi.py ├── mz_prompt_utils.py ├── mz_prompt_webserver.py ├── mz_prompts.py ├── mz_transformers.py ├── pyproject.toml ├── v1 ├── init.py ├── mz_deprecated.py ├── mz_llama3.py ├── mz_llava.py └── mz_phi3.py └── web └── prompt_mz.js /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Comfy registry 2 | on: 3 | workflow_dispatch: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - "pyproject.toml" 9 | 10 | permissions: 11 | issues: write 12 | 13 | jobs: 14 | publish-node: 15 | name: Publish Custom Node to registry 16 | runs-on: ubuntu-latest 17 | if: ${{ github.repository_owner == 'MinusZoneAI' }} 18 | steps: 19 | - name: Check out code 20 | uses: actions/checkout@v4 21 | - name: Publish Custom Node 22 | uses: Comfy-Org/publish-node-action@v1 23 | with: 24 | ## Add your own personal access token to your Github Repository secrets and reference it here. 25 | personal_access_token: ${{ secrets.REGISTRY_ACCESS_TOKEN }} 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 
95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | .vscode 162 | 163 | *.bat 164 | exclude.txt -------------------------------------------------------------------------------- /ADV_CLIP_emb_encode.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | import itertools 4 | from math import gcd 5 | 6 | from comfy import model_management 7 | from comfy.sdxl_clip import SDXLClipModel, SDXLRefinerClipModel, SDXLClipG 8 | 9 | def _grouper(n, iterable): 10 | it = iter(iterable) 11 | while True: 12 | chunk = list(itertools.islice(it, n)) 13 | if not chunk: 14 | return 15 | yield chunk 16 | 17 | def _norm_mag(w, n): 18 | d = w - 1 19 | return 1 + np.sign(d) * np.sqrt(np.abs(d)**2 / n) 20 | #return np.sign(w) * np.sqrt(np.abs(w)**2 / n) 21 | 22 | def divide_length(word_ids, weights): 23 | sums = dict(zip(*np.unique(word_ids, return_counts=True))) 24 | sums[0] = 1 25 | weights = [[_norm_mag(w, sums[id]) if id != 0 else 1.0 26 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 27 | return weights 28 | 29 | def shift_mean_weight(word_ids, weights): 30 | delta = 1 - np.mean([w for x, y in zip(weights, word_ids) for w, id in zip(x,y) if id != 0]) 31 | weights = [[w if id == 0 else w+delta 32 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 33 | return weights 34 | 35 | def scale_to_norm(weights, word_ids, w_max): 36 | top = np.max(weights) 37 | w_max = min(top, w_max) 38 | weights = [[w_max if id == 0 else (w/top) * w_max 39 | for w, id in zip(x, y)] for x, y in zip(weights, word_ids)] 40 | return weights 41 | 42 | def from_zero(weights, base_emb): 43 | weight_tensor = torch.tensor(weights, dtype=base_emb.dtype, device=base_emb.device) 44 | weight_tensor = weight_tensor.reshape(1,-1,1).expand(base_emb.shape) 45 | return base_emb * weight_tensor 46 | 47 | def mask_word_id(tokens, word_ids, target_id, mask_token): 48 | new_tokens = [[mask_token if wid == target_id else t 49 | for t, wid in zip(x,y)] for x,y in zip(tokens, word_ids)] 50 | mask = np.array(word_ids) == target_id 51 | return (new_tokens, mask) 52 | 53 | def batched_clip_encode(tokens, length, encode_func, num_chunks): 54 | embs = [] 55 | for e in _grouper(32, tokens): 56 | enc, pooled = encode_func(e) 57 | enc = enc.reshape((len(e), length, -1)) 58 | embs.append(enc) 59 | embs = torch.cat(embs) 60 | embs = embs.reshape((len(tokens) // num_chunks, length * num_chunks, -1)) 61 | return embs 62 | 63 | def from_masked(tokens, weights, word_ids, base_emb, length, encode_func, m_token=266): 64 | pooled_base = base_emb[0,length-1:length,:] 65 | wids, inds = np.unique(np.array(word_ids).reshape(-1), return_index=True) 66 | weight_dict = dict((id,w) 67 | for id,w in zip(wids ,np.array(weights).reshape(-1)[inds]) 68 | if w != 1.0) 69 | 70 | if len(weight_dict) == 0: 71 | return torch.zeros_like(base_emb), base_emb[0,length-1:length,:] 72 | 73 | weight_tensor = torch.tensor(weights, dtype=base_emb.dtype, device=base_emb.device) 74 | weight_tensor = weight_tensor.reshape(1,-1,1).expand(base_emb.shape) 75 | 76 | #m_token = (clip.tokenizer.end_token, 1.0) if clip.tokenizer.pad_with_end else (0,1.0) 77 | #TODO: find most suitable masking token here 78 | m_token = (m_token, 1.0) 79 | 80 | ws = [] 81 | masked_tokens = [] 82 | masks = [] 83 | 84 | #create prompts 85 | for id, w in weight_dict.items(): 86 | masked, m = mask_word_id(tokens, word_ids, id, m_token) 87 | masked_tokens.extend(masked) 88 | 89 | m = torch.tensor(m, dtype=base_emb.dtype, device=base_emb.device) 90 | m = 
m.reshape(1,-1,1).expand(base_emb.shape) 91 | masks.append(m) 92 | 93 | ws.append(w) 94 | 95 | #batch process prompts 96 | embs = batched_clip_encode(masked_tokens, length, encode_func, len(tokens)) 97 | masks = torch.cat(masks) 98 | 99 | embs = (base_emb.expand(embs.shape) - embs) 100 | pooled = embs[0,length-1:length,:] 101 | 102 | embs *= masks 103 | embs = embs.sum(axis=0, keepdim=True) 104 | 105 | pooled_start = pooled_base.expand(len(ws), -1) 106 | ws = torch.tensor(ws).reshape(-1,1).expand(pooled_start.shape) 107 | pooled = (pooled - pooled_start) * (ws - 1) 108 | pooled = pooled.mean(axis=0, keepdim=True) 109 | 110 | return ((weight_tensor - 1) * embs), pooled_base + pooled 111 | 112 | def mask_inds(tokens, inds, mask_token): 113 | clip_len = len(tokens[0]) 114 | inds_set = set(inds) 115 | new_tokens = [[mask_token if i*clip_len + j in inds_set else t 116 | for j, t in enumerate(x)] for i, x in enumerate(tokens)] 117 | return new_tokens 118 | 119 | def down_weight(tokens, weights, word_ids, base_emb, length, encode_func, m_token=266): 120 | w, w_inv = np.unique(weights,return_inverse=True) 121 | 122 | if np.sum(w < 1) == 0: 123 | return base_emb, tokens, base_emb[0,length-1:length,:] 124 | #m_token = (clip.tokenizer.end_token, 1.0) if clip.tokenizer.pad_with_end else (0,1.0) 125 | #using the comma token as a masking token seems to work better than aos tokens for SD 1.x 126 | m_token = (m_token, 1.0) 127 | 128 | masked_tokens = [] 129 | 130 | masked_current = tokens 131 | for i in range(len(w)): 132 | if w[i] >= 1: 133 | continue 134 | masked_current = mask_inds(masked_current, np.where(w_inv == i)[0], m_token) 135 | masked_tokens.extend(masked_current) 136 | 137 | embs = batched_clip_encode(masked_tokens, length, encode_func, len(tokens)) 138 | embs = torch.cat([base_emb, embs]) 139 | w = w[w<=1.0] 140 | w_mix = np.diff([0] + w.tolist()) 141 | w_mix = torch.tensor(w_mix, dtype=embs.dtype, device=embs.device).reshape((-1,1,1)) 142 | 143 | weighted_emb = (w_mix * embs).sum(axis=0, keepdim=True) 144 | return weighted_emb, masked_current, weighted_emb[0,length-1:length,:] 145 | 146 | def scale_emb_to_mag(base_emb, weighted_emb): 147 | norm_base = torch.linalg.norm(base_emb) 148 | norm_weighted = torch.linalg.norm(weighted_emb) 149 | embeddings_final = (norm_base / norm_weighted) * weighted_emb 150 | return embeddings_final 151 | 152 | def recover_dist(base_emb, weighted_emb): 153 | fixed_std = (base_emb.std() / weighted_emb.std()) * (weighted_emb - weighted_emb.mean()) 154 | embeddings_final = fixed_std + (base_emb.mean() - fixed_std.mean()) 155 | return embeddings_final 156 | 157 | def A1111_renorm(base_emb, weighted_emb): 158 | embeddings_final = (base_emb.mean() / weighted_emb.mean()) * weighted_emb 159 | return embeddings_final 160 | 161 | def advanced_encode_from_tokens(tokenized, token_normalization, weight_interpretation, encode_func, m_token=266, length=77, w_max=1.0, return_pooled=False, apply_to_pooled=False): 162 | tokens = [[t for t,_,_ in x] for x in tokenized] 163 | weights = [[w for _,w,_ in x] for x in tokenized] 164 | word_ids = [[wid for _,_,wid in x] for x in tokenized] 165 | 166 | #weight normalization 167 | #==================== 168 | 169 | #distribute down/up weights over word lengths 170 | if token_normalization.startswith("length"): 171 | weights = divide_length(word_ids, weights) 172 | 173 | #make mean of word tokens 1 174 | if token_normalization.endswith("mean"): 175 | weights = shift_mean_weight(word_ids, weights) 176 | 177 | #weight interpretation 178 | 
#===================== 179 | pooled = None 180 | 181 | if weight_interpretation == "comfy": 182 | weighted_tokens = [[(t,w) for t, w in zip(x, y)] for x, y in zip(tokens, weights)] 183 | weighted_emb, pooled_base = encode_func(weighted_tokens) 184 | pooled = pooled_base 185 | else: 186 | unweighted_tokens = [[(t,1.0) for t, _,_ in x] for x in tokenized] 187 | base_emb, pooled_base = encode_func(unweighted_tokens) 188 | 189 | if weight_interpretation == "A1111": 190 | weighted_emb = from_zero(weights, base_emb) 191 | weighted_emb = A1111_renorm(base_emb, weighted_emb) 192 | pooled = pooled_base 193 | 194 | if weight_interpretation == "compel": 195 | pos_tokens = [[(t,w) if w >= 1.0 else (t,1.0) for t, w in zip(x, y)] for x, y in zip(tokens, weights)] 196 | weighted_emb, _ = encode_func(pos_tokens) 197 | weighted_emb, _, pooled = down_weight(pos_tokens, weights, word_ids, weighted_emb, length, encode_func) 198 | 199 | if weight_interpretation == "comfy++": 200 | weighted_emb, tokens_down, _ = down_weight(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 201 | weights = [[w if w > 1.0 else 1.0 for w in x] for x in weights] 202 | #unweighted_tokens = [[(t,1.0) for t, _,_ in x] for x in tokens_down] 203 | embs, pooled = from_masked(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 204 | weighted_emb += embs 205 | 206 | if weight_interpretation == "down_weight": 207 | weights = scale_to_norm(weights, word_ids, w_max) 208 | weighted_emb, _, pooled = down_weight(unweighted_tokens, weights, word_ids, base_emb, length, encode_func) 209 | 210 | if return_pooled: 211 | if apply_to_pooled: 212 | return weighted_emb, pooled 213 | else: 214 | return weighted_emb, pooled_base 215 | return weighted_emb, None 216 | 217 | def encode_token_weights_g(model, token_weight_pairs): 218 | return model.clip_g.encode_token_weights(token_weight_pairs) 219 | 220 | def encode_token_weights_l(model, token_weight_pairs): 221 | l_out, _ = model.clip_l.encode_token_weights(token_weight_pairs) 222 | return l_out, None 223 | 224 | def encode_token_weights(model, token_weight_pairs, encode_func): 225 | if model.layer_idx is not None: 226 | if hasattr(model.cond_stage_model, 'set_clip_options'): 227 | model.cond_stage_model.set_clip_options({"layer": model.layer_idx}) 228 | else: 229 | print(f"[ComfyUI_ADV_CLIP_emb] ComfyUI is outdated.") 230 | model.cond_stage_model.clip_layer(model.layer_idx) 231 | 232 | model_management.load_model_gpu(model.patcher) 233 | return encode_func(model.cond_stage_model, token_weight_pairs) 234 | 235 | def prepareXL(embs_l, embs_g, pooled, clip_balance): 236 | l_w = 1 - max(0, clip_balance - .5) * 2 237 | g_w = 1 - max(0, .5 - clip_balance) * 2 238 | if embs_l is not None: 239 | return torch.cat([embs_l * l_w, embs_g * g_w], dim=-1), pooled 240 | else: 241 | return embs_g, pooled 242 | 243 | def advanced_encode(clip, text, token_normalization, weight_interpretation, w_max=1.0, clip_balance=.5, apply_to_pooled=True): 244 | tokenized = clip.tokenize(text, return_word_ids=True) 245 | if isinstance(clip.cond_stage_model, (SDXLClipModel, SDXLRefinerClipModel, SDXLClipG)): 246 | embs_l = None 247 | embs_g = None 248 | pooled = None 249 | if 'l' in tokenized and isinstance(clip.cond_stage_model, SDXLClipModel): 250 | embs_l, _ = advanced_encode_from_tokens(tokenized['l'], 251 | token_normalization, 252 | weight_interpretation, 253 | lambda x: encode_token_weights(clip, x, encode_token_weights_l), 254 | w_max=w_max, 255 | return_pooled=False) 256 | if 'g' in tokenized: 
257 | embs_g, pooled = advanced_encode_from_tokens(tokenized['g'], 258 | token_normalization, 259 | weight_interpretation, 260 | lambda x: encode_token_weights(clip, x, encode_token_weights_g), 261 | w_max=w_max, 262 | return_pooled=True, 263 | apply_to_pooled=apply_to_pooled) 264 | return prepareXL(embs_l, embs_g, pooled, clip_balance) 265 | else: 266 | return advanced_encode_from_tokens(tokenized['l'], 267 | token_normalization, 268 | weight_interpretation, 269 | lambda x: (clip.encode_from_tokens({'l': x}), None), 270 | w_max=w_max) 271 | def advanced_encode_XL(clip, text1, text2, token_normalization, weight_interpretation, w_max=1.0, clip_balance=.5, apply_to_pooled=True): 272 | tokenized1 = clip.tokenize(text1, return_word_ids=True) 273 | tokenized2 = clip.tokenize(text2, return_word_ids=True) 274 | 275 | embs_l, _ = advanced_encode_from_tokens(tokenized1['l'], 276 | token_normalization, 277 | weight_interpretation, 278 | lambda x: encode_token_weights(clip, x, encode_token_weights_l), 279 | w_max=w_max, 280 | return_pooled=False) 281 | 282 | embs_g, pooled = advanced_encode_from_tokens(tokenized2['g'], 283 | token_normalization, 284 | weight_interpretation, 285 | lambda x: encode_token_weights(clip, x, encode_token_weights_g), 286 | w_max=w_max, 287 | return_pooled=True, 288 | apply_to_pooled=apply_to_pooled) 289 | 290 | gcd_num = gcd(embs_l.shape[1], embs_g.shape[1]) 291 | repeat_l = int((embs_g.shape[1] / gcd_num) * embs_l.shape[1]) 292 | repeat_g = int((embs_l.shape[1] / gcd_num) * embs_g.shape[1]) 293 | 294 | return prepareXL(embs_l.expand((-1,repeat_l,-1)), embs_g.expand((-1,repeat_g,-1)), pooled, clip_balance) 295 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/c5bae957-2c47-4a73-86e0-2949fcf72fd5) 2 | 3 | 4 | # ComfyUI-Prompt-MZ 5 | 基于llama.cpp的一些和提示词相关的节点,目前包括美化提示词和类似clip-interrogator的图片反推 6 | 7 | Use llama.cpp to assist in generating some nodes related to prompt words, including beautifying prompt words and image recognition similar to clip-interrogator 8 | 9 | ## Recent changes 10 | * [2024-06-22] 新增Florence-2-large图片反推模型节点 (Added Florence-2-large image interrogation model node) 11 | * [2024-06-20] 新增选择本机ollama模型的节点 (Added nodes to select local ollama models) 12 | * [2024-06-05] 新增千问2.0预设模型 (Added Qianwen 2.0 preset model) 13 | * [2024-06-05] 可选chat_format,图片反推后处理 (Optional chat_format, post-processing after image interrogation) 14 | * [2024-06-04] 新增了一些预设模型 (Added some preset models) 15 | * [2024-06-04] 新增通用节点,支持手动选择模型 (Add universal node, support manual selection of models) 16 | * [2024-05-30] 添加ImageCaptionerConfig节点来支持批量生成提示词 (Add ImageCaptionerConfig node to support batch generation of prompt words) 17 | * [2024-05-24] 运行后在当前节点显示生成的提示词 (Display the generated prompt words in the current node after running) 18 | * [2024-05-24] 兼容清华智谱API (Compatible with Zhipu API) 19 | * [2024-05-24] 使用A1111权重缩放,感谢ComfyUI_ADV_CLIP_emb (Use A1111 weight scaling, thanks to ComfyUI_ADV_CLIP_emb) 20 | * [2024-05-13] 新增OpenAI API节点 (add OpenAI API node) 21 | * [2024-04-30] 支持自定义指令 (Support for custom instructions) 22 | * [2024-04-30] 添加llava-v1.6-vicuna-13b (add llava-v1.6-vicuna-13b) 23 | * [2024-04-30] 添加翻译 24 | * [2024-04-28] 新增Phi-3-mini节点 (add Phi-3-mini node) 25 | 26 | ## Installation 27 | 1. Clone this repo into `custom_nodes` folder. 28 | 2. Restart ComfyUI. 
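For a typical setup this is just a git clone into ComfyUI's custom nodes folder (a minimal sketch, assuming ComfyUI is installed under `ComfyUI/` and `git` is available):

```bash
# clone this repo into ComfyUI's custom_nodes directory
cd ComfyUI/custom_nodes
git clone https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ.git
# restart ComfyUI afterwards so the new nodes are registered
```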
29 | 30 | ## Nodes 31 | + MZ_Florence2CLIPTextEncode 32 | 33 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/b60bb22c-f42b-4c4f-a0ac-20a6c09c9046) 34 | 35 | 36 | 37 | 38 | 39 | + ModelConfigManualSelect (Ollama) 40 | 41 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/2009e330-0f1f-4f28-9b4c-8446d3cdc519) 42 | 43 | 44 | + CLIPTextEncode (LLamaCPP Universal) 45 | 46 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/1f66ce10-920f-4ada-9287-f86a51782bff) 47 | 48 | 49 | + ModelConfigManualSelect(LLamaCPP) 50 | 51 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/42473248-8902-43d7-a08b-37bb3d20b4aa) 52 | 53 | + ModelConfigDownloaderSelect(LLamaCPP) 54 | 55 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/6a2f561b-deb0-43d3-900f-c9d6b23d0ea4) 56 | 57 | 58 | 59 | + CLIPTextEncode (ImageInterrogator) 60 | 61 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/e76eb5dc-1c6c-4a59-8197-8bd7b56c3889) 62 | 63 | + ModelConfigManualSelect(ImageInterrogator) 64 | 65 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/71a48734-e3f3-4ced-a8d7-cd334340efdb) 66 | 67 | 68 | + ModelConfigDownloaderSelect(ImageInterrogator) 69 | 70 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/bfec7696-1f86-4fe5-9dc3-807b39366524) 71 | 72 | 73 | 74 | + CLIPTextEncode (OpenAI API) 75 | 76 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/14e9a96a-ec1b-481d-8f5a-43cd752ad01b) 77 | 78 | + CLIPTextEncode (Phi-3) 79 | 80 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/c4b97aeb-23c0-4cf1-a6a5-d259fdf83f6e) 81 | 82 | 83 | + CLIPTextEncode (LLama3) 84 | 85 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/40da75ab-46db-4f38-9d8e-b7f9184f77fa) 86 | 87 | 88 | + ImageInterrogator (LLava) 89 | 90 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/f397c432-c2f7-4d48-9b95-2031cfb19e8c) 91 | Enable parameter sd_format 92 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/4d2cf65d-e8a3-4dfa-b735-9d591638028c) 93 | 94 | + ImageCaptionerConfig 95 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/147941a2-cb5f-418f-acd9-8e17ffaf044a) 96 | 97 | 98 | + LLamaCPPOptions 99 | 100 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/256483e0-c3b7-4d04-82f4-f71f7d9584c9) 101 | 102 | + CustomizeInstruct 103 | 104 | ![image](https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ/assets/5035199/d328ba44-2eab-4f95-bd35-585a9cdc9ec2) 105 | 106 | 107 | + BaseLLamaCPPCLIPTextEncode (可以手动传入模型路径/You can directly pass in the model path) 108 | + BaseLLavaImageInterrogator (可以手动传入模型路径/You can directly pass in the model path) 109 | 110 | ## FAQ 111 | 112 | ### module 'llama_cpp' has no attribute 'LLAMA_SPLIT_MODE_LAYER' 113 | 升级llama_cpp_python的版本到最新版本，前往 https://github.com/abetlen/llama-cpp-python/releases 下载安装 (Upgrade llama_cpp_python to the latest version; download and install it from https://github.com/abetlen/llama-cpp-python/releases) 114 | 115 | ### LLama.dll 无法加载 (Failed to load shared library LLama.dll) 116 | CUDA版本切换到12.1,如果你使用秋叶启动器,高级设置->环境维护->安装PyTorch->选择版本中选择CUDA 12.1的版本 (Switch CUDA to version 12.1; if you use the Aki launcher, go to Advanced Settings -> Environment Maintenance -> Install PyTorch and choose the CUDA 12.1 build) 117 | 118 | 119 | ### ...llama_cpp_python-0.2.63-cp310-cp310-win_amd64.whl returned nonzero exit status 120 | 保持网络畅通,该上魔法上魔法,或者手动安装llama_cpp_python (Keep your network connection stable, use a proxy if necessary, or install llama_cpp_python manually) 121 | 122 | 123 | 124 | 125 | ## Credits 126 | + [https://github.com/comfyanonymous/ComfyUI](https://github.com/comfyanonymous/ComfyUI) 127 | + 
[https://github.com/ggerganov/llama.cpp](https://github.com/ggerganov/llama.cpp) 128 | + [https://github.com/BlenderNeko/ComfyUI_ADV_CLIP_emb](https://github.com/BlenderNeko/ComfyUI_ADV_CLIP_emb) 129 | 130 | ## Star History 131 | 132 | 133 | 134 | 135 | 136 | Star History Chart 137 | 138 | 139 | 140 | ## Contact 141 | - 绿泡泡: minrszone 142 | - Bilibili: [minus_zone](https://space.bilibili.com/5950992) 143 | - 小红书: [MinusZoneAI](https://www.xiaohongshu.com/user/profile/5f072e990000000001005472) 144 | - 爱发电: [MinusZoneAI](https://afdian.net/@MinusZoneAI) 145 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import json 4 | import os 5 | import sys 6 | from .mz_prompt_utils import Utils 7 | from nodes import MAX_RESOLUTION 8 | import comfy.utils 9 | import shutil 10 | import comfy.samplers 11 | import folder_paths 12 | 13 | 14 | WEB_DIRECTORY = "./web" 15 | 16 | AUTHOR_NAME = u"MinusZone" 17 | CATEGORY_NAME = f"{AUTHOR_NAME} - Prompt" 18 | 19 | # sys.path.append(os.path.join(os.path.dirname(__file__))) 20 | 21 | import importlib 22 | 23 | from . import mz_prompt_webserver 24 | # mz_prompt_webserver.start_server() 25 | 26 | NODE_CLASS_MAPPINGS = { 27 | } 28 | 29 | 30 | NODE_DISPLAY_NAME_MAPPINGS = { 31 | } 32 | 33 | 34 | from . import mz_llama_cpp 35 | 36 | 37 | def getCommonCLIPTextEncodeInput(): 38 | from . import mz_llama_core_nodes 39 | style_presets = mz_llama_core_nodes.get_style_presets() 40 | CommonCLIPTextEncodeInput = { 41 | "required": { 42 | "style_presets": ( 43 | style_presets, {"default": style_presets[1]} 44 | ), 45 | "text": ("STRING", {"multiline": True, }), 46 | "keep_device": ([False, True], {"default": False}), 47 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 48 | }, 49 | "optional": { 50 | "clip": ("CLIP", ), 51 | "llama_cpp_options": ("LLamaCPPOptions", ), 52 | "customize_instruct": ("CustomizeInstruct", ), 53 | } 54 | } 55 | 56 | return CommonCLIPTextEncodeInput 57 | 58 | 59 | class MZ_OllamaModelConfig_ManualSelect: 60 | @classmethod 61 | def INPUT_TYPES(s): 62 | search_dirs = [ 63 | os.path.join(os.path.expanduser('~'), ".ollama", "models"), 64 | os.path.join(os.environ.get("APPDATA", ""), ".ollama", "models"), 65 | ] 66 | 67 | ollama_models_dir = None 68 | for dir in search_dirs: 69 | if os.path.exists(dir): 70 | ollama_models_dir = dir 71 | break 72 | 73 | ollamas = [] 74 | if ollama_models_dir is not None: 75 | manifests_dir = os.path.join(ollama_models_dir, "manifests") 76 | for root, dirs, files in os.walk(manifests_dir): 77 | for file in files: 78 | ollamas.append(os.path.join(root, file)) 79 | 80 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 81 | return { 82 | "required": { 83 | "ollama": (ollamas,), 84 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 85 | }, 86 | "optional": { 87 | }, 88 | } 89 | 90 | RETURN_TYPES = ("LLamaCPPModelConfig",) 91 | RETURN_NAMES = ("llama_cpp_model_config",) 92 | 93 | FUNCTION = "create" 94 | CATEGORY = f"{CATEGORY_NAME}/others" 95 | 96 | def create(self, **kwargs): 97 | kwargs = kwargs.copy() 98 | 99 | ollama = kwargs.get("ollama", "") 100 | ollama_cpp_model = None 101 | if os.path.exists(ollama): 102 | # 
{"schemaVersion":2,"mediaType":"application/vnd.docker.distribution.manifest.v2+json","config":{"mediaType":"application/vnd.docker.container.image.v1+json","digest":"sha256:887433b89a901c156f7e6944442f3c9e57f3c55d6ed52042cbb7303aea994290","size":483},"layers":[{"mediaType":"application/vnd.ollama.image.model","digest":"sha256:c1864a5eb19305c40519da12cc543519e48a0697ecd30e15d5ac228644957d12","size":1678447520},{"mediaType":"application/vnd.ollama.image.license","digest":"sha256:097a36493f718248845233af1d3fefe7a303f864fae13bc31a3a9704229378ca","size":8433},{"mediaType":"application/vnd.ollama.image.template","digest":"sha256:109037bec39c0becc8221222ae23557559bc594290945a2c4221ab4f303b8871","size":136},{"mediaType":"application/vnd.ollama.image.params","digest":"sha256:22a838ceb7fb22755a3b0ae9b4eadde629d19be1f651f73efb8c6b4e2cd0eea0","size":84}]} 103 | with open(ollama, "r", encoding="utf-8") as f: 104 | data = json.load(f) 105 | if "layers" in data: 106 | for layer in data["layers"]: 107 | if "mediaType" in layer and layer["mediaType"] == "application/vnd.ollama.image.model": 108 | ollama_cpp_model = layer["digest"] 109 | break 110 | 111 | if ollama_cpp_model is None: 112 | raise ValueError("Invalid ollama file") 113 | 114 | if ollama_cpp_model.startswith("sha256:"): 115 | ollama_cpp_model = ollama_cpp_model[7:] 116 | # ollama = C:\Users\admin\.ollama\models\manifests\registry.ollama.ai\library\gemma\2b 117 | models_dir = ollama[:ollama.rfind("manifests")] 118 | ollama_cpp_model = os.path.join( 119 | models_dir, "blobs", f"sha256-{ollama_cpp_model}") 120 | 121 | if not os.path.exists(ollama_cpp_model): 122 | raise ValueError(f"Model not found at: {ollama_cpp_model}") 123 | 124 | llama_cpp_model = ollama_cpp_model 125 | 126 | chat_format = kwargs.get("chat_format", "auto") 127 | if chat_format == "auto": 128 | chat_format = None 129 | return ({ 130 | "type": "ManualSelect", 131 | "model_path": llama_cpp_model, 132 | "chat_format": chat_format, 133 | },) 134 | 135 | 136 | NODE_CLASS_MAPPINGS["MZ_OllamaModelConfig_ManualSelect"] = MZ_OllamaModelConfig_ManualSelect 137 | NODE_DISPLAY_NAME_MAPPINGS[ 138 | "MZ_OllamaModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - ModelConfigManualSelect(OllamaFile)" 139 | 140 | 141 | class MZ_LLamaCPPModelConfig_ManualSelect: 142 | @ classmethod 143 | def INPUT_TYPES(s): 144 | gguf_files = Utils.get_gguf_files() 145 | 146 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 147 | return { 148 | "required": { 149 | "llama_cpp_model": (gguf_files,), 150 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 151 | }, 152 | "optional": { 153 | }, 154 | } 155 | 156 | RETURN_TYPES = ("LLamaCPPModelConfig",) 157 | RETURN_NAMES = ("llama_cpp_model_config",) 158 | 159 | FUNCTION = "create" 160 | CATEGORY = f"{CATEGORY_NAME}/others" 161 | 162 | def create(self, **kwargs): 163 | kwargs = kwargs.copy() 164 | 165 | llama_cpp_model = kwargs.get("llama_cpp_model", "") 166 | if llama_cpp_model != "": 167 | llama_cpp_model = os.path.join( 168 | Utils.get_gguf_models_path(), llama_cpp_model) 169 | 170 | chat_format = kwargs.get("chat_format", "auto") 171 | if chat_format == "auto": 172 | chat_format = None 173 | return ({ 174 | "type": "ManualSelect", 175 | "model_path": llama_cpp_model, 176 | "chat_format": chat_format, 177 | },) 178 | 179 | 180 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPModelConfig_ManualSelect"] = MZ_LLamaCPPModelConfig_ManualSelect 181 | NODE_DISPLAY_NAME_MAPPINGS[ 182 | "MZ_LLamaCPPModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - 
ModelConfigManualSelect(LLamaCPP)" 183 | 184 | 185 | class MZ_LLamaCPPModelConfig_DownloaderSelect: 186 | @classmethod 187 | def INPUT_TYPES(s): 188 | optional_models = Utils.get_model_zoo(tags_filter="llama") 189 | model_names = [ 190 | model["model"] for model in optional_models 191 | ] 192 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 193 | return { 194 | "required": { 195 | "model_name": (model_names,), 196 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 197 | }, 198 | "optional": { 199 | }, 200 | } 201 | 202 | RETURN_TYPES = ("LLamaCPPModelConfig",) 203 | RETURN_NAMES = ("llama_cpp_model_config",) 204 | 205 | FUNCTION = "create" 206 | CATEGORY = f"{CATEGORY_NAME}/others" 207 | 208 | def create(self, **kwargs): 209 | kwargs = kwargs.copy() 210 | 211 | model_name = kwargs.get("model_name", "") 212 | chat_format = kwargs.get("chat_format", "auto") 213 | if chat_format == "auto": 214 | chat_format = None 215 | return ({ 216 | "type": "DownloaderSelect", 217 | "model_name": model_name, 218 | "chat_format": chat_format, 219 | },) 220 | 221 | 222 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPModelConfig_DownloaderSelect"] = MZ_LLamaCPPModelConfig_DownloaderSelect 223 | NODE_DISPLAY_NAME_MAPPINGS[ 224 | "MZ_LLamaCPPModelConfig_DownloaderSelect"] = f"{AUTHOR_NAME} - ModelConfigDownloaderSelect(LLamaCPP)" 225 | 226 | 227 | class MZ_LLamaCPPCLIPTextEncode: 228 | @classmethod 229 | def INPUT_TYPES(s): 230 | importlib.reload(mz_llama_cpp) 231 | 232 | result = { 233 | "required": { 234 | }, 235 | "optional": { 236 | "llama_cpp_model": ("LLamaCPPModelConfig",), 237 | }, 238 | } 239 | 240 | common_input = getCommonCLIPTextEncodeInput() 241 | for key in common_input["required"]: 242 | result["required"][key] = common_input["required"][key] 243 | for key in common_input["optional"]: 244 | result["optional"][key] = common_input["optional"][key] 245 | 246 | return result 247 | 248 | RETURN_TYPES = ("STRING", "CONDITIONING",) 249 | RETURN_NAMES = ("text", "conditioning",) 250 | OUTPUT_NODE = True 251 | FUNCTION = "encode" 252 | CATEGORY = CATEGORY_NAME 253 | 254 | DESCRIPTION = """ 255 | llama_cpp_model不设置时,将使用默认模型: Meta-Llama-3-8B-Instruct.Q4_K_M.gguf 256 | """ 257 | 258 | def encode(self, **kwargs): 259 | kwargs = kwargs.copy() 260 | from . 
import mz_llama_core_nodes 261 | importlib.reload(mz_llama_core_nodes) 262 | 263 | return mz_llama_core_nodes.llama_cpp_node_encode(kwargs) 264 | 265 | 266 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPCLIPTextEncode"] = MZ_LLamaCPPCLIPTextEncode 267 | NODE_DISPLAY_NAME_MAPPINGS[ 268 | "MZ_LLamaCPPCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(LLamaCPP Universal)" 269 | 270 | 271 | class MZ_LLamaCPPOptions: 272 | @classmethod 273 | def INPUT_TYPES(s): 274 | value = mz_llama_cpp.LlamaCppOptions() 275 | result = {} 276 | 277 | for key in value: 278 | if type(value[key]) == bool: 279 | result[key] = ([True, False], {"default": value[key]}) 280 | elif type(value[key]) == int: 281 | result[key] = ("INT", { 282 | "default": value[key], "min": -0xffffffffffffffff, "max": 0xffffffffffffffff}) 283 | elif type(value[key]) == float: 284 | result[key] = ("FLOAT", { 285 | "default": value[key], "min": -0xffffffffffffffff, "max": 0xffffffffffffffff}) 286 | elif type(value[key]) == str: 287 | result[key] = ("STRING", {"default": value[key]}) 288 | elif type(value[key]) == list: 289 | result[key] = (value[key], {"default": value[key][0]}) 290 | else: 291 | raise Exception(f"Unknown type: {type(value[key])}") 292 | 293 | return { 294 | "required": result, 295 | } 296 | 297 | RETURN_TYPES = ("LLamaCPPOptions",) 298 | RETURN_NAMES = ("llama_cpp_options",) 299 | 300 | FUNCTION = "create" 301 | CATEGORY = f"{CATEGORY_NAME}/others" 302 | 303 | def create(self, **kwargs): 304 | kwargs = kwargs.copy() 305 | importlib.reload(mz_llama_cpp) 306 | opt = {} 307 | for key in kwargs: 308 | opt[key] = kwargs[key] 309 | 310 | # if opt.get("chat_format", None) == "auto": 311 | # opt["chat_format"] = None 312 | return (opt,) 313 | 314 | 315 | NODE_CLASS_MAPPINGS["MZ_LLamaCPPOptions"] = MZ_LLamaCPPOptions 316 | NODE_DISPLAY_NAME_MAPPINGS["MZ_LLamaCPPOptions"] = f"{AUTHOR_NAME} - LLamaCPPOptions" 317 | 318 | 319 | class MZ_CustomizeInstruct: 320 | @classmethod 321 | def INPUT_TYPES(s): 322 | from . 
import mz_prompts 323 | 324 | return { 325 | "required": { 326 | "system": ("STRING", {"multiline": True, "default": mz_prompts.Long_prompt}), 327 | "instruct": ("STRING", {"multiline": True, "default": ""}), 328 | }, 329 | } 330 | 331 | RETURN_TYPES = ("CustomizeInstruct",) 332 | RETURN_NAMES = ("customize_instruct",) 333 | FUNCTION = "create" 334 | CATEGORY = f"{CATEGORY_NAME}/others" 335 | 336 | def create(self, **kwargs): 337 | kwargs = kwargs.copy() 338 | 339 | return (kwargs,) 340 | 341 | 342 | NODE_CLASS_MAPPINGS["MZ_CustomizeInstruct"] = MZ_CustomizeInstruct 343 | NODE_DISPLAY_NAME_MAPPINGS["MZ_CustomizeInstruct"] = f"{AUTHOR_NAME} - CustomizeInstruct" 344 | 345 | 346 | class MZ_ImageCaptionerConfig: 347 | @classmethod 348 | def INPUT_TYPES(s): 349 | return { 350 | "required": { 351 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 352 | "caption_suffix": ("STRING", {"default": ".caption"}), 353 | "force_update": ([False, True], {"default": False}), 354 | "retry_keyword": ("STRING", {"default": "not,\",error"}), 355 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 356 | }, 357 | "optional": { 358 | 359 | }, 360 | } 361 | 362 | RETURN_TYPES = ("ImageCaptionerConfig",) 363 | RETURN_NAMES = ("captioner_config", ) 364 | 365 | FUNCTION = "interrogate_batch" 366 | CATEGORY = f"{CATEGORY_NAME}/others" 367 | 368 | def interrogate_batch(self, **kwargs): 369 | kwargs = kwargs.copy() 370 | 371 | return (kwargs, ) 372 | 373 | 374 | NODE_CLASS_MAPPINGS["MZ_ImageCaptionerConfig"] = MZ_ImageCaptionerConfig 375 | NODE_DISPLAY_NAME_MAPPINGS["MZ_ImageCaptionerConfig"] = f"{AUTHOR_NAME} - ImageCaptionerConfig" 376 | 377 | 378 | class MZ_OpenAIApiCLIPTextEncode: 379 | @classmethod 380 | def INPUT_TYPES(s): 381 | importlib.reload(mz_llama_cpp) 382 | 383 | s.openai_config_path = os.path.join( 384 | Utils.get_models_path(), 385 | "openai_config.json", 386 | ) 387 | default_config = { 388 | "base_url": "", 389 | "api_key": "", 390 | "model_name": "gpt-3.5-turbo-1106", 391 | } 392 | if os.path.exists(s.openai_config_path): 393 | try: 394 | with open(s.openai_config_path, "r", encoding="utf-8") as f: 395 | default_config = json.load(f) 396 | except Exception as e: 397 | print(f"Failed to load openai_config.json: {e}") 398 | 399 | default_api_key = default_config.get("api_key", "") 400 | if default_api_key != "": 401 | default_api_key = default_api_key[:4] + "******" 402 | result = { 403 | "required": { 404 | "base_url": ("STRING", {"default": default_config.get("base_url", ""), "placeholder": ""}), 405 | "api_key": ("STRING", {"default": default_api_key, "placeholder": ""}), 406 | "model_name": ("STRING", {"default": default_config.get("model_name", ""), }), 407 | }, 408 | "optional": { 409 | }, 410 | } 411 | 412 | common_input = getCommonCLIPTextEncodeInput() 413 | for key in common_input["required"]: 414 | if key not in ["seed", "keep_device"]: 415 | result["required"][key] = common_input["required"][key] 416 | for key in common_input["optional"]: 417 | if key != "llama_cpp_options": 418 | result["optional"][key] = common_input["optional"][key] 419 | 420 | return result 421 | RETURN_TYPES = ("STRING", "CONDITIONING",) 422 | RETURN_NAMES = ("text", "conditioning",) 423 | OUTPUT_NODE = True 424 | FUNCTION = "encode" 425 | CATEGORY = CATEGORY_NAME 426 | 427 | def encode(self, **kwargs): 428 | kwargs = kwargs.copy() 429 | 430 | from . 
import mz_openaiapi 431 | importlib.reload(mz_openaiapi) 432 | 433 | if kwargs.get("api_key", "").endswith("******"): 434 | kwargs["api_key"] = "" 435 | try: 436 | with open(self.openai_config_path, "r", encoding="utf-8") as f: 437 | config = json.load(f) 438 | kwargs["api_key"] = config.get("api_key", "") 439 | except Exception as e: 440 | print(f"Failed to load openai_config.json: {e}") 441 | 442 | if kwargs.get("api_key", "") != "": 443 | with open(self.openai_config_path, "w", encoding="utf-8") as f: 444 | json.dump({ 445 | "base_url": kwargs.get("base_url", ""), 446 | "api_key": kwargs.get("api_key", ""), 447 | "model_name": kwargs.get("model_name", ""), 448 | }, f, indent=4) 449 | else: 450 | raise ValueError("api_key is required") 451 | 452 | text = mz_openaiapi.query_beautify_prompt_text(kwargs) 453 | conditionings = None 454 | clip = kwargs.get("clip", None) 455 | if clip is not None: 456 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 457 | 458 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 459 | 460 | 461 | NODE_CLASS_MAPPINGS["MZ_OpenAIApiCLIPTextEncode"] = MZ_OpenAIApiCLIPTextEncode 462 | NODE_DISPLAY_NAME_MAPPINGS[ 463 | "MZ_OpenAIApiCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(OpenAIApi)" 464 | 465 | 466 | class MZ_ImageInterrogatorCLIPTextEncode: 467 | @classmethod 468 | def INPUT_TYPES(s): 469 | return { 470 | "required": { 471 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 472 | "post_processing": ([False, True], {"default": True}), 473 | "keep_device": ([False, True], {"default": False}), 474 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 475 | }, 476 | "optional": { 477 | "image_interrogator_model": ("ImageInterrogatorModelConfig", ), 478 | "image": ("IMAGE",), 479 | "clip": ("CLIP", ), 480 | "llama_cpp_options": ("LLamaCPPOptions", ), 481 | "customize_instruct": ("CustomizeInstruct", ), 482 | "captioner_config": ("ImageCaptionerConfig", ), 483 | }, 484 | } 485 | 486 | RETURN_TYPES = ("STRING", "CONDITIONING",) 487 | RETURN_NAMES = ("text", "conditioning",) 488 | OUTPUT_NODE = True 489 | FUNCTION = "encode" 490 | CATEGORY = CATEGORY_NAME 491 | 492 | def encode(self, **kwargs): 493 | kwargs = kwargs.copy() 494 | from . 
import mz_llama_core_nodes 495 | importlib.reload(mz_llama_core_nodes) 496 | 497 | return mz_llama_core_nodes.image_interrogator_node_encode(kwargs) 498 | 499 | 500 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorCLIPTextEncode"] = MZ_ImageInterrogatorCLIPTextEncode 501 | NODE_DISPLAY_NAME_MAPPINGS[ 502 | "MZ_ImageInterrogatorCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(ImageInterrogator)" 503 | 504 | 505 | class MZ_ImageInterrogatorModelConfig_ManualSelect: 506 | @classmethod 507 | def INPUT_TYPES(s): 508 | gguf_files = Utils.get_gguf_files() 509 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 510 | return { 511 | "required": { 512 | "llama_cpp_model": (gguf_files,), 513 | "mmproj_model": (["auto"] + gguf_files,), 514 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 515 | }, 516 | "optional": { 517 | }, 518 | } 519 | 520 | RETURN_TYPES = ("ImageInterrogatorModelConfig",) 521 | RETURN_NAMES = ("image_interrogator_model",) 522 | 523 | FUNCTION = "create" 524 | CATEGORY = f"{CATEGORY_NAME}/others" 525 | 526 | def create(self, **kwargs): 527 | kwargs = kwargs.copy() 528 | 529 | llama_cpp_model = kwargs.get("llama_cpp_model", "") 530 | if llama_cpp_model != "": 531 | llama_cpp_model = os.path.join( 532 | Utils.get_gguf_models_path(), llama_cpp_model) 533 | 534 | mmproj_model = kwargs.get("mmproj_model", "") 535 | if mmproj_model != "": 536 | mmproj_model = os.path.join( 537 | Utils.get_gguf_models_path(), mmproj_model) 538 | 539 | chat_format = kwargs.get("chat_format", "auto") 540 | if chat_format == "auto": 541 | chat_format = None 542 | return ({ 543 | "type": "ManualSelect", 544 | "model_path": llama_cpp_model, 545 | "mmproj_model": mmproj_model, 546 | "chat_format": chat_format, 547 | },) 548 | 549 | 550 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorModelConfig_ManualSelect"] = MZ_ImageInterrogatorModelConfig_ManualSelect 551 | NODE_DISPLAY_NAME_MAPPINGS[ 552 | "MZ_ImageInterrogatorModelConfig_ManualSelect"] = f"{AUTHOR_NAME} - ModelConfigManualSelect(ImageInterrogator)" 553 | 554 | 555 | class MZ_ImageInterrogatorModelConfig_DownloaderSelect: 556 | @classmethod 557 | def INPUT_TYPES(s): 558 | optional_models = Utils.get_model_zoo(tags_filter="llava") 559 | model_names = [ 560 | model["model"] for model in optional_models 561 | ] 562 | 563 | optional_models = Utils.get_model_zoo(tags_filter="mmproj") 564 | mmproj_model_names = [ 565 | model["model"] for model in optional_models 566 | ] 567 | 568 | chat_format = mz_llama_cpp.get_llama_cpp_chat_handlers() 569 | return { 570 | "required": { 571 | "model_name": (model_names,), 572 | "mmproj_model_name": (["auto"] + mmproj_model_names,), 573 | "chat_format": (["auto"] + chat_format, {"default": "auto"}), 574 | }, 575 | "optional": { 576 | }, 577 | } 578 | 579 | RETURN_TYPES = ("ImageInterrogatorModelConfig",) 580 | RETURN_NAMES = ("image_interrogator_model",) 581 | 582 | FUNCTION = "create" 583 | CATEGORY = f"{CATEGORY_NAME}/others" 584 | 585 | def create(self, **kwargs): 586 | kwargs = kwargs.copy() 587 | model_name = kwargs.get("model_name") 588 | mmproj_model_name = kwargs.get("mmproj_model_name", "auto") 589 | chat_format = kwargs.get("chat_format", "auto") 590 | if chat_format == "auto": 591 | chat_format = None 592 | return ({ 593 | "type": "DownloaderSelect", 594 | "model_name": model_name, 595 | "mmproj_model_name": mmproj_model_name, 596 | "chat_format": chat_format, 597 | },) 598 | 599 | 600 | NODE_CLASS_MAPPINGS["MZ_ImageInterrogatorModelConfig_DownloaderSelect"] = 
MZ_ImageInterrogatorModelConfig_DownloaderSelect 601 | NODE_DISPLAY_NAME_MAPPINGS[ 602 | "MZ_ImageInterrogatorModelConfig_DownloaderSelect"] = f"{AUTHOR_NAME} - ModelConfigDownloaderSelect(ImageInterrogator)" 603 | 604 | 605 | class MZ_Florence2CLIPTextEncode: 606 | @classmethod 607 | def INPUT_TYPES(s): 608 | return { 609 | "required": { 610 | "model_name": ([ 611 | "Florence-2-large-ft", 612 | "Florence-2-large", 613 | ],), 614 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 615 | "keep_device": ([False, True], {"default": False}), 616 | # "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 617 | }, 618 | "optional": { 619 | "image": ("IMAGE",), 620 | "clip": ("CLIP", ), 621 | }, 622 | } 623 | 624 | RETURN_TYPES = ("STRING", "CONDITIONING",) 625 | RETURN_NAMES = ("text", "conditioning",) 626 | OUTPUT_NODE = True 627 | FUNCTION = "encode" 628 | CATEGORY = CATEGORY_NAME 629 | 630 | def encode(self, **kwargs): 631 | kwargs = kwargs.copy() 632 | from . import mz_transformers 633 | importlib.reload(mz_transformers) 634 | 635 | return mz_transformers.florence2_node_encode(kwargs) 636 | 637 | 638 | NODE_CLASS_MAPPINGS["MZ_Florence2CLIPTextEncode"] = MZ_Florence2CLIPTextEncode 639 | NODE_DISPLAY_NAME_MAPPINGS[ 640 | "MZ_Florence2CLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(Florence-2)" 641 | 642 | 643 | class MZ_Florence2Captioner: 644 | @classmethod 645 | def INPUT_TYPES(s): 646 | return { 647 | "required": { 648 | "model_name": ([ 649 | "Florence-2-large-ft", 650 | "Florence-2-large", 651 | ],), 652 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 653 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 654 | "batch_size": ("INT", {"default": 1, "min": 1, "max": 0xffffffffffffffff}), 655 | "caption_suffix": ("STRING", {"default": ".caption"}), 656 | "force_update": ([False, True], {"default": False}), 657 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 658 | }, 659 | "optional": { 660 | }, 661 | } 662 | 663 | RETURN_TYPES = ("STRING",) 664 | RETURN_NAMES = ("debug",) 665 | OUTPUT_NODE = True 666 | FUNCTION = "encode" 667 | CATEGORY = CATEGORY_NAME 668 | 669 | def encode(self, **kwargs): 670 | kwargs = kwargs.copy() 671 | from . 
import mz_transformers 672 | importlib.reload(mz_transformers) 673 | 674 | kwargs["captioner_config"] = { 675 | "directory": kwargs["directory"], 676 | "resolution": kwargs["resolution"], 677 | "batch_size": kwargs["batch_size"], 678 | "caption_suffix": kwargs["caption_suffix"], 679 | "force_update": kwargs["force_update"], 680 | "prompt_fixed_beginning": kwargs["prompt_fixed_beginning"], 681 | } 682 | 683 | return mz_transformers.florence2_node_encode(kwargs) 684 | 685 | 686 | NODE_CLASS_MAPPINGS["MZ_Florence2Captioner"] = MZ_Florence2Captioner 687 | NODE_DISPLAY_NAME_MAPPINGS[ 688 | "MZ_Florence2Captioner"] = f"{AUTHOR_NAME} - Captioner(Florence-2)" 689 | 690 | 691 | class MZ_PaliGemmaCLIPTextEncode: 692 | @classmethod 693 | def INPUT_TYPES(s): 694 | return { 695 | "required": { 696 | "model_name": ([ 697 | "paligemma-sd3-long-captioner-v2", 698 | "paligemma-sd3-long-captioner", 699 | ],), 700 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 701 | "keep_device": ([False, True], {"default": False}), 702 | # "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 703 | }, 704 | "optional": { 705 | "image": ("IMAGE",), 706 | "clip": ("CLIP", ), 707 | }, 708 | } 709 | 710 | RETURN_TYPES = ("STRING", "CONDITIONING",) 711 | RETURN_NAMES = ("text", "conditioning",) 712 | OUTPUT_NODE = True 713 | FUNCTION = "encode" 714 | CATEGORY = CATEGORY_NAME 715 | 716 | def encode(self, **kwargs): 717 | kwargs = kwargs.copy() 718 | from . import mz_transformers 719 | importlib.reload(mz_transformers) 720 | 721 | return mz_transformers.paligemma_node_encode(kwargs) 722 | 723 | 724 | NODE_CLASS_MAPPINGS["MZ_PaliGemmaCLIPTextEncode"] = MZ_PaliGemmaCLIPTextEncode 725 | NODE_DISPLAY_NAME_MAPPINGS[ 726 | "MZ_PaliGemmaCLIPTextEncode"] = f"{AUTHOR_NAME} - CLIPTextEncode(PaliGemma)" 727 | 728 | 729 | class MZ_PaliGemmaCaptioner: 730 | @classmethod 731 | def INPUT_TYPES(s): 732 | return { 733 | "required": { 734 | "model_name": ([ 735 | "paligemma-sd3-long-captioner-v2", 736 | "paligemma-sd3-long-captioner", 737 | ],), 738 | "directory": ("STRING", {"default": "", "placeholder": "directory"}), 739 | "resolution": ("INT", {"default": 512, "min": 128, "max": 0xffffffffffffffff}), 740 | "caption_suffix": ("STRING", {"default": ".caption"}), 741 | "force_update": ([False, True], {"default": False}), 742 | "prompt_fixed_beginning": ("STRING", {"default": "", }), 743 | }, 744 | "optional": { 745 | }, 746 | } 747 | 748 | RETURN_TYPES = ("STRING",) 749 | RETURN_NAMES = ("debug",) 750 | OUTPUT_NODE = True 751 | FUNCTION = "encode" 752 | CATEGORY = CATEGORY_NAME 753 | 754 | def encode(self, **kwargs): 755 | kwargs = kwargs.copy() 756 | from . import mz_transformers 757 | importlib.reload(mz_transformers) 758 | kwargs["captioner_config"] = { 759 | "directory": kwargs["directory"], 760 | "resolution": kwargs["resolution"], 761 | "caption_suffix": kwargs["caption_suffix"], 762 | "force_update": kwargs["force_update"], 763 | "prompt_fixed_beginning": kwargs["prompt_fixed_beginning"], 764 | } 765 | return mz_transformers.paligemma_node_encode(kwargs) 766 | 767 | 768 | NODE_CLASS_MAPPINGS["MZ_PaliGemmaCaptioner"] = MZ_PaliGemmaCaptioner 769 | NODE_DISPLAY_NAME_MAPPINGS[ 770 | "MZ_PaliGemmaCaptioner"] = f"{AUTHOR_NAME} - Captioner(PaliGemma)" 771 | 772 | try: 773 | from . 
import mz_gen_translate 774 | mz_gen_translate.gen_translate( 775 | NODE_DISPLAY_NAME_MAPPINGS, NODE_CLASS_MAPPINGS) 776 | except Exception as e: 777 | print(f"Failed to generate translation: {e}") 778 | 779 | 780 | from .v1.init import NODE_CLASS_MAPPINGS as DEPRECATED_NODE_CLASS_MAPPINGS 781 | from .v1.init import NODE_DISPLAY_NAME_MAPPINGS as DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS 782 | 783 | NODE_CLASS_MAPPINGS.update(DEPRECATED_NODE_CLASS_MAPPINGS) 784 | NODE_DISPLAY_NAME_MAPPINGS.update(DEPRECATED_NODE_DISPLAY_NAME_MAPPINGS) 785 | -------------------------------------------------------------------------------- /configs/model_zoo.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "model": "Meta-Llama-3-8B-Instruct.Q4_K_M", 4 | "tags": ["llama"], 5 | "find_path": ["gguf"], 6 | "file_path": "gguf/Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", 7 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Meta-Llama-3-8B-Instruct-GGUF%2FMeta-Llama-3-8B-Instruct.Q4_K_M.gguf", 8 | "SHA256": "647a2b64cbcdbe670432d0502ebb2592b36dd364d51a9ef7a1387b7a4365781f" 9 | }, 10 | { 11 | "model": "llama3_if_ai_sdpromptmkr_Q4_K_M", 12 | "tags": ["llama"], 13 | "find_path": ["gguf"], 14 | "file_path": "gguf/llama3_if_ai_sdpromptmkr_Q4_K_M.gguf", 15 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama3_if_ai_sdpromptmkr_gguf%2Fllama3_if_ai_sdpromptmkr_Q4_K_M.gguf", 16 | "SHA256": "8c307d788852a28a00491c005155e030bc1ce76d96327035cecf1df789bb3a0a" 17 | }, 18 | { 19 | "model": "qwen2-7b-instruct-q5_k_m", 20 | "tags": ["llama"], 21 | "find_path": ["gguf"], 22 | "file_path": "gguf/qwen2-7b-instruct-q5_k_m.gguf", 23 | "url": "https://www.modelscope.cn/api/v1/models/qwen/Qwen2-7B-Instruct-GGUF/repo?Revision=master&FilePath=qwen2-7b-instruct-q5_k_m.gguf", 24 | "SHA256": "258dd2fa1bdf98b85327774e1fd36e2268c2a4b68eb9021d71106449ee4ba9d5" 25 | }, 26 | { 27 | "model": "qwen2-0_5b-instruct-q5_k_m", 28 | "tags": ["llama"], 29 | "find_path": ["gguf"], 30 | "file_path": "gguf/qwen2-0_5b-instruct-q5_k_m.gguf", 31 | "url": "https://www.modelscope.cn/api/v1/models/qwen/Qwen2-0.5B-Instruct-GGUF/repo?Revision=master&FilePath=qwen2-0_5b-instruct-q5_k_m.gguf", 32 | "SHA256": "16654d862b4b19f4f92ba14e11f056d0220400f59ee74e7a204cf0bf17e64d32" 33 | }, 34 | { 35 | "model": "omost-llama-3-8b-Q4_K_M", 36 | "tags": ["llama"], 37 | "find_path": ["gguf"], 38 | "file_path": "gguf/omost-llama-3-8b-Q4_K_M.gguf", 39 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=omost-gguf%2Fomost-llama-3-8b-Q4_K_M.gguf", 40 | "SHA256": "f09a3237a3b8ce8b96acdb3c83543a47bf7548764a659f888ead6ec1d8cfb780" 41 | }, 42 | { 43 | "model": "omost-phi-3-mini-128k-Q4_K_M", 44 | "tags": ["llama"], 45 | "find_path": ["gguf"], 46 | "file_path": "gguf/omost-phi-3-mini-128k-Q4_K_M.gguf", 47 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=omost-gguf%2Fomost-phi-3-mini-128k-Q4_K_M.gguf", 48 | "SHA256": "bd42cbddf4cbc00676292ddcbdb45567afe4795b9909482578f58bc2026cb60c" 49 | }, 50 | { 51 | "model": "Meta-Llama-3-8B.Q4_K_M", 52 | "tags": ["llama"], 53 | "find_path": ["gguf"], 54 | "file_path": "gguf/Meta-Llama-3-8B.Q4_K_M.gguf", 55 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Meta-Llama-3-8B%2FMeta-Llama-3-8B.Q4_K_M.gguf", 56 | "SHA256": 
"2a19e7532fb544cfd164c65a1b045bb415e14924890a8abee0ec84644f66f61f" 57 | }, 58 | { 59 | "model": "Phi-3-mini-4k-instruct-q4", 60 | "tags": ["llama"], 61 | "find_path": ["gguf"], 62 | "file_path": "gguf/Phi-3-mini-4k-instruct-q4.gguf", 63 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=Phi-3-mini-4k-instruct-gguf%2FPhi-3-mini-4k-instruct-q4.gguf", 64 | "SHA256": "1cd9a9df07350196623f93bf4829cf228959e07ad32f787b8fdd7f5956f5b9de" 65 | }, 66 | { 67 | "model": "llama3-zh.Q4_K_M", 68 | "tags": ["llama"], 69 | "find_path": ["gguf"], 70 | "file_path": "gguf/llama3-zh.Q4_K_M.gguf", 71 | "url": "https://modelscope.cn/api/v1/models/ModelM/Llama-3-8b-zh-gguf/repo?Revision=master&FilePath=llama3-zh.Q4_K_M.gguf", 72 | "SHA256": "1b04ec22e4079af8064a8378d55d2cd79e43eff9faf4bbe8f341f1fd792a53cd" 73 | }, 74 | { 75 | "model": "llama3_8b_instruct_dpo_zh-Q4_K_M", 76 | "tags": ["llama"], 77 | "find_path": ["gguf"], 78 | "file_path": "gguf/llama3_8b_instruct_dpo_zh-Q4_K_M.gguf", 79 | "url": "https://modelscope.cn/api/v1/models/shareAI/llama-3-8b-Instruct-dpo-chinese-loftq-gguf/repo?Revision=master&FilePath=llama3_8b_instruct_dpo_zh-Q4_K_M.gguf", 80 | "SHA256": "5231f5f119e1ef7db058211b8a140b530930a40b9b89c54db8455cc20ae3f699" 81 | }, 82 | { 83 | "model": "qwen1_5-14b-chat-q4_k_m", 84 | "tags": ["llama"], 85 | "find_path": ["gguf"], 86 | "file_path": "gguf/qwen1_5-14b-chat-q4_k_m.gguf", 87 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-14B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-14b-chat-q4_k_m.gguf", 88 | "SHA256": "46fbff2797c39c2d6aa555db0b0b4fe3f41b712a9b45266e438aa9a5047c0563" 89 | }, 90 | { 91 | "model": "qwen1_5-7b-chat-q4_k_m", 92 | "tags": ["llama"], 93 | "find_path": ["gguf"], 94 | "file_path": "gguf/qwen1_5-7b-chat-q4_k_m.gguf", 95 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-7B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-7b-chat-q4_k_m.gguf", 96 | "SHA256": "d7f132b1eff9ce35acf8e83ab96d2bc87eaedb68244e467bbc99e9f46a122a4c" 97 | }, 98 | { 99 | "model": "qwen1_5-4b-chat-q4_k_m", 100 | "tags": ["llama"], 101 | "find_path": ["gguf"], 102 | "file_path": "gguf/qwen1_5-4b-chat-q4_k_m.gguf", 103 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-4B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-4b-chat-q4_k_m.gguf", 104 | "SHA256": "426143ccd3241b9547c2b70c622b4f4ef3436ee07e44991bd69ad84b36cd9b9b" 105 | }, 106 | { 107 | "model": "qwen1_5-1_8b-chat-q4_k_m", 108 | "tags": ["llama"], 109 | "find_path": ["gguf"], 110 | "file_path": "gguf/qwen1_5-1_8b-chat-q4_k_m.gguf", 111 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-1.8B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-1_8b-chat-q4_k_m.gguf", 112 | "SHA256": "702e983c77883426806a2af75d34ab3e462e1b822f9dc23b49e02280c24b2b18" 113 | }, 114 | { 115 | "model": "qwen1_5-0_5b-chat-q4_k_m", 116 | "tags": ["llama"], 117 | "find_path": ["gguf"], 118 | "file_path": "gguf/qwen1_5-0_5b-chat-q4_k_m.gguf", 119 | "url": "https://modelscope.cn/api/v1/models/qwen/Qwen1.5-0.5B-Chat-GGUF/repo?Revision=master&FilePath=qwen1_5-0_5b-chat-q4_k_m.gguf", 120 | "SHA256": "92916b71d32f5afea48fb7383e3b48c5b1c111f5a59f0b83c764ea1d07fe1a3a" 121 | }, 122 | { 123 | "model": "llava-phi-3-mini-int4", 124 | "tags": ["llava"], 125 | "find_path": ["gguf"], 126 | "file_path": "gguf/llava-phi-3-mini-int4.gguf", 127 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-phi-3-mini-gguf%2Fllava-phi-3-mini-int4.gguf", 128 | 
"SHA256": "377876be20bac24488716c04824ab3a6978900679b40013b0d2585004555e658" 129 | }, 130 | { 131 | "model": "llava-phi-3-mini-mmproj-f16", 132 | "tags": [ 133 | "mmproj", 134 | "377876be20bac24488716c04824ab3a6978900679b40013b0d2585004555e658" 135 | ], 136 | "find_path": ["gguf"], 137 | "file_path": "gguf/llava-phi-3-mini-mmproj-f16.gguf", 138 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-phi-3-mini-gguf%2Fllava-phi-3-mini-mmproj-f16.gguf", 139 | "SHA256": "004fc09697203296f72321b296a8d48aade2d23e553cbfb1c1e6a0b5157a08d5" 140 | }, 141 | { 142 | "model": "MiniCPM-Llama3-V-2_5-Q4_K_M", 143 | "tags": ["llava-hide"], 144 | "find_path": ["gguf"], 145 | "file_path": "gguf/MiniCPM-Llama3-V-2_5-Q4_K_M.gguf", 146 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=MiniCPM-Llama3-V-2_5-gguf%2FMiniCPM-Llama3-V-2_5-Q4_K_M.gguf", 147 | "SHA256": "010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2" 148 | }, 149 | { 150 | "model": "MiniCPM-Llama3-V-2_5-mmproj-f16", 151 | "tags": [ 152 | "mmproj-hide", 153 | "010ec3ba94cb5ad2d9c8f95f46f01c6d80f83deab9df0a0831334ea45afff3e2" 154 | ], 155 | "find_path": ["gguf"], 156 | "file_path": "gguf/MiniCPM-Llama3-V-2_5-mmproj-f16.gguf", 157 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=MiniCPM-Llama3-V-2_5-gguf%2FMiniCPM-Llama3-V-2_5-mmproj-f16.gguf", 158 | "SHA256": "391d11736c3cd24a90417c47b0c88975e86918fcddb1b00494c4d715b08af13e" 159 | }, 160 | { 161 | "model": "llava-llama-3-8b-v1_1-int4", 162 | "tags": ["llava"], 163 | "find_path": ["gguf"], 164 | "file_path": "gguf/llava-llama-3-8b-v1_1-int4.gguf", 165 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-llama-3-8b-v1_1-gguf%2Fllava-llama-3-8b-v1_1-int4.gguf", 166 | "SHA256": "b6e1d703db0da8227fdb7127d8716bbc5049c9bf17ca2bb345be9470d217f3fc" 167 | }, 168 | { 169 | "model": "llava-llama-3-8b-v1_1-mmproj-f16", 170 | "tags": [ 171 | "mmproj", 172 | "b6e1d703db0da8227fdb7127d8716bbc5049c9bf17ca2bb345be9470d217f3fc" 173 | ], 174 | "find_path": ["gguf"], 175 | "file_path": "gguf/llava-llama-3-8b-v1_1-mmproj-f16.gguf", 176 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-llama-3-8b-v1_1-gguf%2Fllava-llama-3-8b-v1_1-mmproj-f16.gguf", 177 | "SHA256": "eb569aba7d65cf3da1d0369610eb6869f4a53ee369992a804d5810a80e9fa035" 178 | }, 179 | { 180 | "model": "ggml_llava1_5-7b-q4_k_m", 181 | "tags": ["llava"], 182 | "find_path": ["gguf"], 183 | "file_path": "gguf/ggml_llava1_5-7b-q4_k_m.gguf", 184 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_llava-v1.5-7b%2Fggml-model-q4_k.gguf", 185 | "SHA256": "7ac9c2f7b8d76cc7f3118cdf0953ebab7a7a9b12bad5dbe237219d2ab61765ea" 186 | }, 187 | { 188 | "model": "ggml_llava1_5-7b-mmproj-f16", 189 | "tags": [ 190 | "mmproj", 191 | "7ac9c2f7b8d76cc7f3118cdf0953ebab7a7a9b12bad5dbe237219d2ab61765ea" 192 | ], 193 | "find_path": ["gguf"], 194 | "file_path": "gguf/ggml_llava1_5-7b-mmproj-f16.gguf", 195 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_llava-v1.5-7b%2Fmmproj-model-f16.gguf", 196 | "SHA256": "b7c8ff0f58fca47d28ba92c4443adf8653f3349282cb8d9e6911f22d9b3814fe" 197 | }, 198 | { 199 | "model": "ggml_bakllava-1-q4_k_m", 200 | "tags": ["llava"], 201 | 
"find_path": ["gguf"], 202 | "file_path": "gguf/ggml_bakllava-1-q4_k_m.gguf", 203 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_bakllava-1%2Fggml-model-q5_k.gguf", 204 | "SHA256": "c93de1376be9b6977cc94d252a3d165d6059e07b528de0fa762534d9599b27d6" 205 | }, 206 | { 207 | "model": "ggml_bakllava-1-mmproj-f16", 208 | "tags": [ 209 | "mmproj", 210 | "c93de1376be9b6977cc94d252a3d165d6059e07b528de0fa762534d9599b27d6" 211 | ], 212 | "find_path": ["gguf"], 213 | "file_path": "gguf/ggml_bakllava-1-mmproj-f16.gguf", 214 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=ggml_bakllava-1%2Fmmproj-model-f16.gguf", 215 | "SHA256": "2e467eba710002839e0966d5e329942bb836eabd4e787bc713b07eff1d8ea13b" 216 | }, 217 | { 218 | "model": "llava_v1_6_mistral_7b_q5_k_m", 219 | "tags": ["llava"], 220 | "find_path": ["gguf"], 221 | "file_path": "gguf/llava_v1_6_mistral_7b_q5_k_m.gguf", 222 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-1.6-mistral-7b-gguf%2Fllava-v1.6-mistral-7b.Q5_K_M.gguf", 223 | "SHA256": "b1d37fc65ecb80aa8f1ce185bf4d7605bc3c5cc5bcc77a160c3a1b0377631112" 224 | }, 225 | { 226 | "model": "llava_v1_6_mistral_7b_mmproj_f16", 227 | "tags": [ 228 | "mmproj", 229 | "b1d37fc65ecb80aa8f1ce185bf4d7605bc3c5cc5bcc77a160c3a1b0377631112" 230 | ], 231 | "find_path": ["gguf"], 232 | "file_path": "gguf/llava_v1_6_mistral_7b_mmproj_f16.gguf", 233 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llava-1.6-mistral-7b-gguf%2Fmmproj-model-f16.gguf", 234 | "SHA256": "00205ee8a0d7a381900cd031e43105f86aa0d8c07bf329851e85c71a26632d16" 235 | } 236 | ] 237 | -------------------------------------------------------------------------------- /half_json.py: -------------------------------------------------------------------------------- 1 | # coding=utf8 2 | import json 3 | from typing import Any, List, NamedTuple, Optional, Tuple 4 | import json.decoder 5 | from json.decoder import JSONDecodeError as PyJSONDecodeError, JSONDecoder, py_scanstring 6 | from json.scanner import py_make_scanner 7 | from typing import Any, Dict, NamedTuple, Optional, Tuple, Union 8 | 9 | 10 | class FixResult(NamedTuple): 11 | success: bool 12 | line: str 13 | origin: bool 14 | 15 | 16 | class JSONFixer: 17 | def __init__(self, max_try: int = 20, max_stack: int = 3, *, js_style: bool = False) -> None: 18 | self._max_try = max_try 19 | self._max_stack = max_stack 20 | self._js_style = js_style 21 | self.last_fix: Optional[bool] = None 22 | self.fix_stack: List[str] = [] 23 | 24 | def fix(self, line: str, *, strict: bool = True) -> FixResult: 25 | try: 26 | json.loads(line, strict=strict) 27 | return FixResult(success=True, line=line, origin=True) 28 | except Exception: 29 | pass 30 | 31 | ok, new_line = self.fixwithtry(line, strict=strict) 32 | return FixResult(success=ok, line=new_line, origin=False) 33 | 34 | def fixwithtry(self, line: str, *, strict: bool = True) -> Tuple[bool, str]: 35 | if self._max_try <= 0: 36 | return False, line 37 | 38 | self.fix_stack = [] 39 | self.last_fix = None 40 | 41 | ok = False 42 | for _ in range(self._max_try): 43 | ok, new_line = self.patch_line(line, strict=strict) 44 | if ok: 45 | return ok, new_line 46 | 47 | self.last_fix = line != new_line 48 | if self.last_fix: 49 | self.fix_stack.insert(0, new_line) 50 | self.fix_stack = self.fix_stack[: self._max_stack] 51 | 52 | 
line = new_line 53 | return ok, line 54 | 55 | def patch_line(self, line: str, *, strict: bool = True) -> Tuple[bool, str]: 56 | result = decode_line(line, strict=strict) 57 | if result.success: 58 | return True, line 59 | 60 | if isinstance(result.exception, ValueError): 61 | return self.patch_value_error(line, result.err_info) 62 | 63 | if isinstance(result.exception, StopIteration): 64 | return self.patch_stop_iteration(line) 65 | 66 | if result.exception is None: 67 | return self.patch_half_parse(line, result.err_info) 68 | 69 | return False, line 70 | 71 | def patch_value_error(self, line: str, err_info: Any) -> Tuple[bool, str]: 72 | if err_info["error"] is None: 73 | return False, line 74 | 75 | error = err_info["error"] 76 | pos = err_info["pos"] 77 | nextchar = line[pos: pos + 1] 78 | lastchar = line[pos - 1: pos] 79 | nextline = line[pos:] 80 | lastline = line[:pos] 81 | 82 | if error == errors.StringUnterminatedString: 83 | return False, insert_line(line, '"', len(line)) 84 | if error == errors.ObjectExceptKey: 85 | if nextchar == "": 86 | return False, insert_line(line, "}", pos) 87 | if nextchar == ":": 88 | return False, insert_line(line, '""', pos) 89 | if lastchar in "{," and nextchar == ",": 90 | return False, remove_line(line, pos, pos + 1) 91 | if lastchar == "," and nextchar == "}": 92 | return False, remove_line(line, pos - 1, pos) 93 | if nextchar in "[{": 94 | return False, insert_line(line, '"":', pos) 95 | if self._js_style: 96 | # find 'abc' 97 | if nextchar == "'": 98 | nextline = remove_line(nextline, 0, 1) 99 | idx = nextline.find(":") 100 | if idx != -1 and idx != 0 and nextline[idx - 1] == "'": 101 | nextline = remove_line(nextline, idx - 1, idx) 102 | 103 | return False, lastline + nextline 104 | # abc:1 --> "aabc":1 105 | idx = nextline.find(":") 106 | if idx != -1: 107 | line = lastline + insert_line(nextline, '"', idx) 108 | return False, insert_line(line, '"', pos) 109 | # TODO process more case " 110 | return False, insert_line(line, '"', pos) 111 | if error == errors.ObjectExceptColon: 112 | return False, insert_line(line, ":", pos) 113 | if error == errors.ObjectExceptObject: 114 | if nextchar == "": 115 | if lastchar == "{": 116 | return False, insert_line(line, "}", pos) 117 | return False, insert_line(line, "null}", pos) 118 | if nextchar == "}": 119 | return False, insert_line(line, "null", pos) 120 | # TODO guess more 121 | return False, insert_line(line, '"', pos) 122 | if error == errors.ObjectExceptComma: 123 | if nextchar == "": 124 | return False, insert_line(line, "}", pos) 125 | return False, insert_line(line, ",", pos) 126 | if error == errors.ArrayExceptObject: 127 | if nextchar == "," and lastchar == "[": 128 | return False, remove_line(line, pos, pos + 1) 129 | if nextchar == ",": 130 | return False, insert_line(line, "null", pos) 131 | if nextchar == "]": 132 | return False, remove_line(line, pos - 1, pos) 133 | if nextchar == "": 134 | if lastchar == "[": 135 | return False, insert_line(line, "]", pos) 136 | return False, insert_line(line, "null]", pos) 137 | # TODO guess more? 138 | return False, insert_line(line, "{", pos) 139 | if error == errors.ArrayExceptComma: 140 | if len(line) == pos: 141 | return False, insert_line(line, "]", pos) 142 | return False, insert_line(line, ",", pos) 143 | # TODO unknonwn 144 | return False, line 145 | 146 | def patch_stop_iteration(self, line: str) -> Tuple[bool, str]: 147 | # TODO clean 148 | # TODO fix 149 | # 1. }] 150 | # 2. ]} 151 | # 3. constans 152 | # 4. 
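        # Added commentary: StopIteration means the scanner reached the end of the
        # input or a character that cannot start a JSON value. The code below first
        # rewrites a bare "-." number to "-0.", then prepends the opening brackets
        # implied by the closers seen before the first "{" or "[" (via
        # patch_lastest_left_object_and_array); only when nothing can be inferred and
        # the previous pass made no change does it fall back to patch_guess_left,
        # which compares the counts of unbalanced braces and brackets.
        # For example, "]}" should become "{[]}" after "{[" is prepended.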
- 153 | # 先 patch 完 {[]} 154 | # TODO: process number 155 | if line.startswith("-."): 156 | new_line = "-0." + line[2:] 157 | return False, new_line 158 | # patch 159 | left = patch_lastest_left_object_and_array(line) 160 | if left == "": 161 | if not self.last_fix: 162 | left = patch_guess_left(line) 163 | 164 | new_line = left + line 165 | return False, new_line 166 | 167 | def patch_half_parse(self, line: str, err_info: Any) -> Tuple[bool, str]: 168 | obj, end = err_info 169 | nextline = line[end:].strip() 170 | nextchar = nextline[:1] 171 | left = patch_lastest_left_object_and_array(nextline) 172 | # ?? 173 | if left == "": 174 | if nextchar == ",": 175 | left = "[" 176 | elif nextchar == ":" and isinstance(obj, str): 177 | left = "{" 178 | else: 179 | if not self.last_fix: 180 | left = patch_guess_left(nextline) 181 | 182 | new_line = left + line[:end] + nextline 183 | return False, new_line 184 | 185 | 186 | # TODO better name 187 | def patch_lastest_left_object_and_array(line: str) -> str: 188 | # '}]{[' --> '[{}]{[' 189 | pairs = {"}": "{", "]": "["} 190 | breaks = "{[" 191 | left = "" 192 | for char in line: 193 | if char in breaks: 194 | break 195 | if char in pairs: 196 | left = pairs[char] + left 197 | 198 | return left 199 | 200 | 201 | # TODO better name 202 | # TODO 改成 lastest 203 | # TODO {}}]]]] --> { not [ 204 | def patch_guess_left(line: str) -> str: 205 | miss_object = line.count("}") - line.count("{") 206 | miss_array = line.count("]") - line.count("[") 207 | if miss_object == miss_array == 0: 208 | if line[-1:] == '"' and line.count('"') == 1: 209 | return '"' 210 | elif miss_object >= miss_array: 211 | return "{" 212 | else: 213 | return "[" 214 | return "" 215 | 216 | 217 | def insert_line(line: str, value: str, pos: int) -> str: 218 | return line[:pos] + value + line[pos:] 219 | 220 | 221 | def remove_line(line: str, start: int, end: int) -> str: 222 | return line[:start] + line[end:] 223 | 224 | 225 | class JSONDecodeError: 226 | def __init__(self, parser, message): 227 | self.message = message 228 | self.parser = parser 229 | 230 | def __eq__(self, err): 231 | return err.parser == self.parser and self.message in err.message 232 | 233 | 234 | class errors: 235 | StringInvalidUXXXXEscape = JSONDecodeError( 236 | "py_scanstring", "Invalid \\uXXXX escape") 237 | # 2 different case 238 | StringUnterminatedString = JSONDecodeError( 239 | "py_scanstring", "Unterminated string starting at") 240 | StringInvalidControlCharacter = JSONDecodeError( 241 | "py_scanstring", "Invalid control character") 242 | StringInvalidEscape = JSONDecodeError("py_scanstring", "Invalid \\escape") 243 | ObjectExceptColon = JSONDecodeError( 244 | "JSONObject", "Expecting ':' delimiter") 245 | ObjectExceptObject = JSONDecodeError("JSONObject", "Expecting value") 246 | # 2 different case 247 | ObjectExceptKey = JSONDecodeError( 248 | "JSONObject", "Expecting property name enclosed in double quotes") 249 | ObjectExceptComma = JSONDecodeError( 250 | "JSONObject", "Expecting ',' delimiter") 251 | ArrayExceptObject = JSONDecodeError("JSONArray", "Expecting value") 252 | ArrayExceptComma = JSONDecodeError("JSONArray", "Expecting ',' delimiter") 253 | 254 | @classmethod 255 | def get_decode_error(cls, parser, message): 256 | err = JSONDecodeError(parser, message) 257 | for _, value in cls.__dict__.items(): 258 | if isinstance(value, JSONDecodeError): 259 | if err == value: 260 | return value 261 | return None 262 | 263 | """ 264 | 01 先不看,不研究 265 | 02 badcase: " --> "" success 266 | 03 控制符 pass 267 | 04 
unicode \\u 的 pass 268 | 05 同上 269 | 06 object 后面没有跟随 " , badcase: {abc":1} --> {"abc":1} 270 | 07 object key 后面没有 : , badcase: {"abc"1} --> {"abc":1} 271 | 08 object 开始检测 Value 收到 StopIteration 272 | 08.1 要么后面没有了 273 | 08.2 要么后面不是 "/{/[/n[ull]/t[rue]/f[alse]/number/NaN/Infinity/-Infinity 开头的东西 274 | -- 08.1 后面补上 null} 275 | -- 08.2 无脑补一个 " 276 | 09 object 解析完一个 pair 后,下一个不是}, 期待一个 ',' 277 | badcase {"k":1"s":2} 278 | 10 在 09 的基础上解析完{"k":1, 发现下一个不是 ", 这个后面再优化(暂时和 06 一致) 279 | badcase {"k":1,x":2} 280 | 11 array 开始检测 Value 收到 StopIteration 281 | 11.1 要么后面没有了,补上] 282 | 11.2 同 08.2,无脑补一个{ 看看 283 | 12 array 解析完前一个 object, 需要一个 , 284 | 这里 nextchar 既不是 ] 也不是, 代表这个 nextchar 的 end 也已经+1 了,所以减 2 285 | """ 286 | 287 | 288 | def errmsg_inv(e: ValueError) -> Dict[str, Any]: 289 | assert isinstance(e, PyJSONDecodeError) 290 | parser = e.__dict__.get("parser", "") 291 | errmsg = e.msg 292 | localerr = errors.get_decode_error(parser, errmsg) 293 | return { 294 | "parsers": e.__dict__.get("parsers", []), 295 | "error": localerr, 296 | "lineno": e.lineno, 297 | "colno": e.colno, 298 | "pos": e.pos, 299 | } 300 | 301 | 302 | def record_parser_name(parser: Any) -> Any: 303 | def new_parser(*args: Any, **kwargs: Any) -> Any: 304 | try: 305 | return parser(*args, **kwargs) 306 | except Exception as e: 307 | if "parser" not in e.__dict__: 308 | e.__dict__["parser"] = parser.__name__ 309 | if "parsers" not in e.__dict__: 310 | e.__dict__["parsers"] = [] 311 | e.__dict__["parsers"].append(parser.__name__) 312 | raise e 313 | 314 | return new_parser 315 | 316 | 317 | def make_decoder(*, strict: bool = True) -> JSONDecoder: 318 | json.decoder.scanstring = record_parser_name(py_scanstring) 319 | 320 | decoder = JSONDecoder(strict=strict) 321 | decoder.parse_object = record_parser_name(decoder.parse_object) 322 | decoder.parse_array = record_parser_name(decoder.parse_array) 323 | decoder.parse_string = record_parser_name(py_scanstring) 324 | decoder.parse_object = record_parser_name(decoder.parse_object) 325 | 326 | decoder.scan_once = py_make_scanner(decoder) 327 | return decoder 328 | 329 | 330 | decoder = make_decoder() 331 | decoder_unstrict = make_decoder(strict=False) 332 | 333 | 334 | class DecodeResult(NamedTuple): 335 | success: bool 336 | exception: Optional[Exception] 337 | err_info: Optional[Union[Dict[str, Any], Tuple[Any, Any]]] 338 | 339 | 340 | def decode_line(line: str, *, strict: bool = True) -> DecodeResult: 341 | try: 342 | obj, end = (decoder if strict else decoder_unstrict).scan_once(line, 0) 343 | ok = end == len(line) 344 | return DecodeResult(success=ok, exception=None, err_info=(obj, end)) 345 | except StopIteration as e: 346 | return DecodeResult(success=False, exception=e, err_info=None) 347 | except ValueError as e: 348 | err_info = errmsg_inv(e) 349 | return DecodeResult(success=False, exception=e, err_info=err_info) 350 | -------------------------------------------------------------------------------- /mz_gen_translate.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import os 4 | import json 5 | import folder_paths 6 | from pathlib import Path 7 | COMFY_PATH = Path(folder_paths.__file__).parent 8 | 9 | 10 | ZH_Replace_Map = { 11 | "mmproj_model_name": "mmproj模型名称", 12 | "model_name": "模型名称", 13 | "llama_cpp_model": "llama.cpp模型", 14 | "mmproj_model": "mmproj模型", 15 | "resolution": "分辨率", 16 | "sd_format": "SD格式化", 17 | "ImageInterrogator": "图像反推", 18 | "image_interrogator_model": "图像反推模型", 19 | "image_interrogator": "图像反推", 20 | "image": 
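    # Added note: gen_translate below writes a ComfyUI_MinusZone.translate.json file
    # into the AIGODLIKE ComfyUI-Translation extension's zh-CN/Nodes directory and
    # uses this map to substring-replace English node titles and widget names with
    # their Chinese labels.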
"图像", 21 | "download_source": "下载源", 22 | "prompt_version": "提示词版本", 23 | "style_presets": "风格预设", 24 | "keep_device": "模型常驻显存", 25 | "llama_cpp_options": "llama.cpp可选配置", 26 | "Options": "可选配置", 27 | "LLamaCPPModelConfigManualSelect": "llama.cpp模型配置手动选择", 28 | "LLamaCPP": "llama.cpp", 29 | "CLIPTextEncode": "CLIP文本编码器", 30 | "clip": "CLIP", 31 | "conditioning": "条件", 32 | "customize_instruct": "自定义指令", 33 | "CustomizeInstruct": "自定义指令", 34 | "deprecated": "已废弃", 35 | "ModelConfigManualSelect": "手动模型选择器", 36 | "ModelConfigDownloaderSelect": "预设模型下载器", 37 | "captioner_config": "打标器配置", 38 | "post_processing": "后处理", 39 | } 40 | 41 | 42 | def gen_translate(NODE_DISPLAY_NAME_MAPPINGS={}, NODE_CLASS_MAPPINGS={}): 43 | translation_dirs = [ 44 | os.path.join(COMFY_PATH, "custom_nodes", 45 | "AIGODLIKE-COMFYUI-TRANSLATION", "zh-CN", "Nodes"), 46 | os.path.join(COMFY_PATH, "custom_nodes", 47 | "AIGODLIKE-ComfyUI-Translation", "zh-CN", "Nodes"), 48 | ] 49 | translation_dir = translation_dirs[0] 50 | for dir in translation_dirs: 51 | if os.path.exists(dir): 52 | translation_dir = dir 53 | break 54 | translation_config = os.path.join( 55 | translation_dir, "ComfyUI_MinusZone.translate.json") 56 | if os.path.exists(translation_dir): 57 | if not os.path.exists(translation_config): 58 | with open(translation_config, "w", encoding="utf-8") as f: 59 | f.write("{}") 60 | 61 | if os.path.exists(translation_config): 62 | translate_config = "{}" 63 | with open(translation_config, "r", encoding="utf-8") as f: 64 | translate_config = f.read() 65 | nodes = json.loads(translate_config) 66 | for key in NODE_DISPLAY_NAME_MAPPINGS: 67 | if key not in nodes: 68 | nodes[key] = {} 69 | 70 | title = NODE_DISPLAY_NAME_MAPPINGS[key] 71 | for k, v in ZH_Replace_Map.items(): 72 | title = title.replace(k, v) 73 | nodes[key]["title"] = title 74 | 75 | if key in NODE_CLASS_MAPPINGS: 76 | node = NODE_CLASS_MAPPINGS[key] 77 | node_INPUT_TYPES = node.INPUT_TYPES() 78 | node_INPUT_TYPES_required = node_INPUT_TYPES.get( 79 | "required", {}) 80 | nodes[key]["widgets"] = {} 81 | for widget_name, _ in node_INPUT_TYPES_required.items(): 82 | widget_name_zh = widget_name 83 | for k, v in ZH_Replace_Map.items(): 84 | widget_name_zh = widget_name_zh.replace(k, v) 85 | nodes[key]["widgets"][widget_name] = widget_name_zh 86 | 87 | node_INPUT_TYPES_optional = node_INPUT_TYPES.get( 88 | "optional", {}) 89 | nodes[key]["inputs"] = {} 90 | for widget_name, _ in node_INPUT_TYPES_optional.items(): 91 | widget_name_zh = widget_name 92 | for k, v in ZH_Replace_Map.items(): 93 | widget_name_zh = widget_name_zh.replace(k, v) 94 | nodes[key]["inputs"][widget_name] = widget_name_zh 95 | 96 | try: 97 | node_RETURN_NAMES = node.RETURN_NAMES 98 | nodes[key]["outputs"] = {} 99 | for widget_name in node_RETURN_NAMES: 100 | widget_name_zh = widget_name 101 | for k, v in ZH_Replace_Map.items(): 102 | widget_name_zh = widget_name_zh.replace(k, v) 103 | nodes[key]["outputs"][widget_name] = widget_name_zh 104 | except: 105 | pass 106 | 107 | with open(translation_config, "w", encoding="utf-8") as f: 108 | f.write(json.dumps(nodes, indent=4, ensure_ascii=False)) 109 | 110 | else: 111 | print("No translation dir found!") 112 | -------------------------------------------------------------------------------- /mz_llama_core_nodes.py: -------------------------------------------------------------------------------- 1 | 2 | import importlib 3 | import json 4 | import os 5 | from . import mz_prompt_utils 6 | from . import mz_llama_cpp 7 | from . 
import mz_prompts 8 | 9 | 10 | def get_schema_base_type(t): 11 | return { 12 | "type": t, 13 | } 14 | 15 | 16 | def get_schema_obj(keys_type={}, required=[]): 17 | item = {} 18 | for key, value in keys_type.items(): 19 | if type(value) == str: 20 | value = get_schema_base_type(value) 21 | item[key] = value 22 | return { 23 | "type": "object", 24 | "properties": item, 25 | "required": required 26 | } 27 | 28 | 29 | def get_schema_array(item_type="string"): 30 | if type(item_type) == str: 31 | item_type = get_schema_base_type(item_type) 32 | return { 33 | "type": "array", 34 | "items": item_type, 35 | } 36 | 37 | 38 | high_quality_prompt = "((high quality:1.4), (best quality:1.4), (masterpiece:1.4), (8K resolution), (2k wallpaper))" 39 | style_presets_prompt = { 40 | "none": "", 41 | "high_quality": high_quality_prompt, 42 | "photography": f"{high_quality_prompt}, (RAW photo, best quality), (realistic, photo-realistic:1.2), (bokeh, cinematic shot, dynamic composition, incredibly detailed, sharpen, details, intricate detail, professional lighting, film lighting, 35mm, anamorphic, lightroom, cinematography, bokeh, lens flare, film grain, HDR10, 8K)", 43 | "illustration": f"{high_quality_prompt}, ((detailed matte painting, intricate detail, splash screen, complementary colors), (detailed),(intricate details),illustration,an extremely delicate and beautiful,ultra-detailed,highres,extremely detailed)", 44 | } 45 | 46 | 47 | def get_style_presets(): 48 | return [ 49 | "none", 50 | "high_quality", 51 | "photography", 52 | "illustration", 53 | ] 54 | 55 | 56 | def llama_cpp_node_encode(args_dict): 57 | importlib.reload(mz_prompts) 58 | importlib.reload(mz_llama_cpp) 59 | # importlib.reload(mz_prompt_utils) 60 | 61 | model_config = args_dict.get("llama_cpp_model", {}) 62 | mz_prompt_utils.Utils.print_log(f"model_config: {model_config}") 63 | 64 | chat_format = model_config.get("chat_format", None) 65 | 66 | select_model_type = model_config.get("type", "ManualSelect") 67 | if select_model_type == "ManualSelect": 68 | model_file = model_config.get("model_path", "auto") 69 | if model_file == "auto": 70 | model_file = mz_prompt_utils.Utils.get_auto_model_fullpath( 71 | "Meta-Llama-3-8B-Instruct.Q4_K_M") 72 | 73 | if "llama-3" in mz_llama_cpp.get_llama_cpp_chat_handlers(): 74 | chat_format = "llama-3" 75 | 76 | elif select_model_type == "DownloaderSelect": 77 | model_name = model_config.get("model_name", "") 78 | model_file = mz_prompt_utils.Utils.get_auto_model_fullpath( 79 | model_name) 80 | else: 81 | raise Exception("Unknown select_model_type") 82 | 83 | mz_prompt_utils.Utils.print_log(f"model_file: {model_file}") 84 | 85 | text = args_dict.get("text", "") 86 | style_presets = args_dict.get("style_presets", "") 87 | options = args_dict.get("llama_cpp_options", {}) 88 | keep_device = args_dict.get("keep_device", False) 89 | seed = args_dict.get("seed", -1) 90 | options["seed"] = seed 91 | options["chat_format"] = chat_format 92 | 93 | customize_instruct = args_dict.get("customize_instruct", None) 94 | mz_prompt_utils.Utils.print_log( 95 | f"customize_instruct: {customize_instruct}") 96 | try: 97 | schema = None 98 | if customize_instruct is None: 99 | schema = get_schema_obj( 100 | keys_type={ 101 | "description": get_schema_base_type("string"), 102 | "long_prompt": get_schema_base_type("string"), 103 | "main_color_word": get_schema_base_type("string"), 104 | "camera_angle_word": get_schema_base_type("string"), 105 | "style_words": get_schema_array("string"), 106 | "subject_words": 
get_schema_array("string"), 107 | "light_words": get_schema_array("string"), 108 | "environment_words": get_schema_array("string"), 109 | }, 110 | required=[ 111 | "description", 112 | "long_prompt", 113 | "main_color_word", 114 | "camera_angle_word", 115 | "style_words", 116 | "subject_words", 117 | "light_words", 118 | "environment_words", 119 | ] 120 | ) 121 | 122 | question = f"IDEA: {style_presets},{text}" 123 | if style_presets == "none": 124 | question = f"IDEA: {text}" 125 | 126 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 127 | 128 | else: 129 | 130 | system_prompt = customize_instruct.get("system", "") 131 | question = customize_instruct.get("instruct", "%text%") 132 | 133 | system_prompt = system_prompt.replace("%text%", text) 134 | question = question.replace("%text%", text) 135 | 136 | mz_prompt_utils.Utils.print_log(f"system_prompt: {system_prompt}") 137 | mz_prompt_utils.Utils.print_log(f"question: {question}") 138 | 139 | if schema is not None: 140 | response_text = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 141 | model_file=model_file, 142 | system=system_prompt, 143 | question=question, 144 | schema=schema, 145 | options=options, 146 | ) 147 | try: 148 | response_json = json.loads(response_text) 149 | except Exception as e: 150 | from . import half_json 151 | print("json.loads failed, try fix response_text: ", response_text) 152 | json_fixer = half_json.JSONFixer() 153 | fix_resp = json_fixer.fix(response_text) 154 | if fix_resp.success: 155 | print("fix success, use fixed response_text: ", fix_resp.line) 156 | response_json = json.loads(fix_resp.line) 157 | else: 158 | raise e 159 | 160 | mz_prompt_utils.Utils.print_log( 161 | f"response_json: {json.dumps(response_json, indent=2)}") 162 | 163 | responses = [] 164 | for key, value in response_json.items(): 165 | if type(value) == list: 166 | # 去除开头.和空格 167 | value = [v.strip().lstrip(".") for v in value] 168 | # 去除空字符串 169 | value = [v for v in value if v != ""] 170 | if len(value) > 0: 171 | responses.append(f"({', '.join(value)})") 172 | 173 | else: 174 | if value != "": 175 | responses.append(f"({value})") 176 | 177 | response = ", ".join(responses) 178 | else: 179 | response = mz_llama_cpp.llama_cpp_simple_interrogator( 180 | model_file=model_file, 181 | system=system_prompt, 182 | question=question, 183 | options=options, 184 | ) 185 | 186 | start_str = customize_instruct.get("start_str", "") 187 | if start_str != "" and response.find(start_str) != -1: 188 | full_response_list = response.split(start_str) 189 | # 删除第一个元素 190 | full_response_list.pop(0) 191 | response = start_str.join(full_response_list) 192 | 193 | end_str = customize_instruct.get("end_str", "") 194 | if end_str != "" and response.find(end_str) != -1: 195 | full_response_list = response.split(end_str) 196 | # 删除最后一个元素 197 | full_response_list.pop() 198 | response = end_str.join(full_response_list) 199 | 200 | if keep_device is False: 201 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 202 | 203 | # 去除换行 204 | while response.find("\n") != -1: 205 | response = response.replace("\n", " ") 206 | 207 | # 句号换成逗号 208 | while response.find(".") != -1: 209 | response = response.replace(".", ",") 210 | 211 | # 去除多余逗号 212 | while response.find(",,") != -1: 213 | response = response.replace(",,", ",") 214 | while response.find(", ,") != -1: 215 | response = response.replace(", ,", ",") 216 | 217 | response = mz_prompt_utils.Utils.prompt_zh_to_en(response) 218 | 219 | style_presets_prompt_text = 
style_presets_prompt.get(style_presets, "") 220 | 221 | if style_presets_prompt_text != "": 222 | response = f"{style_presets_prompt_text}, {response}" 223 | 224 | except Exception as e: 225 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 226 | raise e 227 | 228 | conditionings = None 229 | clip = args_dict.get("clip", None) 230 | if clip is not None: 231 | conditionings = mz_prompt_utils.Utils.a1111_clip_text_encode( 232 | clip, response, ) 233 | 234 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 235 | 236 | 237 | def image_interrogator_captioner(args_dict): 238 | import PIL.Image as Image 239 | captioner_config = args_dict.get("captioner_config", {}) 240 | directory = captioner_config.get("directory", None) 241 | force_update = captioner_config.get("force_update", False) 242 | caption_suffix = captioner_config.get("caption_suffix", "") 243 | retry_keyword = captioner_config.get("retry_keyword", "") 244 | retry_keywords = retry_keyword.split(",") 245 | 246 | retry_keywords = [k.strip() for k in retry_keywords] 247 | retry_keywords = [k for k in retry_keywords if k != ""] 248 | 249 | pre_images = [] 250 | for root, dirs, files in os.walk(directory): 251 | for file in files: 252 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"): 253 | image_path = os.path.join(root, file) 254 | base_file_path = os.path.splitext(image_path)[0] 255 | caption_file = os.path.join( 256 | root, base_file_path + caption_suffix) 257 | if os.path.exists(caption_file) and force_update is False: 258 | continue 259 | 260 | pre_images.append({ 261 | "image_path": image_path, 262 | "caption_path": caption_file 263 | }) 264 | 265 | result = [] 266 | 267 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 268 | for i in range(len(pre_images)): 269 | pre_image = pre_images[i] 270 | image_path = pre_image["image_path"] 271 | caption_file = pre_image["caption_path"] 272 | 273 | onec_args_dict = args_dict.copy() 274 | del onec_args_dict["captioner_config"] 275 | 276 | pil_image = Image.open(image_path) 277 | onec_args_dict["image"] = mz_prompt_utils.Utils.pil2tensor(pil_image) 278 | 279 | if i < len(pre_images) - 1: 280 | onec_args_dict["keep_device"] = True 281 | 282 | pb.update( 283 | i, 284 | len(pre_images), 285 | pil_image.copy(), 286 | ) 287 | 288 | response = image_interrogator_node_encode(onec_args_dict) 289 | response = response.get("result", ())[0] 290 | response = response.strip() 291 | is_retry = response == "" 292 | for k in retry_keywords: 293 | if response.find(k) != -1: 294 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 295 | is_retry = True 296 | break 297 | 298 | mz_prompt_utils.Utils.print_log( 299 | "\n\nonec_args_dict: ", onec_args_dict) 300 | if is_retry: 301 | for retry_n in range(5): 302 | print(f"Retry {retry_n+1}...") 303 | onec_args_dict["seed"] = onec_args_dict["seed"] + 1 304 | response = image_interrogator_node_encode(onec_args_dict) 305 | response = response.get("result", ())[0] 306 | response = response.strip() 307 | is_retry = response == "" 308 | for k in retry_keywords: 309 | if response.find(k) != -1: 310 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 311 | is_retry = True 312 | break 313 | 314 | if is_retry is False: 315 | break 316 | if is_retry: 317 | print(f"重试失败,图片被跳过 ; Retry failed") 318 | response = "" 319 | 320 | if response != "": 321 | with open(caption_file, "w") as f: 322 | prompt_fixed_beginning = captioner_config.get( 323 | "prompt_fixed_beginning", "") 324 | 
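                # Added note: the optional prompt_fixed_beginning from captioner_config
                # is prepended to every caption, and the result is saved as a sidecar
                # text file (image base name + caption_suffix) alongside the image.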
f.write(prompt_fixed_beginning + response) 325 | 326 | result.append(response) 327 | 328 | # mz_prompt_webserver.show_toast_success( 329 | # f"提示词保存成功(prompt saved successfully): {caption_file}", 330 | # 1000, 331 | # ) 332 | 333 | return result 334 | 335 | 336 | def image_interrogator_node_encode(args_dict): 337 | importlib.reload(mz_prompts) 338 | 339 | captioner_config = args_dict.get("captioner_config", None) 340 | if captioner_config is not None: 341 | image_interrogator_captioner(args_dict) 342 | # raise Exception( 343 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 344 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 345 | 346 | model_config = args_dict.get("image_interrogator_model", {}) 347 | 348 | chat_format = model_config.get("chat_format", None) 349 | llama_cpp_model = model_config.get("llama_cpp_model", "auto") 350 | mmproj_model = model_config.get("mmproj_model", "auto") 351 | 352 | select_model_type = model_config.get("type", "ManualSelect") 353 | if select_model_type == "ManualSelect": 354 | llama_cpp_model = model_config.get("model_path", "auto") 355 | if llama_cpp_model == "auto": 356 | llama_cpp_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 357 | "ggml_llava1_5-7b-q4_k_m") 358 | else: 359 | llama_cpp_model = os.path.join( 360 | mz_prompt_utils.Utils.get_gguf_models_path(), llama_cpp_model) 361 | 362 | if mmproj_model.endswith("auto"): 363 | llama_cpp_model_sha256 = mz_prompt_utils.Utils.file_sha256( 364 | llama_cpp_model) 365 | 366 | mmproj_model_name = mz_prompt_utils.Utils.get_model_zoo( 367 | tags_filter=llama_cpp_model_sha256) 368 | if len(mmproj_model_name) == 0: 369 | mmproj_model_name = None 370 | else: 371 | mmproj_model_name = mmproj_model_name[0].get("model", None) 372 | 373 | if mmproj_model_name is None: 374 | mz_prompt_utils.Utils.print_log( 375 | "llama_cpp_model_sha256: ", llama_cpp_model_sha256) 376 | raise Exception( 377 | "未能自动找到对应的mmproj文件 ; Failed to automatically find the corresponding mmproj file.") 378 | else: 379 | pass 380 | 381 | mmproj_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 382 | mmproj_model_name) 383 | else: 384 | # mmproj_model = os.path.join( 385 | # mz_prompt_utils.Utils.get_gguf_models_path(), mmproj_model) 386 | pass 387 | 388 | elif select_model_type == "DownloaderSelect": 389 | model_name = model_config.get("model_name") 390 | llama_cpp_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 391 | model_name) 392 | 393 | mmproj_model = model_config.get("mmproj_model_name", "auto") 394 | 395 | mmproj_model_name = mmproj_model 396 | if mmproj_model == "auto": 397 | llama_cpp_model_sha256 = mz_prompt_utils.Utils.file_sha256( 398 | llama_cpp_model) 399 | 400 | mz_prompt_utils.Utils.print_log( 401 | "llama_cpp_model_sha256: ", llama_cpp_model_sha256) 402 | 403 | mmproj_model_name = mz_prompt_utils.Utils.get_model_zoo( 404 | tags_filter=llama_cpp_model_sha256) 405 | if len(mmproj_model_name) == 0: 406 | mmproj_model_name = None 407 | else: 408 | mmproj_model_name = mmproj_model_name[0].get("model", None) 409 | 410 | if mmproj_model_name is None: 411 | raise Exception( 412 | "未能自动找到对应的mmproj文件 ; Failed to automatically find the corresponding mmproj file") 413 | 414 | mmproj_model = mz_prompt_utils.Utils.get_auto_model_fullpath( 415 | mmproj_model_name) 416 | 417 | else: 418 | raise Exception("Unknown select_model_type") 419 | 420 | image = args_dict.get("image", None) 421 | image = mz_prompt_utils.Utils.tensor2pil(image) 422 | 423 | resolution = 
args_dict.get("resolution", 512) 424 | keep_device = args_dict.get("keep_device", False) 425 | seed = args_dict.get("seed", -1) 426 | options = args_dict.get("llama_cpp_options", {}) 427 | options["seed"] = seed 428 | options["chat_format"] = chat_format 429 | 430 | image = mz_prompt_utils.Utils.resize_max(image, resolution, resolution) 431 | 432 | customize_instruct = args_dict.get("customize_instruct", None) 433 | if customize_instruct is None: 434 | # system_prompt = mz_prompts.GPT4VImageCaptioner_System 435 | # question = mz_prompts.GPT4VImageCaptioner_Prompt 436 | 437 | # system_prompt = mz_prompts.M_ImageCaptioner2_System 438 | # question = mz_prompts.M_ImageCaptioner2_Prompt 439 | 440 | system_prompt = "You are an assistant who perfectly describes images." 441 | question = "Describe this image in detail please." 442 | else: 443 | system_prompt = customize_instruct.get("system", "") 444 | question = customize_instruct.get("instruct", "") 445 | 446 | mz_prompt_utils.Utils.print_log(f"mmproj_model: {mmproj_model}") 447 | response = mz_llama_cpp.llava_cpp_simple_interrogator( 448 | model_file=llama_cpp_model, 449 | mmproj_file=mmproj_model, 450 | image=image, 451 | options=options, 452 | system=system_prompt, 453 | question=question, 454 | ) 455 | response = response.strip() 456 | if response is not None and response != "": 457 | 458 | if args_dict.get("post_processing", False): 459 | 460 | # 双引号换成空格 461 | response = response.replace("\"", " ") 462 | # 中括号换成空格 463 | response = response.replace("[", " ") 464 | response = response.replace("]", " ") 465 | 466 | # 括号换成空格 467 | response = response.replace("(", " ") 468 | response = response.replace(")", " ") 469 | 470 | # 去除多余空格 471 | while response.find(" ") != -1: 472 | response = response.replace(" ", " ") 473 | 474 | # 从第一个为英文字母的地方开始截取 475 | for i in range(len(response)): 476 | if response[i].isalpha(): 477 | response = response[i:] 478 | break 479 | 480 | response = response.strip() 481 | schema = get_schema_obj( 482 | keys_type={ 483 | "short_describes": get_schema_base_type("string"), 484 | "subject_tags": get_schema_array("string"), 485 | "action_tags": get_schema_array("string"), 486 | "light_tags": get_schema_array("string"), 487 | "scene_tags": get_schema_array("string"), 488 | "mood_tags": get_schema_array("string"), 489 | "style_tags": get_schema_array("string"), 490 | "object_tags": get_schema_array("string"), 491 | }, 492 | required=[ 493 | "short_describes", 494 | "subject_tags", 495 | "action_tags", 496 | "lights_tags", 497 | "scenes_tags", 498 | "moods_tags", 499 | "styles_tags", 500 | "objects_tags", 501 | ] 502 | ) 503 | response_json_str = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 504 | model_file=llama_cpp_model, 505 | system=mz_prompts.ImageCaptionerPostProcessing_System, 506 | question=f"Content: {response}", 507 | schema=schema, 508 | options=options, 509 | ) 510 | 511 | try: 512 | response_json = json.loads(response_json_str) 513 | except Exception as e: 514 | from . 
import half_json 515 | print("json.loads failed, try fix response_json_str: ", 516 | response_json_str) 517 | json_fixer = half_json.JSONFixer() 518 | fix_resp = json_fixer.fix(response_json_str) 519 | if fix_resp.success: 520 | print("fix success, use fixed response_json_str: ", 521 | fix_resp.line) 522 | response_json = json.loads(fix_resp.line) 523 | else: 524 | raise e 525 | 526 | responses = [] 527 | 528 | def pure_words(text: str) -> bool: 529 | number_of_spaces = text.count(" ") 530 | if number_of_spaces > 2: 531 | return False 532 | for c in text: 533 | if not c.isalpha() and c != "-" and c != "_" and c != " ": 534 | return False 535 | 536 | return True 537 | 538 | for key, value in response_json.items(): 539 | if type(value) == list: 540 | 541 | # 去除开头.和空格 542 | value = [v.strip().lstrip(".") for v in value] 543 | # 去除空字符串 544 | value = [v for v in value if v != ""] 545 | 546 | # 去除带有空格和标点符号的字符串 547 | value = [ 548 | v for v in value if pure_words(v)] 549 | 550 | # 空格换成下划线 551 | value = [v.replace(" ", "_") for v in value] 552 | 553 | # 首字母小写 554 | value = [v.lower() for v in value] 555 | 556 | if len(value) > 0: 557 | responses.append(f"{', '.join(value)}") 558 | 559 | description = response_json.get("short_describes", "") 560 | if description != "": 561 | responses.append(f"{description}") 562 | 563 | # 对response进行去重 564 | response = ", ".join(responses) 565 | 566 | if keep_device is False: 567 | mz_llama_cpp.freed_gpu_memory(model_file=llama_cpp_model) 568 | 569 | # return response 570 | 571 | conditionings = None 572 | clip = args_dict.get("clip", None) 573 | if clip is not None: 574 | conditionings = mz_prompt_utils.Utils.a1111_clip_text_encode( 575 | clip, response, ) 576 | 577 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 578 | -------------------------------------------------------------------------------- /mz_llama_cpp.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import importlib 3 | import json 4 | import os 5 | import shutil 6 | import subprocess 7 | import sys 8 | import torch 9 | try: 10 | from . import mz_prompt_utils 11 | from . import mz_prompt_webserver 12 | except ImportError: 13 | pass 14 | 15 | 16 | def check_llama_cpp_requirements(): 17 | min_version = "0.2.63" 18 | last_version = "0.2.76" 19 | try: 20 | from llama_cpp import Llama 21 | import llama_cpp 22 | if llama_cpp.__version__ < min_version: 23 | raise ImportError("llama_cpp version is too low. (llama_cpp版本过低)") 24 | except ImportError: 25 | py_version = "" 26 | if sys.version_info.major == 3: 27 | if sys.version_info.minor == 10: 28 | py_version = "310" 29 | elif sys.version_info.minor == 11: 30 | py_version = "311" 31 | elif sys.version_info.minor == 12: 32 | py_version = "312" 33 | 34 | if py_version == "": 35 | raise ValueError( 36 | f"Please upgrade python to version 3.10 or above. (找不到对应的python版本) 当前版本:{sys.version_info.major}.{sys.version_info.minor}") 37 | 38 | cuda_version = "" 39 | if torch.cuda.is_available(): 40 | cuda_version = "cu" + torch.version.cuda.replace(".", "") 41 | if cuda_version not in ["cu121", "cu122", "cu123"]: 42 | cuda_version = "cu121" 43 | print( 44 | f"Warning: The current version of cuda is not supported. 
(警告: 当前cuda版本不支持) {torch.version.cuda} (默认使用cu121)") 45 | else: 46 | cuda_version = "cpu" 47 | 48 | # https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.63-cu123/llama_cpp_python-0.2.63-cp310-cp310-linux_x86_64.whl 49 | 50 | system_name = "linux_x86_64" 51 | if sys.platform == "linux": 52 | if sys.maxsize > 2**32: 53 | system_name = "linux_x86_64" 54 | else: 55 | system_name = "linux_i686" 56 | elif sys.platform == "darwin": 57 | # 请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install {whl文件路径}安装 58 | raise ValueError( 59 | "Please download the corresponding whl file from https://github.com/abetlen/llama-cpp-python/releases and install it using pip install {whl file path} (请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install {whl文件路径}安装)") 60 | elif sys.platform == "win32": 61 | system_name = "win_amd64" 62 | else: 63 | raise ValueError( 64 | f"Unsupported platform. (不支持的平台) {sys.platform} (请手动前往https://github.com/abetlen/llama-cpp-python/releases 下载对应的whl文件后 使用pip install 'whl文件路径' 安装)") 65 | 66 | wheel_name = f"llama_cpp_python-{last_version}-cp{py_version}-cp{py_version}-{system_name}.whl" 67 | if cuda_version == "cpu": 68 | wheel_url = f"https://github.com/abetlen/llama-cpp-python/releases/download/v{last_version}/{wheel_name}" 69 | else: 70 | wheel_url = f"https://github.com/abetlen/llama-cpp-python/releases/download/v{last_version}-{cuda_version}/{wheel_name}" 71 | 72 | print(f"pip install {wheel_url}") 73 | modelscope_url = f"https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama-cpp-python-win%2F{cuda_version}%2F{wheel_name}" 74 | if mz_prompt_utils.Utils.testDownloadSpeed(wheel_url): 75 | ret = subprocess.run([ 76 | sys.executable, "-m", 77 | "pip", "install", wheel_url], check=True) 78 | elif mz_prompt_utils.Utils.testDownloadSpeed(modelscope_url): 79 | import tempfile 80 | whl_download_file = os.path.join( 81 | tempfile.gettempdir(), wheel_name) 82 | mz_prompt_utils.Utils.download_file( 83 | modelscope_url, whl_download_file) 84 | print(f"pip install {whl_download_file}") 85 | ret = subprocess.run([ 86 | sys.executable, "-m", 87 | "pip", "install", whl_download_file], check=True) 88 | else: 89 | 90 | # 兜底方案 91 | modelscope_url = f"https://www.modelscope.cn/api/v1/models/wailovet/MinusZoneAIModels/repo?Revision=master&FilePath=llama-cpp-python-win%2Fcu121%2Fllama_cpp_python-0.2.76-cp310-cp310-win_amd64.whl" 92 | if py_version == "310" and system_name == "win_amd64" and mz_prompt_utils.Utils.testDownloadSpeed(modelscope_url): 93 | import tempfile 94 | whl_download_file = os.path.join( 95 | tempfile.gettempdir(), wheel_name) 96 | mz_prompt_utils.Utils.download_file( 97 | modelscope_url, whl_download_file) 98 | print(f"pip install {whl_download_file}") 99 | ret = subprocess.run([ 100 | sys.executable, "-m", 101 | "pip", "install", whl_download_file], check=True) 102 | else: 103 | ret = subprocess.run([ 104 | sys.executable, "-m", 105 | "pip", "install", wheel_url], check=True) 106 | 107 | if ret.returncode != 0: 108 | raise ValueError("Failed to install llama_cpp. (安装llama_cpp失败)") 109 | else: 110 | print("llama_cpp installed successfully. 
(llama_cpp安装成功)") 111 | 112 | 113 | def get_llama_cpp_chat_handlers(): 114 | check_llama_cpp_requirements() 115 | from llama_cpp import llama_chat_format 116 | chat_handlers = llama_chat_format.LlamaChatCompletionHandlerRegistry()._chat_handlers 117 | chat_handlers = list(chat_handlers.keys()) 118 | 119 | return chat_handlers 120 | 121 | 122 | def LlamaCppOptions(): 123 | # chat_handlers = ["auto"] + get_llama_cpp_chat_handlers() 124 | return { 125 | # "chat_format": chat_handlers, 126 | "n_ctx": 2048, 127 | "n_batch": 2048, 128 | "n_threads": 0, 129 | "n_threads_batch": 0, 130 | "split_mode": ["LLAMA_SPLIT_MODE_NONE", "LLAMA_SPLIT_MODE_LAYER", "LLAMA_SPLIT_MODE_ROW",], 131 | "main_gpu": 0, 132 | "n_gpu_layers": -1, 133 | "max_tokens": 4096, 134 | "temperature": 1.6, 135 | "top_p": 0.95, 136 | "min_p": 0.05, 137 | "typical_p": 1.0, 138 | "stop": "", 139 | "frequency_penalty": 0.0, 140 | "presence_penalty": 0.0, 141 | "repeat_penalty": 1.1, 142 | "top_k": 50, 143 | "tfs_z": 1.0, 144 | "mirostat_mode": ["none", "mirostat", "mirostat_v2"], 145 | "mirostat_tau": 5.0, 146 | "mirostat_eta": 0.1, 147 | } 148 | 149 | 150 | def freed_gpu_memory(model_file): 151 | check_llama_cpp_requirements() 152 | 153 | model_and_opt = mz_prompt_utils.Utils.cache_get( 154 | f"llama_cpp_model_and_opt_{model_file}") 155 | 156 | if model_and_opt is None: 157 | return 0 158 | 159 | model = model_and_opt.get("model") 160 | 161 | del model 162 | torch.cuda.empty_cache() 163 | 164 | mz_prompt_utils.Utils.cache_set( 165 | f"llama_cpp_model_and_opt_{model_file}", None) 166 | 167 | 168 | def llama_cpp_messages(model_file, mmproj_file=None, messages=[], options={}): 169 | if options is None: 170 | options = {} 171 | options = options.copy() 172 | print(f"Find local model file: {model_file}") 173 | init_opts = ["n_ctx", "logits_all", "chat_format", "n_gpu_layers"] 174 | 175 | check_llama_cpp_requirements() 176 | 177 | from llama_cpp import Llama 178 | import llama_cpp 179 | 180 | model_and_opt = mz_prompt_utils.Utils.cache_get( 181 | f"llama_cpp_model_and_opt_{model_file}") 182 | 183 | is_opts_changed = False 184 | 185 | mz_prompt_utils.Utils.print_log( 186 | f"llama_cpp_messages chat_format: {options.get('chat_format', None)}") 187 | 188 | if model_and_opt is not None: 189 | for opt in init_opts: 190 | if model_and_opt.get("options").get(opt) != options.get(opt): 191 | is_opts_changed = True 192 | break 193 | 194 | if model_and_opt is None or is_opts_changed: 195 | print("llama_cpp: loading model...") 196 | verbose = False 197 | if os.environ.get("MZ_DEV", None) is not None: 198 | verbose = True 199 | 200 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_LAYER 201 | if options.get("split_mode", "LLAMA_SPLIT_MODE_LAYER") == "LLAMA_SPLIT_MODE_ROW": 202 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_ROW 203 | elif options.get("split_mode", "LLAMA_SPLIT_MODE_LAYER") == "LLAMA_SPLIT_MODE_NONE": 204 | split_mode_int = llama_cpp.LLAMA_SPLIT_MODE_NONE 205 | 206 | chat_handler = None 207 | if mmproj_file is not None: 208 | # 显存不释放,暂时全局缓存 209 | chat_handler = mz_prompt_utils.Utils.cache_get( 210 | f"llama_cpp_messages_mmproj_file_{mmproj_file}" 211 | ) 212 | if chat_handler is None: 213 | mz_prompt_utils.Utils.print_log( 214 | f"llama_cpp_messages mmproj_file: {mmproj_file}") 215 | from llama_cpp.llama_chat_format import Llava15ChatHandler 216 | chat_handler = Llava15ChatHandler(clip_model_path=mmproj_file) 217 | mz_prompt_utils.Utils.cache_set( 218 | f"llama_cpp_messages_mmproj_file_{mmproj_file}", chat_handler) 219 | 220 | model = 
Llama( 221 | model_path=model_file, 222 | n_gpu_layers=options.get("n_gpu_layers", -1), 223 | n_ctx=options.get("n_ctx", 2048), 224 | n_batch=options.get("n_batch", 2048), 225 | n_threads=options.get("n_threads", 0) if options.get( 226 | "n_threads", 0) > 0 else None, 227 | n_threads_batch=options.get("n_threads_batch", 0) if options.get( 228 | "n_threads_batch", 0) > 0 else None, 229 | main_gpu=options.get("main_gpu", 0), 230 | split_mode=split_mode_int, 231 | logits_all=options.get("logits_all", False), 232 | chat_handler=chat_handler, 233 | chat_format=options.get("chat_format", None), 234 | seed=options.get("seed", -1), 235 | verbose=verbose, 236 | ) 237 | model_and_opt = { 238 | "model": model, 239 | "chat_handler": chat_handler, 240 | "options": options, 241 | } 242 | mz_prompt_utils.Utils.cache_set( 243 | f"llama_cpp_model_and_opt_{model_file}", model_and_opt) 244 | 245 | model = model_and_opt.get("model") 246 | model.set_seed(options.get("seed", -1)) 247 | model.reset() 248 | 249 | response_format = options.get("response_format", None) 250 | mz_prompt_utils.Utils.print_log( 251 | f"======================================================LLAMA_CPP======================================================") 252 | # mz_utils.Utils.print_log("llama_cpp messages:", messages) 253 | mz_prompt_utils.Utils.print_log( 254 | "llama_cpp response_format:", response_format) 255 | 256 | stop = options.get("stop", "") 257 | if stop == "": 258 | stop = [] 259 | else: 260 | # 所有转译序列 261 | escape_sequence = { 262 | "\\n": "\n", 263 | "\\t": "\t", 264 | "\\r": "\r", 265 | "\\b": "\b", 266 | "\\f": "\f", 267 | } 268 | for key, value in escape_sequence.items(): 269 | stop = stop.replace(key, value) 270 | stop = stop.split(",") 271 | 272 | mirostat_mode = 0 273 | if options.get("mirostat_mode", "none") == "mirostat": 274 | mirostat_mode = 1 275 | elif options.get("mirostat_mode", "none") == "mirostat_v2": 276 | mirostat_mode = 2 277 | 278 | try: 279 | debuf_messages = copy.deepcopy(messages) 280 | for dindex in range(len(debuf_messages)): 281 | if debuf_messages[dindex].get("role") == "user": 282 | debuf_messages_content = debuf_messages[dindex].get( 283 | "content", []) 284 | if type(debuf_messages_content) != list: 285 | continue 286 | for ccindex in range(len(debuf_messages_content)): 287 | if debuf_messages_content[ccindex].get("type") == "image_url": 288 | debuf_messages[dindex]["content"][ccindex]["image_url"] = debuf_messages[ 289 | dindex]["content"][ccindex]["image_url"] = None 290 | 291 | mz_prompt_utils.Utils.print_log( 292 | f"LLAMA_CPP messages: {json.dumps(debuf_messages, indent=4, ensure_ascii=False)}") 293 | except Exception as e: 294 | mz_prompt_utils.Utils.print_log( 295 | f"LLAMA_CPP messages: {messages}") 296 | output = model.create_chat_completion( 297 | messages=messages, 298 | response_format=response_format, 299 | max_tokens=options.get("max_tokens", 4096), 300 | temperature=options.get("temperature", 1.6), 301 | top_p=options.get("top_p", 0.95), 302 | min_p=options.get("min_p", 0.05), 303 | typical_p=options.get("typical_p", 1.0), 304 | stop=stop, 305 | frequency_penalty=options.get("frequency_penalty", 0.0), 306 | presence_penalty=options.get("presence_penalty", 0.0), 307 | repeat_penalty=options.get("repeat_penalty", 1.1), 308 | top_k=options.get("top_k", 50), 309 | tfs_z=options.get("tfs_z", 1.0), 310 | mirostat_mode=mirostat_mode, 311 | mirostat_tau=options.get("mirostat_tau", 5.0), 312 | mirostat_eta=options.get("mirostat_eta", 0.1), 313 | tools=options.get("tools", None), 314 | 
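        # Added note: when llama_cpp_simple_interrogator_to_json puts a
        # response_format of {"type": "json_object", "schema": ...} into options,
        # llama-cpp-python is expected to constrain decoding to that schema (via a
        # generated grammar), so the reply arrives as schema-shaped JSON; the tools
        # and tool_choice entries are simply passed through for OpenAI-style
        # function calling.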
tool_choice=options.get("tool_choice", None), 315 | ) 316 | mz_prompt_utils.Utils.print_log(f"LLAMA_CPP: \n{output}") 317 | choices = output.get("choices", []) 318 | # mz_utils.Utils.print_log(f"LLAMA_CPP choices: \n{choices}") 319 | if len(choices) == 0: 320 | return "" 321 | 322 | result = choices[0].get("message", {}).get("content", "") 323 | return result 324 | 325 | 326 | def llama_cpp_simple_interrogator_to_json(model_file, use_system=True, system=None, question="", schema={}, options={}): 327 | options = options.copy() 328 | if system is None: 329 | system = "" 330 | messages = [ 331 | { 332 | "role": "user", 333 | "content": question 334 | }, 335 | ] 336 | elif use_system: 337 | messages = [ 338 | { 339 | "role": "system", 340 | "content": system 341 | }, 342 | { 343 | "role": "user", 344 | "content": question 345 | }, 346 | ] 347 | else: 348 | messages = [ 349 | { 350 | "role": "user", 351 | "content": f"{system}\nIf you understand what I am saying, please reply 'OK' and do not reply with unnecessary content." 352 | }, 353 | { 354 | "role": "assistant", 355 | "content": "OK" 356 | }, 357 | { 358 | "role": "user", 359 | "content": question 360 | }, 361 | ] 362 | 363 | response_format = { 364 | "type": "json_object", 365 | "schema": schema, 366 | } 367 | 368 | options["response_format"] = response_format 369 | 370 | # if options.get("chat_format", None) is None: 371 | # options["chat_format"] = "llama-2" 372 | 373 | result = llama_cpp_messages(model_file, None, messages, options=options) 374 | result = result.replace("\n", " ") 375 | return result 376 | 377 | 378 | def llama_cpp_simple_interrogator(model_file, use_system=True, system=None, question="", options={}): 379 | if options is None: 380 | options = {} 381 | options = options.copy() 382 | if system is None: 383 | system = "" 384 | messages = [ 385 | { 386 | "role": "user", 387 | "content": question 388 | }, 389 | ] 390 | elif use_system: 391 | messages = [ 392 | { 393 | "role": "system", 394 | "content": system 395 | }, 396 | { 397 | "role": "user", 398 | "content": question 399 | }, 400 | ] 401 | else: 402 | messages = [ 403 | { 404 | "role": "user", 405 | "content": f"{system}\nIf you understand what I am saying, please reply 'OK' and do not reply with unnecessary content." 
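            # Added note: when use_system is False, the system text is delivered as a
            # first user turn followed by a canned "OK" assistant reply, a workaround
            # for chat templates that lack a dedicated system role.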
406 | }, 407 | { 408 | "role": "assistant", 409 | "content": "OK" 410 | }, 411 | { 412 | "role": "user", 413 | "content": question 414 | }, 415 | ] 416 | return llama_cpp_messages(model_file, None, messages, options=options) 417 | 418 | 419 | def llava_cpp_messages(model_file, mmproj_file, messages, options={}): 420 | if options is None: 421 | options = {} 422 | 423 | options = options.copy() 424 | options["logits_all"] = True 425 | options["n_ctx"] = max(4096, options.get("n_ctx", 4096)) 426 | 427 | # if options.get("chat_format", None) is None: 428 | # options["chat_format"] = "llama-2" 429 | return llama_cpp_messages(model_file, mmproj_file, messages, options) 430 | 431 | 432 | def llava_cpp_simple_interrogator( 433 | model_file, mmproj_file, system="You are an assistant who perfectly describes images.", question="Describe this image in detail please.", 434 | image=None, options={}): 435 | if options is None: 436 | options = {} 437 | options = options.copy() 438 | check_llama_cpp_requirements() 439 | 440 | content = [] 441 | if image is not None: 442 | data_uri = mz_prompt_utils.Utils.pil_image_to_base64(image) 443 | content.append({"type": "image_url", "image_url": {"url": data_uri}}) 444 | 445 | content.append({"type": "text", "text": question}) 446 | 447 | check_llama_cpp_requirements() 448 | 449 | return llava_cpp_messages(model_file, mmproj_file, [ 450 | { 451 | "role": "system", 452 | "content": system, 453 | }, 454 | { 455 | "role": "user", 456 | "content": content, 457 | }, 458 | ], options=options) 459 | -------------------------------------------------------------------------------- /mz_openaiapi.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import json 4 | import subprocess 5 | 6 | from . import mz_prompt_utils 7 | from . import mz_llama_cpp 8 | from . import mz_llama_core_nodes 9 | from . 
import mz_prompts 10 | 11 | 12 | def zhipu_json_fix(input_data): 13 | if type(input_data) == dict: 14 | if "Items" in input_data: 15 | return input_data["Items"] 16 | else: 17 | for key, value in input_data.items(): 18 | input_data[key] = zhipu_json_fix(value) 19 | return input_data 20 | 21 | elif type(input_data) == list: 22 | for i in range(len(input_data)): 23 | input_data[i] = zhipu_json_fix(input_data[i]) 24 | return input_data 25 | 26 | else: 27 | return input_data 28 | 29 | 30 | def query_beautify_prompt_text(args_dict): 31 | try: 32 | from openai import OpenAI 33 | import openai 34 | except ImportError: 35 | subprocess.check_call( 36 | [sys.executable, "-m", "pip", "install", "openai"]) 37 | from openai import OpenAI 38 | import openai 39 | 40 | api_key = args_dict.get("api_key", None) 41 | base_url = args_dict.get("base_url", None) 42 | 43 | text = args_dict.get("text", "") 44 | style_presets = args_dict.get("style_presets", "") 45 | 46 | if api_key is None: 47 | raise ValueError("api_key is required") 48 | 49 | client = OpenAI( 50 | api_key=api_key, 51 | default_headers={"x-foo": "true"} 52 | ) 53 | 54 | if base_url is not None: 55 | client.base_url = base_url 56 | 57 | model_name = args_dict.get("model_name", "gpt-3.5-turbo") 58 | 59 | options = args_dict.get("options", {}) 60 | 61 | customize_instruct = args_dict.get("customize_instruct", None) 62 | mz_prompt_utils.Utils.print_log( 63 | f"customize_instruct: {customize_instruct}") 64 | 65 | schema = None 66 | if customize_instruct is None: 67 | schema = mz_llama_core_nodes.get_schema_obj( 68 | keys_type={ 69 | "description": mz_llama_core_nodes.get_schema_base_type("string"), 70 | "long_prompt": mz_llama_core_nodes.get_schema_base_type("string"), 71 | "main_color_word": mz_llama_core_nodes.get_schema_base_type("string"), 72 | "camera_angle_word": mz_llama_core_nodes.get_schema_base_type("string"), 73 | "style_words": mz_llama_core_nodes.get_schema_array("string"), 74 | "subject_words": mz_llama_core_nodes.get_schema_array("string"), 75 | "light_words": mz_llama_core_nodes.get_schema_array("string"), 76 | "environment_words": mz_llama_core_nodes.get_schema_array("string"), 77 | }, 78 | required=[ 79 | "description", 80 | "long_prompt", 81 | "main_color_word", 82 | "camera_angle_word", 83 | "style_words", 84 | "subject_words", 85 | "light_words", 86 | "environment_words", 87 | ] 88 | ) 89 | 90 | question = f"IDEA: {style_presets},{text}" 91 | if style_presets == "none": 92 | question = f"IDEA: {text}" 93 | 94 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 95 | 96 | else: 97 | 98 | system_prompt = customize_instruct.get("system", "") 99 | question = customize_instruct.get("instruct", "%text%") 100 | 101 | system_prompt = system_prompt.replace("%text%", text) 102 | question = question.replace("%text%", text) 103 | 104 | mz_prompt_utils.Utils.print_log(f"system_prompt: {system_prompt}") 105 | mz_prompt_utils.Utils.print_log(f"question: {question}") 106 | # print(f"system_prompt: {system_prompt}") 107 | # print(f"question: {question}") 108 | 109 | output = None 110 | if schema is not None: 111 | 112 | output = client.chat.completions.create( 113 | model=model_name, 114 | messages=[ 115 | {"role": "system", "content": system_prompt}, 116 | {"role": "user", "content": f"{question}\ncall beautify_prompt_text function to get the result."}, 117 | ], 118 | tools=[{ 119 | "type": "function", 120 | "function": { 121 | "name": "beautify_prompt_text", 122 | "description": "required Beautify Prompt Text", 123 | 
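                    # Added note: the prompt schema is exposed as a single OpenAI
                    # function tool, and tool_choice below forces the model to call
                    # it, so the beautified prompt comes back as structured JSON
                    # arguments rather than free text; zhipu_json_fix later unwraps
                    # the "Items" wrapper that some providers add around array values.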
"parameters": schema, 124 | } 125 | }], 126 | tool_choice={"type": "function", 127 | "function": {"name": "beautify_prompt_text"}}, 128 | ) 129 | 130 | if type(output) == str: 131 | raise Exception( 132 | f"返回结果格式异常 ; Return result format exception : {output}") 133 | 134 | tool_calls = output.choices[0].message.tool_calls 135 | 136 | functions_args = {} 137 | for tool_call in tool_calls: 138 | function_name = tool_call.function.name 139 | function_args = json.loads(tool_call.function.arguments) 140 | functions_args[function_name] = function_args 141 | beautify_prompt_text_result = functions_args.get( 142 | "beautify_prompt_text", {}) 143 | 144 | mz_prompt_utils.Utils.print_log( 145 | f"beautify_prompt_text_result: {beautify_prompt_text_result}") 146 | 147 | beautify_prompt_text_result = zhipu_json_fix( 148 | beautify_prompt_text_result) 149 | results = [] 150 | for key, value in beautify_prompt_text_result.items(): 151 | if type(value) == list: 152 | value = [item for item in value if item != ""] 153 | value = [mz_prompt_utils.Utils.prompt_zh_to_en(item) 154 | for item in value] 155 | if len(value) == 0: 156 | continue 157 | item_str = ", ".join(value) 158 | results.append(f"({item_str})") 159 | else: 160 | if value == "": 161 | continue 162 | value = mz_prompt_utils.Utils.prompt_zh_to_en(value) 163 | results.append(f"({value})") 164 | 165 | full_response = ", ".join(results) 166 | 167 | else: 168 | output = client.chat.completions.create( 169 | model=model_name, 170 | messages=[ 171 | {"role": "system", "content": system_prompt}, 172 | {"role": "user", "content": question}, 173 | ], 174 | ) 175 | 176 | if type(output) == str: 177 | raise Exception( 178 | f"返回结果格式异常 ; Return result format exception : {output}") 179 | 180 | full_response = output.choices[0].message.content 181 | 182 | mz_prompt_utils.Utils.print_log( 183 | f"OPENAI_OUTPUT: \n{output.model_dump_json()}") 184 | # print(output.model_dump_json()) 185 | 186 | # 去除换行 187 | while full_response.find("\n") != -1: 188 | full_response = full_response.replace("\n", " ") 189 | # 句号换成逗号 190 | while full_response.find(".") != -1: 191 | full_response = full_response.replace(".", ",") 192 | # 去除多余逗号 193 | while full_response.find(",,") != -1: 194 | full_response = full_response.replace(",,", ",") 195 | while full_response.find(", ,") != -1: 196 | full_response = full_response.replace(", ,", ",") 197 | style_presets_prompt_text = mz_llama_core_nodes.style_presets_prompt.get( 198 | style_presets, "") 199 | if style_presets_prompt_text != "": 200 | full_response = f"{style_presets_prompt_text}, {full_response}" 201 | return full_response 202 | -------------------------------------------------------------------------------- /mz_prompt_webserver.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | import asyncio 4 | import uuid 5 | from . 
import mz_prompt_utils 6 | 7 | 8 | web_msg_pool = { 9 | 10 | } 11 | 12 | 13 | def show_toast_success(message, duration=2000): 14 | send_message({ 15 | "type": "toast-success", 16 | "message": message, 17 | "duration": duration 18 | }) 19 | 20 | 21 | def send_message(data): 22 | global web_msg_pool 23 | for key in web_msg_pool: 24 | web_msg_pool[key].append(data) 25 | 26 | 27 | def start_server(): 28 | try: 29 | global web_msg_pool 30 | from aiohttp import web 31 | import server 32 | app: web.Application = server.PromptServer.instance.app 33 | 34 | async def message(request): 35 | muuid = uuid.uuid4() 36 | try: 37 | ws = web.WebSocketResponse() 38 | 39 | await ws.prepare(request) 40 | 41 | web_msg_pool[muuid] = [] 42 | async for msg in ws: 43 | if msg.type == web.WSMsgType.text: 44 | if len(web_msg_pool[muuid]) == 0: 45 | continue 46 | else: 47 | await ws.send_json(web_msg_pool[muuid]) 48 | web_msg_pool[muuid] = [] 49 | elif msg.type == web.WSMsgType.close: 50 | break 51 | 52 | del web_msg_pool[muuid] 53 | mz_prompt_utils.Utils.print_log(f"connection {muuid} closed") 54 | return ws 55 | except Exception as e: 56 | mz_prompt_utils.Utils.print_log(e) 57 | del web_msg_pool[muuid] 58 | return ws 59 | 60 | if not any([route.get_info().get("path", "") == "/mz_webapi/message" for route in app.router.routes()]): 61 | mz_prompt_utils.Utils.print_log("add route /mz_webapi/message") 62 | app.router.add_get("/mz_webapi/message", message) 63 | else: 64 | mz_prompt_utils.Utils.print_log( 65 | "route /mz_webapi/message is exist") 66 | 67 | except Exception as e: 68 | print(e) 69 | -------------------------------------------------------------------------------- /mz_prompts.py: -------------------------------------------------------------------------------- 1 | Beautify_Prompt = """ 2 | Stable Diffusion is an AI art generation model similar to DALLE-2. 3 | Below is a list of prompts that can be used to generate images with Stable Diffusion: 4 | - portait of a homer simpson archer shooting arrow at forest monster, front game card, drark, marvel comics, dark, intricate, highly detailed, smooth, artstation, digital illustration by ruan jia and mandy jurgens and artgerm and wayne barlowe and greg rutkowski and zdislav beksinski 5 | - pirate, concept art, deep focus, fantasy, intricate, highly detailed, digital painting, artstation, matte, sharp focus, illustration, art by magali villeneuve, chippy, ryan yee, rk post, clint cearley, daniel ljunggren, zoltan boros, gabor szikszai, howard lyon, steve argyle, winona nelson 6 | - ghost inside a hunted room, art by lois van baarle and loish and ross tran and rossdraws and sam yang and samdoesarts and artgerm, digital art, highly detailed, intricate, sharp focus, Trending on Artstation HQ, deviantart, unreal engine 5, 4K UHD image 7 | - red dead redemption 2, cinematic view, epic sky, detailed, concept art, low angle, high detail, warm lighting, volumetric, godrays, vivid, beautiful, trending on artstation, by jordan grimmer, huge scene, grass, art greg rutkowski 8 | - a fantasy style portrait painting of rachel lane / alison brie hybrid in the style of francois boucher oil painting unreal 5 daz. 
rpg portrait, extremely detailed artgerm greg rutkowski alphonse mucha greg hildebrandt tim hildebrandt 9 | - athena, greek goddess, claudia black, art by artgerm and greg rutkowski and magali villeneuve, bronze greek armor, owl crown, d & d, fantasy, intricate, portrait, highly detailed, headshot, digital painting, trending on artstation, concept art, sharp focus, illustration 10 | - closeup portrait shot of a large strong female biomechanic woman in a scenic scifi environment, intricate, elegant, highly detailed, centered, digital painting, artstation, concept art, smooth, sharp focus, warframe, illustration, thomas kinkade, tomasz alen kopera, peter mohrbacher, donato giancola, leyendecker, boris vallejo 11 | - ultra realistic illustration of steve urkle as the hulk, intricate, elegant, highly detailed, digital painting, artstation, concept art, smooth, sharp focus, illustration, art by artgerm and greg rutkowski and alphonse mucha 12 | I want you to write me a list of detailed prompts exactly about the idea written after IDEA. Follow the structure of the example prompts. This means a very short description of the scene, followed by modifiers divided by commas to alter the mood, style, lighting, and more. 13 | """ 14 | 15 | Long_prompt = "Long prompt version should consist of 3 to 5 sentences. Long prompt version must specify the color, shape, texture or spatial relation of the included objects. DO NOT generate sentences that describe any atmosphere!!! The language of reply is English only!!!" 16 | 17 | Standardize_Prompt = """ 18 | Extract the content about Stable Diffusion style from the following input and combine it into a json array. Note that the output will be directly used in the program. 19 | Please output the standardized json content. 20 | """ 21 | 22 | 23 | GPT4VImageCaptioner_System = """ 24 | As an AI image tagging expert, please provide precise tags for these images to enhance the CLIP model's understanding of the content. 25 | """ 26 | 27 | # From https://github.com/jiayev/GPT4V-Image-Captioner 28 | GPT4VImageCaptioner_Prompt = """ 29 | Employ succinct keywords or phrases or sentences, steering clear of elaborate sentences and extraneous conjunctions. 30 | Prioritize the tags by relevance. 31 | Your tags should capture key elements such as the main subject, setting, artistic style, composition, image quality, color tone, filter, and camera specifications, and any other tags crucial for the image. 32 | When tagging photos of people, include specific details like gender, nationality, attire, actions, pose, expressions, accessories, makeup, composition type, age, etc. 33 | For other image categories, apply appropriate and common descriptive tags as well. 34 | Recognize and tag any celebrities, well-known landmarks or IPs if clearly featured in the image. 35 | Your tags should be accurate, non-duplicative, and within a 20-75 word count range. 36 | These tags will be used for image re-creation, so the closer the resemblance to the original image, the better the tag quality. 37 | Tags should be comma-separated. 38 | """ 39 | 40 | 41 | M_ImageCaptioner_System = """ 42 | Long prompt version should consist of 3 to 5 sentences. Long prompt version must specify the color, shape, texture or spatial relation of the included objects. DO NOT generate sentences that describe any atmosphere!!! 43 | """ 44 | 45 | M_ImageCaptioner_Prompt = """ 46 | Describe this image in detail please. 47 | The language of reply is English only!!! 
48 | Starts with "In the image," 49 | """ 50 | 51 | 52 | M_ImageCaptioner2_System = """ 53 | You are an assistant who perfectly describes images. 54 | """ 55 | 56 | M_ImageCaptioner2_Prompt = """ 57 | Describe this image in detail please. 58 | The language of reply is English only!!! 59 | Starts with "In the image," 60 | """ 61 | 62 | 63 | ImageCaptionerPostProcessing_System = """ 64 | I want you to write me a detailed list of tips for Content. 65 | Write a very short description of the scene and put it in the 'short_describes' field 66 | Write complete [moods, styles, lights, elements, objects] of the word array and put it in the '$_tags' field 67 | Don't include anything that isn't in Content. 68 | The language of reply is English only!!! 69 | """ 70 | -------------------------------------------------------------------------------- /mz_transformers.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import shutil 4 | import subprocess 5 | import traceback 6 | 7 | 8 | def transformers_captioner(args_dict, myfunc): 9 | from . import mz_prompt_utils 10 | import PIL.Image as Image 11 | captioner_config = args_dict.get("captioner_config", {}) 12 | directory = captioner_config.get("directory", None) 13 | force_update = captioner_config.get("force_update", False) 14 | caption_suffix = captioner_config.get("caption_suffix", "") 15 | retry_keyword = captioner_config.get("retry_keyword", "") 16 | batch_size = captioner_config.get("batch_size", 1) 17 | retry_keywords = retry_keyword.split(",") 18 | 19 | retry_keywords = [k.strip() for k in retry_keywords] 20 | retry_keywords = [k for k in retry_keywords if k != ""] 21 | 22 | pre_images = [] 23 | # print("directory:", directory) 24 | for root, dirs, files in os.walk(directory): 25 | for file in files: 26 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png") or file.endswith(".webp"): 27 | image_path = os.path.join(root, file) 28 | base_file_path = os.path.splitext(image_path)[0] 29 | caption_file = os.path.join( 30 | root, base_file_path + caption_suffix) 31 | if os.path.exists(caption_file) and force_update is False: 32 | continue 33 | 34 | pre_images.append({ 35 | "image_path": image_path, 36 | "caption_path": caption_file 37 | }) 38 | 39 | result = [] 40 | 41 | # print(f"Total images: {len(pre_images)} : {json.dumps(pre_images, indent=4)}") 42 | print(f"Total images: {len(pre_images)}") 43 | 44 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 45 | images_batch = [] 46 | for i in range(len(pre_images)): 47 | # print(f"Processing image {i+1}/{len(pre_images)}") 48 | try: 49 | pre_image = pre_images[i] 50 | image_path = pre_image["image_path"] 51 | caption_file = pre_image["caption_path"] 52 | 53 | onec_args_dict = args_dict.copy() 54 | del onec_args_dict["captioner_config"] 55 | 56 | pil_image = Image.open(image_path) 57 | images_batch.append({ 58 | "image_path": image_path, 59 | "pil_image": pil_image 60 | }) 61 | 62 | if len(images_batch) < batch_size: 63 | if i < len(pre_images) - 1: 64 | continue 65 | 66 | if i < len(pre_images) - 1: 67 | onec_args_dict["keep_device"] = True 68 | 69 | pil_images = [] 70 | for j in range(len(images_batch)): 71 | pil_images.append(images_batch[j]["pil_image"]) 72 | 73 | # onec_args_dict["image"] = mz_prompt_utils.Utils.pil2tensor( 74 | # pil_image) 75 | 76 | thumbnail = Image.new( 77 | "RGB", (images_batch[0]["pil_image"].width * batch_size, images_batch[0]["pil_image"].height)) 78 | 79 | for j in 
range(len(images_batch)): 80 | pil_image = images_batch[j]["pil_image"] 81 | thumbnail.paste(pil_image, (j * pil_image.width, 0)) 82 | 83 | pb.update( 84 | i, 85 | len(pre_images), 86 | # 转RGB 87 | thumbnail, 88 | ) 89 | onec_args_dict["images"] = pil_images 90 | onec_args_dict["captioner_mode"] = True 91 | 92 | responses = myfunc(onec_args_dict) 93 | # print(f"responses: {responses}") 94 | for j in range(len(images_batch)): 95 | item = images_batch[j] 96 | image_path = item["image_path"] 97 | caption_file = os.path.join( 98 | os.path.dirname(image_path), os.path.splitext(image_path)[0] + caption_suffix) 99 | response = responses[j] 100 | response = response.strip() 101 | 102 | print(f"==={image_path}===") 103 | print(image_path) 104 | print(response) 105 | print("") 106 | print("") 107 | 108 | if response != "": 109 | with open(caption_file, "w") as f: 110 | prompt_fixed_beginning = captioner_config.get( 111 | "prompt_fixed_beginning", "") 112 | f.write(prompt_fixed_beginning + response) 113 | 114 | result.append(response) 115 | 116 | images_batch = [] 117 | except Exception as e: 118 | print( 119 | f"For image {image_path}, error: {e} , stack: {traceback.format_exc()}") 120 | return result 121 | 122 | 123 | def florence2_node_encode(args_dict): 124 | args_dict = args_dict.copy() 125 | captioner_config = args_dict.get("captioner_config", None) 126 | if captioner_config is not None: 127 | transformers_captioner(args_dict, florence2_node_encode) 128 | # raise Exception( 129 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 130 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 131 | 132 | import torch 133 | import folder_paths 134 | from . import mz_prompt_utils 135 | from .mz_prompt_utils import Utils 136 | 137 | florence2_large_files_map = { 138 | "Florence-2-large": [ 139 | { 140 | "file_path": "pytorch_model.bin", 141 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=pytorch_model.bin" 142 | }, 143 | 144 | # tokenizer.json 145 | { 146 | "file_path": "tokenizer.json", 147 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=tokenizer.json" 148 | }, 149 | # tokenizer_config.json 150 | { 151 | "file_path": "tokenizer_config.json", 152 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=tokenizer_config.json" 153 | }, 154 | # vocab.json 155 | { 156 | "file_path": "vocab.json", 157 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large/repo?Revision=master&FilePath=vocab.json" 158 | } 159 | ], 160 | "Florence-2-large-ft": [ 161 | { 162 | "file_path": "pytorch_model.bin", 163 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=pytorch_model.bin" 164 | }, 165 | # tokenizer.json 166 | { 167 | "file_path": "tokenizer.json", 168 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=tokenizer.json" 169 | }, 170 | # tokenizer_config.json 171 | { 172 | "file_path": "tokenizer_config.json", 173 | "url": "https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=tokenizer_config.json" 174 | }, 175 | # vocab.json 176 | { 177 | "file_path": "vocab.json", 178 | "url": 
"https://www.modelscope.cn/api/v1/models/AI-ModelScope/Florence-2-large-ft/repo?Revision=master&FilePath=vocab.json" 179 | } 180 | ], 181 | } 182 | 183 | llm_path = os.path.join( 184 | folder_paths.models_dir, 185 | "LLM", 186 | ) 187 | os.makedirs(llm_path, exist_ok=True) 188 | 189 | model_name = args_dict.get("model_name", "Florence-2-large") 190 | 191 | model_path = os.path.join(llm_path, model_name) 192 | 193 | if not os.path.exists(model_path): 194 | # GIT_LFS_SKIP_SMUDGE=1 git clone https://www.modelscope.cn/AI-ModelScope/Florence-2-large.git 195 | original_env = os.environ.get("GIT_LFS_SKIP_SMUDGE") 196 | os.environ["GIT_LFS_SKIP_SMUDGE"] = "1" 197 | subprocess.run( 198 | ["git", "clone", "https://www.modelscope.cn/AI-ModelScope/Florence-2-large.git", model_path]) 199 | if original_env is not None: 200 | os.environ["GIT_LFS_SKIP_SMUDGE"] = original_env 201 | 202 | florence2_large_files = florence2_large_files_map.get(model_name, []) 203 | for file_info in florence2_large_files: 204 | file_path = os.path.join(model_path, file_info["file_path"]) 205 | # 判断文件大小小于1M 206 | if not os.path.exists(file_path) or os.path.getsize(file_path) < 1024 * 1024: 207 | Utils.download_file(file_info["url"], file_path) 208 | 209 | # with open(os.path.join(os.path.dirname(__file__), "hook", "modeling_florence2.py"), "r") as f: 210 | # code = f.read() 211 | shutil.copyfile( 212 | os.path.join(os.path.dirname(__file__), 213 | "hook", "modeling_florence2.py"), 214 | os.path.join(model_path, "modeling_florence2.py") 215 | ) 216 | 217 | from transformers import AutoProcessor, AutoModelForCausalLM 218 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 219 | model = Utils.cache_get(f"florence_model_and_opt_") 220 | if model is None: 221 | model = AutoModelForCausalLM.from_pretrained( 222 | model_path, 223 | local_files_only=True, 224 | trust_remote_code=True, 225 | ) 226 | model.to(device).eval() 227 | Utils.cache_set(f"florence_model_and_opt_", model) 228 | 229 | processor = AutoProcessor.from_pretrained( 230 | model_path, 231 | local_files_only=True, 232 | trust_remote_code=True 233 | ) 234 | 235 | captioner_mode = args_dict.get("captioner_mode", False) 236 | if captioner_mode: 237 | pil_images = args_dict.get("images", None) 238 | _pil_images = [] 239 | for pil_image in pil_images: 240 | resolution = args_dict.get("resolution", 512) 241 | pil_image = Utils.resize_max( 242 | pil_image, resolution, resolution).convert("RGB") 243 | _pil_images.append(pil_image) 244 | pil_images = _pil_images 245 | else: 246 | tensor_image = args_dict.get("image", None) 247 | pil_image = Utils.tensor2pil(tensor_image) 248 | resolution = args_dict.get("resolution", 512) 249 | pil_image = Utils.resize_max( 250 | pil_image, resolution, resolution).convert("RGB") 251 | pil_images = [pil_image] 252 | 253 | prompt = "" 254 | prompts = [prompt for _ in pil_images] 255 | inputs = processor(text=prompts, images=pil_images, return_tensors="pt") 256 | generated_ids = model.generate( 257 | input_ids=inputs["input_ids"].to(device), 258 | pixel_values=inputs["pixel_values"].to(device), 259 | max_new_tokens=1024, 260 | num_beams=3, 261 | do_sample=False 262 | ) 263 | 264 | generated_texts = processor.batch_decode( 265 | generated_ids, skip_special_tokens=True) 266 | 267 | pil_image = pil_images[0] 268 | parsed_answers = [] 269 | for i in range(len(generated_texts)): 270 | generated_text = generated_texts[i] 271 | parsed_answer = processor.post_process_generation( 272 | generated_text, 273 | task=prompt, 274 | 
image_size=(pil_image.width, pil_image.height)) 275 | parsed_answers.append(parsed_answer) 276 | 277 | response = [] 278 | for i in range(len(parsed_answers)): 279 | response.append(parsed_answers[i].get(prompt)) 280 | 281 | keep_device = args_dict.get("keep_device", False) 282 | if not keep_device: 283 | model.cpu() 284 | del model 285 | torch.cuda.empty_cache() 286 | Utils.cache_set(f"florence_model_and_opt_", None) 287 | 288 | if captioner_mode: 289 | return response 290 | else: 291 | response = response[0] 292 | 293 | conditionings = None 294 | clip = args_dict.get("clip", None) 295 | if clip is not None: 296 | conditionings = Utils.a1111_clip_text_encode( 297 | clip, response, ) 298 | 299 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 300 | 301 | 302 | def paligemma_node_encode(args_dict): 303 | args_dict = args_dict.copy() 304 | captioner_config = args_dict.get("captioner_config", None) 305 | if captioner_config is not None: 306 | transformers_captioner(args_dict, paligemma_node_encode) 307 | # raise Exception( 308 | # "图片批量反推任务已完成 ; Image batch reverse push task completed") 309 | return {"ui": {"string": ["图片批量反推任务已完成 ; Image batch reverse push task completed",]}, "result": ("", None)} 310 | 311 | import torch 312 | import folder_paths 313 | from . import mz_prompt_utils 314 | from .mz_prompt_utils import Utils 315 | 316 | paligemma_files_map = { 317 | "common": [ 318 | 319 | { 320 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fadded_tokens.json", 321 | "file_path": "added_tokens.json" 322 | }, 323 | { 324 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fconfig.json", 325 | "file_path": "config.json" 326 | }, 327 | { 328 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fgeneration_config.json", 329 | "file_path": "generation_config.json" 330 | }, 331 | { 332 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fpreprocessor_config.json", 333 | "file_path": "preprocessor_config.json" 334 | }, 335 | { 336 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fspecial_tokens_map.json", 337 | "file_path": "special_tokens_map.json" 338 | }, 339 | { 340 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer.json", 341 | "file_path": "tokenizer.json" 342 | }, 343 | { 344 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer.model", 345 | "file_path": "tokenizer.model" 346 | }, 347 | { 348 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Ftokenizer_config.json", 349 | "file_path": "tokenizer_config.json" 350 | }, 351 | ], 352 | "paligemma-sd3-long-captioner": [ 353 | { 354 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-merge%2Fmodel.safetensors", 355 | "file_path": "model.safetensors" 356 | }, 357 | ], 358 | "paligemma-sd3-long-captioner-v2": [ 359 | { 360 | "url": 
"https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sd3-long-captioner-v2-merge%2Fmodel.safetensors", 361 | "file_path": "model.safetensors" 362 | }, 363 | ], 364 | "paligemma-sdxl-long-captioner": [ 365 | { 366 | "url": "https://www.modelscope.cn/api/v1/models/wailovet/sd-models/repo?Revision=master&FilePath=sdxl-long-captioner-merge%2Fmodel.safetensors", 367 | "file_path": "model.safetensors" 368 | }, 369 | ], 370 | } 371 | 372 | llm_path = os.path.join( 373 | folder_paths.models_dir, 374 | "LLM", 375 | ) 376 | os.makedirs(llm_path, exist_ok=True) 377 | 378 | model_name = args_dict.get("model_name") 379 | 380 | model_path = os.path.join(llm_path, model_name) 381 | 382 | common_files = paligemma_files_map.get("common", []) 383 | for file_info in common_files: 384 | file_path = os.path.join(model_path, file_info["file_path"]) 385 | if not os.path.exists(file_path): 386 | Utils.download_file(file_info["url"], file_path) 387 | 388 | paligemma_files = paligemma_files_map.get(model_name, []) 389 | for file_info in paligemma_files: 390 | file_path = os.path.join(model_path, file_info["file_path"]) 391 | 392 | if not os.path.exists(file_path): 393 | Utils.download_file(file_info["url"], file_path) 394 | 395 | from transformers import PaliGemmaForConditionalGeneration, PaliGemmaProcessor 396 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 397 | model = Utils.cache_get(f"paligemma_model_and_opt_") 398 | if model is None: 399 | model = PaliGemmaForConditionalGeneration.from_pretrained( 400 | model_path, 401 | local_files_only=True, 402 | torch_dtype=torch.float16, 403 | ) 404 | model.to(device).eval() 405 | Utils.cache_set(f"paligemma_model_and_opt_", model) 406 | 407 | processor = PaliGemmaProcessor.from_pretrained( 408 | model_path, 409 | local_files_only=True, 410 | ) 411 | 412 | 413 | captioner_mode = args_dict.get("captioner_mode", False) 414 | if captioner_mode: 415 | pil_images = args_dict.get("images", None) 416 | _pil_images = [] 417 | for pil_image in pil_images: 418 | resolution = args_dict.get("resolution", 512) 419 | pil_image = Utils.resize_max( 420 | pil_image, resolution, resolution).convert("RGB") 421 | _pil_images.append(pil_image) 422 | pil_images = _pil_images 423 | pil_image = pil_images[0] 424 | else: 425 | tensor_image = args_dict.get("image", None) 426 | pil_image = Utils.tensor2pil(tensor_image) 427 | resolution = args_dict.get("resolution", 512) 428 | pil_image = Utils.resize_max( 429 | pil_image, resolution, resolution).convert("RGB") 430 | pil_images = [pil_image] 431 | pil_image = pil_images[0] 432 | 433 | 434 | # prefix 435 | prompt = "caption en" 436 | model_inputs = processor( 437 | text=prompt, images=pil_image, return_tensors="pt").to('cuda') 438 | input_len = model_inputs["input_ids"].shape[-1] 439 | 440 | def modify_caption(caption: str) -> str: 441 | """ 442 | Removes specific prefixes from captions. 443 | Args: 444 | caption (str): A string containing a caption. 445 | Returns: 446 | str: The caption with the prefix removed if it was present. 
447 | """ 448 | # Define the prefixes to remove 449 | import re 450 | prefix_substrings = [ 451 | ('captured from ', ''), 452 | ('captured at ', '') 453 | ] 454 | 455 | # Create a regex pattern to match any of the prefixes 456 | pattern = '|'.join([re.escape(opening) 457 | for opening, _ in prefix_substrings]) 458 | replacers = {opening: replacer for opening, 459 | replacer in prefix_substrings} 460 | 461 | # Function to replace matched prefix with its corresponding replacement 462 | def replace_fn(match): 463 | return replacers[match.group(0)] 464 | 465 | # Apply the regex to the caption 466 | return re.sub(pattern, replace_fn, caption, count=1, flags=re.IGNORECASE) 467 | 468 | with torch.inference_mode(): 469 | generation = model.generate( 470 | **model_inputs, max_new_tokens=256, do_sample=False) 471 | generation = generation[0][input_len:] 472 | decoded = processor.decode(generation, skip_special_tokens=True) 473 | 474 | modified_caption = modify_caption(decoded) 475 | # print(modified_caption) 476 | 477 | response = modified_caption 478 | 479 | keep_device = args_dict.get("keep_device", False) 480 | if not keep_device: 481 | model.cpu() 482 | del model 483 | torch.cuda.empty_cache() 484 | Utils.cache_set(f"paligemma_model_and_opt_", None) 485 | 486 | if captioner_mode: 487 | return [response] 488 | else: 489 | pass  # unlike florence2_node_encode, response is already a single caption string here; "response[0]" would keep only its first character 490 | conditionings = None 491 | clip = args_dict.get("clip", None) 492 | if clip is not None: 493 | conditionings = Utils.a1111_clip_text_encode( 494 | clip, response, ) 495 | 496 | return {"ui": {"string": [mz_prompt_utils.Utils.to_debug_prompt(response),]}, "result": (response, conditionings)} 497 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "comfyui-prompt-mz" 3 | description = "Use llama.cpp to help generate some nodes for prompt-related work" 4 | version = "1.1.0" 5 | license = "LICENSE" 6 | 7 | [project.urls] 8 | Repository = "https://github.com/MinusZoneAI/ComfyUI-Prompt-MZ" 9 | # Used by Comfy Registry https://comfyregistry.org 10 | 11 | [tool.comfy] 12 | PublisherId = "wailovet" 13 | DisplayName = "ComfyUI-Prompt-MZ" 14 | Icon = "" 15 | -------------------------------------------------------------------------------- /v1/init.py: -------------------------------------------------------------------------------- 1 | from ..mz_prompt_utils import Utils 2 | NODE_CLASS_MAPPINGS = { 3 | } 4 | 5 | 6 | NODE_DISPLAY_NAME_MAPPINGS = { 7 | } 8 | 9 | 10 | import importlib 11 | from . import mz_llama3 12 | from . import mz_phi3 13 | from .. import mz_llama_cpp 14 | from .. import mz_llama_core_nodes 15 | from . import mz_deprecated 16 | from . 
import mz_llava 17 | 18 | 19 | AUTHOR_NAME = u"MinusZone" 20 | CATEGORY_NAME = f"{AUTHOR_NAME} - Prompt/v1" 21 | 22 | 23 | def getCommonCLIPTextEncodeInput(): 24 | style_presets = mz_llama_core_nodes.get_style_presets() 25 | CommonCLIPTextEncodeInput = { 26 | "required": { 27 | "prompt_version": (["v1"], {"default": "v1"}), 28 | "style_presets": ( 29 | style_presets, {"default": style_presets[1]} 30 | ), 31 | "text": ("STRING", {"multiline": True, }), 32 | "keep_device": ([False, True], {"default": False}), 33 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 34 | }, 35 | "optional": { 36 | "clip": ("CLIP", ), 37 | "llama_cpp_options": ("LLamaCPPOptions", ), 38 | "customize_instruct": ("CustomizeInstruct", ), 39 | # "customize_json_schema": ("STRING", ), 40 | } 41 | } 42 | 43 | return CommonCLIPTextEncodeInput 44 | 45 | 46 | class MZ_LLama3CLIPTextEncode: 47 | @classmethod 48 | def INPUT_TYPES(s): 49 | m_models = mz_llama3.llama3_models.copy() 50 | for i in range(len(m_models)): 51 | if mz_llama3.get_exist_model(m_models[i]) is not None: 52 | m_models[i] += "[downloaded]" 53 | 54 | result = { 55 | "required": { 56 | "llama_cpp_model": (m_models, {"default": m_models[0]}), 57 | "download_source": ( 58 | ["none", "modelscope", "hf-mirror.com",], 59 | {"default": "none"} 60 | ), 61 | }, 62 | "optional": {}, 63 | } 64 | 65 | common_input = getCommonCLIPTextEncodeInput() 66 | for key in common_input["required"]: 67 | result["required"][key] = common_input["required"][key] 68 | for key in common_input["optional"]: 69 | result["optional"][key] = common_input["optional"][key] 70 | 71 | return result 72 | 73 | RETURN_TYPES = ("STRING", "CONDITIONING",) 74 | RETURN_NAMES = ("text", "conditioning",) 75 | OUTPUT_NODE = True 76 | FUNCTION = "encode" 77 | CATEGORY = CATEGORY_NAME 78 | 79 | def encode(self, **kwargs): 80 | 81 | kwargs["llama_cpp_model"] = kwargs.get( 82 | "llama_cpp_model", "").replace("[downloaded]", "") 83 | 84 | text = mz_llama3.query_beautify_prompt_text(kwargs) 85 | conditionings = None 86 | clip = kwargs.get("clip", None) 87 | if clip is not None: 88 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 89 | 90 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 91 | 92 | 93 | NODE_CLASS_MAPPINGS["MZ_LLama3CLIPTextEncode"] = MZ_LLama3CLIPTextEncode 94 | NODE_DISPLAY_NAME_MAPPINGS[ 95 | "MZ_LLama3CLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(LLama3)" 96 | 97 | 98 | class MZ_Phi3CLIPTextEncode: 99 | @classmethod 100 | def INPUT_TYPES(s): 101 | m_models = mz_phi3.phi3_models.copy() 102 | for i in range(len(m_models)): 103 | if mz_llama3.get_exist_model(m_models[i]) is not None: 104 | m_models[i] += "[downloaded]" 105 | 106 | importlib.reload(mz_phi3) 107 | 108 | result = { 109 | "required": { 110 | "llama_cpp_model": (m_models, {"default": m_models[0]}), 111 | "download_source": ( 112 | ["none", "modelscope", "hf-mirror.com",], 113 | {"default": "none"} 114 | ), 115 | }, 116 | "optional": {}, 117 | } 118 | 119 | common_input = getCommonCLIPTextEncodeInput() 120 | for key in common_input["required"]: 121 | result["required"][key] = common_input["required"][key] 122 | for key in common_input["optional"]: 123 | result["optional"][key] = common_input["optional"][key] 124 | 125 | return result 126 | 127 | RETURN_TYPES = ("STRING", "CONDITIONING",) 128 | RETURN_NAMES = ("text", "conditioning",) 129 | OUTPUT_NODE = True 130 | FUNCTION = "encode" 131 | CATEGORY = CATEGORY_NAME 132 | 133 | def encode(self, 
**kwargs): 134 | kwargs = kwargs.copy() 135 | 136 | importlib.reload(mz_llama3) 137 | 138 | kwargs["llama_cpp_model"] = kwargs.get( 139 | "llama_cpp_model", "").replace("[downloaded]", "") 140 | 141 | text = mz_phi3.query_beautify_prompt_text(kwargs) 142 | conditionings = None 143 | clip = kwargs.get("clip", None) 144 | if clip is not None: 145 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 146 | 147 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 148 | 149 | 150 | NODE_CLASS_MAPPINGS["MZ_Phi3CLIPTextEncode"] = MZ_Phi3CLIPTextEncode 151 | NODE_DISPLAY_NAME_MAPPINGS[ 152 | "MZ_Phi3CLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(Phi3)" 153 | 154 | 155 | class MZ_BaseLLamaCPPCLIPTextEncode: 156 | @classmethod 157 | def INPUT_TYPES(s): 158 | importlib.reload(mz_llama_cpp) 159 | 160 | result = { 161 | "required": { 162 | "llama_cpp_model": ("STRING", {"default": "", "placeholder": "model_path"}), 163 | }, 164 | "optional": { 165 | }, 166 | } 167 | 168 | common_input = getCommonCLIPTextEncodeInput() 169 | for key in common_input["required"]: 170 | result["required"][key] = common_input["required"][key] 171 | for key in common_input["optional"]: 172 | result["optional"][key] = common_input["optional"][key] 173 | 174 | return result 175 | RETURN_TYPES = ("STRING", "CONDITIONING",) 176 | RETURN_NAMES = ("text", "conditioning",) 177 | OUTPUT_NODE = True 178 | FUNCTION = "encode" 179 | CATEGORY = CATEGORY_NAME 180 | 181 | def encode(self, **kwargs): 182 | kwargs = kwargs.copy() 183 | 184 | kwargs["llama_cpp_model"] = kwargs.get( 185 | "llama_cpp_model", "").replace("[downloaded]", "") 186 | text = mz_deprecated.base_query_beautify_prompt_text(kwargs) 187 | conditionings = None 188 | clip = kwargs.get("clip", None) 189 | if clip is not None: 190 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 191 | 192 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 193 | 194 | 195 | NODE_CLASS_MAPPINGS["MZ_BaseLLamaCPPCLIPTextEncode"] = MZ_BaseLLamaCPPCLIPTextEncode 196 | NODE_DISPLAY_NAME_MAPPINGS[ 197 | "MZ_BaseLLamaCPPCLIPTextEncode"] = f"{AUTHOR_NAME} - deprecated - CLIPTextEncode(BaseLLamaCPP)" 198 | 199 | 200 | class MZ_LLavaImageInterrogator: 201 | @classmethod 202 | def INPUT_TYPES(s): 203 | importlib.reload(mz_llava) 204 | m_llava_models = mz_llava.LLava_models.copy() 205 | for i in range(len(m_llava_models)): 206 | if mz_llava.get_exist_model(m_llava_models[i]) is not None: 207 | m_llava_models[i] += "[downloaded]" 208 | 209 | m_llava_mmproj_models = mz_llava.LLava_mmproj_models.copy() 210 | for i in range(len(m_llava_mmproj_models)): 211 | if mz_llava.get_exist_model(m_llava_mmproj_models[i]) is not None: 212 | m_llava_mmproj_models[i] += "[downloaded]" 213 | 214 | return { 215 | "required": { 216 | "llama_cpp_model": (m_llava_models, {"default": m_llava_models[0]}), 217 | "mmproj_model": (m_llava_mmproj_models, {"default": m_llava_mmproj_models[0]}), 218 | "download_source": ( 219 | [ 220 | "none", 221 | "modelscope", 222 | "hf-mirror.com", 223 | ], 224 | {"default": "none"} 225 | ), 226 | "resolution": ("INT", {"default": 512, "min": 128, "max": 2048}), 227 | "sd_format": (["none", "v1", ], {"default": "none"}), 228 | "keep_device": ([False, True], {"default": False}), 229 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 230 | }, 231 | "optional": { 232 | "image": ("IMAGE",), 233 | "clip": ("CLIP", ), 234 | "llama_cpp_options": ("LLamaCPPOptions", ), 
235 | "customize_instruct": ("CustomizeInstruct", ), 236 | "captioner_config": ("ImageCaptionerConfig", ), 237 | }, 238 | } 239 | RETURN_TYPES = ("STRING", "CONDITIONING",) 240 | RETURN_NAMES = ("text", "conditioning",) 241 | OUTPUT_NODE = True 242 | FUNCTION = "interrogate" 243 | CATEGORY = CATEGORY_NAME 244 | 245 | def interrogate(self, **kwargs): 246 | kwargs = kwargs.copy() 247 | 248 | kwargs["llama_cpp_model"] = kwargs.get( 249 | "llama_cpp_model", "").replace("[downloaded]", "") 250 | kwargs["mmproj_model"] = kwargs.get( 251 | "mmproj_model", "").replace("[downloaded]", "") 252 | 253 | if kwargs.get("image", None) is not None: 254 | kwargs["image"] = Utils.tensor2pil(kwargs["image"]) 255 | else: 256 | kwargs["image"] = None 257 | 258 | text = mz_llava.image_interrogator(kwargs) 259 | conditionings = None 260 | clip = kwargs.get("clip", None) 261 | if clip is not None: 262 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 263 | 264 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 265 | 266 | 267 | NODE_CLASS_MAPPINGS["MZ_LLavaImageInterrogator"] = MZ_LLavaImageInterrogator 268 | NODE_DISPLAY_NAME_MAPPINGS[ 269 | "MZ_LLavaImageInterrogator"] = f"{AUTHOR_NAME} - deprecated - ImageInterrogator(LLava)" 270 | 271 | 272 | class MZ_BaseLLavaImageInterrogator: 273 | @classmethod 274 | def INPUT_TYPES(s): 275 | return { 276 | "required": { 277 | "llama_cpp_model": ("STRING", {"default": ""}), 278 | "mmproj_model": ("STRING", {"default": ""}), 279 | "resolution": ("INT", {"default": 512, "min": 128, "max": 2048}), 280 | "sd_format": (["none", "v1"], {"default": "none"}), 281 | "keep_device": ([False, True], {"default": False}), 282 | "seed": ("INT", {"default": 0, "min": 0, "max": 0xffffffffffffffff}), 283 | }, 284 | "optional": { 285 | "image": ("IMAGE",), 286 | "clip": ("CLIP", ), 287 | "llama_cpp_options": ("LLamaCPPOptions", ), 288 | "customize_instruct": ("CustomizeInstruct", ), 289 | "captioner_config": ("ImageCaptionerConfig", ), 290 | }, 291 | } 292 | RETURN_TYPES = ("STRING", "CONDITIONING",) 293 | RETURN_NAMES = ("text", "conditioning",) 294 | OUTPUT_NODE = True 295 | FUNCTION = "interrogate" 296 | CATEGORY = CATEGORY_NAME 297 | 298 | def interrogate(self, **kwargs): 299 | kwargs = kwargs.copy() 300 | 301 | importlib.reload(mz_llava) 302 | 303 | if kwargs.get("image", None) is not None: 304 | kwargs["image"] = Utils.tensor2pil(kwargs["image"]) 305 | else: 306 | kwargs["image"] = None 307 | 308 | text = mz_llava.base_image_interrogator(kwargs) 309 | conditionings = None 310 | clip = kwargs.get("clip", None) 311 | if clip is not None: 312 | conditionings = Utils.a1111_clip_text_encode(clip, text, ) 313 | 314 | return {"ui": {"string": [Utils.to_debug_prompt(text),]}, "result": (text, conditionings)} 315 | 316 | 317 | NODE_CLASS_MAPPINGS["MZ_BaseLLavaImageInterrogator"] = MZ_BaseLLavaImageInterrogator 318 | NODE_DISPLAY_NAME_MAPPINGS[ 319 | "MZ_BaseLLavaImageInterrogator"] = f"{AUTHOR_NAME} - deprecated - ImageInterrogator(BaseLLava)" 320 | -------------------------------------------------------------------------------- /v1/mz_deprecated.py: -------------------------------------------------------------------------------- 1 | from ..mz_prompt_utils import Utils 2 | from ..mz_llama_cpp import * 3 | from ..mz_llama_core_nodes import * 4 | from ..mz_prompts import * 5 | 6 | def base_query_beautify_prompt_text(args_dict): 7 | model_file = args_dict.get("llama_cpp_model", "") 8 | text = args_dict.get("text", "") 9 | style_presets = 
args_dict.get("style_presets", "") 10 | options = args_dict.get("llama_cpp_options", {}) 11 | keep_device = args_dict.get("keep_device", False) 12 | seed = args_dict.get("seed", -1) 13 | options["seed"] = seed 14 | 15 | 16 | customize_instruct = args_dict.get("customize_instruct", None) 17 | Utils.print_log( 18 | f"customize_instruct: {customize_instruct}") 19 | try: 20 | schema = None 21 | if customize_instruct is None: 22 | schema = get_schema_obj( 23 | keys_type={ 24 | "description": get_schema_base_type("string"), 25 | "long_prompt": get_schema_base_type("string"), 26 | "main_color_word": get_schema_base_type("string"), 27 | "camera_angle_word": get_schema_base_type("string"), 28 | "style_words": get_schema_array("string"), 29 | "subject_words": get_schema_array("string"), 30 | "light_words": get_schema_array("string"), 31 | "environment_words": get_schema_array("string"), 32 | }, 33 | required=[ 34 | "description", 35 | "long_prompt", 36 | "main_color_word", 37 | "camera_angle_word", 38 | "style_words", 39 | "subject_words", 40 | "light_words", 41 | "environment_words", 42 | ] 43 | ) 44 | 45 | question = f"IDEA: {style_presets},{text}" 46 | if style_presets == "none": 47 | question = f"IDEA: {text}" 48 | 49 | system_prompt = mz_prompts.Beautify_Prompt + mz_prompts.Long_prompt + "\n" 50 | 51 | else: 52 | 53 | system_prompt = customize_instruct.get("system", "") 54 | question = customize_instruct.get("instruct", "%text%") 55 | 56 | system_prompt = system_prompt.replace("%text%", text) 57 | question = question.replace("%text%", text) 58 | 59 | Utils.print_log(f"system_prompt: {system_prompt}") 60 | Utils.print_log(f"question: {question}") 61 | 62 | if schema is not None: 63 | response_json = llama_cpp_simple_interrogator_to_json( 64 | model_file=model_file, 65 | system=system_prompt, 66 | question=question, 67 | schema=schema, 68 | options=options, 69 | ) 70 | Utils.print_log(f"response_json: {response_json}") 71 | 72 | response = json.loads(response_json) 73 | full_responses = [] 74 | 75 | if response["description"] != "": 76 | full_responses.append(f"({response['description']})") 77 | if response["long_prompt"] != "": 78 | full_responses.append(f"({response['long_prompt']})") 79 | if response["main_color_word"] != "": 80 | full_responses.append(f"({response['main_color_word']})") 81 | if response["camera_angle_word"] != "": 82 | full_responses.append(f"({response['camera_angle_word']})") 83 | 84 | response["style_words"] = [ 85 | x for x in response["style_words"] if x != ""] 86 | if len(response["style_words"]) > 0: 87 | full_responses.append( 88 | f"({', '.join(response['style_words'])})") 89 | 90 | response["subject_words"] = [ 91 | x for x in response["subject_words"] if x != ""] 92 | if len(response["subject_words"]) > 0: 93 | full_responses.append( 94 | f"({', '.join(response['subject_words'])})") 95 | 96 | response["light_words"] = [ 97 | x for x in response["light_words"] if x != ""] 98 | if len(response["light_words"]) > 0: 99 | full_responses.append( 100 | f"({', '.join(response['light_words'])})") 101 | 102 | response["environment_words"] = [ 103 | x for x in response["environment_words"] if x != ""] 104 | if len(response["environment_words"]) > 0: 105 | full_responses.append( 106 | f"({', '.join(response['environment_words'])})") 107 | 108 | full_response = ", ".join(full_responses) 109 | else: 110 | full_response = llama_cpp_simple_interrogator( 111 | model_file=model_file, 112 | system=system_prompt, 113 | question=question, 114 | options=options, 115 | ) 116 | 117 | 
start_str = customize_instruct.get("start_str", "") 118 | if start_str != "" and full_response.find(start_str) != -1: 119 | full_response_list = full_response.split(start_str) 120 | # 删除第一个元素 121 | full_response_list.pop(0) 122 | full_response = start_str.join(full_response_list) 123 | 124 | end_str = customize_instruct.get("end_str", "") 125 | if end_str != "" and full_response.find(end_str) != -1: 126 | full_response_list = full_response.split(end_str) 127 | # 删除最后一个元素 128 | full_response_list.pop() 129 | full_response = end_str.join(full_response_list) 130 | 131 | if keep_device is False: 132 | freed_gpu_memory(model_file=model_file) 133 | 134 | # 去除换行 135 | while full_response.find("\n") != -1: 136 | full_response = full_response.replace("\n", " ") 137 | 138 | # 句号换成逗号 139 | while full_response.find(".") != -1: 140 | full_response = full_response.replace(".", ",") 141 | 142 | # 去除多余逗号 143 | while full_response.find(",,") != -1: 144 | full_response = full_response.replace(",,", ",") 145 | while full_response.find(", ,") != -1: 146 | full_response = full_response.replace(", ,", ",") 147 | 148 | full_response = Utils.prompt_zh_to_en(full_response) 149 | 150 | style_presets_prompt_text = style_presets_prompt.get(style_presets, "") 151 | 152 | if style_presets_prompt_text != "": 153 | full_response = f"{style_presets_prompt_text}, {full_response}" 154 | 155 | return full_response 156 | 157 | except Exception as e: 158 | freed_gpu_memory(model_file=model_file) 159 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 160 | raise e 161 | -------------------------------------------------------------------------------- /v1/mz_llama3.py: -------------------------------------------------------------------------------- 1 | import json 2 | from .. import mz_prompt_utils 3 | from .. import mz_llama_cpp 4 | from . 
import mz_deprecated 5 | import importlib 6 | 7 | 8 | llama3_models = [ 9 | "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf", 10 | "Meta-Llama-3-8B-Instruct.Q2_K.gguf", 11 | "Meta-Llama-3-8B-Instruct.Q3_K_L.gguf", 12 | "Meta-Llama-3-8B-Instruct.Q3_K_M.gguf", 13 | "Meta-Llama-3-8B-Instruct.Q3_K_S.gguf", 14 | "Meta-Llama-3-8B-Instruct.Q4_0.gguf", 15 | "Meta-Llama-3-8B-Instruct.Q4_1.gguf", 16 | "Meta-Llama-3-8B-Instruct.Q4_K_S.gguf", 17 | "Meta-Llama-3-8B-Instruct.Q5_0.gguf", 18 | "Meta-Llama-3-8B-Instruct.Q5_1.gguf", 19 | "Meta-Llama-3-8B-Instruct.Q5_K_M.gguf", 20 | "Meta-Llama-3-8B-Instruct.Q5_K_S.gguf", 21 | "Meta-Llama-3-8B-Instruct.Q6_K.gguf", 22 | "Meta-Llama-3-8B-Instruct.Q8_0.gguf", 23 | ] 24 | 25 | 26 | def get_exist_model(model_name): 27 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 28 | model_type="llama3", 29 | model_name=model_name, 30 | only_get_path=True, 31 | ) 32 | 33 | if modelscope_model_path is not None: 34 | return modelscope_model_path 35 | 36 | model_url = f"https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 37 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 38 | model_url, only_get_path=True) 39 | if hf_model_path is not None: 40 | return hf_model_path 41 | 42 | return None 43 | 44 | 45 | def query_beautify_prompt_text(args_dict): 46 | model_name = args_dict.get("llama_cpp_model", "") 47 | download_source = args_dict.get("download_source", None) 48 | 49 | try: 50 | model_file = get_exist_model(model_name) 51 | 52 | if model_file is None: 53 | if download_source == "modelscope": 54 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 55 | model_type="llama3", 56 | model_name=model_name, 57 | ) 58 | else: 59 | model_url = f"https://huggingface.co/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 60 | if download_source == "hf-mirror.com": 61 | model_url = f"https://hf-mirror.com/QuantFactory/Meta-Llama-3-8B-Instruct-GGUF/resolve/main/{model_name}" 62 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 63 | 64 | args_dict["llama_cpp_model"] = model_file 65 | full_response = mz_deprecated.base_query_beautify_prompt_text( 66 | args_dict=args_dict) 67 | return full_response 68 | 69 | except Exception as e: 70 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 71 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 72 | raise e 73 | -------------------------------------------------------------------------------- /v1/mz_llava.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from .. import mz_prompt_utils 4 | from .. import mz_llama_cpp 5 | from .. import mz_prompts 6 | from .. import mz_llama_core_nodes 7 | from .. 
import mz_prompt_webserver 8 | 9 | import importlib 10 | 11 | 12 | LLava_models = [ 13 | "llava-1.6-mistral-7b-gguf/llava-v1.6-mistral-7b.Q5_K_M.gguf", 14 | "llava-v1.6-vicuna-13b-gguf/llava-v1.6-vicuna-13b.Q5_K_M.gguf", 15 | "ggml_llava-v1.5-7b/ggml-model-q4_k.gguf", 16 | "ggml_llava-v1.5-7b/ggml-model-q5_k.gguf", 17 | "ggml_llava-v1.5-7b/ggml-model-f16.gguf", 18 | "ggml_bakllava-1/ggml-model-q4_k.gguf", 19 | "ggml_bakllava-1/ggml-model-q5_k.gguf", 20 | "ggml_bakllava-1/ggml-model-f16.gguf", 21 | ] 22 | 23 | LLava_mmproj_models = [ 24 | "llava-1.6-mistral-7b-gguf/mmproj-model-f16.gguf", 25 | "llava-v1.6-vicuna-13b-gguf/mmproj-model-f16.gguf", 26 | "ggml_llava-v1.5-7b/mmproj-model-f16.gguf", 27 | "ggml_bakllava-1/mmproj-model-f16.gguf", 28 | ] 29 | 30 | 31 | huggingface_models_map = { 32 | "llava-v1.6-vicuna-13b-gguf": "cjpais", 33 | "llava-1.6-mistral-7b-gguf": "cjpais", 34 | "ggml_llava-v1.5-7b": "mys", 35 | "ggml_llava-v1.5-13b": "mys", 36 | "ggml_bakllava-1": "mys", 37 | } 38 | 39 | 40 | def get_exist_model(model_name): 41 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 42 | model_type="llava", 43 | model_name=model_name, 44 | only_get_path=True, 45 | ) 46 | 47 | if modelscope_model_path is not None: 48 | return modelscope_model_path 49 | 50 | model_name = model_name.split("?")[0] 51 | model_names = model_name.split("/") 52 | 53 | author = huggingface_models_map.get(model_names[0], None) 54 | if author is None: 55 | return False 56 | 57 | model_url = f"https://hf-mirror.com/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 58 | 59 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 60 | model_url, only_get_path=True) 61 | 62 | if hf_model_path is not None: 63 | return hf_model_path 64 | 65 | return None 66 | 67 | 68 | def image_interrogator(args_dict): 69 | args_dict = args_dict.copy() 70 | 71 | captioner_config = args_dict.get("captioner_config", None) 72 | if captioner_config is not None: 73 | import PIL.Image as Image 74 | directory = captioner_config.get("directory", None) 75 | force_update = captioner_config.get("force_update", False) 76 | caption_suffix = captioner_config.get("caption_suffix", "") 77 | retry_keyword = captioner_config.get("retry_keyword", "") 78 | retry_keywords = retry_keyword.split(",") 79 | 80 | retry_keywords = [k.strip() for k in retry_keywords] 81 | retry_keywords = [k for k in retry_keywords if k != ""] 82 | 83 | pre_images = [] 84 | for root, dirs, files in os.walk(directory): 85 | for file in files: 86 | if file.endswith(".jpg") or file.endswith(".jpeg") or file.endswith(".png"): 87 | image_path = os.path.join(root, file) 88 | base_file_path = os.path.splitext(image_path)[0] 89 | caption_file = os.path.join( 90 | root, base_file_path + caption_suffix) 91 | if os.path.exists(caption_file) and force_update is False: 92 | continue 93 | 94 | pre_images.append({ 95 | "image_path": image_path, 96 | "caption_path": caption_file 97 | }) 98 | 99 | result = [] 100 | 101 | pb = mz_prompt_utils.Utils.progress_bar(len(pre_images)) 102 | for i in range(len(pre_images)): 103 | pre_image = pre_images[i] 104 | image_path = pre_image["image_path"] 105 | caption_file = pre_image["caption_path"] 106 | 107 | onec_args_dict = args_dict.copy() 108 | del onec_args_dict["captioner_config"] 109 | 110 | pil_image = Image.open(image_path) 111 | onec_args_dict["image"] = pil_image 112 | 113 | if i < len(pre_images) - 1: 114 | onec_args_dict["keep_device"] = True 115 | 116 | pb.update( 117 | i, 118 | len(pre_images), 119 | pil_image.copy(), 
120 | ) 121 | 122 | response = image_interrogator(onec_args_dict) 123 | response = response.strip() 124 | is_retry = response == "" 125 | for k in retry_keywords: 126 | if response.find(k) != -1: 127 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 128 | is_retry = True 129 | break 130 | 131 | mz_prompt_utils.Utils.print_log( 132 | "\n\nonec_args_dict: ", onec_args_dict) 133 | if is_retry: 134 | for retry_n in range(5): 135 | print(f"Retry {retry_n+1}...") 136 | onec_args_dict["seed"] = onec_args_dict["seed"] + 1 137 | response = image_interrogator(onec_args_dict) 138 | response = response.strip() 139 | is_retry = response == "" 140 | for k in retry_keywords: 141 | if response.find(k) != -1: 142 | print(f"存在需要重试的关键词 ; Retry keyword found: {k}") 143 | is_retry = True 144 | break 145 | 146 | if is_retry is False: 147 | break 148 | if is_retry: 149 | print(f"重试失败,图片被跳过 ; Retry failed") 150 | response = "" 151 | 152 | if response != "": 153 | with open(caption_file, "w") as f: 154 | f.write(response) 155 | 156 | result.append(response) 157 | 158 | # mz_prompt_webserver.show_toast_success( 159 | # f"提示词保存成功(prompt saved successfully): {caption_file}", 160 | # 1000, 161 | # ) 162 | 163 | return result 164 | 165 | model_name = args_dict.get("llama_cpp_model", "") 166 | mmproj_name = args_dict.get("mmproj_model", "") 167 | download_source = args_dict.get("download_source", None) 168 | model_file = get_exist_model(model_name) 169 | mmproj_file = get_exist_model(mmproj_name) 170 | 171 | if model_file is None or mmproj_file is None: 172 | if download_source == "modelscope": 173 | if model_file is None: 174 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 175 | model_type="llava", 176 | model_name=model_name, 177 | ) 178 | if mmproj_file is None: 179 | mmproj_file = mz_prompt_utils.Utils.modelscope_download_model( 180 | model_type="llava", 181 | model_name=mmproj_name, 182 | ) 183 | else: 184 | model_name = model_name.split("?")[0] 185 | model_names = model_name.split("/") 186 | 187 | author = huggingface_models_map.get(model_names[0], None) 188 | if author is None: 189 | raise Exception( 190 | f"Model {model_names[0]} is not supported for image_to_text.") 191 | 192 | if download_source == "hf-mirror.com": 193 | model_url = f"https://hf-mirror.com/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 194 | else: 195 | model_url = f"https://huggingface.co/{author}/{model_names[0]}/resolve/main/{model_names[1]}" 196 | 197 | if model_file is None: 198 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 199 | 200 | mmproj_name = mmproj_name.split("?")[0] 201 | mmproj_names = mmproj_name.split("/") 202 | if download_source == "hf-mirror.com": 203 | mmproj_url = f"https://hf-mirror.com/{author}/{mmproj_names[0]}/resolve/main/{mmproj_names[1]}" 204 | else: 205 | mmproj_url = f"https://huggingface.co/{author}/{mmproj_names[0]}/resolve/main/{mmproj_names[1]}" 206 | 207 | if mmproj_file is None: 208 | mmproj_file = mz_prompt_utils.Utils.hf_download_model( 209 | mmproj_url) 210 | 211 | args_dict["llama_cpp_model"] = model_file 212 | args_dict["mmproj_model"] = mmproj_file 213 | response = base_image_interrogator(args_dict=args_dict) 214 | return response 215 | 216 | 217 | def base_image_interrogator(args_dict): 218 | model_file = args_dict.get("llama_cpp_model", "") 219 | mmproj_file = args_dict.get("mmproj_model", "") 220 | image = args_dict.get("image", None) 221 | resolution = args_dict.get("resolution", 512) 222 | keep_device = args_dict.get("keep_device", False) 223 | seed = 
args_dict.get("seed", -1) 224 | options = args_dict.get("llama_cpp_options", {}) 225 | options["seed"] = seed 226 | 227 | mz_prompt_utils.Utils.print_log( 228 | "base_image_interrogator options: ", options) 229 | # input("Press Enter to continue...") 230 | 231 | image = mz_prompt_utils.Utils.resize_max(image, resolution, resolution) 232 | 233 | customize_instruct = args_dict.get("customize_instruct", None) 234 | if customize_instruct is None: 235 | 236 | system_text = mz_prompts.GPT4VImageCaptioner_System 237 | question_text = mz_prompts.GPT4VImageCaptioner_Prompt 238 | 239 | response = mz_llama_cpp.llava_cpp_simple_interrogator( 240 | model_file=model_file, 241 | mmproj_file=mmproj_file, 242 | image=image, 243 | options=options, 244 | # system=system_text, 245 | # question=question_text, 246 | ) 247 | else: 248 | 249 | system_prompt = customize_instruct.get("system", "") 250 | question = customize_instruct.get("instruct", "%text%") 251 | system_prompt = system_prompt.replace("%text%", "") 252 | question = question.replace("%text%", "") 253 | 254 | full_response = mz_llama_cpp.llava_cpp_simple_interrogator( 255 | model_file=model_file, 256 | mmproj_file=mmproj_file, 257 | image=image, 258 | system=system_prompt, 259 | question=question, 260 | options=options, 261 | ) 262 | 263 | start_str = customize_instruct.get("start_str", "") 264 | if start_str != "" and full_response.find(start_str) != -1: 265 | full_response_list = full_response.split(start_str) 266 | # 删除第一个元素 267 | full_response_list.pop(0) 268 | full_response = start_str.join(full_response_list) 269 | end_str = customize_instruct.get("end_str", "") 270 | if end_str != "" and full_response.find(end_str) != -1: 271 | full_response_list = full_response.split(end_str) 272 | # 删除最后一个元素 273 | full_response_list.pop() 274 | full_response = end_str.join(full_response_list) 275 | 276 | response = full_response 277 | 278 | sd_format = args_dict.get("sd_format", "v1") 279 | 280 | if sd_format == "v1" and customize_instruct is None: 281 | mz_prompt_utils.Utils.print_log(f"response v1: {response}") 282 | schema = mz_llama_core_nodes.get_schema_obj( 283 | keys_type={ 284 | "short_describes": mz_llama_core_nodes.get_schema_base_type("string"), 285 | "subject_tags": mz_llama_core_nodes.get_schema_array("string"), 286 | "action_tags": mz_llama_core_nodes.get_schema_array("string"), 287 | "light_tags": mz_llama_core_nodes.get_schema_array("string"), 288 | "scenes_tags": mz_llama_core_nodes.get_schema_array("string"), 289 | "other_tags": mz_llama_core_nodes.get_schema_array("string"), 290 | }, 291 | required=[ 292 | "short_describes", 293 | "subject_tags", 294 | "action_tags", 295 | "light_tags", 296 | "scenes_tags", 297 | "other_tags", 298 | ] 299 | ) 300 | 301 | response = mz_llama_cpp.llama_cpp_simple_interrogator_to_json( 302 | model_file=model_file, 303 | system=mz_prompts.Beautify_Prompt, 304 | question=f"IDEA: {response}", 305 | schema=schema, 306 | options=options, 307 | ) 308 | 309 | response_json = json.loads(response) 310 | responses = [] 311 | for key, value in response_json.items(): 312 | if type(value) == list: 313 | # 去除开头.和空格 314 | value = [v.strip().lstrip(".") for v in value] 315 | # 去除空字符串 316 | value = [v for v in value if v != ""] 317 | if len(value) > 0: 318 | responses.append(f"({', '.join(value)})") 319 | 320 | else: 321 | if value != "": 322 | responses.append(f"({value})") 323 | response = ", ".join(responses) 324 | 325 | if keep_device is False: 326 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 327 | return 
response 328 | -------------------------------------------------------------------------------- /v1/mz_phi3.py: -------------------------------------------------------------------------------- 1 | import json 2 | from .. import mz_prompt_utils 3 | from .. import mz_llama_cpp 4 | from . import mz_deprecated 5 | 6 | phi3_models = [ 7 | "Phi-3-mini-4k-instruct-q4.gguf" 8 | ] 9 | 10 | 11 | def get_exist_model(model_name): 12 | modelscope_model_path = mz_prompt_utils.Utils.modelscope_download_model( 13 | model_type="phi3", 14 | model_name=model_name, 15 | only_get_path=True, 16 | ) 17 | 18 | if modelscope_model_path is not None: 19 | return modelscope_model_path 20 | 21 | model_url = f"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 22 | hf_model_path = mz_prompt_utils.Utils.hf_download_model( 23 | model_url, only_get_path=True) 24 | if hf_model_path is not None: 25 | return hf_model_path 26 | 27 | return None 28 | 29 | 30 | def query_beautify_prompt_text(args_dict): 31 | model_name = args_dict.get("llama_cpp_model", "") 32 | download_source = args_dict.get("download_source", None) 33 | 34 | try: 35 | model_file = get_exist_model(model_name) 36 | 37 | if model_file is None: 38 | if download_source == "modelscope": 39 | model_file = mz_prompt_utils.Utils.modelscope_download_model( 40 | model_type="phi3", 41 | model_name=model_name, 42 | ) 43 | else: 44 | model_url = f"https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 45 | if download_source == "hf-mirror.com": 46 | model_url = f"https://hf-mirror.com/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/{model_name}" 47 | model_file = mz_prompt_utils.Utils.hf_download_model(model_url) 48 | 49 | args_dict["llama_cpp_model"] = model_file 50 | full_response = mz_deprecated.base_query_beautify_prompt_text( 51 | args_dict=args_dict) 52 | return full_response 53 | 54 | except Exception as e: 55 | mz_llama_cpp.freed_gpu_memory(model_file=model_file) 56 | # mz_utils.Utils.print_log(f"Error in auto_prompt_text: {e}") 57 | raise e 58 | -------------------------------------------------------------------------------- /web/prompt_mz.js: -------------------------------------------------------------------------------- 1 | import { app } from "../../scripts/app.js"; 2 | import { api } from "../../scripts/api.js"; 3 | import { ComfyWidgets } from "/scripts/widgets.js"; 4 | 5 | async function sleep(ms) { 6 | return new Promise((resolve) => setTimeout(resolve, ms)); 7 | } 8 | 9 | async function showToast(message, duration = 3000) { 10 | const toast = document.createElement("div"); 11 | toast.style.position = "fixed"; 12 | toast.style.top = "20px"; 13 | toast.style.left = "50%"; 14 | toast.style.transform = "translateX(-50%)"; 15 | toast.style.padding = "10px 20px"; 16 | toast.style.backgroundColor = "var(--comfy-menu-bg)"; 17 | toast.style.color = "var(--input-text)"; 18 | toast.style.borderRadius = "10px"; 19 | toast.style.border = "2px solid var(--border-color)"; 20 | toast.style.zIndex = "9999"; 21 | 22 | toast.textContent = message; 23 | document.body.appendChild(toast); 24 | await sleep(duration); 25 | toast.remove(); 26 | } 27 | 28 | async function waitMessage() { 29 | var websocket = new WebSocket( 30 | `ws://${window.location.host}/mz_webapi/message` 31 | ); 32 | websocket.onmessage = async (event) => { 33 | const resp = JSON.parse(event.data); 34 | console.log("Message received", resp); 35 | 36 | for (const data of resp) { 37 | if (data.type === "toast-success") { 38 | await 
showToast(data.message, data?.duration || 3000); 39 | } 40 | } 41 | }; 42 | websocket.onclose = async (event) => { 43 | console.log("Connection closed", event); 44 | }; 45 | 46 | websocket.onerror = async (event) => { 47 | console.log("Connection error", event); 48 | }; 49 | 50 | // for (;;) { 51 | // await sleep(1000); 52 | // try { 53 | // if (websocket.readyState !== WebSocket.OPEN) { 54 | // return; 55 | // } 56 | // websocket.send( 57 | // JSON.stringify({ 58 | // type: "ping", 59 | // }) 60 | // ); 61 | // } catch (error) { 62 | // return; 63 | // } 64 | // } 65 | } 66 | 67 | /** 68 | * @returns {import("./types/comfy").ComfyExtension} extension 69 | */ 70 | const my_ui = { 71 | name: "prompt_mz.ui", 72 | setup() {}, 73 | init: async () => { 74 | console.log("prompt_mz Registering UI extension"); 75 | 76 | waitMessage(); 77 | }, 78 | 79 | /** 80 | * @param {import("./types/comfy").NodeType} nodeType 81 | * @param {import("./types/comfy").NodeDef} nodeData 82 | * @param {import("./types/comfy").App} app 83 | */ 84 | async beforeRegisterNodeDef(nodeType, nodeData, app) { 85 | switch (nodeData.name) { 86 | case "MZ_OpenAIApiCLIPTextEncode": 87 | case "MZ_LLama3CLIPTextEncode": 88 | case "MZ_Phi3CLIPTextEncode": 89 | case "MZ_BaseLLamaCPPCLIPTextEncode": 90 | case "MZ_LLavaImageInterrogator": 91 | case "MZ_BaseLLavaImageInterrogator": 92 | case "MZ_LLamaCPPCLIPTextEncode": 93 | case "MZ_ImageInterrogatorCLIPTextEncode": 94 | case "MZ_Florence2CLIPTextEncode": 95 | case "MZ_PaliGemmaCLIPTextEncode": 96 | // Node Created 97 | const onNodeCreated = nodeType.prototype.onNodeCreated; 98 | nodeType.prototype.onNodeCreated = function () { 99 | const ret = onNodeCreated 100 | ? onNodeCreated.apply(this, arguments) 101 | : undefined; 102 | 103 | console.log("onNodeCreated:", this); 104 | const nodeName = this.name + "_" + "customtext"; 105 | const wi = ComfyWidgets.STRING( 106 | this, 107 | nodeName, 108 | [ 109 | "STRING", 110 | { 111 | default: "", 112 | placeholder: "Text message output...", 113 | multiline: true, 114 | }, 115 | ], 116 | app 117 | ); 118 | wi.widget.inputEl.readOnly = true; 119 | 120 | return ret; 121 | }; 122 | 123 | const outSet = function (texts) { 124 | if (texts.length > 0) { 125 | let widget_id = this?.widgets.findIndex( 126 | (w) => w.name === this.name + "_" + "customtext" 127 | ); 128 | if (Array.isArray(texts)) 129 | texts = texts 130 | .filter((word) => word.trim() !== "") 131 | .map((word) => word.trim()) 132 | .join(" "); 133 | this.widgets[widget_id].value = texts; 134 | app.graph.setDirtyCanvas(true); 135 | } 136 | }; 137 | 138 | // onConfigure 139 | const onConfigure = nodeType.prototype.onConfigure; 140 | nodeType.prototype.onConfigure = function (w) { 141 | onConfigure?.apply(this, arguments); 142 | 143 | // outSet.call(this, a?.string); 144 | }; 145 | 146 | // onExecuted 147 | const onExecuted = nodeType.prototype.onExecuted; 148 | nodeType.prototype.onExecuted = function (a, b) { 149 | // console.log("onExecuted:", arguments); 150 | onExecuted?.apply(this, arguments); 151 | 152 | outSet.call(this, a?.string); 153 | }; 154 | } 155 | }, 156 | }; 157 | 158 | app.registerExtension(my_ui); 159 | --------------------------------------------------------------------------------