├── pulse ├── __init__.py ├── dataset │ ├── __init__.py │ ├── diffusiondb.py │ ├── anytext.py │ └── dataset.py ├── processor │ ├── __init__.py │ ├── face.py │ ├── preference.py │ ├── flux_t2i.py │ ├── sdxl_t2i.py │ ├── sd_t2i.py │ ├── general.py │ ├── image_cache.py │ ├── qwenvl_i2t.py │ └── style.py └── pipeline │ ├── __init__.py │ ├── unit.py │ └── pipeline.py ├── requirements.txt ├── .gitignore ├── scripts ├── english_text.py ├── faceid.py ├── zoomin_zoomout.py ├── change_add_remove.py └── style_transfer.py ├── README_zh.md ├── README.md └── LICENSE /pulse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .unit import DataProcessUnit 2 | from .pipeline import DataPipeline 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffsynth 2 | dashscope 3 | pandas 4 | pyarrow 5 | fastparquet 6 | opencv-python-headless 7 | -------------------------------------------------------------------------------- /pulse/processor/face.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class FaceDataSelector: 4 | def __init__(self): 5 | pass 6 | 7 | def __call__(self, metadata): 8 | descriptions = [data for data in metadata["descriptions"] if data["gender"] == metadata["gender_in_image"]] 9 | return descriptions[0]["description"], descriptions[1 if len(descriptions) > 1 else 0]["description"] -------------------------------------------------------------------------------- /pulse/processor/preference.py: -------------------------------------------------------------------------------- 1 | from diffsynth.extensions.ImageQualityMetric import download_preference_model, load_preference_model, preference_model_id 2 | 3 | 4 | class ImagePreferenceModel: 5 | def __init__(self, model_name: preference_model_id, cache_dir="./models", device="cuda"): 6 | path = download_preference_model(model_name, cache_dir=cache_dir) 7 | self.preference_model = load_preference_model(model_name, device=device, path=path) 8 | 9 | def __call__(self, image, prompt): 10 | return self.preference_model.score(image, prompt)[0] 11 | -------------------------------------------------------------------------------- /pulse/processor/flux_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, FluxImagePipeline 3 | 4 | 5 | class FLUXT2I: 6 | def __init__(self, model_path, device="cuda", model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.bfloat16, device=device) 8 | model_manager.load_models(model_path) 9 | self.pipe = FluxImagePipeline.from_model_manager(model_manager, **model_kwargs) 10 | self.pipeline_kwargs = pipeline_kwargs 11 | 12 | def __call__(self, **kwargs): 13 | return 
self.pipe(**self.pipeline_kwargs, **kwargs) 14 | 15 | -------------------------------------------------------------------------------- /pulse/processor/sdxl_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, SDXLImagePipeline 3 | 4 | 5 | class SDXLT2I: 6 | def __init__(self, model_path, device="cuda", model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.float16, device=device) 8 | model_manager.load_models(model_path) 9 | self.pipe = SDXLImagePipeline.from_model_manager(model_manager, **model_kwargs) 10 | self.pipeline_kwargs = pipeline_kwargs 11 | 12 | def __call__(self, **kwargs): 13 | return self.pipe(**self.pipeline_kwargs, **kwargs) 14 | 15 | -------------------------------------------------------------------------------- /pulse/processor/sd_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, SDImagePipeline 3 | 4 | 5 | class SDT2I: 6 | def __init__(self, model_path, device="cuda", textual_inversions=[], model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.float16, device=device) 8 | model_manager.load_models(model_path) 9 | model_manager.load_lora("models/lora/add_detail.safetensors", lora_alpha=-4) 10 | self.pipe = SDImagePipeline.from_model_manager(model_manager, **model_kwargs) 11 | self.pipe.prompter.load_textual_inversions(textual_inversions) 12 | self.pipeline_kwargs = pipeline_kwargs 13 | 14 | def __call__(self, **kwargs): 15 | return self.pipe(**self.pipeline_kwargs, **kwargs) 16 | 17 | -------------------------------------------------------------------------------- /pulse/dataset/diffusiondb.py: -------------------------------------------------------------------------------- 1 | import random, pandas, torch 2 | 3 | 4 | class DiffusionDB: 5 | def __init__(self, path, shuffle=True, seed=None, num_data=1000000, multi_prompt=False, num_prompt=1): 6 | self.data = pandas.read_parquet(path)["prompt"].tolist() 7 | if shuffle: 8 | if seed is None: 9 | seed = torch.randint(0, 10**9, size=(1,)).tolist()[0] 10 | random.seed(seed) 11 | random.shuffle(self.data) 12 | self.num_data = num_data 13 | self.multi_prompt = multi_prompt 14 | self.num_prompt = num_prompt 15 | 16 | def __getitem__(self, i): 17 | if self.multi_prompt: 18 | return {"prompt": self.data[i * self.num_prompt: i * self.num_prompt + self.num_prompt]} 19 | else: 20 | return {"prompt": self.data[i]} 21 | 22 | def __len__(self): 23 | return self.num_data // self.num_prompt 24 | -------------------------------------------------------------------------------- /pulse/pipeline/unit.py: -------------------------------------------------------------------------------- 1 | class DataProcessUnit: 2 | def __init__(self, processor, input_params={}, output_params=(), parse_output_dict=False, extra_input_kwargs={}): 3 | self.processor = processor 4 | self.input_params = input_params 5 | self.output_params = output_params 6 | self.parse_output_dict = parse_output_dict 7 | self.extra_input_kwargs = extra_input_kwargs 8 | 9 | def __call__(self, data: dict): 10 | input_params = {name: data[self.input_params[name]] for name in self.input_params} 11 | input_params.update(self.extra_input_kwargs) 12 | raw_output = self.processor(**input_params) 13 | if self.parse_output_dict: 14 | data.update(raw_output) 15 | else: 16 | if not isinstance(raw_output, tuple): 17 | raw_output = (raw_output,) 
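# Non-dict outputs are matched positionally against output_params in the loop below:
# the i-th element of raw_output is stored in data under the i-th name in output_params,
# so any surplus outputs (or surplus names) are silently dropped by zip().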
18 | for name, output in zip(self.output_params, raw_output): 19 | data[name] = output 20 | return data -------------------------------------------------------------------------------- /pulse/pipeline/pipeline.py: -------------------------------------------------------------------------------- 1 | class DataPipeline: 2 | def __init__(self, units=()): 3 | self.units = units 4 | self.error_log = [0] * len(units) 5 | self.drop_log = [0] * len(units) 6 | 7 | def __call__(self, data, ignore_errors=False, debug_mode=False): 8 | for unit_id, unit in enumerate(self.units): 9 | if ignore_errors: 10 | try: 11 | data = unit(data) 12 | except: 13 | self.error_log[unit_id] += 1 14 | return None 15 | else: 16 | data = unit(data) 17 | if debug_mode: 18 | print("-" * 200) 19 | for key in data: 20 | print(key, data[key]) 21 | print("-" * 200) 22 | return data 23 | 24 | def report_log(self): 25 | for unit_id, unit in enumerate(self.units): 26 | print(f"Unit id: {unit_id} Processor name: {unit.processor.__class__.__name__} Errors: {self.error_log[unit_id]} Drops: {self.drop_log[unit_id]}") 27 | -------------------------------------------------------------------------------- /pulse/dataset/anytext.py: -------------------------------------------------------------------------------- 1 | import random, torch, json 2 | 3 | 4 | class AnyText: 5 | def __init__(self, path, shuffle=True, seed=None, num_data=1000000, multi_prompt=False, num_prompt=1): 6 | with open(path, "r", encoding="utf-8") as f: 7 | data = json.load(f) 8 | prompt_list = [] 9 | for i in data["data_list"]: 10 | prompt_list.append(i["caption"]) 11 | self.data = prompt_list 12 | if shuffle: 13 | if seed is None: 14 | seed = torch.randint(0, 10**9, size=(1,)).tolist()[0] 15 | random.seed(seed) 16 | random.shuffle(self.data) 17 | self.num_data = num_data 18 | self.multi_prompt = multi_prompt 19 | self.num_prompt = num_prompt 20 | 21 | def __getitem__(self, i): 22 | if self.multi_prompt: 23 | return {"prompt": self.data[i * self.num_prompt: i * self.num_prompt + self.num_prompt]} 24 | else: 25 | return {"prompt": self.data[i]} 26 | 27 | def __len__(self): 28 | return self.num_data // self.num_prompt 29 | -------------------------------------------------------------------------------- /pulse/processor/general.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | class ListSampler: 7 | def __init__(self): 8 | pass 9 | 10 | def __call__(self, ls): 11 | i = torch.randint(0, len(ls), size=(1,)).tolist()[0] 12 | return ls[i] 13 | 14 | 15 | class TextFormater: 16 | def __init__(self, template): 17 | self.template = template 18 | 19 | def __call__(self, text_list=[], *args, **kwargs): 20 | args = tuple(text_list) + tuple(str(i) for i in args) + tuple(str(kwargs[i]) for i in kwargs) 21 | return self.template % args 22 | 23 | 24 | class ListPacker: 25 | def __init__(self): 26 | pass 27 | 28 | def __call__(self, *args, **kwargs): 29 | ls = list(i for i in args) + list(kwargs[i] for i in kwargs) 30 | return ls 31 | 32 | 33 | class ImageCropper: 34 | def __init__(self): 35 | pass 36 | 37 | def __call__(self, bbox, image): 38 | x1, y1, x2, y2 = bbox 39 | image = np.array(image) 40 | image = image[y1: y2, x1: x2] 41 | image = Image.fromarray(image) 42 | return image 43 | 44 | 45 | class ImageResizer: 46 | def __init__(self): 47 | pass 48 | 49 | def __call__(self, image, height=1024, width=1024): 50 | return image.resize((width, height)) 51 | 
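# A minimal usage sketch of the general-purpose processors above, assuming they are
# combined through DataProcessUnit/DataPipeline as in the scripts; the style list,
# "%s" template, bounding box, and blank input image are illustrative placeholders only.
from PIL import Image
from pulse.processor.general import ListSampler, TextFormater, ImageCropper, ImageResizer
from pulse.pipeline import DataProcessUnit, DataPipeline

sketch_pipe = DataPipeline(units=[
    # Sample one entry from data["styles"] and store it as data["style"].
    DataProcessUnit(
        processor=ListSampler(),
        input_params={"ls": "styles"},
        output_params=("style",),
    ),
    # Fill the "%s" template with the sampled style and store the result as data["prompt"].
    DataProcessUnit(
        processor=TextFormater(template="a portrait in %s style"),
        input_params={"style": "style"},
        output_params=("prompt",),
    ),
    # Crop data["image"] to data["bbox"], then resize the crop to 512x512.
    DataProcessUnit(
        processor=ImageCropper(),
        input_params={"bbox": "bbox", "image": "image"},
        output_params=("image_cropped",),
    ),
    DataProcessUnit(
        processor=ImageResizer(),
        input_params={"image": "image_cropped"},
        output_params=("image_resized",),
        extra_input_kwargs={"height": 512, "width": 512},
    ),
])

result = sketch_pipe(
    {
        "styles": ["watercolor", "oil painting"],
        "image": Image.new("RGB", (1024, 1024)),
        "bbox": (100, 100, 600, 600),
    },
    ignore_errors=True,
)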
-------------------------------------------------------------------------------- /pulse/processor/image_cache.py: -------------------------------------------------------------------------------- 1 | import os, time, shutil 2 | from PIL.Image import Image 3 | 4 | 5 | class ImageCache: 6 | def __init__(self, cache_dir="cache", max_cache_num=10, file_extension="png"): 7 | timestamp = str(time.time_ns()) 8 | self.cache_dir = os.path.join(cache_dir, timestamp) 9 | print(f"Image cache files will be saved at {self.cache_dir}") 10 | os.makedirs(self.cache_dir, exist_ok=True) 11 | self.max_cache_num = max_cache_num 12 | self.file_extension = file_extension 13 | self.cached_files = [] 14 | 15 | def __call__(self, image): 16 | while len(self.cached_files) > self.max_cache_num: 17 | file_path = self.cached_files.pop(0) 18 | os.remove(file_path) 19 | timestamp = str(time.time_ns()) 20 | if isinstance(image, Image): 21 | path = os.path.join(self.cache_dir, f"{timestamp}.{self.file_extension}") 22 | image.save(path) 23 | self.cached_files.append(path) 24 | elif isinstance(image, str): 25 | _, file_extension = os.path.splitext(image) 26 | path = os.path.join(self.cache_dir, f"{timestamp}.{file_extension}") 27 | shutil.copy(image, path) 28 | self.cached_files.append(path) 29 | else: 30 | raise ValueError("Unsupported image format.") 31 | return path 32 | -------------------------------------------------------------------------------- /pulse/processor/qwenvl_i2t.py: -------------------------------------------------------------------------------- 1 | import json, dashscope 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | class QwenVLI2T: 7 | def __init__(self, api_key, model_id, prompt=""): 8 | dashscope.api_key = api_key 9 | self.model_id = model_id 10 | self.prompt = prompt 11 | 12 | def __call__(self, images=[], prompt=None, system_prompt=None): 13 | messages = [] 14 | if system_prompt is not None: 15 | messages.append({"role": "system", "content": system_prompt}) 16 | if prompt is None: 17 | prompt = self.prompt 18 | if not isinstance(images, list): 19 | images = [images] 20 | messages.append({"role": "user", "content": [{"text": prompt}] + [{"image": image} for image in images]}) 21 | response = dashscope.MultiModalConversation.call(model=self.model_id, messages=messages) 22 | response = response["output"]["choices"][0]["message"]["content"][0]["text"] 23 | return response 24 | 25 | 26 | class QwenJsonParser: 27 | def __init__(self): 28 | pass 29 | 30 | def __call__(self, text): 31 | text = text.strip() 32 | if text.startswith("```json"): 33 | text = text[len("```json\n"):] 34 | if text.endswith("```"): 35 | text = text[:-len("\n```")] 36 | json_data = json.loads(text) 37 | return json_data 38 | 39 | 40 | class QwenBbox2Mask: 41 | def __init__(self, absolute_coordinate=False): 42 | self.absolute_coordinate = absolute_coordinate 43 | 44 | def __call__(self, bbox, height=1024, width=1024): 45 | x1, y1, x2, y2 = bbox 46 | image = np.zeros((height, width, 3), dtype=np.uint8) 47 | if self.absolute_coordinate: 48 | image[y1: y2, x1: x2] = 255 49 | else: 50 | image[int(y1/1000*width): int(y2/1000*width), int(x1/1000*height): int(x2/1000*height)] = 255 51 | image = Image.fromarray(image) 52 | return image 53 | 54 | 55 | class QwenBbox2Square: 56 | def __init__(self): 57 | pass 58 | 59 | def expand(self, x1, x2, dx): 60 | x1, x2 = x1 - dx // 2, x2 + dx // 2 + dx % 2 61 | return x1, x2 62 | 63 | def shift(self, x1, x2, max_length): 64 | if x1 < 0: 65 | dx = -x1 66 | elif x2 > max_length: 67 | dx = 
-(x2 - max_length) 68 | else: 69 | dx = 0 70 | x1, x2 = x1 + dx, x2 + dx 71 | return x1, x2 72 | 73 | def __call__(self, bbox, height=1024, width=1024): 74 | x1, y1, x2, y2 = bbox 75 | y1, y2, x1, x2 = int(y1/1000*width), int(y2/1000*width), int(x1/1000*height), int(x2/1000*height) 76 | h, w = x2 - x1, y2 - y1 77 | if h > w: 78 | y1, y2 = self.expand(y1, y2, h - w) 79 | y1, y2 = self.shift(y1, y2, width) 80 | else: 81 | x1, x2 = self.expand(x1, x2, w - h) 82 | x1, x2 = self.shift(x1, x2, height) 83 | return {"square": (x1, y1, x2, y2)} 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /cache 3 | /models 4 | *.pkl 5 | *.safetensors 6 | *.pth 7 | *.ckpt 8 | *.pt 9 | *.bin 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | .pybuilder/ 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | # For a library or package, you might want to ignore these files since the code is 97 | # intended to run in multiple environments; otherwise, check them in: 98 | # .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # poetry 108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 109 | # This is especially recommended for binary packages to ensure reproducibility, and is more 110 | # commonly ignored for libraries. 111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 112 | #poetry.lock 113 | 114 | # pdm 115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 116 | #pdm.lock 117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 118 | # in version control. 
119 | # https://pdm.fming.dev/#use-with-ide 120 | .pdm.toml 121 | 122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 123 | __pypackages__/ 124 | 125 | # Celery stuff 126 | celerybeat-schedule 127 | celerybeat.pid 128 | 129 | # SageMath parsed files 130 | *.sage.py 131 | 132 | # Environments 133 | .env 134 | .venv 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | -------------------------------------------------------------------------------- /scripts/english_text.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.dataset.anytext import AnyText 3 | from pulse.dataset.dataset import ImageDatasetStorage 4 | from pulse.pipeline import DataProcessUnit, DataPipeline 5 | from diffsynth import download_models 6 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 7 | from modelscope import dataset_snapshot_download 8 | from tqdm import tqdm 9 | import argparse, os, zipfile 10 | 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Dataset generation script: Style Transfer.") 15 | parser.add_argument( 16 | "--target_dir", 17 | type=str, 18 | default="data/dataset", 19 | required=True, 20 | help="Path to save dataset.", 21 | ) 22 | parser.add_argument( 23 | "--cache_dir", 24 | type=str, 25 | default="data/cache", 26 | help="Path to save cache files.", 27 | ) 28 | parser.add_argument( 29 | "--dashscope_api_key", 30 | type=str, 31 | default="", 32 | help="Dashscope api key.", 33 | ) 34 | parser.add_argument( 35 | "--qwenvl_model_id", 36 | type=str, 37 | default="qwen-vl-max-0809", 38 | help="QwenVL model id.", 39 | ) 40 | parser.add_argument( 41 | "--modelscope_access_token", 42 | type=str, 43 | default=None, 44 | help="Modelscope access token", 45 | ) 46 | parser.add_argument( 47 | "--modelscope_dataset_id", 48 | type=str, 49 | default=None, 50 | help="Modelscope Dataset ID", 51 | ) 52 | parser.add_argument( 53 | "--num_data", 54 | type=int, 55 | default=100000, 56 | help="Number of data samples", 57 | ) 58 | parser.add_argument( 59 | "--max_num_files_per_folder", 60 | type=int, 61 | default=5000, 62 | help="Max number of files per folder", 63 | ) 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def initialize(args): 69 | dataset_snapshot_download("iic/AnyWord-3M", allow_file_pattern=["anytext2_json_files.zip"], cache_dir="./data") 70 | if "TextEn" not in os.listdir("data"): 71 | os.makedirs("data/TextEn") 72 | with zipfile.ZipFile("data/iic/AnyWord-3M/anytext2_json_files.zip", 'r') as f: 73 | f.extractall("data/TextEn") 74 | download_models(["FLUX.1-dev"]) 75 | 76 | t2i = FLUXT2I( 77 | model_path=[ 78 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 79 | "models/FLUX/FLUX.1-dev/text_encoder_2", 80 | "models/FLUX/FLUX.1-dev/ae.safetensors", 81 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 82 | ], 83 | device="cuda", 84 | ) 85 | 86 | dataset = AnyText("data/TextEn/anytext2_json_files/laion_word/data_v1.2b.json", shuffle=True, num_data=args.num_data) 87 | 88 
| pipe = DataPipeline(units=[ 89 | DataProcessUnit( 90 | processor=t2i, 91 | input_params={"prompt": "prompt"}, 92 | output_params=("image_1",), 93 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 94 | ), 95 | DataProcessUnit( 96 | processor=ImageDatasetStorage( 97 | target_dir=args.target_dir, 98 | image_keys=("image_1",), 99 | metadata_keys=("prompt",), 100 | modelscope_access_token=args.modelscope_access_token, 101 | modelscope_dataset_id=args.modelscope_dataset_id, 102 | max_num_files_per_folder=args.max_num_files_per_folder, 103 | ), 104 | input_params={ 105 | "image_1": "image_1", "prompt": "prompt", 106 | }, 107 | output_params=("metadata_path") 108 | ) 109 | ]) 110 | return dataset, pipe 111 | 112 | 113 | if __name__ == "__main__": 114 | args = parse_args() 115 | dataset, pipe = initialize(args) 116 | for data_id, data in enumerate(tqdm(dataset)): 117 | pipe(data, ignore_errors=True) 118 | if (data_id + 1) % 100 == 0: 119 | pipe.report_log() 120 | -------------------------------------------------------------------------------- /pulse/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import os, time, shutil, json, tarfile, torchvision, torch 2 | from PIL import Image 3 | from modelscope.hub.api import HubApi 4 | import torchvision.transforms.functional 5 | 6 | 7 | class ImageDatasetStorage: 8 | def __init__(self, target_dir, max_num_files_per_folder=5000, file_extension="png", image_keys=(), metadata_keys=(), modelscope_access_token=None, modelscope_dataset_id=None): 9 | os.makedirs(target_dir, exist_ok=True) 10 | self.target_dir = target_dir 11 | self.max_num_files_per_folder = max_num_files_per_folder 12 | self.file_extension = file_extension 13 | self.image_keys = image_keys 14 | self.metadata_keys = metadata_keys 15 | self.save_dir = None 16 | self.modelscope_access_token = modelscope_access_token 17 | self.modelscope_dataset_id = modelscope_dataset_id 18 | self.set_new_dir() 19 | 20 | 21 | def push_to_hub(self): 22 | if self.save_dir is not None and self.modelscope_dataset_id is not None: 23 | tar_file = self.save_dir + ".tar.gz" 24 | with tarfile.open(tar_file, "w:gz") as tar: 25 | tar.add(self.save_dir, arcname=os.path.basename(self.save_dir)) 26 | api = HubApi() 27 | api.login(self.modelscope_access_token) 28 | api.upload_file( 29 | path_or_fileobj=tar_file, 30 | path_in_repo="data/" + os.path.basename(self.save_dir) + ".tar.gz", 31 | repo_id=self.modelscope_dataset_id, 32 | repo_type="dataset", 33 | commit_message=f"Upload {os.path.basename(self.save_dir)}", 34 | ) 35 | 36 | 37 | def set_new_dir(self): 38 | self.push_to_hub() 39 | timestamp = str(time.time_ns()) 40 | self.save_dir = os.path.join(self.target_dir, timestamp) 41 | print(f"Dataset will be saved at {self.save_dir}") 42 | os.makedirs(self.save_dir, exist_ok=True) 43 | self.num_files = 0 44 | 45 | 46 | def get_image(self, image): 47 | timestamp = str(time.time_ns()) 48 | if isinstance(image, Image.Image): 49 | path = os.path.join(self.save_dir, f"{timestamp}.{self.file_extension}") 50 | image.save(path) 51 | elif isinstance(image, str): 52 | _, file_extension = os.path.splitext(image) 53 | path = os.path.join(self.save_dir, f"{timestamp}.{file_extension}") 54 | shutil.copy(image, path) 55 | else: 56 | raise ValueError("Unsupported image format.") 57 | self.num_files += 1 58 | return path 59 | 60 | 61 | def get_images(self, images): 62 | if not isinstance(images, list): 63 | images = [images] 64 | path = [self.get_image(image) for image in images] 65 | 
return path 66 | 67 | 68 | def get_metadata(self, metadata): 69 | timestamp = str(time.time_ns()) 70 | path = os.path.join(self.save_dir, f"{timestamp}.json") 71 | with open(path, "w") as f: 72 | json.dump(metadata, f, ensure_ascii=False) 73 | self.num_files += 1 74 | return path 75 | 76 | 77 | def __call__(self, **kwargs): 78 | metadata = {key: kwargs[key] for key in self.metadata_keys} 79 | for key in self.image_keys: 80 | path = self.get_image(kwargs[key]) 81 | metadata[key] = os.path.basename(path) 82 | path = self.get_metadata(metadata) 83 | path = os.path.basename(path) 84 | if self.num_files > self.max_num_files_per_folder: 85 | self.set_new_dir() 86 | return path 87 | 88 | 89 | 90 | class ImageDataset: 91 | def __init__(self, base_path, crop=False, height=1024, width=1024, max_num=10000000): 92 | self.path = [] 93 | self.search_for_images(base_path) 94 | self.crop = crop 95 | self.height = height 96 | self.width = width 97 | self.max_num = max_num 98 | 99 | def is_image_file(self, file_path): 100 | if "." not in file_path: 101 | return False 102 | file_ext_name = file_path.split(".")[-1] 103 | if file_ext_name.lower() in ["jpg", "jpeg", "png", "webp"]: 104 | return True 105 | return False 106 | 107 | def search_for_images(self, path): 108 | if os.path.isfile(path): 109 | if self.is_image_file(path): 110 | self.path.append(path) 111 | else: 112 | for file_name in os.listdir(path): 113 | sub_path = os.path.join(path, file_name) 114 | self.search_for_images(sub_path) 115 | 116 | def crop_and_resize(self, image): 117 | width, height = image.size 118 | scale = max(self.width / width, self.height / height) 119 | image = torchvision.transforms.functional.resize( 120 | image, 121 | (round(height*scale), round(width*scale)), 122 | interpolation=torchvision.transforms.InterpolationMode.BILINEAR 123 | ) 124 | image = torchvision.transforms.functional.center_crop( 125 | image, 126 | (self.height, self.width), 127 | ) 128 | return image 129 | 130 | def __getitem__(self, idx): 131 | while True: 132 | try: 133 | idx = torch.randint(0, len(self.path), size=(1,)).tolist()[0] 134 | path = self.path[idx] 135 | image = Image.open(path) 136 | if self.crop: 137 | image = self.crop_and_resize(image) 138 | return image 139 | except: 140 | continue 141 | 142 | def __len__(self): 143 | return self.max_num 144 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # ImagePulse-图律脉动 2 | 3 | 图律脉动项目旨在为下一代图像理解和生成模型提供数据集支撑,将模型的能力原子化,并构建原子能力数据集。 4 | 5 | [切换到英文](./README.md) 6 | 7 | ## 原子能力数据集 8 | 9 | ### 1. 
修改、添加、移除 10 | 11 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove 12 | * 数据集构建脚本:[./scripts/change_add_remove.py](./scripts/change_add_remove.py) 13 | 14 | |image_1|image_2|mask|editing_instruction|reverse_editing_instruction| 15 | |-|-|-|-|-| 16 | |![](https://github.com/user-attachments/assets/3a657ccd-6fae-4c44-bff8-a3b702c89d65)|![](https://github.com/user-attachments/assets/cc91af21-0090-4392-89d3-ddd62e056da5)|![](https://github.com/user-attachments/assets/5e4c0fdf-15ef-4bf0-b027-ef863e74afaa)|Remove the mustache and beard, change the white shirt to a blue turtleneck sweater, and remove the glass of milk.|Add a mustache and beard, change the blue turtleneck sweater to a white shirt, and add a glass of milk.| 17 | |![](https://github.com/user-attachments/assets/e3ed5116-1d51-47ab-ae51-0fd4e1548bfd)|![](https://github.com/user-attachments/assets/f78eb833-82bd-4a1f-9856-58718b05dc03)|![](https://github.com/user-attachments/assets/8d1e7e9d-6f5e-4abc-905a-c9f3321ec772)|Add a silver butterfly to the glowing golden lace on her face.|Remove the silver butterfly from the glowing golden lace on her face.| 18 | |![](https://github.com/user-attachments/assets/169e1170-f1d2-4f37-a758-baee81343720)|![](https://github.com/user-attachments/assets/6c250bd1-a705-45ba-8c8a-aacb91eaaa0f)|![](https://github.com/user-attachments/assets/167eb187-605b-4dcd-be62-b6833309aa5c)|Remove the necklace.|Add a necklace.| 19 | 20 | ### 2. 放大、缩小 21 | 22 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout 23 | * 数据集构建脚本:[./scripts/zoomin_zoomout.py](./scripts/zoomin_zoomout.py) 24 | 25 | |image_1|image_2|image_cropped|mask|editing_instruction|reverse_editing_instruction| 26 | |-|-|-|-|-|-| 27 | |![](https://github.com/user-attachments/assets/c90e2a05-8bbe-4897-83f6-fad5692677e2)|![](https://github.com/user-attachments/assets/70ab6767-e088-49f1-afb8-b85cca894031)|![](https://github.com/user-attachments/assets/76718ff8-f6ae-4f75-8f3f-be10d2eebde4)|![](https://github.com/user-attachments/assets/4bebe7f6-a3a7-481b-bcef-100bb18bec5d)|Zoom in to focus on the headband.|Zoom out to show the full view of the anime girl.| 28 | |![](https://github.com/user-attachments/assets/99fc81f9-77e5-4181-a376-06cdf5feaf65)|![](https://github.com/user-attachments/assets/e97b398d-a68e-4f34-a5e9-a831d16f3941)|![](https://github.com/user-attachments/assets/aef092d1-8d8c-4353-a9b7-089875307830)|![](https://github.com/user-attachments/assets/dcf2578a-df22-471c-96c0-34ba361a10b5)|Remove the superhero costume and replace it with a red shirt. Adjust the lighting to highlight the man's face.|Add a superhero costume with a red and yellow emblem on the chest and a red cape. Adjust the lighting to emphasize the costume.| 29 | |![](https://github.com/user-attachments/assets/356fc12b-02ca-4f3c-bf65-3248ca5576eb)|![](https://github.com/user-attachments/assets/41dcdf1c-3ce6-49aa-a651-cfc981932689)|![](https://github.com/user-attachments/assets/d5facc03-99d0-4f15-93ce-9f1bc5397bfd)|![](https://github.com/user-attachments/assets/5df06650-8c2b-47f2-9bf4-d3e2510e224d)|Remove the elephant and replace it with a large rock.|Replace the large rock with an elephant.| 30 | 31 | ### 3. 
风格迁移 32 | 33 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer 34 | * 数据集构建脚本:[./scripts/style_transfer.py](./scripts/style_transfer.py) 35 | 36 | |image_1|image_2|image_3|image_4|editing_instruction|reverse_editing_instruction| 37 | |-|-|-|-|-|-| 38 | |![](https://github.com/user-attachments/assets/f8974a51-fe70-4081-b0c8-60acc0c73f28)|![](https://github.com/user-attachments/assets/81e99ac5-8458-4f4a-ac4e-ae57e809f7f2)|![](https://github.com/user-attachments/assets/c8bb8062-3ad9-44b5-9ee0-a70be4dcbfb3)|![](https://github.com/user-attachments/assets/9edd818e-b6ae-4e6e-924b-cdb21d02a2ec)|transform the image into a cartoon style with vibrant colors and a confident expression.|transform the image into a realistic portrait with a serious expression and subtle lighting.| 39 | |![](https://github.com/user-attachments/assets/82253243-028b-43b4-9a37-796f17fa21af)|![](https://github.com/user-attachments/assets/84bf1c5b-55ae-4084-82ec-3a45c15b2030)|![](https://github.com/user-attachments/assets/b8908d78-ad41-42ce-af4b-c52bf92b2989)|![](https://github.com/user-attachments/assets/6593c9d6-7d5e-4cc0-b2ba-49e5fb38a229)|transform the image to have a brighter, more colorful palette and a clear blue sky.|transform the image to have a more muted color palette and an overcast sky.| 40 | |![](https://github.com/user-attachments/assets/705efc5f-504b-49ac-ba76-ae2f9edb56e4)|![](https://github.com/user-attachments/assets/d0e2e902-d97f-4ffd-91c3-56c96aa19f71)|![](https://github.com/user-attachments/assets/d8c0150f-2e41-480a-9873-dbb8419c8ac5)|![](https://github.com/user-attachments/assets/7be0991c-06e8-4560-8ff8-5fbd2f81b1a0)|transform the style of the image to an anime illustration, change the jacket to red, and add a cityscape background.|transform the style of the image to a digital painting, change the jacket to black, and remove the cityscape background.| 41 | 42 | ### 4. 
人脸保持 43 | 44 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID 45 | * 数据集构建脚本:[./scripts/faceid.py](./scripts/faceid.py) 46 | 47 | |image_face|image_1|image_2|editing_instruction|reverse_editing_instruction| 48 | |-|-|-|-|-| 49 | |![](https://github.com/user-attachments/assets/6b61c298-1938-405b-a680-c767bc8913e0)|![](https://github.com/user-attachments/assets/071f2743-3fc8-42d1-b17a-768835c3f9f4)|![](https://github.com/user-attachments/assets/05b45391-df5b-453c-9007-e94674056c5a)|Change the woman's white t-shirt to a white tank top.|Change the woman's white tank top to a white t-shirt.| 50 | |![](https://github.com/user-attachments/assets/203c9f5d-58fe-4e55-8ab8-5adbf14a1fbf)|![](https://github.com/user-attachments/assets/1022a76c-9ac2-43f0-bde2-d65322834251)|![](https://github.com/user-attachments/assets/09511643-8370-46ba-aee8-bcf4efd86d72)|Add a nighttime street scene with bokeh lights in the background.|Remove the nighttime street scene and bokeh lights from the background.| 51 | |![](https://github.com/user-attachments/assets/64d8d216-0966-4108-a378-1ad2312ad8eb)|![](https://github.com/user-attachments/assets/9d182b1e-8b4f-4f74-9f58-d14d7ad15474)|![](https://github.com/user-attachments/assets/c0f9a43e-dd2e-48c9-945c-643f11852808)|Change the background to a warmly lit room with lamps, change the suit to maroon, and add a sweater under the suit.|Change the background to a dimly lit room with red lighting, change the suit to black, and remove the sweater.| 52 | 53 | ## 运行数据集生成 54 | 55 | ```bash 56 | python change_add_remove.py \ 57 | --target_dir "data/dataset" \ 58 | --cache_dir "data/cache" \ 59 | --dashscope_api_key "sk-xxxxxxxxxxxxxxxx" \ 60 | --qwenvl_model_id "qwen-vl-max" \ 61 | --modelscope_access_token "xxxxxxxxxxxxxxx" \ 62 | --modelscope_dataset_id "DiffSynth-Studio/ImagePulse-ChangeAddRemove" \ 63 | --num_data 1000000 \ 64 | --max_num_files_per_folder 1000 65 | ``` 66 | 67 | * `target_dir`: 数据集存储路径 68 | * `cache_dir`: 缓存路径 69 | * `dashscope_api_key`: [百炼](https://bailian.console.aliyun.com/#/home) API Key,调用百炼 API 时需填入 70 | * `qwenvl_model_id`: [百炼](https://bailian.console.aliyun.com/#/home) 上 Qwen-VL 模型的 ID,调用百炼 API 时需填入 71 | * `modelscope_access_token`: [魔搭社区](https://modelscope.cn/my/myaccesstoken) 访问令牌,上传数据集到魔搭社区时需填入 72 | * `modelscope_dataset_id`: [魔搭社区](https://modelscope.cn) 数据集 ID,上传数据集到魔搭社区时需填入 73 | * `num_data`: 数据样本总量 74 | * `max_num_files_per_folder`: 每个打包文件中的文件数量 75 | 76 | ## 致谢 77 | 78 | * [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio):为本项目提供 Diffusion 模型推理支持 79 | * [魔搭社区](https://modelscope.cn):为本项目提供模型和数据集的存储与下载支持 80 | * [百炼](https://bailian.console.aliyun.com/#/home):为本项目提供大型语言模型的推理 API 支持 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImagePulse 2 | 3 | ImagePulse project aims to provide dataset support for the next generation of image understanding and generation models, by atomizing the capabilities of these models and constructing atomic capability datasets. 4 | 5 | [Switch to Chinese](./README_zh.md) 6 | 7 | ## Atomic Capability Datasets 8 | 9 | ### 1. 
Change, Add, Remove 10 | 11 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove) 12 | * Dataset Construction Script: [./scripts/change_add_remove.py](./scripts/change_add_remove.py) 13 | 14 | |image_1|image_2|mask|editing_instruction|reverse_editing_instruction| 15 | |-|-|-|-|-| 16 | |![](https://github.com/user-attachments/assets/3a657ccd-6fae-4c44-bff8-a3b702c89d65)|![](https://github.com/user-attachments/assets/cc91af21-0090-4392-89d3-ddd62e056da5)|![](https://github.com/user-attachments/assets/5e4c0fdf-15ef-4bf0-b027-ef863e74afaa)|Remove the mustache and beard, change the white shirt to a blue turtleneck sweater, and remove the glass of milk.|Add a mustache and beard, change the blue turtleneck sweater to a white shirt, and add a glass of milk.| 17 | |![](https://github.com/user-attachments/assets/e3ed5116-1d51-47ab-ae51-0fd4e1548bfd)|![](https://github.com/user-attachments/assets/f78eb833-82bd-4a1f-9856-58718b05dc03)|![](https://github.com/user-attachments/assets/8d1e7e9d-6f5e-4abc-905a-c9f3321ec772)|Add a silver butterfly to the glowing golden lace on her face.|Remove the silver butterfly from the glowing golden lace on her face.| 18 | |![](https://github.com/user-attachments/assets/169e1170-f1d2-4f37-a758-baee81343720)|![](https://github.com/user-attachments/assets/6c250bd1-a705-45ba-8c8a-aacb91eaaa0f)|![](https://github.com/user-attachments/assets/167eb187-605b-4dcd-be62-b6833309aa5c)|Remove the necklace.|Add a necklace.| 19 | 20 | ### 2. Zoom In, Zoom Out 21 | 22 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout) 23 | * Dataset Construction Script: [./scripts/zoomin_zoomout.py](./scripts/zoomin_zoomout.py) 24 | 25 | |image_1|image_2|image_cropped|mask|editing_instruction|reverse_editing_instruction| 26 | |-|-|-|-|-|-| 27 | |![](https://github.com/user-attachments/assets/c90e2a05-8bbe-4897-83f6-fad5692677e2)|![](https://github.com/user-attachments/assets/70ab6767-e088-49f1-afb8-b85cca894031)|![](https://github.com/user-attachments/assets/76718ff8-f6ae-4f75-8f3f-be10d2eebde4)|![](https://github.com/user-attachments/assets/4bebe7f6-a3a7-481b-bcef-100bb18bec5d)|Zoom in to focus on the headband.|Zoom out to show the full view of the anime girl.| 28 | |![](https://github.com/user-attachments/assets/99fc81f9-77e5-4181-a376-06cdf5feaf65)|![](https://github.com/user-attachments/assets/e97b398d-a68e-4f34-a5e9-a831d16f3941)|![](https://github.com/user-attachments/assets/aef092d1-8d8c-4353-a9b7-089875307830)|![](https://github.com/user-attachments/assets/dcf2578a-df22-471c-96c0-34ba361a10b5)|Remove the superhero costume and replace it with a red shirt. Adjust the lighting to highlight the man's face.|Add a superhero costume with a red and yellow emblem on the chest and a red cape. Adjust the lighting to emphasize the costume.| 29 | |![](https://github.com/user-attachments/assets/356fc12b-02ca-4f3c-bf65-3248ca5576eb)|![](https://github.com/user-attachments/assets/41dcdf1c-3ce6-49aa-a651-cfc981932689)|![](https://github.com/user-attachments/assets/d5facc03-99d0-4f15-93ce-9f1bc5397bfd)|![](https://github.com/user-attachments/assets/5df06650-8c2b-47f2-9bf4-d3e2510e224d)|Remove the elephant and replace it with a large rock.|Replace the large rock with an elephant.| 30 | 31 | ### 3. 
Style Transfer 32 | 33 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer) 34 | * Dataset Construction Script: [./scripts/style_transfer.py](./scripts/style_transfer.py) 35 | 36 | |image_1|image_2|image_3|image_4|editing_instruction|reverse_editing_instruction| 37 | |-|-|-|-|-|-| 38 | |![](https://github.com/user-attachments/assets/f8974a51-fe70-4081-b0c8-60acc0c73f28)|![](https://github.com/user-attachments/assets/81e99ac5-8458-4f4a-ac4e-ae57e809f7f2)|![](https://github.com/user-attachments/assets/c8bb8062-3ad9-44b5-9ee0-a70be4dcbfb3)|![](https://github.com/user-attachments/assets/9edd818e-b6ae-4e6e-924b-cdb21d02a2ec)|transform the image into a cartoon style with vibrant colors and a confident expression.|transform the image into a realistic portrait with a serious expression and subtle lighting.| 39 | |![](https://github.com/user-attachments/assets/82253243-028b-43b4-9a37-796f17fa21af)|![](https://github.com/user-attachments/assets/84bf1c5b-55ae-4084-82ec-3a45c15b2030)|![](https://github.com/user-attachments/assets/b8908d78-ad41-42ce-af4b-c52bf92b2989)|![](https://github.com/user-attachments/assets/6593c9d6-7d5e-4cc0-b2ba-49e5fb38a229)|transform the image to have a brighter, more colorful palette and a clear blue sky.|transform the image to have a more muted color palette and an overcast sky.| 40 | |![](https://github.com/user-attachments/assets/705efc5f-504b-49ac-ba76-ae2f9edb56e4)|![](https://github.com/user-attachments/assets/d0e2e902-d97f-4ffd-91c3-56c96aa19f71)|![](https://github.com/user-attachments/assets/d8c0150f-2e41-480a-9873-dbb8419c8ac5)|![](https://github.com/user-attachments/assets/7be0991c-06e8-4560-8ff8-5fbd2f81b1a0)|transform the style of the image to an anime illustration, change the jacket to red, and add a cityscape background.|transform the style of the image to a digital painting, change the jacket to black, and remove the cityscape background.| 41 | 42 | ### 4. 
Face ID 43 | 44 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID) 45 | * Dataset Construction Script: [./scripts/faceid.py](./scripts/faceid.py) 46 | 47 | |image_face|image_1|image_2|editing_instruction|reverse_editing_instruction| 48 | |-|-|-|-|-| 49 | |![](https://github.com/user-attachments/assets/6b61c298-1938-405b-a680-c767bc8913e0)|![](https://github.com/user-attachments/assets/071f2743-3fc8-42d1-b17a-768835c3f9f4)|![](https://github.com/user-attachments/assets/05b45391-df5b-453c-9007-e94674056c5a)|Change the woman's white t-shirt to a white tank top.|Change the woman's white tank top to a white t-shirt.| 50 | |![](https://github.com/user-attachments/assets/203c9f5d-58fe-4e55-8ab8-5adbf14a1fbf)|![](https://github.com/user-attachments/assets/1022a76c-9ac2-43f0-bde2-d65322834251)|![](https://github.com/user-attachments/assets/09511643-8370-46ba-aee8-bcf4efd86d72)|Add a nighttime street scene with bokeh lights in the background.|Remove the nighttime street scene and bokeh lights from the background.| 51 | |![](https://github.com/user-attachments/assets/64d8d216-0966-4108-a378-1ad2312ad8eb)|![](https://github.com/user-attachments/assets/9d182b1e-8b4f-4f74-9f58-d14d7ad15474)|![](https://github.com/user-attachments/assets/c0f9a43e-dd2e-48c9-945c-643f11852808)|Change the background to a warmly lit room with lamps, change the suit to maroon, and add a sweater under the suit.|Change the background to a dimly lit room with red lighting, change the suit to black, and remove the sweater.| 52 | 53 | ## Running Dataset Generation 54 | 55 | ```bash 56 | pip install -r requirements.txt 57 | ``` 58 | 59 | ```bash 60 | python change_add_remove.py \ 61 | --target_dir "data/dataset" \ 62 | --cache_dir "data/cache" \ 63 | --dashscope_api_key "sk-xxxxxxxxxxxxxxxx" \ 64 | --qwenvl_model_id "qwen-vl-max" \ 65 | --modelscope_access_token "xxxxxxxxxxxxxxx" \ 66 | --modelscope_dataset_id "DiffSynth-Studio/ImagePulse-ChangeAddRemove" \ 67 | --num_data 1000000 \ 68 | --max_num_files_per_folder 1000 69 | ``` 70 | 71 | * `target_dir`: Path to store the dataset 72 | * `cache_dir`: Cache path 73 | * `dashscope_api_key`: [DashScope](https://DashScope.console.aliyun.com/#/home) API Key, required when calling DashScope API 74 | * `qwenvl_model_id`: ID of the Qwen-VL model on [DashScope](https://DashScope.console.aliyun.com/#/home), required when calling DashScope API 75 | * `modelscope_access_token`: Access token from [ModelScope](https://modelscope.cn/my/myaccesstoken), required when uploading datasets to ModelScope 76 | * `modelscope_dataset_id`: Dataset ID on [ModelScope](https://modelscope.cn), required when uploading datasets to ModelScope 77 | * `num_data`: Total number of data samples 78 | * `max_num_files_per_folder`: Number of files per packaged folder 79 | 80 | ## Acknowledgements 81 | 82 | * [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio): Provided Diffusion model inference support for this project 83 | * [ModelScope](https://modelscope.cn): Provided storage and download support for models and datasets in this project 84 | * [DashScope](https://DashScope.console.aliyun.com/#/home): Provided inference API support for large language models in this project 85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /scripts/faceid.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask 3 | from pulse.processor.general import ListSampler, TextFormater, ListPacker, ImageResizer 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.processor.face import FaceDataSelector 6 | from pulse.dataset.dataset import ImageDatasetStorage, ImageDataset 7 | from pulse.pipeline import DataProcessUnit, DataPipeline 8 | from pulse.dataset.diffusiondb import DiffusionDB 9 | from diffsynth import ControlNetConfigUnit, download_models 10 | from modelscope import dataset_snapshot_download 11 | from tqdm import tqdm 12 | import argparse, os, io 13 | import pandas as pd 14 | from PIL import Image 15 | 16 | 17 | qwen_prompt_1 = """ 18 | Here are some image descriptions. Please select those (`descriptions`) that describe a single person and identify the gender in each image description. 19 | 20 | 1. %s 21 | 2. %s 22 | 3. %s 23 | 4. %s 24 | 5. %s 25 | 6. %s 26 | 7. %s 27 | 8. %s 28 | 9. %s 29 | 10. %s 30 | 31 | Next, identify the gender of the person in the image (`gender_in_image`). 32 | 33 | Please provide the results in JSON format as follows, which can be directly loads by json.loads() in Python: 34 | { 35 | "descriptions": [ 36 | { 37 | "description": "a girl holding an apple", 38 | "gender": "female", 39 | }, 40 | { 41 | "description": "a man is reading a book", 42 | "gender": "male", 43 | }, 44 | ... 45 | ] 46 | "gender_in_image": "male", 47 | } 48 | """ 49 | qwen_prompt_2 = """ 50 | Here are two images of the same person, denoted as image_1 and image_2 51 | 52 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 53 | 54 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instructions (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 
55 | 56 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 57 | 58 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 59 | { 60 | "image_1_caption": "...", 61 | "image_2_caption": "...", 62 | "editing_instruction": "...", 63 | "reverse_editing_instruction": "...", 64 | "artifacts_in_image_1": ..., 65 | "artifacts_in_image_2": ... 66 | } 67 | 68 | Here are some examples: 69 | { 70 | "image_1_caption": "a girl holding a basketball", 71 | "image_2_caption": "a girl holding a Teddy bear doll", 72 | "editing_instruction": "Change the basketball to a teddy bear.", 73 | "reverse_editing_instruction": "Change the teddy bear to a basketball.", 74 | "artifacts_in_image_1": false, 75 | "artifacts_in_image_2": false 76 | } 77 | 78 | { 79 | "image_1_caption": "a man is walking on the street", 80 | "image_2_caption": "a man is sitting on a chair", 81 | "editing_instruction": "Let the man sit down.", 82 | "reverse_editing_instruction": "Let the man walk.", 83 | "artifacts_in_image_1": false, 84 | "artifacts_in_image_2": true 85 | } 86 | """ 87 | 88 | 89 | def parse_args(): 90 | parser = argparse.ArgumentParser(description="Dataset generation script: FaceID.") 91 | parser.add_argument( 92 | "--target_dir", 93 | type=str, 94 | default="data/dataset", 95 | required=True, 96 | help="Path to save dataset.", 97 | ) 98 | parser.add_argument( 99 | "--cache_dir", 100 | type=str, 101 | default="data/cache", 102 | help="Path to save cache files.", 103 | ) 104 | parser.add_argument( 105 | "--dashscope_api_key", 106 | type=str, 107 | default="", 108 | help="Dashscope api key.", 109 | ) 110 | parser.add_argument( 111 | "--qwenvl_model_id", 112 | type=str, 113 | default="qwen-vl-max-0809", 114 | help="QwenVL model id.", 115 | ) 116 | parser.add_argument( 117 | "--modelscope_access_token", 118 | type=str, 119 | default=None, 120 | help="Modelscope access token", 121 | ) 122 | parser.add_argument( 123 | "--modelscope_dataset_id", 124 | type=str, 125 | default=None, 126 | help="Modelscope Dataset ID", 127 | ) 128 | parser.add_argument( 129 | "--num_data", 130 | type=int, 131 | default=100000, 132 | help="Number of data samples", 133 | ) 134 | parser.add_argument( 135 | "--max_num_files_per_folder", 136 | type=int, 137 | default=5000, 138 | help="Max number of files per folder", 139 | ) 140 | args = parser.parse_args() 141 | return args 142 | 143 | 144 | def initialize(args): 145 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 146 | dataset_snapshot_download("AI-ModelScope/celeb-a-hq_training_untransformed_faces", allow_file_pattern=["*.parquet"], cache_dir="./data") 147 | download_models(["FLUX.1-dev", "InfiniteYou"]) 148 | 149 | for file_name in os.listdir("data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data"): 150 | if file_name.endswith(".parquet"): 151 | if not os.path.exists(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images"): 152 | data = pd.read_parquet(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}") 153 | os.makedirs(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images") 154 | for image_id, image_data in enumerate(tqdm(data["image"], desc=file_name)): 155 | image = image_data["bytes"] 156 | image = Image.open(io.BytesIO(image)) 157 |
image.save(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images/{image_id}.png") 158 | 159 | t2i = FLUXT2I( 160 | model_path=[ 161 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 162 | "models/FLUX/FLUX.1-dev/text_encoder_2", 163 | "models/FLUX/FLUX.1-dev/ae.safetensors", 164 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 165 | [ 166 | "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors", 167 | "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors" 168 | ], 169 | "models/InfiniteYou/image_proj_model.bin", 170 | ], 171 | device="cuda", 172 | model_kwargs={ 173 | "controlnet_config_units": [ 174 | ControlNetConfigUnit( 175 | processor_id="none", 176 | model_path=[ 177 | 'models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors', 178 | 'models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors' 179 | ], 180 | scale=1.0 181 | ) 182 | ] 183 | } 184 | ) 185 | cache = ImageCache(cache_dir=args.cache_dir) 186 | 187 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data, multi_prompt=True, num_prompt=10) 188 | face_generator = ImageDataset("data/AI-ModelScope/celeb-a-hq_training_untransformed_faces") 189 | 190 | pipe = DataPipeline(units=[ 191 | DataProcessUnit( 192 | processor=TextFormater(template=qwen_prompt_1), 193 | input_params={"text_list": "prompt"}, 194 | output_params=("qwen_prompt_for_prompt_selection",) 195 | ), 196 | DataProcessUnit( 197 | processor=cache, 198 | input_params={"image": "image_face"}, 199 | output_params=("image_face_path",) 200 | ), 201 | DataProcessUnit( 202 | processor=QwenVLI2T( 203 | api_key=args.dashscope_api_key, 204 | model_id=args.qwenvl_model_id, 205 | prompt=qwen_prompt_1 206 | ), 207 | input_params={"images": "image_face_path", "prompt": "qwen_prompt_for_prompt_selection"}, 208 | output_params=("face_prompts_str",) 209 | ), 210 | DataProcessUnit( 211 | processor=QwenJsonParser(), 212 | input_params={"text": "face_prompts_str"}, 213 | output_params=("face_prompts",) 214 | ), 215 | DataProcessUnit( 216 | processor=FaceDataSelector(), 217 | input_params={"metadata": "face_prompts"}, 218 | output_params=("prompt_1", "prompt_2") 219 | ), 220 | 221 | DataProcessUnit( 222 | processor=t2i, 223 | input_params={"infinityou_id_image": "image_face", "prompt": "prompt_1"}, 224 | output_params=("image_1",), 225 | extra_input_kwargs={ 226 | "progress_bar_cmd": lambda x: x, 227 | "num_inference_steps": 50, 228 | "infinityou_guidance": 1.0, 229 | } 230 | ), 231 | DataProcessUnit( 232 | processor=cache, 233 | input_params={"image": "image_1"}, 234 | output_params=("image_1_path",) 235 | ), 236 | 237 | DataProcessUnit( 238 | processor=t2i, 239 | input_params={"infinityou_id_image": "image_face", "prompt": "prompt_2"}, 240 | output_params=("image_2",), 241 | extra_input_kwargs={ 242 | "progress_bar_cmd": lambda x: x, 243 | "num_inference_steps": 50, 244 | "infinityou_guidance": 1.0, 245 | } 246 | ), 247 | DataProcessUnit( 248 | processor=cache, 249 | input_params={"image": "image_2"}, 250 | output_params=("image_2_path",) 251 | ), 252 | 253 | DataProcessUnit( 254 | processor=ListPacker(), 255 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 256 | output_params=("image_list",) 257 | ), 258 | 259 | DataProcessUnit( 260 | processor=QwenVLI2T( 261 | api_key=args.dashscope_api_key, 262 | model_id=args.qwenvl_model_id, 263 | 
prompt=qwen_prompt_2 264 | ), 265 | input_params={"images": "image_list"}, 266 | output_params=("generated_instructions",) 267 | ), 268 | DataProcessUnit( 269 | processor=QwenJsonParser(), 270 | input_params={"text": "generated_instructions"}, 271 | parse_output_dict=True, 272 | ), 273 | DataProcessUnit( 274 | processor=ImageDatasetStorage( 275 | target_dir=args.target_dir, 276 | image_keys=("image_face", "image_1", "image_2"), 277 | metadata_keys=( 278 | "editing_instruction", "reverse_editing_instruction", "prompt_1", "prompt_2", "image_1_caption", "image_2_caption", 279 | "artifacts_in_image_1", "artifacts_in_image_2" 280 | ), 281 | modelscope_access_token=args.modelscope_access_token, 282 | modelscope_dataset_id=args.modelscope_dataset_id, 283 | max_num_files_per_folder=args.max_num_files_per_folder, 284 | ), 285 | input_params={ 286 | "image_face": "image_face", "image_1": "image_1", "image_2": "image_2", 287 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 288 | "prompt_1": "prompt_1", "prompt_2": "prompt_2", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 289 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 290 | }, 291 | output_params=("metadata_path") 292 | ) 293 | ]) 294 | return dataset, pipe, face_generator 295 | 296 | 297 | if __name__ == "__main__": 298 | args = parse_args() 299 | dataset, pipe, face_generator = initialize(args) 300 | for data_id, data in enumerate(tqdm(dataset)): 301 | data["image_face"] = face_generator[0] 302 | pipe(data, ignore_errors=True) 303 | if (data_id + 1) % 100 == 0: 304 | pipe.report_log() 305 | -------------------------------------------------------------------------------- /scripts/zoomin_zoomout.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask, QwenBbox2Square 3 | from pulse.processor.general import ListSampler, ListPacker, ImageCropper, ImageResizer 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.dataset.dataset import ImageDatasetStorage 6 | from pulse.pipeline import DataProcessUnit, DataPipeline 7 | from pulse.dataset.diffusiondb import DiffusionDB 8 | from diffsynth import ControlNetConfigUnit, download_models 9 | from modelscope import dataset_snapshot_download 10 | from tqdm import tqdm 11 | import argparse 12 | 13 | 14 | qwen_prompt_1 = """ 15 | Please use relative coordinates in range [0, 1000] to mark all the entities in the image and write the corresponding text descriptions for each bbox in English. 16 | 17 | The bbox [x1, y1, x2, y2] is a square slightly larger than the corresponding object. Please ensure that the square contains a complete composition of another image as much as possible. Do not let the bbox almost cover the entire image. 18 | 19 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 20 | [ 21 | { 22 | "bbox": [x1, y1, x2, y2], 23 | "description": "portrait of a man" 24 | }, 25 | { 26 | "bbox": [x1, y1, x2, y2], 27 | "description": "a hat" 28 | }, 29 | { 30 | "bbox": [x1, y1, x2, y2], 31 | "description": "a dog is running" 32 | }, 33 | ...
34 | ] 35 | """ 36 | qwen_prompt_2 = """ 37 | Please provide a comprehensive and detailed description of the following image, ensuring the inclusion of the following elements: 38 | 39 | - Main subjects and objects present in the image. 40 | - Key visual elements, including colors, shapes, textures that stand out. 41 | - Spatial relationships and composition, focusing on how elements are arranged and interact within the frame. 42 | - Notable background elements that contribute to the overall context or setting. 43 | 44 | Generate a caption according to the image so that another model can generate the image via the caption. Just return the string description, do not return anything else. 45 | """ 46 | qwen_prompt_3 = """ 47 | Here are two images, denoted as image_1 and image_2. 48 | 49 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 50 | 51 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 52 | 53 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 54 | 55 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 56 | { 57 | "image_1_caption": "...", 58 | "image_2_caption": "...", 59 | "editing_instruction": "...", 60 | "reverse_editing_instruction": "...", 61 | "artifacts_in_image_1": ..., 62 | "artifacts_in_image_2": ... 63 | } 64 | 65 | Here are some examples: 66 | { 67 | "image_1_caption": "a girl holding a basketball", 68 | "image_2_caption": "a girl holding a Teddy bear doll", 69 | "editing_instruction": "Zoom in to view the basketball in the girl's hand.", 70 | "reverse_editing_instruction": "Zoom out to view the girl holding the basketball.", 71 | "artifacts_in_image_1": false, 72 | "artifacts_in_image_2": false 73 | } 74 | 75 | { 76 | "image_1_caption": "an apple on the desk", 77 | "image_2_caption": "an apple", 78 | "editing_instruction": "Crop the apple from the image.", 79 | "reverse_editing_instruction": "Expand the image so that the enlarged version shows an apple on a table.", 80 | "artifacts_in_image_1": false, 81 | "artifacts_in_image_2": true 82 | } 83 | """ 84 | 85 | def parse_args(): 86 | parser = argparse.ArgumentParser(description="Dataset generation script: Zoom in & Zoom out.") 87 | parser.add_argument( 88 | "--target_dir", 89 | type=str, 90 | default="data/dataset", 91 | required=True, 92 | help="Path to save dataset.", 93 | ) 94 | parser.add_argument( 95 | "--cache_dir", 96 | type=str, 97 | default="data/cache", 98 | help="Path to save cache files.", 99 | ) 100 | parser.add_argument( 101 | "--dashscope_api_key", 102 | type=str, 103 | default="", 104 | help="Dashscope api key.", 105 | ) 106 | parser.add_argument( 107 | "--qwenvl_model_id", 108 | type=str, 109 | default="qwen-vl-max-0809", 110 | help="QwenVL model id.", 111 | ) 112 | parser.add_argument( 113 | "--modelscope_access_token", 114 | type=str, 115 | default=None, 116 | help="Modelscope access token", 117 | ) 118 | parser.add_argument( 119 | "--modelscope_dataset_id", 120 | type=str, 121 | default=None, 122 | help="Modelscope Dataset ID", 123 | ) 124 | parser.add_argument( 125 | "--num_data", 126 | type=int,
127 | default=100000, 128 | help="Number of data samples", 129 | ) 130 | parser.add_argument( 131 | "--max_num_files_per_folder", 132 | type=int, 133 | default=5000, 134 | help="Max number of files per folder", 135 | ) 136 | args = parser.parse_args() 137 | return args 138 | 139 | 140 | def initialize(args): 141 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 142 | download_models(["FLUX.1-dev", "jasperai/Flux.1-dev-Controlnet-Upscaler"]) 143 | 144 | t2i = FLUXT2I( 145 | model_path=[ 146 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 147 | "models/FLUX/FLUX.1-dev/text_encoder_2", 148 | "models/FLUX/FLUX.1-dev/ae.safetensors", 149 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 150 | "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler/diffusion_pytorch_model.safetensors" 151 | ], 152 | device="cuda", 153 | model_kwargs={ 154 | "controlnet_config_units": [ 155 | ControlNetConfigUnit( 156 | processor_id="tile", 157 | model_path="models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler/diffusion_pytorch_model.safetensors", 158 | scale=0.6 159 | ), 160 | ] 161 | } 162 | ) 163 | cache = ImageCache(cache_dir=args.cache_dir) 164 | 165 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 166 | 167 | pipe = DataPipeline(units=[ 168 | DataProcessUnit( 169 | processor=t2i, 170 | input_params={"prompt": "prompt"}, 171 | output_params=("image_1",), 172 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 173 | ), 174 | DataProcessUnit( 175 | processor=cache, 176 | input_params={"image": "image_1"}, 177 | output_params=("image_1_path",) 178 | ), 179 | DataProcessUnit( 180 | processor=QwenVLI2T( 181 | api_key=args.dashscope_api_key, 182 | model_id=args.qwenvl_model_id, 183 | prompt=qwen_prompt_1 184 | ), 185 | input_params={"images": "image_1_path"}, 186 | output_params=("grounding_results_str",) 187 | ), 188 | DataProcessUnit( 189 | processor=QwenJsonParser(), 190 | input_params={"text": "grounding_results_str"}, 191 | output_params=("grounding_results_list",) 192 | ), 193 | DataProcessUnit( 194 | processor=ListSampler(), 195 | input_params={"ls": "grounding_results_list"}, 196 | parse_output_dict=True, 197 | ), 198 | DataProcessUnit( 199 | processor=QwenBbox2Square(), 200 | input_params={"bbox": "bbox"}, 201 | parse_output_dict=True, 202 | ), 203 | DataProcessUnit( 204 | processor=QwenBbox2Mask(), 205 | input_params={"bbox": "square"}, 206 | output_params=("mask",) 207 | ), 208 | DataProcessUnit( 209 | processor=cache, 210 | input_params={"image": "mask"}, 211 | output_params=("mask_path",) 212 | ), 213 | DataProcessUnit( 214 | processor=ImageCropper(), 215 | input_params={"image": "image_1", "bbox": "square"}, 216 | output_params=("image_cropped",) 217 | ), 218 | DataProcessUnit( 219 | processor=cache, 220 | input_params={"image": "image_cropped"}, 221 | output_params=("image_cropped_path",) 222 | ), 223 | DataProcessUnit( 224 | processor=QwenVLI2T( 225 | api_key=args.dashscope_api_key, 226 | model_id=args.qwenvl_model_id, 227 | prompt=qwen_prompt_2 228 | ), 229 | input_params={"images": "image_cropped_path"}, 230 | output_params=("local_description",) 231 | ), 232 | DataProcessUnit( 233 | processor=ImageResizer(), 234 | input_params={"image": "image_cropped"}, 235 | output_params=("image_resized",) 236 | ), 237 | DataProcessUnit( 238 | processor=t2i, 239 | input_params={ 240 | "prompt": "local_description", 241 | "controlnet_image": 
"image_resized", 242 | "input_image": "image_resized" 243 | }, 244 | output_params=("image_2",), 245 | extra_input_kwargs={ 246 | "progress_bar_cmd": lambda x: x, 247 | "num_inference_steps": 30, 248 | "denoising_strength": 0.9 249 | } 250 | ), 251 | DataProcessUnit( 252 | processor=cache, 253 | input_params={"image": "image_2"}, 254 | output_params=("image_2_path",) 255 | ), 256 | DataProcessUnit( 257 | processor=ListPacker(), 258 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 259 | output_params=("image_list",) 260 | ), 261 | DataProcessUnit( 262 | processor=QwenVLI2T( 263 | api_key=args.dashscope_api_key, 264 | model_id=args.qwenvl_model_id, 265 | prompt=qwen_prompt_3 266 | ), 267 | input_params={"images": "image_list"}, 268 | output_params=("generated_instructions",) 269 | ), 270 | DataProcessUnit( 271 | processor=QwenJsonParser(), 272 | input_params={"text": "generated_instructions"}, 273 | parse_output_dict=True, 274 | ), 275 | DataProcessUnit( 276 | processor=ImageDatasetStorage( 277 | target_dir=args.target_dir, 278 | image_keys=("image_1", "image_2", "image_cropped", "mask"), 279 | metadata_keys=( 280 | "editing_instruction", "reverse_editing_instruction", "prompt", "local_description", "image_1_caption", "image_2_caption", 281 | "artifacts_in_image_1", "artifacts_in_image_2", "square", 282 | ), 283 | modelscope_access_token=args.modelscope_access_token, 284 | modelscope_dataset_id=args.modelscope_dataset_id, 285 | max_num_files_per_folder=args.max_num_files_per_folder, 286 | ), 287 | input_params={ 288 | "image_1": "image_1", "image_2": "image_2", "image_cropped": "image_cropped", "mask": "mask", 289 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 290 | "prompt": "prompt", "local_description": "local_description", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 291 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 292 | "square": "square" 293 | }, 294 | output_params=("metadata_path") 295 | ) 296 | ]) 297 | return dataset, pipe 298 | 299 | 300 | if __name__ == "__main__": 301 | args = parse_args() 302 | dataset, pipe = initialize(args) 303 | for data_id, data in enumerate(tqdm(dataset)): 304 | pipe(data, ignore_errors=True) 305 | if (data_id + 1) % 100 == 0: 306 | pipe.report_log() 307 | -------------------------------------------------------------------------------- /scripts/change_add_remove.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask 3 | from pulse.processor.general import ListSampler, TextFormater, ListPacker 4 | from pulse.processor.preference import ImagePreferenceModel 5 | from pulse.processor.image_cache import ImageCache 6 | from pulse.dataset.dataset import ImageDatasetStorage 7 | from pulse.pipeline import DataProcessUnit, DataPipeline 8 | from pulse.dataset.diffusiondb import DiffusionDB 9 | from diffsynth import ControlNetConfigUnit, download_models 10 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 11 | from modelscope import dataset_snapshot_download 12 | from tqdm import tqdm 13 | import argparse 14 | 15 | 16 | qwen_prompt_1 = """ 17 | Please use relative coordinates in range [0, 1000] to mark all the entities in the image and write the corresponding text descriptions for each bbox in English. 
18 | 19 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 20 | [ 21 | { 22 | "bbox": [x1, y1, x2, y2], 23 | "description": "a dog is running" 24 | }, 25 | { 26 | "bbox": [x1, y1, x2, y2], 27 | "description": "a red car" 28 | }, 29 | { 30 | "bbox": [x1, y1, x2, y2], 31 | "description": "black hair" 32 | }, 33 | ... 34 | ] 35 | """ 36 | qwen_prompt_2 = """ 37 | There is an image, and the full text description of this image is "%s". The area in the image (%s) indicates "%s". 38 | 39 | Now I need to modify this part to reflect other content in the image. Please write a piece of randomly modified text (local_description) describing the localized image content after the modification, as well as another piece of text (global_description) describing the overall image content after the modification. 40 | 41 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 42 | { 43 | "original_local_description": "...", 44 | "original_global_description": "...", 45 | "local_description": "...", 46 | "global_description": "..." 47 | } 48 | 49 | Here are some examples: 50 | { 51 | "original_local_description": "a basketball", 52 | "original_global_description": "a girl holding a basketball", 53 | "local_description": "a Teddy bear doll", 54 | "global_description": "a girl holding a Teddy bear doll" 55 | } 56 | 57 | { 58 | "original_local_description": "an apple", 59 | "original_global_description": "an apple on the desk", 60 | "local_description": "a banana", 61 | "global_description": "a banana on the desk" 62 | } 63 | """ 64 | qwen_prompt_3 = """ 65 | Here are two images, denoted as image_1 and image_2. 66 | 67 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 68 | 69 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 70 | 71 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 72 | 73 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 74 | { 75 | "image_1_caption": "...", 76 | "image_2_caption": "...", 77 | "editing_instruction": "...", 78 | "reverse_editing_instruction": "...", 79 | "artifacts_in_image_1": ..., 80 | "artifacts_in_image_2": ...
81 | } 82 | 83 | Here are some examples: 84 | { 85 | "image_1_caption": "a girl holding a basketball", 86 | "image_2_caption": "a girl holding a Teddy bear doll", 87 | "editing_instruction": "Change the basketball to a teddy bear.", 88 | "reverse_editing_instruction": "Change the teddy bear to a basketball.", 89 | "artifacts_in_image_1": false, 90 | "artifacts_in_image_2": false 91 | } 92 | 93 | { 94 | "image_1_caption": "an apple on the desk", 95 | "image_2_caption": "a desk", 96 | "editing_instruction": "Remove the apple.", 97 | "reverse_editing_instruction": "Add an apple on the desk.", 98 | "artifacts_in_image_1": false, 99 | "artifacts_in_image_2": true 100 | } 101 | """ 102 | 103 | 104 | def parse_args(): 105 | parser = argparse.ArgumentParser(description="Dataset generation script: Change, add & remove.") 106 | parser.add_argument( 107 | "--target_dir", 108 | type=str, 109 | default="data/dataset", 110 | required=True, 111 | help="Path to save dataset.", 112 | ) 113 | parser.add_argument( 114 | "--cache_dir", 115 | type=str, 116 | default="data/cache", 117 | help="Path to save cache files.", 118 | ) 119 | parser.add_argument( 120 | "--dashscope_api_key", 121 | type=str, 122 | default="", 123 | help="Dashscope api key.", 124 | ) 125 | parser.add_argument( 126 | "--qwenvl_model_id", 127 | type=str, 128 | default="qwen-vl-max-0809", 129 | help="QwenVL model id.", 130 | ) 131 | parser.add_argument( 132 | "--modelscope_access_token", 133 | type=str, 134 | default=None, 135 | help="Modelscope access token", 136 | ) 137 | parser.add_argument( 138 | "--modelscope_dataset_id", 139 | type=str, 140 | default=None, 141 | help="Modelscope Dataset ID", 142 | ) 143 | parser.add_argument( 144 | "--num_data", 145 | type=int, 146 | default=100000, 147 | help="Number of data samples", 148 | ) 149 | parser.add_argument( 150 | "--max_num_files_per_folder", 151 | type=int, 152 | default=5000, 153 | help="Max number of files per folder", 154 | ) 155 | args = parser.parse_args() 156 | return args 157 | 158 | 159 | def initialize(args): 160 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 161 | download_models(["FLUX.1-dev", "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta"]) 162 | download_preference_model("MPS", cache_dir="./models") 163 | 164 | t2i = FLUXT2I( 165 | model_path=[ 166 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 167 | "models/FLUX/FLUX.1-dev/text_encoder_2", 168 | "models/FLUX/FLUX.1-dev/ae.safetensors", 169 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 170 | "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta/diffusion_pytorch_model.safetensors" 171 | ], 172 | device="cuda", 173 | model_kwargs={ 174 | "controlnet_config_units": [ 175 | ControlNetConfigUnit( 176 | processor_id="inpaint", 177 | model_path="models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta/diffusion_pytorch_model.safetensors", 178 | scale=0.9 179 | ), 180 | ] 181 | } 182 | ) 183 | preference_model = ImagePreferenceModel("MPS", cache_dir="./models", device="cuda") 184 | cache = ImageCache(cache_dir=args.cache_dir) 185 | 186 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 187 | 188 | pipe = DataPipeline(units=[ 189 | DataProcessUnit( 190 | processor=t2i, 191 | input_params={"prompt": "prompt"}, 192 | output_params=("image_1",), 193 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 194 | ), 195 | 
DataProcessUnit( 196 | processor=cache, 197 | input_params={"image": "image_1"}, 198 | output_params=("image_1_path",) 199 | ), 200 | DataProcessUnit( 201 | processor=QwenVLI2T( 202 | api_key=args.dashscope_api_key, 203 | model_id=args.qwenvl_model_id, 204 | prompt=qwen_prompt_1 205 | ), 206 | input_params={"images": "image_1_path"}, 207 | output_params=("grounding_results_str",) 208 | ), 209 | DataProcessUnit( 210 | processor=QwenJsonParser(), 211 | input_params={"text": "grounding_results_str"}, 212 | output_params=("grounding_results_list",) 213 | ), 214 | DataProcessUnit( 215 | processor=ListSampler(), 216 | input_params={"ls": "grounding_results_list"}, 217 | parse_output_dict=True, 218 | ), 219 | DataProcessUnit( 220 | processor=QwenBbox2Mask(), 221 | input_params={"bbox": "bbox"}, 222 | output_params=("mask",) 223 | ), 224 | DataProcessUnit( 225 | processor=cache, 226 | input_params={"image": "mask"}, 227 | output_params=("mask_path",) 228 | ), 229 | DataProcessUnit( 230 | processor=TextFormater(template=qwen_prompt_2), 231 | input_params={"prompt": "prompt", "bbox": "bbox", "description": "description"}, 232 | output_params=("editing_prompt_for_qwen",) 233 | ), 234 | DataProcessUnit( 235 | processor=QwenVLI2T( 236 | api_key=args.dashscope_api_key, 237 | model_id=args.qwenvl_model_id, 238 | ), 239 | input_params={"images": "image_1_path", "prompt": "editing_prompt_for_qwen"}, 240 | output_params=("editing_str",) 241 | ), 242 | DataProcessUnit( 243 | processor=QwenJsonParser(), 244 | input_params={"text": "editing_str"}, 245 | parse_output_dict=True, 246 | ), 247 | DataProcessUnit( 248 | processor=t2i, 249 | input_params={ 250 | "prompt": "local_description", 251 | "controlnet_image": "image_1", 252 | "controlnet_inpaint_mask": "mask" 253 | }, 254 | output_params=("image_2",), 255 | extra_input_kwargs={"progress_bar_cmd": lambda x: x, "num_inference_steps": 50} 256 | ), 257 | DataProcessUnit( 258 | processor=cache, 259 | input_params={"image": "image_2"}, 260 | output_params=("image_2_path",) 261 | ), 262 | DataProcessUnit( 263 | processor=ListPacker(), 264 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 265 | output_params=("image_list",) 266 | ), 267 | DataProcessUnit( 268 | processor=QwenVLI2T( 269 | api_key=args.dashscope_api_key, 270 | model_id=args.qwenvl_model_id, 271 | prompt=qwen_prompt_3, 272 | ), 273 | input_params={"images": "image_list"}, 274 | output_params=("generated_instructions",) 275 | ), 276 | DataProcessUnit( 277 | processor=QwenJsonParser(), 278 | input_params={"text": "generated_instructions"}, 279 | parse_output_dict=True, 280 | ), 281 | DataProcessUnit( 282 | processor=preference_model, 283 | input_params={"image": "image_1", "prompt": "image_1_caption"}, 284 | output_params=("image_1_preference_score",) 285 | ), 286 | DataProcessUnit( 287 | processor=preference_model, 288 | input_params={"image": "image_2", "prompt": "image_2_caption"}, 289 | output_params=("image_2_preference_score",) 290 | ), 291 | DataProcessUnit( 292 | processor=ImageDatasetStorage( 293 | target_dir=args.target_dir, 294 | image_keys=("image_1", "image_2", "mask"), 295 | metadata_keys=( 296 | "editing_instruction", "reverse_editing_instruction", "prompt", "image_1_caption", "image_2_caption", 297 | "image_1_preference_score", "image_2_preference_score", "artifacts_in_image_1", "artifacts_in_image_2" 298 | ), 299 | modelscope_access_token=args.modelscope_access_token, 300 | modelscope_dataset_id=args.modelscope_dataset_id, 301 | 
max_num_files_per_folder=args.max_num_files_per_folder, 302 | ), 303 | input_params={ 304 | "image_1": "image_1", "image_2": "image_2", "mask": "mask", 305 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 306 | "prompt": "prompt", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 307 | "image_1_preference_score": "image_1_preference_score", "image_2_preference_score": "image_2_preference_score", 308 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 309 | }, 310 | output_params=("metadata_path"), 311 | ) 312 | ]) 313 | return dataset, pipe 314 | 315 | 316 | if __name__ == "__main__": 317 | args = parse_args() 318 | dataset, pipe = initialize(args) 319 | for data_id, data in enumerate(tqdm(dataset)): 320 | pipe(data, ignore_errors=True) 321 | if (data_id + 1) % 100 == 0: 322 | pipe.report_log() 323 | -------------------------------------------------------------------------------- /scripts/style_transfer.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask, QwenBbox2Square 3 | from pulse.processor.general import ListSampler, ListPacker, ImageCropper, ImageResizer, TextFormater 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.processor.preference import ImagePreferenceModel 6 | from pulse.processor.style import RandomPromptStyler 7 | from pulse.processor.sdxl_t2i import SDXLT2I 8 | from pulse.dataset.dataset import ImageDatasetStorage 9 | from pulse.pipeline import DataProcessUnit, DataPipeline 10 | from pulse.dataset.diffusiondb import DiffusionDB 11 | from diffsynth import ControlNetConfigUnit, download_models 12 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 13 | from modelscope import dataset_snapshot_download 14 | from tqdm import tqdm 15 | import argparse 16 | 17 | 18 | qwen_prompt_1 = """ 19 | Please describe the content of the image in concise text, focusing only on the elements present in the image without discussing its style. 20 | 21 | Here are some examples: 22 | * A dog is running 23 | * Red and blue flowers in a garden 24 | * An apple and a cup on the desk 25 | 26 | Just return the string description, do not return anything else. 27 | """ 28 | qwen_prompt_2 = """ 29 | Here are two images, denoted as image_1 and image_2. 30 | 31 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 32 | 33 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. Please ensure that the editing instructions emphasize the style of the image. 34 | 35 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2.
36 | 37 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 38 | { 39 | "image_1_caption": "...", 40 | "image_2_caption": "...", 41 | "editing_instruction": "...", 42 | "reverse_editing_instruction": "...", 43 | "artifacts_in_image_1": ..., 44 | "artifacts_in_image_2": ... 45 | } 46 | 47 | Here are some examples: 48 | { 49 | "image_1_caption": "a photo of a girl holding a basketball.", 50 | "image_2_caption": "an oil painting of a teenage girl holding a basketball.", 51 | "editing_instruction": "transform the photo into an oil painting style.", 52 | "reverse_editing_instruction": "generate a realistic scene based on the content of this oil painting.", 53 | "artifacts_in_image_1": false, 54 | "artifacts_in_image_2": false 55 | } 56 | 57 | { 58 | "image_1_caption": "flat illustration, anime style, featuring an orange kitten.", 59 | "image_2_caption": "highly outlined anime illustration, featuring an orange kitten.", 60 | "editing_instruction": "transform the style of the image to enhance the line definition.", 61 | "reverse_editing_instruction": "transform the style of the image to make it appear more flat.", 62 | "artifacts_in_image_1": false, 63 | "artifacts_in_image_2": true 64 | } 65 | """ 66 | 67 | def parse_args(): 68 | parser = argparse.ArgumentParser(description="Dataset generation script: Style Transfer.") 69 | parser.add_argument( 70 | "--target_dir", 71 | type=str, 72 | default="data/dataset", 73 | required=True, 74 | help="Path to save dataset.", 75 | ) 76 | parser.add_argument( 77 | "--cache_dir", 78 | type=str, 79 | default="data/cache", 80 | help="Path to save cache files.", 81 | ) 82 | parser.add_argument( 83 | "--dashscope_api_key", 84 | type=str, 85 | default="", 86 | help="Dashscope api key.", 87 | ) 88 | parser.add_argument( 89 | "--qwenvl_model_id", 90 | type=str, 91 | default="qwen-vl-max-0809", 92 | help="QwenVL model id.", 93 | ) 94 | parser.add_argument( 95 | "--modelscope_access_token", 96 | type=str, 97 | default=None, 98 | help="Modelscope access token", 99 | ) 100 | parser.add_argument( 101 | "--modelscope_dataset_id", 102 | type=str, 103 | default=None, 104 | help="Modelscope Dataset ID", 105 | ) 106 | parser.add_argument( 107 | "--num_data", 108 | type=int, 109 | default=100000, 110 | help="Number of data samples", 111 | ) 112 | parser.add_argument( 113 | "--max_num_files_per_folder", 114 | type=int, 115 | default=5000, 116 | help="Max number of files per folder", 117 | ) 118 | args = parser.parse_args() 119 | return args 120 | 121 | 122 | def initialize(args): 123 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 124 | download_models(["FLUX.1-dev", "InstantX/FLUX.1-dev-IP-Adapter", "StableDiffusionXL_v1", "IP-Adapter-SDXL", "ControlNet_union_sdxl_promax"]) 125 | download_preference_model("MPS", cache_dir="./models") 126 | 127 | t2i = FLUXT2I( 128 | model_path=[ 129 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 130 | "models/FLUX/FLUX.1-dev/text_encoder_2", 131 | "models/FLUX/FLUX.1-dev/ae.safetensors", 132 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 133 | "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin", 134 | "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder", 135 | ], 136 | device="cuda", 137 | ) 138 | instant_style = SDXLT2I( 139 | model_path=[ 140 | "models/stable_diffusion_xl/sd_xl_base_1.0.safetensors", 141 |
"models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors", 142 | "models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin", 143 | "models/ControlNet/controlnet_union/diffusion_pytorch_model_promax.safetensors", 144 | ], 145 | device="cuda", 146 | model_kwargs={ 147 | "controlnet_config_units": [ 148 | ControlNetConfigUnit( 149 | processor_id="canny", 150 | model_path="models/ControlNet/controlnet_union/diffusion_pytorch_model_promax.safetensors", 151 | scale=0.6 152 | ) 153 | ] 154 | }, 155 | pipeline_kwargs={ 156 | "negative_prompt": "text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", 157 | "cfg_scale": 5, 158 | "height": 1024, 159 | "width": 1024, 160 | "num_inference_steps": 50, 161 | "ipadapter_use_instant_style": True 162 | } 163 | ) 164 | preference_model = ImagePreferenceModel("MPS", cache_dir="./models", device="cuda") 165 | cache = ImageCache(cache_dir=args.cache_dir) 166 | 167 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 168 | 169 | pipe = DataPipeline(units=[ 170 | DataProcessUnit( 171 | processor=t2i, 172 | input_params={"prompt": "prompt"}, 173 | output_params=("image_1",), 174 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 175 | ), 176 | DataProcessUnit( 177 | processor=cache, 178 | input_params={"image": "image_1"}, 179 | output_params=("image_1_path",) 180 | ), 181 | DataProcessUnit( 182 | processor=QwenVLI2T( 183 | api_key=args.dashscope_api_key, 184 | model_id=args.qwenvl_model_id, 185 | prompt=qwen_prompt_1 186 | ), 187 | input_params={"images": "image_1_path"}, 188 | output_params=("image_content_description",) 189 | ), 190 | DataProcessUnit( 191 | processor=RandomPromptStyler(), 192 | input_params={"prompt": "image_content_description"}, 193 | output_params=("image_content_style_description",) 194 | ), 195 | DataProcessUnit( 196 | processor=t2i, 197 | input_params={"prompt": "image_content_style_description"}, 198 | output_params=("image_2",), 199 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 200 | ), 201 | DataProcessUnit( 202 | processor=cache, 203 | input_params={"image": "image_2"}, 204 | output_params=("image_2_path",) 205 | ), 206 | DataProcessUnit( 207 | processor=ListPacker(), 208 | input_params={"image": "image_2"}, 209 | output_params=("ipadapter_images",) 210 | ), 211 | DataProcessUnit( 212 | processor=instant_style, 213 | input_params={ 214 | "prompt": "image_content_style_description", 215 | "controlnet_image": "image_1", 216 | "ipadapter_images": "ipadapter_images", 217 | }, 218 | output_params=("image_3",), 219 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 220 | ), 221 | DataProcessUnit( 222 | processor=cache, 223 | input_params={"image": "image_3"}, 224 | output_params=("image_3_path",) 225 | ), 226 | DataProcessUnit( 227 | processor=t2i, 228 | input_params={ 229 | "prompt": "image_content_style_description", 230 | "input_image": "image_3", 231 | "ipadapter_images": "ipadapter_images", 232 | }, 233 | output_params=("image_4",), 234 | extra_input_kwargs={ 235 | "progress_bar_cmd": lambda x: x, 236 | "denoising_strength": 0.6, 237 | "num_inference_steps": 50 238 | } 239 | ), 240 | DataProcessUnit( 241 | processor=cache, 242 | input_params={"image": "image_4"}, 243 | output_params=("image_4_path",) 244 | ), 245 | DataProcessUnit( 246 | processor=ListPacker(), 247 | input_params={"image_1_path": "image_1_path", "image_4_path": "image_4_path"}, 248 | output_params=("image_list",) 249 | 
), 250 | DataProcessUnit( 251 | processor=QwenVLI2T( 252 | api_key=args.dashscope_api_key, 253 | model_id=args.qwenvl_model_id, 254 | prompt=qwen_prompt_2 255 | ), 256 | input_params={"images": "image_list"}, 257 | output_params=("generated_instructions",) 258 | ), 259 | DataProcessUnit( 260 | processor=QwenJsonParser(), 261 | input_params={"text": "generated_instructions"}, 262 | parse_output_dict=True, 263 | ), 264 | DataProcessUnit( 265 | processor=preference_model, 266 | input_params={"image": "image_1", "prompt": "image_1_caption"}, 267 | output_params=("image_1_preference_score",) 268 | ), 269 | DataProcessUnit( 270 | processor=preference_model, 271 | input_params={"image": "image_4", "prompt": "image_2_caption"}, 272 | output_params=("image_4_preference_score",) 273 | ), 274 | DataProcessUnit( 275 | processor=ImageDatasetStorage( 276 | target_dir=args.target_dir, 277 | image_keys=("image_1", "image_2", "image_3", "image_4"), 278 | metadata_keys=( 279 | "editing_instruction", "reverse_editing_instruction", 280 | "prompt", "image_content_description", "image_content_style_description", 281 | "image_1_caption", "image_4_caption", "artifacts_in_image_1", "artifacts_in_image_4", 282 | "image_1_preference_score", "image_4_preference_score" 283 | ), 284 | modelscope_access_token=args.modelscope_access_token, 285 | modelscope_dataset_id=args.modelscope_dataset_id, 286 | max_num_files_per_folder=args.max_num_files_per_folder, 287 | ), 288 | input_params={ 289 | "image_1": "image_1", "image_2": "image_2", "image_3": "image_3", "image_4": "image_4", 290 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 291 | "prompt": "prompt", "image_content_description": "image_content_description", "image_content_style_description": "image_content_style_description", 292 | "image_1_caption": "image_1_caption", "image_4_caption": "image_2_caption", 293 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_4": "artifacts_in_image_2", 294 | "image_1_preference_score": "image_1_preference_score", "image_4_preference_score": "image_4_preference_score" 295 | }, 296 | output_params=("metadata_path") 297 | ) 298 | ]) 299 | return dataset, pipe 300 | 301 | 302 | if __name__ == "__main__": 303 | args = parse_args() 304 | dataset, pipe = initialize(args) 305 | for data_id, data in enumerate(tqdm(dataset)): 306 | pipe(data, ignore_errors=True) 307 | if (data_id + 1) % 100 == 0: 308 | pipe.report_log() 309 | -------------------------------------------------------------------------------- /pulse/processor/style.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class RandomPromptStyler: 5 | def __init__(self): 6 | self.styles = [ 7 | { 8 | "name": "cinematic-diva", 9 | "name_zh": "电影歌星画风", 10 | "template": "UHD, 8K, ultra detailed, a cinematic photograph of {prompt}, beautiful lighting, great composition" 11 | }, 12 | { 13 | "name": "Abstract Expressionism", 14 | "name_zh": "抽象表现主义", 15 | "template": "Abstract Expressionism Art, {prompt}, High contrast, minimalistic, colorful, stark, dramatic, expressionism" 16 | }, 17 | { 18 | "name": "Academia", 19 | "name_zh": "学院风", 20 | "template": "Academia, {prompt}, preppy Ivy League style, stark, dramatic, chic boarding school, academia" 21 | }, 22 | { 23 | "name": "Action Figure", 24 | "name_zh": "动作人偶", 25 | "template": "Action Figure, {prompt}, plastic collectable action figure, collectable toy action figure" 26 | }, 27 | { 28 | "name": 
"Adorable 3D Character", 29 | "name_zh": "可爱的3D角色", 30 | "template": "Adorable 3D Character, {prompt}, 3D render, adorable character, 3D art" 31 | }, 32 | { 33 | "name": "Adorable Kawaii", 34 | "name_zh": "可爱卡哇伊风格", 35 | "template": "Adorable Kawaii, {prompt}, pretty, cute, adorable, kawaii" 36 | }, 37 | { 38 | "name": "Art Deco", 39 | "name_zh": "艺术装饰风格", 40 | "template": "Art Deco, {prompt}, sleek, geometric forms, art deco style" 41 | }, 42 | { 43 | "name": "Art Nouveau", 44 | "name_zh": "新艺术风格", 45 | "template": "Art Nouveau, beautiful art, {prompt}, sleek, organic forms, long, sinuous, art nouveau style" 46 | }, 47 | { 48 | "name": "Astral Aura", 49 | "name_zh": "星体光环", 50 | "template": "Astral Aura, {prompt}, astral, colorful aura, vibrant energy" 51 | }, 52 | { 53 | "name": "Avant-garde", 54 | "name_zh": "先锋派", 55 | "template": "Avant-garde, {prompt}, unusual, experimental, avant-garde art" 56 | }, 57 | { 58 | "name": "Baroque", 59 | "name_zh": "巴洛克风格", 60 | "template": "Baroque, {prompt}, dramatic, exuberant, grandeur, baroque art" 61 | }, 62 | { 63 | "name": "Bauhaus-Style Poster", 64 | "name_zh": "包豪斯风格海报", 65 | "template": "Bauhaus-Style Poster, {prompt}, simple geometric shapes, clean lines, primary colors, Bauhaus-Style Poster" 66 | }, 67 | { 68 | "name": "Blueprint Schematic Drawing", 69 | "name_zh": "蓝图原理图绘制", 70 | "template": "Blueprint Schematic Drawing, {prompt}, technical drawing, blueprint, schematic" 71 | }, 72 | { 73 | "name": "Caricature", 74 | "name_zh": "漫画", 75 | "template": "Caricature, {prompt}, exaggerated, comical, caricature" 76 | }, 77 | { 78 | "name": "Cel Shaded Art", 79 | "name_zh": "单色阴影艺术", 80 | "template": "Cel Shaded Art, {prompt}, 2D, flat color, toon shading, cel shaded style" 81 | }, 82 | { 83 | "name": "Character Design Sheet", 84 | "name_zh": "角色设计图", 85 | "template": "Character Design Sheet, {prompt}, character reference sheet, character turn around" 86 | }, 87 | { 88 | "name": "Classicism Art", 89 | "name_zh": "古典主义艺术", 90 | "template": "Classicism Art, {prompt}, inspired by Roman and Greek culture, clarity, harmonious, classicism art" 91 | }, 92 | { 93 | "name": "Color Field Painting", 94 | "name_zh": "色域绘画", 95 | "template": "Color Field Painting, {prompt}, abstract, simple, geometic, color field painting style" 96 | }, 97 | { 98 | "name": "Colored Pencil Art", 99 | "name_zh": "彩色铅笔艺术", 100 | "template": "Colored Pencil Art, {prompt}, colored pencil strokes, light color, visible paper texture, colored pencil art" 101 | }, 102 | { 103 | "name": "Conceptual Art", 104 | "name_zh": "概念艺术", 105 | "template": "Conceptual Art, {prompt}, concept art" 106 | }, 107 | { 108 | "name": "Constructivism", 109 | "name_zh": "结构主义", 110 | "template": "Constructivism Art, {prompt}, minimalistic, geometric forms, constructivism art" 111 | }, 112 | { 113 | "name": "Cubism", 114 | "name_zh": "立体主义", 115 | "template": "Cubism Art, {prompt}, flat geometric forms, cubism art" 116 | }, 117 | { 118 | "name": "Dadaism", 119 | "name_zh": "达达主义", 120 | "template": "Dadaism Art, {prompt}, satirical, nonsensical, dadaism art" 121 | }, 122 | { 123 | "name": "Dark Fantasy", 124 | "name_zh": "黑暗幻想", 125 | "template": "Dark Fantasy Art, {prompt}, dark, moody, dark fantasy style" 126 | }, 127 | { 128 | "name": "Dark Moody Atmosphere", 129 | "name_zh": "暗色忧郁氛围", 130 | "template": "Dark Moody Atmosphere, {prompt}, dramatic, mysterious, dark moody atmosphere" 131 | }, 132 | { 133 | "name": "DMT Art Style", 134 | "name_zh": "DMT艺术风格", 135 | "template": "DMT Art Style, {prompt}, 
bright colors, surreal visuals, swirling patterns, DMT art style" 136 | }, 137 | { 138 | "name": "Doodle Art", 139 | "name_zh": "涂鸦艺术", 140 | "template": "Doodle Art Style, {prompt}, drawing, freeform, swirling patterns, doodle art style" 141 | }, 142 | { 143 | "name": "Double Exposure", 144 | "name_zh": "双重曝光", 145 | "template": "Double Exposure Style, {prompt}, double image ghost effect, image combination, double exposure style" 146 | }, 147 | { 148 | "name": "Dripping Paint Splatter Art", 149 | "name_zh": "滴漆溅画艺术", 150 | "template": "Dripping Paint Splatter Art, {prompt}, dramatic, paint drips, splatters, dripping paint" 151 | }, 152 | { 153 | "name": "Expressionism", 154 | "name_zh": "表现主义", 155 | "template": "Expressionism Art Style, {prompt}, movement, contrast, emotional, exaggerated forms, expressionism art style" 156 | }, 157 | { 158 | "name": "Faded Polaroid Photo", 159 | "name_zh": "褪色的宝丽来照片", 160 | "template": "Faded Polaroid Photo, {prompt}, analog, old faded photo, old polaroid" 161 | }, 162 | { 163 | "name": "Fauvism", 164 | "name_zh": "野兽派", 165 | "template": "Fauvism Art, {prompt}, painterly, bold colors, textured brushwork, fauvism art" 166 | }, 167 | { 168 | "name": "Flat 2D Art", 169 | "name_zh": "扁平2D艺术", 170 | "template": "Flat 2D Art, {prompt}, simple flat color, 2-dimensional, Flat 2D Art Style" 171 | }, 172 | { 173 | "name": "Fortnite Art Style", 174 | "name_zh": "堡垒之夜艺术风格", 175 | "template": "Fortnite Art Style, {prompt}, 3D cartoon, colorful, Fortnite Art Style" 176 | }, 177 | { 178 | "name": "Futurism", 179 | "name_zh": "未来主义", 180 | "template": "Futurism Art Style, {prompt}, dynamic, dramatic, Futurism Art Style" 181 | }, 182 | { 183 | "name": "Glitchcore", 184 | "name_zh": "故障核心", 185 | "template": "Glitchcore Art Style, {prompt}, dynamic, dramatic, distorted, vibrant colors, glitchcore art style" 186 | }, 187 | { 188 | "name": "Glo-fi", 189 | "name_zh": "光环音乐风格", 190 | "template": "Glo-fi Art Style, {prompt}, dynamic, dramatic, vibrant colors, glo-fi art style" 191 | }, 192 | { 193 | "name": "Googie Art Style", 194 | "name_zh": "古奇艺术风格", 195 | "template": "Googie Art Style, {prompt}, dynamic, dramatic, 1950's futurism, bold boomerang angles, Googie art style" 196 | }, 197 | { 198 | "name": "Graffiti Art", 199 | "name_zh": "涂鸦艺术", 200 | "template": "Graffiti Art Style, {prompt}, dynamic, dramatic, vibrant colors, graffiti art style" 201 | }, 202 | { 203 | "name": "Harlem Renaissance Art", 204 | "name_zh": "哈莱姆文艺复兴艺术", 205 | "template": "Harlem Renaissance Art Style, {prompt}, dynamic, dramatic, 1920s African American culture, Harlem Renaissance art style" 206 | }, 207 | { 208 | "name": "High Fashion", 209 | "name_zh": "高级时装", 210 | "template": "High Fashion, {prompt}, dynamic, dramatic, haute couture, elegant, ornate clothing, High Fashion" 211 | }, 212 | { 213 | "name": "Idyllic", 214 | "name_zh": "田园诗般的", 215 | "template": "Idyllic, {prompt}, peaceful, happy, pleasant, happy, harmonious, picturesque, charming" 216 | }, 217 | { 218 | "name": "Impressionism", 219 | "name_zh": "印象主义", 220 | "template": "Impressionism, {prompt}, painterly, small brushstrokes, visible brushstrokes, impressionistic style" 221 | }, 222 | { 223 | "name": "Infographic Drawing", 224 | "name_zh": "信息图表绘制", 225 | "template": "Infographic Drawing, {prompt}, diagram, infographic" 226 | }, 227 | { 228 | "name": "Ink Dripping Drawing", 229 | "name_zh": "墨水滴画", 230 | "template": "Ink Dripping Drawing, {prompt}, ink drawing, dripping ink" 231 | }, 232 | { 233 | "name": "Japanese Ink Drawing", 
234 | "name_zh": "日本墨画", 235 | "template": "Japanese Ink Drawing, {prompt}, ink drawing, inkwash, Japanese Ink Drawing" 236 | }, 237 | { 238 | "name": "Knolling Photography", 239 | "name_zh": "秩序拍摄", 240 | "template": "Knolling Photography, {prompt}, flat lay photography, object arrangment, knolling photography" 241 | }, 242 | { 243 | "name": "Light Cheery Atmosphere", 244 | "name_zh": "轻快愉快的氛围", 245 | "template": "Light Cheery Atmosphere, {prompt}, happy, joyful, cheerful, carefree, gleeful, lighthearted, pleasant atmosphere" 246 | }, 247 | { 248 | "name": "Logo Design", 249 | "name_zh": "标志设计", 250 | "template": "Logo Design, {prompt}, dynamic graphic art, vector art, minimalist, professional logo design" 251 | }, 252 | { 253 | "name": "Luxurious Elegance", 254 | "name_zh": "奢华优雅", 255 | "template": "Luxurious Elegance, {prompt}, extravagant, ornate, designer, opulent, picturesque, lavish" 256 | }, 257 | { 258 | "name": "Macro Photography", 259 | "name_zh": "微距摄影", 260 | "template": "Macro Photography, {prompt}, close-up, macro 100mm, macro photography" 261 | }, 262 | { 263 | "name": "Mandola Art", 264 | "name_zh": "曼陀罗艺术", 265 | "template": "Mandola art style, {prompt}, complex, circular design, mandola" 266 | }, 267 | { 268 | "name": "Marker Drawing", 269 | "name_zh": "马克笔绘图", 270 | "template": "Marker Drawing, {prompt}, bold marker lines, visibile paper texture, marker drawing" 271 | }, 272 | { 273 | "name": "Medievalism", 274 | "name_zh": "中世纪主义", 275 | "template": "Medievalism, {prompt}, inspired by The Middle Ages, medieval art, elaborate patterns and decoration, Medievalism" 276 | }, 277 | { 278 | "name": "Minimalism", 279 | "name_zh": "极简主义", 280 | "template": "Minimalism, {prompt}, abstract, simple geometic shapes, hard edges, sleek contours, Minimalism" 281 | }, 282 | { 283 | "name": "Neo-Baroque", 284 | "name_zh": "新巴洛克", 285 | "template": "Neo-Baroque, {prompt}, ornate and elaborate, dynaimc, Neo-Baroque" 286 | }, 287 | { 288 | "name": "Neo-Byzantine", 289 | "name_zh": "新拜占庭", 290 | "template": "Neo-Byzantine, {prompt}, grand decorative religious style, Orthodox Christian inspired, Neo-Byzantine" 291 | }, 292 | { 293 | "name": "Neo-Futurism", 294 | "name_zh": "新未来主义", 295 | "template": "Neo-Futurism, {prompt}, high-tech, curves, spirals, flowing lines, idealistic future, Neo-Futurism" 296 | }, 297 | { 298 | "name": "Neo-Impressionism", 299 | "name_zh": "新印象主义", 300 | "template": "Neo-Impressionism, {prompt}, tiny dabs of color, Pointillism, painterly, Neo-Impressionism" 301 | }, 302 | { 303 | "name": "Neo-Rococo", 304 | "name_zh": "新洛可可", 305 | "template": "Neo-Rococo, {prompt}, curved forms, naturalistic ornamentation, elaborate, decorative, gaudy, Neo-Rococo" 306 | }, 307 | { 308 | "name": "Neoclassicism", 309 | "name_zh": "新古典主义", 310 | "template": "Neoclassicism, {prompt}, ancient Rome and Greece inspired, idealic, sober colors, Neoclassicism" 311 | }, 312 | { 313 | "name": "Op Art", 314 | "name_zh": "视觉艺术", 315 | "template": "Op Art, {prompt}, optical illusion, abstract, geometric pattern, impression of movement, Op Art" 316 | }, 317 | { 318 | "name": "Ornate and Intricate", 319 | "name_zh": "华丽复杂", 320 | "template": "Ornate and Intricate, {prompt}, decorative, highly detailed, elaborate, ornate, intricate" 321 | }, 322 | { 323 | "name": "Pencil Sketch Drawing", 324 | "name_zh": "铅笔素描", 325 | "template": "Pencil Sketch Drawing, {prompt}, black and white drawing, graphite drawing" 326 | }, 327 | { 328 | "name": "Pop Art 2", 329 | "name_zh": "流行艺术", 330 | "template": "Pop 
Art, {prompt}, vivid colors, flat color, 2D, strong lines, Pop Art" 331 | }, 332 | { 333 | "name": "Rococo", 334 | "name_zh": "洛可可", 335 | "template": "Rococo, {prompt}, flamboyant, pastel colors, curved lines, elaborate detail, Rococo" 336 | }, 337 | { 338 | "name": "Silhouette Art", 339 | "name_zh": "剪影艺术", 340 | "template": "Silhouette Art, {prompt}, high contrast, well defined, Silhouette Art" 341 | }, 342 | { 343 | "name": "Simple Vector Art", 344 | "name_zh": "简单矢量艺术", 345 | "template": "Simple Vector Art, {prompt}, 2D flat, simple shapes, minimalistic, professional graphic, flat color, high contrast, Simple Vector Art" 346 | }, 347 | { 348 | "name": "Sketchup", 349 | "name_zh": "草图大师", 350 | "template": "Sketchup, {prompt}, CAD, professional design, Sketchup" 351 | }, 352 | { 353 | "name": "Steampunk 2", 354 | "name_zh": "蒸汽朋克", 355 | "template": "Steampunk, {prompt}, retrofuturistic science fantasy, steam-powered tech, vintage industry, gears, neo-victorian, steampunk" 356 | }, 357 | { 358 | "name": "Surrealism", 359 | "name_zh": "超现实主义", 360 | "template": "Surrealism, {prompt}, expressive, dramatic, organic lines and forms, dreamlike and mysterious, Surrealism" 361 | }, 362 | { 363 | "name": "Suprematism", 364 | "name_zh": "至上主义", 365 | "template": "Suprematism, {prompt}, abstract, limited color palette, geometric forms, Suprematism" 366 | }, 367 | { 368 | "name": "Terragen", 369 | "name_zh": "地形生成", 370 | "template": "Terragen, {prompt}, beautiful massive landscape, epic scenery, Terragen" 371 | }, 372 | { 373 | "name": "Tranquil Relaxing Atmosphere", 374 | "name_zh": "宁静放松的氛围", 375 | "template": "Tranquil Relaxing Atmosphere, {prompt}, calming style, soothing colors, peaceful, idealic, Tranquil Relaxing Atmosphere" 376 | }, 377 | { 378 | "name": "Sticker Designs", 379 | "name_zh": "贴纸设计", 380 | "template": "Vector Art Stickers, {prompt}, professional vector design, sticker designs, Sticker Sheet" 381 | }, 382 | { 383 | "name": "Vibrant Rim Light", 384 | "name_zh": "生动的边缘光", 385 | "template": "Vibrant Rim Light, {prompt}, bright rim light, high contrast, bold edge light" 386 | }, 387 | { 388 | "name": "Volumetric Lighting", 389 | "name_zh": "体积光照明", 390 | "template": "Volumetric Lighting, {prompt}, light depth, dramatic atmospheric lighting, Volumetric Lighting" 391 | }, 392 | { 393 | "name": "Watercolor 2", 394 | "name_zh": "水彩", 395 | "template": "Watercolor style painting, {prompt}, visible paper texture, colorwash, watercolor" 396 | }, 397 | { 398 | "name": "Whimsical and Playful", 399 | "name_zh": "异想天开和俏皮", 400 | "template": "Whimsical and Playful, {prompt}, imaginative, fantastical, bight colors, stylized, happy, Whimsical and Playful" 401 | }, 402 | { 403 | "name": "Fooocus Sharp", 404 | "name_zh": "焦点锐化", 405 | "template": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy" 406 | }, 407 | { 408 | "name": "Fooocus Masterpiece", 409 | "name_zh": "焦点杰作", 410 | "template": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings, by wlop" 411 | }, 412 | { 413 | "name": "Fooocus Photograph", 414 | "name_zh": "焦点摄影", 415 | "template": "photograph {prompt}, 50mm . 
cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage" 416 | }, 417 | { 418 | "name": "Fooocus Cinematic", 419 | "name_zh": "焦点电影", 420 | "template": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy" 421 | }, 422 | { 423 | "name": "mre-cinematic-dynamic", 424 | "name_zh": "MRE电影动态", 425 | "template": "epic cinematic shot of dynamic {prompt} in motion. main subject of high budget action movie. raw photo, motion blur. best quality, high resolution" 426 | }, 427 | { 428 | "name": "mre-spontaneous-picture", 429 | "name_zh": "MRE自发图片", 430 | "template": "spontaneous picture of {prompt}, taken by talented amateur. best quality, high resolution. magical moment, natural look. simple but good looking" 431 | }, 432 | { 433 | "name": "mre-artistic-vision", 434 | "name_zh": "MRE艺术视觉", 435 | "template": "powerful artistic vision of {prompt}. breathtaking masterpiece made by great artist. best quality, high resolution" 436 | }, 437 | { 438 | "name": "mre-dark-dream", 439 | "name_zh": "MRE黑暗梦境", 440 | "template": "dark and unsettling dream showing {prompt}. best quality, high resolution. created by genius but depressed mad artist. grim beauty" 441 | }, 442 | { 443 | "name": "mre-gloomy-art", 444 | "name_zh": "MRE忧郁艺术", 445 | "template": "astonishing gloomy art made mainly of shadows and lighting, forming {prompt}. masterful usage of lighting, shadows and chiaroscuro. made by black-hearted artist, drawing from darkness. best quality, high resolution" 446 | }, 447 | { 448 | "name": "mre-bad-dream", 449 | "name_zh": "MRE恶梦", 450 | "template": "picture from really bad dream about terrifying {prompt}, true horror. bone-chilling vision. mad world that shouldn't exist. best quality, high resolution" 451 | }, 452 | { 453 | "name": "mre-underground", 454 | "name_zh": "MRE地下", 455 | "template": "uncanny caliginous vision of {prompt}, created by remarkable underground artist. best quality, high resolution. raw and brutal art, careless but impressive style. inspired by darkness and chaos" 456 | }, 457 | { 458 | "name": "mre-surreal-painting", 459 | "name_zh": "MRE超现实绘画", 460 | "template": "surreal painting representing strange vision of {prompt}. harmonious madness, synergy with chance. unique artstyle, mindbending art, magical surrealism. best quality, high resolution" 461 | }, 462 | { 463 | "name": "mre-dynamic-illustration", 464 | "name_zh": "MRE动态插画", 465 | "template": "insanely dynamic illustration of {prompt}. best quality, high resolution. crazy artstyle, careless brushstrokes, emotional and fun" 466 | }, 467 | { 468 | "name": "mre-undead-art", 469 | "name_zh": "MRE不死艺术", 470 | "template": "long forgotten art created by undead artist illustrating {prompt}, tribute to the death and decay. miserable art of the damned. wretched and decaying world. best quality, high resolution" 471 | }, 472 | { 473 | "name": "mre-elemental-art", 474 | "name_zh": "MRE元素艺术", 475 | "template": "art illustrating insane amounts of raging elemental energy turning into {prompt}, avatar of elements. magical surrealism, wizardry. best quality, high resolution" 476 | }, 477 | { 478 | "name": "mre-space-art", 479 | "name_zh": "MRE太空艺术", 480 | "template": "winner of inter-galactic art contest illustrating {prompt}, symbol of the interstellar singularity. best quality, high resolution. 
artstyle previously unseen in the whole galaxy" 481 | }, 482 | { 483 | "name": "mre-ancient-illustration", 484 | "name_zh": "MRE古代插画", 485 | "template": "sublime ancient illustration of {prompt}, predating human civilization. crude and simple, but also surprisingly beautiful artwork, made by genius primeval artist. best quality, high resolution" 486 | }, 487 | { 488 | "name": "mre-brave-art", 489 | "name_zh": "MRE勇敢艺术", 490 | "template": "brave, shocking, and brutally true art showing {prompt}. inspired by courage and unlimited creativity. truth found in chaos. best quality, high resolution" 491 | }, 492 | { 493 | "name": "mre-heroic-fantasy", 494 | "name_zh": "MRE英雄幻想", 495 | "template": "heroic fantasy painting of {prompt}, in the dangerous fantasy world. airbrush over oil on canvas. best quality, high resolution" 496 | }, 497 | { 498 | "name": "mre-dark-cyberpunk", 499 | "name_zh": "MRE黑暗赛博朋克", 500 | "template": "dark cyberpunk illustration of brutal {prompt} in a world without hope, ruled by ruthless criminal corporations. best quality, high resolution" 501 | }, 502 | { 503 | "name": "mre-lyrical-geometry", 504 | "name_zh": "MRE抒情几何", 505 | "template": "geometric and lyrical abstraction painting presenting {prompt}. oil on metal. best quality, high resolution" 506 | }, 507 | { 508 | "name": "mre-sumi-e-symbolic", 509 | "name_zh": "MRE墨绘象征", 510 | "template": "big long brushstrokes of deep black sumi-e turning into symbolic painting of {prompt}. master level raw art. best quality, high resolution" 511 | }, 512 | { 513 | "name": "mre-sumi-e-detailed", 514 | "name_zh": "MRE墨绘精细", 515 | "template": "highly detailed black sumi-e painting of {prompt}. in-depth study of perfection, created by a master. best quality, high resolution" 516 | }, 517 | { 518 | "name": "mre-manga", 519 | "name_zh": "MRE漫画", 520 | "template": "manga artwork presenting {prompt}. created by japanese manga artist. highly emotional. best quality, high resolution" 521 | }, 522 | { 523 | "name": "mre-anime", 524 | "name_zh": "MRE动漫", 525 | "template": "anime artwork illustrating {prompt}. created by japanese anime studio. highly emotional. best quality, high resolution" 526 | }, 527 | { 528 | "name": "mre-comic", 529 | "name_zh": "MRE漫画书", 530 | "template": "breathtaking illustration from adult comic book presenting {prompt}. fabulous artwork. best quality, high resolution" 531 | }, 532 | { 533 | "name": "sai-3d-model", 534 | "name_zh": "SAI三维模型", 535 | "template": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting" 536 | }, 537 | { 538 | "name": "sai-analog film", 539 | "name_zh": "SAI模拟胶片", 540 | "template": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage" 541 | }, 542 | { 543 | "name": "sai-anime", 544 | "name_zh": "SAI动漫", 545 | "template": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed" 546 | }, 547 | { 548 | "name": "sai-cinematic", 549 | "name_zh": "SAI电影", 550 | "template": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy" 551 | }, 552 | { 553 | "name": "sai-comic book", 554 | "name_zh": "SAI漫画书", 555 | "template": "comic {prompt} . 
graphic illustration, comic art, graphic novel art, vibrant, highly detailed" 556 | }, 557 | { 558 | "name": "sai-craft clay", 559 | "name_zh": "SAI手工粘土", 560 | "template": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation" 561 | }, 562 | { 563 | "name": "sai-digital art", 564 | "name_zh": "SAI数字艺术", 565 | "template": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed" 566 | }, 567 | { 568 | "name": "sai-enhance", 569 | "name_zh": "SAI增强", 570 | "template": "breathtaking {prompt} . award-winning, professional, highly detailed" 571 | }, 572 | { 573 | "name": "sai-fantasy art", 574 | "name_zh": "SAI幻想艺术", 575 | "template": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy" 576 | }, 577 | { 578 | "name": "sai-isometric", 579 | "name_zh": "SAI等距", 580 | "template": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate" 581 | }, 582 | { 583 | "name": "sai-line art", 584 | "name_zh": "SAI线条艺术", 585 | "template": "line art drawing {prompt} . professional, sleek, modern, minimalist, graphic, line art, vector graphics" 586 | }, 587 | { 588 | "name": "sai-lowpoly", 589 | "name_zh": "SAI低多边形", 590 | "template": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition" 591 | }, 592 | { 593 | "name": "sai-neonpunk", 594 | "name_zh": "SAI霓虹朋克", 595 | "template": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional" 596 | }, 597 | { 598 | "name": "sai-origami", 599 | "name_zh": "SAI折纸", 600 | "template": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition" 601 | }, 602 | { 603 | "name": "sai-photographic", 604 | "name_zh": "SAI摄影", 605 | "template": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed" 606 | }, 607 | { 608 | "name": "sai-pixel art", 609 | "name_zh": "SAI像素艺术", 610 | "template": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics" 611 | }, 612 | { 613 | "name": "sai-texture", 614 | "name_zh": "SAI质地", 615 | "template": "texture {prompt} top down close-up" 616 | }, 617 | { 618 | "name": "ads-advertising", 619 | "name_zh": "广告", 620 | "template": "advertising poster style {prompt} . Professional, modern, product-focused, commercial, eye-catching, highly detailed" 621 | }, 622 | { 623 | "name": "ads-automotive", 624 | "name_zh": "汽车广告", 625 | "template": "automotive advertisement style {prompt} . sleek, dynamic, professional, commercial, vehicle-focused, high-resolution, highly detailed" 626 | }, 627 | { 628 | "name": "ads-corporate", 629 | "name_zh": "企业广告", 630 | "template": "corporate branding style {prompt} . professional, clean, modern, sleek, minimalist, business-oriented, highly detailed" 631 | }, 632 | { 633 | "name": "ads-fashion editorial", 634 | "name_zh": "时尚编辑", 635 | "template": "fashion editorial style {prompt} . high fashion, trendy, stylish, editorial, magazine style, professional, highly detailed" 636 | }, 637 | { 638 | "name": "ads-food photography", 639 | "name_zh": "食品摄影", 640 | "template": "food photography style {prompt} . 
appetizing, professional, culinary, high-resolution, commercial, highly detailed" 641 | }, 642 | { 643 | "name": "ads-gourmet food photography", 644 | "name_zh": "美食摄影", 645 | "template": "gourmet food photo of {prompt} . soft natural lighting, macro details, vibrant colors, fresh ingredients, glistening textures, bokeh background, styled plating, wooden tabletop, garnished, tantalizing, editorial quality" 646 | }, 647 | { 648 | "name": "ads-luxury", 649 | "name_zh": "奢华广告", 650 | "template": "luxury product style {prompt} . elegant, sophisticated, high-end, luxurious, professional, highly detailed" 651 | }, 652 | { 653 | "name": "ads-real estate", 654 | "name_zh": "房地产广告", 655 | "template": "real estate photography style {prompt} . professional, inviting, well-lit, high-resolution, property-focused, commercial, highly detailed" 656 | }, 657 | { 658 | "name": "ads-retail", 659 | "name_zh": "零售广告", 660 | "template": "retail packaging style {prompt} . vibrant, enticing, commercial, product-focused, eye-catching, professional, highly detailed" 661 | }, 662 | { 663 | "name": "artstyle-abstract", 664 | "name_zh": "抽象艺术风格", 665 | "template": "abstract style {prompt} . non-representational, colors and shapes, expression of feelings, imaginative, highly detailed" 666 | }, 667 | { 668 | "name": "artstyle-abstract expressionism", 669 | "name_zh": "抽象表现主义", 670 | "template": "abstract expressionist painting {prompt} . energetic brushwork, bold colors, abstract forms, expressive, emotional" 671 | }, 672 | { 673 | "name": "artstyle-art deco", 674 | "name_zh": "艺术装饰风格", 675 | "template": "art deco style {prompt} . geometric shapes, bold colors, luxurious, elegant, decorative, symmetrical, ornate, detailed" 676 | }, 677 | { 678 | "name": "artstyle-art nouveau", 679 | "name_zh": "新艺术风格", 680 | "template": "art nouveau style {prompt} . elegant, decorative, curvilinear forms, nature-inspired, ornate, detailed" 681 | }, 682 | { 683 | "name": "artstyle-constructivist", 684 | "name_zh": "构成主义", 685 | "template": "constructivist style {prompt} . geometric shapes, bold colors, dynamic composition, propaganda art style" 686 | }, 687 | { 688 | "name": "artstyle-cubist", 689 | "name_zh": "立体主义", 690 | "template": "cubist artwork {prompt} . geometric shapes, abstract, innovative, revolutionary" 691 | }, 692 | { 693 | "name": "artstyle-expressionist", 694 | "name_zh": "表现主义", 695 | "template": "expressionist {prompt} . raw, emotional, dynamic, distortion for emotional effect, vibrant, use of unusual colors, detailed" 696 | }, 697 | { 698 | "name": "artstyle-graffiti", 699 | "name_zh": "涂鸦", 700 | "template": "graffiti style {prompt} . street art, vibrant, urban, detailed, tag, mural" 701 | }, 702 | { 703 | "name": "artstyle-hyperrealism", 704 | "name_zh": "超现实主义", 705 | "template": "hyperrealistic art {prompt} . extremely high-resolution details, photographic, realism pushed to extreme, fine texture, incredibly lifelike" 706 | }, 707 | { 708 | "name": "artstyle-impressionist", 709 | "name_zh": "印象主义", 710 | "template": "impressionist painting {prompt} . loose brushwork, vibrant color, light and shadow play, captures feeling over form" 711 | }, 712 | { 713 | "name": "artstyle-pointillism", 714 | "name_zh": "点彩主义", 715 | "template": "pointillism style {prompt} . composed entirely of small, distinct dots of color, vibrant, highly detailed" 716 | }, 717 | { 718 | "name": "artstyle-pop art", 719 | "name_zh": "波普艺术", 720 | "template": "pop Art style {prompt} . 
bright colors, bold outlines, popular culture themes, ironic or kitsch" 721 | }, 722 | { 723 | "name": "artstyle-psychedelic", 724 | "name_zh": "迷幻艺术", 725 | "template": "psychedelic style {prompt} . vibrant colors, swirling patterns, abstract forms, surreal, trippy" 726 | }, 727 | { 728 | "name": "artstyle-renaissance", 729 | "name_zh": "文艺复兴", 730 | "template": "renaissance style {prompt} . realistic, perspective, light and shadow, religious or mythological themes, highly detailed" 731 | }, 732 | { 733 | "name": "artstyle-steampunk", 734 | "name_zh": "蒸汽朋克", 735 | "template": "steampunk style {prompt} . antique, mechanical, brass and copper tones, gears, intricate, detailed" 736 | }, 737 | { 738 | "name": "artstyle-surrealist", 739 | "name_zh": "超现实主义", 740 | "template": "surrealist art {prompt} . dreamlike, mysterious, provocative, symbolic, intricate, detailed" 741 | }, 742 | { 743 | "name": "artstyle-typography", 744 | "name_zh": "排版艺术", 745 | "template": "typographic art {prompt} . stylized, intricate, detailed, artistic, text-based" 746 | }, 747 | { 748 | "name": "artstyle-watercolor", 749 | "name_zh": "水彩艺术", 750 | "template": "watercolor painting {prompt} . vibrant, beautiful, painterly, detailed, textural, artistic" 751 | }, 752 | { 753 | "name": "futuristic-biomechanical", 754 | "name_zh": "未来生物力学", 755 | "template": "biomechanical style {prompt} . blend of organic and mechanical elements, futuristic, cybernetic, detailed, intricate" 756 | }, 757 | { 758 | "name": "futuristic-biomechanical cyberpunk", 759 | "name_zh": "未来生物力学赛博朋克", 760 | "template": "biomechanical cyberpunk {prompt} . cybernetics, human-machine fusion, dystopian, organic meets artificial, dark, intricate, highly detailed" 761 | }, 762 | { 763 | "name": "futuristic-cybernetic", 764 | "name_zh": "未来赛博", 765 | "template": "cybernetic style {prompt} . futuristic, technological, cybernetic enhancements, robotics, artificial intelligence themes" 766 | }, 767 | { 768 | "name": "futuristic-cybernetic robot", 769 | "name_zh": "未来机器人", 770 | "template": "cybernetic robot {prompt} . android, AI, machine, metal, wires, tech, futuristic, highly detailed" 771 | }, 772 | { 773 | "name": "futuristic-cyberpunk cityscape", 774 | "name_zh": "未来赛博朋克城市景观", 775 | "template": "cyberpunk cityscape {prompt} . neon lights, dark alleys, skyscrapers, futuristic, vibrant colors, high contrast, highly detailed" 776 | }, 777 | { 778 | "name": "futuristic-futuristic", 779 | "name_zh": "未来主义", 780 | "template": "futuristic style {prompt} . sleek, modern, ultramodern, high tech, detailed" 781 | }, 782 | { 783 | "name": "futuristic-retro cyberpunk", 784 | "name_zh": "未来复古赛博朋克", 785 | "template": "retro cyberpunk {prompt} . 80's inspired, synthwave, neon, vibrant, detailed, retro futurism" 786 | }, 787 | { 788 | "name": "futuristic-retro futurism", 789 | "name_zh": "未来复古主义", 790 | "template": "retro-futuristic {prompt} . vintage sci-fi, 50s and 60s style, atomic age, vibrant, highly detailed" 791 | }, 792 | { 793 | "name": "futuristic-sci-fi", 794 | "name_zh": "科幻未来主义", 795 | "template": "sci-fi style {prompt} . futuristic, technological, alien worlds, space themes, advanced civilizations" 796 | }, 797 | { 798 | "name": "futuristic-vaporwave", 799 | "name_zh": "未来波", 800 | "template": "vaporwave style {prompt} . retro aesthetic, cyberpunk, vibrant, neon colors, vintage 80s and 90s style, highly detailed" 801 | }, 802 | { 803 | "name": "game-bubble bobble", 804 | "name_zh": "游戏-泡泡龙", 805 | "template": "Bubble Bobble style {prompt} . 
8-bit, cute, pixelated, fantasy, vibrant, reminiscent of Bubble Bobble game" 806 | }, 807 | { 808 | "name": "game-cyberpunk game", 809 | "name_zh": "赛博朋克游戏", 810 | "template": "cyberpunk game style {prompt} . neon, dystopian, futuristic, digital, vibrant, detailed, high contrast, reminiscent of cyberpunk genre video games" 811 | }, 812 | { 813 | "name": "game-fighting game", 814 | "name_zh": "格斗游戏", 815 | "template": "fighting game style {prompt} . dynamic, vibrant, action-packed, detailed character design, reminiscent of fighting video games" 816 | }, 817 | { 818 | "name": "game-gta", 819 | "name_zh": "侠盗猎车手游戏", 820 | "template": "GTA-style artwork {prompt} . satirical, exaggerated, pop art style, vibrant colors, iconic characters, action-packed" 821 | }, 822 | { 823 | "name": "game-mario", 824 | "name_zh": "马里奥游戏", 825 | "template": "Super Mario style {prompt} . vibrant, cute, cartoony, fantasy, playful, reminiscent of Super Mario series" 826 | }, 827 | { 828 | "name": "game-minecraft", 829 | "name_zh": "我的世界游戏", 830 | "template": "Minecraft style {prompt} . blocky, pixelated, vibrant colors, recognizable characters and objects, game assets" 831 | }, 832 | { 833 | "name": "game-pokemon", 834 | "name_zh": "宝可梦游戏", 835 | "template": "Pokémon style {prompt} . vibrant, cute, anime, fantasy, reminiscent of Pokémon series" 836 | }, 837 | { 838 | "name": "game-retro arcade", 839 | "name_zh": "复古街机", 840 | "template": "retro arcade style {prompt} . 8-bit, pixelated, vibrant, classic video game, old school gaming, reminiscent of 80s and 90s arcade games" 841 | }, 842 | { 843 | "name": "game-retro game", 844 | "name_zh": "复古游戏", 845 | "template": "retro game art {prompt} . 16-bit, vibrant colors, pixelated, nostalgic, charming, fun" 846 | }, 847 | { 848 | "name": "game-rpg fantasy game", 849 | "name_zh": "角色扮演幻想游戏", 850 | "template": "role-playing game (RPG) style fantasy {prompt} . detailed, vibrant, immersive, reminiscent of high fantasy RPG games" 851 | }, 852 | { 853 | "name": "game-strategy game", 854 | "name_zh": "策略游戏", 855 | "template": "strategy game style {prompt} . overhead view, detailed map, units, reminiscent of real-time strategy video games" 856 | }, 857 | { 858 | "name": "game-streetfighter", 859 | "name_zh": "街头霸王游戏", 860 | "template": "Street Fighter style {prompt} . vibrant, dynamic, arcade, 2D fighting game, highly detailed, reminiscent of Street Fighter series" 861 | }, 862 | { 863 | "name": "game-zelda", 864 | "name_zh": "塞尔达传说游戏", 865 | "template": "Legend of Zelda style {prompt} . vibrant, fantasy, detailed, epic, heroic, reminiscent of The Legend of Zelda series" 866 | }, 867 | { 868 | "name": "misc-architectural", 869 | "name_zh": "建筑", 870 | "template": "architectural style {prompt} . clean lines, geometric shapes, minimalist, modern, architectural drawing, highly detailed" 871 | }, 872 | { 873 | "name": "misc-disco", 874 | "name_zh": "迪斯科", 875 | "template": "disco-themed {prompt} . vibrant, groovy, retro 70s style, shiny disco balls, neon lights, dance floor, highly detailed" 876 | }, 877 | { 878 | "name": "misc-dreamscape", 879 | "name_zh": "梦境", 880 | "template": "dreamscape {prompt} . surreal, ethereal, dreamy, mysterious, fantasy, highly detailed" 881 | }, 882 | { 883 | "name": "misc-dystopian", 884 | "name_zh": "反乌托邦", 885 | "template": "dystopian style {prompt} . bleak, post-apocalyptic, somber, dramatic, highly detailed" 886 | }, 887 | { 888 | "name": "misc-fairy tale", 889 | "name_zh": "童话故事", 890 | "template": "fairy tale {prompt} . 
magical, fantastical, enchanting, storybook style, highly detailed" 891 | }, 892 | { 893 | "name": "misc-gothic", 894 | "name_zh": "哥特", 895 | "template": "gothic style {prompt} . dark, mysterious, haunting, dramatic, ornate, detailed" 896 | }, 897 | { 898 | "name": "misc-grunge", 899 | "name_zh": "垃圾摇滚", 900 | "template": "grunge style {prompt} . textured, distressed, vintage, edgy, punk rock vibe, dirty, noisy" 901 | }, 902 | { 903 | "name": "misc-horror", 904 | "name_zh": "恐怖", 905 | "template": "horror-themed {prompt} . eerie, unsettling, dark, spooky, suspenseful, grim, highly detailed" 906 | }, 907 | { 908 | "name": "misc-kawaii", 909 | "name_zh": "卡哇伊", 910 | "template": "kawaii style {prompt} . cute, adorable, brightly colored, cheerful, anime influence, highly detailed" 911 | }, 912 | { 913 | "name": "misc-lovecraftian", 914 | "name_zh": "克苏鲁神话", 915 | "template": "lovecraftian horror {prompt} . eldritch, cosmic horror, unknown, mysterious, surreal, highly detailed" 916 | }, 917 | { 918 | "name": "misc-macabre", 919 | "name_zh": "恐怖的", 920 | "template": "macabre style {prompt} . dark, gothic, grim, haunting, highly detailed" 921 | }, 922 | { 923 | "name": "misc-manga", 924 | "name_zh": "漫画", 925 | "template": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style" 926 | }, 927 | { 928 | "name": "misc-metropolis", 929 | "name_zh": "大都市", 930 | "template": "metropolis-themed {prompt} . urban, cityscape, skyscrapers, modern, futuristic, highly detailed" 931 | }, 932 | { 933 | "name": "misc-minimalist", 934 | "name_zh": "极简主义", 935 | "template": "minimalist style {prompt} . simple, clean, uncluttered, modern, elegant" 936 | }, 937 | { 938 | "name": "misc-monochrome", 939 | "name_zh": "单色", 940 | "template": "monochrome {prompt} . black and white, contrast, tone, texture, detailed" 941 | }, 942 | { 943 | "name": "misc-nautical", 944 | "name_zh": "航海", 945 | "template": "nautical-themed {prompt} . sea, ocean, ships, maritime, beach, marine life, highly detailed" 946 | }, 947 | { 948 | "name": "misc-space", 949 | "name_zh": "太空", 950 | "template": "space-themed {prompt} . cosmic, celestial, stars, galaxies, nebulas, planets, science fiction, highly detailed" 951 | }, 952 | { 953 | "name": "misc-stained glass", 954 | "name_zh": "彩色玻璃", 955 | "template": "stained glass style {prompt} . vibrant, beautiful, translucent, intricate, detailed" 956 | }, 957 | { 958 | "name": "misc-techwear fashion", 959 | "name_zh": "科技服饰", 960 | "template": "techwear fashion {prompt} . futuristic, cyberpunk, urban, tactical, sleek, dark, highly detailed" 961 | }, 962 | { 963 | "name": "misc-tribal", 964 | "name_zh": "部落", 965 | "template": "tribal style {prompt} . indigenous, ethnic, traditional patterns, bold, natural colors, highly detailed" 966 | }, 967 | { 968 | "name": "misc-zentangle", 969 | "name_zh": "禅绕画", 970 | "template": "zentangle {prompt} . intricate, abstract, monochrome, patterns, meditative, highly detailed" 971 | }, 972 | { 973 | "name": "papercraft-collage", 974 | "name_zh": "纸艺拼贴", 975 | "template": "collage style {prompt} . mixed media, layered, textural, detailed, artistic" 976 | }, 977 | { 978 | "name": "papercraft-flat papercut", 979 | "name_zh": "平面剪纸", 980 | "template": "flat papercut style {prompt} . silhouette, clean cuts, paper, sharp edges, minimalist, color block" 981 | }, 982 | { 983 | "name": "papercraft-kirigami", 984 | "name_zh": "剪纸", 985 | "template": "kirigami representation of {prompt} . 
3D, paper folding, paper cutting, Japanese, intricate, symmetrical, precision, clean lines" 986 | }, 987 | { 988 | "name": "papercraft-paper mache", 989 | "name_zh": "纸浆塑型", 990 | "template": "paper mache representation of {prompt} . 3D, sculptural, textured, handmade, vibrant, fun" 991 | }, 992 | { 993 | "name": "papercraft-paper quilling", 994 | "name_zh": "纸卷艺术", 995 | "template": "paper quilling art of {prompt} . intricate, delicate, curling, rolling, shaping, coiling, loops, 3D, dimensional, ornamental" 996 | }, 997 | { 998 | "name": "papercraft-papercut collage", 999 | "name_zh": "剪纸拼贴", 1000 | "template": "papercut collage of {prompt} . mixed media, textured paper, overlapping, asymmetrical, abstract, vibrant" 1001 | }, 1002 | { 1003 | "name": "papercraft-papercut shadow box", 1004 | "name_zh": "剪纸影箱", 1005 | "template": "3D papercut shadow box of {prompt} . layered, dimensional, depth, silhouette, shadow, papercut, handmade, high contrast" 1006 | }, 1007 | { 1008 | "name": "papercraft-stacked papercut", 1009 | "name_zh": "堆叠剪纸", 1010 | "template": "stacked papercut art of {prompt} . 3D, layered, dimensional, depth, precision cut, stacked layers, papercut, high contrast" 1011 | }, 1012 | { 1013 | "name": "papercraft-thick layered papercut", 1014 | "name_zh": "厚层剪纸", 1015 | "template": "thick layered papercut art of {prompt} . deep 3D, volumetric, dimensional, depth, thick paper, high stack, heavy texture, tangible layers" 1016 | }, 1017 | { 1018 | "name": "photo-alien", 1019 | "name_zh": "异形", 1020 | "template": "alien-themed {prompt} . extraterrestrial, cosmic, otherworldly, mysterious, sci-fi, highly detailed" 1021 | }, 1022 | { 1023 | "name": "photo-film noir", 1024 | "name_zh": "黑色电影", 1025 | "template": "film noir style {prompt} . monochrome, high contrast, dramatic shadows, 1940s style, mysterious, cinematic" 1026 | }, 1027 | { 1028 | "name": "photo-glamour", 1029 | "name_zh": "魅力", 1030 | "template": "glamorous photo {prompt} . high fashion, luxurious, extravagant, stylish, sensual, opulent, elegance, stunning beauty, professional, high contrast, detailed" 1031 | }, 1032 | { 1033 | "name": "photo-hdr", 1034 | "name_zh": "高动态范围", 1035 | "template": "HDR photo of {prompt} . High dynamic range, vivid, rich details, clear shadows and highlights, realistic, intense, enhanced contrast, highly detailed" 1036 | }, 1037 | { 1038 | "name": "photo-iphone photographic", 1039 | "name_zh": "iPhone摄影", 1040 | "template": "iphone photo {prompt} . large depth of field, deep depth of field, highly detailed" 1041 | }, 1042 | { 1043 | "name": "photo-long exposure", 1044 | "name_zh": "长曝光", 1045 | "template": "long exposure photo of {prompt} . Blurred motion, streaks of light, surreal, dreamy, ghosting effect, highly detailed" 1046 | }, 1047 | { 1048 | "name": "photo-neon noir", 1049 | "name_zh": "霓虹黑色", 1050 | "template": "neon noir {prompt} . cyberpunk, dark, rainy streets, neon signs, high contrast, low light, vibrant, highly detailed" 1051 | }, 1052 | { 1053 | "name": "photo-silhouette", 1054 | "name_zh": "剪影", 1055 | "template": "silhouette style {prompt} . high contrast, minimalistic, black and white, stark, dramatic" 1056 | }, 1057 | { 1058 | "name": "photo-tilt-shift", 1059 | "name_zh": "倾斜移位", 1060 | "template": "tilt-shift photo of {prompt} . 
selective focus, miniature effect, blurred background, highly detailed, vibrant, perspective control" 1061 | } 1062 | ] 1063 | 1064 | def __call__(self, prompt): 1065 | style_id = torch.randint(0, len(self.styles), size=(1,)).tolist()[0] 1066 | prompt = self.styles[style_id]["template"].format(prompt=prompt) 1067 | return prompt 1068 | --------------------------------------------------------------------------------
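Usage note: the __call__ method above draws one style uniformly at random with torch.randint and substitutes the incoming prompt into that style's template. Below is a minimal sketch of wiring this processor into the pipeline, assuming the enclosing class in pulse/processor/style.py is importable as StyleProcessor with a no-argument constructor, and that DataProcessUnit writes the values named in output_params back into the data dict and returns it (the class name and that output behaviour are assumptions; only the __call__ body appears above).

    from pulse.pipeline import DataProcessUnit
    from pulse.processor.style import StyleProcessor  # assumed class name

    # Feed data["prompt"] into the processor's `prompt` argument and write the
    # styled prompt back under the same key (assumed output handling).
    style_unit = DataProcessUnit(
        processor=StyleProcessor(),
        input_params={"prompt": "prompt"},
        output_params=("prompt",),
    )

    data = style_unit({"prompt": "a cat sitting on a windowsill"})
    # data["prompt"] might now read, for example:
    # "Macro Photography, a cat sitting on a windowsill, close-up, macro 100mm, macro photography"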