├── pulse ├── __init__.py ├── dataset │ ├── __init__.py │ ├── diffusiondb.py │ ├── anytext.py │ └── dataset.py ├── processor │ ├── __init__.py │ ├── face.py │ ├── preference.py │ ├── flux_t2i.py │ ├── sdxl_t2i.py │ ├── sd_t2i.py │ ├── general.py │ ├── image_cache.py │ ├── qwenvl_i2t.py │ └── style.py └── pipeline │ ├── __init__.py │ ├── unit.py │ └── pipeline.py ├── requirements.txt ├── .gitignore ├── scripts ├── english_text.py ├── faceid.py ├── zoomin_zoomout.py ├── change_add_remove.py └── style_transfer.py ├── README_zh.md ├── README.md └── LICENSE /pulse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/dataset/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/processor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pulse/pipeline/__init__.py: -------------------------------------------------------------------------------- 1 | from .unit import DataProcessUnit 2 | from .pipeline import DataPipeline 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffsynth 2 | dashscope 3 | pandas 4 | pyarrow 5 | fastparquet 6 | opencv-python-headless 7 | -------------------------------------------------------------------------------- /pulse/processor/face.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | class FaceDataSelector: 4 | def __init__(self): 5 | pass 6 | 7 | def __call__(self, metadata): 8 | descriptions = [data for data in metadata["descriptions"] if data["gender"] == metadata["gender_in_image"]] 9 | return descriptions[0]["description"], descriptions[1 if len(descriptions) > 1 else 0]["description"] -------------------------------------------------------------------------------- /pulse/processor/preference.py: -------------------------------------------------------------------------------- 1 | from diffsynth.extensions.ImageQualityMetric import download_preference_model, load_preference_model, preference_model_id 2 | 3 | 4 | class ImagePreferenceModel: 5 | def __init__(self, model_name: preference_model_id, cache_dir="./models", device="cuda"): 6 | path = download_preference_model(model_name, cache_dir=cache_dir) 7 | self.preference_model = load_preference_model(model_name, device=device, path=path) 8 | 9 | def __call__(self, image, prompt): 10 | return self.preference_model.score(image, prompt)[0] 11 | -------------------------------------------------------------------------------- /pulse/processor/flux_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, FluxImagePipeline 3 | 4 | 5 | class FLUXT2I: 6 | def __init__(self, model_path, device="cuda", model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.bfloat16, device=device) 8 | model_manager.load_models(model_path) 9 | self.pipe = FluxImagePipeline.from_model_manager(model_manager, **model_kwargs) 10 | self.pipeline_kwargs = pipeline_kwargs 11 | 12 | def __call__(self, **kwargs): 13 | return 
self.pipe(**self.pipeline_kwargs, **kwargs) 14 | 15 | -------------------------------------------------------------------------------- /pulse/processor/sdxl_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, SDXLImagePipeline 3 | 4 | 5 | class SDXLT2I: 6 | def __init__(self, model_path, device="cuda", model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.float16, device=device) 8 | model_manager.load_models(model_path) 9 | self.pipe = SDXLImagePipeline.from_model_manager(model_manager, **model_kwargs) 10 | self.pipeline_kwargs = pipeline_kwargs 11 | 12 | def __call__(self, **kwargs): 13 | return self.pipe(**self.pipeline_kwargs, **kwargs) 14 | 15 | -------------------------------------------------------------------------------- /pulse/processor/sd_t2i.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from diffsynth import ModelManager, SDImagePipeline 3 | 4 | 5 | class SDT2I: 6 | def __init__(self, model_path, device="cuda", textual_inversions=[], model_kwargs={}, pipeline_kwargs={}): 7 | model_manager = ModelManager(torch_dtype=torch.float16, device=device) 8 | model_manager.load_models(model_path) 9 | model_manager.load_lora("models/lora/add_detail.safetensors", lora_alpha=-4) 10 | self.pipe = SDImagePipeline.from_model_manager(model_manager, **model_kwargs) 11 | self.pipe.prompter.load_textual_inversions(textual_inversions) 12 | self.pipeline_kwargs = pipeline_kwargs 13 | 14 | def __call__(self, **kwargs): 15 | return self.pipe(**self.pipeline_kwargs, **kwargs) 16 | 17 | -------------------------------------------------------------------------------- /pulse/dataset/diffusiondb.py: -------------------------------------------------------------------------------- 1 | import random, pandas, torch 2 | 3 | 4 | class DiffusionDB: 5 | def __init__(self, path, shuffle=True, seed=None, num_data=1000000, multi_prompt=False, num_prompt=1): 6 | self.data = pandas.read_parquet(path)["prompt"].tolist() 7 | if shuffle: 8 | if seed is None: 9 | seed = torch.randint(0, 10**9, size=(1,)).tolist()[0] 10 | random.seed(seed) 11 | random.shuffle(self.data) 12 | self.num_data = num_data 13 | self.multi_prompt = multi_prompt 14 | self.num_prompt = num_prompt 15 | 16 | def __getitem__(self, i): 17 | if self.multi_prompt: 18 | return {"prompt": self.data[i * self.num_prompt: i * self.num_prompt + self.num_prompt]} 19 | else: 20 | return {"prompt": self.data[i]} 21 | 22 | def __len__(self): 23 | return self.num_data // self.num_prompt 24 | -------------------------------------------------------------------------------- /pulse/pipeline/unit.py: -------------------------------------------------------------------------------- 1 | class DataProcessUnit: 2 | def __init__(self, processor, input_params={}, output_params=(), parse_output_dict=False, extra_input_kwargs={}): 3 | self.processor = processor 4 | self.input_params = input_params 5 | self.output_params = output_params 6 | self.parse_output_dict = parse_output_dict 7 | self.extra_input_kwargs = extra_input_kwargs 8 | 9 | def __call__(self, data: dict): 10 | input_params = {name: data[self.input_params[name]] for name in self.input_params} 11 | input_params.update(self.extra_input_kwargs) 12 | raw_output = self.processor(**input_params) 13 | if self.parse_output_dict: 14 | data.update(raw_output) 15 | else: 16 | if not isinstance(raw_output, tuple): 17 | raw_output = (raw_output,) 
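# Non-dict outputs are matched positionally against output_params in the loop below:
# the i-th element of raw_output is stored in data under the i-th name in output_params,
# so any surplus outputs (or surplus names) are silently dropped by zip().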
18 | for name, output in zip(self.output_params, raw_output): 19 | data[name] = output 20 | return data -------------------------------------------------------------------------------- /pulse/pipeline/pipeline.py: -------------------------------------------------------------------------------- 1 | class DataPipeline: 2 | def __init__(self, units=()): 3 | self.units = units 4 | self.error_log = [0] * len(units) 5 | self.drop_log = [0] * len(units) 6 | 7 | def __call__(self, data, ignore_errors=False, debug_mode=False): 8 | for unit_id, unit in enumerate(self.units): 9 | if ignore_errors: 10 | try: 11 | data = unit(data) 12 | except: 13 | self.error_log[unit_id] += 1 14 | return None 15 | else: 16 | data = unit(data) 17 | if debug_mode: 18 | print("-" * 200) 19 | for key in data: 20 | print(key, data[key]) 21 | print("-" * 200) 22 | return data 23 | 24 | def report_log(self): 25 | for unit_id, unit in enumerate(self.units): 26 | print(f"Unit id: {unit_id} Processor name: {unit.processor.__class__.__name__} Errors: {self.error_log[unit_id]} Drops: {self.drop_log[unit_id]}") 27 | -------------------------------------------------------------------------------- /pulse/dataset/anytext.py: -------------------------------------------------------------------------------- 1 | import random, torch, json 2 | 3 | 4 | class AnyText: 5 | def __init__(self, path, shuffle=True, seed=None, num_data=1000000, multi_prompt=False, num_prompt=1): 6 | with open(path, "r", encoding="utf-8") as f: 7 | data = json.load(f) 8 | prompt_list = [] 9 | for i in data["data_list"]: 10 | prompt_list.append(i["caption"]) 11 | self.data = prompt_list 12 | if shuffle: 13 | if seed is None: 14 | seed = torch.randint(0, 10**9, size=(1,)).tolist()[0] 15 | random.seed(seed) 16 | random.shuffle(self.data) 17 | self.num_data = num_data 18 | self.multi_prompt = multi_prompt 19 | self.num_prompt = num_prompt 20 | 21 | def __getitem__(self, i): 22 | if self.multi_prompt: 23 | return {"prompt": self.data[i * self.num_prompt: i * self.num_prompt + self.num_prompt]} 24 | else: 25 | return {"prompt": self.data[i]} 26 | 27 | def __len__(self): 28 | return self.num_data // self.num_prompt 29 | -------------------------------------------------------------------------------- /pulse/processor/general.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | class ListSampler: 7 | def __init__(self): 8 | pass 9 | 10 | def __call__(self, ls): 11 | i = torch.randint(0, len(ls), size=(1,)).tolist()[0] 12 | return ls[i] 13 | 14 | 15 | class TextFormater: 16 | def __init__(self, template): 17 | self.template = template 18 | 19 | def __call__(self, text_list=[], *args, **kwargs): 20 | args = tuple(text_list) + tuple(str(i) for i in args) + tuple(str(kwargs[i]) for i in kwargs) 21 | return self.template % args 22 | 23 | 24 | class ListPacker: 25 | def __init__(self): 26 | pass 27 | 28 | def __call__(self, *args, **kwargs): 29 | ls = list(i for i in args) + list(kwargs[i] for i in kwargs) 30 | return ls 31 | 32 | 33 | class ImageCropper: 34 | def __init__(self): 35 | pass 36 | 37 | def __call__(self, bbox, image): 38 | x1, y1, x2, y2 = bbox 39 | image = np.array(image) 40 | image = image[y1: y2, x1: x2] 41 | image = Image.fromarray(image) 42 | return image 43 | 44 | 45 | class ImageResizer: 46 | def __init__(self): 47 | pass 48 | 49 | def __call__(self, image, height=1024, width=1024): 50 | return image.resize((width, height)) 51 | 
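# A minimal usage sketch of the general-purpose processors above, assuming they are
# combined through DataProcessUnit/DataPipeline as in the scripts; the style list,
# "%s" template, bounding box, and blank input image are illustrative placeholders only.
from PIL import Image
from pulse.processor.general import ListSampler, TextFormater, ImageCropper, ImageResizer
from pulse.pipeline import DataProcessUnit, DataPipeline

sketch_pipe = DataPipeline(units=[
    # Sample one entry from data["styles"] and store it as data["style"].
    DataProcessUnit(
        processor=ListSampler(),
        input_params={"ls": "styles"},
        output_params=("style",),
    ),
    # Fill the "%s" template with the sampled style and store the result as data["prompt"].
    DataProcessUnit(
        processor=TextFormater(template="a portrait in %s style"),
        input_params={"style": "style"},
        output_params=("prompt",),
    ),
    # Crop data["image"] to data["bbox"], then resize the crop to 512x512.
    DataProcessUnit(
        processor=ImageCropper(),
        input_params={"bbox": "bbox", "image": "image"},
        output_params=("image_cropped",),
    ),
    DataProcessUnit(
        processor=ImageResizer(),
        input_params={"image": "image_cropped"},
        output_params=("image_resized",),
        extra_input_kwargs={"height": 512, "width": 512},
    ),
])

result = sketch_pipe(
    {
        "styles": ["watercolor", "oil painting"],
        "image": Image.new("RGB", (1024, 1024)),
        "bbox": (100, 100, 600, 600),
    },
    ignore_errors=True,
)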
-------------------------------------------------------------------------------- /pulse/processor/image_cache.py: -------------------------------------------------------------------------------- 1 | import os, time, shutil 2 | from PIL.Image import Image 3 | 4 | 5 | class ImageCache: 6 | def __init__(self, cache_dir="cache", max_cache_num=10, file_extension="png"): 7 | timestamp = str(time.time_ns()) 8 | self.cache_dir = os.path.join(cache_dir, timestamp) 9 | print(f"Image cache files will be saved at {self.cache_dir}") 10 | os.makedirs(self.cache_dir, exist_ok=True) 11 | self.max_cache_num = max_cache_num 12 | self.file_extension = file_extension 13 | self.cached_files = [] 14 | 15 | def __call__(self, image): 16 | while len(self.cached_files) > self.max_cache_num: 17 | file_path = self.cached_files.pop(0) 18 | os.remove(file_path) 19 | timestamp = str(time.time_ns()) 20 | if isinstance(image, Image): 21 | path = os.path.join(self.cache_dir, f"{timestamp}.{self.file_extension}") 22 | image.save(path) 23 | self.cached_files.append(path) 24 | elif isinstance(image, str): 25 | _, file_extension = os.path.splitext(image) 26 | path = os.path.join(self.cache_dir, f"{timestamp}.{file_extension}") 27 | shutil.copy(image, path) 28 | self.cached_files.append(path) 29 | else: 30 | raise ValueError("Unsupported image format.") 31 | return path 32 | -------------------------------------------------------------------------------- /pulse/processor/qwenvl_i2t.py: -------------------------------------------------------------------------------- 1 | import json, dashscope 2 | import numpy as np 3 | from PIL import Image 4 | 5 | 6 | class QwenVLI2T: 7 | def __init__(self, api_key, model_id, prompt=""): 8 | dashscope.api_key = api_key 9 | self.model_id = model_id 10 | self.prompt = prompt 11 | 12 | def __call__(self, images=[], prompt=None, system_prompt=None): 13 | messages = [] 14 | if system_prompt is not None: 15 | messages.append({"role": "system", "content": system_prompt}) 16 | if prompt is None: 17 | prompt = self.prompt 18 | if not isinstance(images, list): 19 | images = [images] 20 | messages.append({"role": "user", "content": [{"text": prompt}] + [{"image": image} for image in images]}) 21 | response = dashscope.MultiModalConversation.call(model=self.model_id, messages=messages) 22 | response = response["output"]["choices"][0]["message"]["content"][0]["text"] 23 | return response 24 | 25 | 26 | class QwenJsonParser: 27 | def __init__(self): 28 | pass 29 | 30 | def __call__(self, text): 31 | text = text.strip() 32 | if text.startswith("```json"): 33 | text = text[len("```json\n"):] 34 | if text.endswith("```"): 35 | text = text[:-len("\n```")] 36 | json_data = json.loads(text) 37 | return json_data 38 | 39 | 40 | class QwenBbox2Mask: 41 | def __init__(self, absolute_coordinate=False): 42 | self.absolute_coordinate = absolute_coordinate 43 | 44 | def __call__(self, bbox, height=1024, width=1024): 45 | x1, y1, x2, y2 = bbox 46 | image = np.zeros((height, width, 3), dtype=np.uint8) 47 | if self.absolute_coordinate: 48 | image[y1: y2, x1: x2] = 255 49 | else: 50 | image[int(y1/1000*width): int(y2/1000*width), int(x1/1000*height): int(x2/1000*height)] = 255 51 | image = Image.fromarray(image) 52 | return image 53 | 54 | 55 | class QwenBbox2Square: 56 | def __init__(self): 57 | pass 58 | 59 | def expand(self, x1, x2, dx): 60 | x1, x2 = x1 - dx // 2, x2 + dx // 2 + dx % 2 61 | return x1, x2 62 | 63 | def shift(self, x1, x2, max_length): 64 | if x1 < 0: 65 | dx = -x1 66 | elif x2 > max_length: 67 | dx = 
-(x2 - max_length) 68 | else: 69 | dx = 0 70 | x1, x2 = x1 + dx, x2 + dx 71 | return x1, x2 72 | 73 | def __call__(self, bbox, height=1024, width=1024): 74 | x1, y1, x2, y2 = bbox 75 | y1, y2, x1, x2 = int(y1/1000*width), int(y2/1000*width), int(x1/1000*height), int(x2/1000*height) 76 | h, w = x2 - x1, y2 - y1 77 | if h > w: 78 | y1, y2 = self.expand(y1, y2, h - w) 79 | y1, y2 = self.shift(y1, y2, width) 80 | else: 81 | x1, x2 = self.expand(x1, x2, w - h) 82 | x1, x2 = self.shift(x1, x2, height) 83 | return {"square": (x1, y1, x2, y2)} 84 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /data 2 | /cache 3 | /models 4 | *.pkl 5 | *.safetensors 6 | *.pth 7 | *.ckpt 8 | *.pt 9 | *.bin 10 | 11 | # Byte-compiled / optimized / DLL files 12 | __pycache__/ 13 | *.py[cod] 14 | *$py.class 15 | 16 | # C extensions 17 | *.so 18 | 19 | # Distribution / packaging 20 | .Python 21 | build/ 22 | develop-eggs/ 23 | dist/ 24 | downloads/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | wheels/ 33 | share/python-wheels/ 34 | *.egg-info/ 35 | .installed.cfg 36 | *.egg 37 | MANIFEST 38 | 39 | # PyInstaller 40 | # Usually these files are written by a python script from a template 41 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 42 | *.manifest 43 | *.spec 44 | 45 | # Installer logs 46 | pip-log.txt 47 | pip-delete-this-directory.txt 48 | 49 | # Unit test / coverage reports 50 | htmlcov/ 51 | .tox/ 52 | .nox/ 53 | .coverage 54 | .coverage.* 55 | .cache 56 | nosetests.xml 57 | coverage.xml 58 | *.cover 59 | *.py,cover 60 | .hypothesis/ 61 | .pytest_cache/ 62 | cover/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | db.sqlite3-journal 73 | 74 | # Flask stuff: 75 | instance/ 76 | .webassets-cache 77 | 78 | # Scrapy stuff: 79 | .scrapy 80 | 81 | # Sphinx documentation 82 | docs/_build/ 83 | 84 | # PyBuilder 85 | .pybuilder/ 86 | target/ 87 | 88 | # Jupyter Notebook 89 | .ipynb_checkpoints 90 | 91 | # IPython 92 | profile_default/ 93 | ipython_config.py 94 | 95 | # pyenv 96 | # For a library or package, you might want to ignore these files since the code is 97 | # intended to run in multiple environments; otherwise, check them in: 98 | # .python-version 99 | 100 | # pipenv 101 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 102 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 103 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 104 | # install all needed dependencies. 105 | #Pipfile.lock 106 | 107 | # poetry 108 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 109 | # This is especially recommended for binary packages to ensure reproducibility, and is more 110 | # commonly ignored for libraries. 111 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 112 | #poetry.lock 113 | 114 | # pdm 115 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 116 | #pdm.lock 117 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 118 | # in version control. 
119 | # https://pdm.fming.dev/#use-with-ide 120 | .pdm.toml 121 | 122 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 123 | __pypackages__/ 124 | 125 | # Celery stuff 126 | celerybeat-schedule 127 | celerybeat.pid 128 | 129 | # SageMath parsed files 130 | *.sage.py 131 | 132 | # Environments 133 | .env 134 | .venv 135 | env/ 136 | venv/ 137 | ENV/ 138 | env.bak/ 139 | venv.bak/ 140 | 141 | # Spyder project settings 142 | .spyderproject 143 | .spyproject 144 | 145 | # Rope project settings 146 | .ropeproject 147 | 148 | # mkdocs documentation 149 | /site 150 | 151 | # mypy 152 | .mypy_cache/ 153 | .dmypy.json 154 | dmypy.json 155 | 156 | # Pyre type checker 157 | .pyre/ 158 | 159 | # pytype static type analyzer 160 | .pytype/ 161 | 162 | # Cython debug symbols 163 | cython_debug/ 164 | -------------------------------------------------------------------------------- /scripts/english_text.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.dataset.anytext import AnyText 3 | from pulse.dataset.dataset import ImageDatasetStorage 4 | from pulse.pipeline import DataProcessUnit, DataPipeline 5 | from diffsynth import download_models 6 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 7 | from modelscope import dataset_snapshot_download 8 | from tqdm import tqdm 9 | import argparse, os, zipfile 10 | 11 | 12 | 13 | def parse_args(): 14 | parser = argparse.ArgumentParser(description="Dataset generation script: Style Transfer.") 15 | parser.add_argument( 16 | "--target_dir", 17 | type=str, 18 | default="data/dataset", 19 | required=True, 20 | help="Path to save dataset.", 21 | ) 22 | parser.add_argument( 23 | "--cache_dir", 24 | type=str, 25 | default="data/cache", 26 | help="Path to save cache files.", 27 | ) 28 | parser.add_argument( 29 | "--dashscope_api_key", 30 | type=str, 31 | default="", 32 | help="Dashscope api key.", 33 | ) 34 | parser.add_argument( 35 | "--qwenvl_model_id", 36 | type=str, 37 | default="qwen-vl-max-0809", 38 | help="QwenVL model id.", 39 | ) 40 | parser.add_argument( 41 | "--modelscope_access_token", 42 | type=str, 43 | default=None, 44 | help="Modelscope access token", 45 | ) 46 | parser.add_argument( 47 | "--modelscope_dataset_id", 48 | type=str, 49 | default=None, 50 | help="Modelscope Dataset ID", 51 | ) 52 | parser.add_argument( 53 | "--num_data", 54 | type=int, 55 | default=100000, 56 | help="Number of data samples", 57 | ) 58 | parser.add_argument( 59 | "--max_num_files_per_folder", 60 | type=int, 61 | default=5000, 62 | help="Max number of files per folder", 63 | ) 64 | args = parser.parse_args() 65 | return args 66 | 67 | 68 | def initialize(args): 69 | dataset_snapshot_download("iic/AnyWord-3M", allow_file_pattern=["anytext2_json_files.zip"], cache_dir="./data") 70 | if "TextEn" not in os.listdir("data"): 71 | os.makedirs("data/TextEn") 72 | with zipfile.ZipFile("data/iic/AnyWord-3M/anytext2_json_files.zip", 'r') as f: 73 | f.extractall("data/TextEn") 74 | download_models(["FLUX.1-dev"]) 75 | 76 | t2i = FLUXT2I( 77 | model_path=[ 78 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 79 | "models/FLUX/FLUX.1-dev/text_encoder_2", 80 | "models/FLUX/FLUX.1-dev/ae.safetensors", 81 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 82 | ], 83 | device="cuda", 84 | ) 85 | 86 | dataset = AnyText("data/TextEn/anytext2_json_files/laion_word/data_v1.2b.json", shuffle=True, num_data=args.num_data) 87 | 88 
| pipe = DataPipeline(units=[ 89 | DataProcessUnit( 90 | processor=t2i, 91 | input_params={"prompt": "prompt"}, 92 | output_params=("image_1",), 93 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 94 | ), 95 | DataProcessUnit( 96 | processor=ImageDatasetStorage( 97 | target_dir=args.target_dir, 98 | image_keys=("image_1",), 99 | metadata_keys=("prompt",), 100 | modelscope_access_token=args.modelscope_access_token, 101 | modelscope_dataset_id=args.modelscope_dataset_id, 102 | max_num_files_per_folder=args.max_num_files_per_folder, 103 | ), 104 | input_params={ 105 | "image_1": "image_1", "prompt": "prompt", 106 | }, 107 | output_params=("metadata_path") 108 | ) 109 | ]) 110 | return dataset, pipe 111 | 112 | 113 | if __name__ == "__main__": 114 | args = parse_args() 115 | dataset, pipe = initialize(args) 116 | for data_id, data in enumerate(tqdm(dataset)): 117 | pipe(data, ignore_errors=True) 118 | if (data_id + 1) % 100 == 0: 119 | pipe.report_log() 120 | -------------------------------------------------------------------------------- /pulse/dataset/dataset.py: -------------------------------------------------------------------------------- 1 | import os, time, shutil, json, tarfile, torchvision, torch 2 | from PIL import Image 3 | from modelscope.hub.api import HubApi 4 | import torchvision.transforms.functional 5 | 6 | 7 | class ImageDatasetStorage: 8 | def __init__(self, target_dir, max_num_files_per_folder=5000, file_extension="png", image_keys=(), metadata_keys=(), modelscope_access_token=None, modelscope_dataset_id=None): 9 | os.makedirs(target_dir, exist_ok=True) 10 | self.target_dir = target_dir 11 | self.max_num_files_per_folder = max_num_files_per_folder 12 | self.file_extension = file_extension 13 | self.image_keys = image_keys 14 | self.metadata_keys = metadata_keys 15 | self.save_dir = None 16 | self.modelscope_access_token = modelscope_access_token 17 | self.modelscope_dataset_id = modelscope_dataset_id 18 | self.set_new_dir() 19 | 20 | 21 | def push_to_hub(self): 22 | if self.save_dir is not None and self.modelscope_dataset_id is not None: 23 | tar_file = self.save_dir + ".tar.gz" 24 | with tarfile.open(tar_file, "w:gz") as tar: 25 | tar.add(self.save_dir, arcname=os.path.basename(self.save_dir)) 26 | api = HubApi() 27 | api.login(self.modelscope_access_token) 28 | api.upload_file( 29 | path_or_fileobj=tar_file, 30 | path_in_repo="data/" + os.path.basename(self.save_dir) + ".tar.gz", 31 | repo_id=self.modelscope_dataset_id, 32 | repo_type="dataset", 33 | commit_message=f"Upload {os.path.basename(self.save_dir)}", 34 | ) 35 | 36 | 37 | def set_new_dir(self): 38 | self.push_to_hub() 39 | timestamp = str(time.time_ns()) 40 | self.save_dir = os.path.join(self.target_dir, timestamp) 41 | print(f"Dataset will be saved at {self.save_dir}") 42 | os.makedirs(self.save_dir, exist_ok=True) 43 | self.num_files = 0 44 | 45 | 46 | def get_image(self, image): 47 | timestamp = str(time.time_ns()) 48 | if isinstance(image, Image.Image): 49 | path = os.path.join(self.save_dir, f"{timestamp}.{self.file_extension}") 50 | image.save(path) 51 | elif isinstance(image, str): 52 | _, file_extension = os.path.splitext(image) 53 | path = os.path.join(self.save_dir, f"{timestamp}.{file_extension}") 54 | shutil.copy(image, path) 55 | else: 56 | raise ValueError("Unsupported image format.") 57 | self.num_files += 1 58 | return path 59 | 60 | 61 | def get_images(self, images): 62 | if not isinstance(images, list): 63 | images = [images] 64 | path = [self.get_image(image) for image in images] 65 | 
return path 66 | 67 | 68 | def get_metadata(self, metadata): 69 | timestamp = str(time.time_ns()) 70 | path = os.path.join(self.save_dir, f"{timestamp}.json") 71 | with open(path, "w") as f: 72 | json.dump(metadata, f, ensure_ascii=False) 73 | self.num_files += 1 74 | return path 75 | 76 | 77 | def __call__(self, **kwargs): 78 | metadata = {key: kwargs[key] for key in self.metadata_keys} 79 | for key in self.image_keys: 80 | path = self.get_image(kwargs[key]) 81 | metadata[key] = os.path.basename(path) 82 | path = self.get_metadata(metadata) 83 | path = os.path.basename(path) 84 | if self.num_files > self.max_num_files_per_folder: 85 | self.set_new_dir() 86 | return path 87 | 88 | 89 | 90 | class ImageDataset: 91 | def __init__(self, base_path, crop=False, height=1024, width=1024, max_num=10000000): 92 | self.path = [] 93 | self.search_for_images(base_path) 94 | self.crop = crop 95 | self.height = height 96 | self.width = width 97 | self.max_num = max_num 98 | 99 | def is_image_file(self, file_path): 100 | if "." not in file_path: 101 | return False 102 | file_ext_name = file_path.split(".")[-1] 103 | if file_ext_name.lower() in ["jpg", "jpeg", "png", "webp"]: 104 | return True 105 | return False 106 | 107 | def search_for_images(self, path): 108 | if os.path.isfile(path): 109 | if self.is_image_file(path): 110 | self.path.append(path) 111 | else: 112 | for file_name in os.listdir(path): 113 | sub_path = os.path.join(path, file_name) 114 | self.search_for_images(sub_path) 115 | 116 | def crop_and_resize(self, image): 117 | width, height = image.size 118 | scale = max(self.width / width, self.height / height) 119 | image = torchvision.transforms.functional.resize( 120 | image, 121 | (round(height*scale), round(width*scale)), 122 | interpolation=torchvision.transforms.InterpolationMode.BILINEAR 123 | ) 124 | image = torchvision.transforms.functional.center_crop( 125 | image, 126 | (self.height, self.width), 127 | ) 128 | return image 129 | 130 | def __getitem__(self, idx): 131 | while True: 132 | try: 133 | idx = torch.randint(0, len(self.path), size=(1,)).tolist()[0] 134 | path = self.path[idx] 135 | image = Image.open(path) 136 | if self.crop: 137 | image = self.crop_and_resize(image) 138 | return image 139 | except: 140 | continue 141 | 142 | def __len__(self): 143 | return self.max_num 144 | -------------------------------------------------------------------------------- /README_zh.md: -------------------------------------------------------------------------------- 1 | # ImagePulse-图律脉动 2 | 3 | 图律脉动项目旨在为下一代图像理解和生成模型提供数据集支撑,将模型的能力原子化,并构建原子能力数据集。 4 | 5 | [切换到英文](./README.md) 6 | 7 | ## 原子能力数据集 8 | 9 | ### 1. 
修改、添加、移除 10 | 11 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove 12 | * 数据集构建脚本:[./scripts/change_add_remove.py](./scripts/change_add_remove.py) 13 | 14 | |image_1|image_2|mask|editing_instruction|reverse_editing_instruction| 15 | |-|-|-|-|-| 16 | |![](https://github.com/user-attachments/assets/3a657ccd-6fae-4c44-bff8-a3b702c89d65)|![](https://github.com/user-attachments/assets/cc91af21-0090-4392-89d3-ddd62e056da5)|![](https://github.com/user-attachments/assets/5e4c0fdf-15ef-4bf0-b027-ef863e74afaa)|Remove the mustache and beard, change the white shirt to a blue turtleneck sweater, and remove the glass of milk.|Add a mustache and beard, change the blue turtleneck sweater to a white shirt, and add a glass of milk.| 17 | |![](https://github.com/user-attachments/assets/e3ed5116-1d51-47ab-ae51-0fd4e1548bfd)|![](https://github.com/user-attachments/assets/f78eb833-82bd-4a1f-9856-58718b05dc03)|![](https://github.com/user-attachments/assets/8d1e7e9d-6f5e-4abc-905a-c9f3321ec772)|Add a silver butterfly to the glowing golden lace on her face.|Remove the silver butterfly from the glowing golden lace on her face.| 18 | |![](https://github.com/user-attachments/assets/169e1170-f1d2-4f37-a758-baee81343720)|![](https://github.com/user-attachments/assets/6c250bd1-a705-45ba-8c8a-aacb91eaaa0f)|![](https://github.com/user-attachments/assets/167eb187-605b-4dcd-be62-b6833309aa5c)|Remove the necklace.|Add a necklace.| 19 | 20 | ### 2. 放大、缩小 21 | 22 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout 23 | * 数据集构建脚本:[./scripts/zoomin_zoomout.py](./scripts/zoomin_zoomout.py) 24 | 25 | |image_1|image_2|image_cropped|mask|editing_instruction|reverse_editing_instruction| 26 | |-|-|-|-|-|-| 27 | |![](https://github.com/user-attachments/assets/c90e2a05-8bbe-4897-83f6-fad5692677e2)|![](https://github.com/user-attachments/assets/70ab6767-e088-49f1-afb8-b85cca894031)|![](https://github.com/user-attachments/assets/76718ff8-f6ae-4f75-8f3f-be10d2eebde4)|![](https://github.com/user-attachments/assets/4bebe7f6-a3a7-481b-bcef-100bb18bec5d)|Zoom in to focus on the headband.|Zoom out to show the full view of the anime girl.| 28 | |![](https://github.com/user-attachments/assets/99fc81f9-77e5-4181-a376-06cdf5feaf65)|![](https://github.com/user-attachments/assets/e97b398d-a68e-4f34-a5e9-a831d16f3941)|![](https://github.com/user-attachments/assets/aef092d1-8d8c-4353-a9b7-089875307830)|![](https://github.com/user-attachments/assets/dcf2578a-df22-471c-96c0-34ba361a10b5)|Remove the superhero costume and replace it with a red shirt. Adjust the lighting to highlight the man's face.|Add a superhero costume with a red and yellow emblem on the chest and a red cape. Adjust the lighting to emphasize the costume.| 29 | |![](https://github.com/user-attachments/assets/356fc12b-02ca-4f3c-bf65-3248ca5576eb)|![](https://github.com/user-attachments/assets/41dcdf1c-3ce6-49aa-a651-cfc981932689)|![](https://github.com/user-attachments/assets/d5facc03-99d0-4f15-93ce-9f1bc5397bfd)|![](https://github.com/user-attachments/assets/5df06650-8c2b-47f2-9bf4-d3e2510e224d)|Remove the elephant and replace it with a large rock.|Replace the large rock with an elephant.| 30 | 31 | ### 3. 
风格迁移 32 | 33 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer 34 | * 数据集构建脚本:[./scripts/style_transfer.py](./scripts/style_transfer.py) 35 | 36 | |image_1|image_2|image_3|image_4|editing_instruction|reverse_editing_instruction| 37 | |-|-|-|-|-|-| 38 | |![](https://github.com/user-attachments/assets/f8974a51-fe70-4081-b0c8-60acc0c73f28)|![](https://github.com/user-attachments/assets/81e99ac5-8458-4f4a-ac4e-ae57e809f7f2)|![](https://github.com/user-attachments/assets/c8bb8062-3ad9-44b5-9ee0-a70be4dcbfb3)|![](https://github.com/user-attachments/assets/9edd818e-b6ae-4e6e-924b-cdb21d02a2ec)|transform the image into a cartoon style with vibrant colors and a confident expression.|transform the image into a realistic portrait with a serious expression and subtle lighting.| 39 | |![](https://github.com/user-attachments/assets/82253243-028b-43b4-9a37-796f17fa21af)|![](https://github.com/user-attachments/assets/84bf1c5b-55ae-4084-82ec-3a45c15b2030)|![](https://github.com/user-attachments/assets/b8908d78-ad41-42ce-af4b-c52bf92b2989)|![](https://github.com/user-attachments/assets/6593c9d6-7d5e-4cc0-b2ba-49e5fb38a229)|transform the image to have a brighter, more colorful palette and a clear blue sky.|transform the image to have a more muted color palette and an overcast sky.| 40 | |![](https://github.com/user-attachments/assets/705efc5f-504b-49ac-ba76-ae2f9edb56e4)|![](https://github.com/user-attachments/assets/d0e2e902-d97f-4ffd-91c3-56c96aa19f71)|![](https://github.com/user-attachments/assets/d8c0150f-2e41-480a-9873-dbb8419c8ac5)|![](https://github.com/user-attachments/assets/7be0991c-06e8-4560-8ff8-5fbd2f81b1a0)|transform the style of the image to an anime illustration, change the jacket to red, and add a cityscape background.|transform the style of the image to a digital painting, change the jacket to black, and remove the cityscape background.| 41 | 42 | ### 4. 
人脸保持 43 | 44 | * 数据集:https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID 45 | * 数据集构建脚本:[./scripts/faceid.py](./scripts/faceid.py) 46 | 47 | |image_face|image_1|image_2|editing_instruction|reverse_editing_instruction| 48 | |-|-|-|-|-| 49 | |![](https://github.com/user-attachments/assets/6b61c298-1938-405b-a680-c767bc8913e0)|![](https://github.com/user-attachments/assets/071f2743-3fc8-42d1-b17a-768835c3f9f4)|![](https://github.com/user-attachments/assets/05b45391-df5b-453c-9007-e94674056c5a)|Change the woman's white t-shirt to a white tank top.|Change the woman's white tank top to a white t-shirt.| 50 | |![](https://github.com/user-attachments/assets/203c9f5d-58fe-4e55-8ab8-5adbf14a1fbf)|![](https://github.com/user-attachments/assets/1022a76c-9ac2-43f0-bde2-d65322834251)|![](https://github.com/user-attachments/assets/09511643-8370-46ba-aee8-bcf4efd86d72)|Add a nighttime street scene with bokeh lights in the background.|Remove the nighttime street scene and bokeh lights from the background.| 51 | |![](https://github.com/user-attachments/assets/64d8d216-0966-4108-a378-1ad2312ad8eb)|![](https://github.com/user-attachments/assets/9d182b1e-8b4f-4f74-9f58-d14d7ad15474)|![](https://github.com/user-attachments/assets/c0f9a43e-dd2e-48c9-945c-643f11852808)|Change the background to a warmly lit room with lamps, change the suit to maroon, and add a sweater under the suit.|Change the background to a dimly lit room with red lighting, change the suit to black, and remove the sweater.| 52 | 53 | ## 运行数据集生成 54 | 55 | ```bash 56 | python change_add_remove.py \ 57 | --target_dir "data/dataset" \ 58 | --cache_dir "data/cache" \ 59 | --dashscope_api_key "sk-xxxxxxxxxxxxxxxx" \ 60 | --qwenvl_model_id "qwen-vl-max" \ 61 | --modelscope_access_token "xxxxxxxxxxxxxxx" \ 62 | --modelscope_dataset_id "DiffSynth-Studio/ImagePulse-ChangeAddRemove" \ 63 | --num_data 1000000 \ 64 | --max_num_files_per_folder 1000 65 | ``` 66 | 67 | * `target_dir`: 数据集存储路径 68 | * `cache_dir`: 缓存路径 69 | * `dashscope_api_key`: [百炼](https://bailian.console.aliyun.com/#/home) API Key,调用百炼 API 时需填入 70 | * `qwenvl_model_id`: [百炼](https://bailian.console.aliyun.com/#/home) 上 Qwen-VL 模型的 ID,调用百炼 API 时需填入 71 | * `modelscope_access_token`: [魔搭社区](https://modelscope.cn/my/myaccesstoken) 访问令牌,上传数据集到魔搭社区时需填入 72 | * `modelscope_dataset_id`: [魔搭社区](https://modelscope.cn) 数据集 ID,上传数据集到魔搭社区时需填入 73 | * `num_data`: 数据样本总量 74 | * `max_num_files_per_folder`: 每个打包文件中的文件数量 75 | 76 | ## 致谢 77 | 78 | * [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio):为本项目提供 Diffusion 模型推理支持 79 | * [魔搭社区](https://modelscope.cn):为本项目提供模型和数据集的存储与下载支持 80 | * [百炼](https://bailian.console.aliyun.com/#/home):为本项目提供大型语言模型的推理 API 支持 81 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ImagePulse 2 | 3 | ImagePulse project aims to provide dataset support for the next generation of image understanding and generation models, by atomizing the capabilities of these models and constructing atomic capability datasets. 4 | 5 | [Switch to Chinese](./README_zh.md) 6 | 7 | ## Atomic Capability Datasets 8 | 9 | ### 1. 
Change, Add, Remove 10 | 11 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ChangeAddRemove) 12 | * Dataset Construction Script: [./scripts/change_add_remove.py](./scripts/change_add_remove.py) 13 | 14 | |image_1|image_2|mask|editing_instruction|reverse_editing_instruction| 15 | |-|-|-|-|-| 16 | |![](https://github.com/user-attachments/assets/3a657ccd-6fae-4c44-bff8-a3b702c89d65)|![](https://github.com/user-attachments/assets/cc91af21-0090-4392-89d3-ddd62e056da5)|![](https://github.com/user-attachments/assets/5e4c0fdf-15ef-4bf0-b027-ef863e74afaa)|Remove the mustache and beard, change the white shirt to a blue turtleneck sweater, and remove the glass of milk.|Add a mustache and beard, change the blue turtleneck sweater to a white shirt, and add a glass of milk.| 17 | |![](https://github.com/user-attachments/assets/e3ed5116-1d51-47ab-ae51-0fd4e1548bfd)|![](https://github.com/user-attachments/assets/f78eb833-82bd-4a1f-9856-58718b05dc03)|![](https://github.com/user-attachments/assets/8d1e7e9d-6f5e-4abc-905a-c9f3321ec772)|Add a silver butterfly to the glowing golden lace on her face.|Remove the silver butterfly from the glowing golden lace on her face.| 18 | |![](https://github.com/user-attachments/assets/169e1170-f1d2-4f37-a758-baee81343720)|![](https://github.com/user-attachments/assets/6c250bd1-a705-45ba-8c8a-aacb91eaaa0f)|![](https://github.com/user-attachments/assets/167eb187-605b-4dcd-be62-b6833309aa5c)|Remove the necklace.|Add a necklace.| 19 | 20 | ### 2. Zoom In, Zoom Out 21 | 22 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-ZoominZoomout) 23 | * Dataset Construction Script: [./scripts/zoomin_zoomout.py](./scripts/zoomin_zoomout.py) 24 | 25 | |image_1|image_2|image_cropped|mask|editing_instruction|reverse_editing_instruction| 26 | |-|-|-|-|-|-| 27 | |![](https://github.com/user-attachments/assets/c90e2a05-8bbe-4897-83f6-fad5692677e2)|![](https://github.com/user-attachments/assets/70ab6767-e088-49f1-afb8-b85cca894031)|![](https://github.com/user-attachments/assets/76718ff8-f6ae-4f75-8f3f-be10d2eebde4)|![](https://github.com/user-attachments/assets/4bebe7f6-a3a7-481b-bcef-100bb18bec5d)|Zoom in to focus on the headband.|Zoom out to show the full view of the anime girl.| 28 | |![](https://github.com/user-attachments/assets/99fc81f9-77e5-4181-a376-06cdf5feaf65)|![](https://github.com/user-attachments/assets/e97b398d-a68e-4f34-a5e9-a831d16f3941)|![](https://github.com/user-attachments/assets/aef092d1-8d8c-4353-a9b7-089875307830)|![](https://github.com/user-attachments/assets/dcf2578a-df22-471c-96c0-34ba361a10b5)|Remove the superhero costume and replace it with a red shirt. Adjust the lighting to highlight the man's face.|Add a superhero costume with a red and yellow emblem on the chest and a red cape. Adjust the lighting to emphasize the costume.| 29 | |![](https://github.com/user-attachments/assets/356fc12b-02ca-4f3c-bf65-3248ca5576eb)|![](https://github.com/user-attachments/assets/41dcdf1c-3ce6-49aa-a651-cfc981932689)|![](https://github.com/user-attachments/assets/d5facc03-99d0-4f15-93ce-9f1bc5397bfd)|![](https://github.com/user-attachments/assets/5df06650-8c2b-47f2-9bf4-d3e2510e224d)|Remove the elephant and replace it with a large rock.|Replace the large rock with an elephant.| 30 | 31 | ### 3. 
Style Transfer 32 | 33 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-StyleTransfer) 34 | * Dataset Construction Script: [./scripts/style_transfer.py](./scripts/style_transfer.py) 35 | 36 | |image_1|image_2|image_3|image_4|editing_instruction|reverse_editing_instruction| 37 | |-|-|-|-|-|-| 38 | |![](https://github.com/user-attachments/assets/f8974a51-fe70-4081-b0c8-60acc0c73f28)|![](https://github.com/user-attachments/assets/81e99ac5-8458-4f4a-ac4e-ae57e809f7f2)|![](https://github.com/user-attachments/assets/c8bb8062-3ad9-44b5-9ee0-a70be4dcbfb3)|![](https://github.com/user-attachments/assets/9edd818e-b6ae-4e6e-924b-cdb21d02a2ec)|transform the image into a cartoon style with vibrant colors and a confident expression.|transform the image into a realistic portrait with a serious expression and subtle lighting.| 39 | |![](https://github.com/user-attachments/assets/82253243-028b-43b4-9a37-796f17fa21af)|![](https://github.com/user-attachments/assets/84bf1c5b-55ae-4084-82ec-3a45c15b2030)|![](https://github.com/user-attachments/assets/b8908d78-ad41-42ce-af4b-c52bf92b2989)|![](https://github.com/user-attachments/assets/6593c9d6-7d5e-4cc0-b2ba-49e5fb38a229)|transform the image to have a brighter, more colorful palette and a clear blue sky.|transform the image to have a more muted color palette and an overcast sky.| 40 | |![](https://github.com/user-attachments/assets/705efc5f-504b-49ac-ba76-ae2f9edb56e4)|![](https://github.com/user-attachments/assets/d0e2e902-d97f-4ffd-91c3-56c96aa19f71)|![](https://github.com/user-attachments/assets/d8c0150f-2e41-480a-9873-dbb8419c8ac5)|![](https://github.com/user-attachments/assets/7be0991c-06e8-4560-8ff8-5fbd2f81b1a0)|transform the style of the image to an anime illustration, change the jacket to red, and add a cityscape background.|transform the style of the image to a digital painting, change the jacket to black, and remove the cityscape background.| 41 | 42 | ### 4. 
Face ID 43 | 44 | * Dataset: [https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID](https://www.modelscope.cn/datasets/DiffSynth-Studio/ImagePulse-FaceID) 45 | * Dataset Construction Script: [./scripts/faceid.py](./scripts/faceid.py) 46 | 47 | |image_face|image_1|image_2|editing_instruction|reverse_editing_instruction| 48 | |-|-|-|-|-| 49 | |![](https://github.com/user-attachments/assets/6b61c298-1938-405b-a680-c767bc8913e0)|![](https://github.com/user-attachments/assets/071f2743-3fc8-42d1-b17a-768835c3f9f4)|![](https://github.com/user-attachments/assets/05b45391-df5b-453c-9007-e94674056c5a)|Change the woman's white t-shirt to a white tank top.|Change the woman's white tank top to a white t-shirt.| 50 | |![](https://github.com/user-attachments/assets/203c9f5d-58fe-4e55-8ab8-5adbf14a1fbf)|![](https://github.com/user-attachments/assets/1022a76c-9ac2-43f0-bde2-d65322834251)|![](https://github.com/user-attachments/assets/09511643-8370-46ba-aee8-bcf4efd86d72)|Add a nighttime street scene with bokeh lights in the background.|Remove the nighttime street scene and bokeh lights from the background.| 51 | |![](https://github.com/user-attachments/assets/64d8d216-0966-4108-a378-1ad2312ad8eb)|![](https://github.com/user-attachments/assets/9d182b1e-8b4f-4f74-9f58-d14d7ad15474)|![](https://github.com/user-attachments/assets/c0f9a43e-dd2e-48c9-945c-643f11852808)|Change the background to a warmly lit room with lamps, change the suit to maroon, and add a sweater under the suit.|Change the background to a dimly lit room with red lighting, change the suit to black, and remove the sweater.| 52 | 53 | ## Running Dataset Generation 54 | 55 | ```bash 56 | pip install -r requirements.txt 57 | ``` 58 | 59 | ```bash 60 | python change_add_remove.py \ 61 | --target_dir "data/dataset" \ 62 | --cache_dir "data/cache" \ 63 | --dashscope_api_key "sk-xxxxxxxxxxxxxxxx" \ 64 | --qwenvl_model_id "qwen-vl-max" \ 65 | --modelscope_access_token "xxxxxxxxxxxxxxx" \ 66 | --modelscope_dataset_id "DiffSynth-Studio/ImagePulse-ChangeAddRemove" \ 67 | --num_data 1000000 \ 68 | --max_num_files_per_folder 1000 69 | ``` 70 | 71 | * `target_dir`: Path to store the dataset 72 | * `cache_dir`: Cache path 73 | * `dashscope_api_key`: [DashScope](https://DashScope.console.aliyun.com/#/home) API Key, required when calling DashScope API 74 | * `qwenvl_model_id`: ID of the Qwen-VL model on [DashScope](https://DashScope.console.aliyun.com/#/home), required when calling DashScope API 75 | * `modelscope_access_token`: Access token from [ModelScope](https://modelscope.cn/my/myaccesstoken), required when uploading datasets to ModelScope 76 | * `modelscope_dataset_id`: Dataset ID on [ModelScope](https://modelscope.cn), required when uploading datasets to ModelScope 77 | * `num_data`: Total number of data samples 78 | * `max_num_files_per_folder`: Number of files per packaged folder 79 | 80 | ## Acknowledgements 81 | 82 | * [DiffSynth-Studio](https://github.com/modelscope/DiffSynth-Studio): Provided Diffusion model inference support for this project 83 | * [ModelScope](https://modelscope.cn): Provided storage and download support for models and datasets in this project 84 | * [DashScope](https://DashScope.console.aliyun.com/#/home): Provided inference API support for large language models in this project 85 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 
179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /scripts/faceid.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask 3 | from pulse.processor.general import ListSampler, TextFormater, ListPacker, ImageResizer 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.processor.face import FaceDataSelector 6 | from pulse.dataset.dataset import ImageDatasetStorage, ImageDataset 7 | from pulse.pipeline import DataProcessUnit, DataPipeline 8 | from pulse.dataset.diffusiondb import DiffusionDB 9 | from diffsynth import ControlNetConfigUnit, download_models 10 | from modelscope import dataset_snapshot_download 11 | from tqdm import tqdm 12 | import argparse, os, io 13 | import pandas as pd 14 | from PIL import Image 15 | 16 | 17 | qwen_prompt_1 = """ 18 | Here are some image descriptions. Please select those (`descriptions`) that describe a single person and identify the gender in each image description. 19 | 20 | 1. %s 21 | 2. %s 22 | 3. %s 23 | 4. %s 24 | 5. %s 25 | 6. %s 26 | 7. %s 27 | 8. %s 28 | 9. %s 29 | 10. %s 30 | 31 | Next, identify the gender of the person in the image (`gender_in_image`). 32 | 33 | Please provide the results in JSON format as follows, which can be directly loads by json.loads() in Python: 34 | { 35 | "descriptions": [ 36 | { 37 | "description": "a girl holding an apple", 38 | "gender": "female", 39 | }, 40 | { 41 | "description": "a man is reading a book", 42 | "gender": "male", 43 | }, 44 | ... 45 | ] 46 | "gender_in_image": "male", 47 | } 48 | """ 49 | qwen_prompt_2 = """ 50 | Here are two images of the same person, denoted as image_1 and image_2 51 | 52 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 53 | 54 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instructions (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 
55 | 56 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 57 | 58 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 59 | { 60 | "image_1_caption": "...", 61 | "image_2_caption": "...", 62 | "editing_instruction": "...", 63 | "reverse_editing_instruction": "...", 64 | "artifacts_in_image_1": ..., 65 | "artifacts_in_image_2": ... 66 | } 67 | 68 | Here are some examples: 69 | { 70 | "image_1_caption": "a girl holding a basketball", 71 | "image_2_caption": "a girl holding a Teddy bear doll", 72 | "editing_instruction": "Change the basketball to a teddy bear.", 73 | "reverse_editing_instruction": "Change the teddy bear to a basketball.", 74 | "artifacts_in_image_1": false, 75 | "artifacts_in_image_2": false 76 | } 77 | 78 | { 79 | "image_1_caption": "a man is walking on the street", 80 | "image_2_caption": "a man is sitting on a chair", 81 | "editing_instruction": "Let the man sit down.", 82 | "reverse_editing_instruction": "Let the man walk.", 83 | "artifacts_in_image_1": false, 84 | "artifacts_in_image_2": true 85 | } 86 | """ 87 | 88 | 89 | def parse_args(): 90 | parser = argparse.ArgumentParser(description="Dataset generation script: FaceID.") 91 | parser.add_argument( 92 | "--target_dir", 93 | type=str, 94 | default="data/dataset", 95 | required=True, 96 | help="Path to save dataset.", 97 | ) 98 | parser.add_argument( 99 | "--cache_dir", 100 | type=str, 101 | default="data/cache", 102 | help="Path to save cache files.", 103 | ) 104 | parser.add_argument( 105 | "--dashscope_api_key", 106 | type=str, 107 | default="", 108 | help="Dashscope api key.", 109 | ) 110 | parser.add_argument( 111 | "--qwenvl_model_id", 112 | type=str, 113 | default="qwen-vl-max-0809", 114 | help="QwenVL model id.", 115 | ) 116 | parser.add_argument( 117 | "--modelscope_access_token", 118 | type=str, 119 | default=None, 120 | help="Modelscope access token", 121 | ) 122 | parser.add_argument( 123 | "--modelscope_dataset_id", 124 | type=str, 125 | default=None, 126 | help="Modelscope Dataset ID", 127 | ) 128 | parser.add_argument( 129 | "--num_data", 130 | type=int, 131 | default=100000, 132 | help="Number of data samples", 133 | ) 134 | parser.add_argument( 135 | "--max_num_files_per_folder", 136 | type=int, 137 | default=5000, 138 | help="Max number of files per folder", 139 | ) 140 | args = parser.parse_args() 141 | return args 142 | 143 | 144 | def initialize(args): 145 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 146 | dataset_snapshot_download("AI-ModelScope/celeb-a-hq_training_untransformed_faces", allow_file_pattern=["*.parquet"], cache_dir="./data") 147 | download_models(["FLUX.1-dev", "InfiniteYou"]) 148 | 149 | for file_name in os.listdir("data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data"): 150 | if file_name.endswith(".parquet"): 151 | if not os.path.exists(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images"): 152 | data = pd.read_parquet(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}") 153 | os.makedirs(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images") 154 | for image_id, image_data in enumerate(tqdm(data["image"], desc=file_name)): 155 | image = image_data["bytes"] 156 | image = Image.open(io.BytesIO(image)) 157 |
image.save(f"data/AI-ModelScope/celeb-a-hq_training_untransformed_faces/data/{file_name}_images/{image_id}.png") 158 | 159 | t2i = FLUXT2I( 160 | model_path=[ 161 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 162 | "models/FLUX/FLUX.1-dev/text_encoder_2", 163 | "models/FLUX/FLUX.1-dev/ae.safetensors", 164 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 165 | [ 166 | "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors", 167 | "models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors" 168 | ], 169 | "models/InfiniteYou/image_proj_model.bin", 170 | ], 171 | device="cuda", 172 | model_kwargs={ 173 | "controlnet_config_units": [ 174 | ControlNetConfigUnit( 175 | processor_id="none", 176 | model_path=[ 177 | 'models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00001-of-00002.safetensors', 178 | 'models/InfiniteYou/InfuseNetModel/diffusion_pytorch_model-00002-of-00002.safetensors' 179 | ], 180 | scale=1.0 181 | ) 182 | ] 183 | } 184 | ) 185 | cache = ImageCache(cache_dir=args.cache_dir) 186 | 187 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data, multi_prompt=True, num_prompt=10) 188 | face_generator = ImageDataset("data/AI-ModelScope/celeb-a-hq_training_untransformed_faces") 189 | 190 | pipe = DataPipeline(units=[ 191 | DataProcessUnit( 192 | processor=TextFormater(template=qwen_prompt_1), 193 | input_params={"text_list": "prompt"}, 194 | output_params=("qwen_prompt_for_prompt_selection",) 195 | ), 196 | DataProcessUnit( 197 | processor=cache, 198 | input_params={"image": "image_face"}, 199 | output_params=("image_face_path",) 200 | ), 201 | DataProcessUnit( 202 | processor=QwenVLI2T( 203 | api_key=args.dashscope_api_key, 204 | model_id=args.qwenvl_model_id, 205 | prompt=qwen_prompt_1 206 | ), 207 | input_params={"images": "image_face_path", "prompt": "qwen_prompt_for_prompt_selection"}, 208 | output_params=("face_prompts_str",) 209 | ), 210 | DataProcessUnit( 211 | processor=QwenJsonParser(), 212 | input_params={"text": "face_prompts_str"}, 213 | output_params=("face_prompts",) 214 | ), 215 | DataProcessUnit( 216 | processor=FaceDataSelector(), 217 | input_params={"metadata": "face_prompts"}, 218 | output_params=("prompt_1", "prompt_2") 219 | ), 220 | 221 | DataProcessUnit( 222 | processor=t2i, 223 | input_params={"infinityou_id_image": "image_face", "prompt": "prompt_1"}, 224 | output_params=("image_1",), 225 | extra_input_kwargs={ 226 | "progress_bar_cmd": lambda x: x, 227 | "num_inference_steps": 50, 228 | "infinityou_guidance": 1.0, 229 | } 230 | ), 231 | DataProcessUnit( 232 | processor=cache, 233 | input_params={"image": "image_1"}, 234 | output_params=("image_1_path",) 235 | ), 236 | 237 | DataProcessUnit( 238 | processor=t2i, 239 | input_params={"infinityou_id_image": "image_face", "prompt": "prompt_2"}, 240 | output_params=("image_2",), 241 | extra_input_kwargs={ 242 | "progress_bar_cmd": lambda x: x, 243 | "num_inference_steps": 50, 244 | "infinityou_guidance": 1.0, 245 | } 246 | ), 247 | DataProcessUnit( 248 | processor=cache, 249 | input_params={"image": "image_2"}, 250 | output_params=("image_2_path",) 251 | ), 252 | 253 | DataProcessUnit( 254 | processor=ListPacker(), 255 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 256 | output_params=("image_list",) 257 | ), 258 | 259 | DataProcessUnit( 260 | processor=QwenVLI2T( 261 | api_key=args.dashscope_api_key, 262 | model_id=args.qwenvl_model_id, 263 | 
prompt=qwen_prompt_2 264 | ), 265 | input_params={"images": "image_list"}, 266 | output_params=("generated_instructions",) 267 | ), 268 | DataProcessUnit( 269 | processor=QwenJsonParser(), 270 | input_params={"text": "generated_instructions"}, 271 | parse_output_dict=True, 272 | ), 273 | DataProcessUnit( 274 | processor=ImageDatasetStorage( 275 | target_dir=args.target_dir, 276 | image_keys=("image_face", "image_1", "image_2"), 277 | metadata_keys=( 278 | "editing_instruction", "reverse_editing_instruction", "prompt_1", "prompt_2", "image_1_caption", "image_2_caption", 279 | "artifacts_in_image_1", "artifacts_in_image_2" 280 | ), 281 | modelscope_access_token=args.modelscope_access_token, 282 | modelscope_dataset_id=args.modelscope_dataset_id, 283 | max_num_files_per_folder=args.max_num_files_per_folder, 284 | ), 285 | input_params={ 286 | "image_face": "image_face", "image_1": "image_1", "image_2": "image_2", 287 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 288 | "prompt_1": "prompt_1", "prompt_2": "prompt_2", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 289 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 290 | }, 291 | output_params=("metadata_path") 292 | ) 293 | ]) 294 | return dataset, pipe, face_generator 295 | 296 | 297 | if __name__ == "__main__": 298 | args = parse_args() 299 | dataset, pipe, face_generator = initialize(args) 300 | for data_id, data in enumerate(tqdm(dataset)): 301 | data["image_face"] = face_generator[0] 302 | pipe(data, ignore_errors=True) 303 | if (data_id + 1) % 100 == 0: 304 | pipe.report_log() 305 | -------------------------------------------------------------------------------- /scripts/zoomin_zoomout.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask, QwenBbox2Square 3 | from pulse.processor.general import ListSampler, ListPacker, ImageCropper, ImageResizer 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.dataset.dataset import ImageDatasetStorage 6 | from pulse.pipeline import DataProcessUnit, DataPipeline 7 | from pulse.dataset.diffusiondb import DiffusionDB 8 | from diffsynth import ControlNetConfigUnit, download_models 9 | from modelscope import dataset_snapshot_download 10 | from tqdm import tqdm 11 | import argparse 12 | 13 | 14 | qwen_prompt_1 = """ 15 | Please use relative coordinates in range [0, 1000] to mark all the entities in the image and write the corresponding text descriptions for each bbox in English. 16 | 17 | The bbox [x1, y1, x2, y2] is a square slightly larger than the corresponding object. Please ensure that the square contains a complete composition of another image as much as possible. Do not let the bbox almost cover the entire image. 18 | 19 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 20 | [ 21 | { 22 | "bbox": [x1, y1, x2, y2], 23 | "description": "portrait of a man" 24 | }, 25 | { 26 | "bbox": [x1, y1, x2, y2], 27 | "description": "a hat" 28 | }, 29 | { 30 | "bbox": [x1, y1, x2, y2], 31 | "description": "a dog is running" 32 | }, 33 | ...
34 | ] 35 | """ 36 | qwen_prompt_2 = """ 37 | Please provide a comprehensive and detailed description of the following image, ensuring the inclusion of the following elements: 38 | 39 | - Main subjects and objects present in the image. 40 | - Key visual elements, including colors, shapes, textures that stand out. 41 | - Spatial relationships and composition, focusing on how elements are arranged and interact within the frame. 42 | - Notable background elements that contribute to the overall context or setting. 43 | 44 | Generate a caption according to the image so that another model can generate the image via the caption. Just return the string description, do not return anything else. 45 | """ 46 | qwen_prompt_3 = """ 47 | Here are two images, denoted as image_1 and image_2. 48 | 49 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 50 | 51 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 52 | 53 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 54 | 55 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 56 | { 57 | "image_1_caption": "...", 58 | "image_2_caption": "...", 59 | "editing_instruction": "...", 60 | "reverse_editing_instruction": "...", 61 | "artifacts_in_image_1": ..., 62 | "artifacts_in_image_2": ... 63 | } 64 | 65 | Here are some examples: 66 | { 67 | "image_1_caption": "a girl holding a basketball", 68 | "image_2_caption": "a girl holding a Teddy bear doll", 69 | "editing_instruction": "Zoom in to view the basketball in the girl's hand.", 70 | "reverse_editing_instruction": "Zoom out to view the girl holding the basketball.", 71 | "artifacts_in_image_1": false, 72 | "artifacts_in_image_2": false 73 | } 74 | 75 | { 76 | "image_1_caption": "an apple on the desk", 77 | "image_2_caption": "an apple", 78 | "editing_instruction": "Crop the apple from the image.", 79 | "reverse_editing_instruction": "Expand the image so that the enlarged version shows an apple on a table.", 80 | "artifacts_in_image_1": false, 81 | "artifacts_in_image_2": true 82 | } 83 | """ 84 | 85 | def parse_args(): 86 | parser = argparse.ArgumentParser(description="Dataset generation script: Zoom in & Zoom out.") 87 | parser.add_argument( 88 | "--target_dir", 89 | type=str, 90 | default="data/dataset", 91 | required=True, 92 | help="Path to save dataset.", 93 | ) 94 | parser.add_argument( 95 | "--cache_dir", 96 | type=str, 97 | default="data/cache", 98 | help="Path to save cache files.", 99 | ) 100 | parser.add_argument( 101 | "--dashscope_api_key", 102 | type=str, 103 | default="", 104 | help="Dashscope api key.", 105 | ) 106 | parser.add_argument( 107 | "--qwenvl_model_id", 108 | type=str, 109 | default="qwen-vl-max-0809", 110 | help="QwenVL model id.", 111 | ) 112 | parser.add_argument( 113 | "--modelscope_access_token", 114 | type=str, 115 | default=None, 116 | help="Modelscope access token", 117 | ) 118 | parser.add_argument( 119 | "--modelscope_dataset_id", 120 | type=str, 121 | default=None, 122 | help="Modelscope Dataset ID", 123 | ) 124 | parser.add_argument( 125 | "--num_data", 126 | type=int,
127 | default=100000, 128 | help="Number of data samples", 129 | ) 130 | parser.add_argument( 131 | "--max_num_files_per_folder", 132 | type=int, 133 | default=5000, 134 | help="Max number of files per folder", 135 | ) 136 | args = parser.parse_args() 137 | return args 138 | 139 | 140 | def initialize(args): 141 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 142 | download_models(["FLUX.1-dev", "jasperai/Flux.1-dev-Controlnet-Upscaler"]) 143 | 144 | t2i = FLUXT2I( 145 | model_path=[ 146 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 147 | "models/FLUX/FLUX.1-dev/text_encoder_2", 148 | "models/FLUX/FLUX.1-dev/ae.safetensors", 149 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 150 | "models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler/diffusion_pytorch_model.safetensors" 151 | ], 152 | device="cuda", 153 | model_kwargs={ 154 | "controlnet_config_units": [ 155 | ControlNetConfigUnit( 156 | processor_id="tile", 157 | model_path="models/ControlNet/jasperai/Flux.1-dev-Controlnet-Upscaler/diffusion_pytorch_model.safetensors", 158 | scale=0.6 159 | ), 160 | ] 161 | } 162 | ) 163 | cache = ImageCache(cache_dir=args.cache_dir) 164 | 165 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 166 | 167 | pipe = DataPipeline(units=[ 168 | DataProcessUnit( 169 | processor=t2i, 170 | input_params={"prompt": "prompt"}, 171 | output_params=("image_1",), 172 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 173 | ), 174 | DataProcessUnit( 175 | processor=cache, 176 | input_params={"image": "image_1"}, 177 | output_params=("image_1_path",) 178 | ), 179 | DataProcessUnit( 180 | processor=QwenVLI2T( 181 | api_key=args.dashscope_api_key, 182 | model_id=args.qwenvl_model_id, 183 | prompt=qwen_prompt_1 184 | ), 185 | input_params={"images": "image_1_path"}, 186 | output_params=("grounding_results_str",) 187 | ), 188 | DataProcessUnit( 189 | processor=QwenJsonParser(), 190 | input_params={"text": "grounding_results_str"}, 191 | output_params=("grounding_results_list",) 192 | ), 193 | DataProcessUnit( 194 | processor=ListSampler(), 195 | input_params={"ls": "grounding_results_list"}, 196 | parse_output_dict=True, 197 | ), 198 | DataProcessUnit( 199 | processor=QwenBbox2Square(), 200 | input_params={"bbox": "bbox"}, 201 | parse_output_dict=True, 202 | ), 203 | DataProcessUnit( 204 | processor=QwenBbox2Mask(), 205 | input_params={"bbox": "square"}, 206 | output_params=("mask",) 207 | ), 208 | DataProcessUnit( 209 | processor=cache, 210 | input_params={"image": "mask"}, 211 | output_params=("mask_path",) 212 | ), 213 | DataProcessUnit( 214 | processor=ImageCropper(), 215 | input_params={"image": "image_1", "bbox": "square"}, 216 | output_params=("image_cropped",) 217 | ), 218 | DataProcessUnit( 219 | processor=cache, 220 | input_params={"image": "image_cropped"}, 221 | output_params=("image_cropped_path",) 222 | ), 223 | DataProcessUnit( 224 | processor=QwenVLI2T( 225 | api_key=args.dashscope_api_key, 226 | model_id=args.qwenvl_model_id, 227 | prompt=qwen_prompt_2 228 | ), 229 | input_params={"images": "image_cropped_path"}, 230 | output_params=("local_description",) 231 | ), 232 | DataProcessUnit( 233 | processor=ImageResizer(), 234 | input_params={"image": "image_cropped"}, 235 | output_params=("image_resized",) 236 | ), 237 | DataProcessUnit( 238 | processor=t2i, 239 | input_params={ 240 | "prompt": "local_description", 241 | "controlnet_image": 
"image_resized", 242 | "input_image": "image_resized" 243 | }, 244 | output_params=("image_2",), 245 | extra_input_kwargs={ 246 | "progress_bar_cmd": lambda x: x, 247 | "num_inference_steps": 30, 248 | "denoising_strength": 0.9 249 | } 250 | ), 251 | DataProcessUnit( 252 | processor=cache, 253 | input_params={"image": "image_2"}, 254 | output_params=("image_2_path",) 255 | ), 256 | DataProcessUnit( 257 | processor=ListPacker(), 258 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 259 | output_params=("image_list",) 260 | ), 261 | DataProcessUnit( 262 | processor=QwenVLI2T( 263 | api_key=args.dashscope_api_key, 264 | model_id=args.qwenvl_model_id, 265 | prompt=qwen_prompt_3 266 | ), 267 | input_params={"images": "image_list"}, 268 | output_params=("generated_instructions",) 269 | ), 270 | DataProcessUnit( 271 | processor=QwenJsonParser(), 272 | input_params={"text": "generated_instructions"}, 273 | parse_output_dict=True, 274 | ), 275 | DataProcessUnit( 276 | processor=ImageDatasetStorage( 277 | target_dir=args.target_dir, 278 | image_keys=("image_1", "image_2", "image_cropped", "mask"), 279 | metadata_keys=( 280 | "editing_instruction", "reverse_editing_instruction", "prompt", "local_description", "image_1_caption", "image_2_caption", 281 | "artifacts_in_image_1", "artifacts_in_image_2", "square", 282 | ), 283 | modelscope_access_token=args.modelscope_access_token, 284 | modelscope_dataset_id=args.modelscope_dataset_id, 285 | max_num_files_per_folder=args.max_num_files_per_folder, 286 | ), 287 | input_params={ 288 | "image_1": "image_1", "image_2": "image_2", "image_cropped": "image_cropped", "mask": "mask", 289 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 290 | "prompt": "prompt", "local_description": "local_description", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 291 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 292 | "square": "square" 293 | }, 294 | output_params=("metadata_path") 295 | ) 296 | ]) 297 | return dataset, pipe 298 | 299 | 300 | if __name__ == "__main__": 301 | args = parse_args() 302 | dataset, pipe = initialize(args) 303 | for data_id, data in enumerate(tqdm(dataset)): 304 | pipe(data, ignore_errors=True) 305 | if (data_id + 1) % 100 == 0: 306 | pipe.report_log() 307 | -------------------------------------------------------------------------------- /scripts/change_add_remove.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask 3 | from pulse.processor.general import ListSampler, TextFormater, ListPacker 4 | from pulse.processor.preference import ImagePreferenceModel 5 | from pulse.processor.image_cache import ImageCache 6 | from pulse.dataset.dataset import ImageDatasetStorage 7 | from pulse.pipeline import DataProcessUnit, DataPipeline 8 | from pulse.dataset.diffusiondb import DiffusionDB 9 | from diffsynth import ControlNetConfigUnit, download_models 10 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 11 | from modelscope import dataset_snapshot_download 12 | from tqdm import tqdm 13 | import argparse 14 | 15 | 16 | qwen_prompt_1 = """ 17 | Please use relative coordinates in range [0, 1000] to mark all the entities in the image and write the corresponding text descriptions for each bbox in English. 
18 | 19 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 20 | [ 21 | { 22 | "bbox": [x1, y1, x2, y2], 23 | "description": "a dog is running" 24 | }, 25 | { 26 | "bbox": [x1, y1, x2, y2], 27 | "description": "a red car" 28 | }, 29 | { 30 | "bbox": [x1, y1, x2, y2], 31 | "description": "black hair" 32 | }, 33 | ... 34 | ] 35 | """ 36 | qwen_prompt_2 = """ 37 | There is an image, and the full text description of this image is "%s". The area in the image (%s) indicates "%s". 38 | 39 | Now I need to modify this part to reflect other content in the image. Please write a piece of randomly modified text (local_description) describing the localized image content after the modification, as well as another piece of text (global_description) describing the overall image content after the modification. 40 | 41 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 42 | { 43 | "original_local_description": "...", 44 | "original_global_description": "...", 45 | "local_description": "...", 46 | "global_description": "..." 47 | } 48 | 49 | Here are some examples: 50 | { 51 | "original_local_description": "a basketball", 52 | "original_global_description": "a girl holding a basketball", 53 | "local_description": "a Teddy bear doll", 54 | "global_description": "a girl holding a Teddy bear doll" 55 | } 56 | 57 | { 58 | "original_local_description": "an apple", 59 | "original_global_description": "an apple on the desk", 60 | "local_description": "a banana", 61 | "global_description": "a banana on the desk" 62 | } 63 | """ 64 | qwen_prompt_3 = """ 65 | Here are two images, denoted as image_1 and image_2. 66 | 67 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 68 | 69 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. 70 | 71 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2. 72 | 73 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 74 | { 75 | "image_1_caption": "...", 76 | "image_2_caption": "...", 77 | "editing_instruction": "...", 78 | "reverse_editing_instruction": "...", 79 | "artifacts_in_image_1": ..., 80 | "artifacts_in_image_2": ...
81 | } 82 | 83 | Here are some examples: 84 | { 85 | "image_1_caption": "a girl holding a basketball", 86 | "image_2_caption": "a girl holding a Teddy bear doll", 87 | "editing_instruction": "Change the basketball to a teddy bear.", 88 | "reverse_editing_instruction": "Change the teddy bear to a basketball.", 89 | "artifacts_in_image_1": false, 90 | "artifacts_in_image_2": false 91 | } 92 | 93 | { 94 | "image_1_caption": "an apple on the desk", 95 | "image_2_caption": "a desk", 96 | "editing_instruction": "Remove the apple.", 97 | "reverse_editing_instruction": "Add an apple on the desk.", 98 | "artifacts_in_image_1": false, 99 | "artifacts_in_image_2": true 100 | } 101 | """ 102 | 103 | 104 | def parse_args(): 105 | parser = argparse.ArgumentParser(description="Dataset generation script: Change, add & remove.") 106 | parser.add_argument( 107 | "--target_dir", 108 | type=str, 109 | default="data/dataset", 110 | required=True, 111 | help="Path to save dataset.", 112 | ) 113 | parser.add_argument( 114 | "--cache_dir", 115 | type=str, 116 | default="data/cache", 117 | help="Path to save cache files.", 118 | ) 119 | parser.add_argument( 120 | "--dashscope_api_key", 121 | type=str, 122 | default="", 123 | help="Dashscope api key.", 124 | ) 125 | parser.add_argument( 126 | "--qwenvl_model_id", 127 | type=str, 128 | default="qwen-vl-max-0809", 129 | help="QwenVL model id.", 130 | ) 131 | parser.add_argument( 132 | "--modelscope_access_token", 133 | type=str, 134 | default=None, 135 | help="Modelscope access token", 136 | ) 137 | parser.add_argument( 138 | "--modelscope_dataset_id", 139 | type=str, 140 | default=None, 141 | help="Modelscope Dataset ID", 142 | ) 143 | parser.add_argument( 144 | "--num_data", 145 | type=int, 146 | default=100000, 147 | help="Number of data samples", 148 | ) 149 | parser.add_argument( 150 | "--max_num_files_per_folder", 151 | type=int, 152 | default=5000, 153 | help="Max number of files per folder", 154 | ) 155 | args = parser.parse_args() 156 | return args 157 | 158 | 159 | def initialize(args): 160 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 161 | download_models(["FLUX.1-dev", "alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta"]) 162 | download_preference_model("MPS", cache_dir="./models") 163 | 164 | t2i = FLUXT2I( 165 | model_path=[ 166 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 167 | "models/FLUX/FLUX.1-dev/text_encoder_2", 168 | "models/FLUX/FLUX.1-dev/ae.safetensors", 169 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 170 | "models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta/diffusion_pytorch_model.safetensors" 171 | ], 172 | device="cuda", 173 | model_kwargs={ 174 | "controlnet_config_units": [ 175 | ControlNetConfigUnit( 176 | processor_id="inpaint", 177 | model_path="models/ControlNet/alimama-creative/FLUX.1-dev-Controlnet-Inpainting-Beta/diffusion_pytorch_model.safetensors", 178 | scale=0.9 179 | ), 180 | ] 181 | } 182 | ) 183 | preference_model = ImagePreferenceModel("MPS", cache_dir="./models", device="cuda") 184 | cache = ImageCache(cache_dir=args.cache_dir) 185 | 186 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 187 | 188 | pipe = DataPipeline(units=[ 189 | DataProcessUnit( 190 | processor=t2i, 191 | input_params={"prompt": "prompt"}, 192 | output_params=("image_1",), 193 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 194 | ), 195 | 
DataProcessUnit( 196 | processor=cache, 197 | input_params={"image": "image_1"}, 198 | output_params=("image_1_path",) 199 | ), 200 | DataProcessUnit( 201 | processor=QwenVLI2T( 202 | api_key=args.dashscope_api_key, 203 | model_id=args.qwenvl_model_id, 204 | prompt=qwen_prompt_1 205 | ), 206 | input_params={"images": "image_1_path"}, 207 | output_params=("grounding_results_str",) 208 | ), 209 | DataProcessUnit( 210 | processor=QwenJsonParser(), 211 | input_params={"text": "grounding_results_str"}, 212 | output_params=("grounding_results_list",) 213 | ), 214 | DataProcessUnit( 215 | processor=ListSampler(), 216 | input_params={"ls": "grounding_results_list"}, 217 | parse_output_dict=True, 218 | ), 219 | DataProcessUnit( 220 | processor=QwenBbox2Mask(), 221 | input_params={"bbox": "bbox"}, 222 | output_params=("mask",) 223 | ), 224 | DataProcessUnit( 225 | processor=cache, 226 | input_params={"image": "mask"}, 227 | output_params=("mask_path",) 228 | ), 229 | DataProcessUnit( 230 | processor=TextFormater(template=qwen_prompt_2), 231 | input_params={"prompt": "prompt", "bbox": "bbox", "description": "description"}, 232 | output_params=("editing_prompt_for_qwen",) 233 | ), 234 | DataProcessUnit( 235 | processor=QwenVLI2T( 236 | api_key=args.dashscope_api_key, 237 | model_id=args.qwenvl_model_id, 238 | ), 239 | input_params={"images": "image_1_path", "prompt": "editing_prompt_for_qwen"}, 240 | output_params=("editing_str",) 241 | ), 242 | DataProcessUnit( 243 | processor=QwenJsonParser(), 244 | input_params={"text": "editing_str"}, 245 | parse_output_dict=True, 246 | ), 247 | DataProcessUnit( 248 | processor=t2i, 249 | input_params={ 250 | "prompt": "local_description", 251 | "controlnet_image": "image_1", 252 | "controlnet_inpaint_mask": "mask" 253 | }, 254 | output_params=("image_2",), 255 | extra_input_kwargs={"progress_bar_cmd": lambda x: x, "num_inference_steps": 50} 256 | ), 257 | DataProcessUnit( 258 | processor=cache, 259 | input_params={"image": "image_2"}, 260 | output_params=("image_2_path",) 261 | ), 262 | DataProcessUnit( 263 | processor=ListPacker(), 264 | input_params={"image_1_path": "image_1_path", "image_2_path": "image_2_path"}, 265 | output_params=("image_list",) 266 | ), 267 | DataProcessUnit( 268 | processor=QwenVLI2T( 269 | api_key=args.dashscope_api_key, 270 | model_id=args.qwenvl_model_id, 271 | prompt=qwen_prompt_3, 272 | ), 273 | input_params={"images": "image_list"}, 274 | output_params=("generated_instructions",) 275 | ), 276 | DataProcessUnit( 277 | processor=QwenJsonParser(), 278 | input_params={"text": "generated_instructions"}, 279 | parse_output_dict=True, 280 | ), 281 | DataProcessUnit( 282 | processor=preference_model, 283 | input_params={"image": "image_1", "prompt": "image_1_caption"}, 284 | output_params=("image_1_preference_score",) 285 | ), 286 | DataProcessUnit( 287 | processor=preference_model, 288 | input_params={"image": "image_2", "prompt": "image_2_caption"}, 289 | output_params=("image_2_preference_score",) 290 | ), 291 | DataProcessUnit( 292 | processor=ImageDatasetStorage( 293 | target_dir=args.target_dir, 294 | image_keys=("image_1", "image_2", "mask"), 295 | metadata_keys=( 296 | "editing_instruction", "reverse_editing_instruction", "prompt", "image_1_caption", "image_2_caption", 297 | "image_1_preference_score", "image_2_preference_score", "artifacts_in_image_1", "artifacts_in_image_2" 298 | ), 299 | modelscope_access_token=args.modelscope_access_token, 300 | modelscope_dataset_id=args.modelscope_dataset_id, 301 | 
max_num_files_per_folder=args.max_num_files_per_folder, 302 | ), 303 | input_params={ 304 | "image_1": "image_1", "image_2": "image_2", "mask": "mask", 305 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 306 | "prompt": "prompt", "image_1_caption": "image_1_caption", "image_2_caption": "image_2_caption", 307 | "image_1_preference_score": "image_1_preference_score", "image_2_preference_score": "image_2_preference_score", 308 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_2": "artifacts_in_image_2", 309 | }, 310 | output_params=("metadata_path"), 311 | ) 312 | ]) 313 | return dataset, pipe 314 | 315 | 316 | if __name__ == "__main__": 317 | args = parse_args() 318 | dataset, pipe = initialize(args) 319 | for data_id, data in enumerate(tqdm(dataset)): 320 | pipe(data, ignore_errors=True) 321 | if (data_id + 1) % 100 == 0: 322 | pipe.report_log() 323 | -------------------------------------------------------------------------------- /scripts/style_transfer.py: -------------------------------------------------------------------------------- 1 | from pulse.processor.flux_t2i import FLUXT2I 2 | from pulse.processor.qwenvl_i2t import QwenVLI2T, QwenJsonParser, QwenBbox2Mask, QwenBbox2Square 3 | from pulse.processor.general import ListSampler, ListPacker, ImageCropper, ImageResizer, TextFormater 4 | from pulse.processor.image_cache import ImageCache 5 | from pulse.processor.preference import ImagePreferenceModel 6 | from pulse.processor.style import RandomPromptStyler 7 | from pulse.processor.sdxl_t2i import SDXLT2I 8 | from pulse.dataset.dataset import ImageDatasetStorage 9 | from pulse.pipeline import DataProcessUnit, DataPipeline 10 | from pulse.dataset.diffusiondb import DiffusionDB 11 | from diffsynth import ControlNetConfigUnit, download_models 12 | from diffsynth.extensions.ImageQualityMetric import download_preference_model 13 | from modelscope import dataset_snapshot_download 14 | from tqdm import tqdm 15 | import argparse 16 | 17 | 18 | qwen_prompt_1 = """ 19 | Please describe the content of the image in concise text, focusing only on the elements present in the image without discussing its style. 20 | 21 | Here are some examples: 22 | * A dog is running 23 | * Red and blue flowers in a garden 24 | * An apple and a cup on the desk 25 | 26 | Just return the string description, do not return anything else. 27 | """ 28 | qwen_prompt_2 = """ 29 | Here are two images, denoted as image_1 and image_2. 30 | 31 | Generate a caption (image_1_caption and image_2_caption) according to each image so that another image generation model can generate the image via the caption. 32 | 33 | Write image editing instructions (editing_instruction) to edit from image_1 to image_2. Write another image editing instruction (reverse_editing_instruction) to edit from image 2 to image 1. Do not say "change back" or "transform back" in the instructions. Please ensure that the editing instructions emphasize the style of the image. 34 | 35 | Determine whether there are artifacts (e.g., distorted limbs, extra fingers, abnormal composition) in Image 1 and Image 2, denoted by artifacts_in_image_1 and artifacts_in_image_2.
36 | 37 | Please provide the results in JSON format as follows, which can be directly loaded by json.loads() in Python: 38 | { 39 | "image_1_caption": "...", 40 | "image_2_caption": "...", 41 | "editing_instruction": "...", 42 | "reverse_editing_instruction": "...", 43 | "artifacts_in_image_1": ..., 44 | "artifacts_in_image_2": ... 45 | } 46 | 47 | Here are some examples: 48 | { 49 | "image_1_caption": "a photo of a girl holding a basketball.", 50 | "image_2_caption": "an oil painting of a teenage girl holding a basketball.", 51 | "editing_instruction": "transform the photo into an oil painting style.", 52 | "reverse_editing_instruction": "generate a realistic scene based on the content of this oil painting.", 53 | "artifacts_in_image_1": false, 54 | "artifacts_in_image_2": false 55 | } 56 | 57 | { 58 | "image_1_caption": "flat illustration, anime style, featuring an orange kitten.", 59 | "image_2_caption": "highly outlined anime illustration, featuring an orange kitten.", 60 | "editing_instruction": "transform the style of the image to enhance the line definition.", 61 | "reverse_editing_instruction": "transform the style of the image to make it appear more flat.", 62 | "artifacts_in_image_1": false, 63 | "artifacts_in_image_2": true 64 | } 65 | """ 66 | 67 | def parse_args(): 68 | parser = argparse.ArgumentParser(description="Dataset generation script: Style Transfer.") 69 | parser.add_argument( 70 | "--target_dir", 71 | type=str, 72 | default="data/dataset", 73 | required=True, 74 | help="Path to save dataset.", 75 | ) 76 | parser.add_argument( 77 | "--cache_dir", 78 | type=str, 79 | default="data/cache", 80 | help="Path to save cache files.", 81 | ) 82 | parser.add_argument( 83 | "--dashscope_api_key", 84 | type=str, 85 | default="", 86 | help="Dashscope api key.", 87 | ) 88 | parser.add_argument( 89 | "--qwenvl_model_id", 90 | type=str, 91 | default="qwen-vl-max-0809", 92 | help="QwenVL model id.", 93 | ) 94 | parser.add_argument( 95 | "--modelscope_access_token", 96 | type=str, 97 | default=None, 98 | help="Modelscope access token", 99 | ) 100 | parser.add_argument( 101 | "--modelscope_dataset_id", 102 | type=str, 103 | default=None, 104 | help="Modelscope Dataset ID", 105 | ) 106 | parser.add_argument( 107 | "--num_data", 108 | type=int, 109 | default=100000, 110 | help="Number of data samples", 111 | ) 112 | parser.add_argument( 113 | "--max_num_files_per_folder", 114 | type=int, 115 | default=5000, 116 | help="Max number of files per folder", 117 | ) 118 | args = parser.parse_args() 119 | return args 120 | 121 | 122 | def initialize(args): 123 | dataset_snapshot_download("AI-ModelScope/diffusiondb", allow_file_pattern=["metadata-large.parquet"], cache_dir="./data") 124 | download_models(["FLUX.1-dev", "InstantX/FLUX.1-dev-IP-Adapter", "StableDiffusionXL_v1", "IP-Adapter-SDXL", "ControlNet_union_sdxl_promax"]) 125 | download_preference_model("MPS", cache_dir="./models") 126 | 127 | t2i = FLUXT2I( 128 | model_path=[ 129 | "models/FLUX/FLUX.1-dev/text_encoder/model.safetensors", 130 | "models/FLUX/FLUX.1-dev/text_encoder_2", 131 | "models/FLUX/FLUX.1-dev/ae.safetensors", 132 | "models/FLUX/FLUX.1-dev/flux1-dev.safetensors", 133 | "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/ip-adapter.bin", 134 | "models/IpAdapter/InstantX/FLUX.1-dev-IP-Adapter/image_encoder", 135 | ], 136 | device="cuda", 137 | ) 138 | instant_style = SDXLT2I( 139 | model_path=[ 140 | "models/stable_diffusion_xl/sd_xl_base_1.0.safetensors", 141 |
"models/IpAdapter/stable_diffusion_xl/image_encoder/model.safetensors", 142 | "models/IpAdapter/stable_diffusion_xl/ip-adapter_sdxl.bin", 143 | "models/ControlNet/controlnet_union/diffusion_pytorch_model_promax.safetensors", 144 | ], 145 | device="cuda", 146 | model_kwargs={ 147 | "controlnet_config_units": [ 148 | ControlNetConfigUnit( 149 | processor_id="canny", 150 | model_path="models/ControlNet/controlnet_union/diffusion_pytorch_model_promax.safetensors", 151 | scale=0.6 152 | ) 153 | ] 154 | }, 155 | pipeline_kwargs={ 156 | "negative_prompt": "text, watermark, lowres, low quality, worst quality, deformed, glitch, low contrast, noisy, saturation, blurry", 157 | "cfg_scale": 5, 158 | "height": 1024, 159 | "width": 1024, 160 | "num_inference_steps": 50, 161 | "ipadapter_use_instant_style": True 162 | } 163 | ) 164 | preference_model = ImagePreferenceModel("MPS", cache_dir="./models", device="cuda") 165 | cache = ImageCache(cache_dir=args.cache_dir) 166 | 167 | dataset = DiffusionDB("data/AI-ModelScope/diffusiondb/metadata-large.parquet", shuffle=True, num_data=args.num_data) 168 | 169 | pipe = DataPipeline(units=[ 170 | DataProcessUnit( 171 | processor=t2i, 172 | input_params={"prompt": "prompt"}, 173 | output_params=("image_1",), 174 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 175 | ), 176 | DataProcessUnit( 177 | processor=cache, 178 | input_params={"image": "image_1"}, 179 | output_params=("image_1_path",) 180 | ), 181 | DataProcessUnit( 182 | processor=QwenVLI2T( 183 | api_key=args.dashscope_api_key, 184 | model_id=args.qwenvl_model_id, 185 | prompt=qwen_prompt_1 186 | ), 187 | input_params={"images": "image_1_path"}, 188 | output_params=("image_content_description",) 189 | ), 190 | DataProcessUnit( 191 | processor=RandomPromptStyler(), 192 | input_params={"prompt": "image_content_description"}, 193 | output_params=("image_content_style_description",) 194 | ), 195 | DataProcessUnit( 196 | processor=t2i, 197 | input_params={"prompt": "image_content_style_description"}, 198 | output_params=("image_2",), 199 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 200 | ), 201 | DataProcessUnit( 202 | processor=cache, 203 | input_params={"image": "image_2"}, 204 | output_params=("image_2_path",) 205 | ), 206 | DataProcessUnit( 207 | processor=ListPacker(), 208 | input_params={"image": "image_2"}, 209 | output_params=("ipadapter_images",) 210 | ), 211 | DataProcessUnit( 212 | processor=instant_style, 213 | input_params={ 214 | "prompt": "image_content_style_description", 215 | "controlnet_image": "image_1", 216 | "ipadapter_images": "ipadapter_images", 217 | }, 218 | output_params=("image_3",), 219 | extra_input_kwargs={"progress_bar_cmd": lambda x: x} 220 | ), 221 | DataProcessUnit( 222 | processor=cache, 223 | input_params={"image": "image_3"}, 224 | output_params=("image_3_path",) 225 | ), 226 | DataProcessUnit( 227 | processor=t2i, 228 | input_params={ 229 | "prompt": "image_content_style_description", 230 | "input_image": "image_3", 231 | "ipadapter_images": "ipadapter_images", 232 | }, 233 | output_params=("image_4",), 234 | extra_input_kwargs={ 235 | "progress_bar_cmd": lambda x: x, 236 | "denoising_strength": 0.6, 237 | "num_inference_steps": 50 238 | } 239 | ), 240 | DataProcessUnit( 241 | processor=cache, 242 | input_params={"image": "image_4"}, 243 | output_params=("image_4_path",) 244 | ), 245 | DataProcessUnit( 246 | processor=ListPacker(), 247 | input_params={"image_1_path": "image_1_path", "image_4_path": "image_4_path"}, 248 | output_params=("image_list",) 249 | 
), 250 | DataProcessUnit( 251 | processor=QwenVLI2T( 252 | api_key=args.dashscope_api_key, 253 | model_id=args.qwenvl_model_id, 254 | prompt=qwen_prompt_2 255 | ), 256 | input_params={"images": "image_list"}, 257 | output_params=("generated_instructions",) 258 | ), 259 | DataProcessUnit( 260 | processor=QwenJsonParser(), 261 | input_params={"text": "generated_instructions"}, 262 | parse_output_dict=True, 263 | ), 264 | DataProcessUnit( 265 | processor=preference_model, 266 | input_params={"image": "image_1", "prompt": "image_1_caption"}, 267 | output_params=("image_1_preference_score",) 268 | ), 269 | DataProcessUnit( 270 | processor=preference_model, 271 | input_params={"image": "image_4", "prompt": "image_2_caption"}, 272 | output_params=("image_4_preference_score",) 273 | ), 274 | DataProcessUnit( 275 | processor=ImageDatasetStorage( 276 | target_dir=args.target_dir, 277 | image_keys=("image_1", "image_2", "image_3", "image_4"), 278 | metadata_keys=( 279 | "editing_instruction", "reverse_editing_instruction", 280 | "prompt", "image_content_description", "image_content_style_description", 281 | "image_1_caption", "image_4_caption", "artifacts_in_image_1", "artifacts_in_image_4", 282 | "image_1_preference_score", "image_4_preference_score" 283 | ), 284 | modelscope_access_token=args.modelscope_access_token, 285 | modelscope_dataset_id=args.modelscope_dataset_id, 286 | max_num_files_per_folder=args.max_num_files_per_folder, 287 | ), 288 | input_params={ 289 | "image_1": "image_1", "image_2": "image_2", "image_3": "image_3", "image_4": "image_4", 290 | "editing_instruction": "editing_instruction", "reverse_editing_instruction": "reverse_editing_instruction", 291 | "prompt": "prompt", "image_content_description": "image_content_description", "image_content_style_description": "image_content_style_description", 292 | "image_1_caption": "image_1_caption", "image_4_caption": "image_2_caption", 293 | "artifacts_in_image_1": "artifacts_in_image_1", "artifacts_in_image_4": "artifacts_in_image_2", 294 | "image_1_preference_score": "image_1_preference_score", "image_4_preference_score": "image_4_preference_score" 295 | }, 296 | output_params=("metadata_path") 297 | ) 298 | ]) 299 | return dataset, pipe 300 | 301 | 302 | if __name__ == "__main__": 303 | args = parse_args() 304 | dataset, pipe = initialize(args) 305 | for data_id, data in enumerate(tqdm(dataset)): 306 | pipe(data, ignore_errors=True) 307 | if (data_id + 1) % 100 == 0: 308 | pipe.report_log() 309 | -------------------------------------------------------------------------------- /pulse/processor/style.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class RandomPromptStyler: 5 | def __init__(self): 6 | self.styles = [ 7 | { 8 | "name": "cinematic-diva", 9 | "name_zh": "电影歌星画风", 10 | "template": "UHD, 8K, ultra detailed, a cinematic photograph of {prompt}, beautiful lighting, great composition" 11 | }, 12 | { 13 | "name": "Abstract Expressionism", 14 | "name_zh": "抽象表现主义", 15 | "template": "Abstract Expressionism Art, {prompt}, High contrast, minimalistic, colorful, stark, dramatic, expressionism" 16 | }, 17 | { 18 | "name": "Academia", 19 | "name_zh": "学院风", 20 | "template": "Academia, {prompt}, preppy Ivy League style, stark, dramatic, chic boarding school, academia" 21 | }, 22 | { 23 | "name": "Action Figure", 24 | "name_zh": "动作人偶", 25 | "template": "Action Figure, {prompt}, plastic collectable action figure, collectable toy action figure" 26 | }, 27 | { 28 | "name": 
"Adorable 3D Character", 29 | "name_zh": "可爱的3D角色", 30 | "template": "Adorable 3D Character, {prompt}, 3D render, adorable character, 3D art" 31 | }, 32 | { 33 | "name": "Adorable Kawaii", 34 | "name_zh": "可爱卡哇伊风格", 35 | "template": "Adorable Kawaii, {prompt}, pretty, cute, adorable, kawaii" 36 | }, 37 | { 38 | "name": "Art Deco", 39 | "name_zh": "艺术装饰风格", 40 | "template": "Art Deco, {prompt}, sleek, geometric forms, art deco style" 41 | }, 42 | { 43 | "name": "Art Nouveau", 44 | "name_zh": "新艺术风格", 45 | "template": "Art Nouveau, beautiful art, {prompt}, sleek, organic forms, long, sinuous, art nouveau style" 46 | }, 47 | { 48 | "name": "Astral Aura", 49 | "name_zh": "星体光环", 50 | "template": "Astral Aura, {prompt}, astral, colorful aura, vibrant energy" 51 | }, 52 | { 53 | "name": "Avant-garde", 54 | "name_zh": "先锋派", 55 | "template": "Avant-garde, {prompt}, unusual, experimental, avant-garde art" 56 | }, 57 | { 58 | "name": "Baroque", 59 | "name_zh": "巴洛克风格", 60 | "template": "Baroque, {prompt}, dramatic, exuberant, grandeur, baroque art" 61 | }, 62 | { 63 | "name": "Bauhaus-Style Poster", 64 | "name_zh": "包豪斯风格海报", 65 | "template": "Bauhaus-Style Poster, {prompt}, simple geometric shapes, clean lines, primary colors, Bauhaus-Style Poster" 66 | }, 67 | { 68 | "name": "Blueprint Schematic Drawing", 69 | "name_zh": "蓝图原理图绘制", 70 | "template": "Blueprint Schematic Drawing, {prompt}, technical drawing, blueprint, schematic" 71 | }, 72 | { 73 | "name": "Caricature", 74 | "name_zh": "漫画", 75 | "template": "Caricature, {prompt}, exaggerated, comical, caricature" 76 | }, 77 | { 78 | "name": "Cel Shaded Art", 79 | "name_zh": "单色阴影艺术", 80 | "template": "Cel Shaded Art, {prompt}, 2D, flat color, toon shading, cel shaded style" 81 | }, 82 | { 83 | "name": "Character Design Sheet", 84 | "name_zh": "角色设计图", 85 | "template": "Character Design Sheet, {prompt}, character reference sheet, character turn around" 86 | }, 87 | { 88 | "name": "Classicism Art", 89 | "name_zh": "古典主义艺术", 90 | "template": "Classicism Art, {prompt}, inspired by Roman and Greek culture, clarity, harmonious, classicism art" 91 | }, 92 | { 93 | "name": "Color Field Painting", 94 | "name_zh": "色域绘画", 95 | "template": "Color Field Painting, {prompt}, abstract, simple, geometic, color field painting style" 96 | }, 97 | { 98 | "name": "Colored Pencil Art", 99 | "name_zh": "彩色铅笔艺术", 100 | "template": "Colored Pencil Art, {prompt}, colored pencil strokes, light color, visible paper texture, colored pencil art" 101 | }, 102 | { 103 | "name": "Conceptual Art", 104 | "name_zh": "概念艺术", 105 | "template": "Conceptual Art, {prompt}, concept art" 106 | }, 107 | { 108 | "name": "Constructivism", 109 | "name_zh": "结构主义", 110 | "template": "Constructivism Art, {prompt}, minimalistic, geometric forms, constructivism art" 111 | }, 112 | { 113 | "name": "Cubism", 114 | "name_zh": "立体主义", 115 | "template": "Cubism Art, {prompt}, flat geometric forms, cubism art" 116 | }, 117 | { 118 | "name": "Dadaism", 119 | "name_zh": "达达主义", 120 | "template": "Dadaism Art, {prompt}, satirical, nonsensical, dadaism art" 121 | }, 122 | { 123 | "name": "Dark Fantasy", 124 | "name_zh": "黑暗幻想", 125 | "template": "Dark Fantasy Art, {prompt}, dark, moody, dark fantasy style" 126 | }, 127 | { 128 | "name": "Dark Moody Atmosphere", 129 | "name_zh": "暗色忧郁氛围", 130 | "template": "Dark Moody Atmosphere, {prompt}, dramatic, mysterious, dark moody atmosphere" 131 | }, 132 | { 133 | "name": "DMT Art Style", 134 | "name_zh": "DMT艺术风格", 135 | "template": "DMT Art Style, {prompt}, 
bright colors, surreal visuals, swirling patterns, DMT art style" 136 | }, 137 | { 138 | "name": "Doodle Art", 139 | "name_zh": "涂鸦艺术", 140 | "template": "Doodle Art Style, {prompt}, drawing, freeform, swirling patterns, doodle art style" 141 | }, 142 | { 143 | "name": "Double Exposure", 144 | "name_zh": "双重曝光", 145 | "template": "Double Exposure Style, {prompt}, double image ghost effect, image combination, double exposure style" 146 | }, 147 | { 148 | "name": "Dripping Paint Splatter Art", 149 | "name_zh": "滴漆溅画艺术", 150 | "template": "Dripping Paint Splatter Art, {prompt}, dramatic, paint drips, splatters, dripping paint" 151 | }, 152 | { 153 | "name": "Expressionism", 154 | "name_zh": "表现主义", 155 | "template": "Expressionism Art Style, {prompt}, movement, contrast, emotional, exaggerated forms, expressionism art style" 156 | }, 157 | { 158 | "name": "Faded Polaroid Photo", 159 | "name_zh": "褪色的宝丽来照片", 160 | "template": "Faded Polaroid Photo, {prompt}, analog, old faded photo, old polaroid" 161 | }, 162 | { 163 | "name": "Fauvism", 164 | "name_zh": "野兽派", 165 | "template": "Fauvism Art, {prompt}, painterly, bold colors, textured brushwork, fauvism art" 166 | }, 167 | { 168 | "name": "Flat 2D Art", 169 | "name_zh": "扁平2D艺术", 170 | "template": "Flat 2D Art, {prompt}, simple flat color, 2-dimensional, Flat 2D Art Style" 171 | }, 172 | { 173 | "name": "Fortnite Art Style", 174 | "name_zh": "堡垒之夜艺术风格", 175 | "template": "Fortnite Art Style, {prompt}, 3D cartoon, colorful, Fortnite Art Style" 176 | }, 177 | { 178 | "name": "Futurism", 179 | "name_zh": "未来主义", 180 | "template": "Futurism Art Style, {prompt}, dynamic, dramatic, Futurism Art Style" 181 | }, 182 | { 183 | "name": "Glitchcore", 184 | "name_zh": "故障核心", 185 | "template": "Glitchcore Art Style, {prompt}, dynamic, dramatic, distorted, vibrant colors, glitchcore art style" 186 | }, 187 | { 188 | "name": "Glo-fi", 189 | "name_zh": "光环音乐风格", 190 | "template": "Glo-fi Art Style, {prompt}, dynamic, dramatic, vibrant colors, glo-fi art style" 191 | }, 192 | { 193 | "name": "Googie Art Style", 194 | "name_zh": "古奇艺术风格", 195 | "template": "Googie Art Style, {prompt}, dynamic, dramatic, 1950's futurism, bold boomerang angles, Googie art style" 196 | }, 197 | { 198 | "name": "Graffiti Art", 199 | "name_zh": "涂鸦艺术", 200 | "template": "Graffiti Art Style, {prompt}, dynamic, dramatic, vibrant colors, graffiti art style" 201 | }, 202 | { 203 | "name": "Harlem Renaissance Art", 204 | "name_zh": "哈莱姆文艺复兴艺术", 205 | "template": "Harlem Renaissance Art Style, {prompt}, dynamic, dramatic, 1920s African American culture, Harlem Renaissance art style" 206 | }, 207 | { 208 | "name": "High Fashion", 209 | "name_zh": "高级时装", 210 | "template": "High Fashion, {prompt}, dynamic, dramatic, haute couture, elegant, ornate clothing, High Fashion" 211 | }, 212 | { 213 | "name": "Idyllic", 214 | "name_zh": "田园诗般的", 215 | "template": "Idyllic, {prompt}, peaceful, happy, pleasant, happy, harmonious, picturesque, charming" 216 | }, 217 | { 218 | "name": "Impressionism", 219 | "name_zh": "印象主义", 220 | "template": "Impressionism, {prompt}, painterly, small brushstrokes, visible brushstrokes, impressionistic style" 221 | }, 222 | { 223 | "name": "Infographic Drawing", 224 | "name_zh": "信息图表绘制", 225 | "template": "Infographic Drawing, {prompt}, diagram, infographic" 226 | }, 227 | { 228 | "name": "Ink Dripping Drawing", 229 | "name_zh": "墨水滴画", 230 | "template": "Ink Dripping Drawing, {prompt}, ink drawing, dripping ink" 231 | }, 232 | { 233 | "name": "Japanese Ink Drawing", 
234 | "name_zh": "日本墨画", 235 | "template": "Japanese Ink Drawing, {prompt}, ink drawing, inkwash, Japanese Ink Drawing" 236 | }, 237 | { 238 | "name": "Knolling Photography", 239 | "name_zh": "秩序拍摄", 240 | "template": "Knolling Photography, {prompt}, flat lay photography, object arrangment, knolling photography" 241 | }, 242 | { 243 | "name": "Light Cheery Atmosphere", 244 | "name_zh": "轻快愉快的氛围", 245 | "template": "Light Cheery Atmosphere, {prompt}, happy, joyful, cheerful, carefree, gleeful, lighthearted, pleasant atmosphere" 246 | }, 247 | { 248 | "name": "Logo Design", 249 | "name_zh": "标志设计", 250 | "template": "Logo Design, {prompt}, dynamic graphic art, vector art, minimalist, professional logo design" 251 | }, 252 | { 253 | "name": "Luxurious Elegance", 254 | "name_zh": "奢华优雅", 255 | "template": "Luxurious Elegance, {prompt}, extravagant, ornate, designer, opulent, picturesque, lavish" 256 | }, 257 | { 258 | "name": "Macro Photography", 259 | "name_zh": "微距摄影", 260 | "template": "Macro Photography, {prompt}, close-up, macro 100mm, macro photography" 261 | }, 262 | { 263 | "name": "Mandola Art", 264 | "name_zh": "曼陀罗艺术", 265 | "template": "Mandola art style, {prompt}, complex, circular design, mandola" 266 | }, 267 | { 268 | "name": "Marker Drawing", 269 | "name_zh": "马克笔绘图", 270 | "template": "Marker Drawing, {prompt}, bold marker lines, visibile paper texture, marker drawing" 271 | }, 272 | { 273 | "name": "Medievalism", 274 | "name_zh": "中世纪主义", 275 | "template": "Medievalism, {prompt}, inspired by The Middle Ages, medieval art, elaborate patterns and decoration, Medievalism" 276 | }, 277 | { 278 | "name": "Minimalism", 279 | "name_zh": "极简主义", 280 | "template": "Minimalism, {prompt}, abstract, simple geometic shapes, hard edges, sleek contours, Minimalism" 281 | }, 282 | { 283 | "name": "Neo-Baroque", 284 | "name_zh": "新巴洛克", 285 | "template": "Neo-Baroque, {prompt}, ornate and elaborate, dynaimc, Neo-Baroque" 286 | }, 287 | { 288 | "name": "Neo-Byzantine", 289 | "name_zh": "新拜占庭", 290 | "template": "Neo-Byzantine, {prompt}, grand decorative religious style, Orthodox Christian inspired, Neo-Byzantine" 291 | }, 292 | { 293 | "name": "Neo-Futurism", 294 | "name_zh": "新未来主义", 295 | "template": "Neo-Futurism, {prompt}, high-tech, curves, spirals, flowing lines, idealistic future, Neo-Futurism" 296 | }, 297 | { 298 | "name": "Neo-Impressionism", 299 | "name_zh": "新印象主义", 300 | "template": "Neo-Impressionism, {prompt}, tiny dabs of color, Pointillism, painterly, Neo-Impressionism" 301 | }, 302 | { 303 | "name": "Neo-Rococo", 304 | "name_zh": "新洛可可", 305 | "template": "Neo-Rococo, {prompt}, curved forms, naturalistic ornamentation, elaborate, decorative, gaudy, Neo-Rococo" 306 | }, 307 | { 308 | "name": "Neoclassicism", 309 | "name_zh": "新古典主义", 310 | "template": "Neoclassicism, {prompt}, ancient Rome and Greece inspired, idealic, sober colors, Neoclassicism" 311 | }, 312 | { 313 | "name": "Op Art", 314 | "name_zh": "视觉艺术", 315 | "template": "Op Art, {prompt}, optical illusion, abstract, geometric pattern, impression of movement, Op Art" 316 | }, 317 | { 318 | "name": "Ornate and Intricate", 319 | "name_zh": "华丽复杂", 320 | "template": "Ornate and Intricate, {prompt}, decorative, highly detailed, elaborate, ornate, intricate" 321 | }, 322 | { 323 | "name": "Pencil Sketch Drawing", 324 | "name_zh": "铅笔素描", 325 | "template": "Pencil Sketch Drawing, {prompt}, black and white drawing, graphite drawing" 326 | }, 327 | { 328 | "name": "Pop Art 2", 329 | "name_zh": "流行艺术", 330 | "template": "Pop 
Art, {prompt}, vivid colors, flat color, 2D, strong lines, Pop Art" 331 | }, 332 | { 333 | "name": "Rococo", 334 | "name_zh": "洛可可", 335 | "template": "Rococo, {prompt}, flamboyant, pastel colors, curved lines, elaborate detail, Rococo" 336 | }, 337 | { 338 | "name": "Silhouette Art", 339 | "name_zh": "剪影艺术", 340 | "template": "Silhouette Art, {prompt}, high contrast, well defined, Silhouette Art" 341 | }, 342 | { 343 | "name": "Simple Vector Art", 344 | "name_zh": "简单矢量艺术", 345 | "template": "Simple Vector Art, {prompt}, 2D flat, simple shapes, minimalistic, professional graphic, flat color, high contrast, Simple Vector Art" 346 | }, 347 | { 348 | "name": "Sketchup", 349 | "name_zh": "草图大师", 350 | "template": "Sketchup, {prompt}, CAD, professional design, Sketchup" 351 | }, 352 | { 353 | "name": "Steampunk 2", 354 | "name_zh": "蒸汽朋克", 355 | "template": "Steampunk, {prompt}, retrofuturistic science fantasy, steam-powered tech, vintage industry, gears, neo-victorian, steampunk" 356 | }, 357 | { 358 | "name": "Surrealism", 359 | "name_zh": "超现实主义", 360 | "template": "Surrealism, {prompt}, expressive, dramatic, organic lines and forms, dreamlike and mysterious, Surrealism" 361 | }, 362 | { 363 | "name": "Suprematism", 364 | "name_zh": "至上主义", 365 | "template": "Suprematism, {prompt}, abstract, limited color palette, geometric forms, Suprematism" 366 | }, 367 | { 368 | "name": "Terragen", 369 | "name_zh": "地形生成", 370 | "template": "Terragen, {prompt}, beautiful massive landscape, epic scenery, Terragen" 371 | }, 372 | { 373 | "name": "Tranquil Relaxing Atmosphere", 374 | "name_zh": "宁静放松的氛围", 375 | "template": "Tranquil Relaxing Atmosphere, {prompt}, calming style, soothing colors, peaceful, idealic, Tranquil Relaxing Atmosphere" 376 | }, 377 | { 378 | "name": "Sticker Designs", 379 | "name_zh": "贴纸设计", 380 | "template": "Vector Art Stickers, {prompt}, professional vector design, sticker designs, Sticker Sheet" 381 | }, 382 | { 383 | "name": "Vibrant Rim Light", 384 | "name_zh": "生动的边缘光", 385 | "template": "Vibrant Rim Light, {prompt}, bright rim light, high contrast, bold edge light" 386 | }, 387 | { 388 | "name": "Volumetric Lighting", 389 | "name_zh": "体积光照明", 390 | "template": "Volumetric Lighting, {prompt}, light depth, dramatic atmospheric lighting, Volumetric Lighting" 391 | }, 392 | { 393 | "name": "Watercolor 2", 394 | "name_zh": "水彩", 395 | "template": "Watercolor style painting, {prompt}, visible paper texture, colorwash, watercolor" 396 | }, 397 | { 398 | "name": "Whimsical and Playful", 399 | "name_zh": "异想天开和俏皮", 400 | "template": "Whimsical and Playful, {prompt}, imaginative, fantastical, bight colors, stylized, happy, Whimsical and Playful" 401 | }, 402 | { 403 | "name": "Fooocus Sharp", 404 | "name_zh": "焦点锐化", 405 | "template": "cinematic still {prompt} . emotional, harmonious, vignette, 4k epic detailed, shot on kodak, 35mm photo, sharp focus, high budget, cinemascope, moody, epic, gorgeous, film grain, grainy" 406 | }, 407 | { 408 | "name": "Fooocus Masterpiece", 409 | "name_zh": "焦点杰作", 410 | "template": "(masterpiece), (best quality), (ultra-detailed), {prompt}, illustration, disheveled hair, detailed eyes, perfect composition, moist skin, intricate details, earrings, by wlop" 411 | }, 412 | { 413 | "name": "Fooocus Photograph", 414 | "name_zh": "焦点摄影", 415 | "template": "photograph {prompt}, 50mm . 
cinematic 4k epic detailed 4k epic detailed photograph shot on kodak detailed cinematic hbo dark moody, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage" 416 | }, 417 | { 418 | "name": "Fooocus Cinematic", 419 | "name_zh": "焦点电影", 420 | "template": "cinematic still {prompt} . emotional, harmonious, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy" 421 | }, 422 | { 423 | "name": "mre-cinematic-dynamic", 424 | "name_zh": "MRE电影动态", 425 | "template": "epic cinematic shot of dynamic {prompt} in motion. main subject of high budget action movie. raw photo, motion blur. best quality, high resolution" 426 | }, 427 | { 428 | "name": "mre-spontaneous-picture", 429 | "name_zh": "MRE自发图片", 430 | "template": "spontaneous picture of {prompt}, taken by talented amateur. best quality, high resolution. magical moment, natural look. simple but good looking" 431 | }, 432 | { 433 | "name": "mre-artistic-vision", 434 | "name_zh": "MRE艺术视觉", 435 | "template": "powerful artistic vision of {prompt}. breathtaking masterpiece made by great artist. best quality, high resolution" 436 | }, 437 | { 438 | "name": "mre-dark-dream", 439 | "name_zh": "MRE黑暗梦境", 440 | "template": "dark and unsettling dream showing {prompt}. best quality, high resolution. created by genius but depressed mad artist. grim beauty" 441 | }, 442 | { 443 | "name": "mre-gloomy-art", 444 | "name_zh": "MRE忧郁艺术", 445 | "template": "astonishing gloomy art made mainly of shadows and lighting, forming {prompt}. masterful usage of lighting, shadows and chiaroscuro. made by black-hearted artist, drawing from darkness. best quality, high resolution" 446 | }, 447 | { 448 | "name": "mre-bad-dream", 449 | "name_zh": "MRE恶梦", 450 | "template": "picture from really bad dream about terrifying {prompt}, true horror. bone-chilling vision. mad world that shouldn't exist. best quality, high resolution" 451 | }, 452 | { 453 | "name": "mre-underground", 454 | "name_zh": "MRE地下", 455 | "template": "uncanny caliginous vision of {prompt}, created by remarkable underground artist. best quality, high resolution. raw and brutal art, careless but impressive style. inspired by darkness and chaos" 456 | }, 457 | { 458 | "name": "mre-surreal-painting", 459 | "name_zh": "MRE超现实绘画", 460 | "template": "surreal painting representing strange vision of {prompt}. harmonious madness, synergy with chance. unique artstyle, mindbending art, magical surrealism. best quality, high resolution" 461 | }, 462 | { 463 | "name": "mre-dynamic-illustration", 464 | "name_zh": "MRE动态插画", 465 | "template": "insanely dynamic illustration of {prompt}. best quality, high resolution. crazy artstyle, careless brushstrokes, emotional and fun" 466 | }, 467 | { 468 | "name": "mre-undead-art", 469 | "name_zh": "MRE不死艺术", 470 | "template": "long forgotten art created by undead artist illustrating {prompt}, tribute to the death and decay. miserable art of the damned. wretched and decaying world. best quality, high resolution" 471 | }, 472 | { 473 | "name": "mre-elemental-art", 474 | "name_zh": "MRE元素艺术", 475 | "template": "art illustrating insane amounts of raging elemental energy turning into {prompt}, avatar of elements. magical surrealism, wizardry. best quality, high resolution" 476 | }, 477 | { 478 | "name": "mre-space-art", 479 | "name_zh": "MRE太空艺术", 480 | "template": "winner of inter-galactic art contest illustrating {prompt}, symbol of the interstellar singularity. best quality, high resolution. 
artstyle previously unseen in the whole galaxy" 481 | }, 482 | { 483 | "name": "mre-ancient-illustration", 484 | "name_zh": "MRE古代插画", 485 | "template": "sublime ancient illustration of {prompt}, predating human civilization. crude and simple, but also surprisingly beautiful artwork, made by genius primeval artist. best quality, high resolution" 486 | }, 487 | { 488 | "name": "mre-brave-art", 489 | "name_zh": "MRE勇敢艺术", 490 | "template": "brave, shocking, and brutally true art showing {prompt}. inspired by courage and unlimited creativity. truth found in chaos. best quality, high resolution" 491 | }, 492 | { 493 | "name": "mre-heroic-fantasy", 494 | "name_zh": "MRE英雄幻想", 495 | "template": "heroic fantasy painting of {prompt}, in the dangerous fantasy world. airbrush over oil on canvas. best quality, high resolution" 496 | }, 497 | { 498 | "name": "mre-dark-cyberpunk", 499 | "name_zh": "MRE黑暗赛博朋克", 500 | "template": "dark cyberpunk illustration of brutal {prompt} in a world without hope, ruled by ruthless criminal corporations. best quality, high resolution" 501 | }, 502 | { 503 | "name": "mre-lyrical-geometry", 504 | "name_zh": "MRE抒情几何", 505 | "template": "geometric and lyrical abstraction painting presenting {prompt}. oil on metal. best quality, high resolution" 506 | }, 507 | { 508 | "name": "mre-sumi-e-symbolic", 509 | "name_zh": "MRE墨绘象征", 510 | "template": "big long brushstrokes of deep black sumi-e turning into symbolic painting of {prompt}. master level raw art. best quality, high resolution" 511 | }, 512 | { 513 | "name": "mre-sumi-e-detailed", 514 | "name_zh": "MRE墨绘精细", 515 | "template": "highly detailed black sumi-e painting of {prompt}. in-depth study of perfection, created by a master. best quality, high resolution" 516 | }, 517 | { 518 | "name": "mre-manga", 519 | "name_zh": "MRE漫画", 520 | "template": "manga artwork presenting {prompt}. created by japanese manga artist. highly emotional. best quality, high resolution" 521 | }, 522 | { 523 | "name": "mre-anime", 524 | "name_zh": "MRE动漫", 525 | "template": "anime artwork illustrating {prompt}. created by japanese anime studio. highly emotional. best quality, high resolution" 526 | }, 527 | { 528 | "name": "mre-comic", 529 | "name_zh": "MRE漫画书", 530 | "template": "breathtaking illustration from adult comic book presenting {prompt}. fabulous artwork. best quality, high resolution" 531 | }, 532 | { 533 | "name": "sai-3d-model", 534 | "name_zh": "SAI三维模型", 535 | "template": "professional 3d model {prompt} . octane render, highly detailed, volumetric, dramatic lighting" 536 | }, 537 | { 538 | "name": "sai-analog film", 539 | "name_zh": "SAI模拟胶片", 540 | "template": "analog film photo {prompt} . faded film, desaturated, 35mm photo, grainy, vignette, vintage, Kodachrome, Lomography, stained, highly detailed, found footage" 541 | }, 542 | { 543 | "name": "sai-anime", 544 | "name_zh": "SAI动漫", 545 | "template": "anime artwork {prompt} . anime style, key visual, vibrant, studio anime, highly detailed" 546 | }, 547 | { 548 | "name": "sai-cinematic", 549 | "name_zh": "SAI电影", 550 | "template": "cinematic film still {prompt} . shallow depth of field, vignette, highly detailed, high budget, bokeh, cinemascope, moody, epic, gorgeous, film grain, grainy" 551 | }, 552 | { 553 | "name": "sai-comic book", 554 | "name_zh": "SAI漫画书", 555 | "template": "comic {prompt} . 
graphic illustration, comic art, graphic novel art, vibrant, highly detailed" 556 | }, 557 | { 558 | "name": "sai-craft clay", 559 | "name_zh": "SAI手工粘土", 560 | "template": "play-doh style {prompt} . sculpture, clay art, centered composition, Claymation" 561 | }, 562 | { 563 | "name": "sai-digital art", 564 | "name_zh": "SAI数字艺术", 565 | "template": "concept art {prompt} . digital artwork, illustrative, painterly, matte painting, highly detailed" 566 | }, 567 | { 568 | "name": "sai-enhance", 569 | "name_zh": "SAI增强", 570 | "template": "breathtaking {prompt} . award-winning, professional, highly detailed" 571 | }, 572 | { 573 | "name": "sai-fantasy art", 574 | "name_zh": "SAI幻想艺术", 575 | "template": "ethereal fantasy concept art of {prompt} . magnificent, celestial, ethereal, painterly, epic, majestic, magical, fantasy art, cover art, dreamy" 576 | }, 577 | { 578 | "name": "sai-isometric", 579 | "name_zh": "SAI等距", 580 | "template": "isometric style {prompt} . vibrant, beautiful, crisp, detailed, ultra detailed, intricate" 581 | }, 582 | { 583 | "name": "sai-line art", 584 | "name_zh": "SAI线条艺术", 585 | "template": "line art drawing {prompt} . professional, sleek, modern, minimalist, graphic, line art, vector graphics" 586 | }, 587 | { 588 | "name": "sai-lowpoly", 589 | "name_zh": "SAI低多边形", 590 | "template": "low-poly style {prompt} . low-poly game art, polygon mesh, jagged, blocky, wireframe edges, centered composition" 591 | }, 592 | { 593 | "name": "sai-neonpunk", 594 | "name_zh": "SAI霓虹朋克", 595 | "template": "neonpunk style {prompt} . cyberpunk, vaporwave, neon, vibes, vibrant, stunningly beautiful, crisp, detailed, sleek, ultramodern, magenta highlights, dark purple shadows, high contrast, cinematic, ultra detailed, intricate, professional" 596 | }, 597 | { 598 | "name": "sai-origami", 599 | "name_zh": "SAI折纸", 600 | "template": "origami style {prompt} . paper art, pleated paper, folded, origami art, pleats, cut and fold, centered composition" 601 | }, 602 | { 603 | "name": "sai-photographic", 604 | "name_zh": "SAI摄影", 605 | "template": "cinematic photo {prompt} . 35mm photograph, film, bokeh, professional, 4k, highly detailed" 606 | }, 607 | { 608 | "name": "sai-pixel art", 609 | "name_zh": "SAI像素艺术", 610 | "template": "pixel-art {prompt} . low-res, blocky, pixel art style, 8-bit graphics" 611 | }, 612 | { 613 | "name": "sai-texture", 614 | "name_zh": "SAI质地", 615 | "template": "texture {prompt} top down close-up" 616 | }, 617 | { 618 | "name": "ads-advertising", 619 | "name_zh": "广告", 620 | "template": "advertising poster style {prompt} . Professional, modern, product-focused, commercial, eye-catching, highly detailed" 621 | }, 622 | { 623 | "name": "ads-automotive", 624 | "name_zh": "汽车广告", 625 | "template": "automotive advertisement style {prompt} . sleek, dynamic, professional, commercial, vehicle-focused, high-resolution, highly detailed" 626 | }, 627 | { 628 | "name": "ads-corporate", 629 | "name_zh": "企业广告", 630 | "template": "corporate branding style {prompt} . professional, clean, modern, sleek, minimalist, business-oriented, highly detailed" 631 | }, 632 | { 633 | "name": "ads-fashion editorial", 634 | "name_zh": "时尚编辑", 635 | "template": "fashion editorial style {prompt} . high fashion, trendy, stylish, editorial, magazine style, professional, highly detailed" 636 | }, 637 | { 638 | "name": "ads-food photography", 639 | "name_zh": "食品摄影", 640 | "template": "food photography style {prompt} . 
appetizing, professional, culinary, high-resolution, commercial, highly detailed" 641 | }, 642 | { 643 | "name": "ads-gourmet food photography", 644 | "name_zh": "美食摄影", 645 | "template": "gourmet food photo of {prompt} . soft natural lighting, macro details, vibrant colors, fresh ingredients, glistening textures, bokeh background, styled plating, wooden tabletop, garnished, tantalizing, editorial quality" 646 | }, 647 | { 648 | "name": "ads-luxury", 649 | "name_zh": "奢华广告", 650 | "template": "luxury product style {prompt} . elegant, sophisticated, high-end, luxurious, professional, highly detailed" 651 | }, 652 | { 653 | "name": "ads-real estate", 654 | "name_zh": "房地产广告", 655 | "template": "real estate photography style {prompt} . professional, inviting, well-lit, high-resolution, property-focused, commercial, highly detailed" 656 | }, 657 | { 658 | "name": "ads-retail", 659 | "name_zh": "零售广告", 660 | "template": "retail packaging style {prompt} . vibrant, enticing, commercial, product-focused, eye-catching, professional, highly detailed" 661 | }, 662 | { 663 | "name": "artstyle-abstract", 664 | "name_zh": "抽象艺术风格", 665 | "template": "abstract style {prompt} . non-representational, colors and shapes, expression of feelings, imaginative, highly detailed" 666 | }, 667 | { 668 | "name": "artstyle-abstract expressionism", 669 | "name_zh": "抽象表现主义", 670 | "template": "abstract expressionist painting {prompt} . energetic brushwork, bold colors, abstract forms, expressive, emotional" 671 | }, 672 | { 673 | "name": "artstyle-art deco", 674 | "name_zh": "艺术装饰风格", 675 | "template": "art deco style {prompt} . geometric shapes, bold colors, luxurious, elegant, decorative, symmetrical, ornate, detailed" 676 | }, 677 | { 678 | "name": "artstyle-art nouveau", 679 | "name_zh": "新艺术风格", 680 | "template": "art nouveau style {prompt} . elegant, decorative, curvilinear forms, nature-inspired, ornate, detailed" 681 | }, 682 | { 683 | "name": "artstyle-constructivist", 684 | "name_zh": "构成主义", 685 | "template": "constructivist style {prompt} . geometric shapes, bold colors, dynamic composition, propaganda art style" 686 | }, 687 | { 688 | "name": "artstyle-cubist", 689 | "name_zh": "立体主义", 690 | "template": "cubist artwork {prompt} . geometric shapes, abstract, innovative, revolutionary" 691 | }, 692 | { 693 | "name": "artstyle-expressionist", 694 | "name_zh": "表现主义", 695 | "template": "expressionist {prompt} . raw, emotional, dynamic, distortion for emotional effect, vibrant, use of unusual colors, detailed" 696 | }, 697 | { 698 | "name": "artstyle-graffiti", 699 | "name_zh": "涂鸦", 700 | "template": "graffiti style {prompt} . street art, vibrant, urban, detailed, tag, mural" 701 | }, 702 | { 703 | "name": "artstyle-hyperrealism", 704 | "name_zh": "超现实主义", 705 | "template": "hyperrealistic art {prompt} . extremely high-resolution details, photographic, realism pushed to extreme, fine texture, incredibly lifelike" 706 | }, 707 | { 708 | "name": "artstyle-impressionist", 709 | "name_zh": "印象主义", 710 | "template": "impressionist painting {prompt} . loose brushwork, vibrant color, light and shadow play, captures feeling over form" 711 | }, 712 | { 713 | "name": "artstyle-pointillism", 714 | "name_zh": "点彩主义", 715 | "template": "pointillism style {prompt} . composed entirely of small, distinct dots of color, vibrant, highly detailed" 716 | }, 717 | { 718 | "name": "artstyle-pop art", 719 | "name_zh": "波普艺术", 720 | "template": "pop Art style {prompt} . 
bright colors, bold outlines, popular culture themes, ironic or kitsch" 721 | }, 722 | { 723 | "name": "artstyle-psychedelic", 724 | "name_zh": "迷幻艺术", 725 | "template": "psychedelic style {prompt} . vibrant colors, swirling patterns, abstract forms, surreal, trippy" 726 | }, 727 | { 728 | "name": "artstyle-renaissance", 729 | "name_zh": "文艺复兴", 730 | "template": "renaissance style {prompt} . realistic, perspective, light and shadow, religious or mythological themes, highly detailed" 731 | }, 732 | { 733 | "name": "artstyle-steampunk", 734 | "name_zh": "蒸汽朋克", 735 | "template": "steampunk style {prompt} . antique, mechanical, brass and copper tones, gears, intricate, detailed" 736 | }, 737 | { 738 | "name": "artstyle-surrealist", 739 | "name_zh": "超现实主义", 740 | "template": "surrealist art {prompt} . dreamlike, mysterious, provocative, symbolic, intricate, detailed" 741 | }, 742 | { 743 | "name": "artstyle-typography", 744 | "name_zh": "排版艺术", 745 | "template": "typographic art {prompt} . stylized, intricate, detailed, artistic, text-based" 746 | }, 747 | { 748 | "name": "artstyle-watercolor", 749 | "name_zh": "水彩艺术", 750 | "template": "watercolor painting {prompt} . vibrant, beautiful, painterly, detailed, textural, artistic" 751 | }, 752 | { 753 | "name": "futuristic-biomechanical", 754 | "name_zh": "未来生物力学", 755 | "template": "biomechanical style {prompt} . blend of organic and mechanical elements, futuristic, cybernetic, detailed, intricate" 756 | }, 757 | { 758 | "name": "futuristic-biomechanical cyberpunk", 759 | "name_zh": "未来生物力学赛博朋克", 760 | "template": "biomechanical cyberpunk {prompt} . cybernetics, human-machine fusion, dystopian, organic meets artificial, dark, intricate, highly detailed" 761 | }, 762 | { 763 | "name": "futuristic-cybernetic", 764 | "name_zh": "未来赛博", 765 | "template": "cybernetic style {prompt} . futuristic, technological, cybernetic enhancements, robotics, artificial intelligence themes" 766 | }, 767 | { 768 | "name": "futuristic-cybernetic robot", 769 | "name_zh": "未来机器人", 770 | "template": "cybernetic robot {prompt} . android, AI, machine, metal, wires, tech, futuristic, highly detailed" 771 | }, 772 | { 773 | "name": "futuristic-cyberpunk cityscape", 774 | "name_zh": "未来赛博朋克城市景观", 775 | "template": "cyberpunk cityscape {prompt} . neon lights, dark alleys, skyscrapers, futuristic, vibrant colors, high contrast, highly detailed" 776 | }, 777 | { 778 | "name": "futuristic-futuristic", 779 | "name_zh": "未来主义", 780 | "template": "futuristic style {prompt} . sleek, modern, ultramodern, high tech, detailed" 781 | }, 782 | { 783 | "name": "futuristic-retro cyberpunk", 784 | "name_zh": "未来复古赛博朋克", 785 | "template": "retro cyberpunk {prompt} . 80's inspired, synthwave, neon, vibrant, detailed, retro futurism" 786 | }, 787 | { 788 | "name": "futuristic-retro futurism", 789 | "name_zh": "未来复古主义", 790 | "template": "retro-futuristic {prompt} . vintage sci-fi, 50s and 60s style, atomic age, vibrant, highly detailed" 791 | }, 792 | { 793 | "name": "futuristic-sci-fi", 794 | "name_zh": "科幻未来主义", 795 | "template": "sci-fi style {prompt} . futuristic, technological, alien worlds, space themes, advanced civilizations" 796 | }, 797 | { 798 | "name": "futuristic-vaporwave", 799 | "name_zh": "未来波", 800 | "template": "vaporwave style {prompt} . retro aesthetic, cyberpunk, vibrant, neon colors, vintage 80s and 90s style, highly detailed" 801 | }, 802 | { 803 | "name": "game-bubble bobble", 804 | "name_zh": "游戏-泡泡龙", 805 | "template": "Bubble Bobble style {prompt} . 
8-bit, cute, pixelated, fantasy, vibrant, reminiscent of Bubble Bobble game" 806 | }, 807 | { 808 | "name": "game-cyberpunk game", 809 | "name_zh": "赛博朋克游戏", 810 | "template": "cyberpunk game style {prompt} . neon, dystopian, futuristic, digital, vibrant, detailed, high contrast, reminiscent of cyberpunk genre video games" 811 | }, 812 | { 813 | "name": "game-fighting game", 814 | "name_zh": "格斗游戏", 815 | "template": "fighting game style {prompt} . dynamic, vibrant, action-packed, detailed character design, reminiscent of fighting video games" 816 | }, 817 | { 818 | "name": "game-gta", 819 | "name_zh": "侠盗猎车手游戏", 820 | "template": "GTA-style artwork {prompt} . satirical, exaggerated, pop art style, vibrant colors, iconic characters, action-packed" 821 | }, 822 | { 823 | "name": "game-mario", 824 | "name_zh": "马里奥游戏", 825 | "template": "Super Mario style {prompt} . vibrant, cute, cartoony, fantasy, playful, reminiscent of Super Mario series" 826 | }, 827 | { 828 | "name": "game-minecraft", 829 | "name_zh": "我的世界游戏", 830 | "template": "Minecraft style {prompt} . blocky, pixelated, vibrant colors, recognizable characters and objects, game assets" 831 | }, 832 | { 833 | "name": "game-pokemon", 834 | "name_zh": "宝可梦游戏", 835 | "template": "Pokémon style {prompt} . vibrant, cute, anime, fantasy, reminiscent of Pokémon series" 836 | }, 837 | { 838 | "name": "game-retro arcade", 839 | "name_zh": "复古街机", 840 | "template": "retro arcade style {prompt} . 8-bit, pixelated, vibrant, classic video game, old school gaming, reminiscent of 80s and 90s arcade games" 841 | }, 842 | { 843 | "name": "game-retro game", 844 | "name_zh": "复古游戏", 845 | "template": "retro game art {prompt} . 16-bit, vibrant colors, pixelated, nostalgic, charming, fun" 846 | }, 847 | { 848 | "name": "game-rpg fantasy game", 849 | "name_zh": "角色扮演幻想游戏", 850 | "template": "role-playing game (RPG) style fantasy {prompt} . detailed, vibrant, immersive, reminiscent of high fantasy RPG games" 851 | }, 852 | { 853 | "name": "game-strategy game", 854 | "name_zh": "策略游戏", 855 | "template": "strategy game style {prompt} . overhead view, detailed map, units, reminiscent of real-time strategy video games" 856 | }, 857 | { 858 | "name": "game-streetfighter", 859 | "name_zh": "街头霸王游戏", 860 | "template": "Street Fighter style {prompt} . vibrant, dynamic, arcade, 2D fighting game, highly detailed, reminiscent of Street Fighter series" 861 | }, 862 | { 863 | "name": "game-zelda", 864 | "name_zh": "塞尔达传说游戏", 865 | "template": "Legend of Zelda style {prompt} . vibrant, fantasy, detailed, epic, heroic, reminiscent of The Legend of Zelda series" 866 | }, 867 | { 868 | "name": "misc-architectural", 869 | "name_zh": "建筑", 870 | "template": "architectural style {prompt} . clean lines, geometric shapes, minimalist, modern, architectural drawing, highly detailed" 871 | }, 872 | { 873 | "name": "misc-disco", 874 | "name_zh": "迪斯科", 875 | "template": "disco-themed {prompt} . vibrant, groovy, retro 70s style, shiny disco balls, neon lights, dance floor, highly detailed" 876 | }, 877 | { 878 | "name": "misc-dreamscape", 879 | "name_zh": "梦境", 880 | "template": "dreamscape {prompt} . surreal, ethereal, dreamy, mysterious, fantasy, highly detailed" 881 | }, 882 | { 883 | "name": "misc-dystopian", 884 | "name_zh": "反乌托邦", 885 | "template": "dystopian style {prompt} . bleak, post-apocalyptic, somber, dramatic, highly detailed" 886 | }, 887 | { 888 | "name": "misc-fairy tale", 889 | "name_zh": "童话故事", 890 | "template": "fairy tale {prompt} . 
magical, fantastical, enchanting, storybook style, highly detailed" 891 | }, 892 | { 893 | "name": "misc-gothic", 894 | "name_zh": "哥特", 895 | "template": "gothic style {prompt} . dark, mysterious, haunting, dramatic, ornate, detailed" 896 | }, 897 | { 898 | "name": "misc-grunge", 899 | "name_zh": "垃圾摇滚", 900 | "template": "grunge style {prompt} . textured, distressed, vintage, edgy, punk rock vibe, dirty, noisy" 901 | }, 902 | { 903 | "name": "misc-horror", 904 | "name_zh": "恐怖", 905 | "template": "horror-themed {prompt} . eerie, unsettling, dark, spooky, suspenseful, grim, highly detailed" 906 | }, 907 | { 908 | "name": "misc-kawaii", 909 | "name_zh": "卡哇伊", 910 | "template": "kawaii style {prompt} . cute, adorable, brightly colored, cheerful, anime influence, highly detailed" 911 | }, 912 | { 913 | "name": "misc-lovecraftian", 914 | "name_zh": "克苏鲁神话", 915 | "template": "lovecraftian horror {prompt} . eldritch, cosmic horror, unknown, mysterious, surreal, highly detailed" 916 | }, 917 | { 918 | "name": "misc-macabre", 919 | "name_zh": "恐怖的", 920 | "template": "macabre style {prompt} . dark, gothic, grim, haunting, highly detailed" 921 | }, 922 | { 923 | "name": "misc-manga", 924 | "name_zh": "漫画", 925 | "template": "manga style {prompt} . vibrant, high-energy, detailed, iconic, Japanese comic style" 926 | }, 927 | { 928 | "name": "misc-metropolis", 929 | "name_zh": "大都市", 930 | "template": "metropolis-themed {prompt} . urban, cityscape, skyscrapers, modern, futuristic, highly detailed" 931 | }, 932 | { 933 | "name": "misc-minimalist", 934 | "name_zh": "极简主义", 935 | "template": "minimalist style {prompt} . simple, clean, uncluttered, modern, elegant" 936 | }, 937 | { 938 | "name": "misc-monochrome", 939 | "name_zh": "单色", 940 | "template": "monochrome {prompt} . black and white, contrast, tone, texture, detailed" 941 | }, 942 | { 943 | "name": "misc-nautical", 944 | "name_zh": "航海", 945 | "template": "nautical-themed {prompt} . sea, ocean, ships, maritime, beach, marine life, highly detailed" 946 | }, 947 | { 948 | "name": "misc-space", 949 | "name_zh": "太空", 950 | "template": "space-themed {prompt} . cosmic, celestial, stars, galaxies, nebulas, planets, science fiction, highly detailed" 951 | }, 952 | { 953 | "name": "misc-stained glass", 954 | "name_zh": "彩色玻璃", 955 | "template": "stained glass style {prompt} . vibrant, beautiful, translucent, intricate, detailed" 956 | }, 957 | { 958 | "name": "misc-techwear fashion", 959 | "name_zh": "科技服饰", 960 | "template": "techwear fashion {prompt} . futuristic, cyberpunk, urban, tactical, sleek, dark, highly detailed" 961 | }, 962 | { 963 | "name": "misc-tribal", 964 | "name_zh": "部落", 965 | "template": "tribal style {prompt} . indigenous, ethnic, traditional patterns, bold, natural colors, highly detailed" 966 | }, 967 | { 968 | "name": "misc-zentangle", 969 | "name_zh": "禅绕画", 970 | "template": "zentangle {prompt} . intricate, abstract, monochrome, patterns, meditative, highly detailed" 971 | }, 972 | { 973 | "name": "papercraft-collage", 974 | "name_zh": "纸艺拼贴", 975 | "template": "collage style {prompt} . mixed media, layered, textural, detailed, artistic" 976 | }, 977 | { 978 | "name": "papercraft-flat papercut", 979 | "name_zh": "平面剪纸", 980 | "template": "flat papercut style {prompt} . silhouette, clean cuts, paper, sharp edges, minimalist, color block" 981 | }, 982 | { 983 | "name": "papercraft-kirigami", 984 | "name_zh": "剪纸", 985 | "template": "kirigami representation of {prompt} . 
3D, paper folding, paper cutting, Japanese, intricate, symmetrical, precision, clean lines" 986 | }, 987 | { 988 | "name": "papercraft-paper mache", 989 | "name_zh": "纸浆塑型", 990 | "template": "paper mache representation of {prompt} . 3D, sculptural, textured, handmade, vibrant, fun" 991 | }, 992 | { 993 | "name": "papercraft-paper quilling", 994 | "name_zh": "纸卷艺术", 995 | "template": "paper quilling art of {prompt} . intricate, delicate, curling, rolling, shaping, coiling, loops, 3D, dimensional, ornamental" 996 | }, 997 | { 998 | "name": "papercraft-papercut collage", 999 | "name_zh": "剪纸拼贴", 1000 | "template": "papercut collage of {prompt} . mixed media, textured paper, overlapping, asymmetrical, abstract, vibrant" 1001 | }, 1002 | { 1003 | "name": "papercraft-papercut shadow box", 1004 | "name_zh": "剪纸影箱", 1005 | "template": "3D papercut shadow box of {prompt} . layered, dimensional, depth, silhouette, shadow, papercut, handmade, high contrast" 1006 | }, 1007 | { 1008 | "name": "papercraft-stacked papercut", 1009 | "name_zh": "堆叠剪纸", 1010 | "template": "stacked papercut art of {prompt} . 3D, layered, dimensional, depth, precision cut, stacked layers, papercut, high contrast" 1011 | }, 1012 | { 1013 | "name": "papercraft-thick layered papercut", 1014 | "name_zh": "厚层剪纸", 1015 | "template": "thick layered papercut art of {prompt} . deep 3D, volumetric, dimensional, depth, thick paper, high stack, heavy texture, tangible layers" 1016 | }, 1017 | { 1018 | "name": "photo-alien", 1019 | "name_zh": "异形", 1020 | "template": "alien-themed {prompt} . extraterrestrial, cosmic, otherworldly, mysterious, sci-fi, highly detailed" 1021 | }, 1022 | { 1023 | "name": "photo-film noir", 1024 | "name_zh": "黑色电影", 1025 | "template": "film noir style {prompt} . monochrome, high contrast, dramatic shadows, 1940s style, mysterious, cinematic" 1026 | }, 1027 | { 1028 | "name": "photo-glamour", 1029 | "name_zh": "魅力", 1030 | "template": "glamorous photo {prompt} . high fashion, luxurious, extravagant, stylish, sensual, opulent, elegance, stunning beauty, professional, high contrast, detailed" 1031 | }, 1032 | { 1033 | "name": "photo-hdr", 1034 | "name_zh": "高动态范围", 1035 | "template": "HDR photo of {prompt} . High dynamic range, vivid, rich details, clear shadows and highlights, realistic, intense, enhanced contrast, highly detailed" 1036 | }, 1037 | { 1038 | "name": "photo-iphone photographic", 1039 | "name_zh": "iPhone摄影", 1040 | "template": "iphone photo {prompt} . large depth of field, deep depth of field, highly detailed" 1041 | }, 1042 | { 1043 | "name": "photo-long exposure", 1044 | "name_zh": "长曝光", 1045 | "template": "long exposure photo of {prompt} . Blurred motion, streaks of light, surreal, dreamy, ghosting effect, highly detailed" 1046 | }, 1047 | { 1048 | "name": "photo-neon noir", 1049 | "name_zh": "霓虹黑色", 1050 | "template": "neon noir {prompt} . cyberpunk, dark, rainy streets, neon signs, high contrast, low light, vibrant, highly detailed" 1051 | }, 1052 | { 1053 | "name": "photo-silhouette", 1054 | "name_zh": "剪影", 1055 | "template": "silhouette style {prompt} . high contrast, minimalistic, black and white, stark, dramatic" 1056 | }, 1057 | { 1058 | "name": "photo-tilt-shift", 1059 | "name_zh": "倾斜移位", 1060 | "template": "tilt-shift photo of {prompt} . 
selective focus, miniature effect, blurred background, highly detailed, vibrant, perspective control" 1061 | } 1062 | ] 1063 | 1064 | def __call__(self, prompt): 1065 | style_id = torch.randint(0, len(self.styles), size=(1,)).tolist()[0] 1066 | prompt = self.styles[style_id]["template"].format(prompt=prompt) 1067 | return prompt 1068 | --------------------------------------------------------------------------------
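Usage note: the __call__ method above draws one style uniformly at random with torch.randint and substitutes the incoming prompt into that style's template. Below is a minimal sketch of wiring this processor into the pipeline, assuming the enclosing class in pulse/processor/style.py is importable as StyleProcessor with a no-argument constructor, and that DataProcessUnit writes the values named in output_params back into the data dict and returns it (the class name and that output behaviour are assumptions; only the __call__ body appears above).

    from pulse.pipeline import DataProcessUnit
    from pulse.processor.style import StyleProcessor  # assumed class name

    # Feed data["prompt"] into the processor's `prompt` argument and write the
    # styled prompt back under the same key (assumed output handling).
    style_unit = DataProcessUnit(
        processor=StyleProcessor(),
        input_params={"prompt": "prompt"},
        output_params=("prompt",),
    )

    data = style_unit({"prompt": "a cat sitting on a windowsill"})
    # data["prompt"] might now read, for example:
    # "Macro Photography, a cat sitting on a windowsill, close-up, macro 100mm, macro photography"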