├── README.md ├── assets ├── OpenSans-Regular.ttf ├── README.md ├── backgrounds │ ├── 11th-gate-e-kl_vOpwLg-unsplash.jpg │ ├── adolfo-felix-PG8NyM_Mcts-unsplash.jpg │ ├── alexandra-zelena-phskyemu_c4-unsplash.jpg │ ├── benjamin-3WdChmuv7mE-unsplash.jpg │ ├── benjamin-child-0sT9YhNgSEs-unsplash.jpg │ ├── carlo-lisa-GHuT3dkZxYM-unsplash.jpg │ ├── changbok-ko-F8t2VGnI47I-unsplash.jpg │ ├── conscious-design-mLpbHWquEYM-unsplash.jpg │ ├── diego-jimenez-A-NVHPka9Rk-unsplash.jpg │ ├── dogancan-ozturan-urY_iHk3nm0-unsplash.jpg │ ├── elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg │ ├── evaldas-grizas-9-WvhPP1I6M-unsplash.jpg │ ├── heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg │ ├── inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg │ ├── jordan-steranka-NcF6MVPW644-unsplash.jpg │ ├── joseph-barrientos-oQl0eVYd_n8-unsplash.jpg │ ├── juli-kosolapova-Us_dv71f1bc-unsplash.jpg │ ├── masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg │ ├── meritt-thomas-GktK3Jb9BRE-unsplash.jpg │ ├── nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg │ ├── neom-aWHKsYkbCi8-unsplash.jpg │ ├── neom-cYy-o9i8aCs-unsplash.jpg │ ├── neom-jTxhUMyPTrE-unsplash.jpg │ ├── neom-kXbit_yx8t4-unsplash.jpg │ ├── presentsquare-cSK33MUaVzQ-unsplash.jpg │ ├── robert-lukeman-PH0HYjsf2n8-unsplash.jpg │ ├── sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg │ ├── tj-holowaychuk-1EYMue_AwDw-unsplash.jpg │ ├── toby-elliott-17yY9Lyddrc-unsplash.jpg │ ├── yaodan-fc6cEU1sTBY-unsplash.jpg │ └── yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg ├── objects │ ├── alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg │ ├── allec-gomes-xnRg3xDcNnE-unsplash.jpg │ ├── allenwhm-wh-RgpT4_5g-unsplash.jpg │ ├── alvan-nee-T-0EW-SEbsE-unsplash.jpg │ ├── amit-lahav-rxN2MRdFJVg-unsplash.jpg │ ├── andre-taissin-hnyZg63sRCY-unsplash.jpg │ ├── arno-senoner-HFE2RyC76tw-unsplash.jpg │ ├── artem-maltsev-VOO5ojMQ_9A-unsplash.jpg │ ├── aurelia-dubois-xzrJCS4grC4-unsplash.jpg │ ├── bermix-studio-ZMxHvB9J7YU-unsplash.jpg │ ├── birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg │ ├── bonnie-kittle-MUcxe_wDurE-unsplash.jpg │ ├── brigitte-tohm-51AK6yJDgv0-unsplash.jpg │ ├── chastity-cortijo-3OENu9mJ0i0-unsplash.jpg │ ├── cici-hung-nV3v8ZMRLNc-unsplash.jpg │ ├── claire-abdo-_-635EI3nV8-unsplash.jpg │ ├── coppertist-wu-OrQvIBYNPcw-unsplash.jpg │ ├── coppertist-wu-XlFSnJOeyQs-unsplash.jpg │ ├── coppertist-wu-its52T6D4bo-unsplash.jpg │ ├── coppertist-wu-mwjuTJzJ9w4-unsplash.jpg │ ├── coppertist-wu-w1AK7wWsTPs-unsplash.jpg │ ├── courtney-cook-KcY7DCqc9VI-unsplash.jpg │ ├── creative-headline-APNnyM36puU-unsplash.jpg │ ├── daniel-k-cheung-WJuwxFIpidc-unsplash.jpg │ ├── daniil-silantev-1P6AnKDw6S8-unsplash.jpg │ ├── dominic-phillips-QEVT_XYXKPs-unsplash.jpg │ ├── engin-akyurt-TDOClniEwmI-unsplash.jpg │ ├── engin-akyurt-iLHCV4ZBH7s-unsplash.jpg │ ├── erfan-tajik-m_hgaJLqCRM-unsplash.jpg │ ├── erick-butler-3XQlnryKz0o-unsplash.jpg │ ├── erik-mclean-kNRKg1pfiqE-unsplash.jpg │ ├── fatty-corgi-1QsQRkxnU6I-unsplash.jpg │ ├── fernando-andrade-Q33VONoOfSU-unsplash.jpg │ ├── frank-zimmermann-xYh8uAwqZK0-unsplash.jpg │ ├── gaby-yerden--OIinu1YDTk-unsplash.jpg │ ├── gary-walker-jones-uC5jnQPdY7I-unsplash.jpg │ ├── gilbert-beltran-EUQRWgmvhr8-unsplash.jpg │ ├── giorgio-trovato-p0OlRAAYXLY-unsplash.jpg │ ├── hannah-pemberton-3d82e5_ylGo-unsplash.jpg │ ├── irene-kredenets-tcVH_BwHtrc-unsplash.jpg │ ├── isaac-martin-Jewkfj03OUU-unsplash.jpg │ ├── isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg │ ├── ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg │ ├── ivan-pergasi-CZT7lkrt5sU-unsplash.jpg │ ├── jessica-tan-Rufz-e6Qrqg-unsplash.jpg │ ├── 
joshua-coleman-8V4y-XXT3MQ-unsplash.jpg │ ├── juan-mayobre-_IAhW7a4pWA-unsplash.jpg │ ├── kate-0I8hNhkFUWQ-unsplash.jpg │ ├── kelly-sikkema-LHxU7QYSn7c-unsplash.jpg │ ├── kenny-eliason-lbqZUefMLvQ-unsplash.jpg │ ├── kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg │ ├── lora-seis-dS5xpjW38Qk-unsplash.jpg │ ├── lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg │ ├── lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg │ ├── lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg │ ├── maria-oswalt-GKE6i5mQplI-unsplash.jpg │ ├── mario-losereit-mTZyJeR1Rnc-unsplash.jpg │ ├── mediamodifier-gAUUqaI83b8-unsplash.jpg │ ├── mehmet-keskin-qHdGjahnx48-unsplash.jpg │ ├── milad-fakurian-3CoSLrSrvhY-unsplash.jpg │ ├── milad-fakurian-gpkPI7qs1cg-unsplash.jpg │ ├── mink-mingle-Riz1qAplMQk-unsplash.jpg │ ├── mo-U3Kst7MY4Ok-unsplash.jpg │ ├── mockup-graphics-aUkYaG12Dgs-unsplash.jpg │ ├── mockup-graphics-biK3YJHhBfM-unsplash.jpg │ ├── mockup-graphics-enNffryKuQI-unsplash.jpg │ ├── mockup-graphics-haSJEJYzl5A-unsplash.jpg │ ├── morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg │ ├── mulyadi-JEfwd_OkQGE-unsplash.jpg │ ├── naomi-hebert-2dcYhvbHV-M-unsplash.jpg │ ├── nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg │ ├── nik-JbAz6eG5GS4-unsplash.jpg │ ├── nikolett-emmert-_g2jz1SghvQ-unsplash.jpg │ ├── olena-bohovyk-InlMkMNlrhY-unsplash.jpg │ ├── or-hakim-VQxKattL-X4-unsplash.jpg │ ├── pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg │ ├── personalgraphic-com-IFlg3kFbR0E-unsplash.jpg │ ├── personalgraphic-com-glY1L-eo0Fc-unsplash.jpg │ ├── pexels-amar-19086869.jpg │ ├── pexels-cottonbro-3661226.jpg │ ├── pexels-damir-10608624.jpg │ ├── pexels-eva-bronzini-5777472.jpg │ ├── pexels-laarkstudio-7186448.jpg │ ├── pexels-laarkstudio-7434298.jpg │ ├── pexels-photoscene-7271951.jpg │ ├── pexels-sirikul-r-2043201-11393334.jpg │ ├── pixmike-t1Lr0BPQfKg-unsplash.jpg │ ├── r-n-tyfqOL1FAQc-unsplash.jpg │ ├── reno-laithienne-kzX7dcKoMTM-unsplash.jpg │ ├── ron-dauphin-UgidX4V13Gc-unsplash.jpg │ ├── ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg │ ├── sami-ahmed-ngS0S-ZjOpc-unsplash.jpg │ ├── santhosh-kumar-RqYTuWkTdEs-unsplash.jpg │ ├── sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg │ ├── shirley-tittermary-JnymxncvrrY-unsplash.jpg │ ├── sincerely-media-MWcwKjwY8yo-unsplash.jpg │ ├── siora-photography-GMWh_I-odL0-unsplash.jpg │ ├── sun-lingyan-_H0fjILH5Vw-unsplash.jpg │ ├── thoa-ngo-AZr6AOMu3l8-unsplash.jpg │ ├── thought-catalog-9aOswReDKPo-unsplash.jpg │ ├── timothy-dykes-LhqLdDPcSV8-unsplash.jpg │ ├── tom-crew-Mz__0nr1AM8-unsplash.jpg │ └── yucel-moran-L0VzWT2Y3K8-unsplash.jpg ├── openimages_classes.txt └── textures │ ├── alexander-grey-LV7lkepljdM-unsplash.jpg │ ├── annie-spratt-pwAvA5CvuS8-unsplash.jpg │ ├── bernard-hermant-X-Bu9X6gok0-unsplash.jpg │ ├── boliviainteligente-zeQ5n-03Y40-unsplash.jpg │ ├── british-library-z0SQJUnOKm8-unsplash.jpg │ ├── clay-banks-VppPygAZKU0-unsplash.jpg │ ├── dominic-phillips-QEVT_XYXKPs-unsplash.jpg │ ├── emily-bernal-r2F5ZIEUPtk-unsplash.jpg │ ├── engin-akyurt-aXVro7lQyUM-unsplash.jpg │ ├── erick-butler-3XQlnryKz0o-unsplash.jpg │ ├── eugene-golovesov-htMfQCwKrro-unsplash.jpg │ ├── fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg │ ├── fruit-basket-agency-caH-ZLrisZA-unsplash.jpg │ ├── george-webster-p1VZ5IbT2Tg-unsplash.jpg │ ├── hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg │ ├── ioana-cristiana-0WW38q7lGZA-unsplash.jpg │ ├── james-lee-vpBPwauyeos-unsplash.jpg │ ├── jean-wimmerlin-dcasj22jmCk-unsplash.jpg │ ├── joel-filipe-Wc8k-KryEPM-unsplash.jpg │ ├── kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg │ ├── krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg │ ├── 
li-zhang-K-DwbsTXliY-unsplash.jpg │ ├── marcus-urbenz-_a7JjjqgurE-unsplash.jpg │ ├── maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg │ ├── mario-losereit-mTZyJeR1Rnc-unsplash.jpg │ ├── martin-neuhold-K8WDdu4FIGk-unsplash.jpg │ ├── maxim-berg-6-NP_CdNqtU-unsplash.jpg │ ├── mihaly-varga-AQFfdEY3X4Q-unsplash.jpg │ ├── mo-U3Kst7MY4Ok-unsplash.jpg │ ├── mockup-graphics-BAcrUzXyg18-unsplash.jpg │ ├── natasha-6N-PvrURkZE-unsplash.jpg │ ├── rene-porter-jQijOI757W0-unsplash.jpg │ ├── ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg │ ├── rick-rothenberg-L-SoVuu1dTI-unsplash.jpg │ ├── rivage-mFcsYcSSiMQ-unsplash.jpg │ ├── sarah-claeys-lxw686JyMT8-unsplash.jpg │ ├── sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg │ ├── simon-lee-HmHOhR5meGo-unsplash.jpg │ ├── simon-lee-ORT46c9-AKA-unsplash.jpg │ ├── taylor-ann-art-hsWLFtOeDkg-unsplash.jpg │ ├── tomas-m-thjJIoJhMv4-unsplash.jpg │ ├── vadim-bogulov--PwZWV5AWV0-unsplash.jpg │ └── vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg ├── configs ├── infer │ ├── instruct.yaml │ ├── scene.yaml │ ├── texturing.yaml │ └── union.yaml └── train │ ├── clothes.yaml │ ├── instruct.yaml │ ├── scene.yaml │ ├── texturing.yaml │ └── union.yaml ├── data_generation ├── generate_random_images.py ├── generate_scenes_data.py ├── generate_texture_data.py ├── generate_union_data.py ├── remove_backs.py └── words_bank.py ├── kandinsky2 ├── __init__.py ├── configs.py ├── kandinsky2_1_model.py ├── kandinsky2_2_model.py ├── kandinsky2_model.py ├── model │ ├── fp16_util.py │ ├── gaussian_diffusion.py │ ├── losses.py │ ├── model_creation.py │ ├── nn.py │ ├── prior.py │ ├── resample.py │ ├── respace.py │ ├── samplers.py │ ├── text2im_model.py │ ├── text2im_model2_1.py │ ├── text_encoders.py │ ├── unet.py │ └── utils.py ├── train_utils │ ├── data │ │ ├── dataset_prior.py │ │ └── dataset_unclip_2_1.py │ ├── ema.py │ ├── train_module_pl2_1.py │ ├── trainer_2_1_uclip.py │ ├── trainer_prior.py │ └── utils.py ├── utils.py └── vqgan │ ├── autoencoder.py │ ├── movq_modules.py │ ├── quntize.py │ └── vqgan_blocks.py ├── model ├── __init__.py ├── pipeline_pops.py └── pops_utils.py ├── requirements.txt ├── scripts ├── __init__.py ├── infer_binary.py ├── infer_instruct.py └── train.py ├── training ├── __init__.py ├── coach.py ├── dataset.py └── train_config.py └── utils ├── __init__.py ├── clothes_utils.py └── vis_utils.py /README.md: -------------------------------------------------------------------------------- 1 | # pOps: Photo-Inspired Diffusion Operators 2 | 3 | > Elad Richardson, Yuval Alaluf, Ali Mahdavi-Amiri, Daniel Cohen-Or 4 | > Tel Aviv University, Simon Fraser University 5 | > 6 | > Text-guided image generation enables the creation of visual content from textual descriptions. 7 | However, certain visual concepts cannot be effectively conveyed through language alone. This has sparked a renewed interest in utilizing the CLIP image embedding space for more visually-oriented tasks through methods such as IP-Adapter. Interestingly, the CLIP image embedding space has been shown to be semantically meaningful, where linear operations within this space yield semantically meaningful results. Yet, the specific meaning of these operations can vary unpredictably across different images. 8 | To harness this potential, we introduce pOps, a framework that trains specific semantic operators directly on CLIP image embeddings. 9 | Each pOps operator is built upon a pretrained Diffusion Prior model. 
10 | While the Diffusion Prior model was originally trained to map between text embeddings and image embeddings, we demonstrate that it can be tuned to accommodate new input conditions, resulting in a diffusion operator. 11 | Working directly over image embeddings not only improves our ability to learn semantic operations but also allows us to directly use a textual CLIP loss as additional supervision when needed. 12 | We show that pOps can be used to learn a variety of photo-inspired operators with distinct semantic meanings, highlighting the semantic diversity and potential of our proposed approach. 13 | 14 | 15 | 16 | 17 | [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/pOpsPaper/pOps-space) 18 | 19 | 20 | 21 |

22 | 23 |
24 | Different operators trained using pOps. Our method learns operators that are applied directly in the image embedding space, resulting in a variety of semantic operations that can then be realized as images using an image diffusion model. 25 |

26 | 27 | 28 | ## Description :scroll: 29 | Official implementation of the paper "pOps: Photo-Inspired Diffusion Operators" 30 | 31 | ## Getting started with pOps :rocket: 32 | To set up the environment with all necessary dependencies, please run: 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | 38 | ## Inference 🧑‍🎨 39 | 40 | 41 | 42 | We provide pretrained models for our different operators via a Hugging Face model card. 43 | 44 | ### Binary Operators 45 | 46 | To run a binary operator, simply use the `scripts.infer_binary` script with the corresponding config file. 47 | 48 | ```bash 49 | python -m scripts.infer_binary --config_path=configs/infer/texturing.yaml 50 | # or 51 | python -m scripts.infer_binary --config_path=configs/infer/union.yaml 52 | # or 53 | python -m scripts.infer_binary --config_path=configs/infer/scene.yaml 54 | ``` 55 | 56 | This will automatically download the pretrained model and run inference on the default input images. 57 | 58 | Configuration is managed by pyrallis; some useful flags for the `scripts.infer_binary` script are: 59 | - `--output_dir_name`: The name of the output directory where the results will be saved. 60 | - `--dir_a`: The path to the directory containing the input images for the first input. 61 | - `--dir_b`: The path to the directory containing the input images for the second input. 62 | - `--vis_mean`: Show results of the mean of the two inputs. 63 | 64 | For compositions of multiple operators, note that the inference script outputs both the resulting images and the corresponding CLIP embeddings. 65 | Thus, you can simply feed a directory of embeddings to either `dir_a` or `dir_b` (see the example at the end of this section). Useful filtering flags are: 66 | - `--file_exts_a` (/b): Filter to only `.jpg` images or `.pth` embeddings. 67 | - `--name_filter_a` (/b): Filter to only images with specific names. 68 | 69 | To sample results with missing input conditions, use the `--drop_condition_a` or `--drop_condition_b` flags. 70 | 71 | Finally, to use the IP-Adapter with the inference script, pass the `--use_ipadapter` flag; for additional depth conditioning, pass the `--use_depth` flag. 72 |
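As a minimal sketch of such a composition, the texturing operator can be run first and its saved embeddings then fed into the scene operator. The flags below are the ones documented above, but the output path (`outputs/texturing_step`), the extension value given to `--file_exts_a`, and the assignment of object/background to inputs a/b are assumptions that should be checked against the inference configs.

```bash
# Sketch: compose texturing -> scene by reusing the saved CLIP embeddings.
# 1) Run texturing; results are written as images plus .pth embeddings.
python -m scripts.infer_binary --config_path=configs/infer/texturing.yaml \
    --output_dir_name=texturing_step

# 2) Point input "a" of the scene operator at wherever step 1 saved its
#    outputs, keeping only the .pth embedding files; input "b" is assumed
#    here to take regular background images.
python -m scripts.infer_binary --config_path=configs/infer/scene.yaml \
    --dir_a=outputs/texturing_step \
    --file_exts_a=.pth \
    --dir_b=assets/backgrounds
```

The same pattern applies to longer chains of operators, since every run saves its embeddings alongside the generated images.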
73 | ### Instruct Operator 74 | 75 | To run the instruct operator, use the `scripts.infer_instruct` script with the corresponding config file. 76 | 77 | ```bash 78 | python -m scripts.infer_instruct --config_path=configs/infer/instruct.yaml 79 | ``` 80 | 81 | ## Training 📉 82 | 83 | ### Data Generation 84 | We provide several scripts for data generation under the `data_generation` directory. 85 | - `generate_texture_data.py`: Generates texture data. 86 | - `generate_scenes_data.py`: Generates scene data. 87 | - `generate_union_data.py`: Generates union data. 88 | 89 | The scene operator also requires random backgrounds, which can be generated using the `generate_random_images.py` script. 90 | ```bash 91 | python -m data_generation.generate_random_images --output_dir=datasets/random_backgrounds --type=scenes 92 | ``` 93 | 94 | The `generate_random_images.py` script can also be used to generate random images for the other operators: 95 | ```bash 96 | python -m data_generation.generate_random_images --output_dir=datasets/random_images --type=objects 97 | ``` 98 | 99 | These images can be used for the unconditional steps in training, as will be described in the training section. 100 | 101 | ### Training Script 102 | Training itself is managed by the `scripts.train` script. See the `configs/train` directory for the different training configurations. 103 | 104 | ```bash 105 | python -m scripts.train --config_path=configs/train/texturing.yaml 106 | # or 107 | python -m scripts.train --config_path=configs/train/scene.yaml 108 | # or 109 | python -m scripts.train --config_path=configs/train/union.yaml 110 | # or 111 | python -m scripts.train --config_path=configs/train/instruct.yaml 112 | # or 113 | python -m scripts.train --config_path=configs/train/clothes.yaml 114 | ``` 115 | 116 | The operator itself is defined via the `--mode` flag, which should be set to the specific operator being trained. 117 | 118 | Relevant data paths and validation paths can be set in the configuration file. 119 | 120 | Use the optional `randoms_dir` flag to specify the directory of random images for the unconditional steps. 121 | 122 | ## Acknowledgements 123 | Our codebase heavily relies on the [Kandinsky model](https://github.com/ai-forever/Kandinsky-2). 124 | 125 | ## Citation 126 | If you use this code for your research, please cite the following paper: 127 | ``` 128 | @article{richardson2024pops, 129 | title={pOps: Photo-Inspired Diffusion Operators}, 130 | author={Richardson, Elad and Alaluf, Yuval and Mahdavi-Amiri, Ali and Cohen-Or, Daniel}, 131 | journal={arXiv preprint arXiv:2406.01300}, 132 | year={2024} 133 | } 134 | ``` 135 | -------------------------------------------------------------------------------- /assets/OpenSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/OpenSans-Regular.ttf -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | Images are taken from either [Unsplash](https://unsplash.com/) or [Pexels](https://www.pexels.com/); see the filename for the source.
2 | -------------------------------------------------------------------------------- /assets/backgrounds/11th-gate-e-kl_vOpwLg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/11th-gate-e-kl_vOpwLg-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/adolfo-felix-PG8NyM_Mcts-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/adolfo-felix-PG8NyM_Mcts-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/alexandra-zelena-phskyemu_c4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/alexandra-zelena-phskyemu_c4-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/benjamin-3WdChmuv7mE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/benjamin-3WdChmuv7mE-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/benjamin-child-0sT9YhNgSEs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/benjamin-child-0sT9YhNgSEs-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/carlo-lisa-GHuT3dkZxYM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/carlo-lisa-GHuT3dkZxYM-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/changbok-ko-F8t2VGnI47I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/changbok-ko-F8t2VGnI47I-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/conscious-design-mLpbHWquEYM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/conscious-design-mLpbHWquEYM-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/diego-jimenez-A-NVHPka9Rk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/diego-jimenez-A-NVHPka9Rk-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/dogancan-ozturan-urY_iHk3nm0-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/dogancan-ozturan-urY_iHk3nm0-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/evaldas-grizas-9-WvhPP1I6M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/evaldas-grizas-9-WvhPP1I6M-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/jordan-steranka-NcF6MVPW644-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/jordan-steranka-NcF6MVPW644-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/joseph-barrientos-oQl0eVYd_n8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/joseph-barrientos-oQl0eVYd_n8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/juli-kosolapova-Us_dv71f1bc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/juli-kosolapova-Us_dv71f1bc-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/meritt-thomas-GktK3Jb9BRE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/meritt-thomas-GktK3Jb9BRE-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/backgrounds/nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-aWHKsYkbCi8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-aWHKsYkbCi8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-cYy-o9i8aCs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-cYy-o9i8aCs-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-jTxhUMyPTrE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-jTxhUMyPTrE-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-kXbit_yx8t4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-kXbit_yx8t4-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/presentsquare-cSK33MUaVzQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/presentsquare-cSK33MUaVzQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/robert-lukeman-PH0HYjsf2n8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/robert-lukeman-PH0HYjsf2n8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/tj-holowaychuk-1EYMue_AwDw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/tj-holowaychuk-1EYMue_AwDw-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/toby-elliott-17yY9Lyddrc-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/toby-elliott-17yY9Lyddrc-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/yaodan-fc6cEU1sTBY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/yaodan-fc6cEU1sTBY-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/allec-gomes-xnRg3xDcNnE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/allec-gomes-xnRg3xDcNnE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/allenwhm-wh-RgpT4_5g-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/allenwhm-wh-RgpT4_5g-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/alvan-nee-T-0EW-SEbsE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/alvan-nee-T-0EW-SEbsE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/amit-lahav-rxN2MRdFJVg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/amit-lahav-rxN2MRdFJVg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/andre-taissin-hnyZg63sRCY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/andre-taissin-hnyZg63sRCY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/arno-senoner-HFE2RyC76tw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/arno-senoner-HFE2RyC76tw-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/artem-maltsev-VOO5ojMQ_9A-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/artem-maltsev-VOO5ojMQ_9A-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/aurelia-dubois-xzrJCS4grC4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/aurelia-dubois-xzrJCS4grC4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/bermix-studio-ZMxHvB9J7YU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/bermix-studio-ZMxHvB9J7YU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/bonnie-kittle-MUcxe_wDurE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/bonnie-kittle-MUcxe_wDurE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/brigitte-tohm-51AK6yJDgv0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/brigitte-tohm-51AK6yJDgv0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/chastity-cortijo-3OENu9mJ0i0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/chastity-cortijo-3OENu9mJ0i0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/cici-hung-nV3v8ZMRLNc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/cici-hung-nV3v8ZMRLNc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/claire-abdo-_-635EI3nV8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/claire-abdo-_-635EI3nV8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-OrQvIBYNPcw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-OrQvIBYNPcw-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/coppertist-wu-XlFSnJOeyQs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-XlFSnJOeyQs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-its52T6D4bo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-its52T6D4bo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-mwjuTJzJ9w4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-mwjuTJzJ9w4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-w1AK7wWsTPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-w1AK7wWsTPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/courtney-cook-KcY7DCqc9VI-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/courtney-cook-KcY7DCqc9VI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/creative-headline-APNnyM36puU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/creative-headline-APNnyM36puU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/daniel-k-cheung-WJuwxFIpidc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/daniel-k-cheung-WJuwxFIpidc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/daniil-silantev-1P6AnKDw6S8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/daniil-silantev-1P6AnKDw6S8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/dominic-phillips-QEVT_XYXKPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/dominic-phillips-QEVT_XYXKPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/engin-akyurt-TDOClniEwmI-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/engin-akyurt-TDOClniEwmI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/engin-akyurt-iLHCV4ZBH7s-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/engin-akyurt-iLHCV4ZBH7s-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erfan-tajik-m_hgaJLqCRM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erfan-tajik-m_hgaJLqCRM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erick-butler-3XQlnryKz0o-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erick-butler-3XQlnryKz0o-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erik-mclean-kNRKg1pfiqE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erik-mclean-kNRKg1pfiqE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/fatty-corgi-1QsQRkxnU6I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/fatty-corgi-1QsQRkxnU6I-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/fernando-andrade-Q33VONoOfSU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/fernando-andrade-Q33VONoOfSU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/frank-zimmermann-xYh8uAwqZK0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/frank-zimmermann-xYh8uAwqZK0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gaby-yerden--OIinu1YDTk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gaby-yerden--OIinu1YDTk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gary-walker-jones-uC5jnQPdY7I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gary-walker-jones-uC5jnQPdY7I-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gilbert-beltran-EUQRWgmvhr8-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gilbert-beltran-EUQRWgmvhr8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/giorgio-trovato-p0OlRAAYXLY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/giorgio-trovato-p0OlRAAYXLY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/hannah-pemberton-3d82e5_ylGo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/hannah-pemberton-3d82e5_ylGo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/irene-kredenets-tcVH_BwHtrc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/irene-kredenets-tcVH_BwHtrc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/isaac-martin-Jewkfj03OUU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/isaac-martin-Jewkfj03OUU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ivan-pergasi-CZT7lkrt5sU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ivan-pergasi-CZT7lkrt5sU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/jessica-tan-Rufz-e6Qrqg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/jessica-tan-Rufz-e6Qrqg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/joshua-coleman-8V4y-XXT3MQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/joshua-coleman-8V4y-XXT3MQ-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/juan-mayobre-_IAhW7a4pWA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/juan-mayobre-_IAhW7a4pWA-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kate-0I8hNhkFUWQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kate-0I8hNhkFUWQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kelly-sikkema-LHxU7QYSn7c-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kelly-sikkema-LHxU7QYSn7c-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kenny-eliason-lbqZUefMLvQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kenny-eliason-lbqZUefMLvQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lora-seis-dS5xpjW38Qk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lora-seis-dS5xpjW38Qk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/maria-oswalt-GKE6i5mQplI-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/maria-oswalt-GKE6i5mQplI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mario-losereit-mTZyJeR1Rnc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mario-losereit-mTZyJeR1Rnc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mediamodifier-gAUUqaI83b8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mediamodifier-gAUUqaI83b8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mehmet-keskin-qHdGjahnx48-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mehmet-keskin-qHdGjahnx48-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/milad-fakurian-3CoSLrSrvhY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/milad-fakurian-3CoSLrSrvhY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/milad-fakurian-gpkPI7qs1cg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/milad-fakurian-gpkPI7qs1cg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mink-mingle-Riz1qAplMQk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mink-mingle-Riz1qAplMQk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mo-U3Kst7MY4Ok-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mo-U3Kst7MY4Ok-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-aUkYaG12Dgs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-aUkYaG12Dgs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-biK3YJHhBfM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-biK3YJHhBfM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-enNffryKuQI-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-enNffryKuQI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-haSJEJYzl5A-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-haSJEJYzl5A-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mulyadi-JEfwd_OkQGE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mulyadi-JEfwd_OkQGE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/naomi-hebert-2dcYhvbHV-M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/naomi-hebert-2dcYhvbHV-M-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nik-JbAz6eG5GS4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nik-JbAz6eG5GS4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nikolett-emmert-_g2jz1SghvQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nikolett-emmert-_g2jz1SghvQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/olena-bohovyk-InlMkMNlrhY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/olena-bohovyk-InlMkMNlrhY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/or-hakim-VQxKattL-X4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/or-hakim-VQxKattL-X4-unsplash.jpg -------------------------------------------------------------------------------- 
/assets/objects/pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/personalgraphic-com-IFlg3kFbR0E-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/personalgraphic-com-IFlg3kFbR0E-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/personalgraphic-com-glY1L-eo0Fc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/personalgraphic-com-glY1L-eo0Fc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-amar-19086869.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-amar-19086869.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-cottonbro-3661226.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-cottonbro-3661226.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-damir-10608624.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-damir-10608624.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-eva-bronzini-5777472.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-eva-bronzini-5777472.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-laarkstudio-7186448.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-laarkstudio-7186448.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-laarkstudio-7434298.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-laarkstudio-7434298.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-photoscene-7271951.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-photoscene-7271951.jpg -------------------------------------------------------------------------------- 
/assets/objects/pexels-sirikul-r-2043201-11393334.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-sirikul-r-2043201-11393334.jpg -------------------------------------------------------------------------------- /assets/objects/pixmike-t1Lr0BPQfKg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pixmike-t1Lr0BPQfKg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/r-n-tyfqOL1FAQc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/r-n-tyfqOL1FAQc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/reno-laithienne-kzX7dcKoMTM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/reno-laithienne-kzX7dcKoMTM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ron-dauphin-UgidX4V13Gc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ron-dauphin-UgidX4V13Gc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sami-ahmed-ngS0S-ZjOpc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sami-ahmed-ngS0S-ZjOpc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/santhosh-kumar-RqYTuWkTdEs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/santhosh-kumar-RqYTuWkTdEs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/shirley-tittermary-JnymxncvrrY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/shirley-tittermary-JnymxncvrrY-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/sincerely-media-MWcwKjwY8yo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sincerely-media-MWcwKjwY8yo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/siora-photography-GMWh_I-odL0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/siora-photography-GMWh_I-odL0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sun-lingyan-_H0fjILH5Vw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sun-lingyan-_H0fjILH5Vw-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/thoa-ngo-AZr6AOMu3l8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/thoa-ngo-AZr6AOMu3l8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/thought-catalog-9aOswReDKPo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/thought-catalog-9aOswReDKPo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/timothy-dykes-LhqLdDPcSV8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/timothy-dykes-LhqLdDPcSV8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/tom-crew-Mz__0nr1AM8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/tom-crew-Mz__0nr1AM8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/yucel-moran-L0VzWT2Y3K8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/yucel-moran-L0VzWT2Y3K8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/alexander-grey-LV7lkepljdM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/alexander-grey-LV7lkepljdM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/annie-spratt-pwAvA5CvuS8-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/annie-spratt-pwAvA5CvuS8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/bernard-hermant-X-Bu9X6gok0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/bernard-hermant-X-Bu9X6gok0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/boliviainteligente-zeQ5n-03Y40-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/boliviainteligente-zeQ5n-03Y40-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/british-library-z0SQJUnOKm8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/british-library-z0SQJUnOKm8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/clay-banks-VppPygAZKU0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/clay-banks-VppPygAZKU0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/dominic-phillips-QEVT_XYXKPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/dominic-phillips-QEVT_XYXKPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/emily-bernal-r2F5ZIEUPtk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/emily-bernal-r2F5ZIEUPtk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/engin-akyurt-aXVro7lQyUM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/engin-akyurt-aXVro7lQyUM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/erick-butler-3XQlnryKz0o-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/erick-butler-3XQlnryKz0o-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/eugene-golovesov-htMfQCwKrro-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/eugene-golovesov-htMfQCwKrro-unsplash.jpg -------------------------------------------------------------------------------- 
/assets/textures/fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/fruit-basket-agency-caH-ZLrisZA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/fruit-basket-agency-caH-ZLrisZA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/george-webster-p1VZ5IbT2Tg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/george-webster-p1VZ5IbT2Tg-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/ioana-cristiana-0WW38q7lGZA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/ioana-cristiana-0WW38q7lGZA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/james-lee-vpBPwauyeos-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/james-lee-vpBPwauyeos-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/jean-wimmerlin-dcasj22jmCk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/jean-wimmerlin-dcasj22jmCk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/joel-filipe-Wc8k-KryEPM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/joel-filipe-Wc8k-KryEPM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/li-zhang-K-DwbsTXliY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/li-zhang-K-DwbsTXliY-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/marcus-urbenz-_a7JjjqgurE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/marcus-urbenz-_a7JjjqgurE-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mario-losereit-mTZyJeR1Rnc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mario-losereit-mTZyJeR1Rnc-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/martin-neuhold-K8WDdu4FIGk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/martin-neuhold-K8WDdu4FIGk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/maxim-berg-6-NP_CdNqtU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/maxim-berg-6-NP_CdNqtU-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mihaly-varga-AQFfdEY3X4Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mihaly-varga-AQFfdEY3X4Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mo-U3Kst7MY4Ok-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mo-U3Kst7MY4Ok-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mockup-graphics-BAcrUzXyg18-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mockup-graphics-BAcrUzXyg18-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/natasha-6N-PvrURkZE-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/natasha-6N-PvrURkZE-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rene-porter-jQijOI757W0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rene-porter-jQijOI757W0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rick-rothenberg-L-SoVuu1dTI-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rick-rothenberg-L-SoVuu1dTI-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rivage-mFcsYcSSiMQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rivage-mFcsYcSSiMQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/sarah-claeys-lxw686JyMT8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/sarah-claeys-lxw686JyMT8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/simon-lee-HmHOhR5meGo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/simon-lee-HmHOhR5meGo-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/simon-lee-ORT46c9-AKA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/simon-lee-ORT46c9-AKA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/taylor-ann-art-hsWLFtOeDkg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/taylor-ann-art-hsWLFtOeDkg-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/textures/tomas-m-thjJIoJhMv4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/tomas-m-thjJIoJhMv4-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/vadim-bogulov--PwZWV5AWV0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/vadim-bogulov--PwZWV5AWV0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg -------------------------------------------------------------------------------- /configs/infer/instruct.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/instruct/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | prior_guidance_scale: [1] 5 | output_dir_name: inference/instruct_results 6 | texts: ['shiny', 'enormous', 'aged'] -------------------------------------------------------------------------------- /configs/infer/scene.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/scene/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/backgrounds 5 | prior_guidance_scale: [1] 6 | output_dir_name: inference/scene_results -------------------------------------------------------------------------------- /configs/infer/texturing.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/texturing/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/textures 5 | prior_guidance_scale: [8] 6 | output_dir_name: inference/texturing_results -------------------------------------------------------------------------------- /configs/infer/union.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/union/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/objects 5 | prior_guidance_scale: [8] 6 | output_dir_name: inference/union_results -------------------------------------------------------------------------------- /configs/train/clothes.yaml: -------------------------------------------------------------------------------- 1 | mode: 'clothes' 2 | output_dir: 'results/clothes_experiment' -------------------------------------------------------------------------------- /configs/train/instruct.yaml: -------------------------------------------------------------------------------- 1 | mode: 'instruct' 2 | dataset_path: 'datasets/generated/random_objects' 3 | val_dataset_path: 'assets/objects' 4 | output_dir: 'results/instruct_experiment' -------------------------------------------------------------------------------- /configs/train/scene.yaml: -------------------------------------------------------------------------------- 1 | mode: 'scene' 2 
| dataset_path: 'datasets/generated/scenes_data' 3 | backgrounds_dir: 'datasets/generated/random_backgrounds' 4 | val_dataset_path: 'assets/objects' 5 | # Reusing the same textures_dir argument for validation 6 | textures_dir: 'assets/backgrounds' 7 | output_dir: 'results/scene_experiment' -------------------------------------------------------------------------------- /configs/train/texturing.yaml: -------------------------------------------------------------------------------- 1 | mode: 'texture' 2 | dataset_path: 'datasets/generated/texture_data' 3 | val_dataset_path: 'assets/objects' 4 | textures_dir: 'assets/textures' 5 | output_dir: 'results/texturing_experiment' -------------------------------------------------------------------------------- /configs/train/union.yaml: -------------------------------------------------------------------------------- 1 | mode: 'union' 2 | dataset_path: 'datasets/generated/union_data' 3 | val_dataset_path: 'assets/objects' 4 | output_dir: 'results/union_experiment' -------------------------------------------------------------------------------- /data_generation/generate_random_images.py: -------------------------------------------------------------------------------- 1 | import random 2 | from pathlib import Path 3 | 4 | from diffusers import StableDiffusionXLPipeline 5 | 6 | from data_generation import words_bank 7 | from dataclasses import dataclass 8 | import pyrallis 9 | 10 | 11 | @dataclass 12 | class RunConfig: 13 | # Generation mode, should be either 'objects' or 'scenes' 14 | type: str = 'objects' 15 | out_dir: Path = Path('datasets/generated/random_objects') 16 | n_images: int = 100000 17 | 18 | 19 | @pyrallis.wrap() 20 | def generate(cfg: RunConfig): 21 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 22 | "stabilityai/sdxl-turbo", use_safetensors=True 23 | ).to("cuda") 24 | 25 | if cfg.type == 'objects': 26 | generate_objects = True 27 | elif cfg.type == 'scenes': 28 | generate_objects = False 29 | else: 30 | raise ValueError(f"Invalid type {cfg.type}") 31 | 32 | cfg.out_dir.mkdir(exist_ok=True, parents=True) 33 | 34 | if generate_objects: 35 | with open('assets/openimages_classes.txt', 'r') as f: 36 | objects = f.read().splitlines() 37 | 38 | for _ in range(cfg.n_images): 39 | try: 40 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 41 | if cfg.type == 'objects': 42 | object_name = random.choice(objects) 43 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 44 | prompt = f"A photo of a {object_name} {placement}" 45 | else: 46 | object_name = '' 47 | prompt = f"A photo of an empty {placement.split(' ')[-1]}" 48 | seed = random.randint(0, 1000000) 49 | 50 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 51 | 52 | out_path = cfg.out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 53 | base_image.save(out_path) 54 | except Exception as e: 55 | print(e) 56 | 57 | 58 | if __name__ == "__main__": 59 | # Use to generate objects or backgrounds, selected via the config's 'type' field 60 | generate() 61 | -------------------------------------------------------------------------------- /data_generation/generate_scenes_data.py: -------------------------------------------------------------------------------- 1 | import random 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | import transformers 7 | from PIL import Image, ImageFilter 8 | from diffusers import StableDiffusionXLPipeline,
AutoPipelineForInpainting 9 | 10 | from data_generation import words_bank 11 | 12 | 13 | def main(): 14 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 15 | "stabilityai/sdxl-turbo", use_safetensors=True 16 | ).to("cuda") 17 | 18 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 19 | inpaint_pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", 20 | torch_dtype=torch.float16, variant="fp16").to("cuda") 21 | 22 | with open('assets/openimages_classes.txt', 'r') as f: 23 | objects = f.read().splitlines() 24 | 25 | out_dir = Path(f'datasets/generated/scenes_data/') 26 | out_dir.mkdir(exist_ok=True, parents=True) 27 | 28 | for _ in range(100000): 29 | try: 30 | object_name = random.choice(objects) 31 | # Remove special characters 32 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 33 | # Restrict to two words 34 | object_name = ' '.join(object_name.split()[:2]) 35 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 36 | 37 | prompt = f"A photo of {object_name} {placement}" 38 | seed = random.randint(0, 1000000) 39 | 40 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 41 | 42 | tmp_path = 'tmp.jpg' 43 | base_image.save(tmp_path) 44 | crop_mask = bria_pipe(tmp_path, return_mask=True) # Retuns a PIL mask 45 | 46 | # Dilate mask 47 | crop_mask_for_inpaint = np.array(crop_mask) 48 | crop_mask_for_inpaint[crop_mask_for_inpaint > 10] = 255 49 | crop_mask_for_inpaint[crop_mask_for_inpaint <= 10] = 0 50 | crop_mask_for_inpaint = Image.fromarray(crop_mask_for_inpaint).filter(ImageFilter.MaxFilter(31)) 51 | crop_mask_for_inpaint = crop_mask_for_inpaint.convert("RGB") 52 | 53 | inpainted_image = inpaint_pipe( 54 | prompt=f'A photo of empty {placement.split(" ")[-1]}', 55 | image=base_image, 56 | mask_image=crop_mask_for_inpaint, 57 | guidance_scale=8.0, 58 | num_inference_steps=20, 59 | strength=1.0, 60 | ).images[0] 61 | 62 | # Restrict object_name to 50 characters 63 | object_name = object_name[:50] 64 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 65 | base_image.save(out_path) 66 | 67 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}_inpainted.jpg' 68 | inpainted_image.save(out_path) 69 | 70 | np.save(out_path.with_suffix('.npy'), np.array(crop_mask).astype(np.uint8)) 71 | 72 | 73 | except Exception as e: 74 | print(f'Error: {e}') 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /data_generation/generate_texture_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from pathlib import Path 4 | 5 | import torch 6 | import transformers 7 | from diffusers import StableDiffusionDepth2ImgPipeline, StableDiffusionXLPipeline 8 | from transformers import Owlv2Processor, Owlv2ForObjectDetection 9 | 10 | from data_generation import words_bank 11 | 12 | 13 | def main(): 14 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 15 | "stabilityai/sdxl-turbo", use_safetensors=True 16 | ).to("cuda") 17 | 18 | pipe = StableDiffusionDepth2ImgPipeline.from_pretrained( 19 | "stabilityai/stable-diffusion-2-depth", torch_dtype=torch.float16, ).to("cuda") 20 | 21 | processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble") 22 | 
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble") 23 | 24 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 25 | 26 | out_dir = Path(f'datasets/generated/texture_data') 27 | out_dir.mkdir(exist_ok=True, parents=True) 28 | 29 | with open('assets/openimages_classes.txt', 'r') as f: 30 | objects = f.read().splitlines() 31 | 32 | for _ in range(100000): 33 | object_name = random.choice(objects) 34 | # Remove special characters 35 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 36 | # Restrict to two words 37 | object_name = ' '.join(object_name.split()[:2]) 38 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 39 | prompt = f"A {object_name} {placement}" 40 | 41 | seed = random.randint(0, 1000000) 42 | object_out_dir = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}' 43 | if object_out_dir.exists(): 44 | continue 45 | object_out_dir.mkdir(exist_ok=True, parents=True) 46 | 47 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 48 | out_path = object_out_dir / f'base.jpg' 49 | base_image.save(out_path) 50 | 51 | # Find box 52 | texts = [[f"a {object_name}"]] 53 | inputs = processor(text=texts, images=base_image, return_tensors="pt") 54 | outputs = model(**inputs) 55 | 56 | # Target image sizes (height, width) to rescale box predictions [batch_size, 2] 57 | target_sizes = torch.Tensor([base_image.size[::-1]]) 58 | # Convert outputs (bounding boxes and class logits) to Pascal VOC Format (xmin, ymin, xmax, ymax) 59 | results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1) 60 | 61 | i = 0 # Retrieve predictions for the first image for the corresponding text queries 62 | boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"] 63 | # Filter to box with max score 64 | if len(boxes) == 0: 65 | scores = torch.tensor([1.0]) 66 | boxes = torch.tensor([[0, 0, base_image.size[0], base_image.size[1]]]) 67 | 68 | max_score_idx = scores.argmax() 69 | box = boxes[max_score_idx] 70 | 71 | # Save box to json file 72 | box_dict = { 73 | "x1": int(box[0].item()), 74 | "y1": int(box[1].item()), 75 | "x2": int(box[2].item()), 76 | "y2": int(box[3].item()), 77 | } 78 | crop = base_image.crop((box_dict['x1'], box_dict['y1'], box_dict['x2'], box_dict['y2'])) 79 | tmp_path = 'tmp.jpg' 80 | crop.save(tmp_path) 81 | crop_mask = bria_pipe(tmp_path, return_mask=True) 82 | crop_mask.save(object_out_dir / 'mask.png') 83 | with open(object_out_dir / 'box.json', 'w') as f: 84 | json.dump(box_dict, f) 85 | 86 | for _ in range(5): 87 | num_samples = random.randint(1, 5) 88 | sample_attributes = random.sample(words_bank.texture_attributes, num_samples) 89 | prompt = f"A {object_name} made from {' '.join(sample_attributes)} {placement}" 90 | n_prompt = "bad, deformed, ugly, bad anatomy" 91 | seed = random.randint(0, 1000000) 92 | image = pipe(prompt=prompt, image=base_image, negative_prompt=n_prompt, strength=1.0, 93 | generator=torch.Generator().manual_seed(seed)).images[0] 94 | attrs_str = '_'.join(sample_attributes)[:100] 95 | out_path = object_out_dir / f'{attrs_str}_{seed}.jpg' 96 | image.save(out_path) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /data_generation/generate_union_data.py:
-------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import torch 7 | import transformers 8 | from diffusers import StableDiffusionXLPipeline 9 | from transformers import Owlv2Processor, Owlv2ForObjectDetection 10 | 11 | from data_generation import words_bank 12 | 13 | 14 | def main(): 15 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 16 | "stabilityai/sdxl-turbo", use_safetensors=True 17 | ).to("cuda") 18 | 19 | processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble") 20 | model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble") 21 | 22 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 23 | 24 | out_dir = Path(f'datasets/generated/union_data') 25 | out_dir.mkdir(exist_ok=True, parents=True) 26 | 27 | # Generated images that did not meet the criteria 28 | out_dir_leftovers = Path(f'datasets/generated/union_data_leftovers') 29 | out_dir_leftovers.mkdir(exist_ok=True, parents=True) 30 | 31 | with open('assets/openimages_classes.txt', 'r') as f: 32 | objects = f.read().splitlines() 33 | 34 | base_image = None 35 | saved_base = False 36 | for _ in range(100000): 37 | if base_image is not None and not saved_base: 38 | alternative_path = out_dir_leftovers / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 39 | base_image.save(alternative_path) 40 | saved_base = False 41 | try: 42 | current_objects = [] 43 | object_count = 2 44 | for _ in range(object_count): 45 | object_name = random.choice(objects) 46 | # Remove '/' or any non english characters 47 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 48 | # Restrict to two words 49 | object_name = ' '.join(object_name.split()[:2]) 50 | current_objects.append(object_name) 51 | object_name = ' and a '.join(current_objects) 52 | print(object_name) 53 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 54 | 55 | prompt = f"A photo of {object_name} {placement}" 56 | seed = random.randint(0, 1000000) 57 | 58 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 59 | object_name = object_name[:50] 60 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 61 | 62 | # Try to detect the objects in the generated image 63 | texts = [[f"a {obj}" for obj in current_objects]] 64 | inputs = processor(text=texts, images=base_image, return_tensors="pt") 65 | with torch.no_grad(): 66 | outputs = model(**inputs) 67 | 68 | # Target image sizes (height, width) to rescale box predictions [batch_size, 2] 69 | target_sizes = torch.Tensor([base_image.size[::-1]]) 70 | # Convert outputs (bounding boxes and class logits) to Pascal VOC Format (xmin, ymin, xmax, ymax) 71 | results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, 72 | threshold=0.2) 73 | 74 | i = 0 # Retrieve predictions for the first image for the corresponding text queries 75 | boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"] 76 | 77 | chosen_boxes = [] 78 | # Take highest scoring box for each label 79 | for obj_ind in range(len(current_objects)): 80 | relevant_boxes = boxes[labels == obj_ind] 81 | relevant_scores = scores[labels == obj_ind] 82 | if len(relevant_boxes) > 0: 83 | max_score_idx = relevant_scores.argmax() 84 | 
max_box = relevant_boxes[max_score_idx] 85 | if relevant_scores[max_score_idx] < 0.2: 86 | break 87 | chosen_boxes.append(max_box) 88 | 89 | # Require exactly two detected objects 90 | if len(chosen_boxes) != 2: 91 | print(f'Skipping, detected {len(chosen_boxes)} objects') 92 | continue 93 | 94 | # Verify small overlap between the two boxes using IoU 95 | box1, box2 = chosen_boxes 96 | box1 = box1.int() 97 | box2 = box2.int() 98 | x1 = max(box1[0], box2[0]) 99 | y1 = max(box1[1], box2[1]) 100 | x2 = min(box1[2], box2[2]) 101 | y2 = min(box1[3], box2[3]) 102 | intersection = max(0, x2 - x1) * max(0, y2 - y1) 103 | area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]) 104 | area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]) 105 | union = area1 + area2 - intersection 106 | iou = float(intersection / union) 107 | if iou > 0.1: 108 | print(f'Skipping, iou is {iou}') 109 | continue 110 | 111 | metadata = {'objects': [], 'placement': placement, 'prompt': prompt} 112 | masked_objects = [] 113 | for obj_ind, box in enumerate(chosen_boxes): 114 | box = box.int() 115 | obj_name = current_objects[obj_ind] 116 | box_dict = { 117 | "x1": int(box[0].item()), 118 | "y1": int(box[1].item()), 119 | "x2": int(box[2].item()), 120 | "y2": int(box[3].item()), 121 | "name": obj_name 122 | } 123 | object_size = (box_dict['x2'] - box_dict['x1']) * (box_dict['y2'] - box_dict['y1']) 124 | if object_size < 5000: 125 | print(f'Skipping, object size is {object_size}') 126 | continue 127 | metadata['objects'].append(box_dict) 128 | crop = base_image.crop((box_dict['x1'], box_dict['y1'], box_dict['x2'], box_dict['y2'])) 129 | tmp_path = 'tmp.jpg' 130 | crop.save(tmp_path) 131 | with torch.no_grad(): 132 | masked_object = bria_pipe(tmp_path) 133 | # Returns an RGBA image; take the alpha channel as the object mask 134 | crop_mask = np.array(masked_object)[..., 3] 135 | 136 | # Make sure enough of the object is actually visible in the crop 137 | # Skip if less than 10% of the crop's pixels are non-masked 138 | total_pixels = crop_mask.size 139 | seen_pixels = np.sum(crop_mask > 200) 140 | if seen_pixels / total_pixels < 0.1: 141 | print(f'Skipping, not enough pixels seen.
only {seen_pixels / total_pixels:.3f} seen') 142 | continue 143 | masked_objects.append(masked_object) 144 | if len(masked_objects) != len(chosen_boxes): 145 | continue 146 | for obj_ind, masked_object in enumerate(masked_objects): 147 | masked_object.save(str(out_path).replace('.jpg', f'_OBJ_{obj_ind}.png')) 148 | with open(out_path.with_suffix('.json'), 'w') as f: 149 | json.dump(metadata, f) 150 | 151 | base_image.save(out_path) 152 | saved_base = True 153 | 154 | except ValueError as e: 155 | print(f'Error: {e}') 156 | 157 | 158 | if __name__ == "__main__": 159 | main() 160 | -------------------------------------------------------------------------------- /data_generation/remove_backs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import transformers 4 | from PIL import Image 5 | 6 | 7 | def main(): 8 | # base_image = Image.open("assets/template_images/mug.png") 9 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 10 | 11 | source_dir = Path('assets/objects') 12 | out_dir = Path(f'assets/objects_no_back') 13 | out_dir.mkdir(exist_ok=True, parents=True) 14 | 15 | for path in source_dir.glob('*.jpg'): 16 | try: 17 | base_image = Image.open(path).resize((512, 512)) 18 | tmp_path = 'tmp.jpg' 19 | base_image.save(tmp_path) 20 | crop_mask = bria_pipe(tmp_path, return_mask=True) # Retuns a PIL mask 21 | # Apply mask on base_image 22 | base_image = Image.composite(base_image, Image.new('RGB', base_image.size, (255, 255, 255)), crop_mask) 23 | base_image.save(out_dir / path.name) 24 | except Exception as e: 25 | print(e) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /data_generation/words_bank.py: -------------------------------------------------------------------------------- 1 | placements = ['on a table', 'on a shelf', 'on a wooden table', 'on the street', 'on the forest bed', 'on the floor', 2 | 'on the ground', 'on the grass', 'on the sand', 'on the beach', 'on the shore', 'on the pavement', 3 | 'in a room', 'in a living room', 'in a bedroom', 'in a kitchen', 'in a library', 'in a bathroom', 4 | 'in a garden', 'in a park', 'in a library', 'in a bathroom', 'in a garden', 'in a park', 'in the office', 5 | 'in the classroom', 'in the cafe', 'in the restaurant', 'on the balcony', 'on the rooftop', 6 | 'in the hallway', 'on the staircase', 'in the elevator', 'in the lobby', 'in the garage', 7 | 'in the basement', 'in the attic', 'on the porch', 'on the deck', 'on the patio', 'on the mountain', 8 | 'in the valley', 'in the forest', 'in the jungle', 'on the riverbank', 'at the waterfall', 'by the lake', 9 | 'by the pond', 'in the desert', 'in the canyon', 'on the glacier', 'by the iceberg', 'in the field', 10 | 'on the farm', 'in the vineyard', 'in the orchard', 'at the playground', 'in the stadium', 'in the arena', 11 | 'in the gym', 'in the pool', 'in the sauna', 'in the spa', 'in the beauty salon', 'in the bookstore', 12 | 'in the grocery store', 'in the market', 'in the mall', 'in the theater', 'in the cinema', 13 | 'in the studio', 'in the gallery', 'in the museum', 'at the aquarium', 'at the zoo', 14 | 'at the amusement park', 'at the campsite', 'in the RV park', 'at the resort', 'in the hotel', 15 | 'in the motel', 'in the inn', 'in the lodge', 'in the hostel', 'at the airport', 'at the train station', 16 | 'at the bus station', 'at the port', 'in the church', 'in the 
temple', 'in the mosque', 17 | 'in the synagogue', 'at the university', 'at the school', 'in the daycare', 'in the nursery', 18 | 'in the factory', 'in the warehouse', 'in the office building', 'in the skyscraper', 'in the cottage', 19 | 'in the bungalow', 'in the mansion', 'in the castle'] 20 | 21 | texture_attributes = ['glossy', 'shiny', 'matte', 'rough', 'bumpy', 'smooth', 'shimmering', 'sparkling', 'dusty', 22 | 'fuzzy', 'soft', 'hard', 'brittle', 'flexible', 'elastic', 'stiff', 'rigid', 'tough', 'flimsy', 23 | 'delicate', 'fragile', 'sturdy', 'solid', 'hollow', 'dense', 'light', 'heavy', 'grainy', 24 | 'slippery', 'sticky', 'warm', 'cool', 'metallic', 'pearlescent', 'satin', 'dull', 'reflective', 25 | 'translucent', 'opaque', 'homogeneous', 'heterogeneous', 'veined', 'flat', 'raised', 'textured', 26 | 'rugged', 'water-resistant', 'light-absorbing', 'dust-repellent', 'cozy', 'luxurious', 'rustic', 27 | 'weathered', 'polished', 'aged', 'green', 'brown', 'yellow', 'orange', 'red', 'blue', 'purple', 28 | 'pink', 'white', 'black', 'grey', 'silver', 'gold', 'cyan', 'magenta', 'turquoise', 'ivory', 29 | 'tan', 'beige', 'navy', 'maroon', 'charcoal', 'teal', 'olive', 'peach', 'lavender', 'uniform', 30 | 'abstract', 'geometric', 'random', 'symmetrical', 'asymmetrical', 'striped', 'checked', 'paisley', 31 | 'plaid', 'houndstooth', 'polka-dot', 'herringbone', 'chevron', 'argyle', 'non-reflective', 32 | 'marbled', 'grained', 'streaked', 'spotted', 'speckled', 'dappled', 'mottled', 'flecked', 33 | 'patched', 'layered', 'deep', 'shallow', 'pitted', 'embossed', 'engraved', 'new', 'old', 'worn', 34 | 'damaged', 'antique', 'vintage', 'distressed', 'refurbished', 'restored', 'pristine', 35 | 'immaculate', 'tarnished', 'faded', 'marble', 'granite', 'wood', 'metal', 'glass', 'plastic', 36 | 'fabric', 'leather', 'paper', 'cardboard', 'concrete', 'brick', 'stone', 'sand', 'dirt', 'mud', 37 | 'clay', 'ceramic', 'porcelain', 'rubber', 'sponge', 'foam', 'felt', 'velvet', 'silk', 'cotton', 38 | 'wool', 'linen', 'denim', 'lace', 'tweed', 'nylon', 'polyester', 'acrylic', 'spandex', 'suede', 39 | 'mesh', 'bamboo', 'hemp', 'leaf pattern', 'floral pattern', 'animal pattern', 'zebra pattern', 40 | 'tiger pattern', 'leopard pattern', 'cheetah pattern', 'giraffe pattern', 'snake pattern', 41 | 'crocodile pattern', 'camouflage pattern', 'mosaic pattern', 'kaleidoscope pattern', 42 | 'mandala pattern', 'tartan pattern', 'batik pattern', 'ikat pattern', 'quilted pattern', 43 | 'glittering', 'mirrored', 'satin-finish', 'carbon fiber', 'knitted', 'crocheted', 'embroidered', 44 | 'pleated', 'crinkled', 'crumpled', 'woven', 'braided', 'perforated', 'padded', 'quilted', 45 | 'thermal', 'insulated', 'gauzy', 'translucent-finish', 'iridescent', 'opalescent', 'neon', 46 | 'pastel', 'vibrant', 'dull-finish', 'chalky', 'silky-smooth', 'rubbery', 'gummy', 'waxy', 'oily', 47 | 'soapy', 'milky', 'crystal-clear', 'frosted', 'etched', 'blurred', 'swirled', 'twisted', 'coiled', 48 | 'looped', 'interwoven', 'knotted', 'spiral', 'diagonal', 'crosshatched', 'lacy', 'beaded', 49 | 'sequined', 'flocked', 'brushed metal', 'anodized', 'galvanized', 'powder-coated', 'acid-washed', 50 | 'sun-bleached', 'peeling', 'cracked', 'chipped', 'burnished', 'oxidized', 'corroded', 'stained', 51 | 'dyed', 'tie-dye pattern', 'ombre', 'gradient', 'speckled paint', 'splattered paint', 52 | 'marbleized', 'woodgrain', 'cork', 'terrazzo', 'bamboo texture', 'reed', 'sisal', 'sea grass', 53 | 'jute', 'chalkboard', 'magnetic', 'glazed', 'unglazed', 'raw', 'burnt', 
'smoked', 'sanded', 54 | 'planed', 'rough-cut', 'varnished', 'unvarnished', 'waxed', 'oil-finished', 'shellac-finished', 55 | 'lacquered', 'patina', 'brushed', 'hammered', 'spun', 'wrought', 'forged', 'cast', 'molded', 56 | '3D printed', 'laminated', 'veneered', 'inlaid', 'gilded', 'silvered', 'leafed', 'foiled', 57 | 'embossed pattern', 'debossed pattern', 'puzzle pattern', 'geometric pattern', 58 | 'optical illusion pattern', 'holographic pattern', 'psychedelic pattern', 'pop art pattern', 59 | 'art deco pattern', 'Victorian pattern', 'Baroque pattern', 'Renaissance pattern', 60 | 'gothic pattern', 'Celtic pattern', 'tribal pattern', 'Ethnic pattern', 'folk pattern', 61 | 'historical pattern'] 62 | 63 | adjectives = ['rotten', 'big', 'small', 'many', 'burning', 'melting', 'shattered', 'dried', 'sliced', 'moldy', 64 | 'glistening', 'fluffy', 'plush', 'opaque', 'wrinkled', 'frosted', 'antique', 'futuristic', 'cracked', 65 | 'glowing', 'glossy', 'translucent', 'gothic', 'young', 'old', 'rustic', 'two', 'multiple', 'group', 66 | 'shiny', 'dull', 'colorful', 'floating', 'winged', 'soggy', 'ancient', 'tiny', 'enormous', 'skeletal', 67 | 'hairless', 'furry', 'grimy', 'frozen', 'dusty', 'muddy', 'bubbly', 'spiky', 'slimy', 'scaly', 'feathery', 68 | 'hairy', 'fuzzy', 'smooth', 'rough', 'gleaming', 'heavy', 'wet', 'dry', 'aged', 'transparent', 'empty', 69 | 'full'] 70 | 71 | art_types = ['photo', 'painting', 'sketch', 'sculpture', 'photograph', 'drawing', 'tapestry', 'mosaic', 'carving', 72 | 'pottery', 'ceramic', 'origami', 'stained glass', 'engraving', 'watercolor painting', 'oil painting', 73 | 'acrylic painting', 'charcoal drawing', 'pencil drawing', 'pastel drawing', 'ink drawing', 74 | 'digital painting', 'collage', 'mixed media', 'woodcut', 'lithograph', 'etching', 'engraving', 75 | 'comic drawing', 'cartoon', 'animation', 'illustration', 'concept art', 'conceptual art', '3D rendering', 76 | 'digital art', 'traditional art', 'abstract art', 'realistic art', 'minimalist art', 'abstract art' 77 | ] 78 | -------------------------------------------------------------------------------- /kandinsky2/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from huggingface_hub import hf_hub_url, cached_download 3 | from copy import deepcopy 4 | from omegaconf.dictconfig import DictConfig 5 | 6 | from .configs import CONFIG_2_0, CONFIG_2_1 7 | from .kandinsky2_model import Kandinsky2 8 | from .kandinsky2_1_model import Kandinsky2_1 9 | from .kandinsky2_2_model import Kandinsky2_2 10 | 11 | 12 | def get_kandinsky2_0( 13 | device, 14 | task_type="text2img", 15 | cache_dir="/tmp/kandinsky2", 16 | use_auth_token=None, 17 | ): 18 | cache_dir = os.path.join(cache_dir, "2_0") 19 | config = deepcopy(CONFIG_2_0) 20 | if task_type == "inpainting": 21 | model_name = "Kandinsky-2-0-inpainting.pt" 22 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=model_name) 23 | elif task_type == "text2img": 24 | model_name = "Kandinsky-2-0.pt" 25 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=model_name) 26 | else: 27 | raise ValueError("Only text2img, img2img and inpainting is available") 28 | 29 | cached_download( 30 | config_file_url, 31 | cache_dir=cache_dir, 32 | force_filename=model_name, 33 | use_auth_token=use_auth_token, 34 | ) 35 | 36 | cache_dir_text_en1 = os.path.join(cache_dir, "text_encoder1") 37 | for name in [ 38 | "config.json", 39 | "pytorch_model.bin", 40 | "sentencepiece.bpe.model", 41 | 
"special_tokens_map.json", 42 | "tokenizer.json", 43 | "tokenizer_config.json", 44 | ]: 45 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=f"text_encoder1/{name}") 46 | cached_download( 47 | config_file_url, 48 | cache_dir=cache_dir_text_en1, 49 | force_filename=name, 50 | use_auth_token=use_auth_token, 51 | ) 52 | 53 | cache_dir_text_en2 = os.path.join(cache_dir, "text_encoder2") 54 | for name in [ 55 | "config.json", 56 | "pytorch_model.bin", 57 | "spiece.model", 58 | "special_tokens_map.json", 59 | "tokenizer_config.json", 60 | ]: 61 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=f"text_encoder2/{name}") 62 | cached_download( 63 | config_file_url, 64 | cache_dir=cache_dir_text_en2, 65 | force_filename=name, 66 | use_auth_token=use_auth_token, 67 | ) 68 | 69 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename="vae.ckpt") 70 | cached_download( 71 | config_file_url, 72 | cache_dir=cache_dir, 73 | force_filename="vae.ckpt", 74 | use_auth_token=use_auth_token, 75 | ) 76 | 77 | config["text_enc_params1"]["model_path"] = cache_dir_text_en1 78 | config["text_enc_params2"]["model_path"] = cache_dir_text_en2 79 | config["tokenizer_name1"] = cache_dir_text_en1 80 | config["tokenizer_name2"] = cache_dir_text_en2 81 | config["image_enc_params"]["params"]["ckpt_path"] = os.path.join( 82 | cache_dir, "vae.ckpt" 83 | ) 84 | unet_path = os.path.join(cache_dir, model_name) 85 | 86 | model = Kandinsky2(config, unet_path, device, task_type) 87 | return model 88 | 89 | 90 | def get_kandinsky2_1( 91 | device, 92 | task_type="text2img", 93 | cache_dir="/tmp/kandinsky2", 94 | use_auth_token=None, 95 | use_flash_attention=False, 96 | ): 97 | cache_dir = os.path.join(cache_dir, "2_1") 98 | config = DictConfig(deepcopy(CONFIG_2_1)) 99 | config["model_config"]["use_flash_attention"] = use_flash_attention 100 | if task_type == "text2img": 101 | model_name = "decoder_fp16.ckpt" 102 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=model_name) 103 | elif task_type == "inpainting": 104 | model_name = "inpainting_fp16.ckpt" 105 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=model_name) 106 | cached_download( 107 | config_file_url, 108 | cache_dir=cache_dir, 109 | force_filename=model_name, 110 | use_auth_token=use_auth_token, 111 | ) 112 | prior_name = "prior_fp16.ckpt" 113 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=prior_name) 114 | cached_download( 115 | config_file_url, 116 | cache_dir=cache_dir, 117 | force_filename=prior_name, 118 | use_auth_token=use_auth_token, 119 | ) 120 | 121 | cache_dir_text_en = os.path.join(cache_dir, "text_encoder") 122 | for name in [ 123 | "config.json", 124 | "pytorch_model.bin", 125 | "sentencepiece.bpe.model", 126 | "special_tokens_map.json", 127 | "tokenizer.json", 128 | "tokenizer_config.json", 129 | ]: 130 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=f"text_encoder/{name}") 131 | cached_download( 132 | config_file_url, 133 | cache_dir=cache_dir_text_en, 134 | force_filename=name, 135 | use_auth_token=use_auth_token, 136 | ) 137 | 138 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename="movq_final.ckpt") 139 | cached_download( 140 | config_file_url, 141 | cache_dir=cache_dir, 142 | force_filename="movq_final.ckpt", 143 | use_auth_token=use_auth_token, 144 | ) 145 | 146 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", 
filename="ViT-L-14_stats.th") 147 | cached_download( 148 | config_file_url, 149 | cache_dir=cache_dir, 150 | force_filename="ViT-L-14_stats.th", 151 | use_auth_token=use_auth_token, 152 | ) 153 | 154 | config["tokenizer_name"] = cache_dir_text_en 155 | config["text_enc_params"]["model_path"] = cache_dir_text_en 156 | config["prior"]["clip_mean_std_path"] = os.path.join(cache_dir, "ViT-L-14_stats.th") 157 | config["image_enc_params"]["ckpt_path"] = os.path.join(cache_dir, "movq_final.ckpt") 158 | cache_model_name = os.path.join(cache_dir, model_name) 159 | cache_prior_name = os.path.join(cache_dir, prior_name) 160 | model = Kandinsky2_1(config, cache_model_name, cache_prior_name, device, task_type=task_type) 161 | return model 162 | 163 | 164 | def get_kandinsky2( 165 | device, 166 | task_type="text2img", 167 | cache_dir="/tmp/kandinsky2", 168 | use_auth_token=None, 169 | model_version="2.1", 170 | use_flash_attention=False, 171 | ): 172 | if model_version == "2.0": 173 | model = get_kandinsky2_0( 174 | device, 175 | task_type=task_type, 176 | cache_dir=cache_dir, 177 | use_auth_token=use_auth_token, 178 | ) 179 | elif model_version == "2.1": 180 | model = get_kandinsky2_1( 181 | device, 182 | task_type=task_type, 183 | cache_dir=cache_dir, 184 | use_auth_token=use_auth_token, 185 | use_flash_attention=use_flash_attention, 186 | ) 187 | elif model_version == "2.2": 188 | model = Kandinsky2_2(device=device, task_type=task_type) 189 | else: 190 | raise ValueError("Only 2.0 and 2.1 is available") 191 | 192 | return model 193 | -------------------------------------------------------------------------------- /kandinsky2/configs.py: -------------------------------------------------------------------------------- 1 | CONFIG_2_0 = { 2 | "model_config": { 3 | "image_size": 64, 4 | "num_channels": 384, 5 | "num_res_blocks": 3, 6 | "channel_mult": "", 7 | "num_heads": 1, 8 | "num_head_channels": 64, 9 | "num_heads_upsample": -1, 10 | "attention_resolutions": "32,16,8", 11 | "dropout": 0, 12 | "model_dim": 768, 13 | "use_scale_shift_norm": True, 14 | "resblock_updown": True, 15 | "use_fp16": False, 16 | "cache_text_emb": True, 17 | "text_encoder_in_dim1": 1024, 18 | "text_encoder_in_dim2": 640, 19 | "pooling_type": "from_model", 20 | "in_channels": 4, 21 | "out_channels": 8, 22 | "up": False, 23 | "inpainting": False, 24 | }, 25 | "diffusion_config": { 26 | "learn_sigma": True, 27 | "sigma_small": False, 28 | "steps": 1000, 29 | "noise_schedule": "linear", 30 | "timestep_respacing": "", 31 | "use_kl": False, 32 | "predict_xstart": False, 33 | "rescale_timesteps": True, 34 | "rescale_learned_sigmas": True, 35 | "linear_start": 0.0001, 36 | "linear_end": 0.02, 37 | }, 38 | "image_enc_params": { 39 | "name": "AutoencoderKL", 40 | "scale": 0.0512, 41 | "params": { 42 | "ckpt_path": "", 43 | "embed_dim": 4, 44 | "ddconfig": { 45 | "double_z": True, 46 | "z_channels": 4, 47 | "resolution": 256, 48 | "in_channels": 3, 49 | "out_ch": 3, 50 | "ch": 128, 51 | "ch_mult": [1, 2, 4, 4], 52 | "num_res_blocks": 2, 53 | "attn_resolutions": [], 54 | "dropout": 0.0, 55 | }, 56 | }, 57 | }, 58 | "text_enc_params1": {"model_path": "", "model_name": "multiclip"}, 59 | "text_enc_params2": {"model_path": "", "model_name": "MT5EncoderModel"}, 60 | "tokenizer_name1": "", 61 | "tokenizer_name2": "", 62 | } 63 | 64 | CONFIG_2_1 = { 65 | "clip_name": "ViT-L/14", 66 | "clip_image_size": 224, 67 | "tokenizer_name": "", 68 | "image_enc_params": { 69 | "name": "MOVQ", 70 | "scale": 1, 71 | "ckpt_path": "", 72 | "params": { 73 | 
"embed_dim": 4, 74 | "n_embed": 16384, 75 | "ddconfig": { 76 | "double_z": False, 77 | "z_channels": 4, 78 | "resolution": 256, 79 | "in_channels": 3, 80 | "out_ch": 3, 81 | "ch": 128, 82 | "ch_mult": [1, 2, 2, 4], 83 | "num_res_blocks": 2, 84 | "attn_resolutions": [32], 85 | "dropout": 0.0, 86 | }, 87 | }, 88 | }, 89 | "text_enc_params": { 90 | "model_path": "", 91 | "model_name": "multiclip", 92 | "in_features": 1024, 93 | "out_features": 768, 94 | }, 95 | "prior": { 96 | "clip_mean_std_path": "ViT-L-14_stats.th", 97 | "params": { 98 | "model": { 99 | "type": "prior", 100 | "diffusion_sampler": "uniform", 101 | "hparams": { 102 | "text_ctx": 77, 103 | "xf_width": 2048, 104 | "xf_layers": 20, 105 | "xf_heads": 32, 106 | "xf_final_ln": True, 107 | "xf_padding": False, 108 | "text_drop": 0.2, 109 | "clip_dim": 768, 110 | "clip_xf_width": 768, 111 | }, 112 | }, 113 | "diffusion": { 114 | "steps": 1000, 115 | "learn_sigma": False, 116 | "sigma_small": True, 117 | "noise_schedule": "cosine", 118 | "use_kl": False, 119 | "predict_xstart": True, 120 | "rescale_learned_sigmas": False, 121 | "timestep_respacing": "", 122 | }, 123 | }, 124 | }, 125 | "model_config": { 126 | "version": "2.1", 127 | "image_size": 64, 128 | "num_channels": 384, 129 | "num_res_blocks": 3, 130 | "channel_mult": "", 131 | "num_heads": 1, 132 | "num_head_channels": 64, 133 | "num_heads_upsample": -1, 134 | "attention_resolutions": "32,16,8", 135 | "dropout": 0, 136 | "model_dim": 768, 137 | "use_scale_shift_norm": True, 138 | "resblock_updown": True, 139 | "use_fp16": True, 140 | "cache_text_emb": True, 141 | "text_encoder_in_dim1": 1024, 142 | "text_encoder_in_dim2": 768, 143 | "image_encoder_in_dim": 768, 144 | "num_image_embs": 10, 145 | "pooling_type": "from_model", 146 | "in_channels": 4, 147 | "out_channels": 8, 148 | "use_flash_attention": False, 149 | }, 150 | "diffusion_config": { 151 | "learn_sigma": True, 152 | "sigma_small": False, 153 | "steps": 1000, 154 | "noise_schedule": "linear", 155 | "timestep_respacing": "", 156 | "use_kl": False, 157 | "predict_xstart": False, 158 | "rescale_timesteps": True, 159 | "rescale_learned_sigmas": True, 160 | "linear_start": 0.00085, 161 | "linear_end": 0.012, 162 | }, 163 | } -------------------------------------------------------------------------------- /kandinsky2/model/fp16_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers to inference with 16-bit precision. 3 | """ 4 | 5 | import torch.nn as nn 6 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors 7 | 8 | 9 | def convert_module_to_f16(l): 10 | """ 11 | Convert primitive modules to float16. 12 | """ 13 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 14 | l.weight.data = l.weight.data.half() 15 | if l.bias is not None: 16 | l.bias.data = l.bias.data.half() 17 | 18 | 19 | def convert_module_to_f32(l): 20 | """ 21 | Convert primitive modules to float32, undoing convert_module_to_f16(). 22 | """ 23 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 24 | l.weight.data = l.weight.data.float() 25 | if l.bias is not None: 26 | l.bias.data = l.bias.data.float() 27 | 28 | 29 | def make_master_params(model_params): 30 | """ 31 | Copy model parameters into a (differently-shaped) list of full-precision 32 | parameters. 
33 | """ 34 | master_params = _flatten_dense_tensors( 35 | [param.detach().float() for param in model_params] 36 | ) 37 | master_params = nn.Parameter(master_params) 38 | master_params.requires_grad = True 39 | return [master_params] 40 | 41 | 42 | def model_grads_to_master_grads(model_params, master_params): 43 | """ 44 | Copy the gradients from the model parameters into the master parameters 45 | from make_master_params(). 46 | """ 47 | master_params[0].grad = _flatten_dense_tensors( 48 | [param.grad.data.detach().float() for param in model_params] 49 | ) 50 | 51 | 52 | def master_params_to_model_params(model_params, master_params): 53 | """ 54 | Copy the master parameter data back into the model parameters. 55 | """ 56 | # Without copying to a list, if a generator is passed, this will 57 | # silently not copy any parameters. 58 | model_params = list(model_params) 59 | 60 | for param, master_param in zip( 61 | model_params, unflatten_master_params(model_params, master_params) 62 | ): 63 | param.detach().copy_(master_param) 64 | 65 | 66 | def unflatten_master_params(model_params, master_params): 67 | """ 68 | Unflatten the master parameters to look like model_params. 69 | """ 70 | return _unflatten_dense_tensors(master_params[0].detach(), model_params) 71 | 72 | 73 | def zero_grad(model_params): 74 | for param in model_params: 75 | # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group 76 | if param.grad is not None: 77 | param.grad.detach_() 78 | param.grad.zero_() 79 | -------------------------------------------------------------------------------- /kandinsky2/model/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers for various likelihood-based losses. These are ported from the original 3 | Ho et al. diffusion models codebase: 4 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py 5 | """ 6 | 7 | import numpy as np 8 | 9 | import torch as th 10 | 11 | 12 | def normal_kl(mean1, logvar1, mean2, logvar2): 13 | """ 14 | Compute the KL divergence between two gaussians. 15 | Shapes are automatically broadcasted, so batches can be compared to 16 | scalars, among other use cases. 17 | """ 18 | tensor = None 19 | for obj in (mean1, logvar1, mean2, logvar2): 20 | if isinstance(obj, th.Tensor): 21 | tensor = obj 22 | break 23 | assert tensor is not None, "at least one argument must be a Tensor" 24 | 25 | # Force variances to be Tensors. Broadcasting helps convert scalars to 26 | # Tensors, but it does not work for th.exp(). 27 | logvar1, logvar2 = [ 28 | x if isinstance(x, th.Tensor) else th.tensor(x).to(tensor) 29 | for x in (logvar1, logvar2) 30 | ] 31 | 32 | return 0.5 * ( 33 | -1.0 34 | + logvar2 35 | - logvar1 36 | + th.exp(logvar1 - logvar2) 37 | + ((mean1 - mean2) ** 2) * th.exp(-logvar2) 38 | ) 39 | 40 | 41 | def approx_standard_normal_cdf(x): 42 | """ 43 | A fast approximation of the cumulative distribution function of the 44 | standard normal. 45 | """ 46 | return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3)))) 47 | 48 | 49 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 50 | """ 51 | Compute the log-likelihood of a Gaussian distribution discretizing to a 52 | given image. 53 | :param x: the target images. It is assumed that this was uint8 values, 54 | rescaled to the range [-1, 1]. 55 | :param means: the Gaussian mean Tensor. 
56 | :param log_scales: the Gaussian log stddev Tensor. 57 | :return: a tensor like x of log probabilities (in nats). 58 | """ 59 | assert x.shape == means.shape == log_scales.shape 60 | centered_x = x - means 61 | inv_stdv = th.exp(-log_scales) 62 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 63 | cdf_plus = approx_standard_normal_cdf(plus_in) 64 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 65 | cdf_min = approx_standard_normal_cdf(min_in) 66 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 67 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 68 | cdf_delta = cdf_plus - cdf_min 69 | log_probs = th.where( 70 | x < -0.999, 71 | log_cdf_plus, 72 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 73 | ) 74 | assert log_probs.shape == x.shape 75 | return log_probs 76 | -------------------------------------------------------------------------------- /kandinsky2/model/model_creation.py: -------------------------------------------------------------------------------- 1 | from .gaussian_diffusion import get_named_beta_schedule 2 | from . import gaussian_diffusion as gd 3 | from .respace import SpacedDiffusion, space_timesteps 4 | from .text2im_model import Text2ImUNet, InpaintText2ImUNet 5 | from .text2im_model2_1 import Text2ImUNet as Text2ImUNet2_1 6 | from .text2im_model2_1 import InpaintText2ImUNet as InpaintText2ImUNet2_1 7 | 8 | 9 | def create_model( 10 | image_size, 11 | num_channels, 12 | num_res_blocks, 13 | channel_mult, 14 | attention_resolutions, 15 | num_heads, 16 | num_head_channels, 17 | num_heads_upsample, 18 | use_scale_shift_norm, 19 | dropout, 20 | model_dim, 21 | resblock_updown, 22 | use_fp16, 23 | cache_text_emb, 24 | text_encoder_in_dim1, 25 | text_encoder_in_dim2, 26 | pooling_type, 27 | in_channels, 28 | out_channels, 29 | up, 30 | inpainting, 31 | version="2.0", 32 | **kwargs, 33 | ): 34 | if channel_mult == "": 35 | if image_size == 256: 36 | channel_mult = (1, 1, 2, 2, 4, 4) 37 | elif image_size == 128: 38 | channel_mult = (1, 1, 2, 3, 4) 39 | elif image_size == 64: 40 | channel_mult = (1, 2, 3, 4) 41 | else: 42 | raise ValueError(f"unsupported image size: {image_size}") 43 | else: 44 | channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) 45 | 46 | attention_ds = [] 47 | for res in attention_resolutions.split(","): 48 | attention_ds.append(image_size // int(res)) 49 | if inpainting: 50 | if version == "2.0": 51 | model_cls = InpaintText2ImUNet 52 | elif version == "2.1": 53 | model_cls = InpaintText2ImUNet2_1 54 | else: 55 | ValueError("Only 2.0 and 2.1 versions are available") 56 | else: 57 | if version == "2.0": 58 | model_cls = Text2ImUNet 59 | elif version == "2.1": 60 | model_cls = Text2ImUNet2_1 61 | else: 62 | ValueError("Only 2.0 and 2.1 versions are available") 63 | return model_cls( 64 | in_channels=in_channels, 65 | model_channels=num_channels, 66 | out_channels=out_channels, 67 | num_res_blocks=num_res_blocks, 68 | attention_resolutions=tuple(attention_ds), 69 | dropout=dropout, 70 | model_dim=model_dim, 71 | channel_mult=channel_mult, 72 | use_fp16=use_fp16, 73 | num_heads=num_heads, 74 | num_head_channels=num_head_channels, 75 | num_heads_upsample=num_heads_upsample, 76 | use_scale_shift_norm=use_scale_shift_norm, 77 | resblock_updown=resblock_updown, 78 | cache_text_emb=cache_text_emb, 79 | text_encoder_in_dim1=text_encoder_in_dim1, 80 | text_encoder_in_dim2=text_encoder_in_dim2, 81 | pooling_type=pooling_type, 82 | **kwargs, 83 | ) 84 | 85 | 86 | def create_gaussian_diffusion( 
87 | *, 88 | steps=1000, 89 | learn_sigma=False, 90 | sigma_small=False, 91 | noise_schedule="linear", 92 | use_kl=False, 93 | predict_xstart=False, 94 | rescale_timesteps=False, 95 | rescale_learned_sigmas=False, 96 | timestep_respacing="", 97 | linear_start=0.0001, 98 | linear_end=0.02, 99 | ): 100 | betas = get_named_beta_schedule( 101 | noise_schedule, steps, linear_start=linear_start, linear_end=linear_end 102 | ) 103 | if use_kl: 104 | loss_type = gd.LossType.RESCALED_KL 105 | elif rescale_learned_sigmas: 106 | loss_type = gd.LossType.RESCALED_MSE 107 | else: 108 | loss_type = gd.LossType.MSE 109 | if not timestep_respacing: 110 | timestep_respacing = [steps] 111 | return SpacedDiffusion( 112 | use_timesteps=space_timesteps(steps, timestep_respacing), 113 | betas=betas, 114 | model_mean_type=( 115 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 116 | ), 117 | model_var_type=( 118 | ( 119 | gd.ModelVarType.FIXED_LARGE 120 | if not sigma_small 121 | else gd.ModelVarType.FIXED_SMALL 122 | ) 123 | if not learn_sigma 124 | else gd.ModelVarType.LEARNED_RANGE 125 | ), 126 | loss_type=loss_type, 127 | rescale_timesteps=rescale_timesteps, 128 | ) 129 | -------------------------------------------------------------------------------- /kandinsky2/model/nn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various utilities for neural networks. 3 | """ 4 | 5 | import math 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def update_ema(target_params, source_params, rate=0.99): 13 | """ 14 | Update target parameters to be closer to those of source parameters using 15 | an exponential moving average. 16 | :param target_params: the target parameter sequence. 17 | :param source_params: the source parameter sequence. 18 | :param rate: the EMA rate (closer to 1 means slower). 19 | """ 20 | for targ, src in zip(target_params, source_params): 21 | targ.detach().mul_(rate).add_(src, alpha=1 - rate) 22 | 23 | return target_params 24 | 25 | 26 | class GroupNorm32(nn.GroupNorm): 27 | def __init__(self, num_groups, num_channels, swish, eps=1e-5): 28 | super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps) 29 | self.swish = swish 30 | 31 | def forward(self, x): 32 | y = super().forward(x.float()).to(x.dtype) 33 | if self.swish == 1.0: 34 | y = F.silu(y) 35 | elif self.swish: 36 | y = y * F.sigmoid(y * float(self.swish)) 37 | return y 38 | 39 | 40 | def conv_nd(dims, *args, **kwargs): 41 | """ 42 | Create a 1D, 2D, or 3D convolution module. 43 | """ 44 | if dims == 1: 45 | return nn.Conv1d(*args, **kwargs) 46 | elif dims == 2: 47 | return nn.Conv2d(*args, **kwargs) 48 | elif dims == 3: 49 | return nn.Conv3d(*args, **kwargs) 50 | raise ValueError(f"unsupported dimensions: {dims}") 51 | 52 | 53 | def linear(*args, **kwargs): 54 | """ 55 | Create a linear module. 56 | """ 57 | return nn.Linear(*args, **kwargs) 58 | 59 | 60 | def avg_pool_nd(dims, *args, **kwargs): 61 | """ 62 | Create a 1D, 2D, or 3D average pooling module. 63 | """ 64 | if dims == 1: 65 | return nn.AvgPool1d(*args, **kwargs) 66 | elif dims == 2: 67 | return nn.AvgPool2d(*args, **kwargs) 68 | elif dims == 3: 69 | return nn.AvgPool3d(*args, **kwargs) 70 | raise ValueError(f"unsupported dimensions: {dims}") 71 | 72 | 73 | def zero_module(module): 74 | """ 75 | Zero out the parameters of a module and return it. 
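Illustrative usage (a minimal sketch; the layer below is hypothetical):

    out_proj = zero_module(conv_nd(2, 128, 4, 3, padding=1))
    # every parameter of out_proj is now exactly zero, so at initialization
    # the layer outputs zeros -- the usual trick for the final projection of a
    # residual or attention block so the block starts out as an identity.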
76 | """ 77 | for p in module.parameters(): 78 | p.detach().zero_() 79 | return module 80 | 81 | 82 | def scale_module(module, scale): 83 | """ 84 | Scale the parameters of a module and return it. 85 | """ 86 | for p in module.parameters(): 87 | p.detach().mul_(scale) 88 | return module 89 | 90 | 91 | def normalization(channels, swish=0.0): 92 | """ 93 | Make a standard normalization layer, with an optional swish activation. 94 | 95 | :param channels: number of input channels. 96 | :return: an nn.Module for normalization. 97 | """ 98 | return GroupNorm32(num_channels=channels, num_groups=32, swish=swish) 99 | 100 | 101 | def timestep_embedding(timesteps, dim, max_period=10000): 102 | """ 103 | Create sinusoidal timestep embeddings. 104 | 105 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 106 | These may be fractional. 107 | :param dim: the dimension of the output. 108 | :param max_period: controls the minimum frequency of the embeddings. 109 | :return: an [N x dim] Tensor of positional embeddings. 110 | """ 111 | half = dim // 2 112 | freqs = torch.exp( 113 | -math.log(max_period) 114 | * torch.arange(start=0, end=half, dtype=torch.float32) 115 | / half 116 | ).to(device=timesteps.device) 117 | args = timesteps[:, None].float() * freqs[None] 118 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 119 | if dim % 2: 120 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 121 | return embedding 122 | -------------------------------------------------------------------------------- /kandinsky2/model/resample.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def create_named_schedule_sampler(name, diffusion): 9 | """ 10 | Create a ScheduleSampler from a library of pre-defined samplers. 11 | :param name: the name of the sampler. 12 | :param diffusion: the diffusion object to sample for. 13 | """ 14 | if name == "uniform": 15 | return UniformSampler(diffusion) 16 | elif name == "loss-second-moment": 17 | return LossSecondMomentResampler(diffusion) 18 | else: 19 | raise NotImplementedError(f"unknown schedule sampler: {name}") 20 | 21 | 22 | class ScheduleSampler(ABC): 23 | """ 24 | A distribution over timesteps in the diffusion process, intended to reduce 25 | variance of the objective. 26 | By default, samplers perform unbiased importance sampling, in which the 27 | objective's mean is unchanged. 28 | However, subclasses may override sample() to change how the resampled 29 | terms are reweighted, allowing for actual changes in the objective. 30 | """ 31 | 32 | @abstractmethod 33 | def weights(self): 34 | """ 35 | Get a numpy array of weights, one per diffusion step. 36 | The weights needn't be normalized, but must be positive. 37 | """ 38 | 39 | def sample(self, batch_size, device): 40 | """ 41 | Importance-sample timesteps for a batch. 42 | :param batch_size: the number of timesteps. 43 | :param device: the torch device to save to. 44 | :return: a tuple (timesteps, weights): 45 | - timesteps: a tensor of timestep indices. 46 | - weights: a tensor of weights to scale the resulting losses. 
47 | """ 48 | w = self.weights() 49 | p = w / np.sum(w) 50 | indices_np = np.random.choice(len(p), size=(batch_size,), p=p) 51 | indices = torch.from_numpy(indices_np).long().to(device) 52 | weights_np = 1 / (len(p) * p[indices_np]) 53 | weights = torch.from_numpy(weights_np).float().to(device) 54 | return indices, weights 55 | 56 | 57 | class UniformSampler(ScheduleSampler): 58 | def __init__(self, diffusion): 59 | self.diffusion = diffusion 60 | self._weights = np.ones([diffusion.num_timesteps]) 61 | 62 | def weights(self): 63 | return self._weights 64 | 65 | 66 | class LossAwareSampler(ScheduleSampler): 67 | def update_with_local_losses(self, local_ts, local_losses): 68 | """ 69 | Update the reweighting using losses from a model. 70 | Call this method from each rank with a batch of timesteps and the 71 | corresponding losses for each of those timesteps. 72 | This method will perform synchronization to make sure all of the ranks 73 | maintain the exact same reweighting. 74 | :param local_ts: an integer Tensor of timesteps. 75 | :param local_losses: a 1D Tensor of losses. 76 | """ 77 | batch_sizes = [ 78 | torch.tensor([0], dtype=torch.int32, device=local_ts.device) 79 | for _ in range(dist.get_world_size()) 80 | ] 81 | dist.all_gather( 82 | batch_sizes, 83 | torch.tensor([len(local_ts)], dtype=torch.int32, device=local_ts.device), 84 | ) 85 | 86 | # Pad all_gather batches to be the maximum batch size. 87 | batch_sizes = [x.item() for x in batch_sizes] 88 | max_bs = max(batch_sizes) 89 | 90 | timestep_batches = [torch.zeros(max_bs).to(local_ts) for bs in batch_sizes] 91 | loss_batches = [torch.zeros(max_bs).to(local_losses) for bs in batch_sizes] 92 | dist.all_gather(timestep_batches, local_ts) 93 | dist.all_gather(loss_batches, local_losses) 94 | timesteps = [ 95 | x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs] 96 | ] 97 | losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]] 98 | self.update_with_all_losses(timesteps, losses) 99 | 100 | @abstractmethod 101 | def update_with_all_losses(self, ts, losses): 102 | """ 103 | Update the reweighting using losses from a model. 104 | Sub-classes should override this method to update the reweighting 105 | using losses from the model. 106 | This method directly updates the reweighting without synchronizing 107 | between workers. It is called by update_with_local_losses from all 108 | ranks with identical arguments. Thus, it should have deterministic 109 | behavior to maintain state across workers. 110 | :param ts: a list of int timesteps. 111 | :param losses: a list of float losses, one per timestep. 
112 | """ 113 | 114 | 115 | class LossSecondMomentResampler(LossAwareSampler): 116 | def __init__(self, diffusion, history_per_term=10, uniform_prob=0.001): 117 | self.diffusion = diffusion 118 | self.history_per_term = history_per_term 119 | self.uniform_prob = uniform_prob 120 | self._loss_history = np.zeros( 121 | [diffusion.num_timesteps, history_per_term], dtype=np.float64 122 | ) 123 | self._loss_counts = np.zeros([diffusion.num_timesteps], dtype=np.int) 124 | 125 | def weights(self): 126 | if not self._warmed_up(): 127 | return np.ones([self.diffusion.num_timesteps], dtype=np.float64) 128 | weights = np.sqrt(np.mean(self._loss_history**2, axis=-1)) 129 | weights /= np.sum(weights) 130 | weights *= 1 - self.uniform_prob 131 | weights += self.uniform_prob / len(weights) 132 | return weights 133 | 134 | def update_with_all_losses(self, ts, losses): 135 | for t, loss in zip(ts, losses): 136 | if self._loss_counts[t] == self.history_per_term: 137 | # Shift out the oldest loss term. 138 | self._loss_history[t, :-1] = self._loss_history[t, 1:] 139 | self._loss_history[t, -1] = loss 140 | else: 141 | self._loss_history[t, self._loss_counts[t]] = loss 142 | self._loss_counts[t] += 1 143 | 144 | def _warmed_up(self): 145 | return (self._loss_counts == self.history_per_term).all() 146 | -------------------------------------------------------------------------------- /kandinsky2/model/respace.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .gaussian_diffusion import GaussianDiffusion 5 | 6 | 7 | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps): 8 | if ddim_discr_method == "uniform": 9 | c = num_ddpm_timesteps // num_ddim_timesteps 10 | ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) 11 | elif ddim_discr_method == "quad": 12 | ddim_timesteps = ( 13 | (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2 14 | ).astype(int) 15 | else: 16 | raise NotImplementedError( 17 | f'There is no ddim discretization method called "{ddim_discr_method}"' 18 | ) 19 | 20 | steps_out = ddim_timesteps + 1 21 | return steps_out 22 | 23 | 24 | def space_timesteps(num_timesteps, section_counts): 25 | """ 26 | Create a list of timesteps to use from an original diffusion process, 27 | given the number of timesteps we want to take from equally-sized portions 28 | of the original process. 29 | For example, if there's 300 timesteps and the section counts are [10,15,20] 30 | then the first 100 timesteps are strided to be 10 timesteps, the second 100 31 | are strided to be 15 timesteps, and the final 100 are strided to be 20. 32 | If the stride is a string starting with "ddim", then the fixed striding 33 | from the DDIM paper is used, and only one section is allowed. 34 | :param num_timesteps: the number of diffusion steps in the original 35 | process to divide up. 36 | :param section_counts: either a list of numbers, or a string containing 37 | comma-separated numbers, indicating the step count 38 | per section. As a special case, use "ddimN" where N 39 | is a number of steps to use the striding from the 40 | DDIM paper. 41 | :return: a set of diffusion steps from the original process to use. 
42 | """ 43 | if isinstance(section_counts, str): 44 | if section_counts.startswith("ddim"): 45 | return set( 46 | make_ddim_timesteps( 47 | "uniform", int(section_counts[len("ddim") :]), num_timesteps 48 | ) 49 | ) 50 | section_counts = [int(x) for x in section_counts.split(",")] 51 | size_per = num_timesteps // len(section_counts) 52 | extra = num_timesteps % len(section_counts) 53 | start_idx = 0 54 | all_steps = [] 55 | for i, section_count in enumerate(section_counts): 56 | size = size_per + (1 if i < extra else 0) 57 | if size < section_count: 58 | raise ValueError( 59 | f"cannot divide section of {size} steps into {section_count}" 60 | ) 61 | if section_count <= 1: 62 | frac_stride = 1 63 | else: 64 | frac_stride = (size - 1) / (section_count - 1) 65 | cur_idx = 0.0 66 | taken_steps = [] 67 | for _ in range(section_count): 68 | taken_steps.append(start_idx + round(cur_idx)) 69 | cur_idx += frac_stride 70 | all_steps += taken_steps 71 | start_idx += size 72 | return set(all_steps) 73 | 74 | 75 | class SpacedDiffusion(GaussianDiffusion): 76 | """ 77 | A diffusion process which can skip steps in a base diffusion process. 78 | :param use_timesteps: a collection (sequence or set) of timesteps from the 79 | original diffusion process to retain. 80 | :param kwargs: the kwargs to create the base diffusion process. 81 | """ 82 | 83 | def __init__(self, use_timesteps, **kwargs): 84 | self.use_timesteps = set(use_timesteps) 85 | self.timestep_map = [] 86 | self.original_num_steps = len(kwargs["betas"]) 87 | 88 | base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa 89 | last_alpha_cumprod = 1.0 90 | new_betas = [] 91 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): 92 | if i in self.use_timesteps: 93 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 94 | last_alpha_cumprod = alpha_cumprod 95 | self.timestep_map.append(i) 96 | kwargs["betas"] = np.array(new_betas) 97 | super().__init__(**kwargs) 98 | 99 | def p_mean_variance( 100 | self, model, *args, **kwargs 101 | ): # pylint: disable=signature-differs 102 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) 103 | 104 | def training_losses( 105 | self, model, *args, **kwargs 106 | ): # pylint: disable=signature-differs 107 | return super().training_losses(self._wrap_model(model), *args, **kwargs) 108 | 109 | def _wrap_model(self, model): 110 | if isinstance(model, _WrappedModel): 111 | return model 112 | return _WrappedModel( 113 | model, self.timestep_map, self.rescale_timesteps, self.original_num_steps 114 | ) 115 | 116 | def _scale_timesteps(self, t): 117 | # Scaling is done by the wrapped model. 
118 | return t 119 | 120 | 121 | class _WrappedModel: 122 | def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): 123 | self.model = model 124 | self.timestep_map = timestep_map 125 | self.rescale_timesteps = rescale_timesteps 126 | self.original_num_steps = original_num_steps 127 | 128 | def __call__(self, x, ts, **kwargs): 129 | map_tensor = torch.tensor(self.timestep_map, device=ts.device, dtype=ts.dtype) 130 | new_ts = map_tensor[ts] 131 | if self.rescale_timesteps: 132 | new_ts = new_ts.float() * (1000.0 / self.original_num_steps) 133 | return self.model(x, new_ts, **kwargs) 134 | -------------------------------------------------------------------------------- /kandinsky2/model/text2im_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from copy import copy 5 | from .nn import timestep_embedding 6 | from .unet import UNetModel 7 | import math 8 | from abc import abstractmethod 9 | from .fp16_util import convert_module_to_f16, convert_module_to_f32 10 | from .text_encoders import AttentionPooling 11 | 12 | 13 | class Text2ImUNet(UNetModel): 14 | def __init__( 15 | self, 16 | model_dim, 17 | text_encoder_in_dim1=1024, 18 | text_encoder_in_dim2=640, 19 | pooling_type="attention_pooling", # ['from_model', 'attention_pooling'] 20 | *args, 21 | cache_text_emb=True, 22 | **kwargs, 23 | ): 24 | self.model_dim = model_dim 25 | super().__init__(*args, **kwargs, encoder_channels=model_dim) 26 | self.pooling_type = pooling_type 27 | 28 | self.to_model_dim = nn.Linear(text_encoder_in_dim1, model_dim) 29 | 30 | if self.pooling_type == "from_model": 31 | self.proj = nn.Linear(text_encoder_in_dim2, self.model_channels * 4) 32 | elif self.pooling_type == "attention_pooling": 33 | self.proj = AttentionPooling( 34 | 8, text_encoder_in_dim2, self.model_channels * 4 35 | ) 36 | self.proj2 = AttentionPooling(8, 512, self.model_channels * 4) 37 | self.to_model_dim2 = nn.Linear(512, model_dim) 38 | self.ln_model1 = nn.LayerNorm(model_dim) 39 | self.ln_model2 = nn.LayerNorm(self.model_channels * 4) 40 | self.ln_model3 = nn.LayerNorm(self.model_channels * 4) 41 | self.cache_text_emb = cache_text_emb 42 | self.cache = None 43 | 44 | def convert_to_fp16(self): 45 | super().convert_to_fp16() 46 | self.proj.to(torch.float16) 47 | self.to_model_dim.to(torch.float16) 48 | self.to_model_dim2.to(torch.float16) 49 | self.proj2.to(torch.float16) 50 | self.ln_model1.to(torch.float16) 51 | self.ln_model2.to(torch.float16) 52 | self.ln_model3.to(torch.float16) 53 | 54 | def get_text_emb( 55 | self, full_emb1=None, pooled_emb1=None, full_emb2=None, pooled_emb2=None 56 | ): 57 | if self.cache is not None and self.cache_text_emb: 58 | return self.cache 59 | if self.pooling_type == "from_model": 60 | xf_proj = self.proj(pooled_emb1) 61 | elif self.pooling_type == "attention_pooling": 62 | xf_proj = self.proj(full_emb1) 63 | xf_proj = self.ln_model2(xf_proj) 64 | pooled_emb2 = self.ln_model3(self.proj2(full_emb2)) 65 | xf_proj += pooled_emb2 66 | xf_out = self.ln_model1( 67 | torch.cat( 68 | [self.to_model_dim(full_emb1), self.to_model_dim2(full_emb2)], dim=1 69 | ) 70 | ) 71 | 72 | xf_out = xf_out.permute(0, 2, 1) # NLC -> NCL 73 | outputs = dict(xf_proj=xf_proj, xf_out=xf_out) 74 | 75 | if self.cache_text_emb: 76 | self.cache = outputs 77 | return outputs 78 | 79 | def del_cache(self): 80 | self.cache = None 81 | 82 | def forward( 83 | self, 84 | x, 85 | timesteps, 86 | 
full_emb1=None, 87 | pooled_emb1=None, 88 | full_emb2=None, 89 | pooled_emb2=None, 90 | ): 91 | hs = [] 92 | emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) 93 | text_outputs = self.get_text_emb( 94 | full_emb1=full_emb1, 95 | pooled_emb1=pooled_emb1, 96 | full_emb2=full_emb2, 97 | pooled_emb2=pooled_emb2, 98 | ) 99 | xf_proj, xf_out = text_outputs["xf_proj"], text_outputs["xf_out"] 100 | emb = emb + xf_proj.to(emb) 101 | h = x.type(self.dtype) 102 | for module in self.input_blocks: 103 | h = module(h, emb, xf_out) 104 | hs.append(h) 105 | h = self.middle_block(h, emb, xf_out) 106 | for module in self.output_blocks: 107 | h = torch.cat([h, hs.pop()], dim=1) 108 | h = module(h, emb, xf_out) 109 | h = h.type(torch.float32) 110 | h = self.out(h) 111 | return h 112 | 113 | 114 | class InpaintText2ImUNet(Text2ImUNet): 115 | """ 116 | A text2im model which can perform inpainting. 117 | """ 118 | 119 | def __init__(self, *args, **kwargs): 120 | if "in_channels" in kwargs: 121 | kwargs = dict(kwargs) 122 | kwargs["in_channels"] = kwargs["in_channels"] * 2 + 1 123 | else: 124 | args = list(args) 125 | args[1] = args[1] * 2 + 1 126 | super().__init__(*args, **kwargs) 127 | 128 | def forward(self, x, timesteps, inpaint_image=None, inpaint_mask=None, **kwargs): 129 | if inpaint_image is None: 130 | inpaint_image = torch.zeros_like(x) 131 | if inpaint_mask is None: 132 | inpaint_mask = torch.zeros_like(x[:, :1]) 133 | return super().forward( 134 | torch.cat([x, inpaint_image * inpaint_mask, inpaint_mask], dim=1), 135 | timesteps, 136 | **kwargs, 137 | ) 138 | -------------------------------------------------------------------------------- /kandinsky2/model/text2im_model2_1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from copy import copy 5 | from .nn import timestep_embedding 6 | from .unet import UNetModel 7 | import math 8 | from abc import abstractmethod 9 | from .fp16_util import convert_module_to_f16, convert_module_to_f32 10 | from .text_encoders import AttentionPooling 11 | 12 | 13 | class Text2ImUNet(UNetModel): 14 | def __init__( 15 | self, 16 | model_dim, 17 | image_encoder_in_dim=768, 18 | text_encoder_in_dim1=1024, 19 | text_encoder_in_dim2=768, 20 | num_image_embs=10, 21 | pooling_type="attention_pooling", # ['from_model', 'attention_pooling'] 22 | *args, 23 | cache_text_emb=True, 24 | **kwargs, 25 | ): 26 | self.model_dim = model_dim 27 | super().__init__(*args, **kwargs, encoder_channels=model_dim) 28 | self.pooling_type = pooling_type 29 | 30 | self.num_image_embs = num_image_embs 31 | self.clip_to_seq = nn.Linear( 32 | image_encoder_in_dim, model_dim * self.num_image_embs 33 | ) 34 | 35 | self.to_model_dim_n = nn.Linear(text_encoder_in_dim1, model_dim) 36 | 37 | if self.pooling_type == "from_model": 38 | self.proj_n = nn.Linear(text_encoder_in_dim2, self.model_channels * 4) 39 | elif self.pooling_type == "attention_pooling": 40 | self.proj_n = AttentionPooling( 41 | 8, text_encoder_in_dim1, self.model_channels * 4 42 | ) 43 | self.ln_model_n = nn.LayerNorm(self.model_channels * 4) 44 | self.img_layer = nn.Linear(image_encoder_in_dim, self.model_channels * 4) 45 | self.cache_text_emb = cache_text_emb 46 | self.cache = None 47 | self.model_dim = model_dim 48 | 49 | def convert_to_fp16(self): 50 | super().convert_to_fp16() 51 | self.clip_to_seq.to(torch.float16) 52 | self.proj_n.to(torch.float16) 53 | self.to_model_dim_n.to(torch.float16) 54 
| self.ln_model_n.to(torch.float16) 55 | self.img_layer.to(torch.float16) 56 | 57 | def get_text_emb(self, full_emb=None, pooled_emb=None, image_emb=None): 58 | if self.cache is not None and self.cache_text_emb: 59 | return self.cache 60 | 61 | clip_seq = self.clip_to_seq(image_emb).reshape( 62 | image_emb.shape[0], self.num_image_embs, self.model_dim 63 | ) 64 | 65 | if self.pooling_type == "from_model": 66 | xf_proj = self.proj_n(pooled_emb) 67 | elif self.pooling_type == "attention_pooling": 68 | xf_proj = self.proj_n(full_emb) 69 | 70 | xf_proj = self.ln_model_n(xf_proj) 71 | if image_emb is not None: 72 | xf_proj = xf_proj + self.img_layer(image_emb) 73 | xf_out = torch.cat((clip_seq, self.to_model_dim_n(full_emb)), dim=1) 74 | 75 | xf_out = xf_out.permute(0, 2, 1) # NLC -> NCL 76 | outputs = dict(xf_proj=xf_proj, xf_out=xf_out) 77 | 78 | if self.cache_text_emb: 79 | self.cache = outputs 80 | return outputs 81 | 82 | def del_cache(self): 83 | self.cache = None 84 | 85 | def forward(self, x, timesteps, full_emb=None, pooled_emb=None, image_emb=None): 86 | hs = [] 87 | emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) 88 | text_outputs = self.get_text_emb( 89 | full_emb=full_emb, pooled_emb=pooled_emb, image_emb=image_emb 90 | ) 91 | xf_proj, xf_out = text_outputs["xf_proj"], text_outputs["xf_out"] 92 | emb = emb + xf_proj.to(emb) 93 | h = x.type(self.dtype) 94 | for module in self.input_blocks: 95 | h = module(h, emb, xf_out) 96 | hs.append(h) 97 | h = self.middle_block(h, emb, xf_out) 98 | for module in self.output_blocks: 99 | h = torch.cat([h, hs.pop()], dim=1) 100 | h = module(h, emb, xf_out) 101 | h = h.type(x.dtype) 102 | h = self.out(h) 103 | return h 104 | 105 | 106 | class SuperResText2ImUNet(Text2ImUNet): 107 | """ 108 | A text2im model that performs super-resolution. 109 | Expects an extra kwarg `low_res` to condition on a low-resolution image. 110 | """ 111 | 112 | def __init__(self, *args, **kwargs): 113 | if "in_channels" in kwargs: 114 | kwargs = dict(kwargs) 115 | kwargs["in_channels"] = kwargs["in_channels"] * 2 116 | else: 117 | # Curse you, Python. Or really, just curse positional arguments :|. 118 | args = list(args) 119 | args[1] = args[1] * 2 120 | super().__init__(*args, **kwargs) 121 | 122 | def forward(self, x, timesteps, low_res=None, **kwargs): 123 | _, _, new_height, new_width = x.shape 124 | upsampled = F.interpolate( 125 | low_res, (new_height, new_width), mode="bilinear", align_corners=False 126 | ) 127 | x = torch.cat([x, upsampled], dim=1) 128 | return super().forward(x, timesteps, **kwargs) 129 | 130 | 131 | class InpaintText2ImUNet(Text2ImUNet): 132 | """ 133 | A text2im model which can perform inpainting. 134 | """ 135 | 136 | def __init__(self, *args, **kwargs): 137 | if "in_channels" in kwargs: 138 | kwargs = dict(kwargs) 139 | kwargs["in_channels"] = kwargs["in_channels"] * 2 + 1 140 | else: 141 | # Curse you, Python. Or really, just curse positional arguments :|. 
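# The inpainting UNet is conditioned on [noisy latent, masked image, mask]
# concatenated along the channel axis (see forward() below), which is why
# in_channels grows to in_channels * 2 + 1.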
142 | args = list(args) 143 | args[1] = args[1] * 2 + 1 144 | super().__init__(*args, **kwargs) 145 | 146 | def forward(self, x, timesteps, inpaint_image=None, inpaint_mask=None, **kwargs): 147 | if inpaint_image is None: 148 | inpaint_image = torch.zeros_like(x) 149 | if inpaint_mask is None: 150 | inpaint_mask = torch.zeros_like(x[:, :1]) 151 | return super().forward( 152 | torch.cat([x, inpaint_image * inpaint_mask, inpaint_mask], dim=1), 153 | timesteps, 154 | **kwargs, 155 | ) 156 | -------------------------------------------------------------------------------- /kandinsky2/model/text_encoders.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from transformers import ( 6 | T5EncoderModel, 7 | MT5EncoderModel, 8 | BertModel, 9 | XLMRobertaModel, 10 | AutoConfig, 11 | XLMRobertaModel, 12 | ) 13 | import transformers 14 | import os 15 | 16 | 17 | def attention(q, k, v, d_k): 18 | scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k) 19 | scores = F.softmax(scores, dim=-1) 20 | output = torch.matmul(scores, v) 21 | return output 22 | 23 | 24 | class AttentionPooling(nn.Module): 25 | def __init__( 26 | self, 27 | heads, 28 | in_dim, 29 | out_dim, 30 | ): 31 | super().__init__() 32 | 33 | self.in_dim = in_dim 34 | self.d_k = in_dim // heads 35 | self.h = heads 36 | 37 | self.q_linear = nn.Linear(in_dim, in_dim) 38 | self.v_linear = nn.Linear(in_dim, in_dim) 39 | self.k_linear = nn.Linear(in_dim, in_dim) 40 | self.out = nn.Linear(in_dim, out_dim) 41 | 42 | def forward(self, x): 43 | bs = x.size(0) 44 | 45 | k = self.k_linear(x).view(bs, -1, self.h, self.d_k) 46 | q = self.q_linear(x).view(bs, -1, self.h, self.d_k) 47 | v = self.v_linear(x).view(bs, -1, self.h, self.d_k) 48 | 49 | k = k.transpose(1, 2) 50 | q = q.transpose(1, 2) 51 | v = v.transpose(1, 2) 52 | scores = attention(q, k, v, self.d_k) 53 | 54 | concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.in_dim) 55 | 56 | output = self.out(concat) 57 | 58 | return output[:, 0] 59 | 60 | 61 | class ImagenCLIP(nn.Module): 62 | def __init__(self): 63 | super().__init__() 64 | transformer_width = 768 65 | embed_dim = 768 66 | transformer_layers = 12 67 | transformer_heads = transformer_width // 64 68 | vocab_size = 49408 69 | self.context_length = 77 70 | self.transformer = clip.model.Transformer( 71 | width=transformer_width, 72 | layers=transformer_layers, 73 | heads=transformer_heads, 74 | attn_mask=self.build_attention_mask(), 75 | ) 76 | self.token_embedding = nn.Embedding(vocab_size, transformer_width) 77 | self.positional_embedding = nn.Parameter( 78 | torch.empty(self.context_length, transformer_width) 79 | ) 80 | self.ln_final = clip.model.LayerNorm(transformer_width) 81 | self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) 82 | 83 | @property 84 | def dtype(self): 85 | return self.transformer.resblocks[0].attn.out_proj.weight.dtype 86 | 87 | def build_attention_mask(self): 88 | # lazily create causal attention mask, with full attention between the vision tokens 89 | # pytorch uses additive attention mask; fill with -inf 90 | mask = torch.empty(self.context_length, self.context_length) 91 | mask.fill_(float("-inf")) 92 | mask.triu_(1) # zero out the lower diagonal 93 | return mask 94 | 95 | def forward(self, text, mask=None): 96 | x = self.token_embedding(text).type(self.dtype) 97 | x = x + self.positional_embedding.type(self.dtype) 98 | x = x.permute(1, 
0, 2) 99 | x = self.transformer(x) 100 | x = x.permute(1, 0, 2) 101 | x = self.ln_final(x).type(self.dtype) 102 | pooled_out = ( 103 | x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection 104 | ) 105 | return x, pooled_out 106 | 107 | 108 | class MultilingualCLIP(nn.Module): 109 | def __init__(self, config, in_features=1024, out_features=640): 110 | super().__init__() 111 | loaded_config = AutoConfig.from_pretrained(config) 112 | self.transformer = XLMRobertaModel(loaded_config) 113 | self.LinearTransformation = torch.nn.Linear( 114 | in_features=in_features, out_features=out_features 115 | ) 116 | 117 | def forward(self, input_ids, attention_mask): 118 | embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0] 119 | embs2 = (embs * attention_mask.unsqueeze(2)).sum(dim=1) / attention_mask.sum( 120 | dim=1 121 | )[:, None] 122 | return self.LinearTransformation(embs2), embs 123 | 124 | 125 | class TextEncoder(nn.Module): 126 | def __init__(self, model_path, model_name, **kwargs): 127 | super().__init__() 128 | self.model_name = model_name 129 | if self.model_name == "clip": 130 | self.model = ImagenCLIP() 131 | self.model.load_state_dict(torch.load(model_path)) 132 | elif self.model_name == "T5EncoderModel": 133 | self.model = T5EncoderModel.from_pretrained(model_path) 134 | elif self.model_name == "MT5EncoderModel": 135 | self.model = MT5EncoderModel.from_pretrained(model_path) 136 | elif self.model_name == "BertModel": 137 | self.model = BertModel.from_pretrained(model_path) 138 | elif self.model_name == "multiclip": 139 | self.model = MultilingualCLIP(model_path, **kwargs) 140 | self.model.load_state_dict( 141 | torch.load(os.path.join(model_path, "pytorch_model.bin")), strict=False 142 | ) 143 | elif self.model_name == "xlm_roberta": 144 | self.model = XLMRobertaModel.from_pretrained(model_path).half() 145 | self.model.eval() 146 | for param in self.model.parameters(): 147 | param.requires_grad = False 148 | 149 | def forward(self, tokens, mask=None): 150 | if self.model_name == "clip": 151 | full_out, pooled_out = self.model(tokens) 152 | elif self.model_name in ["T5EncoderModel", "MT5EncoderModel"]: 153 | pooled_out = None 154 | full_out = self.model(input_ids=tokens, attention_mask=mask)[ 155 | "last_hidden_state" 156 | ] 157 | elif self.model_name in ["BertModel"]: 158 | out = self.model(input_ids=tokens, attention_mask=mask) 159 | full_out, pooled_out = out["last_hidden_state"], out["pooler_output"] 160 | elif self.model_name == "multiclip": 161 | pooled_out, full_out = self.model(input_ids=tokens, attention_mask=mask) 162 | elif self.model_name == "xlm_roberta": 163 | pooled_out = None 164 | full_out = self.model(input_ids=tokens, attention_mask=mask)[ 165 | "last_hidden_state" 166 | ].float() 167 | return full_out, pooled_out 168 | -------------------------------------------------------------------------------- /kandinsky2/model/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from PIL import Image 4 | import torch 5 | import torch.nn as nn 6 | import importlib 7 | 8 | 9 | def _extract_into_tensor(arr, timesteps, broadcast_shape): 10 | """ 11 | Extract values from a 1-D numpy array for a batch of indices. 12 | :param arr: the 1-D numpy array. 13 | :param timesteps: a tensor of indices into the array to extract. 14 | :param broadcast_shape: a larger shape of K dimensions with the batch 15 | dimension equal to the length of timesteps. 
16 | :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. 17 | """ 18 | res = torch.from_numpy(arr).to(device=timesteps.device)[timesteps].float() 19 | while len(res.shape) < len(broadcast_shape): 20 | res = res[..., None] 21 | return res.expand(broadcast_shape) 22 | 23 | 24 | def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): 25 | """ 26 | Get a pre-defined beta schedule for the given name. 27 | The beta schedule library consists of beta schedules which remain similar 28 | in the limit of num_diffusion_timesteps. 29 | Beta schedules may be added, but should not be removed or changed once 30 | they are committed to maintain backwards compatibility. 31 | """ 32 | if schedule_name == "linear": 33 | # Linear schedule from Ho et al, extended to work for any number of 34 | # diffusion steps. 35 | scale = 1000 / num_diffusion_timesteps 36 | beta_start = scale * 0.0001 37 | beta_end = scale * 0.02 38 | return np.linspace( 39 | beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64 40 | ) 41 | elif schedule_name == "cosine": 42 | return betas_for_alpha_bar( 43 | num_diffusion_timesteps, 44 | lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2, 45 | ) 46 | else: 47 | raise NotImplementedError(f"unknown beta schedule: {schedule_name}") 48 | 49 | 50 | def mean_flat(tensor): 51 | """ 52 | Take the mean over all non-batch dimensions. 53 | """ 54 | return tensor.mean(dim=list(range(1, len(tensor.shape)))) 55 | 56 | 57 | def get_obj_from_str(string): 58 | module, cls = string.rsplit(".", 1) 59 | return getattr(importlib.import_module(module, package=None), cls) 60 | 61 | 62 | def dict_keys(d, keys): 63 | d2 = {} 64 | for i in keys: 65 | d2[i] = d[i] 66 | return d2 67 | 68 | 69 | def return_images(bath): 70 | scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).cpu() 71 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/data/dataset_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from PIL import Image 6 | import io 7 | import re 8 | import os 9 | 10 | import torch 11 | import sys, time 12 | from torch.utils.data import Dataset, DataLoader 13 | import pytorch_lightning as pl 14 | from copy import deepcopy 15 | 16 | from transformers import AutoTokenizer 17 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 18 | from tqdm import tqdm 19 | from random import randint 20 | from ...model.prior import CustomizedTokenizer 21 | 22 | try: 23 | from torchvision.transforms import InterpolationMode 24 | 25 | BICUBIC = InterpolationMode.BICUBIC 26 | except ImportError: 27 | BICUBIC = Image.BICUBIC 28 | 29 | 30 | def _convert_image_to_rgb(image): 31 | return image.convert("RGB") 32 | 33 | 34 | def _transform(n_px): 35 | return Compose( 36 | [ 37 | Resize(n_px, interpolation=BICUBIC), 38 | CenterCrop(n_px), 39 | _convert_image_to_rgb, 40 | ToTensor(), 41 | Normalize( 42 | (0.48145466, 0.4578275, 0.40821073), 43 | (0.26862954, 0.26130258, 0.27577711), 44 | ), 45 | ] 46 | ) 47 | 48 | 49 | def center_crop(image): 50 | width, height = image.size 51 | new_size = min(width, height) 52 | left = (width - new_size) / 2 53 | top = (height - new_size) / 2 54 | right = (width + new_size) / 2 55 | bottom = (height + new_size) / 2 56 | return image.crop((left, top, right, bottom)) 57 | 58 | 59 | class TextImageDataset(Dataset): 60 | def 
__init__( 61 | self, 62 | df_path, 63 | clip_image_size=224, 64 | drop_text_prob=0.1, 65 | infinity=False, 66 | ): 67 | self.df = pd.read_csv(df_path) 68 | self.tokenizer = CustomizedTokenizer() 69 | self.transform1 = _transform(clip_image_size) 70 | self.drop_text_prob = drop_text_prob 71 | self.clip_image_size = clip_image_size 72 | self.infinity = infinity 73 | 74 | def __len__(self): 75 | if self.infinity: 76 | return 99999999 77 | else: 78 | return len(self.df) 79 | 80 | def __getitem__(self, item): 81 | if self.infinity: 82 | ind = randint(0, len(self.df) - 1) 83 | else: 84 | ind = item 85 | out_dict = {} 86 | image = Image.open(self.df["image_name"].iloc[ind]) 87 | clip_image = self.transform1(image) 88 | if np.random.binomial(1, self.drop_text_prob): 89 | text = "" 90 | else: 91 | text = self.df["caption"].iloc[ind] 92 | out_dict["tokens"], out_dict["mask"] = self.tokenizer.padded_tokens_and_mask([text], 77) 93 | out_dict["tokens"] = out_dict["tokens"][0] 94 | out_dict["mask"] = out_dict["mask"][0] 95 | return clip_image, out_dict 96 | 97 | 98 | def create_loader(batch_size, num_workers, shuffle=False, **dataset_params): 99 | dataset = TextImageDataset(**dataset_params) 100 | return DataLoader( 101 | dataset, 102 | batch_size=batch_size, 103 | num_workers=num_workers, 104 | shuffle=shuffle, 105 | pin_memory=True, 106 | ) 107 | 108 | 109 | class LightningDataModule(pl.LightningDataModule): 110 | """PyTorch Lightning data class""" 111 | 112 | def __init__(self, train_config, val_config): 113 | super().__init__() 114 | self.train_config = train_config 115 | self.val_config = val_config 116 | 117 | def train_dataloader(self): 118 | return create_loader(**self.train_config) 119 | 120 | def test_dataloader(self): 121 | return create_loader(**self.val_config) 122 | 123 | def val_dataloader(self): 124 | return create_loader(**self.val_config) 125 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/data/dataset_unclip_2_1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from PIL import Image 6 | import io 7 | import re 8 | import os 9 | 10 | import torch 11 | import sys, time 12 | from torch.utils.data import Dataset, DataLoader 13 | import pytorch_lightning as pl 14 | from copy import deepcopy 15 | 16 | from transformers import AutoTokenizer 17 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 18 | from tqdm import tqdm 19 | from random import randint 20 | 21 | try: 22 | from torchvision.transforms import InterpolationMode 23 | 24 | BICUBIC = InterpolationMode.BICUBIC 25 | except ImportError: 26 | BICUBIC = Image.BICUBIC 27 | 28 | 29 | def _convert_image_to_rgb(image): 30 | return image.convert("RGB") 31 | 32 | 33 | def _transform(n_px): 34 | return Compose( 35 | [ 36 | Resize(n_px, interpolation=BICUBIC), 37 | CenterCrop(n_px), 38 | _convert_image_to_rgb, 39 | ToTensor(), 40 | Normalize( 41 | (0.48145466, 0.4578275, 0.40821073), 42 | (0.26862954, 0.26130258, 0.27577711), 43 | ), 44 | ] 45 | ) 46 | 47 | 48 | def center_crop(image): 49 | width, height = image.size 50 | new_size = min(width, height) 51 | left = (width - new_size) / 2 52 | top = (height - new_size) / 2 53 | right = (width + new_size) / 2 54 | bottom = (height + new_size) / 2 55 | return image.crop((left, top, right, bottom)) 56 | 57 | 58 | class TextImageDataset(Dataset): 59 | def __init__( 60 | self, 61 | 
df_path, 62 | tokenizer_name="M-CLIP/XLM-Roberta-Large-Vit-L-14", 63 | clip_image_size=224, 64 | seq_len=77, 65 | drop_text_prob=0.5, 66 | drop_image_prob=0.5, 67 | image_size=512, 68 | infinity=False, 69 | ): 70 | self.df = pd.read_csv(df_path) 71 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 72 | self.transform1 = _transform(clip_image_size) 73 | self.seq_len = seq_len 74 | self.drop_text_prob = drop_text_prob 75 | self.drop_image_prob = drop_image_prob 76 | self.image_size = image_size 77 | self.clip_image_size = clip_image_size 78 | self.infinity = infinity 79 | 80 | def __len__(self): 81 | if self.infinity: 82 | return 99999999 83 | else: 84 | return len(self.df) 85 | 86 | def __getitem__(self, item): 87 | if self.infinity: 88 | ind = randint(0, len(self.df) - 1) 89 | else: 90 | ind = item 91 | out_dict = {} 92 | image = Image.open(self.df["image_name"].iloc[ind]) 93 | clip_image = self.transform1(deepcopy(image)) 94 | image = center_crop(image) 95 | image = image.resize( 96 | (self.image_size, self.image_size), resample=Image.BICUBIC, reducing_gap=1 97 | ) 98 | image = np.array(image.convert("RGB")) 99 | image = image.astype(np.float32) / 127.5 - 1 100 | if np.random.binomial(1, self.drop_text_prob): 101 | text = "" 102 | else: 103 | text = self.df["caption"].iloc[ind] 104 | text_encoding = self.tokenizer( 105 | text, 106 | max_length=self.seq_len, 107 | padding="max_length", 108 | truncation=True, 109 | return_attention_mask=True, 110 | add_special_tokens=True, 111 | return_tensors="pt", 112 | ) 113 | 114 | out_dict["tokens"] = text_encoding["input_ids"][0] 115 | out_dict["mask"] = text_encoding["attention_mask"][0] 116 | if np.random.binomial(1, self.drop_image_prob): 117 | out_dict["clip_image"] = torch.zeros( 118 | 3, self.clip_image_size, self.clip_image_size 119 | ) 120 | else: 121 | out_dict["clip_image"] = clip_image 122 | return np.transpose(image, [2, 0, 1]), out_dict 123 | 124 | 125 | def create_loader(batch_size, num_workers, shuffle=False, **dataset_params): 126 | dataset = TextImageDataset(**dataset_params) 127 | return DataLoader( 128 | dataset, 129 | batch_size=batch_size, 130 | num_workers=num_workers, 131 | shuffle=shuffle, 132 | pin_memory=True, 133 | ) 134 | 135 | 136 | class LightningDataModule(pl.LightningDataModule): 137 | """PyTorch Lightning data class""" 138 | 139 | def __init__(self, train_config, val_config): 140 | super().__init__() 141 | self.train_config = train_config 142 | self.val_config = val_config 143 | 144 | def train_dataloader(self): 145 | return create_loader(**self.train_config) 146 | 147 | def test_dataloader(self): 148 | return create_loader(**self.val_config) 149 | 150 | def val_dataloader(self): 151 | return create_loader(**self.val_config) 152 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class EMA(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_updates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_updates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if 
p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | self.collected_params = [param.clone() for param in parameters] 63 | 64 | def restore(self, parameters): 65 | for c_param, param in zip(self.collected_params, parameters): 66 | param.data.copy_(c_param.data) 67 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/train_module_pl2_1.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | import pytorch_lightning as pl 10 | 11 | from ..model.resample import UniformSampler 12 | from ..vqgan.autoencoder import VQModelInterface, AutoencoderKL, MOVQ 13 | from ..model.text_encoders import TextEncoder 14 | from ..model.utils import get_obj_from_str 15 | from .ema import EMA 16 | from .utils import generate_mask, get_image_mask 17 | import clip 18 | 19 | 20 | class Decoder(pl.LightningModule): 21 | def __init__( 22 | self, 23 | unet, 24 | diffusion, 25 | ema_rate, 26 | optim_params, 27 | scheduler_params, 28 | image_enc_params, 29 | text_enc_params, 30 | clip_name, 31 | use_ema=False, 32 | inpainting=False, 33 | ): 34 | super().__init__() 35 | self.unet = unet 36 | self.diffusion = diffusion 37 | self.image_enc_params = image_enc_params 38 | self.text_enc_params = text_enc_params 39 | self.ema_rate = ema_rate 40 | self.use_ema = use_ema 41 | self.schedule_sampler = UniformSampler(diffusion) 42 | self.inpainting = inpainting 43 | 44 | self.create_image_encoder() 45 | self.create_text_encoder() 46 | 47 | self.optim_params = optim_params 48 | self.scheduler_params = scheduler_params 49 | if use_ema: 50 | self.ema_params = EMA( 51 | self.unet, 52 | ema_rate, 53 | ) 54 | 55 | self.clip_model, _ = clip.load(clip_name, device="cpu", jit=False) 56 | self.clip_model.transformer = None 57 | self.clip_model.positional_embedding = None 58 | self.clip_model.ln_final = None 59 | self.clip_model.token_embedding = None 60 | self.clip_model.text_projection = None 61 | 62 | def create_image_encoder( 63 | self, 64 | ): 65 | if self.image_enc_params is not None: 66 | self.use_image_enc = True 67 | self.scale = self.image_enc_params["scale"] 68 | self.image_enc_name 
= self.image_enc_params["name"] 69 | if self.image_enc_params["name"] == "AutoencoderKL": 70 | self.image_encoder = AutoencoderKL(**self.image_enc_params["params"]) 71 | elif self.image_enc_params["name"] == "VQModelInterface": 72 | self.image_encoder = VQModelInterface(**self.image_enc_params["params"]) 73 | elif self.image_enc_params["name"] == "MOVQ": 74 | self.image_encoder = MOVQ(**self.image_enc_params["params"]) 75 | self.image_encoder.load_state_dict( 76 | torch.load(self.image_enc_params["ckpt_path"]) 77 | ) 78 | self.image_encoder.eval() 79 | for param in self.image_encoder.parameters(): 80 | param.requires_grad = False 81 | else: 82 | self.use_image_enc = False 83 | 84 | def create_text_encoder( 85 | self, 86 | ): 87 | if self.text_enc_params is not None: 88 | self.use_text_enc = True 89 | self.text_encoder = TextEncoder(**self.text_enc_params).eval().half() 90 | else: 91 | self.use_text_enc = False 92 | 93 | def configure_optimizers(self): 94 | optimizer = get_obj_from_str(self.optim_params["name"])( 95 | self.unet.parameters(), **self.optim_params["params"] 96 | ) 97 | lr_scheduler = get_obj_from_str(self.scheduler_params["name"])( 98 | optimizer, **self.scheduler_params["params"] 99 | ) 100 | return [optimizer], { 101 | "scheduler": lr_scheduler, 102 | "interval": "step", 103 | "frequency": 1, 104 | } 105 | 106 | def prepare_image(self, batch): 107 | if self.use_image_enc: 108 | with torch.no_grad(): 109 | if self.image_enc_name == "AutoencoderKL": 110 | batch = self.image_encoder.encode(batch).sample() 111 | elif self.image_enc_name == "VQModelInterface": 112 | batch = self.image_encoder.encode(batch) 113 | elif self.image_enc_name == "MOVQ": 114 | batch = self.image_encoder.encode(batch) 115 | batch = batch * self.scale 116 | return batch 117 | 118 | def prepare_cond(self, cond): 119 | if self.use_text_enc: 120 | mask = None 121 | new_cond = {} 122 | for key in cond.keys(): 123 | if key not in ["tokens", "mask", "clip_image"]: 124 | new_cond[key] = cond[key] 125 | if "mask" in cond: 126 | mask = cond["mask"] 127 | with torch.no_grad(): 128 | new_cond["image_emb"] = self.clip_model.encode_image( 129 | cond["clip_image"] 130 | ).float() 131 | with torch.no_grad(): 132 | new_cond["full_emb"], new_cond["pooled_emb"] = self.text_encoder( 133 | cond["tokens"].long(), mask 134 | ) 135 | del cond 136 | return new_cond 137 | return cond 138 | 139 | def model_step(self, batch, stage): 140 | image, cond = batch 141 | image = self.prepare_image(image) 142 | 143 | if self.inpainting: 144 | image_mask = get_image_mask(image.shape[0], image.shape[-2:]) 145 | image_mask = image_mask.to(image.device).unsqueeze(1).to(image.dtype) 146 | # image_mask = 1. 
- image_mask 147 | cond["inpaint_image"] = image * image_mask 148 | cond["inpaint_mask"] = image_mask 149 | 150 | cond = self.prepare_cond(cond) 151 | t, weights = self.schedule_sampler.sample(image.shape[0], image.device) 152 | compute_losses = functools.partial( 153 | self.diffusion.training_losses, 154 | self.unet, 155 | image, 156 | t, 157 | model_kwargs=cond, 158 | ) 159 | losses = compute_losses() 160 | loss = losses["loss"].mean() 161 | self.log(f"{stage}_loss", loss.detach().cpu().item(), sync_dist=True) 162 | 163 | return loss 164 | 165 | def training_step(self, batch, batch_idx): 166 | return self.model_step(batch, "train") 167 | 168 | def validation_step(self, batch, batch_idx): 169 | return self.model_step(batch, "valid") 170 | 171 | def on_train_batch_end(self, *args, **kwargs): 172 | if self.use_ema: 173 | self.ema_params(self.unet) 174 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/trainer_2_1_uclip.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | from tqdm import tqdm 10 | from ..vqgan.autoencoder import VQModelInterface, AutoencoderKL, MOVQ 11 | from .utils import generate_mask, get_image_mask 12 | import clip 13 | 14 | def prepare_image(batch, image_encoder, scale=1): 15 | with torch.no_grad(): 16 | batch = batch.half() 17 | batch = image_encoder.encode(batch) 18 | batch = batch * scale 19 | return batch.float() 20 | 21 | def prepare_cond(cond, text_encoder, clip_model): 22 | mask = None 23 | new_cond = {} 24 | for key in cond.keys(): 25 | if key not in ['tokens', 'mask', 'clip_image']: 26 | new_cond[key] = cond[key] 27 | if 'mask' in cond: 28 | mask = cond['mask'] 29 | with torch.no_grad(): 30 | new_cond['image_emb'] = clip_model.encode_image(cond['clip_image']).float() 31 | with torch.no_grad(): 32 | new_cond['full_emb'], new_cond['pooled_emb'] = text_encoder( 33 | cond['tokens'].long(), mask) 34 | new_cond['full_emb'] = new_cond['full_emb'].float() 35 | new_cond['pooled_emb'] = new_cond['pooled_emb'].float() 36 | del cond 37 | return new_cond 38 | 39 | def train_unclip(unet, diffusion, image_encoder, 40 | clip_model, text_encoder, optimizer, 41 | lr_scheduler=None, schedule_sampler=None, 42 | train_loader=None, val_loader=None, scale=1, 43 | num_epochs=2, save_every=1000, save_name='model', 44 | save_path='', inpainting=False, device='cuda:0'): 45 | train_step = 0 46 | 47 | for epoch in range(num_epochs): 48 | progress = tqdm(total=len(train_loader), desc='finetuning goes brrr') 49 | for batch in train_loader: 50 | optimizer.zero_grad() 51 | image, cond = batch 52 | image = image.to(device) 53 | for key in cond.keys(): 54 | cond[key] = cond[key].to(device) 55 | image = prepare_image(image, image_encoder, scale=scale) 56 | if inpainting: 57 | image_mask = get_image_mask(image.shape[0], image.shape[-2:]) 58 | image_mask = image_mask.to(image.device).unsqueeze(1).to(image.dtype) 59 | image_mask = 1. 
- image_mask 60 | cond['inpaint_image'] = image * image_mask 61 | cond['inpaint_mask'] = image_mask 62 | cond = prepare_cond(cond, text_encoder, clip_model) 63 | t, weights = schedule_sampler.sample(image.shape[0], image.device) 64 | compute_losses = functools.partial( 65 | diffusion.training_losses, 66 | unet, 67 | image, 68 | t, 69 | model_kwargs=cond, 70 | ) 71 | losses = compute_losses() 72 | loss = losses["loss"].mean() 73 | loss.backward() 74 | optimizer.step() 75 | if lr_scheduler is not None: 76 | lr_scheduler.step() 77 | train_step += 1 78 | if train_step % save_every == 0: 79 | torch.save(unet.state_dict(), os.path.join(save_path, save_name + str(train_step) + '.ckpt')) 80 | progress.update() 81 | progress.set_postfix({"loss": loss.item()}) 82 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/trainer_prior.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | from tqdm import tqdm 10 | from .utils import generate_mask, get_image_mask 11 | import clip 12 | 13 | def encode_text(tok, clip_model): 14 | with torch.no_grad(): 15 | x = clip_model.token_embedding(tok).type(clip_model.dtype) 16 | x = x + clip_model.positional_embedding.type(clip_model.dtype) 17 | x = x.permute(1, 0, 2) 18 | x = clip_model.transformer(x) 19 | x = x.permute(1, 0, 2) 20 | txt_feat_seq = x 21 | txt_feat = x[torch.arange(x.shape[0]), tok.argmax(dim=-1)] @ clip_model.text_projection 22 | txt_feat, txt_feat_seq = txt_feat.float(), txt_feat_seq.float() 23 | return txt_feat, txt_feat_seq 24 | 25 | def encode_image(image, clip_model, clip_mean, clip_std): 26 | with torch.no_grad(): 27 | return (clip_model.encode_image(image).float() - clip_mean) / clip_std 28 | 29 | def train_prior(model, diffusion, 30 | clip_model, optimizer, 31 | lr_scheduler=None, schedule_sampler=None, 32 | train_loader=None, val_loader=None, 33 | num_epochs=2, save_every=1000, save_name='model', 34 | save_path='', device='cuda:0'): 35 | train_step = 0 36 | for epoch in range(num_epochs): 37 | progress = tqdm(total=len(train_loader), desc='finetuning goes brrr') 38 | for batch in train_loader: 39 | optimizer.zero_grad() 40 | image, cond = batch 41 | image = image.to(device) 42 | for key in cond.keys(): 43 | cond[key] = cond[key].to(device) 44 | image = encode_image(image, clip_model, model.clip_mean.to(device), model.clip_std.to(device)) 45 | txt_feat, txt_feat_seq = encode_text(cond['tokens'], clip_model) 46 | cond = { 47 | "text_emb": txt_feat, 48 | "text_enc": txt_feat_seq, 49 | "mask": cond['mask'], 50 | "causal_mask": model.causal_mask, 51 | } 52 | t, weights = schedule_sampler.sample(image.shape[0], image.device) 53 | compute_losses = functools.partial( 54 | diffusion.training_losses, 55 | model.model, 56 | image, 57 | t, 58 | model_kwargs=cond, 59 | ) 60 | losses = compute_losses() 61 | loss = losses["loss"].mean() 62 | loss.backward() 63 | optimizer.step() 64 | if lr_scheduler is not None: 65 | lr_scheduler.step() 66 | train_step += 1 67 | if train_step % save_every == 0: 68 | torch.save(model.state_dict(), os.path.join(save_path, save_name + str(train_step) + '.ckpt')) 69 | progress.update() 70 | progress.set_postfix({"loss": loss.item()}) 71 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/utils.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch as th 4 | import torch.nn as nn 5 | import numpy as np 6 | from scipy.interpolate import interp1d 7 | import importlib 8 | from matplotlib.path import Path 9 | 10 | 11 | def get_polygon_mask_params( 12 | mask_size, box, num_vertices, mask_scale, min_scale, max_scale 13 | ): 14 | center = ((box[2] + box[0]) / 2, (box[3] + box[1]) / 2) 15 | sizes = (box[2] - box[0], box[3] - box[1]) 16 | 17 | part_avg_radii = np.linspace( 18 | mask_scale * sizes[0] / 2, mask_scale * sizes[1] / 2, num_vertices // 4 19 | ) 20 | part_avg_radii = np.clip( 21 | part_avg_radii, min_scale * min(mask_size), max_scale * min(mask_size) 22 | ) 23 | avg_radii = np.concatenate( 24 | [ 25 | part_avg_radii, 26 | part_avg_radii[::-1], 27 | part_avg_radii, 28 | part_avg_radii[::-1], 29 | ] 30 | ) 31 | return center, avg_radii 32 | 33 | 34 | def smooth_cerv(x, y): 35 | num_vertices = x.shape[0] 36 | x = np.concatenate((x[-3:-1], x, x[1:3])) 37 | y = np.concatenate((y[-3:-1], y, y[1:3])) 38 | t = np.arange(x.shape[0]) 39 | 40 | ti = np.linspace(2, num_vertices + 1, 4 * num_vertices) 41 | xi = interp1d(t, x, kind="quadratic")(ti) 42 | yi = interp1d(t, y, kind="quadratic")(ti) 43 | return xi, yi 44 | 45 | 46 | def get_polygon_mask(mask_size, mask_points): 47 | x, y = np.meshgrid(np.arange(mask_size[0]), np.arange(mask_size[1])) 48 | x, y = x.flatten(), y.flatten() 49 | points = np.vstack((x, y)).T 50 | 51 | path = Path(mask_points) 52 | grid = path.contains_points(points) 53 | grid = grid.reshape((mask_size[0], mask_size[1])) 54 | return 1.0 - grid.astype(np.int32) 55 | 56 | 57 | def generate_polygon( 58 | mask_size, center, num_vertices, radii, radii_var, angle_var, smooth=True 59 | ): 60 | angle_steps = np.random.uniform( 61 | 1.0 - angle_var, 1.0 + angle_var, size=(num_vertices,) 62 | ) 63 | angle_steps = 2 * np.pi * angle_steps / angle_steps.sum() 64 | 65 | radii = np.random.normal(radii, radii_var * radii) 66 | radii = np.clip(radii, 0, 2 * radii) 67 | angles = np.cumsum(angle_steps) 68 | x = center[0] + radii * np.cos(angles) 69 | y = center[1] + radii * np.sin(angles) 70 | 71 | if smooth: 72 | x, y = smooth_cerv(x, y) 73 | points = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=-1) 74 | points = list(map(tuple, points.tolist())) 75 | return get_polygon_mask(mask_size, points) 76 | 77 | 78 | def generate_circle_frame(mask_size, side_scales, num_vertices, radii_var, smooth=True): 79 | num_vertices_per_side = num_vertices // 4 80 | x_size, y_size = mask_size 81 | up_radii = np.array([y_size * (1.0 - side_scales[0]) // 2] * num_vertices_per_side) 82 | down_radii = np.array( 83 | [y_size * (1.0 - side_scales[1]) // 2] * num_vertices_per_side 84 | ) 85 | left_radii = np.array( 86 | [x_size * (1.0 - side_scales[2]) // 2] * num_vertices_per_side 87 | ) 88 | right_radii = np.array( 89 | [x_size * (1.0 - side_scales[3]) // 2] * num_vertices_per_side 90 | ) 91 | 92 | center = (x_size // 2, y_size // 2) 93 | radii = np.concatenate( 94 | [ 95 | right_radii[num_vertices_per_side // 2 :], 96 | down_radii, 97 | left_radii, 98 | up_radii, 99 | right_radii[: num_vertices_per_side // 2], 100 | ] 101 | ) 102 | return 1.0 - generate_polygon( 103 | mask_size, center, num_vertices, radii, radii_var, 0.0, smooth=smooth 104 | ) 105 | 106 | 107 | def generate_square_frame(mask_size, side_scales, num_vertices, radii_var, smooth=True): 108 | num_vertices_per_side = num_vertices // 4 109 | x_size, y_size = 
mask_size 110 | diag_size = np.sqrt(x_size**2 + y_size**2) 111 | 112 | up_radii = np.linspace( 113 | diag_size * (1.0 - side_scales[0]) // 2, 114 | y_size * (1.0 - side_scales[0]) // 2, 115 | num_vertices_per_side // 2, 116 | ) 117 | down_radii = np.linspace( 118 | diag_size * (1.0 - side_scales[1]) // 2, 119 | y_size * (1.0 - side_scales[1]) // 2, 120 | num_vertices_per_side // 2, 121 | ) 122 | left_radii = np.linspace( 123 | diag_size * (1.0 - side_scales[2]) // 2, 124 | x_size * (1.0 - side_scales[2]) // 2, 125 | num_vertices_per_side // 2, 126 | ) 127 | right_radii = np.linspace( 128 | diag_size * (1.0 - side_scales[3]) // 2, 129 | x_size * (1.0 - side_scales[3]) // 2, 130 | num_vertices_per_side // 2, 131 | ) 132 | 133 | center = (x_size // 2, y_size // 2) 134 | radii = np.concatenate( 135 | [ 136 | right_radii[::-1], 137 | down_radii, 138 | down_radii[::-1], 139 | left_radii, 140 | left_radii[::-1], 141 | up_radii, 142 | up_radii[::-1], 143 | right_radii, 144 | ] 145 | ) 146 | return 1.0 - generate_polygon( 147 | mask_size, center, num_vertices, radii, radii_var, 0.0, smooth=smooth 148 | ) 149 | 150 | 151 | def generate_mask(mask_size, box, box_prob=0.1): 152 | mask = np.ones(mask_size) 153 | if np.random.binomial(1, box_prob): 154 | box = [int(i) for i in box] 155 | mask[box[1] : box[3], box[0] : box[2]] = 0 156 | 157 | else: 158 | actions = np.random.randint(0, 2, (2,)) 159 | if 0 in actions: 160 | num_vertices = 16 161 | center, radii = get_polygon_mask_params( 162 | mask_size, 163 | box, 164 | num_vertices, 165 | mask_scale=1.5, 166 | min_scale=0.1, 167 | max_scale=0.6, 168 | ) 169 | mask *= generate_polygon( 170 | mask_size, center, num_vertices, radii, radii_var=0.15, angle_var=0.15 171 | ) 172 | if 1 in actions: 173 | radii_var = 0.15 * np.random.random() 174 | num_vertices = np.random.choice([16, 32]) 175 | if np.random.random() < 0.5: 176 | side_scales = 0.25 * np.random.random((4,)) + 0.05 177 | mask *= generate_square_frame( 178 | mask_size, side_scales, num_vertices, radii_var=radii_var 179 | ) 180 | else: 181 | side_scales = 0.15 * np.random.random((4,)) + 0.1 182 | mask *= generate_circle_frame( 183 | mask_size, side_scales, num_vertices, radii_var=radii_var 184 | ) 185 | return mask 186 | 187 | 188 | def get_boxes(bs, target_size, min_scale=0.1, max_scale=0.62): 189 | min_size_x = min_scale * target_size[0] 190 | max_size_x = max_scale * target_size[0] 191 | min_size_y = min_scale * target_size[1] 192 | max_size_y = max_scale * target_size[1] 193 | 194 | boxes_size_x = (max_size_x - min_size_x) * np.random.random((bs, 1)) + min_size_x 195 | boxes_size_y = (max_size_y - min_size_y) * np.random.random((bs, 1)) + min_size_y 196 | 197 | x0 = (target_size[0] - max_size_x) * np.random.random((bs, 1)) 198 | y0 = (target_size[1] - max_size_y) * np.random.random((bs, 1)) 199 | 200 | boxes = np.concatenate((x0, y0, x0 + boxes_size_x, y0 + boxes_size_y), -1) 201 | return boxes.tolist() 202 | 203 | 204 | def get_image_mask(bs, target_size): 205 | boxes = get_boxes(bs, target_size) 206 | image_mask = torch.stack( 207 | [torch.tensor(generate_mask(target_size, box)) for box in boxes] 208 | ) 209 | return image_mask 210 | 211 | 212 | def freeze_decoder( 213 | model, 214 | freeze_resblocks=False, 215 | freeze_attention=False, 216 | ): 217 | for name, p in model.named_parameters(): 218 | name = name.lower() 219 | if ( 220 | "in_layers" in name 221 | or "h_upd" in name 222 | or "x_upd" in name 223 | or "emb_layers" in name 224 | or "out_layers" in name 225 | ): 226 | p.requires_grad =
not freeze_resblocks 227 | elif "proj_out" in name or "qkv" in name: 228 | p.requires_grad = not freeze_attention 229 | return model 230 | -------------------------------------------------------------------------------- /kandinsky2/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from PIL import Image 4 | import torch 5 | import torch.nn as nn 6 | import importlib 7 | from .model.utils import get_named_beta_schedule, _extract_into_tensor 8 | from copy import deepcopy 9 | 10 | 11 | 12 | def prepare_mask(mask): 13 | mask = mask.float()[0] 14 | old_mask = deepcopy(mask) 15 | for i in range(mask.shape[1]): 16 | for j in range(mask.shape[2]): 17 | if old_mask[0][i][j] == 1: 18 | continue 19 | if i != 0: 20 | mask[:, i - 1, j] = 0 21 | if j != 0: 22 | mask[:, i, j - 1] = 0 23 | if i != 0 and j != 0: 24 | mask[:, i - 1, j - 1] = 0 25 | if i != mask.shape[1] - 1: 26 | mask[:, i + 1, j] = 0 27 | if j != mask.shape[2] - 1: 28 | mask[:, i, j + 1] = 0 29 | if i != mask.shape[1] - 1 and j != mask.shape[2] - 1: 30 | mask[:, i + 1, j + 1] = 0 31 | return mask.unsqueeze(0) 32 | 33 | 34 | def prepare_image(pil_image, w=512, h=512): 35 | pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1) 36 | arr = np.array(pil_image.convert("RGB")) 37 | arr = arr.astype(np.float32) / 127.5 - 1 38 | arr = np.transpose(arr, [2, 0, 1]) 39 | image = torch.from_numpy(arr).unsqueeze(0) 40 | return image 41 | 42 | 43 | def q_sample(x_start, t, schedule_name="linear", num_steps=1000, noise=None): 44 | betas = get_named_beta_schedule(schedule_name, num_steps) 45 | alphas = 1.0 - betas 46 | alphas_cumprod = np.cumprod(alphas, axis=0) 47 | sqrt_alphas_cumprod = np.sqrt(alphas_cumprod) 48 | sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - alphas_cumprod) 49 | if noise is None: 50 | noise = torch.randn_like(x_start) 51 | assert noise.shape == x_start.shape 52 | return ( 53 | _extract_into_tensor(sqrt_alphas_cumprod, t, x_start.shape) * x_start 54 | + _extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise 55 | ) 56 | 57 | 58 | def process_images(batch): 59 | scaled = ( 60 | ((batch + 1) * 127.5) 61 | .round() 62 | .clamp(0, 255) 63 | .to(torch.uint8) 64 | .to("cpu") 65 | .permute(0, 2, 3, 1) 66 | .numpy() 67 | ) 68 | images = [] 69 | for i in range(scaled.shape[0]): 70 | images.append(Image.fromarray(scaled[i])) 71 | return images 72 | 73 | -------------------------------------------------------------------------------- /kandinsky2/vqgan/autoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import pytorch_lightning as pl 4 | import torch.nn.functional as F 5 | from contextlib import contextmanager 6 | 7 | from .quntize import VectorQuantizer 8 | 9 | from .vqgan_blocks import Encoder, Decoder, DiagonalGaussianDistribution 10 | from .movq_modules import MOVQDecoder 11 | 12 | 13 | class VQModel(pl.LightningModule): 14 | def __init__( 15 | self, 16 | ddconfig, 17 | n_embed, 18 | embed_dim, 19 | ckpt_path=None, 20 | ignore_keys=[], 21 | scheduler_config=None, 22 | remap=None, 23 | sane_index_shape=False, # tell vector quantizer to return indices as bhw 24 | ): 25 | super().__init__() 26 | self.embed_dim = embed_dim 27 | self.n_embed = n_embed 28 | self.encoder = Encoder(**ddconfig) 29 | self.decoder = Decoder(**ddconfig) 30 | self.quantize = VectorQuantizer( 31 | n_embed, 32 | embed_dim, 33 | beta=0.25, 34 | remap=remap, 35 | 
sane_index_shape=sane_index_shape, 36 | ) 37 | self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) 38 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 39 | 40 | if ckpt_path is not None: 41 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 42 | self.scheduler_config = scheduler_config 43 | 44 | def init_from_ckpt(self, path, ignore_keys=list()): 45 | sd = torch.load(path, map_location="cpu")["state_dict"] 46 | keys = list(sd.keys()) 47 | for k in keys: 48 | for ik in ignore_keys: 49 | if k.startswith(ik): 50 | print("Deleting key {} from state_dict.".format(k)) 51 | del sd[k] 52 | missing, unexpected = self.load_state_dict(sd, strict=False) 53 | print( 54 | f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys" 55 | ) 56 | if len(missing) > 0: 57 | print(f"Missing Keys: {missing}") 58 | print(f"Unexpected Keys: {unexpected}") 59 | 60 | def encode(self, x): 61 | h = self.encoder(x) 62 | h = self.quant_conv(h) 63 | quant, emb_loss, info = self.quantize(h) 64 | return quant, emb_loss, info 65 | 66 | def encode_to_prequant(self, x): 67 | h = self.encoder(x) 68 | h = self.quant_conv(h) 69 | return h 70 | 71 | def decode(self, quant): 72 | quant = self.post_quant_conv(quant) 73 | dec = self.decoder(quant) 74 | return dec 75 | 76 | def decode_code(self, code_b): 77 | quant_b = self.quantize.embed_code(code_b) 78 | dec = self.decode(quant_b) 79 | return dec 80 | 81 | def forward(self, input, return_pred_indices=False): 82 | quant, diff, (_, _, ind) = self.encode(input) 83 | dec = self.decode(quant) 84 | if return_pred_indices: 85 | return dec, diff, ind 86 | return dec, diff 87 | 88 | 89 | class VQModelInterface(VQModel): 90 | def __init__(self, embed_dim, *args, **kwargs): 91 | super().__init__(embed_dim=embed_dim, *args, **kwargs) 92 | self.embed_dim = embed_dim 93 | 94 | def encode(self, x): 95 | h = self.encoder(x) 96 | h = self.quant_conv(h) 97 | return h 98 | 99 | def decode(self, h, force_not_quantize=False): 100 | # also go through quantization layer 101 | if not force_not_quantize: 102 | quant, emb_loss, info = self.quantize(h) 103 | else: 104 | quant = h 105 | quant = self.post_quant_conv(quant) 106 | dec = self.decoder(quant) 107 | return dec 108 | 109 | 110 | class AutoencoderKL(pl.LightningModule): 111 | def __init__( 112 | self, 113 | ddconfig, 114 | embed_dim, 115 | ckpt_path=None, 116 | ignore_keys=[], 117 | ): 118 | super().__init__() 119 | self.encoder = Encoder(**ddconfig) 120 | self.decoder = Decoder(**ddconfig) 121 | assert ddconfig["double_z"] 122 | self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1) 123 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 124 | self.embed_dim = embed_dim 125 | if ckpt_path is not None: 126 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 127 | 128 | def init_from_ckpt(self, path, ignore_keys=list()): 129 | sd = torch.load(path, map_location="cpu")["state_dict"] 130 | keys = list(sd.keys()) 131 | for k in keys: 132 | for ik in ignore_keys: 133 | if k.startswith(ik): 134 | print("Deleting key {} from state_dict.".format(k)) 135 | del sd[k] 136 | self.load_state_dict(sd, strict=False) 137 | print(f"Restored from {path}") 138 | 139 | def encode(self, x): 140 | h = self.encoder(x) 141 | moments = self.quant_conv(h) 142 | posterior = DiagonalGaussianDistribution(moments) 143 | return posterior 144 | 145 | def decode(self, z): 146 | z = self.post_quant_conv(z) 147 | dec = self.decoder(z) 148 | 
return dec 149 | 150 | def forward(self, input, sample_posterior=True): 151 | posterior = self.encode(input) 152 | if sample_posterior: 153 | z = posterior.sample() 154 | else: 155 | z = posterior.mode() 156 | dec = self.decode(z) 157 | return dec, posterior 158 | 159 | 160 | class MOVQ(nn.Module): 161 | def __init__( 162 | self, 163 | ddconfig, 164 | n_embed, 165 | embed_dim, 166 | ): 167 | super().__init__() 168 | self.encoder = Encoder(**ddconfig) 169 | self.decoder = MOVQDecoder(zq_ch=embed_dim, **ddconfig) 170 | self.quantize = VectorQuantizer( 171 | n_embed, embed_dim, beta=0.25, remap=None, sane_index_shape=False 172 | ) 173 | self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) 174 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 175 | 176 | def encode(self, x): 177 | h = self.encoder(x) 178 | h = self.quant_conv(h) 179 | # quant, emb_loss, info = self.quantize(h) 180 | return h 181 | 182 | def decode(self, quant): 183 | quant2 = self.post_quant_conv(quant) 184 | dec = self.decoder(quant2, quant) 185 | return dec 186 | 187 | def decode_code(self, code_b): 188 | batch_size = code_b.shape[0] 189 | quant = self.quantize.embedding(code_b.flatten()) 190 | grid_size = int((quant.shape[0] // batch_size) ** 0.5) 191 | quant = quant.view((1, 32, 32, 4)) 192 | quant = rearrange(quant, "b h w c -> b c h w").contiguous() 193 | print(quant.shape) 194 | quant2 = self.post_quant_conv(quant) 195 | dec = self.decoder(quant2, quant) 196 | return dec 197 | 198 | def forward(self, input): 199 | quant, diff, _ = self.encode(input) 200 | dec = self.decode(quant) 201 | return dec, diff 202 | -------------------------------------------------------------------------------- /kandinsky2/vqgan/quntize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from torch import einsum 6 | from einops import rearrange 7 | 8 | 9 | class VectorQuantizer(nn.Module): 10 | """ 11 | Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly 12 | avoids costly matrix multiplications and allows for post-hoc remapping of indices. 13 | """ 14 | 15 | # NOTE: due to a bug the beta term was applied to the wrong term. for 16 | # backwards compatibility we use the buggy version by default, but you can 17 | # specify legacy=False to fix it. 18 | def __init__( 19 | self, 20 | n_e, 21 | e_dim, 22 | beta, 23 | remap=None, 24 | unknown_index="random", 25 | sane_index_shape=False, 26 | legacy=True, 27 | ): 28 | super().__init__() 29 | self.n_e = n_e 30 | self.e_dim = e_dim 31 | self.beta = beta 32 | self.legacy = legacy 33 | 34 | self.embedding = nn.Embedding(self.n_e, self.e_dim) 35 | self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) 36 | 37 | self.remap = remap 38 | if self.remap is not None: 39 | self.register_buffer("used", torch.tensor(np.load(self.remap))) 40 | self.re_embed = self.used.shape[0] 41 | self.unknown_index = unknown_index # "random" or "extra" or integer 42 | if self.unknown_index == "extra": 43 | self.unknown_index = self.re_embed 44 | self.re_embed = self.re_embed + 1 45 | print( 46 | f"Remapping {self.n_e} indices to {self.re_embed} indices. " 47 | f"Using {self.unknown_index} for unknown indices." 
48 | ) 49 | else: 50 | self.re_embed = n_e 51 | 52 | self.sane_index_shape = sane_index_shape 53 | 54 | def remap_to_used(self, inds): 55 | ishape = inds.shape 56 | assert len(ishape) > 1 57 | inds = inds.reshape(ishape[0], -1) 58 | used = self.used.to(inds) 59 | match = (inds[:, :, None] == used[None, None, ...]).long() 60 | new = match.argmax(-1) 61 | unknown = match.sum(2) < 1 62 | if self.unknown_index == "random": 63 | new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to( 64 | device=new.device 65 | ) 66 | else: 67 | new[unknown] = self.unknown_index 68 | return new.reshape(ishape) 69 | 70 | def unmap_to_all(self, inds): 71 | ishape = inds.shape 72 | assert len(ishape) > 1 73 | inds = inds.reshape(ishape[0], -1) 74 | used = self.used.to(inds) 75 | if self.re_embed > self.used.shape[0]: # extra token 76 | inds[inds >= self.used.shape[0]] = 0 # simply set to zero 77 | back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds) 78 | return back.reshape(ishape) 79 | 80 | def forward(self, z, temp=None, rescale_logits=False, return_logits=False): 81 | assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel" 82 | assert rescale_logits == False, "Only for interface compatible with Gumbel" 83 | assert return_logits == False, "Only for interface compatible with Gumbel" 84 | # reshape z -> (batch, height, width, channel) and flatten 85 | z = rearrange(z, "b c h w -> b h w c").contiguous() 86 | z_flattened = z.view(-1, self.e_dim) 87 | # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z 88 | 89 | d = ( 90 | torch.sum(z_flattened**2, dim=1, keepdim=True) 91 | + torch.sum(self.embedding.weight**2, dim=1) 92 | - 2 93 | * torch.einsum( 94 | "bd,dn->bn", z_flattened, rearrange(self.embedding.weight, "n d -> d n") 95 | ) 96 | ) 97 | 98 | min_encoding_indices = torch.argmin(d, dim=1) 99 | z_q = self.embedding(min_encoding_indices).view(z.shape) 100 | perplexity = None 101 | min_encodings = None 102 | 103 | # compute loss for embedding 104 | if not self.legacy: 105 | loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + torch.mean( 106 | (z_q - z.detach()) ** 2 107 | ) 108 | else: 109 | loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean( 110 | (z_q - z.detach()) ** 2 111 | ) 112 | 113 | # preserve gradients 114 | z_q = z + (z_q - z).detach() 115 | 116 | # reshape back to match original input shape 117 | z_q = rearrange(z_q, "b h w c -> b c h w").contiguous() 118 | 119 | if self.remap is not None: 120 | min_encoding_indices = min_encoding_indices.reshape( 121 | z.shape[0], -1 122 | ) # add batch axis 123 | min_encoding_indices = self.remap_to_used(min_encoding_indices) 124 | min_encoding_indices = min_encoding_indices.reshape(-1, 1) # flatten 125 | 126 | if self.sane_index_shape: 127 | min_encoding_indices = min_encoding_indices.reshape( 128 | z_q.shape[0], z_q.shape[2], z_q.shape[3] 129 | ) 130 | 131 | return z_q, loss, (perplexity, min_encodings, min_encoding_indices) 132 | 133 | def get_codebook_entry(self, indices, shape): 134 | # shape specifying (batch, height, width, channel) 135 | if self.remap is not None: 136 | indices = indices.reshape(shape[0], -1) # add batch axis 137 | indices = self.unmap_to_all(indices) 138 | indices = indices.reshape(-1) # flatten again 139 | 140 | # get quantized latent vectors 141 | z_q = self.embedding(indices) 142 | 143 | if shape is not None: 144 | z_q = z_q.view(shape) 145 | # reshape back to match original input shape 146 | z_q = z_q.permute(0, 3, 1, 2).contiguous() 147 | 148 
| return z_q 149 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/model/__init__.py -------------------------------------------------------------------------------- /model/pops_utils.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Tuple 3 | 4 | import torch 5 | from torch import nn 6 | 7 | def preprocess(image_a: torch.Tensor, image_b: torch.Tensor, image_encoder: nn.Module, clip_mean: torch.Tensor, 8 | clip_std: torch.Tensor, should_drop_cond: List[Tuple[bool, bool]] = None, concat_hidden_states=None, 9 | image_list=None): 10 | with torch.no_grad(): 11 | image_list = [] if image_list is None else image_list 12 | additional_list = [] 13 | if image_a is not None: 14 | additional_list.append(image_a) 15 | if image_b is not None: 16 | additional_list.append(image_b) 17 | image_list = additional_list + image_list 18 | embeds_list = [] 19 | for image in image_list: 20 | # If already is vector skip encoder 21 | if len(image.shape) == 2: 22 | image_embeds = image 23 | else: 24 | encoder_outs = image_encoder(image, output_hidden_states=False) 25 | image_embeds = encoder_outs.image_embeds 26 | image_embeds = (image_embeds - clip_mean) / clip_std 27 | embeds_list.append(image_embeds.unsqueeze(1)) 28 | if should_drop_cond is not None: 29 | for b_ind in range(embeds_list[0].shape[0]): 30 | should_drop_a, should_drop_b = should_drop_cond[b_ind] 31 | if should_drop_a: 32 | embeds_list[0][b_ind] = torch.zeros_like(embeds_list[0][b_ind]) 33 | if should_drop_b and image_b is not None: 34 | embeds_list[1][b_ind] = torch.zeros_like(embeds_list[1][b_ind]) 35 | if concat_hidden_states is not None: 36 | embeds_list.append(concat_hidden_states) 37 | out_hidden_states = torch.concat(embeds_list, dim=1) 38 | 39 | image_embeds = torch.zeros_like(embeds_list[0].squeeze(1)) 40 | 41 | return image_embeds, out_hidden_states 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | transformers 3 | matplotlib 4 | pyrallis 5 | omegaconf 6 | opencv-python 7 | einops 8 | pytorch-lightning 9 | accelerate 10 | clip @ git+https://github.com/openai/CLIP.git@a9b1bf5920416aaeaec965c25dd9e8f98c864f16 -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/infer_instruct.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | from pathlib import Path 4 | from typing import List, Optional 5 | 6 | import pyrallis 7 | import torch 8 | from PIL import Image 9 | from diffusers import PriorTransformer, UNet2DConditionModel, KandinskyV22Pipeline 10 | from tqdm import tqdm 11 | from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor, CLIPTokenizer, CLIPTextModelWithProjection 12 | 13 | from model import pops_utils 14 | from model.pipeline_pops import pOpsPipeline 15 | 
from huggingface_hub import hf_hub_download 16 | from utils import vis_utils 17 | from data_generation import words_bank 18 | 19 | 20 | @dataclass 21 | class RunConfig: 22 | # Path to the learned prior in local filesystem or huggingface 23 | prior_path: Path 24 | # Input directory 25 | dir_a: Path 26 | # The repo to download the prior from, if None, assumes prior_path is a local path 27 | prior_repo: Optional[str] = None 28 | output_dir_name: Path = Path('inference/results_instruct') 29 | # Path to the kandinsky repo 30 | kandinsky_prior_repo: str = 'kandinsky-community/kandinsky-2-2-prior' 31 | kandinsky_decoder_repo: str = 'kandinsky-community/kandinsky-2-2-decoder' 32 | prior_guidance_scale: List[float] = field(default_factory=lambda: [1.0]) 33 | prior_seeds: List[int] = field(default_factory=lambda: [18, 42]) 34 | unet_seeds: List[int] = field(default_factory=lambda: [0, 1]) 35 | texts: List[str] = field(default_factory=lambda: words_bank.adjectives) 36 | 37 | 38 | @pyrallis.wrap() 39 | def main(cfg: RunConfig): 40 | output_dir = cfg.output_dir_name # cfg.prior_path.parent / cfg.output_dir_name 41 | output_dir.mkdir(parents=True, exist_ok=True) 42 | 43 | weight_dtype = torch.float16 44 | device = 'cuda:0' 45 | image_encoder = CLIPVisionModelWithProjection.from_pretrained(cfg.kandinsky_prior_repo, 46 | subfolder='image_encoder', 47 | torch_dtype=weight_dtype).eval() 48 | 49 | image_processor = CLIPImageProcessor.from_pretrained(cfg.kandinsky_prior_repo, 50 | subfolder='image_processor') 51 | 52 | tokenizer = CLIPTokenizer.from_pretrained(cfg.kandinsky_prior_repo, subfolder='tokenizer') 53 | text_encoder = CLIPTextModelWithProjection.from_pretrained(cfg.kandinsky_prior_repo, 54 | subfolder='text_encoder', 55 | torch_dtype=weight_dtype).eval().to(device) 56 | 57 | prior = PriorTransformer.from_pretrained( 58 | cfg.kandinsky_prior_repo, subfolder="prior" 59 | ) 60 | 61 | if cfg.prior_repo: 62 | # Load from huggingface 63 | prior_path = hf_hub_download(repo_id=cfg.prior_repo, filename=str(cfg.prior_path)) 64 | else: 65 | prior_path = cfg.prior_path 66 | 67 | prior_state_dict = torch.load(prior_path, map_location=device) 68 | msg = prior.load_state_dict(prior_state_dict, strict=False) 69 | print(msg) 70 | 71 | prior.eval() 72 | 73 | # Freeze text_encoder and image_encoder 74 | image_encoder.requires_grad_(False) 75 | 76 | # Load full model for vis 77 | unet = UNet2DConditionModel.from_pretrained(cfg.kandinsky_decoder_repo, 78 | subfolder='unet').to(torch.float16).to(device) 79 | prior_pipeline = pOpsPipeline.from_pretrained(cfg.kandinsky_prior_repo, 80 | prior=prior, 81 | image_encoder=image_encoder, 82 | torch_dtype=torch.float16) 83 | prior_pipeline = prior_pipeline.to(device) 84 | prior = prior.to(weight_dtype) 85 | decoder = KandinskyV22Pipeline.from_pretrained(cfg.kandinsky_decoder_repo, unet=unet, 86 | torch_dtype=torch.float16) 87 | decoder = decoder.to(device) 88 | 89 | # glob for both jpgs or pths 90 | inputs_a = [path for path in cfg.dir_a.glob('*.jpg')] + [path for path in cfg.dir_a.glob('*.pth')] 91 | 92 | paths = [(input_a, text) for input_a in inputs_a for text in cfg.texts] 93 | 94 | # just so we have more variety to look at during the inference 95 | random.shuffle(paths) 96 | 97 | for input_a_path, text in tqdm(paths): 98 | def process_image(input_path): 99 | image_caption_suffix = '' 100 | if input_path is not None and input_path.suffix == '.pth': 101 | image = torch.load(input_path).to(device).to(weight_dtype) 102 | embs_unnormed = (image * prior.clip_std) + prior.clip_mean 
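# Saved .pth inputs are assumed to hold embeddings in the prior's normalized CLIP space,
# so they are mapped back via x * clip_std + clip_mean before being rendered with the decoder below.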
103 | zero_embeds = prior_pipeline.get_zero_embed(embs_unnormed.shape[0], device=embs_unnormed.device) 104 | direct_from_emb = decoder(image_embeds=embs_unnormed, negative_image_embeds=zero_embeds, 105 | num_inference_steps=50, height=512, 106 | width=512, guidance_scale=4).images 107 | image_pil = direct_from_emb[0] 108 | image_caption_suffix = '(embedding)' 109 | else: 110 | if input_path is not None: 111 | image_pil = Image.open(input_path).convert("RGB").resize((512, 512)) 112 | else: 113 | image_pil = Image.new('RGB', (512, 512), (255, 255, 255)) 114 | 115 | image = torch.Tensor(image_processor(image_pil)['pixel_values'][0]).to(device).unsqueeze(0).to( 116 | weight_dtype) 117 | 118 | return image, image_pil, image_caption_suffix 119 | 120 | # Process both inputs 121 | image_a, image_pil_a, caption_suffix_a = process_image(input_a_path) 122 | 123 | text_inputs = tokenizer( 124 | text, 125 | padding="max_length", 126 | max_length=tokenizer.model_max_length, 127 | truncation=True, 128 | return_tensors="pt", 129 | ) 130 | mask = text_inputs.attention_mask.bool() # [0] 131 | 132 | text_encoder_output = text_encoder(text_inputs.input_ids.to(device)) 133 | text_encoder_hidden_states = text_encoder_output.last_hidden_state 134 | text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()] 135 | # 136 | 137 | input_image_embeds, input_hidden_state = pops_utils.preprocess(image_a, None, 138 | image_encoder, 139 | prior.clip_mean.detach(), prior.clip_std.detach(), 140 | concat_hidden_states=text_encoder_concat) 141 | 142 | input_images = [image_pil_a] 143 | captions = [f'{text}{caption_suffix_a}'] 144 | 145 | out_name = f"{input_a_path.stem if input_a_path is not None else ''}_{text}" 146 | for seed in cfg.prior_seeds: 147 | negative_input_embeds = torch.zeros_like(input_image_embeds) 148 | negative_hidden_states = torch.zeros_like(input_hidden_state) 149 | for scale in cfg.prior_guidance_scale: 150 | img_emb = prior_pipeline(input_embeds=input_image_embeds, input_hidden_states=input_hidden_state, 151 | negative_input_embeds=negative_input_embeds, 152 | negative_input_hidden_states=negative_hidden_states, 153 | num_inference_steps=25, 154 | num_images_per_prompt=1, 155 | guidance_scale=scale, 156 | generator=torch.Generator(device=device).manual_seed(seed)) 157 | torch.save(img_emb, output_dir / f"{out_name}_s_{seed}_cfg_{scale}_img_emb.pth") 158 | negative_emb = img_emb.negative_image_embeds 159 | for seed_2 in cfg.unet_seeds: 160 | images = decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=negative_emb, 161 | num_inference_steps=50, height=512, 162 | width=512, guidance_scale=4, 163 | generator=torch.Generator(device=device).manual_seed(seed_2)).images 164 | input_images += images 165 | captions.append(f"prior_s {seed}, cfg {scale} unet_s {seed_2}") # , ") 166 | gen_images = vis_utils.create_table_plot(images=input_images, captions=captions) 167 | 168 | gen_images.save(output_dir / f"{out_name}.jpg") 169 | print('Done!') 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import pyrallis 2 | 3 | from training.coach import Coach 4 | from training.train_config import TrainConfig 5 | 6 | 7 | @pyrallis.wrap() 8 | def main(cfg: TrainConfig): 9 | coach = Coach(cfg) 10 | coach.train() 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | 
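Note: scripts/train.py above is a thin pyrallis entry point around the Coach class, and every field of the TrainConfig dataclass defined below doubles as a command-line flag. A minimal sketch of driving the same training loop programmatically (the output path here is a placeholder, not one shipped with the repo):

from pathlib import Path

from training.coach import Coach
from training.train_config import TrainConfig

# Placeholder output path; any other TrainConfig field (lr, use_ema, ...) can be set the same way,
# or overridden on the command line via pyrallis, e.g. python scripts/train.py --mode texture
cfg = TrainConfig(
    dataset_path=Path('datasets/generated/generated_things'),
    output_dir=Path('results/texture_run'),
    mode='texture',
)
Coach(cfg).train()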
-------------------------------------------------------------------------------- /training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/training/__init__.py -------------------------------------------------------------------------------- /training/train_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from pathlib import Path 3 | from typing import List, Optional, Union 4 | 5 | 6 | @dataclass 7 | class TrainConfig: 8 | # Dataset path 9 | dataset_path: Union[Path, List[Path]] = Path('datasets/generated/generated_things') 10 | # Validation dataset path 11 | val_dataset_path: Path = Path('datasets/generated/generated_things_val') 12 | # Path to pretrained model WITHOUT 2_1 folder 13 | cache_root: Path = Path('/tmp/kandinsky2') 14 | # The output directory where the model predictions and checkpoints will be written. 15 | output_dir: Path = Path('results/my_pops_model') 16 | # GPU device 17 | device: str = 'cuda:0' 18 | # The resolution for input images, all the images will be resized to this size 19 | img_size: int = 512 20 | # Batch size (per device) for the training dataloader 21 | train_batch_size: int = 1 22 | # Initial learning rate (after the potential warmup period) to use 23 | lr: float = 1e-5 24 | # Dataloader num workers. 25 | num_workers: int = 8 26 | # The beta1 parameter for the Adam optimizer. 27 | adam_beta1: float = 0.9 28 | # The beta2 parameter for the Adam optimizer 29 | adam_beta2: float = 0.999 30 | # Weight decay to use 31 | adam_weight_decay: float = 0.0 # 1e-2 32 | # Epsilon value for the Adam optimizer 33 | adam_epsilon: float = 1e-08 34 | # How often save images. Values less zero - disable saving 35 | log_image_frequency: int = 500 36 | # How often to run validation 37 | log_validation: int = 5000 38 | # The number of images to save during each validation 39 | n_val_images: int = 10 40 | # A seed for reproducible training 41 | seed: Optional[int] = None 42 | # The number of accumulation steps to use 43 | gradient_accumulation_steps: int = 1 44 | # Whether to use mixed precision training 45 | mixed_precision: Optional[str] = 'fp16' 46 | # Log to wandb 47 | report_to: str = 'wandb' 48 | # Path to pretrained prior model or model identifier from huggingface.co/models. 49 | pretrained_prior_path: str = 'kandinsky-community/kandinsky-2-2-prior' 50 | # Path to pretrained image encoder. 51 | pretrained_image_encoder: str = 'kandinsky-community/kandinsky-2-2-prior' 52 | # Path to scheduler. 53 | scheduler_path: str = 'kandinsky-community/kandinsky-2-2-prior' 54 | # Path to image_processor. 55 | image_processor_path: str = 'kandinsky-community/kandinsky-2-2-prior' 56 | # Path to text_encoder. 57 | text_encoder_path: str = 'kandinsky-community/kandinsky-2-2-prior' 58 | # Path to tokenizer. 
59 | tokenizer_path: str = 'kandinsky-community/kandinsky-2-2-prior' 60 | use_ema: bool = False 61 | allow_tf32: bool = False 62 | use_8bit_adam: bool = False 63 | lr_scheduler: str = 'constant' 64 | # The number of training steps to run 65 | max_train_steps: int = 1000000 66 | # Max grad for clipping 67 | max_grad_norm: float = 1.0 68 | # How often to save checkpoints 69 | checkpointing_steps: int = 5000 70 | # The path to resume from 71 | resume_from_path: Optional[Path] = None 72 | # The step to resume from, mainly for logging 73 | resume_from_step: Optional[int] = None 74 | # Lora mode, untested 75 | lora_rank: Optional[int] = None 76 | # Which operator to train 77 | mode: str = 'texture' 78 | # The path to the textures dataset if used 79 | textures_dir: Optional[Path] = None 80 | # The path to the backgrounds dataset if used 81 | backgrounds_dir: Optional[Path] = None 82 | # optional directory of plain images to use for unconditional denoising 83 | randoms_dir: Optional[Path] = None 84 | # Whether full model is trained or only some layers, x_layers is the format for training only x layers 85 | training_mode: str = 'full' 86 | # Whether to use clip loss 87 | use_clip_loss: bool = False 88 | # Clip lambda 89 | clip_strength: float = 10.0 90 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/utils/__init__.py -------------------------------------------------------------------------------- /utils/clothes_utils.py: -------------------------------------------------------------------------------- 1 | ind2labels_dict = { 2 | 0: "Background", 3 | 1: "Hat", 4 | 2: "Hair", 5 | 3: "Sunglasses", 6 | 4: "Upper-clothes", 7 | 5: "Skirt", 8 | 6: "Pants", 9 | 7: "Dress", 10 | 8: "Belt", 11 | 9: "Left-shoe", 12 | 10: "Right-shoe", 13 | 11: "Face", 14 | 12: "Left-leg", 15 | 13: "Right-leg", 16 | 14: "Left-arm", 17 | 15: "Right-arm", 18 | 16: "Bag", 19 | 17: "Scarf" 20 | } 21 | 22 | # Calculate the reverse dict 23 | labels2ind_dict = {v: k for k, v in ind2labels_dict.items()} 24 | 25 | 26 | relevant_inds = [1,2,3,4,5,6,7,8,9,10,16,17] 27 | -------------------------------------------------------------------------------- /utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from typing import List, Tuple, Optional 3 | 4 | import numpy as np 5 | from PIL import Image, ImageDraw, ImageFont 6 | 7 | LINE_WIDTH = 20 8 | 9 | 10 | def add_text_to_image(image: np.ndarray, text: str, text_color: Tuple[int, int, int] = (0, 0, 0), 11 | min_lines: Optional[int] = None, add_below: bool = True): 12 | import textwrap 13 | lines = textwrap.wrap(text, width=LINE_WIDTH) 14 | if min_lines is not None and len(lines) < min_lines: 15 | if add_below: 16 | lines += [''] * (min_lines - len(lines)) 17 | else: 18 | lines = [''] * (min_lines - len(lines)) + lines 19 | h, w, c = image.shape 20 | offset = int(h * .12) 21 | img = np.ones((h + offset * len(lines), w, c), dtype=np.uint8) * 255 22 | font_size = int(offset * .8) 23 | 24 | try: 25 | font = ImageFont.truetype("assets/OpenSans-Regular.ttf", font_size) 26 | textsize = font.getbbox(text) 27 | y_offset = (offset - textsize[3]) // 2 28 | except: 29 | font = ImageFont.load_default() 30 | y_offset = offset // 2 31 | 32 | if add_below: 33 | img[:h] = image 34 | else: 35 | img[-h:] = image 36 | 
img = Image.fromarray(img) 37 | draw = ImageDraw.Draw(img) 38 | for i, line in enumerate(lines): 39 | line_size = font.getbbox(line) 40 | text_x = (w - line_size[2]) // 2 41 | if add_below: 42 | draw.text((text_x, h + y_offset + offset * i), line, font=font, fill=text_color) 43 | else: 44 | draw.text((text_x, 0 + y_offset + offset * i), line, font=font, fill=text_color) 45 | return np.array(img) 46 | 47 | 48 | def create_table_plot(images: List[Image.Image], titles: List[str]=None, captions: List[str]=None) -> Image.Image: 49 | title_max_lines = np.max([len(textwrap.wrap(text, width=LINE_WIDTH)) for text in titles]) if titles is not None else 0 50 | caption_max_lines = np.max([len(textwrap.wrap(text, width=LINE_WIDTH)) for text in captions]) if captions is not None else 0 51 | out_images = [] 52 | for i in range(len(images)): 53 | im = np.array(images[i]) 54 | if titles is not None: 55 | im = add_text_to_image(im, titles[i], add_below=False, min_lines=title_max_lines) 56 | if captions is not None: 57 | im = add_text_to_image(im, captions[i], add_below=True, min_lines=caption_max_lines) 58 | out_images.append(im) 59 | image = Image.fromarray(np.concatenate(out_images, axis=1)) 60 | return image 61 | --------------------------------------------------------------------------------
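As a usage note, the helpers in utils/vis_utils.py are what the inference scripts use to assemble labelled image grids. A minimal, self-contained sketch (the two image paths are placeholders, not files from the repo):

from PIL import Image

from utils.vis_utils import create_table_plot

# Placeholder inputs; any equally sized RGB images work.
images = [Image.open(p).convert('RGB').resize((512, 512)) for p in ['input.jpg', 'result.jpg']]
grid = create_table_plot(
    images=images,
    titles=['input', 'output'],
    captions=['source image', 'prior_s 18, cfg 1.0 unet_s 0'],
)
grid.save('grid.jpg')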