├── README.md ├── assets ├── OpenSans-Regular.ttf ├── README.md ├── backgrounds │ ├── 11th-gate-e-kl_vOpwLg-unsplash.jpg │ ├── adolfo-felix-PG8NyM_Mcts-unsplash.jpg │ ├── alexandra-zelena-phskyemu_c4-unsplash.jpg │ ├── benjamin-3WdChmuv7mE-unsplash.jpg │ ├── benjamin-child-0sT9YhNgSEs-unsplash.jpg │ ├── carlo-lisa-GHuT3dkZxYM-unsplash.jpg │ ├── changbok-ko-F8t2VGnI47I-unsplash.jpg │ ├── conscious-design-mLpbHWquEYM-unsplash.jpg │ ├── diego-jimenez-A-NVHPka9Rk-unsplash.jpg │ ├── dogancan-ozturan-urY_iHk3nm0-unsplash.jpg │ ├── elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg │ ├── evaldas-grizas-9-WvhPP1I6M-unsplash.jpg │ ├── heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg │ ├── inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg │ ├── jordan-steranka-NcF6MVPW644-unsplash.jpg │ ├── joseph-barrientos-oQl0eVYd_n8-unsplash.jpg │ ├── juli-kosolapova-Us_dv71f1bc-unsplash.jpg │ ├── masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg │ ├── meritt-thomas-GktK3Jb9BRE-unsplash.jpg │ ├── nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg │ ├── neom-aWHKsYkbCi8-unsplash.jpg │ ├── neom-cYy-o9i8aCs-unsplash.jpg │ ├── neom-jTxhUMyPTrE-unsplash.jpg │ ├── neom-kXbit_yx8t4-unsplash.jpg │ ├── presentsquare-cSK33MUaVzQ-unsplash.jpg │ ├── robert-lukeman-PH0HYjsf2n8-unsplash.jpg │ ├── sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg │ ├── tj-holowaychuk-1EYMue_AwDw-unsplash.jpg │ ├── toby-elliott-17yY9Lyddrc-unsplash.jpg │ ├── yaodan-fc6cEU1sTBY-unsplash.jpg │ └── yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg ├── objects │ ├── alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg │ ├── allec-gomes-xnRg3xDcNnE-unsplash.jpg │ ├── allenwhm-wh-RgpT4_5g-unsplash.jpg │ ├── alvan-nee-T-0EW-SEbsE-unsplash.jpg │ ├── amit-lahav-rxN2MRdFJVg-unsplash.jpg │ ├── andre-taissin-hnyZg63sRCY-unsplash.jpg │ ├── arno-senoner-HFE2RyC76tw-unsplash.jpg │ ├── artem-maltsev-VOO5ojMQ_9A-unsplash.jpg │ ├── aurelia-dubois-xzrJCS4grC4-unsplash.jpg │ ├── bermix-studio-ZMxHvB9J7YU-unsplash.jpg │ ├── birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg │ ├── bonnie-kittle-MUcxe_wDurE-unsplash.jpg │ ├── brigitte-tohm-51AK6yJDgv0-unsplash.jpg │ ├── chastity-cortijo-3OENu9mJ0i0-unsplash.jpg │ ├── cici-hung-nV3v8ZMRLNc-unsplash.jpg │ ├── claire-abdo-_-635EI3nV8-unsplash.jpg │ ├── coppertist-wu-OrQvIBYNPcw-unsplash.jpg │ ├── coppertist-wu-XlFSnJOeyQs-unsplash.jpg │ ├── coppertist-wu-its52T6D4bo-unsplash.jpg │ ├── coppertist-wu-mwjuTJzJ9w4-unsplash.jpg │ ├── coppertist-wu-w1AK7wWsTPs-unsplash.jpg │ ├── courtney-cook-KcY7DCqc9VI-unsplash.jpg │ ├── creative-headline-APNnyM36puU-unsplash.jpg │ ├── daniel-k-cheung-WJuwxFIpidc-unsplash.jpg │ ├── daniil-silantev-1P6AnKDw6S8-unsplash.jpg │ ├── dominic-phillips-QEVT_XYXKPs-unsplash.jpg │ ├── engin-akyurt-TDOClniEwmI-unsplash.jpg │ ├── engin-akyurt-iLHCV4ZBH7s-unsplash.jpg │ ├── erfan-tajik-m_hgaJLqCRM-unsplash.jpg │ ├── erick-butler-3XQlnryKz0o-unsplash.jpg │ ├── erik-mclean-kNRKg1pfiqE-unsplash.jpg │ ├── fatty-corgi-1QsQRkxnU6I-unsplash.jpg │ ├── fernando-andrade-Q33VONoOfSU-unsplash.jpg │ ├── frank-zimmermann-xYh8uAwqZK0-unsplash.jpg │ ├── gaby-yerden--OIinu1YDTk-unsplash.jpg │ ├── gary-walker-jones-uC5jnQPdY7I-unsplash.jpg │ ├── gilbert-beltran-EUQRWgmvhr8-unsplash.jpg │ ├── giorgio-trovato-p0OlRAAYXLY-unsplash.jpg │ ├── hannah-pemberton-3d82e5_ylGo-unsplash.jpg │ ├── irene-kredenets-tcVH_BwHtrc-unsplash.jpg │ ├── isaac-martin-Jewkfj03OUU-unsplash.jpg │ ├── isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg │ ├── ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg │ ├── ivan-pergasi-CZT7lkrt5sU-unsplash.jpg │ ├── jessica-tan-Rufz-e6Qrqg-unsplash.jpg │ ├── 
joshua-coleman-8V4y-XXT3MQ-unsplash.jpg │ ├── juan-mayobre-_IAhW7a4pWA-unsplash.jpg │ ├── kate-0I8hNhkFUWQ-unsplash.jpg │ ├── kelly-sikkema-LHxU7QYSn7c-unsplash.jpg │ ├── kenny-eliason-lbqZUefMLvQ-unsplash.jpg │ ├── kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg │ ├── lora-seis-dS5xpjW38Qk-unsplash.jpg │ ├── lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg │ ├── lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg │ ├── lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg │ ├── maria-oswalt-GKE6i5mQplI-unsplash.jpg │ ├── mario-losereit-mTZyJeR1Rnc-unsplash.jpg │ ├── mediamodifier-gAUUqaI83b8-unsplash.jpg │ ├── mehmet-keskin-qHdGjahnx48-unsplash.jpg │ ├── milad-fakurian-3CoSLrSrvhY-unsplash.jpg │ ├── milad-fakurian-gpkPI7qs1cg-unsplash.jpg │ ├── mink-mingle-Riz1qAplMQk-unsplash.jpg │ ├── mo-U3Kst7MY4Ok-unsplash.jpg │ ├── mockup-graphics-aUkYaG12Dgs-unsplash.jpg │ ├── mockup-graphics-biK3YJHhBfM-unsplash.jpg │ ├── mockup-graphics-enNffryKuQI-unsplash.jpg │ ├── mockup-graphics-haSJEJYzl5A-unsplash.jpg │ ├── morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg │ ├── mulyadi-JEfwd_OkQGE-unsplash.jpg │ ├── naomi-hebert-2dcYhvbHV-M-unsplash.jpg │ ├── nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg │ ├── nik-JbAz6eG5GS4-unsplash.jpg │ ├── nikolett-emmert-_g2jz1SghvQ-unsplash.jpg │ ├── olena-bohovyk-InlMkMNlrhY-unsplash.jpg │ ├── or-hakim-VQxKattL-X4-unsplash.jpg │ ├── pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg │ ├── personalgraphic-com-IFlg3kFbR0E-unsplash.jpg │ ├── personalgraphic-com-glY1L-eo0Fc-unsplash.jpg │ ├── pexels-amar-19086869.jpg │ ├── pexels-cottonbro-3661226.jpg │ ├── pexels-damir-10608624.jpg │ ├── pexels-eva-bronzini-5777472.jpg │ ├── pexels-laarkstudio-7186448.jpg │ ├── pexels-laarkstudio-7434298.jpg │ ├── pexels-photoscene-7271951.jpg │ ├── pexels-sirikul-r-2043201-11393334.jpg │ ├── pixmike-t1Lr0BPQfKg-unsplash.jpg │ ├── r-n-tyfqOL1FAQc-unsplash.jpg │ ├── reno-laithienne-kzX7dcKoMTM-unsplash.jpg │ ├── ron-dauphin-UgidX4V13Gc-unsplash.jpg │ ├── ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg │ ├── sami-ahmed-ngS0S-ZjOpc-unsplash.jpg │ ├── santhosh-kumar-RqYTuWkTdEs-unsplash.jpg │ ├── sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg │ ├── shirley-tittermary-JnymxncvrrY-unsplash.jpg │ ├── sincerely-media-MWcwKjwY8yo-unsplash.jpg │ ├── siora-photography-GMWh_I-odL0-unsplash.jpg │ ├── sun-lingyan-_H0fjILH5Vw-unsplash.jpg │ ├── thoa-ngo-AZr6AOMu3l8-unsplash.jpg │ ├── thought-catalog-9aOswReDKPo-unsplash.jpg │ ├── timothy-dykes-LhqLdDPcSV8-unsplash.jpg │ ├── tom-crew-Mz__0nr1AM8-unsplash.jpg │ └── yucel-moran-L0VzWT2Y3K8-unsplash.jpg ├── openimages_classes.txt └── textures │ ├── alexander-grey-LV7lkepljdM-unsplash.jpg │ ├── annie-spratt-pwAvA5CvuS8-unsplash.jpg │ ├── bernard-hermant-X-Bu9X6gok0-unsplash.jpg │ ├── boliviainteligente-zeQ5n-03Y40-unsplash.jpg │ ├── british-library-z0SQJUnOKm8-unsplash.jpg │ ├── clay-banks-VppPygAZKU0-unsplash.jpg │ ├── dominic-phillips-QEVT_XYXKPs-unsplash.jpg │ ├── emily-bernal-r2F5ZIEUPtk-unsplash.jpg │ ├── engin-akyurt-aXVro7lQyUM-unsplash.jpg │ ├── erick-butler-3XQlnryKz0o-unsplash.jpg │ ├── eugene-golovesov-htMfQCwKrro-unsplash.jpg │ ├── fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg │ ├── fruit-basket-agency-caH-ZLrisZA-unsplash.jpg │ ├── george-webster-p1VZ5IbT2Tg-unsplash.jpg │ ├── hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg │ ├── ioana-cristiana-0WW38q7lGZA-unsplash.jpg │ ├── james-lee-vpBPwauyeos-unsplash.jpg │ ├── jean-wimmerlin-dcasj22jmCk-unsplash.jpg │ ├── joel-filipe-Wc8k-KryEPM-unsplash.jpg │ ├── kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg │ ├── krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg │ ├── 
li-zhang-K-DwbsTXliY-unsplash.jpg │ ├── marcus-urbenz-_a7JjjqgurE-unsplash.jpg │ ├── maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg │ ├── mario-losereit-mTZyJeR1Rnc-unsplash.jpg │ ├── martin-neuhold-K8WDdu4FIGk-unsplash.jpg │ ├── maxim-berg-6-NP_CdNqtU-unsplash.jpg │ ├── mihaly-varga-AQFfdEY3X4Q-unsplash.jpg │ ├── mo-U3Kst7MY4Ok-unsplash.jpg │ ├── mockup-graphics-BAcrUzXyg18-unsplash.jpg │ ├── natasha-6N-PvrURkZE-unsplash.jpg │ ├── rene-porter-jQijOI757W0-unsplash.jpg │ ├── ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg │ ├── rick-rothenberg-L-SoVuu1dTI-unsplash.jpg │ ├── rivage-mFcsYcSSiMQ-unsplash.jpg │ ├── sarah-claeys-lxw686JyMT8-unsplash.jpg │ ├── sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg │ ├── simon-lee-HmHOhR5meGo-unsplash.jpg │ ├── simon-lee-ORT46c9-AKA-unsplash.jpg │ ├── taylor-ann-art-hsWLFtOeDkg-unsplash.jpg │ ├── tomas-m-thjJIoJhMv4-unsplash.jpg │ ├── vadim-bogulov--PwZWV5AWV0-unsplash.jpg │ └── vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg ├── configs ├── infer │ ├── instruct.yaml │ ├── scene.yaml │ ├── texturing.yaml │ └── union.yaml └── train │ ├── clothes.yaml │ ├── instruct.yaml │ ├── scene.yaml │ ├── texturing.yaml │ └── union.yaml ├── data_generation ├── generate_random_images.py ├── generate_scenes_data.py ├── generate_texture_data.py ├── generate_union_data.py ├── remove_backs.py └── words_bank.py ├── kandinsky2 ├── __init__.py ├── configs.py ├── kandinsky2_1_model.py ├── kandinsky2_2_model.py ├── kandinsky2_model.py ├── model │ ├── fp16_util.py │ ├── gaussian_diffusion.py │ ├── losses.py │ ├── model_creation.py │ ├── nn.py │ ├── prior.py │ ├── resample.py │ ├── respace.py │ ├── samplers.py │ ├── text2im_model.py │ ├── text2im_model2_1.py │ ├── text_encoders.py │ ├── unet.py │ └── utils.py ├── train_utils │ ├── data │ │ ├── dataset_prior.py │ │ └── dataset_unclip_2_1.py │ ├── ema.py │ ├── train_module_pl2_1.py │ ├── trainer_2_1_uclip.py │ ├── trainer_prior.py │ └── utils.py ├── utils.py └── vqgan │ ├── autoencoder.py │ ├── movq_modules.py │ ├── quntize.py │ └── vqgan_blocks.py ├── model ├── __init__.py ├── pipeline_pops.py └── pops_utils.py ├── requirements.txt ├── scripts ├── __init__.py ├── infer_binary.py ├── infer_instruct.py └── train.py ├── training ├── __init__.py ├── coach.py ├── dataset.py └── train_config.py └── utils ├── __init__.py ├── clothes_utils.py └── vis_utils.py /README.md: -------------------------------------------------------------------------------- 1 | # pOps: Photo-Inspired Diffusion Operators 2 | 3 | > Elad Richardson, Yuval Alaluf, Ali Mahdavi-Amiri, Daniel Cohen-Or 4 | > Tel Aviv University, Simon Fraser University 5 | > 6 | > Text-guided image generation enables the creation of visual content from textual descriptions. 7 | However, certain visual concepts cannot be effectively conveyed through language alone. This has sparked a renewed interest in utilizing the CLIP image embedding space for more visually-oriented tasks through methods such as IP-Adapter. Interestingly, the CLIP image embedding space has been shown to be semantically meaningful, where linear operations within this space yield semantically meaningful results. Yet, the specific meaning of these operations can vary unpredictably across different images. 8 | To harness this potential, we introduce pOps, a framework that trains specific semantic operators directly on CLIP image embeddings. 9 | Each pOps operator is built upon a pretrained Diffusion Prior model. 
10 | While the Diffusion Prior model was originally trained to map between text embeddings and image embeddings, we demonstrate that it can be tuned to accommodate new input conditions, resulting in a diffusion operator. 11 | Working directly over image embeddings not only improves our ability to learn semantic operations but also allows us to directly use a textual CLIP loss as additional supervision when needed. 12 | We show that pOps can be used to learn a variety of photo-inspired operators with distinct semantic meanings, highlighting the semantic diversity and potential of our proposed approach. 13 | 14 | 15 | 16 | 17 | [![Hugging Face Spaces](https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue)](https://huggingface.co/spaces/pOpsPaper/pOps-space) 18 | 19 | 20 | 21 |

22 | 23 |
24 | Different operators trained using pOps. Our method learns operators that are applied directly in the image embedding space, resulting in a variety of semantic operations that can then be realized as images using an image diffusion model. 25 |

26 | 27 | 28 | ## Description :scroll: 29 | Official implementation of the paper "pOps: Photo-Inspired Diffusion Operators" 30 | 31 | ## Getting started with pOps :rocket: 32 | To set up the environment with all necessary dependencies, please run: 33 | ``` 34 | pip install -r requirements.txt 35 | ``` 36 | 37 | 38 | ## Inference 🧑‍🎨 39 | 40 | 41 | 42 | We provide pretrained models for our different operators via a Hugging Face model card. 43 | 44 | ### Binary Operators 45 | 46 | To run a binary operator, simply use the `scripts.infer_binary` script with the corresponding config file. 47 | 48 | ```bash 49 | python -m scripts.infer_binary --config_path=configs/infer/texturing.yaml 50 | # or 51 | python -m scripts.infer_binary --config_path=configs/infer/union.yaml 52 | # or 53 | python -m scripts.infer_binary --config_path=configs/infer/scene.yaml 54 | ``` 55 | 56 | This will automatically download the pretrained model and run inference on the default input images. 57 | 58 | Configuration is managed by pyrallis; some useful flags for the `scripts.infer_binary` script are: 59 | - `--output_dir_name`: The name of the output directory where the results will be saved. 60 | - `--dir_a`: The path to the directory containing the input images for the first input. 61 | - `--dir_b`: The path to the directory containing the input images for the second input. 62 | - `--vis_mean`: Show results of the mean of the two inputs. 63 | 64 | For compositions of multiple operators, note that the inference script outputs both the resulting images and the corresponding CLIP embeddings. 65 | Thus, you can simply feed a directory of embeddings to either `dir_a` or `dir_b` (see the example at the end of this section). Useful filtering flags are: 66 | - `--file_exts_a` (/b): Filter to only `.jpg` images or `.pth` embeddings. 67 | - `--name_filter_a` (/b): Filter to only images with specific names. 68 | 69 | To sample results with missing input conditions, use the `--drop_condition_a` or `--drop_condition_b` flags. 70 | 71 | Finally, to use the IP-Adapter with the inference script, pass the `--use_ipadapter` flag; for additional depth conditioning, pass the `--use_depth` flag. 72 |
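As a minimal sketch of such a composition, the texturing operator can be run first and its saved embeddings then fed into the scene operator. The flags below are the ones documented above, but the output path (`outputs/texturing_step`), the extension value given to `--file_exts_a`, and the assignment of object/background to inputs a/b are assumptions that should be checked against the inference configs.

```bash
# Sketch: compose texturing -> scene by reusing the saved CLIP embeddings.
# 1) Run texturing; results are written as images plus .pth embeddings.
python -m scripts.infer_binary --config_path=configs/infer/texturing.yaml \
    --output_dir_name=texturing_step

# 2) Point input "a" of the scene operator at wherever step 1 saved its
#    outputs, keeping only the .pth embedding files; input "b" is assumed
#    here to take regular background images.
python -m scripts.infer_binary --config_path=configs/infer/scene.yaml \
    --dir_a=outputs/texturing_step \
    --file_exts_a=.pth \
    --dir_b=assets/backgrounds
```

The same pattern applies to longer chains of operators, since every run saves its embeddings alongside the generated images.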
73 | ### Instruct Operator 74 | 75 | To run the instruct operator, use the `scripts.infer_instruct` script with the corresponding config file. 76 | 77 | ```bash 78 | python -m scripts.infer_instruct --config_path=configs/infer/instruct.yaml 79 | ``` 80 | 81 | ## Training 📉 82 | 83 | ### Data Generation 84 | We provide several scripts for data generation under the `data_generation` directory. 85 | - `generate_texture_data.py`: Generates texture data. 86 | - `generate_scenes_data.py`: Generates scene data. 87 | - `generate_union_data.py`: Generates union data. 88 | 89 | The scene operator also requires random backgrounds, which can be generated using the `generate_random_images.py` script. 90 | ```bash 91 | python -m data_generation.generate_random_images --output_dir=datasets/random_backgrounds --type=scenes 92 | ``` 93 | 94 | The `generate_random_images.py` script can also be used to generate random images for the other operators: 95 | ```bash 96 | python -m data_generation.generate_random_images --output_dir=datasets/random_images --type=objects 97 | ``` 98 | 99 | These images can be used for the unconditional steps in training, as will be described in the training section. 100 | 101 | ### Training Script 102 | Training itself is managed by the `scripts.train` script. See the `configs/train` directory for the different training configurations. 103 | 104 | ```bash 105 | python -m scripts.train --config_path=configs/train/texturing.yaml 106 | # or 107 | python -m scripts.train --config_path=configs/train/scene.yaml 108 | # or 109 | python -m scripts.train --config_path=configs/train/union.yaml 110 | # or 111 | python -m scripts.train --config_path=configs/train/instruct.yaml 112 | # or 113 | python -m scripts.train --config_path=configs/train/clothes.yaml 114 | ``` 115 | 116 | The operator itself is defined via the `--mode` flag, which should be set to the specific operator being trained. 117 | 118 | Relevant data paths and validation paths can be set in the configuration file. 119 | 120 | Use the optional `randoms_dir` flag to specify the directory of random images for the unconditional steps. 121 | 122 | ## Acknowledgements 123 | Our codebase heavily relies on the [Kandinsky model](https://github.com/ai-forever/Kandinsky-2). 124 | 125 | ## Citation 126 | If you use this code for your research, please cite the following paper: 127 | ``` 128 | @article{richardson2024pops, 129 | title={pOps: Photo-Inspired Diffusion Operators}, 130 | author={Richardson, Elad and Alaluf, Yuval and Mahdavi-Amiri, Ali and Cohen-Or, Daniel}, 131 | journal={arXiv preprint arXiv:2406.01300}, 132 | year={2024} 133 | } 134 | ``` 135 | -------------------------------------------------------------------------------- /assets/OpenSans-Regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/OpenSans-Regular.ttf -------------------------------------------------------------------------------- /assets/README.md: -------------------------------------------------------------------------------- 1 | Images are taken from either [Unsplash](https://unsplash.com/) or [Pexels](https://www.pexels.com/); see the filename for the source.
2 | -------------------------------------------------------------------------------- /assets/backgrounds/11th-gate-e-kl_vOpwLg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/11th-gate-e-kl_vOpwLg-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/adolfo-felix-PG8NyM_Mcts-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/adolfo-felix-PG8NyM_Mcts-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/alexandra-zelena-phskyemu_c4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/alexandra-zelena-phskyemu_c4-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/benjamin-3WdChmuv7mE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/benjamin-3WdChmuv7mE-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/benjamin-child-0sT9YhNgSEs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/benjamin-child-0sT9YhNgSEs-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/carlo-lisa-GHuT3dkZxYM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/carlo-lisa-GHuT3dkZxYM-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/changbok-ko-F8t2VGnI47I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/changbok-ko-F8t2VGnI47I-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/conscious-design-mLpbHWquEYM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/conscious-design-mLpbHWquEYM-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/diego-jimenez-A-NVHPka9Rk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/diego-jimenez-A-NVHPka9Rk-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/dogancan-ozturan-urY_iHk3nm0-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/dogancan-ozturan-urY_iHk3nm0-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/elena-ktenopoulou-cjzV4WK46qY-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/evaldas-grizas-9-WvhPP1I6M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/evaldas-grizas-9-WvhPP1I6M-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/heriberto-garcia-YdjrYLvLO5Y-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/inaki-del-olmo-NIJuEQw0RKg-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/jordan-steranka-NcF6MVPW644-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/jordan-steranka-NcF6MVPW644-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/joseph-barrientos-oQl0eVYd_n8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/joseph-barrientos-oQl0eVYd_n8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/juli-kosolapova-Us_dv71f1bc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/juli-kosolapova-Us_dv71f1bc-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/masjid-pogung-dalangan-8I6hAdjM76Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/meritt-thomas-GktK3Jb9BRE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/meritt-thomas-GktK3Jb9BRE-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/backgrounds/nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/nastuh-abootalebi-yWwob8kwOCk-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-aWHKsYkbCi8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-aWHKsYkbCi8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-cYy-o9i8aCs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-cYy-o9i8aCs-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-jTxhUMyPTrE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-jTxhUMyPTrE-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/neom-kXbit_yx8t4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/neom-kXbit_yx8t4-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/presentsquare-cSK33MUaVzQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/presentsquare-cSK33MUaVzQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/robert-lukeman-PH0HYjsf2n8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/robert-lukeman-PH0HYjsf2n8-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/sam-moghadam-khamseh-cuSPt5uP2iQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/tj-holowaychuk-1EYMue_AwDw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/tj-holowaychuk-1EYMue_AwDw-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/toby-elliott-17yY9Lyddrc-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/toby-elliott-17yY9Lyddrc-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/yaodan-fc6cEU1sTBY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/yaodan-fc6cEU1sTBY-unsplash.jpg -------------------------------------------------------------------------------- /assets/backgrounds/yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/backgrounds/yevhenii-deshko-Tkh5CmSzmaM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/alexandr-sadkov-BnG4KWAzt9c-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/allec-gomes-xnRg3xDcNnE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/allec-gomes-xnRg3xDcNnE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/allenwhm-wh-RgpT4_5g-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/allenwhm-wh-RgpT4_5g-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/alvan-nee-T-0EW-SEbsE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/alvan-nee-T-0EW-SEbsE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/amit-lahav-rxN2MRdFJVg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/amit-lahav-rxN2MRdFJVg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/andre-taissin-hnyZg63sRCY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/andre-taissin-hnyZg63sRCY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/arno-senoner-HFE2RyC76tw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/arno-senoner-HFE2RyC76tw-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/artem-maltsev-VOO5ojMQ_9A-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/artem-maltsev-VOO5ojMQ_9A-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/aurelia-dubois-xzrJCS4grC4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/aurelia-dubois-xzrJCS4grC4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/bermix-studio-ZMxHvB9J7YU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/bermix-studio-ZMxHvB9J7YU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/birmingham-museums-trust-q2OwlfXAYfo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/bonnie-kittle-MUcxe_wDurE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/bonnie-kittle-MUcxe_wDurE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/brigitte-tohm-51AK6yJDgv0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/brigitte-tohm-51AK6yJDgv0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/chastity-cortijo-3OENu9mJ0i0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/chastity-cortijo-3OENu9mJ0i0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/cici-hung-nV3v8ZMRLNc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/cici-hung-nV3v8ZMRLNc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/claire-abdo-_-635EI3nV8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/claire-abdo-_-635EI3nV8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-OrQvIBYNPcw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-OrQvIBYNPcw-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/coppertist-wu-XlFSnJOeyQs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-XlFSnJOeyQs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-its52T6D4bo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-its52T6D4bo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-mwjuTJzJ9w4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-mwjuTJzJ9w4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/coppertist-wu-w1AK7wWsTPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/coppertist-wu-w1AK7wWsTPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/courtney-cook-KcY7DCqc9VI-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/courtney-cook-KcY7DCqc9VI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/creative-headline-APNnyM36puU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/creative-headline-APNnyM36puU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/daniel-k-cheung-WJuwxFIpidc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/daniel-k-cheung-WJuwxFIpidc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/daniil-silantev-1P6AnKDw6S8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/daniil-silantev-1P6AnKDw6S8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/dominic-phillips-QEVT_XYXKPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/dominic-phillips-QEVT_XYXKPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/engin-akyurt-TDOClniEwmI-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/engin-akyurt-TDOClniEwmI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/engin-akyurt-iLHCV4ZBH7s-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/engin-akyurt-iLHCV4ZBH7s-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erfan-tajik-m_hgaJLqCRM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erfan-tajik-m_hgaJLqCRM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erick-butler-3XQlnryKz0o-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erick-butler-3XQlnryKz0o-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/erik-mclean-kNRKg1pfiqE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/erik-mclean-kNRKg1pfiqE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/fatty-corgi-1QsQRkxnU6I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/fatty-corgi-1QsQRkxnU6I-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/fernando-andrade-Q33VONoOfSU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/fernando-andrade-Q33VONoOfSU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/frank-zimmermann-xYh8uAwqZK0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/frank-zimmermann-xYh8uAwqZK0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gaby-yerden--OIinu1YDTk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gaby-yerden--OIinu1YDTk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gary-walker-jones-uC5jnQPdY7I-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gary-walker-jones-uC5jnQPdY7I-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/gilbert-beltran-EUQRWgmvhr8-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/gilbert-beltran-EUQRWgmvhr8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/giorgio-trovato-p0OlRAAYXLY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/giorgio-trovato-p0OlRAAYXLY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/hannah-pemberton-3d82e5_ylGo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/hannah-pemberton-3d82e5_ylGo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/irene-kredenets-tcVH_BwHtrc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/irene-kredenets-tcVH_BwHtrc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/isaac-martin-Jewkfj03OUU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/isaac-martin-Jewkfj03OUU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/isabela-kronemberger-KaMRBaHSsjM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ivan-lopatin-PZ2KhQnOZb8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ivan-pergasi-CZT7lkrt5sU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ivan-pergasi-CZT7lkrt5sU-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/jessica-tan-Rufz-e6Qrqg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/jessica-tan-Rufz-e6Qrqg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/joshua-coleman-8V4y-XXT3MQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/joshua-coleman-8V4y-XXT3MQ-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/juan-mayobre-_IAhW7a4pWA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/juan-mayobre-_IAhW7a4pWA-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kate-0I8hNhkFUWQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kate-0I8hNhkFUWQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kelly-sikkema-LHxU7QYSn7c-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kelly-sikkema-LHxU7QYSn7c-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kenny-eliason-lbqZUefMLvQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kenny-eliason-lbqZUefMLvQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/kojirou-sasaki-rdLQVeroHQ0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lora-seis-dS5xpjW38Qk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lora-seis-dS5xpjW38Qk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-george-wendt-UDWhEik1L1Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-george-wendt-lhWaL0pgpEg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/lucas-van-oort-Tv9w8mgoVzs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/maria-oswalt-GKE6i5mQplI-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/maria-oswalt-GKE6i5mQplI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mario-losereit-mTZyJeR1Rnc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mario-losereit-mTZyJeR1Rnc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mediamodifier-gAUUqaI83b8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mediamodifier-gAUUqaI83b8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mehmet-keskin-qHdGjahnx48-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mehmet-keskin-qHdGjahnx48-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/milad-fakurian-3CoSLrSrvhY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/milad-fakurian-3CoSLrSrvhY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/milad-fakurian-gpkPI7qs1cg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/milad-fakurian-gpkPI7qs1cg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mink-mingle-Riz1qAplMQk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mink-mingle-Riz1qAplMQk-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mo-U3Kst7MY4Ok-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mo-U3Kst7MY4Ok-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-aUkYaG12Dgs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-aUkYaG12Dgs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-biK3YJHhBfM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-biK3YJHhBfM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-enNffryKuQI-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-enNffryKuQI-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mockup-graphics-haSJEJYzl5A-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mockup-graphics-haSJEJYzl5A-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/morgan-sessions-k5zv-Hv4Kpc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/mulyadi-JEfwd_OkQGE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/mulyadi-JEfwd_OkQGE-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/naomi-hebert-2dcYhvbHV-M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/naomi-hebert-2dcYhvbHV-M-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nguyen-ngoc-tung-ZkG4JdoMANM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nik-JbAz6eG5GS4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nik-JbAz6eG5GS4-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/nikolett-emmert-_g2jz1SghvQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/nikolett-emmert-_g2jz1SghvQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/olena-bohovyk-InlMkMNlrhY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/olena-bohovyk-InlMkMNlrhY-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/or-hakim-VQxKattL-X4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/or-hakim-VQxKattL-X4-unsplash.jpg -------------------------------------------------------------------------------- 
/assets/objects/pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pablo-merchan-montes-_Tw4vCs9C-8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/personalgraphic-com-IFlg3kFbR0E-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/personalgraphic-com-IFlg3kFbR0E-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/personalgraphic-com-glY1L-eo0Fc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/personalgraphic-com-glY1L-eo0Fc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-amar-19086869.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-amar-19086869.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-cottonbro-3661226.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-cottonbro-3661226.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-damir-10608624.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-damir-10608624.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-eva-bronzini-5777472.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-eva-bronzini-5777472.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-laarkstudio-7186448.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-laarkstudio-7186448.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-laarkstudio-7434298.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-laarkstudio-7434298.jpg -------------------------------------------------------------------------------- /assets/objects/pexels-photoscene-7271951.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-photoscene-7271951.jpg -------------------------------------------------------------------------------- 
/assets/objects/pexels-sirikul-r-2043201-11393334.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pexels-sirikul-r-2043201-11393334.jpg -------------------------------------------------------------------------------- /assets/objects/pixmike-t1Lr0BPQfKg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/pixmike-t1Lr0BPQfKg-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/r-n-tyfqOL1FAQc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/r-n-tyfqOL1FAQc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/reno-laithienne-kzX7dcKoMTM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/reno-laithienne-kzX7dcKoMTM-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ron-dauphin-UgidX4V13Gc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ron-dauphin-UgidX4V13Gc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/ruslan-bardash-4kTbAMRAHtQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sami-ahmed-ngS0S-ZjOpc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sami-ahmed-ngS0S-ZjOpc-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/santhosh-kumar-RqYTuWkTdEs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/santhosh-kumar-RqYTuWkTdEs-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sebastien-goldberg-6b-B6ZphlXo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/shirley-tittermary-JnymxncvrrY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/shirley-tittermary-JnymxncvrrY-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/objects/sincerely-media-MWcwKjwY8yo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sincerely-media-MWcwKjwY8yo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/siora-photography-GMWh_I-odL0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/siora-photography-GMWh_I-odL0-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/sun-lingyan-_H0fjILH5Vw-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/sun-lingyan-_H0fjILH5Vw-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/thoa-ngo-AZr6AOMu3l8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/thoa-ngo-AZr6AOMu3l8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/thought-catalog-9aOswReDKPo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/thought-catalog-9aOswReDKPo-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/timothy-dykes-LhqLdDPcSV8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/timothy-dykes-LhqLdDPcSV8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/tom-crew-Mz__0nr1AM8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/tom-crew-Mz__0nr1AM8-unsplash.jpg -------------------------------------------------------------------------------- /assets/objects/yucel-moran-L0VzWT2Y3K8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/objects/yucel-moran-L0VzWT2Y3K8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/alexander-grey-LV7lkepljdM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/alexander-grey-LV7lkepljdM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/annie-spratt-pwAvA5CvuS8-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/annie-spratt-pwAvA5CvuS8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/bernard-hermant-X-Bu9X6gok0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/bernard-hermant-X-Bu9X6gok0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/boliviainteligente-zeQ5n-03Y40-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/boliviainteligente-zeQ5n-03Y40-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/british-library-z0SQJUnOKm8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/british-library-z0SQJUnOKm8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/clay-banks-VppPygAZKU0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/clay-banks-VppPygAZKU0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/dominic-phillips-QEVT_XYXKPs-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/dominic-phillips-QEVT_XYXKPs-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/emily-bernal-r2F5ZIEUPtk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/emily-bernal-r2F5ZIEUPtk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/engin-akyurt-aXVro7lQyUM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/engin-akyurt-aXVro7lQyUM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/erick-butler-3XQlnryKz0o-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/erick-butler-3XQlnryKz0o-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/eugene-golovesov-htMfQCwKrro-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/eugene-golovesov-htMfQCwKrro-unsplash.jpg -------------------------------------------------------------------------------- 
/assets/textures/fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/fons-heijnsbroek-abstract-art-zleiqVXMqkc-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/fruit-basket-agency-caH-ZLrisZA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/fruit-basket-agency-caH-ZLrisZA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/george-webster-p1VZ5IbT2Tg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/george-webster-p1VZ5IbT2Tg-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/hans-eiskonen-8Pm_A-OHJGg-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/ioana-cristiana-0WW38q7lGZA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/ioana-cristiana-0WW38q7lGZA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/james-lee-vpBPwauyeos-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/james-lee-vpBPwauyeos-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/jean-wimmerlin-dcasj22jmCk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/jean-wimmerlin-dcasj22jmCk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/joel-filipe-Wc8k-KryEPM-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/joel-filipe-Wc8k-KryEPM-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/kier-in-sight-archives-shLU6SZfIQY-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/krzysztof-kowalik-9pFK1FR_5KQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/li-zhang-K-DwbsTXliY-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/li-zhang-K-DwbsTXliY-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/marcus-urbenz-_a7JjjqgurE-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/marcus-urbenz-_a7JjjqgurE-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/maria-fernanda-pissioli-6BOGBGy2-sU-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mario-losereit-mTZyJeR1Rnc-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mario-losereit-mTZyJeR1Rnc-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/martin-neuhold-K8WDdu4FIGk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/martin-neuhold-K8WDdu4FIGk-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/maxim-berg-6-NP_CdNqtU-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/maxim-berg-6-NP_CdNqtU-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mihaly-varga-AQFfdEY3X4Q-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mihaly-varga-AQFfdEY3X4Q-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mo-U3Kst7MY4Ok-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mo-U3Kst7MY4Ok-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/mockup-graphics-BAcrUzXyg18-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/mockup-graphics-BAcrUzXyg18-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/natasha-6N-PvrURkZE-unsplash.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/natasha-6N-PvrURkZE-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rene-porter-jQijOI757W0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rene-porter-jQijOI757W0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/ricardo-gomez-angel-2mjl2uvz9ic-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rick-rothenberg-L-SoVuu1dTI-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rick-rothenberg-L-SoVuu1dTI-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/rivage-mFcsYcSSiMQ-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/rivage-mFcsYcSSiMQ-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/sarah-claeys-lxw686JyMT8-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/sarah-claeys-lxw686JyMT8-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/sergiu-zarnescu-orhUl3Yk03M-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/simon-lee-HmHOhR5meGo-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/simon-lee-HmHOhR5meGo-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/simon-lee-ORT46c9-AKA-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/simon-lee-ORT46c9-AKA-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/taylor-ann-art-hsWLFtOeDkg-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/taylor-ann-art-hsWLFtOeDkg-unsplash.jpg 
-------------------------------------------------------------------------------- /assets/textures/tomas-m-thjJIoJhMv4-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/tomas-m-thjJIoJhMv4-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/vadim-bogulov--PwZWV5AWV0-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/vadim-bogulov--PwZWV5AWV0-unsplash.jpg -------------------------------------------------------------------------------- /assets/textures/vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/assets/textures/vinicius-amnx-amano-f9oQZOk9vnk-unsplash.jpg -------------------------------------------------------------------------------- /configs/infer/instruct.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/instruct/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | prior_guidance_scale: [1] 5 | output_dir_name: inference/instruct_results 6 | texts: ['shiny', 'enormous', 'aged'] -------------------------------------------------------------------------------- /configs/infer/scene.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/scene/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/backgrounds 5 | prior_guidance_scale: [1] 6 | output_dir_name: inference/scene_results -------------------------------------------------------------------------------- /configs/infer/texturing.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/texturing/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/textures 5 | prior_guidance_scale: [8] 6 | output_dir_name: inference/texturing_results -------------------------------------------------------------------------------- /configs/infer/union.yaml: -------------------------------------------------------------------------------- 1 | prior_path: models/union/learned_prior.pth 2 | prior_repo: "pOpsPaper/operators" 3 | dir_a: assets/objects 4 | dir_b: assets/objects 5 | prior_guidance_scale: [8] 6 | output_dir_name: inference/union_results -------------------------------------------------------------------------------- /configs/train/clothes.yaml: -------------------------------------------------------------------------------- 1 | mode: 'clothes' 2 | output_dir: 'results/clothes_experiment' -------------------------------------------------------------------------------- /configs/train/instruct.yaml: -------------------------------------------------------------------------------- 1 | mode: 'instruct' 2 | dataset_path: 'datasets/generated/random_objects' 3 | val_dataset_path: 'assets/objects' 4 | output_dir: 'results/instruct_experiment' -------------------------------------------------------------------------------- /configs/train/scene.yaml: -------------------------------------------------------------------------------- 1 | mode: 'scene' 2 
| dataset_path: 'datasets/generated/scenes_data' 3 | backgrounds_dir: 'datasets/generated/random_backgrounds' 4 | val_dataset_path: 'assets/objects' 5 | # Reusing the same textures_dir argument for validation 6 | textures_dir: 'assets/backgrounds' 7 | output_dir: 'results/scene_experiment' -------------------------------------------------------------------------------- /configs/train/texturing.yaml: -------------------------------------------------------------------------------- 1 | mode: 'texture' 2 | dataset_path: 'datasets/generated/texture_data' 3 | val_dataset_path: 'assets/objects' 4 | textures_dir: 'assets/textures' 5 | output_dir: 'results/texturing_experiment' -------------------------------------------------------------------------------- /configs/train/union.yaml: -------------------------------------------------------------------------------- 1 | mode: 'union' 2 | dataset_path: 'datasets/generated/union_data' 3 | val_dataset_path: 'assets/objects' 4 | output_dir: 'results/union_experiment' -------------------------------------------------------------------------------- /data_generation/generate_random_images.py: -------------------------------------------------------------------------------- 1 | import random 2 | from pathlib import Path 3 | 4 | from diffusers import StableDiffusionXLPipeline 5 | 6 | from data_generation import words_bank 7 | from dataclasses import dataclass 8 | import pyrallis 9 | 10 | 11 | @dataclass 12 | class RunConfig: 13 | # Generation mode, should be either 'objects' or 'scenes' 14 | type: str = 'objects' 15 | out_dir: Path = Path('datasets/generated/random_objects') 16 | n_images: int = 100000 17 | 18 | 19 | @pyrallis.wrap() 20 | def generate(cfg: RunConfig): 21 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 22 | "stabilityai/sdxl-turbo", use_safetensors=True 23 | ).to("cuda") 24 | 25 | if cfg.type == 'objects': 26 | generate_objects = True 27 | elif cfg.type == 'scenes': 28 | generate_objects = False 29 | else: 30 | raise ValueError(f"Invalid type {cfg.type}") 31 | 32 | cfg.out_dir.mkdir(exist_ok=True, parents=True) 33 | 34 | if generate_objects: 35 | with open('assets/openimages_classes.txt', 'r') as f: 36 | objects = f.read().splitlines() 37 | 38 | for _ in range(cfg.n_images): 39 | try: 40 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 41 | if cfg.type == 'objects': 42 | object_name = random.choice(objects) 43 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 44 | prompt = f"A photo of a {object_name} {placement}" 45 | else: 46 | object_name = '' 47 | prompt = f"A photo of an empty {placement.split(' ')[-1]}" 48 | seed = random.randint(0, 1000000) 49 | 50 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 51 | 52 | out_path = cfg.out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 53 | base_image.save(out_path) 54 | except Exception as e: 55 | print(e) 56 | 57 | 58 | if __name__ == "__main__": 59 | # Use to generate objects or backgrounds, selected via the config's 'type' field 60 | generate() 61 | -------------------------------------------------------------------------------- /data_generation/generate_scenes_data.py: -------------------------------------------------------------------------------- 1 | import random 2 | from pathlib import Path 3 | 4 | import numpy as np 5 | import torch 6 | import transformers 7 | from PIL import Image, ImageFilter 8 | from diffusers import StableDiffusionXLPipeline,
AutoPipelineForInpainting 9 | 10 | from data_generation import words_bank 11 | 12 | 13 | def main(): 14 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 15 | "stabilityai/sdxl-turbo", use_safetensors=True 16 | ).to("cuda") 17 | 18 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 19 | inpaint_pipe = AutoPipelineForInpainting.from_pretrained("diffusers/stable-diffusion-xl-1.0-inpainting-0.1", 20 | torch_dtype=torch.float16, variant="fp16").to("cuda") 21 | 22 | with open('assets/openimages_classes.txt', 'r') as f: 23 | objects = f.read().splitlines() 24 | 25 | out_dir = Path(f'datasets/generated/scenes_data/') 26 | out_dir.mkdir(exist_ok=True, parents=True) 27 | 28 | for _ in range(100000): 29 | try: 30 | object_name = random.choice(objects) 31 | # Remove special characters 32 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 33 | # Restrict to two words 34 | object_name = ' '.join(object_name.split()[:2]) 35 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 36 | 37 | prompt = f"A photo of {object_name} {placement}" 38 | seed = random.randint(0, 1000000) 39 | 40 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 41 | 42 | tmp_path = 'tmp.jpg' 43 | base_image.save(tmp_path) 44 | crop_mask = bria_pipe(tmp_path, return_mask=True) # Retuns a PIL mask 45 | 46 | # Dilate mask 47 | crop_mask_for_inpaint = np.array(crop_mask) 48 | crop_mask_for_inpaint[crop_mask_for_inpaint > 10] = 255 49 | crop_mask_for_inpaint[crop_mask_for_inpaint <= 10] = 0 50 | crop_mask_for_inpaint = Image.fromarray(crop_mask_for_inpaint).filter(ImageFilter.MaxFilter(31)) 51 | crop_mask_for_inpaint = crop_mask_for_inpaint.convert("RGB") 52 | 53 | inpainted_image = inpaint_pipe( 54 | prompt=f'A photo of empty {placement.split(" ")[-1]}', 55 | image=base_image, 56 | mask_image=crop_mask_for_inpaint, 57 | guidance_scale=8.0, 58 | num_inference_steps=20, 59 | strength=1.0, 60 | ).images[0] 61 | 62 | # Restrict object_name to 50 characters 63 | object_name = object_name[:50] 64 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 65 | base_image.save(out_path) 66 | 67 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}_inpainted.jpg' 68 | inpainted_image.save(out_path) 69 | 70 | np.save(out_path.with_suffix('.npy'), np.array(crop_mask).astype(np.uint8)) 71 | 72 | 73 | except Exception as e: 74 | print(f'Error: {e}') 75 | 76 | 77 | if __name__ == "__main__": 78 | main() 79 | -------------------------------------------------------------------------------- /data_generation/generate_texture_data.py: -------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from pathlib import Path 4 | 5 | import torch 6 | import transformers 7 | from diffusers import StableDiffusionDepth2ImgPipeline, StableDiffusionXLPipeline 8 | from transformers import Owlv2Processor, Owlv2ForObjectDetection 9 | 10 | from data_generation import words_bank 11 | 12 | 13 | def main(): 14 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 15 | "stabilityai/sdxl-turbo", use_safetensors=True 16 | ).to("cuda") 17 | 18 | pipe = StableDiffusionDepth2ImgPipeline.from_pretrained( 19 | "stabilityai/stable-diffusion-2-depth", torch_dtype=torch.float16, ).to("cuda") 20 | 21 | processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble") 22 | 
model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble") 23 | 24 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 25 | 26 | out_dir = Path(f'datasets/generated/texture_data') 27 | out_dir.mkdir(exist_ok=True, parents=True) 28 | 29 | with open('assets/openimages_classes.txt', 'r') as f: 30 | objects = f.read().splitlines() 31 | 32 | for _ in range(100000): 33 | object_name = random.choice(objects) 34 | # Remove special characters 35 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 36 | # Restrict to two words 37 | object_name = ' '.join(object_name.split()[:2]) 38 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 39 | prompt = f"A {object_name} {placement}" 40 | 41 | seed = random.randint(0, 1000000) 42 | object_out_dir = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}' 43 | if object_out_dir.exists(): 44 | continue 45 | object_out_dir.mkdir(exist_ok=True, parents=True) 46 | 47 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 48 | out_path = object_out_dir / f'base.jpg' 49 | base_image.save(out_path) 50 | 51 | # Find box 52 | texts = [[f"a {object_name}"]] 53 | inputs = processor(text=texts, images=base_image, return_tensors="pt") 54 | outputs = model(**inputs) 55 | 56 | # Target image sizes (height, width) to rescale box predictions [batch_size, 2] 57 | target_sizes = torch.Tensor([base_image.size[::-1]]) 58 | # Convert outputs (bounding boxes and class logits) to Pascal VOC Format (xmin, ymin, xmax, ymax) 59 | results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, threshold=0.1) 60 | 61 | i = 0 # Retrieve predictions for the first image for the corresponding text queries 62 | boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"] 63 | # Filter to box with max score 64 | if len(boxes) == 0: 65 | scores = torch.tensor([1.0]) 66 | boxes = torch.tensor([[0, 0, base_image.size[0], base_image.size[1]]]) 67 | 68 | max_score_idx = scores.argmax() 69 | box = boxes[max_score_idx] 70 | 71 | # Save box to json file 72 | box_dict = { 73 | "x1": int(box[0].item()), 74 | "y1": int(box[1].item()), 75 | "x2": int(box[2].item()), 76 | "y2": int(box[3].item()), 77 | } 78 | crop = base_image.crop((box_dict['x1'], box_dict['y1'], box_dict['x2'], box_dict['y2'])) 79 | tmp_path = 'tmp.jpg' 80 | crop.save(tmp_path) 81 | crop_mask = bria_pipe(tmp_path, return_mask=True) 82 | crop_mask.save(object_out_dir / 'mask.png') 83 | with open(object_out_dir / 'box.json', 'w') as f: 84 | json.dump(box_dict, f) 85 | 86 | for _ in range(5): 87 | num_samples = random.randint(1, 5) 88 | sample_attributes = random.sample(words_bank.texture_attributes, num_samples) 89 | prompt = f"A {object_name} made from {' '.join(sample_attributes)} {placement}" 90 | n_prompt = "bad, deformed, ugly, bad anatomy" 91 | seed = random.randint(0, 1000000) 92 | image = pipe(prompt=prompt, image=base_image, negative_prompt=n_prompt, strength=1.0, 93 | generator=torch.Generator().manual_seed(seed)).images[0] 94 | attrs_str = '_'.join(sample_attributes)[:100] 95 | out_path = object_out_dir / f'{attrs_str}_{seed}.jpg' 96 | image.save(out_path) 97 | 98 | 99 | if __name__ == "__main__": 100 | main() 101 | -------------------------------------------------------------------------------- /data_generation/generate_union_data.py:
-------------------------------------------------------------------------------- 1 | import json 2 | import random 3 | from pathlib import Path 4 | 5 | import numpy as np 6 | import torch 7 | import transformers 8 | from diffusers import StableDiffusionXLPipeline 9 | from transformers import Owlv2Processor, Owlv2ForObjectDetection 10 | 11 | from data_generation import words_bank 12 | 13 | 14 | def main(): 15 | sdxl_pipe = StableDiffusionXLPipeline.from_pretrained( 16 | "stabilityai/sdxl-turbo", use_safetensors=True 17 | ).to("cuda") 18 | 19 | processor = Owlv2Processor.from_pretrained("google/owlv2-base-patch16-ensemble") 20 | model = Owlv2ForObjectDetection.from_pretrained("google/owlv2-base-patch16-ensemble") 21 | 22 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 23 | 24 | out_dir = Path(f'datasets/generated/union_data') 25 | out_dir.mkdir(exist_ok=True, parents=True) 26 | 27 | # Generated images that did not meet the criteria 28 | out_dir_leftovers = Path(f'datasets/generated/union_data_leftovers') 29 | out_dir_leftovers.mkdir(exist_ok=True, parents=True) 30 | 31 | with open('assets/openimages_classes.txt', 'r') as f: 32 | objects = f.read().splitlines() 33 | 34 | base_image = None 35 | saved_base = False 36 | for _ in range(100000): 37 | if base_image is not None and not saved_base: 38 | alternative_path = out_dir_leftovers / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 39 | base_image.save(alternative_path) 40 | saved_base = False 41 | try: 42 | current_objects = [] 43 | object_count = 2 44 | for _ in range(object_count): 45 | object_name = random.choice(objects) 46 | # Remove '/' or any non english characters 47 | object_name = ''.join(char if char.isalnum() else ' ' for char in object_name) 48 | # Restrict to two words 49 | object_name = ' '.join(object_name.split()[:2]) 50 | current_objects.append(object_name) 51 | object_name = ' and a '.join(current_objects) 52 | print(object_name) 53 | placement = random.choice(words_bank.placements) if random.random() < 0.5 else '' 54 | 55 | prompt = f"A photo of {object_name} {placement}" 56 | seed = random.randint(0, 1000000) 57 | 58 | base_image = sdxl_pipe(prompt=prompt, num_inference_steps=1, guidance_scale=0.0).images[0] 59 | object_name = object_name[:50] 60 | out_path = out_dir / f'{object_name.replace(" ", "_")}_{placement.replace(" ", "_")}_{seed}.jpg' 61 | 62 | # Try to detect the objects in the generated image 63 | texts = [[f"a {obj}" for obj in current_objects]] 64 | inputs = processor(text=texts, images=base_image, return_tensors="pt") 65 | with torch.no_grad(): 66 | outputs = model(**inputs) 67 | 68 | # Target image sizes (height, width) to rescale box predictions [batch_size, 2] 69 | target_sizes = torch.Tensor([base_image.size[::-1]]) 70 | # Convert outputs (bounding boxes and class logits) to Pascal VOC Format (xmin, ymin, xmax, ymax) 71 | results = processor.post_process_object_detection(outputs=outputs, target_sizes=target_sizes, 72 | threshold=0.2) 73 | 74 | i = 0 # Retrieve predictions for the first image for the corresponding text queries 75 | boxes, scores, labels = results[i]["boxes"], results[i]["scores"], results[i]["labels"] 76 | 77 | chosen_boxes = [] 78 | # Take highest scoring box for each label 79 | for obj_ind in range(len(current_objects)): 80 | relevant_boxes = boxes[labels == obj_ind] 81 | relevant_scores = scores[labels == obj_ind] 82 | if len(relevant_boxes) > 0: 83 | max_score_idx = relevant_scores.argmax() 84 | 
max_box = relevant_boxes[max_score_idx] 85 | if relevant_scores[max_score_idx] < 0.2: 86 | break 87 | chosen_boxes.append(max_box) 88 | 89 | # Require exactly two detected objects 90 | if len(chosen_boxes) != 2: 91 | print(f'Skipping, detected {len(chosen_boxes)} objects') 92 | continue 93 | 94 | # Verify small overlap between the two boxes using IoU 95 | box1, box2 = chosen_boxes 96 | box1 = box1.int() 97 | box2 = box2.int() 98 | x1 = max(box1[0], box2[0]) 99 | y1 = max(box1[1], box2[1]) 100 | x2 = min(box1[2], box2[2]) 101 | y2 = min(box1[3], box2[3]) 102 | intersection = max(0, x2 - x1) * max(0, y2 - y1) 103 | area1 = (box1[2] - box1[0]) * (box1[3] - box1[1]) 104 | area2 = (box2[2] - box2[0]) * (box2[3] - box2[1]) 105 | union = area1 + area2 - intersection 106 | iou = float(intersection / union) 107 | if iou > 0.1: 108 | print(f'Skipping, iou is {iou}') 109 | continue 110 | 111 | metadata = {'objects': [], 'placement': placement, 'prompt': prompt} 112 | masked_objects = [] 113 | for obj_ind, box in enumerate(chosen_boxes): 114 | box = box.int() 115 | obj_name = current_objects[obj_ind] 116 | box_dict = { 117 | "x1": int(box[0].item()), 118 | "y1": int(box[1].item()), 119 | "x2": int(box[2].item()), 120 | "y2": int(box[3].item()), 121 | "name": obj_name 122 | } 123 | object_size = (box_dict['x2'] - box_dict['x1']) * (box_dict['y2'] - box_dict['y1']) 124 | if object_size < 5000: 125 | print(f'Skipping, object size is {object_size}') 126 | continue 127 | metadata['objects'].append(box_dict) 128 | crop = base_image.crop((box_dict['x1'], box_dict['y1'], box_dict['x2'], box_dict['y2'])) 129 | tmp_path = 'tmp.jpg' 130 | crop.save(tmp_path) 131 | with torch.no_grad(): 132 | masked_object = bria_pipe(tmp_path) 133 | # Returns an RGBA image; take the alpha channel as the object mask 134 | crop_mask = np.array(masked_object)[..., 3] 135 | 136 | # Make sure enough of the object is actually visible in the crop 137 | # Skip if less than 10% of the crop's pixels are non-masked 138 | total_pixels = crop_mask.size 139 | seen_pixels = np.sum(crop_mask > 200) 140 | if seen_pixels / total_pixels < 0.1: 141 | print(f'Skipping, not enough pixels seen.
only {seen_pixels / total_pixels:.3f} seen') 142 | continue 143 | masked_objects.append(masked_object) 144 | if len(masked_objects) != len(chosen_boxes): 145 | continue 146 | for obj_ind, masked_object in enumerate(masked_objects): 147 | masked_object.save(str(out_path).replace('.jpg', f'_OBJ_{obj_ind}.png')) 148 | with open(out_path.with_suffix('.json'), 'w') as f: 149 | json.dump(metadata, f) 150 | 151 | base_image.save(out_path) 152 | saved_base = True 153 | 154 | except ValueError as e: 155 | print(f'Error: {e}') 156 | 157 | 158 | if __name__ == "__main__": 159 | main() 160 | -------------------------------------------------------------------------------- /data_generation/remove_backs.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import transformers 4 | from PIL import Image 5 | 6 | 7 | def main(): 8 | # base_image = Image.open("assets/template_images/mug.png") 9 | bria_pipe = transformers.pipeline("image-segmentation", model="briaai/RMBG-1.4", trust_remote_code=True) 10 | 11 | source_dir = Path('assets/objects') 12 | out_dir = Path(f'assets/objects_no_back') 13 | out_dir.mkdir(exist_ok=True, parents=True) 14 | 15 | for path in source_dir.glob('*.jpg'): 16 | try: 17 | base_image = Image.open(path).resize((512, 512)) 18 | tmp_path = 'tmp.jpg' 19 | base_image.save(tmp_path) 20 | crop_mask = bria_pipe(tmp_path, return_mask=True) # Retuns a PIL mask 21 | # Apply mask on base_image 22 | base_image = Image.composite(base_image, Image.new('RGB', base_image.size, (255, 255, 255)), crop_mask) 23 | base_image.save(out_dir / path.name) 24 | except Exception as e: 25 | print(e) 26 | 27 | 28 | if __name__ == "__main__": 29 | main() 30 | -------------------------------------------------------------------------------- /data_generation/words_bank.py: -------------------------------------------------------------------------------- 1 | placements = ['on a table', 'on a shelf', 'on a wooden table', 'on the street', 'on the forest bed', 'on the floor', 2 | 'on the ground', 'on the grass', 'on the sand', 'on the beach', 'on the shore', 'on the pavement', 3 | 'in a room', 'in a living room', 'in a bedroom', 'in a kitchen', 'in a library', 'in a bathroom', 4 | 'in a garden', 'in a park', 'in a library', 'in a bathroom', 'in a garden', 'in a park', 'in the office', 5 | 'in the classroom', 'in the cafe', 'in the restaurant', 'on the balcony', 'on the rooftop', 6 | 'in the hallway', 'on the staircase', 'in the elevator', 'in the lobby', 'in the garage', 7 | 'in the basement', 'in the attic', 'on the porch', 'on the deck', 'on the patio', 'on the mountain', 8 | 'in the valley', 'in the forest', 'in the jungle', 'on the riverbank', 'at the waterfall', 'by the lake', 9 | 'by the pond', 'in the desert', 'in the canyon', 'on the glacier', 'by the iceberg', 'in the field', 10 | 'on the farm', 'in the vineyard', 'in the orchard', 'at the playground', 'in the stadium', 'in the arena', 11 | 'in the gym', 'in the pool', 'in the sauna', 'in the spa', 'in the beauty salon', 'in the bookstore', 12 | 'in the grocery store', 'in the market', 'in the mall', 'in the theater', 'in the cinema', 13 | 'in the studio', 'in the gallery', 'in the museum', 'at the aquarium', 'at the zoo', 14 | 'at the amusement park', 'at the campsite', 'in the RV park', 'at the resort', 'in the hotel', 15 | 'in the motel', 'in the inn', 'in the lodge', 'in the hostel', 'at the airport', 'at the train station', 16 | 'at the bus station', 'at the port', 'in the church', 'in the 
temple', 'in the mosque', 17 | 'in the synagogue', 'at the university', 'at the school', 'in the daycare', 'in the nursery', 18 | 'in the factory', 'in the warehouse', 'in the office building', 'in the skyscraper', 'in the cottage', 19 | 'in the bungalow', 'in the mansion', 'in the castle'] 20 | 21 | texture_attributes = ['glossy', 'shiny', 'matte', 'rough', 'bumpy', 'smooth', 'shimmering', 'sparkling', 'dusty', 22 | 'fuzzy', 'soft', 'hard', 'brittle', 'flexible', 'elastic', 'stiff', 'rigid', 'tough', 'flimsy', 23 | 'delicate', 'fragile', 'sturdy', 'solid', 'hollow', 'dense', 'light', 'heavy', 'grainy', 24 | 'slippery', 'sticky', 'warm', 'cool', 'metallic', 'pearlescent', 'satin', 'dull', 'reflective', 25 | 'translucent', 'opaque', 'homogeneous', 'heterogeneous', 'veined', 'flat', 'raised', 'textured', 26 | 'rugged', 'water-resistant', 'light-absorbing', 'dust-repellent', 'cozy', 'luxurious', 'rustic', 27 | 'weathered', 'polished', 'aged', 'green', 'brown', 'yellow', 'orange', 'red', 'blue', 'purple', 28 | 'pink', 'white', 'black', 'grey', 'silver', 'gold', 'cyan', 'magenta', 'turquoise', 'ivory', 29 | 'tan', 'beige', 'navy', 'maroon', 'charcoal', 'teal', 'olive', 'peach', 'lavender', 'uniform', 30 | 'abstract', 'geometric', 'random', 'symmetrical', 'asymmetrical', 'striped', 'checked', 'paisley', 31 | 'plaid', 'houndstooth', 'polka-dot', 'herringbone', 'chevron', 'argyle', 'non-reflective', 32 | 'marbled', 'grained', 'streaked', 'spotted', 'speckled', 'dappled', 'mottled', 'flecked', 33 | 'patched', 'layered', 'deep', 'shallow', 'pitted', 'embossed', 'engraved', 'new', 'old', 'worn', 34 | 'damaged', 'antique', 'vintage', 'distressed', 'refurbished', 'restored', 'pristine', 35 | 'immaculate', 'tarnished', 'faded', 'marble', 'granite', 'wood', 'metal', 'glass', 'plastic', 36 | 'fabric', 'leather', 'paper', 'cardboard', 'concrete', 'brick', 'stone', 'sand', 'dirt', 'mud', 37 | 'clay', 'ceramic', 'porcelain', 'rubber', 'sponge', 'foam', 'felt', 'velvet', 'silk', 'cotton', 38 | 'wool', 'linen', 'denim', 'lace', 'tweed', 'nylon', 'polyester', 'acrylic', 'spandex', 'suede', 39 | 'mesh', 'bamboo', 'hemp', 'leaf pattern', 'floral pattern', 'animal pattern', 'zebra pattern', 40 | 'tiger pattern', 'leopard pattern', 'cheetah pattern', 'giraffe pattern', 'snake pattern', 41 | 'crocodile pattern', 'camouflage pattern', 'mosaic pattern', 'kaleidoscope pattern', 42 | 'mandala pattern', 'tartan pattern', 'batik pattern', 'ikat pattern', 'quilted pattern', 43 | 'glittering', 'mirrored', 'satin-finish', 'carbon fiber', 'knitted', 'crocheted', 'embroidered', 44 | 'pleated', 'crinkled', 'crumpled', 'woven', 'braided', 'perforated', 'padded', 'quilted', 45 | 'thermal', 'insulated', 'gauzy', 'translucent-finish', 'iridescent', 'opalescent', 'neon', 46 | 'pastel', 'vibrant', 'dull-finish', 'chalky', 'silky-smooth', 'rubbery', 'gummy', 'waxy', 'oily', 47 | 'soapy', 'milky', 'crystal-clear', 'frosted', 'etched', 'blurred', 'swirled', 'twisted', 'coiled', 48 | 'looped', 'interwoven', 'knotted', 'spiral', 'diagonal', 'crosshatched', 'lacy', 'beaded', 49 | 'sequined', 'flocked', 'brushed metal', 'anodized', 'galvanized', 'powder-coated', 'acid-washed', 50 | 'sun-bleached', 'peeling', 'cracked', 'chipped', 'burnished', 'oxidized', 'corroded', 'stained', 51 | 'dyed', 'tie-dye pattern', 'ombre', 'gradient', 'speckled paint', 'splattered paint', 52 | 'marbleized', 'woodgrain', 'cork', 'terrazzo', 'bamboo texture', 'reed', 'sisal', 'sea grass', 53 | 'jute', 'chalkboard', 'magnetic', 'glazed', 'unglazed', 'raw', 'burnt', 
'smoked', 'sanded', 54 | 'planed', 'rough-cut', 'varnished', 'unvarnished', 'waxed', 'oil-finished', 'shellac-finished', 55 | 'lacquered', 'patina', 'brushed', 'hammered', 'spun', 'wrought', 'forged', 'cast', 'molded', 56 | '3D printed', 'laminated', 'veneered', 'inlaid', 'gilded', 'silvered', 'leafed', 'foiled', 57 | 'embossed pattern', 'debossed pattern', 'puzzle pattern', 'geometric pattern', 58 | 'optical illusion pattern', 'holographic pattern', 'psychedelic pattern', 'pop art pattern', 59 | 'art deco pattern', 'Victorian pattern', 'Baroque pattern', 'Renaissance pattern', 60 | 'gothic pattern', 'Celtic pattern', 'tribal pattern', 'Ethnic pattern', 'folk pattern', 61 | 'historical pattern'] 62 | 63 | adjectives = ['rotten', 'big', 'small', 'many', 'burning', 'melting', 'shattered', 'dried', 'sliced', 'moldy', 64 | 'glistening', 'fluffy', 'plush', 'opaque', 'wrinkled', 'frosted', 'antique', 'futuristic', 'cracked', 65 | 'glowing', 'glossy', 'translucent', 'gothic', 'young', 'old', 'rustic', 'two', 'multiple', 'group', 66 | 'shiny', 'dull', 'colorful', 'floating', 'winged', 'soggy', 'ancient', 'tiny', 'enormous', 'skeletal', 67 | 'hairless', 'furry', 'grimy', 'frozen', 'dusty', 'muddy', 'bubbly', 'spiky', 'slimy', 'scaly', 'feathery', 68 | 'hairy', 'fuzzy', 'smooth', 'rough', 'gleaming', 'heavy', 'wet', 'dry', 'aged', 'transparent', 'empty', 69 | 'full'] 70 | 71 | art_types = ['photo', 'painting', 'sketch', 'sculpture', 'photograph', 'drawing', 'tapestry', 'mosaic', 'carving', 72 | 'pottery', 'ceramic', 'origami', 'stained glass', 'engraving', 'watercolor painting', 'oil painting', 73 | 'acrylic painting', 'charcoal drawing', 'pencil drawing', 'pastel drawing', 'ink drawing', 74 | 'digital painting', 'collage', 'mixed media', 'woodcut', 'lithograph', 'etching', 'engraving', 75 | 'comic drawing', 'cartoon', 'animation', 'illustration', 'concept art', 'conceptual art', '3D rendering', 76 | 'digital art', 'traditional art', 'abstract art', 'realistic art', 'minimalist art', 'abstract art' 77 | ] 78 | -------------------------------------------------------------------------------- /kandinsky2/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | from huggingface_hub import hf_hub_url, cached_download 3 | from copy import deepcopy 4 | from omegaconf.dictconfig import DictConfig 5 | 6 | from .configs import CONFIG_2_0, CONFIG_2_1 7 | from .kandinsky2_model import Kandinsky2 8 | from .kandinsky2_1_model import Kandinsky2_1 9 | from .kandinsky2_2_model import Kandinsky2_2 10 | 11 | 12 | def get_kandinsky2_0( 13 | device, 14 | task_type="text2img", 15 | cache_dir="/tmp/kandinsky2", 16 | use_auth_token=None, 17 | ): 18 | cache_dir = os.path.join(cache_dir, "2_0") 19 | config = deepcopy(CONFIG_2_0) 20 | if task_type == "inpainting": 21 | model_name = "Kandinsky-2-0-inpainting.pt" 22 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=model_name) 23 | elif task_type == "text2img": 24 | model_name = "Kandinsky-2-0.pt" 25 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=model_name) 26 | else: 27 | raise ValueError("Only text2img, img2img and inpainting is available") 28 | 29 | cached_download( 30 | config_file_url, 31 | cache_dir=cache_dir, 32 | force_filename=model_name, 33 | use_auth_token=use_auth_token, 34 | ) 35 | 36 | cache_dir_text_en1 = os.path.join(cache_dir, "text_encoder1") 37 | for name in [ 38 | "config.json", 39 | "pytorch_model.bin", 40 | "sentencepiece.bpe.model", 41 | 
"special_tokens_map.json", 42 | "tokenizer.json", 43 | "tokenizer_config.json", 44 | ]: 45 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=f"text_encoder1/{name}") 46 | cached_download( 47 | config_file_url, 48 | cache_dir=cache_dir_text_en1, 49 | force_filename=name, 50 | use_auth_token=use_auth_token, 51 | ) 52 | 53 | cache_dir_text_en2 = os.path.join(cache_dir, "text_encoder2") 54 | for name in [ 55 | "config.json", 56 | "pytorch_model.bin", 57 | "spiece.model", 58 | "special_tokens_map.json", 59 | "tokenizer_config.json", 60 | ]: 61 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename=f"text_encoder2/{name}") 62 | cached_download( 63 | config_file_url, 64 | cache_dir=cache_dir_text_en2, 65 | force_filename=name, 66 | use_auth_token=use_auth_token, 67 | ) 68 | 69 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.0", filename="vae.ckpt") 70 | cached_download( 71 | config_file_url, 72 | cache_dir=cache_dir, 73 | force_filename="vae.ckpt", 74 | use_auth_token=use_auth_token, 75 | ) 76 | 77 | config["text_enc_params1"]["model_path"] = cache_dir_text_en1 78 | config["text_enc_params2"]["model_path"] = cache_dir_text_en2 79 | config["tokenizer_name1"] = cache_dir_text_en1 80 | config["tokenizer_name2"] = cache_dir_text_en2 81 | config["image_enc_params"]["params"]["ckpt_path"] = os.path.join( 82 | cache_dir, "vae.ckpt" 83 | ) 84 | unet_path = os.path.join(cache_dir, model_name) 85 | 86 | model = Kandinsky2(config, unet_path, device, task_type) 87 | return model 88 | 89 | 90 | def get_kandinsky2_1( 91 | device, 92 | task_type="text2img", 93 | cache_dir="/tmp/kandinsky2", 94 | use_auth_token=None, 95 | use_flash_attention=False, 96 | ): 97 | cache_dir = os.path.join(cache_dir, "2_1") 98 | config = DictConfig(deepcopy(CONFIG_2_1)) 99 | config["model_config"]["use_flash_attention"] = use_flash_attention 100 | if task_type == "text2img": 101 | model_name = "decoder_fp16.ckpt" 102 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=model_name) 103 | elif task_type == "inpainting": 104 | model_name = "inpainting_fp16.ckpt" 105 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=model_name) 106 | cached_download( 107 | config_file_url, 108 | cache_dir=cache_dir, 109 | force_filename=model_name, 110 | use_auth_token=use_auth_token, 111 | ) 112 | prior_name = "prior_fp16.ckpt" 113 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=prior_name) 114 | cached_download( 115 | config_file_url, 116 | cache_dir=cache_dir, 117 | force_filename=prior_name, 118 | use_auth_token=use_auth_token, 119 | ) 120 | 121 | cache_dir_text_en = os.path.join(cache_dir, "text_encoder") 122 | for name in [ 123 | "config.json", 124 | "pytorch_model.bin", 125 | "sentencepiece.bpe.model", 126 | "special_tokens_map.json", 127 | "tokenizer.json", 128 | "tokenizer_config.json", 129 | ]: 130 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename=f"text_encoder/{name}") 131 | cached_download( 132 | config_file_url, 133 | cache_dir=cache_dir_text_en, 134 | force_filename=name, 135 | use_auth_token=use_auth_token, 136 | ) 137 | 138 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", filename="movq_final.ckpt") 139 | cached_download( 140 | config_file_url, 141 | cache_dir=cache_dir, 142 | force_filename="movq_final.ckpt", 143 | use_auth_token=use_auth_token, 144 | ) 145 | 146 | config_file_url = hf_hub_url(repo_id="sberbank-ai/Kandinsky_2.1", 
filename="ViT-L-14_stats.th") 147 | cached_download( 148 | config_file_url, 149 | cache_dir=cache_dir, 150 | force_filename="ViT-L-14_stats.th", 151 | use_auth_token=use_auth_token, 152 | ) 153 | 154 | config["tokenizer_name"] = cache_dir_text_en 155 | config["text_enc_params"]["model_path"] = cache_dir_text_en 156 | config["prior"]["clip_mean_std_path"] = os.path.join(cache_dir, "ViT-L-14_stats.th") 157 | config["image_enc_params"]["ckpt_path"] = os.path.join(cache_dir, "movq_final.ckpt") 158 | cache_model_name = os.path.join(cache_dir, model_name) 159 | cache_prior_name = os.path.join(cache_dir, prior_name) 160 | model = Kandinsky2_1(config, cache_model_name, cache_prior_name, device, task_type=task_type) 161 | return model 162 | 163 | 164 | def get_kandinsky2( 165 | device, 166 | task_type="text2img", 167 | cache_dir="/tmp/kandinsky2", 168 | use_auth_token=None, 169 | model_version="2.1", 170 | use_flash_attention=False, 171 | ): 172 | if model_version == "2.0": 173 | model = get_kandinsky2_0( 174 | device, 175 | task_type=task_type, 176 | cache_dir=cache_dir, 177 | use_auth_token=use_auth_token, 178 | ) 179 | elif model_version == "2.1": 180 | model = get_kandinsky2_1( 181 | device, 182 | task_type=task_type, 183 | cache_dir=cache_dir, 184 | use_auth_token=use_auth_token, 185 | use_flash_attention=use_flash_attention, 186 | ) 187 | elif model_version == "2.2": 188 | model = Kandinsky2_2(device=device, task_type=task_type) 189 | else: 190 | raise ValueError("Only 2.0 and 2.1 is available") 191 | 192 | return model 193 | -------------------------------------------------------------------------------- /kandinsky2/configs.py: -------------------------------------------------------------------------------- 1 | CONFIG_2_0 = { 2 | "model_config": { 3 | "image_size": 64, 4 | "num_channels": 384, 5 | "num_res_blocks": 3, 6 | "channel_mult": "", 7 | "num_heads": 1, 8 | "num_head_channels": 64, 9 | "num_heads_upsample": -1, 10 | "attention_resolutions": "32,16,8", 11 | "dropout": 0, 12 | "model_dim": 768, 13 | "use_scale_shift_norm": True, 14 | "resblock_updown": True, 15 | "use_fp16": False, 16 | "cache_text_emb": True, 17 | "text_encoder_in_dim1": 1024, 18 | "text_encoder_in_dim2": 640, 19 | "pooling_type": "from_model", 20 | "in_channels": 4, 21 | "out_channels": 8, 22 | "up": False, 23 | "inpainting": False, 24 | }, 25 | "diffusion_config": { 26 | "learn_sigma": True, 27 | "sigma_small": False, 28 | "steps": 1000, 29 | "noise_schedule": "linear", 30 | "timestep_respacing": "", 31 | "use_kl": False, 32 | "predict_xstart": False, 33 | "rescale_timesteps": True, 34 | "rescale_learned_sigmas": True, 35 | "linear_start": 0.0001, 36 | "linear_end": 0.02, 37 | }, 38 | "image_enc_params": { 39 | "name": "AutoencoderKL", 40 | "scale": 0.0512, 41 | "params": { 42 | "ckpt_path": "", 43 | "embed_dim": 4, 44 | "ddconfig": { 45 | "double_z": True, 46 | "z_channels": 4, 47 | "resolution": 256, 48 | "in_channels": 3, 49 | "out_ch": 3, 50 | "ch": 128, 51 | "ch_mult": [1, 2, 4, 4], 52 | "num_res_blocks": 2, 53 | "attn_resolutions": [], 54 | "dropout": 0.0, 55 | }, 56 | }, 57 | }, 58 | "text_enc_params1": {"model_path": "", "model_name": "multiclip"}, 59 | "text_enc_params2": {"model_path": "", "model_name": "MT5EncoderModel"}, 60 | "tokenizer_name1": "", 61 | "tokenizer_name2": "", 62 | } 63 | 64 | CONFIG_2_1 = { 65 | "clip_name": "ViT-L/14", 66 | "clip_image_size": 224, 67 | "tokenizer_name": "", 68 | "image_enc_params": { 69 | "name": "MOVQ", 70 | "scale": 1, 71 | "ckpt_path": "", 72 | "params": { 73 | 
"embed_dim": 4, 74 | "n_embed": 16384, 75 | "ddconfig": { 76 | "double_z": False, 77 | "z_channels": 4, 78 | "resolution": 256, 79 | "in_channels": 3, 80 | "out_ch": 3, 81 | "ch": 128, 82 | "ch_mult": [1, 2, 2, 4], 83 | "num_res_blocks": 2, 84 | "attn_resolutions": [32], 85 | "dropout": 0.0, 86 | }, 87 | }, 88 | }, 89 | "text_enc_params": { 90 | "model_path": "", 91 | "model_name": "multiclip", 92 | "in_features": 1024, 93 | "out_features": 768, 94 | }, 95 | "prior": { 96 | "clip_mean_std_path": "ViT-L-14_stats.th", 97 | "params": { 98 | "model": { 99 | "type": "prior", 100 | "diffusion_sampler": "uniform", 101 | "hparams": { 102 | "text_ctx": 77, 103 | "xf_width": 2048, 104 | "xf_layers": 20, 105 | "xf_heads": 32, 106 | "xf_final_ln": True, 107 | "xf_padding": False, 108 | "text_drop": 0.2, 109 | "clip_dim": 768, 110 | "clip_xf_width": 768, 111 | }, 112 | }, 113 | "diffusion": { 114 | "steps": 1000, 115 | "learn_sigma": False, 116 | "sigma_small": True, 117 | "noise_schedule": "cosine", 118 | "use_kl": False, 119 | "predict_xstart": True, 120 | "rescale_learned_sigmas": False, 121 | "timestep_respacing": "", 122 | }, 123 | }, 124 | }, 125 | "model_config": { 126 | "version": "2.1", 127 | "image_size": 64, 128 | "num_channels": 384, 129 | "num_res_blocks": 3, 130 | "channel_mult": "", 131 | "num_heads": 1, 132 | "num_head_channels": 64, 133 | "num_heads_upsample": -1, 134 | "attention_resolutions": "32,16,8", 135 | "dropout": 0, 136 | "model_dim": 768, 137 | "use_scale_shift_norm": True, 138 | "resblock_updown": True, 139 | "use_fp16": True, 140 | "cache_text_emb": True, 141 | "text_encoder_in_dim1": 1024, 142 | "text_encoder_in_dim2": 768, 143 | "image_encoder_in_dim": 768, 144 | "num_image_embs": 10, 145 | "pooling_type": "from_model", 146 | "in_channels": 4, 147 | "out_channels": 8, 148 | "use_flash_attention": False, 149 | }, 150 | "diffusion_config": { 151 | "learn_sigma": True, 152 | "sigma_small": False, 153 | "steps": 1000, 154 | "noise_schedule": "linear", 155 | "timestep_respacing": "", 156 | "use_kl": False, 157 | "predict_xstart": False, 158 | "rescale_timesteps": True, 159 | "rescale_learned_sigmas": True, 160 | "linear_start": 0.00085, 161 | "linear_end": 0.012, 162 | }, 163 | } -------------------------------------------------------------------------------- /kandinsky2/model/fp16_util.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers to inference with 16-bit precision. 3 | """ 4 | 5 | import torch.nn as nn 6 | from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors 7 | 8 | 9 | def convert_module_to_f16(l): 10 | """ 11 | Convert primitive modules to float16. 12 | """ 13 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 14 | l.weight.data = l.weight.data.half() 15 | if l.bias is not None: 16 | l.bias.data = l.bias.data.half() 17 | 18 | 19 | def convert_module_to_f32(l): 20 | """ 21 | Convert primitive modules to float32, undoing convert_module_to_f16(). 22 | """ 23 | if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)): 24 | l.weight.data = l.weight.data.float() 25 | if l.bias is not None: 26 | l.bias.data = l.bias.data.float() 27 | 28 | 29 | def make_master_params(model_params): 30 | """ 31 | Copy model parameters into a (differently-shaped) list of full-precision 32 | parameters. 
33 | """ 34 | master_params = _flatten_dense_tensors( 35 | [param.detach().float() for param in model_params] 36 | ) 37 | master_params = nn.Parameter(master_params) 38 | master_params.requires_grad = True 39 | return [master_params] 40 | 41 | 42 | def model_grads_to_master_grads(model_params, master_params): 43 | """ 44 | Copy the gradients from the model parameters into the master parameters 45 | from make_master_params(). 46 | """ 47 | master_params[0].grad = _flatten_dense_tensors( 48 | [param.grad.data.detach().float() for param in model_params] 49 | ) 50 | 51 | 52 | def master_params_to_model_params(model_params, master_params): 53 | """ 54 | Copy the master parameter data back into the model parameters. 55 | """ 56 | # Without copying to a list, if a generator is passed, this will 57 | # silently not copy any parameters. 58 | model_params = list(model_params) 59 | 60 | for param, master_param in zip( 61 | model_params, unflatten_master_params(model_params, master_params) 62 | ): 63 | param.detach().copy_(master_param) 64 | 65 | 66 | def unflatten_master_params(model_params, master_params): 67 | """ 68 | Unflatten the master parameters to look like model_params. 69 | """ 70 | return _unflatten_dense_tensors(master_params[0].detach(), model_params) 71 | 72 | 73 | def zero_grad(model_params): 74 | for param in model_params: 75 | # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group 76 | if param.grad is not None: 77 | param.grad.detach_() 78 | param.grad.zero_() 79 | -------------------------------------------------------------------------------- /kandinsky2/model/losses.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helpers for various likelihood-based losses. These are ported from the original 3 | Ho et al. diffusion models codebase: 4 | https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py 5 | """ 6 | 7 | import numpy as np 8 | 9 | import torch as th 10 | 11 | 12 | def normal_kl(mean1, logvar1, mean2, logvar2): 13 | """ 14 | Compute the KL divergence between two gaussians. 15 | Shapes are automatically broadcasted, so batches can be compared to 16 | scalars, among other use cases. 17 | """ 18 | tensor = None 19 | for obj in (mean1, logvar1, mean2, logvar2): 20 | if isinstance(obj, th.Tensor): 21 | tensor = obj 22 | break 23 | assert tensor is not None, "at least one argument must be a Tensor" 24 | 25 | # Force variances to be Tensors. Broadcasting helps convert scalars to 26 | # Tensors, but it does not work for th.exp(). 27 | logvar1, logvar2 = [ 28 | x if isinstance(x, th.Tensor) else th.tensor(x).to(tensor) 29 | for x in (logvar1, logvar2) 30 | ] 31 | 32 | return 0.5 * ( 33 | -1.0 34 | + logvar2 35 | - logvar1 36 | + th.exp(logvar1 - logvar2) 37 | + ((mean1 - mean2) ** 2) * th.exp(-logvar2) 38 | ) 39 | 40 | 41 | def approx_standard_normal_cdf(x): 42 | """ 43 | A fast approximation of the cumulative distribution function of the 44 | standard normal. 45 | """ 46 | return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3)))) 47 | 48 | 49 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 50 | """ 51 | Compute the log-likelihood of a Gaussian distribution discretizing to a 52 | given image. 53 | :param x: the target images. It is assumed that this was uint8 values, 54 | rescaled to the range [-1, 1]. 55 | :param means: the Gaussian mean Tensor. 
56 | :param log_scales: the Gaussian log stddev Tensor. 57 | :return: a tensor like x of log probabilities (in nats). 58 | """ 59 | assert x.shape == means.shape == log_scales.shape 60 | centered_x = x - means 61 | inv_stdv = th.exp(-log_scales) 62 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 63 | cdf_plus = approx_standard_normal_cdf(plus_in) 64 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 65 | cdf_min = approx_standard_normal_cdf(min_in) 66 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 67 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 68 | cdf_delta = cdf_plus - cdf_min 69 | log_probs = th.where( 70 | x < -0.999, 71 | log_cdf_plus, 72 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 73 | ) 74 | assert log_probs.shape == x.shape 75 | return log_probs 76 | -------------------------------------------------------------------------------- /kandinsky2/model/model_creation.py: -------------------------------------------------------------------------------- 1 | from .gaussian_diffusion import get_named_beta_schedule 2 | from . import gaussian_diffusion as gd 3 | from .respace import SpacedDiffusion, space_timesteps 4 | from .text2im_model import Text2ImUNet, InpaintText2ImUNet 5 | from .text2im_model2_1 import Text2ImUNet as Text2ImUNet2_1 6 | from .text2im_model2_1 import InpaintText2ImUNet as InpaintText2ImUNet2_1 7 | 8 | 9 | def create_model( 10 | image_size, 11 | num_channels, 12 | num_res_blocks, 13 | channel_mult, 14 | attention_resolutions, 15 | num_heads, 16 | num_head_channels, 17 | num_heads_upsample, 18 | use_scale_shift_norm, 19 | dropout, 20 | model_dim, 21 | resblock_updown, 22 | use_fp16, 23 | cache_text_emb, 24 | text_encoder_in_dim1, 25 | text_encoder_in_dim2, 26 | pooling_type, 27 | in_channels, 28 | out_channels, 29 | up, 30 | inpainting, 31 | version="2.0", 32 | **kwargs, 33 | ): 34 | if channel_mult == "": 35 | if image_size == 256: 36 | channel_mult = (1, 1, 2, 2, 4, 4) 37 | elif image_size == 128: 38 | channel_mult = (1, 1, 2, 3, 4) 39 | elif image_size == 64: 40 | channel_mult = (1, 2, 3, 4) 41 | else: 42 | raise ValueError(f"unsupported image size: {image_size}") 43 | else: 44 | channel_mult = tuple(int(ch_mult) for ch_mult in channel_mult.split(",")) 45 | 46 | attention_ds = [] 47 | for res in attention_resolutions.split(","): 48 | attention_ds.append(image_size // int(res)) 49 | if inpainting: 50 | if version == "2.0": 51 | model_cls = InpaintText2ImUNet 52 | elif version == "2.1": 53 | model_cls = InpaintText2ImUNet2_1 54 | else: 55 | ValueError("Only 2.0 and 2.1 versions are available") 56 | else: 57 | if version == "2.0": 58 | model_cls = Text2ImUNet 59 | elif version == "2.1": 60 | model_cls = Text2ImUNet2_1 61 | else: 62 | ValueError("Only 2.0 and 2.1 versions are available") 63 | return model_cls( 64 | in_channels=in_channels, 65 | model_channels=num_channels, 66 | out_channels=out_channels, 67 | num_res_blocks=num_res_blocks, 68 | attention_resolutions=tuple(attention_ds), 69 | dropout=dropout, 70 | model_dim=model_dim, 71 | channel_mult=channel_mult, 72 | use_fp16=use_fp16, 73 | num_heads=num_heads, 74 | num_head_channels=num_head_channels, 75 | num_heads_upsample=num_heads_upsample, 76 | use_scale_shift_norm=use_scale_shift_norm, 77 | resblock_updown=resblock_updown, 78 | cache_text_emb=cache_text_emb, 79 | text_encoder_in_dim1=text_encoder_in_dim1, 80 | text_encoder_in_dim2=text_encoder_in_dim2, 81 | pooling_type=pooling_type, 82 | **kwargs, 83 | ) 84 | 85 | 86 | def create_gaussian_diffusion( 
87 | *, 88 | steps=1000, 89 | learn_sigma=False, 90 | sigma_small=False, 91 | noise_schedule="linear", 92 | use_kl=False, 93 | predict_xstart=False, 94 | rescale_timesteps=False, 95 | rescale_learned_sigmas=False, 96 | timestep_respacing="", 97 | linear_start=0.0001, 98 | linear_end=0.02, 99 | ): 100 | betas = get_named_beta_schedule( 101 | noise_schedule, steps, linear_start=linear_start, linear_end=linear_end 102 | ) 103 | if use_kl: 104 | loss_type = gd.LossType.RESCALED_KL 105 | elif rescale_learned_sigmas: 106 | loss_type = gd.LossType.RESCALED_MSE 107 | else: 108 | loss_type = gd.LossType.MSE 109 | if not timestep_respacing: 110 | timestep_respacing = [steps] 111 | return SpacedDiffusion( 112 | use_timesteps=space_timesteps(steps, timestep_respacing), 113 | betas=betas, 114 | model_mean_type=( 115 | gd.ModelMeanType.EPSILON if not predict_xstart else gd.ModelMeanType.START_X 116 | ), 117 | model_var_type=( 118 | ( 119 | gd.ModelVarType.FIXED_LARGE 120 | if not sigma_small 121 | else gd.ModelVarType.FIXED_SMALL 122 | ) 123 | if not learn_sigma 124 | else gd.ModelVarType.LEARNED_RANGE 125 | ), 126 | loss_type=loss_type, 127 | rescale_timesteps=rescale_timesteps, 128 | ) 129 | -------------------------------------------------------------------------------- /kandinsky2/model/nn.py: -------------------------------------------------------------------------------- 1 | """ 2 | Various utilities for neural networks. 3 | """ 4 | 5 | import math 6 | 7 | import torch 8 | import torch.nn as nn 9 | import torch.nn.functional as F 10 | 11 | 12 | def update_ema(target_params, source_params, rate=0.99): 13 | """ 14 | Update target parameters to be closer to those of source parameters using 15 | an exponential moving average. 16 | :param target_params: the target parameter sequence. 17 | :param source_params: the source parameter sequence. 18 | :param rate: the EMA rate (closer to 1 means slower). 19 | """ 20 | for targ, src in zip(target_params, source_params): 21 | targ.detach().mul_(rate).add_(src, alpha=1 - rate) 22 | 23 | return target_params 24 | 25 | 26 | class GroupNorm32(nn.GroupNorm): 27 | def __init__(self, num_groups, num_channels, swish, eps=1e-5): 28 | super().__init__(num_groups=num_groups, num_channels=num_channels, eps=eps) 29 | self.swish = swish 30 | 31 | def forward(self, x): 32 | y = super().forward(x.float()).to(x.dtype) 33 | if self.swish == 1.0: 34 | y = F.silu(y) 35 | elif self.swish: 36 | y = y * F.sigmoid(y * float(self.swish)) 37 | return y 38 | 39 | 40 | def conv_nd(dims, *args, **kwargs): 41 | """ 42 | Create a 1D, 2D, or 3D convolution module. 43 | """ 44 | if dims == 1: 45 | return nn.Conv1d(*args, **kwargs) 46 | elif dims == 2: 47 | return nn.Conv2d(*args, **kwargs) 48 | elif dims == 3: 49 | return nn.Conv3d(*args, **kwargs) 50 | raise ValueError(f"unsupported dimensions: {dims}") 51 | 52 | 53 | def linear(*args, **kwargs): 54 | """ 55 | Create a linear module. 56 | """ 57 | return nn.Linear(*args, **kwargs) 58 | 59 | 60 | def avg_pool_nd(dims, *args, **kwargs): 61 | """ 62 | Create a 1D, 2D, or 3D average pooling module. 63 | """ 64 | if dims == 1: 65 | return nn.AvgPool1d(*args, **kwargs) 66 | elif dims == 2: 67 | return nn.AvgPool2d(*args, **kwargs) 68 | elif dims == 3: 69 | return nn.AvgPool3d(*args, **kwargs) 70 | raise ValueError(f"unsupported dimensions: {dims}") 71 | 72 | 73 | def zero_module(module): 74 | """ 75 | Zero out the parameters of a module and return it. 
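Illustrative usage (a minimal sketch; the layer below is hypothetical):

    out_proj = zero_module(conv_nd(2, 128, 4, 3, padding=1))
    # every parameter of out_proj is now exactly zero, so at initialization
    # the layer outputs zeros -- the usual trick for the final projection of a
    # residual or attention block so the block starts out as an identity.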
76 | """ 77 | for p in module.parameters(): 78 | p.detach().zero_() 79 | return module 80 | 81 | 82 | def scale_module(module, scale): 83 | """ 84 | Scale the parameters of a module and return it. 85 | """ 86 | for p in module.parameters(): 87 | p.detach().mul_(scale) 88 | return module 89 | 90 | 91 | def normalization(channels, swish=0.0): 92 | """ 93 | Make a standard normalization layer, with an optional swish activation. 94 | 95 | :param channels: number of input channels. 96 | :return: an nn.Module for normalization. 97 | """ 98 | return GroupNorm32(num_channels=channels, num_groups=32, swish=swish) 99 | 100 | 101 | def timestep_embedding(timesteps, dim, max_period=10000): 102 | """ 103 | Create sinusoidal timestep embeddings. 104 | 105 | :param timesteps: a 1-D Tensor of N indices, one per batch element. 106 | These may be fractional. 107 | :param dim: the dimension of the output. 108 | :param max_period: controls the minimum frequency of the embeddings. 109 | :return: an [N x dim] Tensor of positional embeddings. 110 | """ 111 | half = dim // 2 112 | freqs = torch.exp( 113 | -math.log(max_period) 114 | * torch.arange(start=0, end=half, dtype=torch.float32) 115 | / half 116 | ).to(device=timesteps.device) 117 | args = timesteps[:, None].float() * freqs[None] 118 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 119 | if dim % 2: 120 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 121 | return embedding 122 | -------------------------------------------------------------------------------- /kandinsky2/model/resample.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | 3 | import numpy as np 4 | import torch 5 | import torch.distributed as dist 6 | 7 | 8 | def create_named_schedule_sampler(name, diffusion): 9 | """ 10 | Create a ScheduleSampler from a library of pre-defined samplers. 11 | :param name: the name of the sampler. 12 | :param diffusion: the diffusion object to sample for. 13 | """ 14 | if name == "uniform": 15 | return UniformSampler(diffusion) 16 | elif name == "loss-second-moment": 17 | return LossSecondMomentResampler(diffusion) 18 | else: 19 | raise NotImplementedError(f"unknown schedule sampler: {name}") 20 | 21 | 22 | class ScheduleSampler(ABC): 23 | """ 24 | A distribution over timesteps in the diffusion process, intended to reduce 25 | variance of the objective. 26 | By default, samplers perform unbiased importance sampling, in which the 27 | objective's mean is unchanged. 28 | However, subclasses may override sample() to change how the resampled 29 | terms are reweighted, allowing for actual changes in the objective. 30 | """ 31 | 32 | @abstractmethod 33 | def weights(self): 34 | """ 35 | Get a numpy array of weights, one per diffusion step. 36 | The weights needn't be normalized, but must be positive. 37 | """ 38 | 39 | def sample(self, batch_size, device): 40 | """ 41 | Importance-sample timesteps for a batch. 42 | :param batch_size: the number of timesteps. 43 | :param device: the torch device to save to. 44 | :return: a tuple (timesteps, weights): 45 | - timesteps: a tensor of timestep indices. 46 | - weights: a tensor of weights to scale the resulting losses. 
47 | """ 48 | w = self.weights() 49 | p = w / np.sum(w) 50 | indices_np = np.random.choice(len(p), size=(batch_size,), p=p) 51 | indices = torch.from_numpy(indices_np).long().to(device) 52 | weights_np = 1 / (len(p) * p[indices_np]) 53 | weights = torch.from_numpy(weights_np).float().to(device) 54 | return indices, weights 55 | 56 | 57 | class UniformSampler(ScheduleSampler): 58 | def __init__(self, diffusion): 59 | self.diffusion = diffusion 60 | self._weights = np.ones([diffusion.num_timesteps]) 61 | 62 | def weights(self): 63 | return self._weights 64 | 65 | 66 | class LossAwareSampler(ScheduleSampler): 67 | def update_with_local_losses(self, local_ts, local_losses): 68 | """ 69 | Update the reweighting using losses from a model. 70 | Call this method from each rank with a batch of timesteps and the 71 | corresponding losses for each of those timesteps. 72 | This method will perform synchronization to make sure all of the ranks 73 | maintain the exact same reweighting. 74 | :param local_ts: an integer Tensor of timesteps. 75 | :param local_losses: a 1D Tensor of losses. 76 | """ 77 | batch_sizes = [ 78 | torch.tensor([0], dtype=torch.int32, device=local_ts.device) 79 | for _ in range(dist.get_world_size()) 80 | ] 81 | dist.all_gather( 82 | batch_sizes, 83 | torch.tensor([len(local_ts)], dtype=torch.int32, device=local_ts.device), 84 | ) 85 | 86 | # Pad all_gather batches to be the maximum batch size. 87 | batch_sizes = [x.item() for x in batch_sizes] 88 | max_bs = max(batch_sizes) 89 | 90 | timestep_batches = [torch.zeros(max_bs).to(local_ts) for bs in batch_sizes] 91 | loss_batches = [torch.zeros(max_bs).to(local_losses) for bs in batch_sizes] 92 | dist.all_gather(timestep_batches, local_ts) 93 | dist.all_gather(loss_batches, local_losses) 94 | timesteps = [ 95 | x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs] 96 | ] 97 | losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]] 98 | self.update_with_all_losses(timesteps, losses) 99 | 100 | @abstractmethod 101 | def update_with_all_losses(self, ts, losses): 102 | """ 103 | Update the reweighting using losses from a model. 104 | Sub-classes should override this method to update the reweighting 105 | using losses from the model. 106 | This method directly updates the reweighting without synchronizing 107 | between workers. It is called by update_with_local_losses from all 108 | ranks with identical arguments. Thus, it should have deterministic 109 | behavior to maintain state across workers. 110 | :param ts: a list of int timesteps. 111 | :param losses: a list of float losses, one per timestep. 
112 | """ 113 | 114 | 115 | class LossSecondMomentResampler(LossAwareSampler): 116 | def __init__(self, diffusion, history_per_term=10, uniform_prob=0.001): 117 | self.diffusion = diffusion 118 | self.history_per_term = history_per_term 119 | self.uniform_prob = uniform_prob 120 | self._loss_history = np.zeros( 121 | [diffusion.num_timesteps, history_per_term], dtype=np.float64 122 | ) 123 | self._loss_counts = np.zeros([diffusion.num_timesteps], dtype=np.int) 124 | 125 | def weights(self): 126 | if not self._warmed_up(): 127 | return np.ones([self.diffusion.num_timesteps], dtype=np.float64) 128 | weights = np.sqrt(np.mean(self._loss_history**2, axis=-1)) 129 | weights /= np.sum(weights) 130 | weights *= 1 - self.uniform_prob 131 | weights += self.uniform_prob / len(weights) 132 | return weights 133 | 134 | def update_with_all_losses(self, ts, losses): 135 | for t, loss in zip(ts, losses): 136 | if self._loss_counts[t] == self.history_per_term: 137 | # Shift out the oldest loss term. 138 | self._loss_history[t, :-1] = self._loss_history[t, 1:] 139 | self._loss_history[t, -1] = loss 140 | else: 141 | self._loss_history[t, self._loss_counts[t]] = loss 142 | self._loss_counts[t] += 1 143 | 144 | def _warmed_up(self): 145 | return (self._loss_counts == self.history_per_term).all() 146 | -------------------------------------------------------------------------------- /kandinsky2/model/respace.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | 4 | from .gaussian_diffusion import GaussianDiffusion 5 | 6 | 7 | def make_ddim_timesteps(ddim_discr_method, num_ddim_timesteps, num_ddpm_timesteps): 8 | if ddim_discr_method == "uniform": 9 | c = num_ddpm_timesteps // num_ddim_timesteps 10 | ddim_timesteps = np.asarray(list(range(0, num_ddpm_timesteps, c))) 11 | elif ddim_discr_method == "quad": 12 | ddim_timesteps = ( 13 | (np.linspace(0, np.sqrt(num_ddpm_timesteps * 0.8), num_ddim_timesteps)) ** 2 14 | ).astype(int) 15 | else: 16 | raise NotImplementedError( 17 | f'There is no ddim discretization method called "{ddim_discr_method}"' 18 | ) 19 | 20 | steps_out = ddim_timesteps + 1 21 | return steps_out 22 | 23 | 24 | def space_timesteps(num_timesteps, section_counts): 25 | """ 26 | Create a list of timesteps to use from an original diffusion process, 27 | given the number of timesteps we want to take from equally-sized portions 28 | of the original process. 29 | For example, if there's 300 timesteps and the section counts are [10,15,20] 30 | then the first 100 timesteps are strided to be 10 timesteps, the second 100 31 | are strided to be 15 timesteps, and the final 100 are strided to be 20. 32 | If the stride is a string starting with "ddim", then the fixed striding 33 | from the DDIM paper is used, and only one section is allowed. 34 | :param num_timesteps: the number of diffusion steps in the original 35 | process to divide up. 36 | :param section_counts: either a list of numbers, or a string containing 37 | comma-separated numbers, indicating the step count 38 | per section. As a special case, use "ddimN" where N 39 | is a number of steps to use the striding from the 40 | DDIM paper. 41 | :return: a set of diffusion steps from the original process to use. 
42 | """ 43 | if isinstance(section_counts, str): 44 | if section_counts.startswith("ddim"): 45 | return set( 46 | make_ddim_timesteps( 47 | "uniform", int(section_counts[len("ddim") :]), num_timesteps 48 | ) 49 | ) 50 | section_counts = [int(x) for x in section_counts.split(",")] 51 | size_per = num_timesteps // len(section_counts) 52 | extra = num_timesteps % len(section_counts) 53 | start_idx = 0 54 | all_steps = [] 55 | for i, section_count in enumerate(section_counts): 56 | size = size_per + (1 if i < extra else 0) 57 | if size < section_count: 58 | raise ValueError( 59 | f"cannot divide section of {size} steps into {section_count}" 60 | ) 61 | if section_count <= 1: 62 | frac_stride = 1 63 | else: 64 | frac_stride = (size - 1) / (section_count - 1) 65 | cur_idx = 0.0 66 | taken_steps = [] 67 | for _ in range(section_count): 68 | taken_steps.append(start_idx + round(cur_idx)) 69 | cur_idx += frac_stride 70 | all_steps += taken_steps 71 | start_idx += size 72 | return set(all_steps) 73 | 74 | 75 | class SpacedDiffusion(GaussianDiffusion): 76 | """ 77 | A diffusion process which can skip steps in a base diffusion process. 78 | :param use_timesteps: a collection (sequence or set) of timesteps from the 79 | original diffusion process to retain. 80 | :param kwargs: the kwargs to create the base diffusion process. 81 | """ 82 | 83 | def __init__(self, use_timesteps, **kwargs): 84 | self.use_timesteps = set(use_timesteps) 85 | self.timestep_map = [] 86 | self.original_num_steps = len(kwargs["betas"]) 87 | 88 | base_diffusion = GaussianDiffusion(**kwargs) # pylint: disable=missing-kwoa 89 | last_alpha_cumprod = 1.0 90 | new_betas = [] 91 | for i, alpha_cumprod in enumerate(base_diffusion.alphas_cumprod): 92 | if i in self.use_timesteps: 93 | new_betas.append(1 - alpha_cumprod / last_alpha_cumprod) 94 | last_alpha_cumprod = alpha_cumprod 95 | self.timestep_map.append(i) 96 | kwargs["betas"] = np.array(new_betas) 97 | super().__init__(**kwargs) 98 | 99 | def p_mean_variance( 100 | self, model, *args, **kwargs 101 | ): # pylint: disable=signature-differs 102 | return super().p_mean_variance(self._wrap_model(model), *args, **kwargs) 103 | 104 | def training_losses( 105 | self, model, *args, **kwargs 106 | ): # pylint: disable=signature-differs 107 | return super().training_losses(self._wrap_model(model), *args, **kwargs) 108 | 109 | def _wrap_model(self, model): 110 | if isinstance(model, _WrappedModel): 111 | return model 112 | return _WrappedModel( 113 | model, self.timestep_map, self.rescale_timesteps, self.original_num_steps 114 | ) 115 | 116 | def _scale_timesteps(self, t): 117 | # Scaling is done by the wrapped model. 
118 | return t 119 | 120 | 121 | class _WrappedModel: 122 | def __init__(self, model, timestep_map, rescale_timesteps, original_num_steps): 123 | self.model = model 124 | self.timestep_map = timestep_map 125 | self.rescale_timesteps = rescale_timesteps 126 | self.original_num_steps = original_num_steps 127 | 128 | def __call__(self, x, ts, **kwargs): 129 | map_tensor = torch.tensor(self.timestep_map, device=ts.device, dtype=ts.dtype) 130 | new_ts = map_tensor[ts] 131 | if self.rescale_timesteps: 132 | new_ts = new_ts.float() * (1000.0 / self.original_num_steps) 133 | return self.model(x, new_ts, **kwargs) 134 | -------------------------------------------------------------------------------- /kandinsky2/model/text2im_model.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from copy import copy 5 | from .nn import timestep_embedding 6 | from .unet import UNetModel 7 | import math 8 | from abc import abstractmethod 9 | from .fp16_util import convert_module_to_f16, convert_module_to_f32 10 | from .text_encoders import AttentionPooling 11 | 12 | 13 | class Text2ImUNet(UNetModel): 14 | def __init__( 15 | self, 16 | model_dim, 17 | text_encoder_in_dim1=1024, 18 | text_encoder_in_dim2=640, 19 | pooling_type="attention_pooling", # ['from_model', 'attention_pooling'] 20 | *args, 21 | cache_text_emb=True, 22 | **kwargs, 23 | ): 24 | self.model_dim = model_dim 25 | super().__init__(*args, **kwargs, encoder_channels=model_dim) 26 | self.pooling_type = pooling_type 27 | 28 | self.to_model_dim = nn.Linear(text_encoder_in_dim1, model_dim) 29 | 30 | if self.pooling_type == "from_model": 31 | self.proj = nn.Linear(text_encoder_in_dim2, self.model_channels * 4) 32 | elif self.pooling_type == "attention_pooling": 33 | self.proj = AttentionPooling( 34 | 8, text_encoder_in_dim2, self.model_channels * 4 35 | ) 36 | self.proj2 = AttentionPooling(8, 512, self.model_channels * 4) 37 | self.to_model_dim2 = nn.Linear(512, model_dim) 38 | self.ln_model1 = nn.LayerNorm(model_dim) 39 | self.ln_model2 = nn.LayerNorm(self.model_channels * 4) 40 | self.ln_model3 = nn.LayerNorm(self.model_channels * 4) 41 | self.cache_text_emb = cache_text_emb 42 | self.cache = None 43 | 44 | def convert_to_fp16(self): 45 | super().convert_to_fp16() 46 | self.proj.to(torch.float16) 47 | self.to_model_dim.to(torch.float16) 48 | self.to_model_dim2.to(torch.float16) 49 | self.proj2.to(torch.float16) 50 | self.ln_model1.to(torch.float16) 51 | self.ln_model2.to(torch.float16) 52 | self.ln_model3.to(torch.float16) 53 | 54 | def get_text_emb( 55 | self, full_emb1=None, pooled_emb1=None, full_emb2=None, pooled_emb2=None 56 | ): 57 | if self.cache is not None and self.cache_text_emb: 58 | return self.cache 59 | if self.pooling_type == "from_model": 60 | xf_proj = self.proj(pooled_emb1) 61 | elif self.pooling_type == "attention_pooling": 62 | xf_proj = self.proj(full_emb1) 63 | xf_proj = self.ln_model2(xf_proj) 64 | pooled_emb2 = self.ln_model3(self.proj2(full_emb2)) 65 | xf_proj += pooled_emb2 66 | xf_out = self.ln_model1( 67 | torch.cat( 68 | [self.to_model_dim(full_emb1), self.to_model_dim2(full_emb2)], dim=1 69 | ) 70 | ) 71 | 72 | xf_out = xf_out.permute(0, 2, 1) # NLC -> NCL 73 | outputs = dict(xf_proj=xf_proj, xf_out=xf_out) 74 | 75 | if self.cache_text_emb: 76 | self.cache = outputs 77 | return outputs 78 | 79 | def del_cache(self): 80 | self.cache = None 81 | 82 | def forward( 83 | self, 84 | x, 85 | timesteps, 86 | 
full_emb1=None, 87 | pooled_emb1=None, 88 | full_emb2=None, 89 | pooled_emb2=None, 90 | ): 91 | hs = [] 92 | emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) 93 | text_outputs = self.get_text_emb( 94 | full_emb1=full_emb1, 95 | pooled_emb1=pooled_emb1, 96 | full_emb2=full_emb2, 97 | pooled_emb2=pooled_emb2, 98 | ) 99 | xf_proj, xf_out = text_outputs["xf_proj"], text_outputs["xf_out"] 100 | emb = emb + xf_proj.to(emb) 101 | h = x.type(self.dtype) 102 | for module in self.input_blocks: 103 | h = module(h, emb, xf_out) 104 | hs.append(h) 105 | h = self.middle_block(h, emb, xf_out) 106 | for module in self.output_blocks: 107 | h = torch.cat([h, hs.pop()], dim=1) 108 | h = module(h, emb, xf_out) 109 | h = h.type(torch.float32) 110 | h = self.out(h) 111 | return h 112 | 113 | 114 | class InpaintText2ImUNet(Text2ImUNet): 115 | """ 116 | A text2im model which can perform inpainting. 117 | """ 118 | 119 | def __init__(self, *args, **kwargs): 120 | if "in_channels" in kwargs: 121 | kwargs = dict(kwargs) 122 | kwargs["in_channels"] = kwargs["in_channels"] * 2 + 1 123 | else: 124 | args = list(args) 125 | args[1] = args[1] * 2 + 1 126 | super().__init__(*args, **kwargs) 127 | 128 | def forward(self, x, timesteps, inpaint_image=None, inpaint_mask=None, **kwargs): 129 | if inpaint_image is None: 130 | inpaint_image = torch.zeros_like(x) 131 | if inpaint_mask is None: 132 | inpaint_mask = torch.zeros_like(x[:, :1]) 133 | return super().forward( 134 | torch.cat([x, inpaint_image * inpaint_mask, inpaint_mask], dim=1), 135 | timesteps, 136 | **kwargs, 137 | ) 138 | -------------------------------------------------------------------------------- /kandinsky2/model/text2im_model2_1.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | from copy import copy 5 | from .nn import timestep_embedding 6 | from .unet import UNetModel 7 | import math 8 | from abc import abstractmethod 9 | from .fp16_util import convert_module_to_f16, convert_module_to_f32 10 | from .text_encoders import AttentionPooling 11 | 12 | 13 | class Text2ImUNet(UNetModel): 14 | def __init__( 15 | self, 16 | model_dim, 17 | image_encoder_in_dim=768, 18 | text_encoder_in_dim1=1024, 19 | text_encoder_in_dim2=768, 20 | num_image_embs=10, 21 | pooling_type="attention_pooling", # ['from_model', 'attention_pooling'] 22 | *args, 23 | cache_text_emb=True, 24 | **kwargs, 25 | ): 26 | self.model_dim = model_dim 27 | super().__init__(*args, **kwargs, encoder_channels=model_dim) 28 | self.pooling_type = pooling_type 29 | 30 | self.num_image_embs = num_image_embs 31 | self.clip_to_seq = nn.Linear( 32 | image_encoder_in_dim, model_dim * self.num_image_embs 33 | ) 34 | 35 | self.to_model_dim_n = nn.Linear(text_encoder_in_dim1, model_dim) 36 | 37 | if self.pooling_type == "from_model": 38 | self.proj_n = nn.Linear(text_encoder_in_dim2, self.model_channels * 4) 39 | elif self.pooling_type == "attention_pooling": 40 | self.proj_n = AttentionPooling( 41 | 8, text_encoder_in_dim1, self.model_channels * 4 42 | ) 43 | self.ln_model_n = nn.LayerNorm(self.model_channels * 4) 44 | self.img_layer = nn.Linear(image_encoder_in_dim, self.model_channels * 4) 45 | self.cache_text_emb = cache_text_emb 46 | self.cache = None 47 | self.model_dim = model_dim 48 | 49 | def convert_to_fp16(self): 50 | super().convert_to_fp16() 51 | self.clip_to_seq.to(torch.float16) 52 | self.proj_n.to(torch.float16) 53 | self.to_model_dim_n.to(torch.float16) 54 
| self.ln_model_n.to(torch.float16) 55 | self.img_layer.to(torch.float16) 56 | 57 | def get_text_emb(self, full_emb=None, pooled_emb=None, image_emb=None): 58 | if self.cache is not None and self.cache_text_emb: 59 | return self.cache 60 | 61 | clip_seq = self.clip_to_seq(image_emb).reshape( 62 | image_emb.shape[0], self.num_image_embs, self.model_dim 63 | ) 64 | 65 | if self.pooling_type == "from_model": 66 | xf_proj = self.proj_n(pooled_emb) 67 | elif self.pooling_type == "attention_pooling": 68 | xf_proj = self.proj_n(full_emb) 69 | 70 | xf_proj = self.ln_model_n(xf_proj) 71 | if image_emb is not None: 72 | xf_proj = xf_proj + self.img_layer(image_emb) 73 | xf_out = torch.cat((clip_seq, self.to_model_dim_n(full_emb)), dim=1) 74 | 75 | xf_out = xf_out.permute(0, 2, 1) # NLC -> NCL 76 | outputs = dict(xf_proj=xf_proj, xf_out=xf_out) 77 | 78 | if self.cache_text_emb: 79 | self.cache = outputs 80 | return outputs 81 | 82 | def del_cache(self): 83 | self.cache = None 84 | 85 | def forward(self, x, timesteps, full_emb=None, pooled_emb=None, image_emb=None): 86 | hs = [] 87 | emb = self.time_embed(timestep_embedding(timesteps, self.model_channels)) 88 | text_outputs = self.get_text_emb( 89 | full_emb=full_emb, pooled_emb=pooled_emb, image_emb=image_emb 90 | ) 91 | xf_proj, xf_out = text_outputs["xf_proj"], text_outputs["xf_out"] 92 | emb = emb + xf_proj.to(emb) 93 | h = x.type(self.dtype) 94 | for module in self.input_blocks: 95 | h = module(h, emb, xf_out) 96 | hs.append(h) 97 | h = self.middle_block(h, emb, xf_out) 98 | for module in self.output_blocks: 99 | h = torch.cat([h, hs.pop()], dim=1) 100 | h = module(h, emb, xf_out) 101 | h = h.type(x.dtype) 102 | h = self.out(h) 103 | return h 104 | 105 | 106 | class SuperResText2ImUNet(Text2ImUNet): 107 | """ 108 | A text2im model that performs super-resolution. 109 | Expects an extra kwarg `low_res` to condition on a low-resolution image. 110 | """ 111 | 112 | def __init__(self, *args, **kwargs): 113 | if "in_channels" in kwargs: 114 | kwargs = dict(kwargs) 115 | kwargs["in_channels"] = kwargs["in_channels"] * 2 116 | else: 117 | # Curse you, Python. Or really, just curse positional arguments :|. 118 | args = list(args) 119 | args[1] = args[1] * 2 120 | super().__init__(*args, **kwargs) 121 | 122 | def forward(self, x, timesteps, low_res=None, **kwargs): 123 | _, _, new_height, new_width = x.shape 124 | upsampled = F.interpolate( 125 | low_res, (new_height, new_width), mode="bilinear", align_corners=False 126 | ) 127 | x = torch.cat([x, upsampled], dim=1) 128 | return super().forward(x, timesteps, **kwargs) 129 | 130 | 131 | class InpaintText2ImUNet(Text2ImUNet): 132 | """ 133 | A text2im model which can perform inpainting. 134 | """ 135 | 136 | def __init__(self, *args, **kwargs): 137 | if "in_channels" in kwargs: 138 | kwargs = dict(kwargs) 139 | kwargs["in_channels"] = kwargs["in_channels"] * 2 + 1 140 | else: 141 | # Curse you, Python. Or really, just curse positional arguments :|. 
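# The inpainting UNet is conditioned on [noisy latent, masked image, mask]
# concatenated along the channel axis (see forward() below), which is why
# in_channels grows to in_channels * 2 + 1.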
142 | args = list(args) 143 | args[1] = args[1] * 2 + 1 144 | super().__init__(*args, **kwargs) 145 | 146 | def forward(self, x, timesteps, inpaint_image=None, inpaint_mask=None, **kwargs): 147 | if inpaint_image is None: 148 | inpaint_image = torch.zeros_like(x) 149 | if inpaint_mask is None: 150 | inpaint_mask = torch.zeros_like(x[:, :1]) 151 | return super().forward( 152 | torch.cat([x, inpaint_image * inpaint_mask, inpaint_mask], dim=1), 153 | timesteps, 154 | **kwargs, 155 | ) 156 | -------------------------------------------------------------------------------- /kandinsky2/model/text_encoders.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import math 5 | from transformers import ( 6 | T5EncoderModel, 7 | MT5EncoderModel, 8 | BertModel, 9 | XLMRobertaModel, 10 | AutoConfig, 11 | XLMRobertaModel, 12 | ) 13 | import transformers 14 | import os 15 | 16 | 17 | def attention(q, k, v, d_k): 18 | scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(d_k) 19 | scores = F.softmax(scores, dim=-1) 20 | output = torch.matmul(scores, v) 21 | return output 22 | 23 | 24 | class AttentionPooling(nn.Module): 25 | def __init__( 26 | self, 27 | heads, 28 | in_dim, 29 | out_dim, 30 | ): 31 | super().__init__() 32 | 33 | self.in_dim = in_dim 34 | self.d_k = in_dim // heads 35 | self.h = heads 36 | 37 | self.q_linear = nn.Linear(in_dim, in_dim) 38 | self.v_linear = nn.Linear(in_dim, in_dim) 39 | self.k_linear = nn.Linear(in_dim, in_dim) 40 | self.out = nn.Linear(in_dim, out_dim) 41 | 42 | def forward(self, x): 43 | bs = x.size(0) 44 | 45 | k = self.k_linear(x).view(bs, -1, self.h, self.d_k) 46 | q = self.q_linear(x).view(bs, -1, self.h, self.d_k) 47 | v = self.v_linear(x).view(bs, -1, self.h, self.d_k) 48 | 49 | k = k.transpose(1, 2) 50 | q = q.transpose(1, 2) 51 | v = v.transpose(1, 2) 52 | scores = attention(q, k, v, self.d_k) 53 | 54 | concat = scores.transpose(1, 2).contiguous().view(bs, -1, self.in_dim) 55 | 56 | output = self.out(concat) 57 | 58 | return output[:, 0] 59 | 60 | 61 | class ImagenCLIP(nn.Module): 62 | def __init__(self): 63 | super().__init__() 64 | transformer_width = 768 65 | embed_dim = 768 66 | transformer_layers = 12 67 | transformer_heads = transformer_width // 64 68 | vocab_size = 49408 69 | self.context_length = 77 70 | self.transformer = clip.model.Transformer( 71 | width=transformer_width, 72 | layers=transformer_layers, 73 | heads=transformer_heads, 74 | attn_mask=self.build_attention_mask(), 75 | ) 76 | self.token_embedding = nn.Embedding(vocab_size, transformer_width) 77 | self.positional_embedding = nn.Parameter( 78 | torch.empty(self.context_length, transformer_width) 79 | ) 80 | self.ln_final = clip.model.LayerNorm(transformer_width) 81 | self.text_projection = nn.Parameter(torch.empty(transformer_width, embed_dim)) 82 | 83 | @property 84 | def dtype(self): 85 | return self.transformer.resblocks[0].attn.out_proj.weight.dtype 86 | 87 | def build_attention_mask(self): 88 | # lazily create causal attention mask, with full attention between the vision tokens 89 | # pytorch uses additive attention mask; fill with -inf 90 | mask = torch.empty(self.context_length, self.context_length) 91 | mask.fill_(float("-inf")) 92 | mask.triu_(1) # zero out the lower diagonal 93 | return mask 94 | 95 | def forward(self, text, mask=None): 96 | x = self.token_embedding(text).type(self.dtype) 97 | x = x + self.positional_embedding.type(self.dtype) 98 | x = x.permute(1, 
0, 2) 99 | x = self.transformer(x) 100 | x = x.permute(1, 0, 2) 101 | x = self.ln_final(x).type(self.dtype) 102 | pooled_out = ( 103 | x[torch.arange(x.shape[0]), text.argmax(dim=-1)] @ self.text_projection 104 | ) 105 | return x, pooled_out 106 | 107 | 108 | class MultilingualCLIP(nn.Module): 109 | def __init__(self, config, in_features=1024, out_features=640): 110 | super().__init__() 111 | loaded_config = AutoConfig.from_pretrained(config) 112 | self.transformer = XLMRobertaModel(loaded_config) 113 | self.LinearTransformation = torch.nn.Linear( 114 | in_features=in_features, out_features=out_features 115 | ) 116 | 117 | def forward(self, input_ids, attention_mask): 118 | embs = self.transformer(input_ids=input_ids, attention_mask=attention_mask)[0] 119 | embs2 = (embs * attention_mask.unsqueeze(2)).sum(dim=1) / attention_mask.sum( 120 | dim=1 121 | )[:, None] 122 | return self.LinearTransformation(embs2), embs 123 | 124 | 125 | class TextEncoder(nn.Module): 126 | def __init__(self, model_path, model_name, **kwargs): 127 | super().__init__() 128 | self.model_name = model_name 129 | if self.model_name == "clip": 130 | self.model = ImagenCLIP() 131 | self.model.load_state_dict(torch.load(model_path)) 132 | elif self.model_name == "T5EncoderModel": 133 | self.model = T5EncoderModel.from_pretrained(model_path) 134 | elif self.model_name == "MT5EncoderModel": 135 | self.model = MT5EncoderModel.from_pretrained(model_path) 136 | elif self.model_name == "BertModel": 137 | self.model = BertModel.from_pretrained(model_path) 138 | elif self.model_name == "multiclip": 139 | self.model = MultilingualCLIP(model_path, **kwargs) 140 | self.model.load_state_dict( 141 | torch.load(os.path.join(model_path, "pytorch_model.bin")), strict=False 142 | ) 143 | elif self.model_name == "xlm_roberta": 144 | self.model = XLMRobertaModel.from_pretrained(model_path).half() 145 | self.model.eval() 146 | for param in self.model.parameters(): 147 | param.requires_grad = False 148 | 149 | def forward(self, tokens, mask=None): 150 | if self.model_name == "clip": 151 | full_out, pooled_out = self.model(tokens) 152 | elif self.model_name in ["T5EncoderModel", "MT5EncoderModel"]: 153 | pooled_out = None 154 | full_out = self.model(input_ids=tokens, attention_mask=mask)[ 155 | "last_hidden_state" 156 | ] 157 | elif self.model_name in ["BertModel"]: 158 | out = self.model(input_ids=tokens, attention_mask=mask) 159 | full_out, pooled_out = out["last_hidden_state"], out["pooler_output"] 160 | elif self.model_name == "multiclip": 161 | pooled_out, full_out = self.model(input_ids=tokens, attention_mask=mask) 162 | elif self.model_name == "xlm_roberta": 163 | pooled_out = None 164 | full_out = self.model(input_ids=tokens, attention_mask=mask)[ 165 | "last_hidden_state" 166 | ].float() 167 | return full_out, pooled_out 168 | -------------------------------------------------------------------------------- /kandinsky2/model/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from PIL import Image 4 | import torch 5 | import torch.nn as nn 6 | import importlib 7 | 8 | 9 | def _extract_into_tensor(arr, timesteps, broadcast_shape): 10 | """ 11 | Extract values from a 1-D numpy array for a batch of indices. 12 | :param arr: the 1-D numpy array. 13 | :param timesteps: a tensor of indices into the array to extract. 14 | :param broadcast_shape: a larger shape of K dimensions with the batch 15 | dimension equal to the length of timesteps. 
16 | :return: a tensor of shape [batch_size, 1, ...] where the shape has K dims. 17 | """ 18 | res = torch.from_numpy(arr).to(device=timesteps.device)[timesteps].float() 19 | while len(res.shape) < len(broadcast_shape): 20 | res = res[..., None] 21 | return res.expand(broadcast_shape) 22 | 23 | 24 | def get_named_beta_schedule(schedule_name, num_diffusion_timesteps): 25 | """ 26 | Get a pre-defined beta schedule for the given name. 27 | The beta schedule library consists of beta schedules which remain similar 28 | in the limit of num_diffusion_timesteps. 29 | Beta schedules may be added, but should not be removed or changed once 30 | they are committed to maintain backwards compatibility. 31 | """ 32 | if schedule_name == "linear": 33 | # Linear schedule from Ho et al, extended to work for any number of 34 | # diffusion steps. 35 | scale = 1000 / num_diffusion_timesteps 36 | beta_start = scale * 0.0001 37 | beta_end = scale * 0.02 38 | return np.linspace( 39 | beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64 40 | ) 41 | elif schedule_name == "cosine": 42 | return betas_for_alpha_bar( 43 | num_diffusion_timesteps, 44 | lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2, 45 | ) 46 | else: 47 | raise NotImplementedError(f"unknown beta schedule: {schedule_name}") 48 | 49 | 50 | def mean_flat(tensor): 51 | """ 52 | Take the mean over all non-batch dimensions. 53 | """ 54 | return tensor.mean(dim=list(range(1, len(tensor.shape)))) 55 | 56 | 57 | def get_obj_from_str(string): 58 | module, cls = string.rsplit(".", 1) 59 | return getattr(importlib.import_module(module, package=None), cls) 60 | 61 | 62 | def dict_keys(d, keys): 63 | d2 = {} 64 | for i in keys: 65 | d2[i] = d[i] 66 | return d2 67 | 68 | 69 | def return_images(bath): 70 | scaled = ((batch + 1) * 127.5).round().clamp(0, 255).to(torch.uint8).cpu() 71 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/data/dataset_prior.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from PIL import Image 6 | import io 7 | import re 8 | import os 9 | 10 | import torch 11 | import sys, time 12 | from torch.utils.data import Dataset, DataLoader 13 | import pytorch_lightning as pl 14 | from copy import deepcopy 15 | 16 | from transformers import AutoTokenizer 17 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 18 | from tqdm import tqdm 19 | from random import randint 20 | from ...model.prior import CustomizedTokenizer 21 | 22 | try: 23 | from torchvision.transforms import InterpolationMode 24 | 25 | BICUBIC = InterpolationMode.BICUBIC 26 | except ImportError: 27 | BICUBIC = Image.BICUBIC 28 | 29 | 30 | def _convert_image_to_rgb(image): 31 | return image.convert("RGB") 32 | 33 | 34 | def _transform(n_px): 35 | return Compose( 36 | [ 37 | Resize(n_px, interpolation=BICUBIC), 38 | CenterCrop(n_px), 39 | _convert_image_to_rgb, 40 | ToTensor(), 41 | Normalize( 42 | (0.48145466, 0.4578275, 0.40821073), 43 | (0.26862954, 0.26130258, 0.27577711), 44 | ), 45 | ] 46 | ) 47 | 48 | 49 | def center_crop(image): 50 | width, height = image.size 51 | new_size = min(width, height) 52 | left = (width - new_size) / 2 53 | top = (height - new_size) / 2 54 | right = (width + new_size) / 2 55 | bottom = (height + new_size) / 2 56 | return image.crop((left, top, right, bottom)) 57 | 58 | 59 | class TextImageDataset(Dataset): 60 | def 
__init__( 61 | self, 62 | df_path, 63 | clip_image_size=224, 64 | drop_text_prob=0.1, 65 | infinity=False, 66 | ): 67 | self.df = pd.read_csv(df_path) 68 | self.tokenizer = CustomizedTokenizer() 69 | self.transform1 = _transform(clip_image_size) 70 | self.drop_text_prob = drop_text_prob 71 | self.clip_image_size = clip_image_size 72 | self.infinity = infinity 73 | 74 | def __len__(self): 75 | if self.infinity: 76 | return 99999999 77 | else: 78 | return len(self.df) 79 | 80 | def __getitem__(self, item): 81 | if self.infinity: 82 | ind = randint(0, len(self.df) - 1) 83 | else: 84 | ind = item 85 | out_dict = {} 86 | image = Image.open(self.df["image_name"].iloc[ind]) 87 | clip_image = self.transform1(image) 88 | if np.random.binomial(1, self.drop_text_prob): 89 | text = "" 90 | else: 91 | text = self.df["caption"].iloc[ind] 92 | out_dict["tokens"], out_dict["mask"] = self.tokenizer.padded_tokens_and_mask([text], 77) 93 | out_dict["tokens"] = out_dict["tokens"][0] 94 | out_dict["mask"] = out_dict["mask"][0] 95 | return clip_image, out_dict 96 | 97 | 98 | def create_loader(batch_size, num_workers, shuffle=False, **dataset_params): 99 | dataset = TextImageDataset(**dataset_params) 100 | return DataLoader( 101 | dataset, 102 | batch_size=batch_size, 103 | num_workers=num_workers, 104 | shuffle=shuffle, 105 | pin_memory=True, 106 | ) 107 | 108 | 109 | class LightningDataModule(pl.LightningDataModule): 110 | """PyTorch Lightning data class""" 111 | 112 | def __init__(self, train_config, val_config): 113 | super().__init__() 114 | self.train_config = train_config 115 | self.val_config = val_config 116 | 117 | def train_dataloader(self): 118 | return create_loader(**self.train_config) 119 | 120 | def test_dataloader(self): 121 | return create_loader(**self.val_config) 122 | 123 | def val_dataloader(self): 124 | return create_loader(**self.val_config) 125 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/data/dataset_unclip_2_1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random 3 | import pandas as pd 4 | from tqdm import tqdm 5 | from PIL import Image 6 | import io 7 | import re 8 | import os 9 | 10 | import torch 11 | import sys, time 12 | from torch.utils.data import Dataset, DataLoader 13 | import pytorch_lightning as pl 14 | from copy import deepcopy 15 | 16 | from transformers import AutoTokenizer 17 | from torchvision.transforms import Compose, Resize, CenterCrop, ToTensor, Normalize 18 | from tqdm import tqdm 19 | from random import randint 20 | 21 | try: 22 | from torchvision.transforms import InterpolationMode 23 | 24 | BICUBIC = InterpolationMode.BICUBIC 25 | except ImportError: 26 | BICUBIC = Image.BICUBIC 27 | 28 | 29 | def _convert_image_to_rgb(image): 30 | return image.convert("RGB") 31 | 32 | 33 | def _transform(n_px): 34 | return Compose( 35 | [ 36 | Resize(n_px, interpolation=BICUBIC), 37 | CenterCrop(n_px), 38 | _convert_image_to_rgb, 39 | ToTensor(), 40 | Normalize( 41 | (0.48145466, 0.4578275, 0.40821073), 42 | (0.26862954, 0.26130258, 0.27577711), 43 | ), 44 | ] 45 | ) 46 | 47 | 48 | def center_crop(image): 49 | width, height = image.size 50 | new_size = min(width, height) 51 | left = (width - new_size) / 2 52 | top = (height - new_size) / 2 53 | right = (width + new_size) / 2 54 | bottom = (height + new_size) / 2 55 | return image.crop((left, top, right, bottom)) 56 | 57 | 58 | class TextImageDataset(Dataset): 59 | def __init__( 60 | self, 61 | 
df_path, 62 | tokenizer_name="M-CLIP/XLM-Roberta-Large-Vit-L-14", 63 | clip_image_size=224, 64 | seq_len=77, 65 | drop_text_prob=0.5, 66 | drop_image_prob=0.5, 67 | image_size=512, 68 | infinity=False, 69 | ): 70 | self.df = pd.read_csv(df_path) 71 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name) 72 | self.transform1 = _transform(clip_image_size) 73 | self.seq_len = seq_len 74 | self.drop_text_prob = drop_text_prob 75 | self.drop_image_prob = drop_image_prob 76 | self.image_size = image_size 77 | self.clip_image_size = clip_image_size 78 | self.infinity = infinity 79 | 80 | def __len__(self): 81 | if self.infinity: 82 | return 99999999 83 | else: 84 | return len(self.df) 85 | 86 | def __getitem__(self, item): 87 | if self.infinity: 88 | ind = randint(0, len(self.df) - 1) 89 | else: 90 | ind = item 91 | out_dict = {} 92 | image = Image.open(self.df["image_name"].iloc[ind]) 93 | clip_image = self.transform1(deepcopy(image)) 94 | image = center_crop(image) 95 | image = image.resize( 96 | (self.image_size, self.image_size), resample=Image.BICUBIC, reducing_gap=1 97 | ) 98 | image = np.array(image.convert("RGB")) 99 | image = image.astype(np.float32) / 127.5 - 1 100 | if np.random.binomial(1, self.drop_text_prob): 101 | text = "" 102 | else: 103 | text = self.df["caption"].iloc[ind] 104 | text_encoding = self.tokenizer( 105 | text, 106 | max_length=self.seq_len, 107 | padding="max_length", 108 | truncation=True, 109 | return_attention_mask=True, 110 | add_special_tokens=True, 111 | return_tensors="pt", 112 | ) 113 | 114 | out_dict["tokens"] = text_encoding["input_ids"][0] 115 | out_dict["mask"] = text_encoding["attention_mask"][0] 116 | if np.random.binomial(1, self.drop_image_prob): 117 | out_dict["clip_image"] = torch.zeros( 118 | 3, self.clip_image_size, self.clip_image_size 119 | ) 120 | else: 121 | out_dict["clip_image"] = clip_image 122 | return np.transpose(image, [2, 0, 1]), out_dict 123 | 124 | 125 | def create_loader(batch_size, num_workers, shuffle=False, **dataset_params): 126 | dataset = TextImageDataset(**dataset_params) 127 | return DataLoader( 128 | dataset, 129 | batch_size=batch_size, 130 | num_workers=num_workers, 131 | shuffle=shuffle, 132 | pin_memory=True, 133 | ) 134 | 135 | 136 | class LightningDataModule(pl.LightningDataModule): 137 | """PyTorch Lightning data class""" 138 | 139 | def __init__(self, train_config, val_config): 140 | super().__init__() 141 | self.train_config = train_config 142 | self.val_config = val_config 143 | 144 | def train_dataloader(self): 145 | return create_loader(**self.train_config) 146 | 147 | def test_dataloader(self): 148 | return create_loader(**self.val_config) 149 | 150 | def val_dataloader(self): 151 | return create_loader(**self.val_config) 152 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/ema.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | 4 | 5 | class EMA(nn.Module): 6 | def __init__(self, model, decay=0.9999, use_num_updates=True): 7 | super().__init__() 8 | if decay < 0.0 or decay > 1.0: 9 | raise ValueError("Decay must be between 0 and 1") 10 | 11 | self.m_name2s_name = {} 12 | self.register_buffer("decay", torch.tensor(decay, dtype=torch.float32)) 13 | self.register_buffer( 14 | "num_updates", 15 | torch.tensor(0, dtype=torch.int) 16 | if use_num_updates 17 | else torch.tensor(-1, dtype=torch.int), 18 | ) 19 | 20 | for name, p in model.named_parameters(): 21 | if 
p.requires_grad: 22 | # remove as '.'-character is not allowed in buffers 23 | s_name = name.replace(".", "") 24 | self.m_name2s_name.update({name: s_name}) 25 | self.register_buffer(s_name, p.clone().detach().data) 26 | 27 | self.collected_params = [] 28 | 29 | def forward(self, model): 30 | decay = self.decay 31 | 32 | if self.num_updates >= 0: 33 | self.num_updates += 1 34 | decay = min(self.decay, (1 + self.num_updates) / (10 + self.num_updates)) 35 | 36 | one_minus_decay = 1.0 - decay 37 | 38 | with torch.no_grad(): 39 | m_param = dict(model.named_parameters()) 40 | shadow_params = dict(self.named_buffers()) 41 | 42 | for key in m_param: 43 | if m_param[key].requires_grad: 44 | sname = self.m_name2s_name[key] 45 | shadow_params[sname] = shadow_params[sname].type_as(m_param[key]) 46 | shadow_params[sname].sub_( 47 | one_minus_decay * (shadow_params[sname] - m_param[key]) 48 | ) 49 | else: 50 | assert not key in self.m_name2s_name 51 | 52 | def copy_to(self, model): 53 | m_param = dict(model.named_parameters()) 54 | shadow_params = dict(self.named_buffers()) 55 | for key in m_param: 56 | if m_param[key].requires_grad: 57 | m_param[key].data.copy_(shadow_params[self.m_name2s_name[key]].data) 58 | else: 59 | assert not key in self.m_name2s_name 60 | 61 | def store(self, parameters): 62 | self.collected_params = [param.clone() for param in parameters] 63 | 64 | def restore(self, parameters): 65 | for c_param, param in zip(self.collected_params, parameters): 66 | param.data.copy_(c_param.data) 67 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/train_module_pl2_1.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | import pytorch_lightning as pl 10 | 11 | from ..model.resample import UniformSampler 12 | from ..vqgan.autoencoder import VQModelInterface, AutoencoderKL, MOVQ 13 | from ..model.text_encoders import TextEncoder 14 | from ..model.utils import get_obj_from_str 15 | from .ema import EMA 16 | from .utils import generate_mask, get_image_mask 17 | import clip 18 | 19 | 20 | class Decoder(pl.LightningModule): 21 | def __init__( 22 | self, 23 | unet, 24 | diffusion, 25 | ema_rate, 26 | optim_params, 27 | scheduler_params, 28 | image_enc_params, 29 | text_enc_params, 30 | clip_name, 31 | use_ema=False, 32 | inpainting=False, 33 | ): 34 | super().__init__() 35 | self.unet = unet 36 | self.diffusion = diffusion 37 | self.image_enc_params = image_enc_params 38 | self.text_enc_params = text_enc_params 39 | self.ema_rate = ema_rate 40 | self.use_ema = use_ema 41 | self.schedule_sampler = UniformSampler(diffusion) 42 | self.inpainting = inpainting 43 | 44 | self.create_image_encoder() 45 | self.create_text_encoder() 46 | 47 | self.optim_params = optim_params 48 | self.scheduler_params = scheduler_params 49 | if use_ema: 50 | self.ema_params = EMA( 51 | self.unet, 52 | ema_rate, 53 | ) 54 | 55 | self.clip_model, _ = clip.load(clip_name, device="cpu", jit=False) 56 | self.clip_model.transformer = None 57 | self.clip_model.positional_embedding = None 58 | self.clip_model.ln_final = None 59 | self.clip_model.token_embedding = None 60 | self.clip_model.text_projection = None 61 | 62 | def create_image_encoder( 63 | self, 64 | ): 65 | if self.image_enc_params is not None: 66 | self.use_image_enc = True 67 | self.scale = self.image_enc_params["scale"] 68 | self.image_enc_name 
= self.image_enc_params["name"] 69 | if self.image_enc_params["name"] == "AutoencoderKL": 70 | self.image_encoder = AutoencoderKL(**self.image_enc_params["params"]) 71 | elif self.image_enc_params["name"] == "VQModelInterface": 72 | self.image_encoder = VQModelInterface(**self.image_enc_params["params"]) 73 | elif self.image_enc_params["name"] == "MOVQ": 74 | self.image_encoder = MOVQ(**self.image_enc_params["params"]) 75 | self.image_encoder.load_state_dict( 76 | torch.load(self.image_enc_params["ckpt_path"]) 77 | ) 78 | self.image_encoder.eval() 79 | for param in self.image_encoder.parameters(): 80 | param.requires_grad = False 81 | else: 82 | self.use_image_enc = False 83 | 84 | def create_text_encoder( 85 | self, 86 | ): 87 | if self.text_enc_params is not None: 88 | self.use_text_enc = True 89 | self.text_encoder = TextEncoder(**self.text_enc_params).eval().half() 90 | else: 91 | self.use_text_enc = False 92 | 93 | def configure_optimizers(self): 94 | optimizer = get_obj_from_str(self.optim_params["name"])( 95 | self.unet.parameters(), **self.optim_params["params"] 96 | ) 97 | lr_scheduler = get_obj_from_str(self.scheduler_params["name"])( 98 | optimizer, **self.scheduler_params["params"] 99 | ) 100 | return [optimizer], { 101 | "scheduler": lr_scheduler, 102 | "interval": "step", 103 | "frequency": 1, 104 | } 105 | 106 | def prepare_image(self, batch): 107 | if self.use_image_enc: 108 | with torch.no_grad(): 109 | if self.image_enc_name == "AutoencoderKL": 110 | batch = self.image_encoder.encode(batch).sample() 111 | elif self.image_enc_name == "VQModelInterface": 112 | batch = self.image_encoder.encode(batch) 113 | elif self.image_enc_name == "MOVQ": 114 | batch = self.image_encoder.encode(batch) 115 | batch = batch * self.scale 116 | return batch 117 | 118 | def prepare_cond(self, cond): 119 | if self.use_text_enc: 120 | mask = None 121 | new_cond = {} 122 | for key in cond.keys(): 123 | if key not in ["tokens", "mask", "clip_image"]: 124 | new_cond[key] = cond[key] 125 | if "mask" in cond: 126 | mask = cond["mask"] 127 | with torch.no_grad(): 128 | new_cond["image_emb"] = self.clip_model.encode_image( 129 | cond["clip_image"] 130 | ).float() 131 | with torch.no_grad(): 132 | new_cond["full_emb"], new_cond["pooled_emb"] = self.text_encoder( 133 | cond["tokens"].long(), mask 134 | ) 135 | del cond 136 | return new_cond 137 | return cond 138 | 139 | def model_step(self, batch, stage): 140 | image, cond = batch 141 | image = self.prepare_image(image) 142 | 143 | if self.inpainting: 144 | image_mask = get_image_mask(image.shape[0], image.shape[-2:]) 145 | image_mask = image_mask.to(image.device).unsqueeze(1).to(image.dtype) 146 | # image_mask = 1. 
- image_mask 147 | cond["inpaint_image"] = image * image_mask 148 | cond["inpaint_mask"] = image_mask 149 | 150 | cond = self.prepare_cond(cond) 151 | t, weights = self.schedule_sampler.sample(image.shape[0], image.device) 152 | compute_losses = functools.partial( 153 | self.diffusion.training_losses, 154 | self.unet, 155 | image, 156 | t, 157 | model_kwargs=cond, 158 | ) 159 | losses = compute_losses() 160 | loss = losses["loss"].mean() 161 | self.log(f"{stage}_loss", loss.detach().cpu().item(), sync_dist=True) 162 | 163 | return loss 164 | 165 | def training_step(self, batch, batch_idx): 166 | return self.model_step(batch, "train") 167 | 168 | def validation_step(self, batch, batch_idx): 169 | return self.model_step(batch, "valid") 170 | 171 | def on_train_batch_end(self, *args, **kwargs): 172 | if self.use_ema: 173 | self.ema_params(self.unet) 174 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/trainer_2_1_uclip.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | from tqdm import tqdm 10 | from ..vqgan.autoencoder import VQModelInterface, AutoencoderKL, MOVQ 11 | from .utils import generate_mask, get_image_mask 12 | import clip 13 | 14 | def prepare_image(batch, image_encoder, scale=1): 15 | with torch.no_grad(): 16 | batch = batch.half() 17 | batch = image_encoder.encode(batch) 18 | batch = batch * scale 19 | return batch.float() 20 | 21 | def prepare_cond(cond, text_encoder, clip_model): 22 | mask = None 23 | new_cond = {} 24 | for key in cond.keys(): 25 | if key not in ['tokens', 'mask', 'clip_image']: 26 | new_cond[key] = cond[key] 27 | if 'mask' in cond: 28 | mask = cond['mask'] 29 | with torch.no_grad(): 30 | new_cond['image_emb'] = clip_model.encode_image(cond['clip_image']).float() 31 | with torch.no_grad(): 32 | new_cond['full_emb'], new_cond['pooled_emb'] = text_encoder( 33 | cond['tokens'].long(), mask) 34 | new_cond['full_emb'] = new_cond['full_emb'].float() 35 | new_cond['pooled_emb'] = new_cond['pooled_emb'].float() 36 | del cond 37 | return new_cond 38 | 39 | def train_unclip(unet, diffusion, image_encoder, 40 | clip_model, text_encoder, optimizer, 41 | lr_scheduler=None, schedule_sampler=None, 42 | train_loader=None, val_loader=None, scale=1, 43 | num_epochs=2, save_every=1000, save_name='model', 44 | save_path='', inpainting=False, device='cuda:0'): 45 | train_step = 0 46 | 47 | for epoch in range(num_epochs): 48 | progress = tqdm(total=len(train_loader), desc='finetuning goes brrr') 49 | for batch in train_loader: 50 | optimizer.zero_grad() 51 | image, cond = batch 52 | image = image.to(device) 53 | for key in cond.keys(): 54 | cond[key] = cond[key].to(device) 55 | image = prepare_image(image, image_encoder, scale=scale) 56 | if inpainting: 57 | image_mask = get_image_mask(image.shape[0], image.shape[-2:]) 58 | image_mask = image_mask.to(image.device).unsqueeze(1).to(image.dtype) 59 | image_mask = 1. 
- image_mask 60 | cond['inpaint_image'] = image * image_mask 61 | cond['inpaint_mask'] = image_mask 62 | cond = prepare_cond(cond, text_encoder, clip_model) 63 | t, weights = schedule_sampler.sample(image.shape[0], image.device) 64 | compute_losses = functools.partial( 65 | diffusion.training_losses, 66 | unet, 67 | image, 68 | t, 69 | model_kwargs=cond, 70 | ) 71 | losses = compute_losses() 72 | loss = losses["loss"].mean() 73 | loss.backward() 74 | optimizer.step() 75 | if lr_scheduler is not None: 76 | lr_scheduler.step() 77 | train_step += 1 78 | if train_step % save_every == 0: 79 | torch.save(unet.state_dict(), os.path.join(save_path, save_name + str(train_step) + '.ckpt')) 80 | progress.update() 81 | progress.set_postfix({"loss": loss.item()}) 82 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/trainer_prior.py: -------------------------------------------------------------------------------- 1 | import sys 2 | 3 | import copy 4 | import functools 5 | import os 6 | import numpy as np 7 | import torch 8 | from torch import nn 9 | from tqdm import tqdm 10 | from .utils import generate_mask, get_image_mask 11 | import clip 12 | 13 | def encode_text(tok, clip_model): 14 | with torch.no_grad(): 15 | x = clip_model.token_embedding(tok).type(clip_model.dtype) 16 | x = x + clip_model.positional_embedding.type(clip_model.dtype) 17 | x = x.permute(1, 0, 2) 18 | x = clip_model.transformer(x) 19 | x = x.permute(1, 0, 2) 20 | txt_feat_seq = x 21 | txt_feat = x[torch.arange(x.shape[0]), tok.argmax(dim=-1)] @ clip_model.text_projection 22 | txt_feat, txt_feat_seq = txt_feat.float(), txt_feat_seq.float() 23 | return txt_feat, txt_feat_seq 24 | 25 | def encode_image(image, clip_model, clip_mean, clip_std): 26 | with torch.no_grad(): 27 | return (clip_model.encode_image(image).float() - clip_mean) / clip_std 28 | 29 | def train_prior(model, diffusion, 30 | clip_model, optimizer, 31 | lr_scheduler=None, schedule_sampler=None, 32 | train_loader=None, val_loader=None, 33 | num_epochs=2, save_every=1000, save_name='model', 34 | save_path='', device='cuda:0'): 35 | train_step = 0 36 | for epoch in range(num_epochs): 37 | progress = tqdm(total=len(train_loader), desc='finetuning goes brrr') 38 | for batch in train_loader: 39 | optimizer.zero_grad() 40 | image, cond = batch 41 | image = image.to(device) 42 | for key in cond.keys(): 43 | cond[key] = cond[key].to(device) 44 | image = encode_image(image, clip_model, model.clip_mean.to(device), model.clip_std.to(device)) 45 | txt_feat, txt_feat_seq = encode_text(cond['tokens'], clip_model) 46 | cond = { 47 | "text_emb": txt_feat, 48 | "text_enc": txt_feat_seq, 49 | "mask": cond['mask'], 50 | "causal_mask": model.causal_mask, 51 | } 52 | t, weights = schedule_sampler.sample(image.shape[0], image.device) 53 | compute_losses = functools.partial( 54 | diffusion.training_losses, 55 | model.model, 56 | image, 57 | t, 58 | model_kwargs=cond, 59 | ) 60 | losses = compute_losses() 61 | loss = losses["loss"].mean() 62 | loss.backward() 63 | optimizer.step() 64 | if lr_scheduler is not None: 65 | lr_scheduler.step() 66 | train_step += 1 67 | if train_step % save_every == 0: 68 | torch.save(model.state_dict(), os.path.join(save_path, save_name + str(train_step) + '.ckpt')) 69 | progress.update() 70 | progress.set_postfix({"loss": loss.item()}) 71 | -------------------------------------------------------------------------------- /kandinsky2/train_utils/utils.py: 
-------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch as th 4 | import torch.nn as nn 5 | import numpy as np 6 | from scipy.interpolate import interp1d 7 | import importlib 8 | from matplotlib.path import Path 9 | 10 | 11 | def get_polygon_mask_params( 12 | mask_size, box, num_vertices, mask_scale, min_scale, max_scale 13 | ): 14 | center = ((box[2] + box[0]) / 2, (box[3] + box[1]) / 2) 15 | sizes = (box[2] - box[0], box[3] - box[1]) 16 | 17 | part_avg_radii = np.linspace( 18 | mask_scale * sizes[0] / 2, mask_scale * sizes[1] / 2, num_vertices // 4 19 | ) 20 | part_avg_radii = np.clip( 21 | part_avg_radii, min_scale * min(mask_size), max_scale * min(mask_size) 22 | ) 23 | avg_radii = np.concatenate( 24 | [ 25 | part_avg_radii, 26 | part_avg_radii[::-1], 27 | part_avg_radii, 28 | part_avg_radii[::-1], 29 | ] 30 | ) 31 | return center, avg_radii 32 | 33 | 34 | def smooth_cerv(x, y): 35 | num_vertices = x.shape[0] 36 | x = np.concatenate((x[-3:-1], x, x[1:3])) 37 | y = np.concatenate((y[-3:-1], y, y[1:3])) 38 | t = np.arange(x.shape[0]) 39 | 40 | ti = np.linspace(2, num_vertices + 1, 4 * num_vertices) 41 | xi = interp1d(t, x, kind="quadratic")(ti) 42 | yi = interp1d(t, y, kind="quadratic")(ti) 43 | return xi, yi 44 | 45 | 46 | def get_polygon_mask(mask_size, mask_points): 47 | x, y = np.meshgrid(np.arange(mask_size[0]), np.arange(mask_size[1])) 48 | x, y = x.flatten(), y.flatten() 49 | points = np.vstack((x, y)).T 50 | 51 | path = Path(mask_points) 52 | grid = path.contains_points(points) 53 | grid = grid.reshape((mask_size[0], mask_size[1])) 54 | return 1.0 - grid.astype(np.int32) 55 | 56 | 57 | def generate_polygon( 58 | mask_size, center, num_vertices, radii, radii_var, angle_var, smooth=True 59 | ): 60 | angle_steps = np.random.uniform( 61 | 1.0 - angle_var, 1.0 + angle_var, size=(num_vertices,) 62 | ) 63 | angle_steps = 2 * np.pi * angle_steps / angle_steps.sum() 64 | 65 | radii = np.random.normal(radii, radii_var * radii) 66 | radii = np.clip(radii, 0, 2 * radii) 67 | angles = np.cumsum(angle_steps) 68 | x = center[0] + radii * np.cos(angles) 69 | y = center[1] + radii * np.sin(angles) 70 | 71 | if smooth: 72 | x, y = smooth_cerv(x, y) 73 | points = np.concatenate((x.reshape(-1, 1), y.reshape(-1, 1)), axis=-1) 74 | points = list(map(tuple, points.tolist())) 75 | return get_polygon_mask(mask_size, points) 76 | 77 | 78 | def generate_circle_frame(mask_size, side_scales, num_vertices, radii_var, smooth=True): 79 | num_vertices_per_side = num_vertices // 4 80 | x_size, y_size = mask_size 81 | up_radii = np.array([y_size * (1.0 - side_scales[0]) // 2] * num_vertices_per_side) 82 | down_radii = np.array( 83 | [y_size * (1.0 - side_scales[1]) // 2] * num_vertices_per_side 84 | ) 85 | left_radii = np.array( 86 | [x_size * (1.0 - side_scales[2]) // 2] * num_vertices_per_side 87 | ) 88 | right_radii = np.array( 89 | [x_size * (1.0 - side_scales[3]) // 2] * num_vertices_per_side 90 | ) 91 | 92 | center = (x_size // 2, y_size // 2) 93 | radii = np.concatenate( 94 | [ 95 | right_radii[num_vertices_per_side // 2 :], 96 | down_radii, 97 | left_radii, 98 | up_radii, 99 | right_radii[: num_vertices_per_side // 2], 100 | ] 101 | ) 102 | return 1.0 - generate_polygon( 103 | mask_size, center, num_vertices, radii, radii_var, 0.0, smooth=smooth 104 | ) 105 | 106 | 107 | def generate_square_frame(mask_size, side_scales, num_vertices, radii_var, smooth=True): 108 | num_vertices_per_side = num_vertices // 4 109 | x_size, y_size = 
mask_size 110 | diag_size = np.sqrt(x_size**2 + y_size**2) 111 | 112 | up_radii = np.linspace( 113 | diag_size * (1.0 - side_scales[0]) // 2, 114 | y_size * (1.0 - side_scales[0]) // 2, 115 | num_vertices_per_side // 2, 116 | ) 117 | down_radii = np.linspace( 118 | diag_size * (1.0 - side_scales[1]) // 2, 119 | y_size * (1.0 - side_scales[1]) // 2, 120 | num_vertices_per_side // 2, 121 | ) 122 | left_radii = np.linspace( 123 | diag_size * (1.0 - side_scales[2]) // 2, 124 | x_size * (1.0 - side_scales[2]) // 2, 125 | num_vertices_per_side // 2, 126 | ) 127 | right_radii = np.linspace( 128 | diag_size * (1.0 - side_scales[3]) // 2, 129 | x_size * (1.0 - side_scales[3]) // 2, 130 | num_vertices_per_side // 2, 131 | ) 132 | 133 | center = (x_size // 2, y_size // 2) 134 | radii = np.concatenate( 135 | [ 136 | right_radii[::-1], 137 | down_radii, 138 | down_radii[::-1], 139 | left_radii, 140 | left_radii[::-1], 141 | up_radii, 142 | up_radii[::-1], 143 | right_radii, 144 | ] 145 | ) 146 | return 1.0 - generate_polygon( 147 | mask_size, center, num_vertices, radii, radii_var, 0.0, smooth=smooth 148 | ) 149 | 150 | 151 | def generate_mask(mask_size, box, box_prob=0.1): 152 | mask = np.ones(mask_size) 153 | if np.random.binomial(1, box_prob): 154 | box = [int(i) for i in box] 155 | mask[box[1] : box[3], box[0] : box[2]] = 0 156 | 157 | else: 158 | actions = np.random.randint(0, 2, (2,)) 159 | if 0 in actions: 160 | num_vertices = 16 161 | center, radii = get_polygon_mask_params( 162 | mask_size, 163 | box, 164 | num_vertices, 165 | mask_scale=1.5, 166 | min_scale=0.1, 167 | max_scale=0.6, 168 | ) 169 | mask *= generate_polygon( 170 | mask_size, center, num_vertices, radii, radii_var=0.15, angle_var=0.15 171 | ) 172 | if 1 in actions: 173 | radii_var = 0.15 * np.random.random() 174 | num_vertices = np.random.choice([16, 32]) 175 | if np.random.random() < 0.5: 176 | side_scales = 0.25 * np.random.random((4,)) + 0.05 177 | mask *= generate_square_frame( 178 | mask_size, side_scales, num_vertices, radii_var=radii_var 179 | ) 180 | else: 181 | side_scales = 0.15 * np.random.random((4,)) + 0.1 182 | mask *= generate_circle_frame( 183 | mask_size, side_scales, num_vertices, radii_var=radii_var 184 | ) 185 | return mask 186 | 187 | 188 | def get_boxes(bs, target_size, min_scale=0.1, max_scale=0.62): 189 | min_size_x = min_scale * target_size[0] 190 | max_size_x = max_scale * target_size[0] 191 | min_size_y = min_scale * target_size[1] 192 | max_size_y = max_scale * target_size[1] 193 | 194 | boxes_size_x = (max_size_x - min_size_x) * np.random.random((bs, 1)) + min_size_x 195 | boxes_size_y = (max_size_y - min_size_y) * np.random.random((bs, 1)) + min_size_y 196 | 197 | x0 = (target_size[0] - max_size_x) * np.random.random((bs, 1)) 198 | y0 = (target_size[1] - max_size_y) * np.random.random((bs, 1)) 199 | 200 | boxes = np.concatenate((x0, y0, x0 + boxes_size_x, y0 + boxes_size_y), -1) 201 | return boxes.tolist() 202 | 203 | 204 | def get_image_mask(bs, target_size): 205 | boxes = get_boxes(bs, target_size) 206 | image_mask = torch.stack( 207 | [torch.tensor(generate_mask(target_size, box)) for box in boxes] 208 | ) 209 | return image_mask 210 | 211 | 212 | def freeze_decoder( 213 | model, 214 | freeze_resblocks=False, 215 | freeze_attention=False, 216 | ): 217 | for name, p in model.named_parameters(): 218 | name = name.lower() 219 | if ( 220 | "in_layers" in name 221 | or "h_upd" in name 222 | or "x_upd" in name 223 | or "emb_layers" in name 224 | or "out_layers" in name 225 | ): 226 | p.requires_grad =
not freeze_resblocks 227 | elif "proj_out" in name or "qkv" in name: 228 | p.requires_grad = not freeze_attention 229 | return model 230 | -------------------------------------------------------------------------------- /kandinsky2/utils.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from PIL import Image 4 | import torch 5 | import torch.nn as nn 6 | import importlib 7 | from .model.utils import get_named_beta_schedule, _extract_into_tensor 8 | from copy import deepcopy 9 | 10 | 11 | 12 | def prepare_mask(mask): 13 | mask = mask.float()[0] 14 | old_mask = deepcopy(mask) 15 | for i in range(mask.shape[1]): 16 | for j in range(mask.shape[2]): 17 | if old_mask[0][i][j] == 1: 18 | continue 19 | if i != 0: 20 | mask[:, i - 1, j] = 0 21 | if j != 0: 22 | mask[:, i, j - 1] = 0 23 | if i != 0 and j != 0: 24 | mask[:, i - 1, j - 1] = 0 25 | if i != mask.shape[1] - 1: 26 | mask[:, i + 1, j] = 0 27 | if j != mask.shape[2] - 1: 28 | mask[:, i, j + 1] = 0 29 | if i != mask.shape[1] - 1 and j != mask.shape[2] - 1: 30 | mask[:, i + 1, j + 1] = 0 31 | return mask.unsqueeze(0) 32 | 33 | 34 | def prepare_image(pil_image, w=512, h=512): 35 | pil_image = pil_image.resize((w, h), resample=Image.BICUBIC, reducing_gap=1) 36 | arr = np.array(pil_image.convert("RGB")) 37 | arr = arr.astype(np.float32) / 127.5 - 1 38 | arr = np.transpose(arr, [2, 0, 1]) 39 | image = torch.from_numpy(arr).unsqueeze(0) 40 | return image 41 | 42 | 43 | def q_sample(x_start, t, schedule_name="linear", num_steps=1000, noise=None): 44 | betas = get_named_beta_schedule(schedule_name, num_steps) 45 | alphas = 1.0 - betas 46 | alphas_cumprod = np.cumprod(alphas, axis=0) 47 | sqrt_alphas_cumprod = np.sqrt(alphas_cumprod) 48 | sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - alphas_cumprod) 49 | if noise is None: 50 | noise = torch.randn_like(x_start) 51 | assert noise.shape == x_start.shape 52 | return ( 53 | _extract_into_tensor(sqrt_alphas_cumprod, t, x_start.shape) * x_start 54 | + _extract_into_tensor(sqrt_one_minus_alphas_cumprod, t, x_start.shape) * noise 55 | ) 56 | 57 | 58 | def process_images(batch): 59 | scaled = ( 60 | ((batch + 1) * 127.5) 61 | .round() 62 | .clamp(0, 255) 63 | .to(torch.uint8) 64 | .to("cpu") 65 | .permute(0, 2, 3, 1) 66 | .numpy() 67 | ) 68 | images = [] 69 | for i in range(scaled.shape[0]): 70 | images.append(Image.fromarray(scaled[i])) 71 | return images 72 | 73 | -------------------------------------------------------------------------------- /kandinsky2/vqgan/autoencoder.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | import pytorch_lightning as pl 4 | import torch.nn.functional as F 5 | from contextlib import contextmanager 6 | 7 | from .quntize import VectorQuantizer 8 | 9 | from .vqgan_blocks import Encoder, Decoder, DiagonalGaussianDistribution 10 | from .movq_modules import MOVQDecoder 11 | 12 | 13 | class VQModel(pl.LightningModule): 14 | def __init__( 15 | self, 16 | ddconfig, 17 | n_embed, 18 | embed_dim, 19 | ckpt_path=None, 20 | ignore_keys=[], 21 | scheduler_config=None, 22 | remap=None, 23 | sane_index_shape=False, # tell vector quantizer to return indices as bhw 24 | ): 25 | super().__init__() 26 | self.embed_dim = embed_dim 27 | self.n_embed = n_embed 28 | self.encoder = Encoder(**ddconfig) 29 | self.decoder = Decoder(**ddconfig) 30 | self.quantize = VectorQuantizer( 31 | n_embed, 32 | embed_dim, 33 | beta=0.25, 34 | remap=remap, 35 | 
sane_index_shape=sane_index_shape, 36 | ) 37 | self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) 38 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 39 | 40 | if ckpt_path is not None: 41 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 42 | self.scheduler_config = scheduler_config 43 | 44 | def init_from_ckpt(self, path, ignore_keys=list()): 45 | sd = torch.load(path, map_location="cpu")["state_dict"] 46 | keys = list(sd.keys()) 47 | for k in keys: 48 | for ik in ignore_keys: 49 | if k.startswith(ik): 50 | print("Deleting key {} from state_dict.".format(k)) 51 | del sd[k] 52 | missing, unexpected = self.load_state_dict(sd, strict=False) 53 | print( 54 | f"Restored from {path} with {len(missing)} missing and {len(unexpected)} unexpected keys" 55 | ) 56 | if len(missing) > 0: 57 | print(f"Missing Keys: {missing}") 58 | print(f"Unexpected Keys: {unexpected}") 59 | 60 | def encode(self, x): 61 | h = self.encoder(x) 62 | h = self.quant_conv(h) 63 | quant, emb_loss, info = self.quantize(h) 64 | return quant, emb_loss, info 65 | 66 | def encode_to_prequant(self, x): 67 | h = self.encoder(x) 68 | h = self.quant_conv(h) 69 | return h 70 | 71 | def decode(self, quant): 72 | quant = self.post_quant_conv(quant) 73 | dec = self.decoder(quant) 74 | return dec 75 | 76 | def decode_code(self, code_b): 77 | quant_b = self.quantize.embed_code(code_b) 78 | dec = self.decode(quant_b) 79 | return dec 80 | 81 | def forward(self, input, return_pred_indices=False): 82 | quant, diff, (_, _, ind) = self.encode(input) 83 | dec = self.decode(quant) 84 | if return_pred_indices: 85 | return dec, diff, ind 86 | return dec, diff 87 | 88 | 89 | class VQModelInterface(VQModel): 90 | def __init__(self, embed_dim, *args, **kwargs): 91 | super().__init__(embed_dim=embed_dim, *args, **kwargs) 92 | self.embed_dim = embed_dim 93 | 94 | def encode(self, x): 95 | h = self.encoder(x) 96 | h = self.quant_conv(h) 97 | return h 98 | 99 | def decode(self, h, force_not_quantize=False): 100 | # also go through quantization layer 101 | if not force_not_quantize: 102 | quant, emb_loss, info = self.quantize(h) 103 | else: 104 | quant = h 105 | quant = self.post_quant_conv(quant) 106 | dec = self.decoder(quant) 107 | return dec 108 | 109 | 110 | class AutoencoderKL(pl.LightningModule): 111 | def __init__( 112 | self, 113 | ddconfig, 114 | embed_dim, 115 | ckpt_path=None, 116 | ignore_keys=[], 117 | ): 118 | super().__init__() 119 | self.encoder = Encoder(**ddconfig) 120 | self.decoder = Decoder(**ddconfig) 121 | assert ddconfig["double_z"] 122 | self.quant_conv = torch.nn.Conv2d(2 * ddconfig["z_channels"], 2 * embed_dim, 1) 123 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 124 | self.embed_dim = embed_dim 125 | if ckpt_path is not None: 126 | self.init_from_ckpt(ckpt_path, ignore_keys=ignore_keys) 127 | 128 | def init_from_ckpt(self, path, ignore_keys=list()): 129 | sd = torch.load(path, map_location="cpu")["state_dict"] 130 | keys = list(sd.keys()) 131 | for k in keys: 132 | for ik in ignore_keys: 133 | if k.startswith(ik): 134 | print("Deleting key {} from state_dict.".format(k)) 135 | del sd[k] 136 | self.load_state_dict(sd, strict=False) 137 | print(f"Restored from {path}") 138 | 139 | def encode(self, x): 140 | h = self.encoder(x) 141 | moments = self.quant_conv(h) 142 | posterior = DiagonalGaussianDistribution(moments) 143 | return posterior 144 | 145 | def decode(self, z): 146 | z = self.post_quant_conv(z) 147 | dec = self.decoder(z) 148 | 
return dec 149 | 150 | def forward(self, input, sample_posterior=True): 151 | posterior = self.encode(input) 152 | if sample_posterior: 153 | z = posterior.sample() 154 | else: 155 | z = posterior.mode() 156 | dec = self.decode(z) 157 | return dec, posterior 158 | 159 | 160 | class MOVQ(nn.Module): 161 | def __init__( 162 | self, 163 | ddconfig, 164 | n_embed, 165 | embed_dim, 166 | ): 167 | super().__init__() 168 | self.encoder = Encoder(**ddconfig) 169 | self.decoder = MOVQDecoder(zq_ch=embed_dim, **ddconfig) 170 | self.quantize = VectorQuantizer( 171 | n_embed, embed_dim, beta=0.25, remap=None, sane_index_shape=False 172 | ) 173 | self.quant_conv = torch.nn.Conv2d(ddconfig["z_channels"], embed_dim, 1) 174 | self.post_quant_conv = torch.nn.Conv2d(embed_dim, ddconfig["z_channels"], 1) 175 | 176 | def encode(self, x): 177 | h = self.encoder(x) 178 | h = self.quant_conv(h) 179 | # quant, emb_loss, info = self.quantize(h) 180 | return h 181 | 182 | def decode(self, quant): 183 | quant2 = self.post_quant_conv(quant) 184 | dec = self.decoder(quant2, quant) 185 | return dec 186 | 187 | def decode_code(self, code_b): 188 | batch_size = code_b.shape[0] 189 | quant = self.quantize.embedding(code_b.flatten()) 190 | grid_size = int((quant.shape[0] // batch_size) ** 0.5) 191 | quant = quant.view((1, 32, 32, 4)) 192 | quant = rearrange(quant, "b h w c -> b c h w").contiguous() 193 | print(quant.shape) 194 | quant2 = self.post_quant_conv(quant) 195 | dec = self.decoder(quant2, quant) 196 | return dec 197 | 198 | def forward(self, input): 199 | quant, diff, _ = self.encode(input) 200 | dec = self.decode(quant) 201 | return dec, diff 202 | -------------------------------------------------------------------------------- /kandinsky2/vqgan/quntize.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | import numpy as np 5 | from torch import einsum 6 | from einops import rearrange 7 | 8 | 9 | class VectorQuantizer(nn.Module): 10 | """ 11 | Improved version over VectorQuantizer, can be used as a drop-in replacement. Mostly 12 | avoids costly matrix multiplications and allows for post-hoc remapping of indices. 13 | """ 14 | 15 | # NOTE: due to a bug the beta term was applied to the wrong term. for 16 | # backwards compatibility we use the buggy version by default, but you can 17 | # specify legacy=False to fix it. 18 | def __init__( 19 | self, 20 | n_e, 21 | e_dim, 22 | beta, 23 | remap=None, 24 | unknown_index="random", 25 | sane_index_shape=False, 26 | legacy=True, 27 | ): 28 | super().__init__() 29 | self.n_e = n_e 30 | self.e_dim = e_dim 31 | self.beta = beta 32 | self.legacy = legacy 33 | 34 | self.embedding = nn.Embedding(self.n_e, self.e_dim) 35 | self.embedding.weight.data.uniform_(-1.0 / self.n_e, 1.0 / self.n_e) 36 | 37 | self.remap = remap 38 | if self.remap is not None: 39 | self.register_buffer("used", torch.tensor(np.load(self.remap))) 40 | self.re_embed = self.used.shape[0] 41 | self.unknown_index = unknown_index # "random" or "extra" or integer 42 | if self.unknown_index == "extra": 43 | self.unknown_index = self.re_embed 44 | self.re_embed = self.re_embed + 1 45 | print( 46 | f"Remapping {self.n_e} indices to {self.re_embed} indices. " 47 | f"Using {self.unknown_index} for unknown indices." 
48 | ) 49 | else: 50 | self.re_embed = n_e 51 | 52 | self.sane_index_shape = sane_index_shape 53 | 54 | def remap_to_used(self, inds): 55 | ishape = inds.shape 56 | assert len(ishape) > 1 57 | inds = inds.reshape(ishape[0], -1) 58 | used = self.used.to(inds) 59 | match = (inds[:, :, None] == used[None, None, ...]).long() 60 | new = match.argmax(-1) 61 | unknown = match.sum(2) < 1 62 | if self.unknown_index == "random": 63 | new[unknown] = torch.randint(0, self.re_embed, size=new[unknown].shape).to( 64 | device=new.device 65 | ) 66 | else: 67 | new[unknown] = self.unknown_index 68 | return new.reshape(ishape) 69 | 70 | def unmap_to_all(self, inds): 71 | ishape = inds.shape 72 | assert len(ishape) > 1 73 | inds = inds.reshape(ishape[0], -1) 74 | used = self.used.to(inds) 75 | if self.re_embed > self.used.shape[0]: # extra token 76 | inds[inds >= self.used.shape[0]] = 0 # simply set to zero 77 | back = torch.gather(used[None, :][inds.shape[0] * [0], :], 1, inds) 78 | return back.reshape(ishape) 79 | 80 | def forward(self, z, temp=None, rescale_logits=False, return_logits=False): 81 | assert temp is None or temp == 1.0, "Only for interface compatible with Gumbel" 82 | assert rescale_logits == False, "Only for interface compatible with Gumbel" 83 | assert return_logits == False, "Only for interface compatible with Gumbel" 84 | # reshape z -> (batch, height, width, channel) and flatten 85 | z = rearrange(z, "b c h w -> b h w c").contiguous() 86 | z_flattened = z.view(-1, self.e_dim) 87 | # distances from z to embeddings e_j (z - e)^2 = z^2 + e^2 - 2 e * z 88 | 89 | d = ( 90 | torch.sum(z_flattened**2, dim=1, keepdim=True) 91 | + torch.sum(self.embedding.weight**2, dim=1) 92 | - 2 93 | * torch.einsum( 94 | "bd,dn->bn", z_flattened, rearrange(self.embedding.weight, "n d -> d n") 95 | ) 96 | ) 97 | 98 | min_encoding_indices = torch.argmin(d, dim=1) 99 | z_q = self.embedding(min_encoding_indices).view(z.shape) 100 | perplexity = None 101 | min_encodings = None 102 | 103 | # compute loss for embedding 104 | if not self.legacy: 105 | loss = self.beta * torch.mean((z_q.detach() - z) ** 2) + torch.mean( 106 | (z_q - z.detach()) ** 2 107 | ) 108 | else: 109 | loss = torch.mean((z_q.detach() - z) ** 2) + self.beta * torch.mean( 110 | (z_q - z.detach()) ** 2 111 | ) 112 | 113 | # preserve gradients 114 | z_q = z + (z_q - z).detach() 115 | 116 | # reshape back to match original input shape 117 | z_q = rearrange(z_q, "b h w c -> b c h w").contiguous() 118 | 119 | if self.remap is not None: 120 | min_encoding_indices = min_encoding_indices.reshape( 121 | z.shape[0], -1 122 | ) # add batch axis 123 | min_encoding_indices = self.remap_to_used(min_encoding_indices) 124 | min_encoding_indices = min_encoding_indices.reshape(-1, 1) # flatten 125 | 126 | if self.sane_index_shape: 127 | min_encoding_indices = min_encoding_indices.reshape( 128 | z_q.shape[0], z_q.shape[2], z_q.shape[3] 129 | ) 130 | 131 | return z_q, loss, (perplexity, min_encodings, min_encoding_indices) 132 | 133 | def get_codebook_entry(self, indices, shape): 134 | # shape specifying (batch, height, width, channel) 135 | if self.remap is not None: 136 | indices = indices.reshape(shape[0], -1) # add batch axis 137 | indices = self.unmap_to_all(indices) 138 | indices = indices.reshape(-1) # flatten again 139 | 140 | # get quantized latent vectors 141 | z_q = self.embedding(indices) 142 | 143 | if shape is not None: 144 | z_q = z_q.view(shape) 145 | # reshape back to match original input shape 146 | z_q = z_q.permute(0, 3, 1, 2).contiguous() 147 | 148 
| return z_q 149 | -------------------------------------------------------------------------------- /model/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/model/__init__.py -------------------------------------------------------------------------------- /model/pops_utils.py: -------------------------------------------------------------------------------- 1 | 2 | from typing import List, Tuple 3 | 4 | import torch 5 | from torch import nn 6 | 7 | def preprocess(image_a: torch.Tensor, image_b: torch.Tensor, image_encoder: nn.Module, clip_mean: torch.Tensor, 8 | clip_std: torch.Tensor, should_drop_cond: List[Tuple[bool, bool]] = None, concat_hidden_states=None, 9 | image_list=None): 10 | with torch.no_grad(): 11 | image_list = [] if image_list is None else image_list 12 | additional_list = [] 13 | if image_a is not None: 14 | additional_list.append(image_a) 15 | if image_b is not None: 16 | additional_list.append(image_b) 17 | image_list = additional_list + image_list 18 | embeds_list = [] 19 | for image in image_list: 20 | # If already is vector skip encoder 21 | if len(image.shape) == 2: 22 | image_embeds = image 23 | else: 24 | encoder_outs = image_encoder(image, output_hidden_states=False) 25 | image_embeds = encoder_outs.image_embeds 26 | image_embeds = (image_embeds - clip_mean) / clip_std 27 | embeds_list.append(image_embeds.unsqueeze(1)) 28 | if should_drop_cond is not None: 29 | for b_ind in range(embeds_list[0].shape[0]): 30 | should_drop_a, should_drop_b = should_drop_cond[b_ind] 31 | if should_drop_a: 32 | embeds_list[0][b_ind] = torch.zeros_like(embeds_list[0][b_ind]) 33 | if should_drop_b and image_b is not None: 34 | embeds_list[1][b_ind] = torch.zeros_like(embeds_list[1][b_ind]) 35 | if concat_hidden_states is not None: 36 | embeds_list.append(concat_hidden_states) 37 | out_hidden_states = torch.concat(embeds_list, dim=1) 38 | 39 | image_embeds = torch.zeros_like(embeds_list[0].squeeze(1)) 40 | 41 | return image_embeds, out_hidden_states 42 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | diffusers 2 | transformers 3 | matplotlib 4 | pyrallis 5 | omegaconf 6 | opencv-python 7 | einops 8 | pytorch-lightning 9 | accelerate 10 | clip @ git+https://github.com/openai/CLIP.git@a9b1bf5920416aaeaec965c25dd9e8f98c864f16 -------------------------------------------------------------------------------- /scripts/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/scripts/__init__.py -------------------------------------------------------------------------------- /scripts/infer_instruct.py: -------------------------------------------------------------------------------- 1 | import random 2 | from dataclasses import dataclass, field 3 | from pathlib import Path 4 | from typing import List, Optional 5 | 6 | import pyrallis 7 | import torch 8 | from PIL import Image 9 | from diffusers import PriorTransformer, UNet2DConditionModel, KandinskyV22Pipeline 10 | from tqdm import tqdm 11 | from transformers import CLIPVisionModelWithProjection, CLIPImageProcessor, CLIPTokenizer, CLIPTextModelWithProjection 12 | 13 | from model import pops_utils 14 | from model.pipeline_pops import pOpsPipeline 15 | 
from huggingface_hub import hf_hub_download 16 | from utils import vis_utils 17 | from data_generation import words_bank 18 | 19 | 20 | @dataclass 21 | class RunConfig: 22 | # Path to the learned prior in local filesystem or huggingface 23 | prior_path: Path 24 | # Input directory 25 | dir_a: Path 26 | # The repo to download the prior from, if None, assumes prior_path is a local path 27 | prior_repo: Optional[str] = None 28 | output_dir_name: Path = Path('inference/results_instruct') 29 | # Path to the kandinsky repo 30 | kandinsky_prior_repo: str = 'kandinsky-community/kandinsky-2-2-prior' 31 | kandinsky_decoder_repo: str = 'kandinsky-community/kandinsky-2-2-decoder' 32 | prior_guidance_scale: List[float] = field(default_factory=lambda: [1.0]) 33 | prior_seeds: List[int] = field(default_factory=lambda: [18, 42]) 34 | unet_seeds: List[int] = field(default_factory=lambda: [0, 1]) 35 | texts: List[str] = field(default_factory=lambda: words_bank.adjectives) 36 | 37 | 38 | @pyrallis.wrap() 39 | def main(cfg: RunConfig): 40 | output_dir = cfg.output_dir_name # cfg.prior_path.parent / cfg.output_dir_name 41 | output_dir.mkdir(parents=True, exist_ok=True) 42 | 43 | weight_dtype = torch.float16 44 | device = 'cuda:0' 45 | image_encoder = CLIPVisionModelWithProjection.from_pretrained(cfg.kandinsky_prior_repo, 46 | subfolder='image_encoder', 47 | torch_dtype=weight_dtype).eval() 48 | 49 | image_processor = CLIPImageProcessor.from_pretrained(cfg.kandinsky_prior_repo, 50 | subfolder='image_processor') 51 | 52 | tokenizer = CLIPTokenizer.from_pretrained(cfg.kandinsky_prior_repo, subfolder='tokenizer') 53 | text_encoder = CLIPTextModelWithProjection.from_pretrained(cfg.kandinsky_prior_repo, 54 | subfolder='text_encoder', 55 | torch_dtype=weight_dtype).eval().to(device) 56 | 57 | prior = PriorTransformer.from_pretrained( 58 | cfg.kandinsky_prior_repo, subfolder="prior" 59 | ) 60 | 61 | if cfg.prior_repo: 62 | # Load from huggingface 63 | prior_path = hf_hub_download(repo_id=cfg.prior_repo, filename=str(cfg.prior_path)) 64 | else: 65 | prior_path = cfg.prior_path 66 | 67 | prior_state_dict = torch.load(prior_path, map_location=device) 68 | msg = prior.load_state_dict(prior_state_dict, strict=False) 69 | print(msg) 70 | 71 | prior.eval() 72 | 73 | # Freeze text_encoder and image_encoder 74 | image_encoder.requires_grad_(False) 75 | 76 | # Load full model for vis 77 | unet = UNet2DConditionModel.from_pretrained(cfg.kandinsky_decoder_repo, 78 | subfolder='unet').to(torch.float16).to(device) 79 | prior_pipeline = pOpsPipeline.from_pretrained(cfg.kandinsky_prior_repo, 80 | prior=prior, 81 | image_encoder=image_encoder, 82 | torch_dtype=torch.float16) 83 | prior_pipeline = prior_pipeline.to(device) 84 | prior = prior.to(weight_dtype) 85 | decoder = KandinskyV22Pipeline.from_pretrained(cfg.kandinsky_decoder_repo, unet=unet, 86 | torch_dtype=torch.float16) 87 | decoder = decoder.to(device) 88 | 89 | # glob for both jpgs or pths 90 | inputs_a = [path for path in cfg.dir_a.glob('*.jpg')] + [path for path in cfg.dir_a.glob('*.pth')] 91 | 92 | paths = [(input_a, text) for input_a in inputs_a for text in cfg.texts] 93 | 94 | # just so we have more variety to look at during the inference 95 | random.shuffle(paths) 96 | 97 | for input_a_path, text in tqdm(paths): 98 | def process_image(input_path): 99 | image_caption_suffix = '' 100 | if input_path is not None and input_path.suffix == '.pth': 101 | image = torch.load(input_path).to(device).to(weight_dtype) 102 | embs_unnormed = (image * prior.clip_std) + prior.clip_mean 
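# Saved .pth inputs are assumed to hold embeddings in the prior's normalized CLIP space,
# so they are mapped back via x * clip_std + clip_mean before being rendered with the decoder below.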
103 | zero_embeds = prior_pipeline.get_zero_embed(embs_unnormed.shape[0], device=embs_unnormed.device) 104 | direct_from_emb = decoder(image_embeds=embs_unnormed, negative_image_embeds=zero_embeds, 105 | num_inference_steps=50, height=512, 106 | width=512, guidance_scale=4).images 107 | image_pil = direct_from_emb[0] 108 | image_caption_suffix = '(embedding)' 109 | else: 110 | if input_path is not None: 111 | image_pil = Image.open(input_path).convert("RGB").resize((512, 512)) 112 | else: 113 | image_pil = Image.new('RGB', (512, 512), (255, 255, 255)) 114 | 115 | image = torch.Tensor(image_processor(image_pil)['pixel_values'][0]).to(device).unsqueeze(0).to( 116 | weight_dtype) 117 | 118 | return image, image_pil, image_caption_suffix 119 | 120 | # Process both inputs 121 | image_a, image_pil_a, caption_suffix_a = process_image(input_a_path) 122 | 123 | text_inputs = tokenizer( 124 | text, 125 | padding="max_length", 126 | max_length=tokenizer.model_max_length, 127 | truncation=True, 128 | return_tensors="pt", 129 | ) 130 | mask = text_inputs.attention_mask.bool() # [0] 131 | 132 | text_encoder_output = text_encoder(text_inputs.input_ids.to(device)) 133 | text_encoder_hidden_states = text_encoder_output.last_hidden_state 134 | text_encoder_concat = text_encoder_hidden_states[:, :mask.sum().item()] 135 | # 136 | 137 | input_image_embeds, input_hidden_state = pops_utils.preprocess(image_a, None, 138 | image_encoder, 139 | prior.clip_mean.detach(), prior.clip_std.detach(), 140 | concat_hidden_states=text_encoder_concat) 141 | 142 | input_images = [image_pil_a] 143 | captions = [f'{text}{caption_suffix_a}'] 144 | 145 | out_name = f"{input_a_path.stem if input_a_path is not None else ''}_{text}" 146 | for seed in cfg.prior_seeds: 147 | negative_input_embeds = torch.zeros_like(input_image_embeds) 148 | negative_hidden_states = torch.zeros_like(input_hidden_state) 149 | for scale in cfg.prior_guidance_scale: 150 | img_emb = prior_pipeline(input_embeds=input_image_embeds, input_hidden_states=input_hidden_state, 151 | negative_input_embeds=negative_input_embeds, 152 | negative_input_hidden_states=negative_hidden_states, 153 | num_inference_steps=25, 154 | num_images_per_prompt=1, 155 | guidance_scale=scale, 156 | generator=torch.Generator(device=device).manual_seed(seed)) 157 | torch.save(img_emb, output_dir / f"{out_name}_s_{seed}_cfg_{scale}_img_emb.pth") 158 | negative_emb = img_emb.negative_image_embeds 159 | for seed_2 in cfg.unet_seeds: 160 | images = decoder(image_embeds=img_emb.image_embeds, negative_image_embeds=negative_emb, 161 | num_inference_steps=50, height=512, 162 | width=512, guidance_scale=4, 163 | generator=torch.Generator(device=device).manual_seed(seed_2)).images 164 | input_images += images 165 | captions.append(f"prior_s {seed}, cfg {scale} unet_s {seed_2}") # , ") 166 | gen_images = vis_utils.create_table_plot(images=input_images, captions=captions) 167 | 168 | gen_images.save(output_dir / f"{out_name}.jpg") 169 | print('Done!') 170 | 171 | 172 | if __name__ == "__main__": 173 | main() 174 | -------------------------------------------------------------------------------- /scripts/train.py: -------------------------------------------------------------------------------- 1 | import pyrallis 2 | 3 | from training.coach import Coach 4 | from training.train_config import TrainConfig 5 | 6 | 7 | @pyrallis.wrap() 8 | def main(cfg: TrainConfig): 9 | coach = Coach(cfg) 10 | coach.train() 11 | 12 | 13 | if __name__ == "__main__": 14 | main() 15 | 
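Note: scripts/train.py above is a thin pyrallis entry point around the Coach class, and every field of the TrainConfig dataclass defined below doubles as a command-line flag. A minimal sketch of driving the same training loop programmatically (the output path here is a placeholder, not one shipped with the repo):

from pathlib import Path

from training.coach import Coach
from training.train_config import TrainConfig

# Placeholder output path; any other TrainConfig field (lr, use_ema, ...) can be set the same way,
# or overridden on the command line via pyrallis, e.g. python scripts/train.py --mode texture
cfg = TrainConfig(
    dataset_path=Path('datasets/generated/generated_things'),
    output_dir=Path('results/texture_run'),
    mode='texture',
)
Coach(cfg).train()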
-------------------------------------------------------------------------------- /training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/training/__init__.py -------------------------------------------------------------------------------- /training/train_config.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, field 2 | from pathlib import Path 3 | from typing import List, Optional, Union 4 | 5 | 6 | @dataclass 7 | class TrainConfig: 8 | # Dataset path 9 | dataset_path: Union[Path, List[Path]] = Path('datasets/generated/generated_things') 10 | # Validation dataset path 11 | val_dataset_path: Path = Path('datasets/generated/generated_things_val') 12 | # Path to pretrained model WITHOUT 2_1 folder 13 | cache_root: Path = Path('/tmp/kandinsky2') 14 | # The output directory where the model predictions and checkpoints will be written. 15 | output_dir: Path = Path('results/my_pops_model') 16 | # GPU device 17 | device: str = 'cuda:0' 18 | # The resolution for input images, all the images will be resized to this size 19 | img_size: int = 512 20 | # Batch size (per device) for the training dataloader 21 | train_batch_size: int = 1 22 | # Initial learning rate (after the potential warmup period) to use 23 | lr: float = 1e-5 24 | # Dataloader num workers. 25 | num_workers: int = 8 26 | # The beta1 parameter for the Adam optimizer. 27 | adam_beta1: float = 0.9 28 | # The beta2 parameter for the Adam optimizer 29 | adam_beta2: float = 0.999 30 | # Weight decay to use 31 | adam_weight_decay: float = 0.0 # 1e-2 32 | # Epsilon value for the Adam optimizer 33 | adam_epsilon: float = 1e-08 34 | # How often save images. Values less zero - disable saving 35 | log_image_frequency: int = 500 36 | # How often to run validation 37 | log_validation: int = 5000 38 | # The number of images to save during each validation 39 | n_val_images: int = 10 40 | # A seed for reproducible training 41 | seed: Optional[int] = None 42 | # The number of accumulation steps to use 43 | gradient_accumulation_steps: int = 1 44 | # Whether to use mixed precision training 45 | mixed_precision: Optional[str] = 'fp16' 46 | # Log to wandb 47 | report_to: str = 'wandb' 48 | # Path to pretrained prior model or model identifier from huggingface.co/models. 49 | pretrained_prior_path: str = 'kandinsky-community/kandinsky-2-2-prior' 50 | # Path to pretrained image encoder. 51 | pretrained_image_encoder: str = 'kandinsky-community/kandinsky-2-2-prior' 52 | # Path to scheduler. 53 | scheduler_path: str = 'kandinsky-community/kandinsky-2-2-prior' 54 | # Path to image_processor. 55 | image_processor_path: str = 'kandinsky-community/kandinsky-2-2-prior' 56 | # Path to text_encoder. 57 | text_encoder_path: str = 'kandinsky-community/kandinsky-2-2-prior' 58 | # Path to tokenizer. 
59 | tokenizer_path: str = 'kandinsky-community/kandinsky-2-2-prior' 60 | use_ema: bool = False 61 | allow_tf32: bool = False 62 | use_8bit_adam: bool = False 63 | lr_scheduler: str = 'constant' 64 | # The number of training steps to run 65 | max_train_steps: int = 1000000 66 | # Max grad for clipping 67 | max_grad_norm: float = 1.0 68 | # How often to save checkpoints 69 | checkpointing_steps: int = 5000 70 | # The path to resume from 71 | resume_from_path: Optional[Path] = None 72 | # The step to resume from, mainly for logging 73 | resume_from_step: Optional[int] = None 74 | # Lora mode, untested 75 | lora_rank: Optional[int] = None 76 | # Which operator to train 77 | mode: str = 'texture' 78 | # The path to the textures dataset if used 79 | textures_dir: Optional[Path] = None 80 | # The path to the backgrounds dataset if used 81 | backgrounds_dir: Optional[Path] = None 82 | # optional directory of plain images to use for unconditional denoising 83 | randoms_dir: Optional[Path] = None 84 | # Whether full model is trained or only some layers, x_layers is the format for training only x layers 85 | training_mode: str = 'full' 86 | # Whether to use clip loss 87 | use_clip_loss: bool = False 88 | # Clip lambda 89 | clip_strength: float = 10.0 90 | -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pOpsPaper/pOps/1ba0fc8b06e3969e130acb8de8dd65bd9ad9561b/utils/__init__.py -------------------------------------------------------------------------------- /utils/clothes_utils.py: -------------------------------------------------------------------------------- 1 | ind2labels_dict = { 2 | 0: "Background", 3 | 1: "Hat", 4 | 2: "Hair", 5 | 3: "Sunglasses", 6 | 4: "Upper-clothes", 7 | 5: "Skirt", 8 | 6: "Pants", 9 | 7: "Dress", 10 | 8: "Belt", 11 | 9: "Left-shoe", 12 | 10: "Right-shoe", 13 | 11: "Face", 14 | 12: "Left-leg", 15 | 13: "Right-leg", 16 | 14: "Left-arm", 17 | 15: "Right-arm", 18 | 16: "Bag", 19 | 17: "Scarf" 20 | } 21 | 22 | # Calculate the reverse dict 23 | labels2ind_dict = {v: k for k, v in ind2labels_dict.items()} 24 | 25 | 26 | relevant_inds = [1,2,3,4,5,6,7,8,9,10,16,17] 27 | -------------------------------------------------------------------------------- /utils/vis_utils.py: -------------------------------------------------------------------------------- 1 | import textwrap 2 | from typing import List, Tuple, Optional 3 | 4 | import numpy as np 5 | from PIL import Image, ImageDraw, ImageFont 6 | 7 | LINE_WIDTH = 20 8 | 9 | 10 | def add_text_to_image(image: np.ndarray, text: str, text_color: Tuple[int, int, int] = (0, 0, 0), 11 | min_lines: Optional[int] = None, add_below: bool = True): 12 | import textwrap 13 | lines = textwrap.wrap(text, width=LINE_WIDTH) 14 | if min_lines is not None and len(lines) < min_lines: 15 | if add_below: 16 | lines += [''] * (min_lines - len(lines)) 17 | else: 18 | lines = [''] * (min_lines - len(lines)) + lines 19 | h, w, c = image.shape 20 | offset = int(h * .12) 21 | img = np.ones((h + offset * len(lines), w, c), dtype=np.uint8) * 255 22 | font_size = int(offset * .8) 23 | 24 | try: 25 | font = ImageFont.truetype("assets/OpenSans-Regular.ttf", font_size) 26 | textsize = font.getbbox(text) 27 | y_offset = (offset - textsize[3]) // 2 28 | except: 29 | font = ImageFont.load_default() 30 | y_offset = offset // 2 31 | 32 | if add_below: 33 | img[:h] = image 34 | else: 35 | img[-h:] = image 36 | 
img = Image.fromarray(img) 37 | draw = ImageDraw.Draw(img) 38 | for i, line in enumerate(lines): 39 | line_size = font.getbbox(line) 40 | text_x = (w - line_size[2]) // 2 41 | if add_below: 42 | draw.text((text_x, h + y_offset + offset * i), line, font=font, fill=text_color) 43 | else: 44 | draw.text((text_x, 0 + y_offset + offset * i), line, font=font, fill=text_color) 45 | return np.array(img) 46 | 47 | 48 | def create_table_plot(images: List[Image.Image], titles: List[str]=None, captions: List[str]=None) -> Image.Image: 49 | title_max_lines = np.max([len(textwrap.wrap(text, width=LINE_WIDTH)) for text in titles]) if titles is not None else 0 50 | caption_max_lines = np.max([len(textwrap.wrap(text, width=LINE_WIDTH)) for text in captions]) if captions is not None else 0 51 | out_images = [] 52 | for i in range(len(images)): 53 | im = np.array(images[i]) 54 | if titles is not None: 55 | im = add_text_to_image(im, titles[i], add_below=False, min_lines=title_max_lines) 56 | if captions is not None: 57 | im = add_text_to_image(im, captions[i], add_below=True, min_lines=caption_max_lines) 58 | out_images.append(im) 59 | image = Image.fromarray(np.concatenate(out_images, axis=1)) 60 | return image 61 | --------------------------------------------------------------------------------
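As a usage note, the helpers in utils/vis_utils.py are what the inference scripts use to assemble labelled image grids. A minimal, self-contained sketch (the two image paths are placeholders, not files from the repo):

from PIL import Image

from utils.vis_utils import create_table_plot

# Placeholder inputs; any equally sized RGB images work.
images = [Image.open(p).convert('RGB').resize((512, 512)) for p in ['input.jpg', 'result.jpg']]
grid = create_table_plot(
    images=images,
    titles=['input', 'output'],
    captions=['source image', 'prior_s 18, cfg 1.0 unet_s 0'],
)
grid.save('grid.jpg')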