├── LICENSE ├── README.md ├── asset ├── images │ ├── car │ │ ├── image_end.png │ │ └── image_start.png │ ├── cat │ │ ├── image_end.JPG │ │ └── image_start.JPG │ └── folwer │ │ ├── image_end.png │ │ └── image_start.png ├── logo.gif ├── samples.txt └── teaser.png ├── docs ├── index.html └── static │ ├── css │ ├── bulma-carousel.min.css │ ├── bulma-slider.min.css │ ├── bulma.css.map.txt │ ├── bulma.min.css │ ├── custom.css │ ├── fontawesome.all.min.css │ └── index.css │ ├── font_style │ └── jackbrush-alj9a.ttf │ ├── images │ ├── background.jpg │ ├── huggingface_logo.svg │ ├── i23d │ │ ├── 1-1.png │ │ ├── 1-2.png │ │ ├── 1-3.png │ │ ├── 1-4.png │ │ ├── 1-5.png │ │ ├── 1-6.png │ │ ├── 1.png │ │ ├── 2-1.png │ │ ├── 2-2.png │ │ ├── 2-3.png │ │ ├── 2-4.png │ │ ├── 2-5.png │ │ ├── 2-6.png │ │ ├── 2.png │ │ ├── 3-1.png │ │ ├── 3-2.png │ │ ├── 3-3.png │ │ ├── 3-4.png │ │ ├── 3-5.png │ │ ├── 3-6.png │ │ ├── 3.png │ │ ├── 4-1.png │ │ ├── 4-2.png │ │ ├── 4-3.png │ │ ├── 4-4.png │ │ ├── 4-5.png │ │ ├── 4-6.png │ │ └── 4.png │ ├── i2v │ │ ├── .DS_Store │ │ ├── boat │ │ │ ├── .DS_Store │ │ │ ├── boat.gif │ │ │ └── boat.png │ │ ├── cake │ │ │ ├── .DS_Store │ │ │ ├── cake.gif │ │ │ └── cake.png │ │ ├── cloud │ │ │ ├── cloud.gif │ │ │ └── cloud.png │ │ ├── fire │ │ │ ├── .DS_Store │ │ │ ├── fire.gif │ │ │ └── fire.png │ │ ├── meteor │ │ │ ├── .DS_Store │ │ │ ├── meteor.gif │ │ │ └── meteor.png │ │ └── waterfall │ │ │ ├── .DS_Store │ │ │ ├── waterfall.gif │ │ │ └── waterfall.png │ ├── icon.png │ ├── interpolation │ │ ├── car │ │ │ ├── end.png │ │ │ ├── interpolation.gif │ │ │ ├── interpolation.mp4 │ │ │ └── start.png │ │ └── cat_tiger │ │ │ ├── end.png │ │ │ ├── interpolation.gif │ │ │ ├── interpolation.mp4 │ │ │ └── start.png │ ├── logo.gif │ ├── logo.png │ ├── method │ │ └── method.png │ └── t2i │ │ ├── A baby rabbit wearing a tiny knitted hat, ultra-detailed, photorealistic.jpg │ │ ├── A bear with fur made of chocolate shavings, standing in a clearing filled with marshmallow mushrooms.jpeg │ │ ├── A breathtaking view of the Swiss Alps during sunrise, with snow-capped peaks and lush green valleys, ultra-realistic, high detail.jpg │ │ ├── A close-up of a sunlit butterfly resting on a flower in a garden.jpeg │ │ ├── A close-up of a vibrant, fully bloomed red rose with dew drops on its petals.jpeg │ │ ├── A close-up photograph of a lion with its mane blowing in the wind against the savanna backdrop.jpeg │ │ ├── A curious dolphin leaping out of the water, creating splashes in the sunlight.jpeg │ │ ├── A cyborg superhero with a robotic arm and high-tech gadgets, standing atop a skyscraper.jpg │ │ ├── A detailed close-up of a rusted vintage car abandoned in an overgrown field.jpg │ │ ├── A dragon made of molten chocolate, with scales that glisten like gold leaf and eyes of crystalline sugar.jpeg │ │ ├── A dramatic mountain range during a thunderstorm, with dark clouds, lightning strikes, and rugged terrain.jpg │ │ ├── A gorilla wearing an advanced robotic suit with pulsating energy cores, standing on the edge of a futuristic skyline.jpeg │ │ ├── A group of astronauts standing on the surface of Mars, with Earth visible in the distant sky.png │ │ ├── A hippopotamus with a body of jelly-like translucent gelatin, lounging in a pool of liquid sherbet.jpeg │ │ ├── A lion made entirely of layered caramel and chocolate, with a mane composed of spun sugar flames.jpeg │ │ ├── A lion with a mane made of holographic flames, standing on a crystal platform in a neon-lit jungle.jpeg │ │ ├── A magical forest with glowing plants, 
where a young anime girl with long hair discovers a hidden portal.jpg │ │ ├── A majestic bald eagle soaring over a snowy mountain range.jpeg │ │ ├── A majestic mountain range under a starry sky, with swirling clouds and glowing moonlight, inspired by Van Gogh.jpeg │ │ ├── A peaceful forest in autumn, with golden leaves falling and a stream running through it, illuminated by soft sunlight.png │ │ ├── A peaceful mountain lake reflecting the surrounding pine trees and snowy peaks, photorealistic, tranquil.jpg │ │ ├── A phoenix-like bird with wings made of fiery red fruit leather and a beak of candied citrus peel.jpeg │ │ ├── A realistic photograph of a wolf howling at the moon in a snowy forest.jpeg │ │ ├── A rustic bedroom showcasing a round bed, earth-toned decor, and a cluttered, yet charming ambiance.jpg │ │ ├── A stealthy ninja superhero in a dark alley, showcasing agility and advanced technology.jpg │ │ ├── A wolf constructed from layers of dark chocolate and nougat, with glowing eyes made of candied cherries.jpeg │ │ ├── An owl constructed from layers of caramel popcorn and hazelnut chocolate, perched on a pretzel branch.jpeg │ │ ├── Bentley Bacalar driving on black tarmac road with trees in background, Sumi-e drawing, white background 8k.jpg │ │ ├── Documentary-style photography of a bustling marketplace in Marrakech, with spices and textiles.png │ │ ├── Kraken is listening to music with headphones.png │ │ ├── Post-Apocalyptic Wanderer, character design, style by kim jung gi, zabrocki, karlkka, jayison devadas, 8k.png │ │ ├── The picture shows a cute little tiger, wearing a blue hoodie and hat, sitting on a small cardboard boat on calm water.png │ │ ├── Two baby ducks swimming in a pond at sunset, highly detailed, hyper-realistic.jpg │ │ ├── Two female rabbit adventurers dressed in a fancy velvet coats next to a Christmas tree, Christmas theme, on an antique opulent background , jean - baptiste monge , smooth, anthropomorphic photorealistic, photography, lifelike, high resolution, smooth.jpg │ │ ├── beautiful lady,freckles, big smile,blue eyes, short ginger hair, dark makeup, wearing a floral blue vest top, soft light,dark grey background.jpeg │ │ ├── cloud.jpg │ │ └── golden sunset shines on the top of snow-capped mountains, with small villages at its foot and surrounding buildings.png │ └── js │ ├── bulma-carousel.js │ ├── bulma-carousel.min.js │ ├── bulma-slider.js │ ├── bulma-slider.min.js │ ├── choose_image.js │ ├── fontawesome.all.min.js │ ├── image2gif.js │ ├── index.js │ ├── scroll.js │ └── video_comparison.js ├── gradio_demos ├── lumos_I2I.py └── lumos_T2I.py ├── lumos_diffusion ├── __init__.py ├── dpm_solver.py ├── dpm_solver_inter.py └── model │ ├── __init__.py │ ├── builder.py │ ├── diffusion_utils.py │ ├── dino │ └── vision_transformer.py │ ├── dpm_solver.py │ ├── dpm_solver_inter.py │ ├── gaussian_diffusion.py │ ├── gaussian_diffusion_inter.py │ ├── lumos │ ├── LumosI2I.py │ ├── LumosT2I.py │ ├── LumosT2IMS.py │ ├── Lumos_blocks.py │ └── __init__.py │ ├── t5.py │ ├── timestep_sampler.py │ └── utils.py ├── requirements.txt └── utils ├── __init__.py ├── download.py └── resolution.py /LICENSE: -------------------------------------------------------------------------------- 1 | ------------------------------ LICENSE for Lumos ------------------------------ 2 | 3 | Copyright (c) 2024 Ant Group. 
4 | 5 | MIT License 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED *AS IS*, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | ###
Learning Visual Generative Priors without Text
6 |
7 |
8 | Shuailei Ma*1, 9 | Kecheng Zheng*2, 10 | Ying Wei✉️1, Wei Wu2, Fan Lu2, 11 | Yifei Zhang3, Chen-Wei Xie4, 12 | Biao Gong2, 13 | Jiapeng Zhu5, 14 | Yujun Shen✉️2
15 | 1Northeastern University, China 2Ant Group 3SJTU 4Alibaba Group 5HKUST
16 | *equal contribution ✉️ corresponding author 17 |
18 |
19 |
20 |   21 |   22 |   23 |
24 |
25 | 26 | ## 📝 Content 27 | * [Update Log](#-update-log) 28 | * [Abstract](#-abstract) 29 | * [Setup](#️-setup) 30 | * [Citation](#-citation) 31 | * [License](#license) 32 | * [Acknowledgement](#acknowledgement) 33 | 34 | 35 | ## 📣 Update Log 36 | - [2024.11.21] 🎉 Here comes Lumos! We release the code and Gradio demos of Lumos-I2I and Lumos-T2I. 37 | 38 | ## 🪄✨ Abstract 39 | TL;DR: Lumos is a pure vision-based generative framework that confirms the feasibility and scalability of learning visual generative priors. It can be efficiently adapted to visual generative tasks such as text-to-image, image-to-3D, and image-to-video generation. 40 |
CLICK for the full abstract 41 | Although text-to-image (T2I) models have recently thrived as visual generative priors, their reliance on high-quality text-image pairs makes scaling up expensive. 42 | We argue that grasping the cross-modality alignment is not a necessity for a sound visual generative prior, whose focus should be on texture modeling. 43 | Such a philosophy inspires us to study image-to-image (I2I) generation, where models can learn from in-the-wild images in a self-supervised manner. 44 | We first develop a pure vision-based training framework, Lumos, and confirm the feasibility and the scalability of learning I2I models. 45 | We then find that, as an upstream task of T2I, our I2I model serves as a more foundational visual prior and achieves on-par or better performance than existing T2I models using only 1/10 text-image pairs for fine-tuning. 46 | We further demonstrate the superiority of I2I priors over T2I priors on some text-irrelevant visual generative tasks, like image-to-3D and image-to-video. 47 |
48 | 49 | ![Visualization of various downstream tasks of Lumos](asset/teaser.png) 50 | 51 | 52 | ## ⚙️ Setup 53 | Follow the guide below to set up the environment. 54 | - Python >= 3.9 (we recommend [Anaconda](https://www.anaconda.com/download/#linux) or [Miniconda](https://docs.conda.io/en/latest/miniconda.html)) 55 | - [PyTorch >= 2.2.1 (CUDA 11.8)](https://pytorch.org/) 56 | - A virtual environment is recommended 57 | 58 | Install the required dependencies by following the commands below. 59 | 60 | 1. Clone the repository. 61 | ``` 62 | git clone https://github.com/xiaomabufei/lumos.git 63 | cd lumos 64 | ``` 65 | 2. Download the model checkpoints. 66 | ``` 67 | mkdir ./checkpoints && cd ./checkpoints 68 | git lfs install 69 | git clone https://huggingface.co/Xiaomabufei/lumos 70 | ``` 71 | 72 | 3. Create the environment. 73 | ``` 74 | conda create -n lumos python=3.9 -y 75 | conda activate lumos 76 | ``` 77 | 78 | 4. Install PyTorch with GPU support. 79 | ``` 80 | pip install torch==2.2.1+cu118 torchvision==0.17.1+cu118 -f https://download.pytorch.org/whl/torch_stable.html 81 | ``` 82 | 83 | 5. Install the xformers build that matches your torch and CUDA versions. 84 | ``` 85 | pip install -U xformers==0.0.25 86 | ``` 87 | 88 | 6. Install the remaining dependencies. 89 | ``` 90 | pip install -r requirements.txt 91 | ``` 92 | 93 | 7. Run the Lumos image interpolation demo. 94 | ``` 95 | python gradio_demos/lumos_I2I.py 96 | ``` 97 | 98 | 8. Run the Lumos text-to-image generation demo. 99 | ``` 100 | python gradio_demos/lumos_T2I.py 101 | ``` 102 | If you are a user in mainland China, you may try `export HF_ENDPOINT=https://hf-mirror.com` to use a Hugging Face mirror, which facilitates downloading the checkpoints required to run our system. A scripted way to verify the installation and download the checkpoints is sketched at the end of this README. 103 | 104 | ## 📖 Citation 105 | Don't forget to cite this work if it proves useful in your research! 106 | ```bibtex 107 | @article{Lumos2024, 108 | title={Learning Visual Generative Priors without Text}, 109 | author={Ma, Shuailei and Zheng, Kecheng and Wei, Ying and Wu, Wei and Lu, Fan and Zhang, Yifei and Xie, Chen-Wei and Gong, Biao and Zhu, Jiapeng and Shen, Yujun}, 110 | year={2024}, 111 | eprint={arxiv}, 112 | archivePrefix={arXiv}, 113 | primaryClass={cs.CV}} 114 | ``` 115 | 116 | ## License 117 | This repository is released under the MIT license as found in the [LICENSE](LICENSE) file. 118 | 119 | ## Acknowledgement 120 | Our implementation is based on [DiT](https://github.com/facebookresearch/DiT), [Pixart-α](https://github.com/PixArt-alpha/PixArt-alpha) and [Dino](https://github.com/facebookresearch/dino). Thanks for their remarkable contributions and released code!
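As referenced in the Setup section, below is a minimal Python sketch (not shipped with this repository) that checks the GPU build of PyTorch from step 4 and downloads the released checkpoints with `huggingface_hub.snapshot_download` as an alternative to the `git lfs` clone in step 2. The script name and the `./checkpoints/lumos` target directory are illustrative assumptions; adjust them to your layout.

```python
# verify_setup.py -- hypothetical helper script, not part of this repository.
import os

# Optional (mainland China users): route Hugging Face traffic through the
# mirror mentioned above. HF_ENDPOINT must be set before `huggingface_hub`
# is imported, otherwise it is ignored.
# os.environ["HF_ENDPOINT"] = "https://hf-mirror.com"

import torch
from huggingface_hub import snapshot_download

# Confirm that the CUDA-enabled PyTorch build installed in step 4 is active.
print(f"torch {torch.__version__}, CUDA {torch.version.cuda}, "
      f"GPU available: {torch.cuda.is_available()}")

# Fetch the released checkpoints (the same repo cloned in step 2).
# The local_dir mirrors the ./checkpoints/lumos layout created there.
ckpt_dir = snapshot_download(
    repo_id="Xiaomabufei/lumos",
    local_dir="./checkpoints/lumos",
)
print(f"Checkpoints available at: {ckpt_dir}")
```

Once the checkpoints are in place, steps 7 and 8 launch the Gradio demos, which serve a local web UI (Gradio's usual default address is http://127.0.0.1:7860).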
121 | -------------------------------------------------------------------------------- /asset/images/car/image_end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/car/image_end.png -------------------------------------------------------------------------------- /asset/images/car/image_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/car/image_start.png -------------------------------------------------------------------------------- /asset/images/cat/image_end.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/cat/image_end.JPG -------------------------------------------------------------------------------- /asset/images/cat/image_start.JPG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/cat/image_start.JPG -------------------------------------------------------------------------------- /asset/images/folwer/image_end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/folwer/image_end.png -------------------------------------------------------------------------------- /asset/images/folwer/image_start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/images/folwer/image_start.png -------------------------------------------------------------------------------- /asset/logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/logo.gif -------------------------------------------------------------------------------- /asset/samples.txt: -------------------------------------------------------------------------------- 1 | A close-up of a vibrant, fully bloomed red rose with dew drops on its petals -------------------------------------------------------------------------------- /asset/teaser.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/asset/teaser.png -------------------------------------------------------------------------------- /docs/static/css/bulma-carousel.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.slider{position:relative;width:100%}.slider-container{display:flex;flex-wrap:nowrap;flex-direction:row;overflow:hidden;-webkit-transform:translate3d(0,0,0);transform:translate3d(0,0,0);min-height:100%}.slider-container.is-vertical{flex-direction:column}.slider-container 
.slider-item{flex:none}.slider-container .slider-item .image.is-covered img{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.slider-container .slider-item .video-container{height:0;padding-bottom:0;padding-top:56.25%;margin:0;position:relative}.slider-container .slider-item .video-container.is-1by1,.slider-container .slider-item .video-container.is-square{padding-top:100%}.slider-container .slider-item .video-container.is-4by3{padding-top:75%}.slider-container .slider-item .video-container.is-21by9{padding-top:42.857143%}.slider-container .slider-item .video-container embed,.slider-container .slider-item .video-container iframe,.slider-container .slider-item .video-container object{position:absolute;top:0;left:0;width:100%!important;height:100%!important}.slider-navigation-next,.slider-navigation-previous{display:flex;justify-content:center;align-items:center;position:absolute;width:42px;height:42px;background:#fff center center no-repeat;background-size:20px 20px;border:1px solid #fff;border-radius:25091983px;box-shadow:0 2px 5px #3232321a;top:50%;margin-top:-20px;left:0;cursor:pointer;transition:opacity .3s,-webkit-transform .3s;transition:transform .3s,opacity .3s;transition:transform .3s,opacity .3s,-webkit-transform .3s}.slider-navigation-next:hover,.slider-navigation-previous:hover{-webkit-transform:scale(1.2);transform:scale(1.2)}.slider-navigation-next.is-hidden,.slider-navigation-previous.is-hidden{display:none;opacity:0}.slider-navigation-next svg,.slider-navigation-previous svg{width:25%}.slider-navigation-next{left:auto;right:0;background:#fff center center no-repeat;background-size:20px 20px}.slider-pagination{display:none;justify-content:center;align-items:center;position:absolute;bottom:0;left:0;right:0;padding:.5rem 1rem;text-align:center}.slider-pagination .slider-page{background:#fff;width:10px;height:10px;border-radius:25091983px;display:inline-block;margin:0 3px;box-shadow:0 2px 5px #3232321a;transition:-webkit-transform .3s;transition:transform .3s;transition:transform .3s,-webkit-transform .3s;cursor:pointer}.slider-pagination .slider-page.is-active,.slider-pagination .slider-page:hover{-webkit-transform:scale(1.4);transform:scale(1.4)}@media screen and (min-width:800px){.slider-pagination{display:flex}}.hero.has-carousel{position:relative}.hero.has-carousel+.hero-body,.hero.has-carousel+.hero-footer,.hero.has-carousel+.hero-head{z-index:10;overflow:hidden}.hero.has-carousel .hero-carousel{position:absolute;top:0;left:0;bottom:0;right:0;height:auto;border:none;margin:auto;padding:0;z-index:0}.hero.has-carousel .hero-carousel .slider{width:100%;max-width:100%;overflow:hidden;height:100%!important;max-height:100%;z-index:0}.hero.has-carousel .hero-carousel .slider .has-background{max-height:100%}.hero.has-carousel .hero-carousel .slider .has-background .is-background{-o-object-fit:cover;object-fit:cover;-o-object-position:center center;object-position:center center;height:100%;width:100%}.hero.has-carousel .hero-body{margin:0 3rem;z-index:10} -------------------------------------------------------------------------------- /docs/static/css/bulma-slider.min.css: -------------------------------------------------------------------------------- 1 | @-webkit-keyframes spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes 
spinAround{from{-webkit-transform:rotate(0);transform:rotate(0)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}input[type=range].slider{-webkit-appearance:none;-moz-appearance:none;appearance:none;margin:1rem 0;background:0 0;touch-action:none}input[type=range].slider.is-fullwidth{display:block;width:100%}input[type=range].slider:focus{outline:0}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{width:100%}input[type=range].slider:not([orient=vertical])::-moz-range-track{width:100%}input[type=range].slider:not([orient=vertical])::-ms-track{width:100%}input[type=range].slider:not([orient=vertical]).has-output+output,input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{width:3rem;background:#4a4a4a;border-radius:4px;padding:.4rem .8rem;font-size:.75rem;line-height:.75rem;text-align:center;text-overflow:ellipsis;white-space:nowrap;color:#fff;overflow:hidden;pointer-events:none;z-index:200}input[type=range].slider:not([orient=vertical]).has-output-tooltip:disabled+output,input[type=range].slider:not([orient=vertical]).has-output:disabled+output{opacity:.5}input[type=range].slider:not([orient=vertical]).has-output{display:inline-block;vertical-align:middle;width:calc(100% - (4.2rem))}input[type=range].slider:not([orient=vertical]).has-output+output{display:inline-block;margin-left:.75rem;vertical-align:middle}input[type=range].slider:not([orient=vertical]).has-output-tooltip{display:block}input[type=range].slider:not([orient=vertical]).has-output-tooltip+output{position:absolute;left:0;top:-.1rem}input[type=range].slider[orient=vertical]{-webkit-appearance:slider-vertical;-moz-appearance:slider-vertical;appearance:slider-vertical;-webkit-writing-mode:bt-lr;-ms-writing-mode:bt-lr;writing-mode:bt-lr}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{height:100%}input[type=range].slider[orient=vertical]::-moz-range-track{height:100%}input[type=range].slider[orient=vertical]::-ms-track{height:100%}input[type=range].slider::-webkit-slider-runnable-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-moz-range-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-track{cursor:pointer;animate:.2s;box-shadow:0 0 0 #7a7a7a;background:#dbdbdb;border-radius:4px;border:0 solid #7a7a7a}input[type=range].slider::-ms-fill-lower{background:#dbdbdb;border-radius:4px}input[type=range].slider::-ms-fill-upper{background:#dbdbdb;border-radius:4px}input[type=range].slider::-webkit-slider-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-moz-range-thumb{box-shadow:none;border:1px solid #b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-ms-thumb{box-shadow:none;border:1px solid 
#b5b5b5;border-radius:4px;background:#fff;cursor:pointer}input[type=range].slider::-webkit-slider-thumb{-webkit-appearance:none;appearance:none}input[type=range].slider.is-circle::-webkit-slider-thumb{border-radius:290486px}input[type=range].slider.is-circle::-moz-range-thumb{border-radius:290486px}input[type=range].slider.is-circle::-ms-thumb{border-radius:290486px}input[type=range].slider:active::-webkit-slider-thumb{-webkit-transform:scale(1.25);transform:scale(1.25)}input[type=range].slider:active::-moz-range-thumb{transform:scale(1.25)}input[type=range].slider:active::-ms-thumb{transform:scale(1.25)}input[type=range].slider:disabled{opacity:.5;cursor:not-allowed}input[type=range].slider:disabled::-webkit-slider-thumb{cursor:not-allowed;-webkit-transform:scale(1);transform:scale(1)}input[type=range].slider:disabled::-moz-range-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:disabled::-ms-thumb{cursor:not-allowed;transform:scale(1)}input[type=range].slider:not([orient=vertical]){min-height:calc((1rem + 2px) * 1.25)}input[type=range].slider:not([orient=vertical])::-webkit-slider-runnable-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-moz-range-track{height:.5rem}input[type=range].slider:not([orient=vertical])::-ms-track{height:.5rem}input[type=range].slider[orient=vertical]::-webkit-slider-runnable-track{width:.5rem}input[type=range].slider[orient=vertical]::-moz-range-track{width:.5rem}input[type=range].slider[orient=vertical]::-ms-track{width:.5rem}input[type=range].slider::-webkit-slider-thumb{height:1rem;width:1rem}input[type=range].slider::-moz-range-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{height:1rem;width:1rem}input[type=range].slider::-ms-thumb{margin-top:0}input[type=range].slider::-webkit-slider-thumb{margin-top:-.25rem}input[type=range].slider[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.25rem}input[type=range].slider.is-small:not([orient=vertical]){min-height:calc((.75rem + 2px) * 1.25)}input[type=range].slider.is-small:not([orient=vertical])::-webkit-slider-runnable-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-moz-range-track{height:.375rem}input[type=range].slider.is-small:not([orient=vertical])::-ms-track{height:.375rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-runnable-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-moz-range-track{width:.375rem}input[type=range].slider.is-small[orient=vertical]::-ms-track{width:.375rem}input[type=range].slider.is-small::-webkit-slider-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-moz-range-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{height:.75rem;width:.75rem}input[type=range].slider.is-small::-ms-thumb{margin-top:0}input[type=range].slider.is-small::-webkit-slider-thumb{margin-top:-.1875rem}input[type=range].slider.is-small[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.1875rem}input[type=range].slider.is-medium:not([orient=vertical]){min-height:calc((1.25rem + 2px) * 
1.25)}input[type=range].slider.is-medium:not([orient=vertical])::-webkit-slider-runnable-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-moz-range-track{height:.625rem}input[type=range].slider.is-medium:not([orient=vertical])::-ms-track{height:.625rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-runnable-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-moz-range-track{width:.625rem}input[type=range].slider.is-medium[orient=vertical]::-ms-track{width:.625rem}input[type=range].slider.is-medium::-webkit-slider-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-moz-range-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{height:1.25rem;width:1.25rem}input[type=range].slider.is-medium::-ms-thumb{margin-top:0}input[type=range].slider.is-medium::-webkit-slider-thumb{margin-top:-.3125rem}input[type=range].slider.is-medium[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.3125rem}input[type=range].slider.is-large:not([orient=vertical]){min-height:calc((1.5rem + 2px) * 1.25)}input[type=range].slider.is-large:not([orient=vertical])::-webkit-slider-runnable-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-moz-range-track{height:.75rem}input[type=range].slider.is-large:not([orient=vertical])::-ms-track{height:.75rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-runnable-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-moz-range-track{width:.75rem}input[type=range].slider.is-large[orient=vertical]::-ms-track{width:.75rem}input[type=range].slider.is-large::-webkit-slider-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-moz-range-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{height:1.5rem;width:1.5rem}input[type=range].slider.is-large::-ms-thumb{margin-top:0}input[type=range].slider.is-large::-webkit-slider-thumb{margin-top:-.375rem}input[type=range].slider.is-large[orient=vertical]::-webkit-slider-thumb{margin-top:auto;margin-left:-.375rem}input[type=range].slider.is-white::-moz-range-track{background:#fff!important}input[type=range].slider.is-white::-webkit-slider-runnable-track{background:#fff!important}input[type=range].slider.is-white::-ms-track{background:#fff!important}input[type=range].slider.is-white::-ms-fill-lower{background:#fff}input[type=range].slider.is-white::-ms-fill-upper{background:#fff}input[type=range].slider.is-white .has-output-tooltip+output,input[type=range].slider.is-white.has-output+output{background-color:#fff;color:#0a0a0a}input[type=range].slider.is-black::-moz-range-track{background:#0a0a0a!important}input[type=range].slider.is-black::-webkit-slider-runnable-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-track{background:#0a0a0a!important}input[type=range].slider.is-black::-ms-fill-lower{background:#0a0a0a}input[type=range].slider.is-black::-ms-fill-upper{background:#0a0a0a}input[type=range].slider.is-black 
.has-output-tooltip+output,input[type=range].slider.is-black.has-output+output{background-color:#0a0a0a;color:#fff}input[type=range].slider.is-light::-moz-range-track{background:#f5f5f5!important}input[type=range].slider.is-light::-webkit-slider-runnable-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-track{background:#f5f5f5!important}input[type=range].slider.is-light::-ms-fill-lower{background:#f5f5f5}input[type=range].slider.is-light::-ms-fill-upper{background:#f5f5f5}input[type=range].slider.is-light .has-output-tooltip+output,input[type=range].slider.is-light.has-output+output{background-color:#f5f5f5;color:#363636}input[type=range].slider.is-dark::-moz-range-track{background:#363636!important}input[type=range].slider.is-dark::-webkit-slider-runnable-track{background:#363636!important}input[type=range].slider.is-dark::-ms-track{background:#363636!important}input[type=range].slider.is-dark::-ms-fill-lower{background:#363636}input[type=range].slider.is-dark::-ms-fill-upper{background:#363636}input[type=range].slider.is-dark .has-output-tooltip+output,input[type=range].slider.is-dark.has-output+output{background-color:#363636;color:#f5f5f5}input[type=range].slider.is-primary::-moz-range-track{background:#00d1b2!important}input[type=range].slider.is-primary::-webkit-slider-runnable-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-track{background:#00d1b2!important}input[type=range].slider.is-primary::-ms-fill-lower{background:#00d1b2}input[type=range].slider.is-primary::-ms-fill-upper{background:#00d1b2}input[type=range].slider.is-primary .has-output-tooltip+output,input[type=range].slider.is-primary.has-output+output{background-color:#00d1b2;color:#fff}input[type=range].slider.is-link::-moz-range-track{background:#3273dc!important}input[type=range].slider.is-link::-webkit-slider-runnable-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-track{background:#3273dc!important}input[type=range].slider.is-link::-ms-fill-lower{background:#3273dc}input[type=range].slider.is-link::-ms-fill-upper{background:#3273dc}input[type=range].slider.is-link .has-output-tooltip+output,input[type=range].slider.is-link.has-output+output{background-color:#3273dc;color:#fff}input[type=range].slider.is-info::-moz-range-track{background:#209cee!important}input[type=range].slider.is-info::-webkit-slider-runnable-track{background:#209cee!important}input[type=range].slider.is-info::-ms-track{background:#209cee!important}input[type=range].slider.is-info::-ms-fill-lower{background:#209cee}input[type=range].slider.is-info::-ms-fill-upper{background:#209cee}input[type=range].slider.is-info .has-output-tooltip+output,input[type=range].slider.is-info.has-output+output{background-color:#209cee;color:#fff}input[type=range].slider.is-success::-moz-range-track{background:#23d160!important}input[type=range].slider.is-success::-webkit-slider-runnable-track{background:#23d160!important}input[type=range].slider.is-success::-ms-track{background:#23d160!important}input[type=range].slider.is-success::-ms-fill-lower{background:#23d160}input[type=range].slider.is-success::-ms-fill-upper{background:#23d160}input[type=range].slider.is-success 
.has-output-tooltip+output,input[type=range].slider.is-success.has-output+output{background-color:#23d160;color:#fff}input[type=range].slider.is-warning::-moz-range-track{background:#ffdd57!important}input[type=range].slider.is-warning::-webkit-slider-runnable-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-track{background:#ffdd57!important}input[type=range].slider.is-warning::-ms-fill-lower{background:#ffdd57}input[type=range].slider.is-warning::-ms-fill-upper{background:#ffdd57}input[type=range].slider.is-warning .has-output-tooltip+output,input[type=range].slider.is-warning.has-output+output{background-color:#ffdd57;color:rgba(0,0,0,.7)}input[type=range].slider.is-danger::-moz-range-track{background:#ff3860!important}input[type=range].slider.is-danger::-webkit-slider-runnable-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-track{background:#ff3860!important}input[type=range].slider.is-danger::-ms-fill-lower{background:#ff3860}input[type=range].slider.is-danger::-ms-fill-upper{background:#ff3860}input[type=range].slider.is-danger .has-output-tooltip+output,input[type=range].slider.is-danger.has-output+output{background-color:#ff3860;color:#fff} -------------------------------------------------------------------------------- /docs/static/css/custom.css: -------------------------------------------------------------------------------- 1 | 2 | .div-1 { 3 | background-color: rgb(255, 255, 255); 4 | } 5 | 6 | .div-2 { 7 | background-color: #eaeaea; 8 | } 9 | 10 | .div-3 { 11 | background-color: #FBD603; 12 | } 13 | 14 | .scroll-container { 15 | display: flex; 16 | overflow-x: auto; 17 | scroll-snap-type: x mandatory; 18 | gap: 12px; 19 | padding: 10px; 20 | scrollbar-width: 2px; 21 | height: 480px; 22 | } 23 | 24 | .scroll-item { 25 | flex: 0 0 auto; 26 | scroll-snap-type: center; 27 | align-items: center; 28 | justify-content: center; 29 | text-align: center; 30 | } 31 | 32 | 33 | .scroll-item img { 34 | display: inline-block; 35 | height: 350px; 36 | width: auto; 37 | border-radius: 6px; 38 | } 39 | 40 | .scroll-item .caption { 41 | display: block; 42 | max-width: 100%; 43 | margin-top: 8px; 44 | padding: 5px; 45 | border: 2px solid #ccc; 46 | border-radius: 5px; 47 | background-color: #f9f9f9; 48 | font-size: 18px; 49 | color: #333; 50 | word-wrap: break-word; 51 | word-break: break-word; 52 | white-space: normal; 53 | line-height:1.2; 54 | } 55 | 56 | .gallery-image { 57 | display: flex; 58 | flex-direction: column; 59 | justify-content: center; 60 | text-align: center; 61 | line-height: 200px; 62 | } 63 | 64 | 65 | .btn { 66 | border: 1px solid; 67 | background-color: #f8f9f9; 68 | font-size: 19px; 69 | padding: 2px 8px; 70 | border-radius:10px; 71 | } 72 | 73 | .one { 74 | color: #7E57C2; 75 | } 76 | 77 | .two { 78 | color: #FFAB40; 79 | } 80 | 81 | .three { 82 | color: #2980b9; 83 | } 84 | 85 | .btn:hover { 86 | color: white; 87 | border: 0; 88 | } 89 | 90 | .one:hover { 91 | background-color: #7E57C2; 92 | } 93 | 94 | .two:hover { 95 | background-color: #FFAB40; 96 | } 97 | 98 | .three:hover { 99 | background-color: #2980b9; 100 | } 101 | 102 | @keyframes change_text { 103 | 0% { 104 | opacity: 0; 105 | } 106 | 107 | 10% { 108 | opacity: 1; 109 | } 110 | } 111 | 112 | .choice_container { 113 | display: flex; 114 | align-items: center; 115 | gap: 10px; 116 | justify-content: center; 117 | } 118 | 119 | .image-list { 120 | margin-top: 35px; 121 | display: flex; 122 | flex-direction: column; 123 | gap: 10px; 124 | } 125 | 126 | 
.image-list img:hover { 127 | border-color: #007bff; 128 | } 129 | 130 | .image-list img { 131 | height: 80px; 132 | cursor: pointer; 133 | border: 2px solid #ccc; 134 | border-radius: 4px; 135 | } 136 | 137 | .selected-image img { 138 | height: 200px; 139 | border-radius: 4px; 140 | } 141 | 142 | .selected-image .caption { 143 | margin-top: 5px; 144 | padding: 5px; 145 | border-radius: 5px; 146 | font-size: 23px; 147 | font-weight: bold; 148 | } 149 | 150 | .new-view-container { 151 | width: 100%; 152 | height: 100%; 153 | border: 2px solid #ccc; 154 | border-radius: 4px; 155 | display: grid; 156 | grid-template-columns: repeat(3, 1fr); 157 | padding-left: 20px; 158 | padding-left: 5px; 159 | } 160 | 161 | .new-view-container img { 162 | width: 150px; 163 | height: 160px; 164 | border-radius: 4px; 165 | } 166 | 167 | 168 | .gif-container { 169 | margin-top: 45px; 170 | display: flex; 171 | flex-direction: row; 172 | gap: 80px; 173 | align-items: center; 174 | justify-content: center; 175 | } 176 | 177 | .gif-container img { 178 | width: 200px; 179 | height: auto; 180 | border-radius: 4px; 181 | } 182 | 183 | .gif-hover-image { 184 | display: block; 185 | border-radius: 4px; 186 | width: 100%; /* 图片宽度适配 */ 187 | height: auto; /* 保持宽高比 */ 188 | cursor: pointer; 189 | transition: all 0.3s ease; 190 | } 191 | 192 | .gif-hover-image:hover { 193 | transform: scale(1.05); 194 | } -------------------------------------------------------------------------------- /docs/static/css/index.css: -------------------------------------------------------------------------------- 1 | body { 2 | font-family: 'Noto Sans', sans-serif; 3 | } 4 | 5 | 6 | .footer .icon-link { 7 | font-size: 25px; 8 | color: #000; 9 | } 10 | 11 | .link-block a { 12 | margin-top: 5px; 13 | margin-bottom: 5px; 14 | } 15 | 16 | .dnerf { 17 | font-variant: small-caps; 18 | } 19 | 20 | 21 | .teaser .hero-body { 22 | padding-top: 0; 23 | padding-bottom: 3rem; 24 | } 25 | 26 | .teaser { 27 | font-family: 'Google Sans', sans-serif; 28 | } 29 | 30 | 31 | .publication-title { 32 | } 33 | 34 | .publication-banner { 35 | max-height: parent; 36 | 37 | } 38 | 39 | .publication-banner video { 40 | position: relative; 41 | left: auto; 42 | top: auto; 43 | transform: none; 44 | object-fit: fit; 45 | } 46 | 47 | .publication-header .hero-body { 48 | } 49 | 50 | /* @font-face { 51 | font-family: 'jackbrush'; 52 | src: url('static/font_style/jackbrush-alj9a.ttf'); 53 | } */ 54 | .model-name { 55 | font-family: 'Google Sans', sans-serif; 56 | font-weight: bold; 57 | color: white; 58 | font: xx-large; 59 | } 60 | 61 | 62 | .publication-title { 63 | font-family: 'Google Sans', sans-serif; 64 | font-weight: bold; 65 | color: white; 66 | font: xx-large; 67 | } 68 | 69 | .publication-authors { 70 | font-family: 'Google Sans', sans-serif; 71 | } 72 | 73 | .publication-venue { 74 | color: #555; 75 | width: fit-content; 76 | font-weight: bold; 77 | } 78 | 79 | .publication-awards { 80 | color: #ff3860; 81 | width: fit-content; 82 | font-weight: bolder; 83 | } 84 | 85 | .publication-authors { 86 | } 87 | 88 | .publication-authors a { 89 | color: hsl(204, 86%, 78%) !important; 90 | } 91 | 92 | .publication-authors a:hover { 93 | text-decoration: underline; 94 | } 95 | 96 | .author-block { 97 | display: inline-block; 98 | font-size: larger; 99 | color: hsl(0, 0%, 88%) 100 | } 101 | 102 | .publication-banner img { 103 | } 104 | 105 | .publication-authors { 106 | /*color: #4286f4;*/ 107 | } 108 | 109 | .publication-video { 110 | position: relative; 111 | width: 
100%; 112 | height: auto; 113 | 114 | overflow: hidden; 115 | border-radius: 10px !important; 116 | } 117 | 118 | .publication-video iframe { 119 | position: absolute; 120 | top: 0; 121 | left: 0; 122 | width: 100%; 123 | height: 100%; 124 | } 125 | 126 | .publication-body img { 127 | } 128 | 129 | .results-carousel { 130 | overflow: hidden; 131 | } 132 | 133 | .results-carousel .item { 134 | margin: 5px; 135 | overflow: hidden; 136 | border: 1px solid #bbb; 137 | border-radius: 10px; 138 | padding: 0; 139 | font-size: 0; 140 | } 141 | 142 | .results-carousel video { 143 | margin: 0; 144 | } 145 | 146 | 147 | .interpolation-panel { 148 | background: #f5f5f5; 149 | border-radius: 10px; 150 | } 151 | 152 | .interpolation-panel .interpolation-image { 153 | width: 100%; 154 | border-radius: 5px; 155 | } 156 | 157 | .interpolation-video-column { 158 | } 159 | 160 | .interpolation-panel .slider { 161 | margin: 0 !important; 162 | } 163 | 164 | .interpolation-panel .slider { 165 | margin: 0 !important; 166 | } 167 | 168 | #interpolation-image-wrapper { 169 | width: 100%; 170 | } 171 | #interpolation-image-wrapper img { 172 | border-radius: 5px; 173 | } 174 | 175 | .video-compare-container { 176 | width: 63%; 177 | margin: 0 auto; 178 | position: relative; 179 | display: block; 180 | line-height: 0; 181 | } 182 | 183 | .video { 184 | width: 100%; 185 | height: auto; 186 | position: relative; 187 | top: 0; 188 | left: 0; 189 | } 190 | 191 | .videoMerge { 192 | position: relative; 193 | top: 0; 194 | left: 0; 195 | z-index: 10; 196 | width: 100%; 197 | display: block; 198 | margin: 0 auto; 199 | background-size: cover; 200 | } 201 | -------------------------------------------------------------------------------- /docs/static/font_style/jackbrush-alj9a.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/font_style/jackbrush-alj9a.ttf -------------------------------------------------------------------------------- /docs/static/images/background.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/background.jpg -------------------------------------------------------------------------------- /docs/static/images/huggingface_logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 6 | 7 | 11 | 15 | 19 | 25 | 29 | 33 | 37 | 41 | 42 | -------------------------------------------------------------------------------- /docs/static/images/i23d/1-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-1.png -------------------------------------------------------------------------------- /docs/static/images/i23d/1-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-2.png -------------------------------------------------------------------------------- /docs/static/images/i23d/1-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-3.png 
-------------------------------------------------------------------------------- /docs/static/images/i23d/1-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-4.png -------------------------------------------------------------------------------- /docs/static/images/i23d/1-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-5.png -------------------------------------------------------------------------------- /docs/static/images/i23d/1-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1-6.png -------------------------------------------------------------------------------- /docs/static/images/i23d/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/1.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-1.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-2.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-3.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-4.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-5.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2-6.png -------------------------------------------------------------------------------- /docs/static/images/i23d/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/2.png 
-------------------------------------------------------------------------------- /docs/static/images/i23d/3-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-1.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-2.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-3.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-4.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-5.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3-6.png -------------------------------------------------------------------------------- /docs/static/images/i23d/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/3.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-1.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-2.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-3.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-4.png 
-------------------------------------------------------------------------------- /docs/static/images/i23d/4-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-5.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4-6.png -------------------------------------------------------------------------------- /docs/static/images/i23d/4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i23d/4.png -------------------------------------------------------------------------------- /docs/static/images/i2v/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/boat/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/boat/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/boat/boat.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/boat/boat.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/boat/boat.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/boat/boat.png -------------------------------------------------------------------------------- /docs/static/images/i2v/cake/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/cake/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/cake/cake.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/cake/cake.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/cake/cake.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/cake/cake.png -------------------------------------------------------------------------------- /docs/static/images/i2v/cloud/cloud.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/cloud/cloud.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/cloud/cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/cloud/cloud.png -------------------------------------------------------------------------------- /docs/static/images/i2v/fire/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/fire/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/fire/fire.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/fire/fire.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/fire/fire.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/fire/fire.png -------------------------------------------------------------------------------- /docs/static/images/i2v/meteor/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/meteor/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/meteor/meteor.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/meteor/meteor.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/meteor/meteor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/meteor/meteor.png -------------------------------------------------------------------------------- /docs/static/images/i2v/waterfall/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/waterfall/.DS_Store -------------------------------------------------------------------------------- /docs/static/images/i2v/waterfall/waterfall.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/waterfall/waterfall.gif -------------------------------------------------------------------------------- /docs/static/images/i2v/waterfall/waterfall.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/i2v/waterfall/waterfall.png 
-------------------------------------------------------------------------------- /docs/static/images/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/icon.png -------------------------------------------------------------------------------- /docs/static/images/interpolation/car/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/car/end.png -------------------------------------------------------------------------------- /docs/static/images/interpolation/car/interpolation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/car/interpolation.gif -------------------------------------------------------------------------------- /docs/static/images/interpolation/car/interpolation.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/car/interpolation.mp4 -------------------------------------------------------------------------------- /docs/static/images/interpolation/car/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/car/start.png -------------------------------------------------------------------------------- /docs/static/images/interpolation/cat_tiger/end.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/cat_tiger/end.png -------------------------------------------------------------------------------- /docs/static/images/interpolation/cat_tiger/interpolation.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/cat_tiger/interpolation.gif -------------------------------------------------------------------------------- /docs/static/images/interpolation/cat_tiger/interpolation.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/cat_tiger/interpolation.mp4 -------------------------------------------------------------------------------- /docs/static/images/interpolation/cat_tiger/start.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/interpolation/cat_tiger/start.png -------------------------------------------------------------------------------- /docs/static/images/logo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/logo.gif 
-------------------------------------------------------------------------------- /docs/static/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/logo.png -------------------------------------------------------------------------------- /docs/static/images/method/method.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/method/method.png -------------------------------------------------------------------------------- /docs/static/images/t2i/A baby rabbit wearing a tiny knitted hat, ultra-detailed, photorealistic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A baby rabbit wearing a tiny knitted hat, ultra-detailed, photorealistic.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A bear with fur made of chocolate shavings, standing in a clearing filled with marshmallow mushrooms.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A bear with fur made of chocolate shavings, standing in a clearing filled with marshmallow mushrooms.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A breathtaking view of the Swiss Alps during sunrise, with snow-capped peaks and lush green valleys, ultra-realistic, high detail.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A breathtaking view of the Swiss Alps during sunrise, with snow-capped peaks and lush green valleys, ultra-realistic, high detail.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A close-up of a sunlit butterfly resting on a flower in a garden.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A close-up of a sunlit butterfly resting on a flower in a garden.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A close-up of a vibrant, fully bloomed red rose with dew drops on its petals.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A close-up of a vibrant, fully bloomed red rose with dew drops on its petals.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A close-up photograph of a lion with its mane blowing in the wind against the savanna backdrop.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A close-up photograph of a lion with its mane 
blowing in the wind against the savanna backdrop.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A curious dolphin leaping out of the water, creating splashes in the sunlight.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A curious dolphin leaping out of the water, creating splashes in the sunlight.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A cyborg superhero with a robotic arm and high-tech gadgets, standing atop a skyscraper.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A cyborg superhero with a robotic arm and high-tech gadgets, standing atop a skyscraper.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A detailed close-up of a rusted vintage car abandoned in an overgrown field.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A detailed close-up of a rusted vintage car abandoned in an overgrown field.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A dragon made of molten chocolate, with scales that glisten like gold leaf and eyes of crystalline sugar.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A dragon made of molten chocolate, with scales that glisten like gold leaf and eyes of crystalline sugar.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A dramatic mountain range during a thunderstorm, with dark clouds, lightning strikes, and rugged terrain.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A dramatic mountain range during a thunderstorm, with dark clouds, lightning strikes, and rugged terrain.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A gorilla wearing an advanced robotic suit with pulsating energy cores, standing on the edge of a futuristic skyline.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A gorilla wearing an advanced robotic suit with pulsating energy cores, standing on the edge of a futuristic skyline.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A group of astronauts standing on the surface of Mars, with Earth visible in the distant sky.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A group of astronauts standing on the surface of Mars, with Earth visible in the 
distant sky.png -------------------------------------------------------------------------------- /docs/static/images/t2i/A hippopotamus with a body of jelly-like translucent gelatin, lounging in a pool of liquid sherbet.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A hippopotamus with a body of jelly-like translucent gelatin, lounging in a pool of liquid sherbet.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A lion made entirely of layered caramel and chocolate, with a mane composed of spun sugar flames.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A lion made entirely of layered caramel and chocolate, with a mane composed of spun sugar flames.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A lion with a mane made of holographic flames, standing on a crystal platform in a neon-lit jungle.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A lion with a mane made of holographic flames, standing on a crystal platform in a neon-lit jungle.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A magical forest with glowing plants, where a young anime girl with long hair discovers a hidden portal.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A magical forest with glowing plants, where a young anime girl with long hair discovers a hidden portal.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A majestic bald eagle soaring over a snowy mountain range.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A majestic bald eagle soaring over a snowy mountain range.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A majestic mountain range under a starry sky, with swirling clouds and glowing moonlight, inspired by Van Gogh.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A majestic mountain range under a starry sky, with swirling clouds and glowing moonlight, inspired by Van Gogh.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A peaceful forest in autumn, with golden leaves falling and a stream running through it, illuminated by soft sunlight.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A peaceful forest in autumn, with golden leaves falling and a stream running through it, 
illuminated by soft sunlight.png -------------------------------------------------------------------------------- /docs/static/images/t2i/A peaceful mountain lake reflecting the surrounding pine trees and snowy peaks, photorealistic, tranquil.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A peaceful mountain lake reflecting the surrounding pine trees and snowy peaks, photorealistic, tranquil.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A phoenix-like bird with wings made of fiery red fruit leather and a beak of candied citrus peel.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A phoenix-like bird with wings made of fiery red fruit leather and a beak of candied citrus peel.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A realistic photograph of a wolf howling at the moon in a snowy forest.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A realistic photograph of a wolf howling at the moon in a snowy forest.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/A rustic bedroom showcasing a round bed, earth-toned decor, and a cluttered, yet charming ambiance.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A rustic bedroom showcasing a round bed, earth-toned decor, and a cluttered, yet charming ambiance.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A stealthy ninja superhero in a dark alley, showcasing agility and advanced technology.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A stealthy ninja superhero in a dark alley, showcasing agility and advanced technology.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/A wolf constructed from layers of dark chocolate and nougat, with glowing eyes made of candied cherries.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/A wolf constructed from layers of dark chocolate and nougat, with glowing eyes made of candied cherries.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/An owl constructed from layers of caramel popcorn and hazelnut chocolate, perched on a pretzel branch.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/An owl constructed from layers of caramel popcorn and hazelnut chocolate, perched on a pretzel branch.jpeg 
-------------------------------------------------------------------------------- /docs/static/images/t2i/Bentley Bacalar driving on black tarmac road with trees in background, Sumi-e drawing, white background 8k.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Bentley Bacalar driving on black tarmac road with trees in background, Sumi-e drawing, white background 8k.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/Documentary-style photography of a bustling marketplace in Marrakech, with spices and textiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Documentary-style photography of a bustling marketplace in Marrakech, with spices and textiles.png -------------------------------------------------------------------------------- /docs/static/images/t2i/Kraken is listening to music with headphones.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Kraken is listening to music with headphones.png -------------------------------------------------------------------------------- /docs/static/images/t2i/Post-Apocalyptic Wanderer, character design, style by kim jung gi, zabrocki, karlkka, jayison devadas, 8k.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Post-Apocalyptic Wanderer, character design, style by kim jung gi, zabrocki, karlkka, jayison devadas, 8k.png -------------------------------------------------------------------------------- /docs/static/images/t2i/The picture shows a cute little tiger, wearing a blue hoodie and hat, sitting on a small cardboard boat on calm water.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/The picture shows a cute little tiger, wearing a blue hoodie and hat, sitting on a small cardboard boat on calm water.png -------------------------------------------------------------------------------- /docs/static/images/t2i/Two baby ducks swimming in a pond at sunset, highly detailed, hyper-realistic.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Two baby ducks swimming in a pond at sunset, highly detailed, hyper-realistic.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/Two female rabbit adventurers dressed in a fancy velvet coats next to a Christmas tree, Christmas theme, on an antique opulent background , jean - baptiste monge , smooth, anthropomorphic photorealistic, photography, lifelike, high resolution, smooth.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/Two female rabbit 
adventurers dressed in a fancy velvet coats next to a Christmas tree, Christmas theme, on an antique opulent background , jean - baptiste monge , smooth, anthropomorphic photorealistic, photography, lifelike, high resolution, smooth.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/beautiful lady,freckles, big smile,blue eyes, short ginger hair, dark makeup, wearing a floral blue vest top, soft light,dark grey background.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/beautiful lady,freckles, big smile,blue eyes, short ginger hair, dark makeup, wearing a floral blue vest top, soft light,dark grey background.jpeg -------------------------------------------------------------------------------- /docs/static/images/t2i/cloud.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/cloud.jpg -------------------------------------------------------------------------------- /docs/static/images/t2i/golden sunset shines on the top of snow-capped mountains, with small villages at its foot and surrounding buildings.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ant-research/lumos/3076dfee3128613c631b57cc20b52e6bbb61dc9f/docs/static/images/t2i/golden sunset shines on the top of snow-capped mountains, with small villages at its foot and surrounding buildings.png -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.js: -------------------------------------------------------------------------------- 1 | (function webpackUniversalModuleDefinition(root, factory) { 2 | if(typeof exports === 'object' && typeof module === 'object') 3 | module.exports = factory(); 4 | else if(typeof define === 'function' && define.amd) 5 | define([], factory); 6 | else if(typeof exports === 'object') 7 | exports["bulmaSlider"] = factory(); 8 | else 9 | root["bulmaSlider"] = factory(); 10 | })(typeof self !== 'undefined' ? 
self : this, function() { 11 | return /******/ (function(modules) { // webpackBootstrap 12 | /******/ // The module cache 13 | /******/ var installedModules = {}; 14 | /******/ 15 | /******/ // The require function 16 | /******/ function __webpack_require__(moduleId) { 17 | /******/ 18 | /******/ // Check if module is in cache 19 | /******/ if(installedModules[moduleId]) { 20 | /******/ return installedModules[moduleId].exports; 21 | /******/ } 22 | /******/ // Create a new module (and put it into the cache) 23 | /******/ var module = installedModules[moduleId] = { 24 | /******/ i: moduleId, 25 | /******/ l: false, 26 | /******/ exports: {} 27 | /******/ }; 28 | /******/ 29 | /******/ // Execute the module function 30 | /******/ modules[moduleId].call(module.exports, module, module.exports, __webpack_require__); 31 | /******/ 32 | /******/ // Flag the module as loaded 33 | /******/ module.l = true; 34 | /******/ 35 | /******/ // Return the exports of the module 36 | /******/ return module.exports; 37 | /******/ } 38 | /******/ 39 | /******/ 40 | /******/ // expose the modules object (__webpack_modules__) 41 | /******/ __webpack_require__.m = modules; 42 | /******/ 43 | /******/ // expose the module cache 44 | /******/ __webpack_require__.c = installedModules; 45 | /******/ 46 | /******/ // define getter function for harmony exports 47 | /******/ __webpack_require__.d = function(exports, name, getter) { 48 | /******/ if(!__webpack_require__.o(exports, name)) { 49 | /******/ Object.defineProperty(exports, name, { 50 | /******/ configurable: false, 51 | /******/ enumerable: true, 52 | /******/ get: getter 53 | /******/ }); 54 | /******/ } 55 | /******/ }; 56 | /******/ 57 | /******/ // getDefaultExport function for compatibility with non-harmony modules 58 | /******/ __webpack_require__.n = function(module) { 59 | /******/ var getter = module && module.__esModule ? 
60 | /******/ function getDefault() { return module['default']; } : 61 | /******/ function getModuleExports() { return module; }; 62 | /******/ __webpack_require__.d(getter, 'a', getter); 63 | /******/ return getter; 64 | /******/ }; 65 | /******/ 66 | /******/ // Object.prototype.hasOwnProperty.call 67 | /******/ __webpack_require__.o = function(object, property) { return Object.prototype.hasOwnProperty.call(object, property); }; 68 | /******/ 69 | /******/ // __webpack_public_path__ 70 | /******/ __webpack_require__.p = ""; 71 | /******/ 72 | /******/ // Load entry module and return exports 73 | /******/ return __webpack_require__(__webpack_require__.s = 0); 74 | /******/ }) 75 | /************************************************************************/ 76 | /******/ ([ 77 | /* 0 */ 78 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 79 | 80 | "use strict"; 81 | Object.defineProperty(__webpack_exports__, "__esModule", { value: true }); 82 | /* harmony export (binding) */ __webpack_require__.d(__webpack_exports__, "isString", function() { return isString; }); 83 | /* harmony import */ var __WEBPACK_IMPORTED_MODULE_0__events__ = __webpack_require__(1); 84 | var _extends = Object.assign || function (target) { for (var i = 1; i < arguments.length; i++) { var source = arguments[i]; for (var key in source) { if (Object.prototype.hasOwnProperty.call(source, key)) { target[key] = source[key]; } } } return target; }; 85 | 86 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 87 | 88 | var _typeof = typeof Symbol === "function" && typeof Symbol.iterator === "symbol" ? function (obj) { return typeof obj; } : function (obj) { return obj && typeof Symbol === "function" && obj.constructor === Symbol && obj !== Symbol.prototype ? "symbol" : typeof obj; }; 89 | 90 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 91 | 92 | function _possibleConstructorReturn(self, call) { if (!self) { throw new ReferenceError("this hasn't been initialised - super() hasn't been called"); } return call && (typeof call === "object" || typeof call === "function") ? call : self; } 93 | 94 | function _inherits(subClass, superClass) { if (typeof superClass !== "function" && superClass !== null) { throw new TypeError("Super expression must either be null or a function, not " + typeof superClass); } subClass.prototype = Object.create(superClass && superClass.prototype, { constructor: { value: subClass, enumerable: false, writable: true, configurable: true } }); if (superClass) Object.setPrototypeOf ? Object.setPrototypeOf(subClass, superClass) : subClass.__proto__ = superClass; } 95 | 96 | 97 | 98 | var isString = function isString(unknown) { 99 | return typeof unknown === 'string' || !!unknown && (typeof unknown === 'undefined' ? 
'undefined' : _typeof(unknown)) === 'object' && Object.prototype.toString.call(unknown) === '[object String]'; 100 | }; 101 | 102 | var bulmaSlider = function (_EventEmitter) { 103 | _inherits(bulmaSlider, _EventEmitter); 104 | 105 | function bulmaSlider(selector) { 106 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 107 | 108 | _classCallCheck(this, bulmaSlider); 109 | 110 | var _this = _possibleConstructorReturn(this, (bulmaSlider.__proto__ || Object.getPrototypeOf(bulmaSlider)).call(this)); 111 | 112 | _this.element = typeof selector === 'string' ? document.querySelector(selector) : selector; 113 | // An invalid selector or non-DOM node has been provided. 114 | if (!_this.element) { 115 | throw new Error('An invalid selector or non-DOM node has been provided.'); 116 | } 117 | 118 | _this._clickEvents = ['click']; 119 | /// Set default options and merge with instance defined 120 | _this.options = _extends({}, options); 121 | 122 | _this.onSliderInput = _this.onSliderInput.bind(_this); 123 | 124 | _this.init(); 125 | return _this; 126 | } 127 | 128 | /** 129 | * Initiate all DOM element containing selector 130 | * @method 131 | * @return {Array} Array of all slider instances 132 | */ 133 | 134 | 135 | _createClass(bulmaSlider, [{ 136 | key: 'init', 137 | 138 | 139 | /** 140 | * Initiate plugin 141 | * @method init 142 | * @return {void} 143 | */ 144 | value: function init() { 145 | this._id = 'bulmaSlider' + new Date().getTime() + Math.floor(Math.random() * Math.floor(9999)); 146 | this.output = this._findOutputForSlider(); 147 | 148 | this._bindEvents(); 149 | 150 | if (this.output) { 151 | if (this.element.classList.contains('has-output-tooltip')) { 152 | // Get new output position 153 | var newPosition = this._getSliderOutputPosition(); 154 | 155 | // Set output position 156 | this.output.style['left'] = newPosition.position; 157 | } 158 | } 159 | 160 | this.emit('bulmaslider:ready', this.element.value); 161 | } 162 | }, { 163 | key: '_findOutputForSlider', 164 | value: function _findOutputForSlider() { 165 | var _this2 = this; 166 | 167 | var result = null; 168 | var outputs = document.getElementsByTagName('output') || []; 169 | 170 | Array.from(outputs).forEach(function (output) { 171 | if (output.htmlFor == _this2.element.getAttribute('id')) { 172 | result = output; 173 | return true; 174 | } 175 | }); 176 | return result; 177 | } 178 | }, { 179 | key: '_getSliderOutputPosition', 180 | value: function _getSliderOutputPosition() { 181 | // Update output position 182 | var newPlace, minValue; 183 | 184 | var style = window.getComputedStyle(this.element, null); 185 | // Measure width of range input 186 | var sliderWidth = parseInt(style.getPropertyValue('width'), 10); 187 | 188 | // Figure out placement percentage between left and right of input 189 | if (!this.element.getAttribute('min')) { 190 | minValue = 0; 191 | } else { 192 | minValue = this.element.getAttribute('min'); 193 | } 194 | var newPoint = (this.element.value - minValue) / (this.element.getAttribute('max') - minValue); 195 | 196 | // Prevent bubble from going beyond left or right (unsupported browsers) 197 | if (newPoint < 0) { 198 | newPlace = 0; 199 | } else if (newPoint > 1) { 200 | newPlace = sliderWidth; 201 | } else { 202 | newPlace = sliderWidth * newPoint; 203 | } 204 | 205 | return { 206 | 'position': newPlace + 'px' 207 | }; 208 | } 209 | 210 | /** 211 | * Bind all events 212 | * @method _bindEvents 213 | * @return {void} 214 | */ 215 | 216 | }, { 217 | key: 
'_bindEvents', 218 | value: function _bindEvents() { 219 | if (this.output) { 220 | // Add event listener to update output when slider value change 221 | this.element.addEventListener('input', this.onSliderInput, false); 222 | } 223 | } 224 | }, { 225 | key: 'onSliderInput', 226 | value: function onSliderInput(e) { 227 | e.preventDefault(); 228 | 229 | if (this.element.classList.contains('has-output-tooltip')) { 230 | // Get new output position 231 | var newPosition = this._getSliderOutputPosition(); 232 | 233 | // Set output position 234 | this.output.style['left'] = newPosition.position; 235 | } 236 | 237 | // Check for prefix and postfix 238 | var prefix = this.output.hasAttribute('data-prefix') ? this.output.getAttribute('data-prefix') : ''; 239 | var postfix = this.output.hasAttribute('data-postfix') ? this.output.getAttribute('data-postfix') : ''; 240 | 241 | // Update output with slider value 242 | this.output.value = prefix + this.element.value + postfix; 243 | 244 | this.emit('bulmaslider:ready', this.element.value); 245 | } 246 | }], [{ 247 | key: 'attach', 248 | value: function attach() { 249 | var _this3 = this; 250 | 251 | var selector = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : 'input[type="range"].slider'; 252 | var options = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : {}; 253 | 254 | var instances = new Array(); 255 | 256 | var elements = isString(selector) ? document.querySelectorAll(selector) : Array.isArray(selector) ? selector : [selector]; 257 | elements.forEach(function (element) { 258 | if (typeof element[_this3.constructor.name] === 'undefined') { 259 | var instance = new bulmaSlider(element, options); 260 | element[_this3.constructor.name] = instance; 261 | instances.push(instance); 262 | } else { 263 | instances.push(element[_this3.constructor.name]); 264 | } 265 | }); 266 | 267 | return instances; 268 | } 269 | }]); 270 | 271 | return bulmaSlider; 272 | }(__WEBPACK_IMPORTED_MODULE_0__events__["a" /* default */]); 273 | 274 | /* harmony default export */ __webpack_exports__["default"] = (bulmaSlider); 275 | 276 | /***/ }), 277 | /* 1 */ 278 | /***/ (function(module, __webpack_exports__, __webpack_require__) { 279 | 280 | "use strict"; 281 | var _createClass = function () { function defineProperties(target, props) { for (var i = 0; i < props.length; i++) { var descriptor = props[i]; descriptor.enumerable = descriptor.enumerable || false; descriptor.configurable = true; if ("value" in descriptor) descriptor.writable = true; Object.defineProperty(target, descriptor.key, descriptor); } } return function (Constructor, protoProps, staticProps) { if (protoProps) defineProperties(Constructor.prototype, protoProps); if (staticProps) defineProperties(Constructor, staticProps); return Constructor; }; }(); 282 | 283 | function _classCallCheck(instance, Constructor) { if (!(instance instanceof Constructor)) { throw new TypeError("Cannot call a class as a function"); } } 284 | 285 | var EventEmitter = function () { 286 | function EventEmitter() { 287 | var listeners = arguments.length > 0 && arguments[0] !== undefined ? 
arguments[0] : []; 288 | 289 | _classCallCheck(this, EventEmitter); 290 | 291 | this._listeners = new Map(listeners); 292 | this._middlewares = new Map(); 293 | } 294 | 295 | _createClass(EventEmitter, [{ 296 | key: "listenerCount", 297 | value: function listenerCount(eventName) { 298 | if (!this._listeners.has(eventName)) { 299 | return 0; 300 | } 301 | 302 | var eventListeners = this._listeners.get(eventName); 303 | return eventListeners.length; 304 | } 305 | }, { 306 | key: "removeListeners", 307 | value: function removeListeners() { 308 | var _this = this; 309 | 310 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 311 | var middleware = arguments.length > 1 && arguments[1] !== undefined ? arguments[1] : false; 312 | 313 | if (eventName !== null) { 314 | if (Array.isArray(eventName)) { 315 | name.forEach(function (e) { 316 | return _this.removeListeners(e, middleware); 317 | }); 318 | } else { 319 | this._listeners.delete(eventName); 320 | 321 | if (middleware) { 322 | this.removeMiddleware(eventName); 323 | } 324 | } 325 | } else { 326 | this._listeners = new Map(); 327 | } 328 | } 329 | }, { 330 | key: "middleware", 331 | value: function middleware(eventName, fn) { 332 | var _this2 = this; 333 | 334 | if (Array.isArray(eventName)) { 335 | name.forEach(function (e) { 336 | return _this2.middleware(e, fn); 337 | }); 338 | } else { 339 | if (!Array.isArray(this._middlewares.get(eventName))) { 340 | this._middlewares.set(eventName, []); 341 | } 342 | 343 | this._middlewares.get(eventName).push(fn); 344 | } 345 | } 346 | }, { 347 | key: "removeMiddleware", 348 | value: function removeMiddleware() { 349 | var _this3 = this; 350 | 351 | var eventName = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 352 | 353 | if (eventName !== null) { 354 | if (Array.isArray(eventName)) { 355 | name.forEach(function (e) { 356 | return _this3.removeMiddleware(e); 357 | }); 358 | } else { 359 | this._middlewares.delete(eventName); 360 | } 361 | } else { 362 | this._middlewares = new Map(); 363 | } 364 | } 365 | }, { 366 | key: "on", 367 | value: function on(name, callback) { 368 | var _this4 = this; 369 | 370 | var once = arguments.length > 2 && arguments[2] !== undefined ? arguments[2] : false; 371 | 372 | if (Array.isArray(name)) { 373 | name.forEach(function (e) { 374 | return _this4.on(e, callback); 375 | }); 376 | } else { 377 | name = name.toString(); 378 | var split = name.split(/,|, | /); 379 | 380 | if (split.length > 1) { 381 | split.forEach(function (e) { 382 | return _this4.on(e, callback); 383 | }); 384 | } else { 385 | if (!Array.isArray(this._listeners.get(name))) { 386 | this._listeners.set(name, []); 387 | } 388 | 389 | this._listeners.get(name).push({ once: once, callback: callback }); 390 | } 391 | } 392 | } 393 | }, { 394 | key: "once", 395 | value: function once(name, callback) { 396 | this.on(name, callback, true); 397 | } 398 | }, { 399 | key: "emit", 400 | value: function emit(name, data) { 401 | var _this5 = this; 402 | 403 | var silent = arguments.length > 2 && arguments[2] !== undefined ? 
arguments[2] : false; 404 | 405 | name = name.toString(); 406 | var listeners = this._listeners.get(name); 407 | var middlewares = null; 408 | var doneCount = 0; 409 | var execute = silent; 410 | 411 | if (Array.isArray(listeners)) { 412 | listeners.forEach(function (listener, index) { 413 | // Start Middleware checks unless we're doing a silent emit 414 | if (!silent) { 415 | middlewares = _this5._middlewares.get(name); 416 | // Check and execute Middleware 417 | if (Array.isArray(middlewares)) { 418 | middlewares.forEach(function (middleware) { 419 | middleware(data, function () { 420 | var newData = arguments.length > 0 && arguments[0] !== undefined ? arguments[0] : null; 421 | 422 | if (newData !== null) { 423 | data = newData; 424 | } 425 | doneCount++; 426 | }, name); 427 | }); 428 | 429 | if (doneCount >= middlewares.length) { 430 | execute = true; 431 | } 432 | } else { 433 | execute = true; 434 | } 435 | } 436 | 437 | // If Middleware checks have been passed, execute 438 | if (execute) { 439 | if (listener.once) { 440 | listeners[index] = null; 441 | } 442 | listener.callback(data); 443 | } 444 | }); 445 | 446 | // Dirty way of removing used Events 447 | while (listeners.indexOf(null) !== -1) { 448 | listeners.splice(listeners.indexOf(null), 1); 449 | } 450 | } 451 | } 452 | }]); 453 | 454 | return EventEmitter; 455 | }(); 456 | 457 | /* harmony default export */ __webpack_exports__["a"] = (EventEmitter); 458 | 459 | /***/ }) 460 | /******/ ])["default"]; 461 | }); -------------------------------------------------------------------------------- /docs/static/js/bulma-slider.min.js: -------------------------------------------------------------------------------- 1 | !function(t,e){"object"==typeof exports&&"object"==typeof module?module.exports=e():"function"==typeof define&&define.amd?define([],e):"object"==typeof exports?exports.bulmaSlider=e():t.bulmaSlider=e()}("undefined"!=typeof self?self:this,function(){return function(n){var r={};function i(t){if(r[t])return r[t].exports;var e=r[t]={i:t,l:!1,exports:{}};return n[t].call(e.exports,e,e.exports,i),e.l=!0,e.exports}return i.m=n,i.c=r,i.d=function(t,e,n){i.o(t,e)||Object.defineProperty(t,e,{configurable:!1,enumerable:!0,get:n})},i.n=function(t){var e=t&&t.__esModule?function(){return t.default}:function(){return t};return i.d(e,"a",e),e},i.o=function(t,e){return Object.prototype.hasOwnProperty.call(t,e)},i.p="",i(i.s=0)}([function(t,e,n){"use strict";Object.defineProperty(e,"__esModule",{value:!0}),n.d(e,"isString",function(){return l});var r=n(1),i=Object.assign||function(t){for(var e=1;e=l.length&&(s=!0)):s=!0),s&&(t.once&&(u[e]=null),t.callback(r))});-1!==u.indexOf(null);)u.splice(u.indexOf(null),1)}}]),e}();e.a=i}]).default}); -------------------------------------------------------------------------------- /docs/static/js/choose_image.js: -------------------------------------------------------------------------------- 1 | 2 | const thumbnails = document.querySelectorAll('.image-list img'); 3 | const selectedImage = document.querySelector('.selected-image img'); 4 | const NewViewImages = document.querySelectorAll('.new-view-container img'); 5 | 6 | const NewViewSets = [ 7 | [ 8 | 'static/images/i23d/1-1.png', 'static/images/i23d/1-2.png', 'static/images/i23d/1-3.png', 9 | 'static/images/i23d/1-4.png', 'static/images/i23d/1-5.png', 'static/images/i23d/1-6.png', 10 | ], 11 | [ 12 | 'static/images/i23d/2-1.png', 'static/images/i23d/2-2.png', 'static/images/i23d/2-3.png', 13 | 'static/images/i23d/2-4.png', 
'static/images/i23d/2-5.png', 'static/images/i23d/2-6.png', 14 | ], 15 | [ 16 | 'static/images/i23d/3-1.png', 'static/images/i23d/3-2.png', 'static/images/i23d/3-3.png', 17 | 'static/images/i23d/3-4.png', 'static/images/i23d/3-5.png', 'static/images/i23d/3-6.png', 18 | ], 19 | [ 20 | 'static/images/i23d/4-1.png', 'static/images/i23d/4-2.png', 'static/images/i23d/4-3.png', 21 | 'static/images/i23d/4-4.png', 'static/images/i23d/4-5.png', 'static/images/i23d/4-6.png', 22 | ] 23 | ]; 24 | 25 | 26 | thumbnails.forEach(thumbnail => { 27 | thumbnail.addEventListener('click', () => { 28 | const largeSrc = thumbnail.getAttribute('data-large'); 29 | 30 | selectedImage.src = largeSrc; 31 | 32 | thumbnails.forEach(img => img.classList.remove('selected')); 33 | 34 | thumbnail.classList.add('selected'); 35 | 36 | if (largeSrc=="static/images/i23d/1.png"){ 37 | NewViewImages.forEach((img, index)=>{ 38 | img.src = NewViewSets[0][index] 39 | }); 40 | } else if (largeSrc=="static/images/i23d/2.png"){ 41 | NewViewImages.forEach((img, index)=>{ 42 | img.src = NewViewSets[1][index] 43 | }); 44 | } else if (largeSrc=="static/images/i23d/3.png"){ 45 | const new_view_images = NewViewSets[2]; 46 | NewViewImages.forEach((img, index)=>{ 47 | img.src = new_view_images[index] 48 | }); 49 | } else if (largeSrc=="static/images/i23d/4.png"){ 50 | const new_view_images = NewViewSets[3]; 51 | NewViewImages.forEach((img, index)=>{ 52 | img.src = new_view_images[index] 53 | }); 54 | } 55 | 56 | }); 57 | }); 58 | -------------------------------------------------------------------------------- /docs/static/js/image2gif.js: -------------------------------------------------------------------------------- 1 | document.querySelectorAll('.gif-hover-image').forEach(img => { 2 | const originalSrc = img.src; // save the original static image path 3 | const gifSrc = img.dataset.gif; // get the path from the data-gif attribute 4 | 5 | // mouse hover event 6 | img.addEventListener('mouseover', () => { 7 | img.src = gifSrc; // switch to the corresponding GIF 8 | }); 9 | 10 | // mouse out event 11 | img.addEventListener('mouseout', () => { 12 | img.src = originalSrc; // restore the static image 13 | }); 14 | }); 15 | -------------------------------------------------------------------------------- /docs/static/js/index.js: -------------------------------------------------------------------------------- 1 | window.HELP_IMPROVE_VIDEOJS = false; 2 | 3 | var INTERP_BASE = "./static/interpolation/stacked"; 4 | var NUM_INTERP_FRAMES = 240; 5 | 6 | var interp_images = []; 7 | function preloadInterpolationImages() { 8 | for (var i = 0; i < NUM_INTERP_FRAMES; i++) { 9 | var path = INTERP_BASE + '/' + String(i).padStart(6, '0') + '.jpg'; 10 | interp_images[i] = new Image(); 11 | interp_images[i].src = path; 12 | } 13 | } 14 | 15 | function setInterpolationImage(i) { 16 | var image = interp_images[i]; 17 | image.ondragstart = function() { return false; }; 18 | image.oncontextmenu = function() { return false; }; 19 | $('#interpolation-image-wrapper').empty().append(image); 20 | } 21 | 22 | 23 | $(document).ready(function() { 24 | // Check for click events on the navbar burger icon 25 | $(".navbar-burger").click(function() { 26 | // Toggle the "is-active" class on both the "navbar-burger" and the "navbar-menu" 27 | $(".navbar-burger").toggleClass("is-active"); 28 | $(".navbar-menu").toggleClass("is-active"); 29 | 30 | }); 31 | 32 | var options = { 33 | slidesToScroll: 1, 34 | slidesToShow: 3, 35 | loop: true, 36 | infinite: true, 37 | autoplay: false, 38 | autoplaySpeed: 3000, 39 | } 40 | 41 | // Initialize all div with carousel class 42 | var carousels = 
bulmaCarousel.attach('.carousel', options); 43 | 44 | // Loop on each carousel initialized 45 | for(var i = 0; i < carousels.length; i++) { 46 | // Add listener to event 47 | carousels[i].on('before:show', state => { 48 | console.log(state); 49 | }); 50 | } 51 | 52 | // Access to bulmaCarousel instance of an element 53 | var element = document.querySelector('#my-element'); 54 | if (element && element.bulmaCarousel) { 55 | // bulmaCarousel instance is available as element.bulmaCarousel 56 | element.bulmaCarousel.on('before-show', function(state) { 57 | console.log(state); 58 | }); 59 | } 60 | 61 | /*var player = document.getElementById('interpolation-video'); 62 | player.addEventListener('loadedmetadata', function() { 63 | $('#interpolation-slider').on('input', function(event) { 64 | console.log(this.value, player.duration); 65 | player.currentTime = player.duration / 100 * this.value; 66 | }) 67 | }, false);*/ 68 | preloadInterpolationImages(); 69 | 70 | $('#interpolation-slider').on('input', function(event) { 71 | setInterpolationImage(this.value); 72 | }); 73 | setInterpolationImage(0); 74 | $('#interpolation-slider').prop('max', NUM_INTERP_FRAMES - 1); 75 | 76 | bulmaSlider.attach(); 77 | 78 | }) 79 | -------------------------------------------------------------------------------- /docs/static/js/scroll.js: -------------------------------------------------------------------------------- 1 | const scrollContainer = document.querySelector('.scroll-container'); 2 | let isUserInteracting = false; 3 | let autoScrollInterval; 4 | let scrollSpeed = 0.8; 5 | 6 | function autoScroll() { 7 | if (!isUserInteracting) { 8 | scrollContainer.scrollLeft += scrollSpeed; 9 | if (scrollContainer.scrollLeft >= scrollContainer.scrollWidth - scrollContainer.offsetWidth) { 10 | scrollContainer.scrollLeft = 0; 11 | } 12 | } 13 | } 14 | 15 | scrollContainer.addEventListener('mousedown', () => { 16 | isUserInteracting = true; 17 | }); 18 | 19 | scrollContainer.addEventListener('mouseup', () => { 20 | isUserInteracting = false; 21 | }); 22 | 23 | function startAutoScroll() { 24 | autoScrollInterval = setInterval(autoScroll, 20); 25 | } 26 | 27 | 28 | function stopAutoScroll() { 29 | clearInterval(autoScrollInterval); 30 | } 31 | 32 | 33 | scrollContainer.addEventListener('mouseover', stopAutoScroll); 34 | 35 | scrollContainer.addEventListener('mouseout', startAutoScroll); 36 | 37 | 38 | // initialize 39 | startAutoScroll(); -------------------------------------------------------------------------------- /docs/static/js/video_comparison.js: -------------------------------------------------------------------------------- 1 | // This is based on: http://thenewcode.com/364/Interactive-Before-and-After-Video-Comparison-in-HTML5-Canvas 2 | // With additional modifications based on: https://jsfiddle.net/7sk5k4gp/13/ 3 | 4 | function playVids(videoId) { 5 | var videoMerge = document.getElementById(videoId + "Merge"); 6 | var vid = document.getElementById(videoId); 7 | 8 | var position = 0.5; 9 | var vidWidth = vid.videoWidth/2; 10 | var vidHeight = vid.videoHeight; 11 | 12 | var mergeContext = videoMerge.getContext("2d"); 13 | 14 | 15 | if (vid.readyState > 3) { 16 | vid.play(); 17 | 18 | function trackLocation(e) { 19 | // Normalize to [0, 1] 20 | bcr = videoMerge.getBoundingClientRect(); 21 | position = ((e.pageX - bcr.x) / bcr.width); 22 | } 23 | function trackLocationTouch(e) { 24 | // Normalize to [0, 1] 25 | bcr = videoMerge.getBoundingClientRect(); 26 | position = ((e.touches[0].pageX - bcr.x) / bcr.width); 27 | } 28 | 
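// How the before/after comparison works: each frame of the source video holds two renderings side by side,
// so vidWidth above is half the raw frame width. trackLocation / trackLocationTouch map the pointer's
// x-coordinate to `position` in [0, 1]; drawLoop (below) first paints the left half of the frame onto the
// canvas, then overdraws every column to the right of vidWidth * position with the matching columns taken
// from the right half of the frame, and finally renders the divider line and the drag handle at the pointer.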
29 | videoMerge.addEventListener("mousemove", trackLocation, false); 30 | videoMerge.addEventListener("touchstart", trackLocationTouch, false); 31 | videoMerge.addEventListener("touchmove", trackLocationTouch, false); 32 | 33 | 34 | function drawLoop() { 35 | mergeContext.drawImage(vid, 0, 0, vidWidth, vidHeight, 0, 0, vidWidth, vidHeight); 36 | var colStart = (vidWidth * position).clamp(0.0, vidWidth); 37 | var colWidth = (vidWidth - (vidWidth * position)).clamp(0.0, vidWidth); 38 | mergeContext.drawImage(vid, colStart+vidWidth, 0, colWidth, vidHeight, colStart, 0, colWidth, vidHeight); 39 | requestAnimationFrame(drawLoop); 40 | 41 | 42 | var arrowLength = 0.07 * vidHeight; 43 | var arrowheadWidth = 0.020 * vidHeight; 44 | var arrowheadLength = 0.04 * vidHeight; 45 | var arrowPosY = vidHeight / 10; 46 | var arrowWidth = 0.007 * vidHeight; 47 | var currX = vidWidth * position; 48 | 49 | // Draw circle 50 | mergeContext.arc(currX, arrowPosY, arrowLength*0.7, 0, Math.PI * 2, false); 51 | mergeContext.fillStyle = "#FFD79340"; 52 | mergeContext.fill() 53 | //mergeContext.strokeStyle = "#444444"; 54 | //mergeContext.stroke() 55 | 56 | // Draw border 57 | mergeContext.beginPath(); 58 | mergeContext.moveTo(vidWidth*position, 0); 59 | mergeContext.lineTo(vidWidth*position, vidHeight); 60 | mergeContext.closePath() 61 | mergeContext.strokeStyle = "#444444"; 62 | mergeContext.lineWidth = 3; 63 | mergeContext.stroke(); 64 | 65 | // Draw arrow 66 | mergeContext.beginPath(); 67 | mergeContext.moveTo(currX, arrowPosY - arrowWidth/2); 68 | 69 | // Move right until meeting arrow head 70 | mergeContext.lineTo(currX + arrowLength/2 - arrowheadLength/2, arrowPosY - arrowWidth/2); 71 | 72 | // Draw right arrow head 73 | mergeContext.lineTo(currX + arrowLength/2 - arrowheadLength/2, arrowPosY - arrowheadWidth/2); 74 | mergeContext.lineTo(currX + arrowLength/2, arrowPosY); 75 | mergeContext.lineTo(currX + arrowLength/2 - arrowheadLength/2, arrowPosY + arrowheadWidth/2); 76 | mergeContext.lineTo(currX + arrowLength/2 - arrowheadLength/2, arrowPosY + arrowWidth/2); 77 | 78 | // Go back to the left until meeting left arrow head 79 | mergeContext.lineTo(currX - arrowLength/2 + arrowheadLength/2, arrowPosY + arrowWidth/2); 80 | 81 | // Draw left arrow head 82 | mergeContext.lineTo(currX - arrowLength/2 + arrowheadLength/2, arrowPosY + arrowheadWidth/2); 83 | mergeContext.lineTo(currX - arrowLength/2, arrowPosY); 84 | mergeContext.lineTo(currX - arrowLength/2 + arrowheadLength/2, arrowPosY - arrowheadWidth/2); 85 | mergeContext.lineTo(currX - arrowLength/2 + arrowheadLength/2, arrowPosY); 86 | 87 | mergeContext.lineTo(currX - arrowLength/2 + arrowheadLength/2, arrowPosY - arrowWidth/2); 88 | mergeContext.lineTo(currX, arrowPosY - arrowWidth/2); 89 | 90 | mergeContext.closePath(); 91 | 92 | mergeContext.fillStyle = "#444444"; 93 | mergeContext.fill(); 94 | 95 | 96 | 97 | } 98 | requestAnimationFrame(drawLoop); 99 | } 100 | } 101 | 102 | Number.prototype.clamp = function(min, max) { 103 | return Math.min(Math.max(this, min), max); 104 | }; 105 | 106 | 107 | function resizeAndPlay(element) 108 | { 109 | var cv = document.getElementById(element.id + "Merge"); 110 | cv.width = element.videoWidth/2; 111 | cv.height = element.videoHeight; 112 | element.play(); 113 | element.style.height = "0px"; // Hide video without stopping it 114 | 115 | playVids(element.id); 116 | } 117 | -------------------------------------------------------------------------------- /gradio_demos/lumos_I2I.py: 
-------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import simple_parsing 3 | import numpy as np 4 | import torch 5 | from torchvision.utils import make_grid 6 | import random 7 | import torch 8 | from diffusers.models import AutoencoderKL 9 | from lumos_diffusion import DPMS_INTER 10 | from utils.download import find_model 11 | import lumos_diffusion.model.dino.vision_transformer as vits 12 | import torchvision.transforms as T 13 | from lumos_diffusion.model.lumos import LumosI2I_XL_2 14 | from utils import find_model 15 | 16 | _TITLE = 'Lumos-I2I: Image Interpolation Generation' 17 | MAX_SEED = 2147483647 18 | def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: 19 | if randomize_seed: 20 | seed = random.randint(0, MAX_SEED) 21 | return seed 22 | 23 | def dividable(n): 24 | for i in range(int(np.sqrt(n)), 0, -1): 25 | if n % i == 0: 26 | break 27 | return i, n // i 28 | 29 | 30 | def stop_run(): 31 | return ( 32 | gr.update(value="Run", variant="primary", visible=True), 33 | gr.update(visible=False), 34 | ) 35 | 36 | def generate( 37 | prompt_img1, 38 | prompt_img2, 39 | bsz, 40 | guidance_scale=4.5, 41 | num_inference_steps=20, 42 | seed=10, 43 | randomize_seed=True 44 | ): 45 | seed = int(randomize_seed_fn(seed, randomize_seed)) 46 | np.random.seed(seed) 47 | torch.random.manual_seed(seed) 48 | vae, dino, transform, model = models["vae"], models["vision_encoder"], models["transform"], models["diffusion"] 49 | prompt_img1 = transform(prompt_img1).unsqueeze(0) 50 | prompt_img2 = transform(prompt_img2).unsqueeze(0) 51 | prompt_imgs = torch.cat([prompt_img1, prompt_img2], dim=0) 52 | with torch.no_grad(): 53 | caption_embs = dino(prompt_imgs.to(device)) 54 | caption_embs = torch.nn.functional.normalize(caption_embs, dim=-1).unsqueeze(1).unsqueeze(1) 55 | caption_emb1 = caption_embs[0] 56 | caption_emb2 = caption_embs[-1] 57 | weights = np.arange(0, 1, 1/bsz).tolist() 58 | caption_embs = [caption_emb2 * wei + caption_emb1 * (1-wei) for wei in weights] 59 | caption_embs = torch.stack(caption_embs).to(device) 60 | bsz = caption_embs.shape[0] 61 | null_y = model.y_embedder.y_embedding[None].repeat(bsz, 1, 1)[:, None] 62 | z = torch.randn(1, 4, 32, 32, device=device).repeat(bsz, 1, 1, 1) 63 | model_kwargs = dict(mask=None) 64 | dpm_solver = DPMS_INTER(model.forward_with_dpmsolver, 65 | condition=caption_embs, 66 | uncondition=null_y, 67 | cfg_scale=guidance_scale, 68 | model_kwargs=model_kwargs) 69 | output = dpm_solver.sample( 70 | z, 71 | steps=num_inference_steps, 72 | order=2, 73 | skip_type="time_uniform", 74 | method="multistep") 75 | output = vae.decode(output / 0.18215).sample 76 | output = torch.clamp(output * 0.5 + 0.5, min=0, max=1).cpu() 77 | output = ( 78 | make_grid(output, nrow=output.shape[0] // 3, padding=3, pad_value=1).permute(1, 2, 0).numpy() * 255 79 | ).astype(np.uint8) 80 | step = num_inference_steps 81 | yield output, seed, gr.update( 82 | value="Run", 83 | variant="primary", 84 | visible=(step == num_inference_steps), 85 | ), gr.update( 86 | value="Stop", variant="stop", visible=(step != num_inference_steps) 87 | ) 88 | 89 | 90 | def demo(args): 91 | css = """ 92 | #col-container { 93 | margin: 0 auto; 94 | max-width: 640px; 95 | } 96 | """ 97 | demo = gr.Blocks(css=css) 98 | with demo: 99 | with gr.Column(elem_id="col-container"): 100 | gr.Markdown('# ' + _TITLE) 101 | gr.Markdown("You can get various visual effects by adjusting the hyper-parameters in Advanced settings.") 102 | pid = gr.State() 103 | 
with gr.Row(equal_height=True): 104 | prompt_image1 = gr.Image(type="pil", label="Input Image 1") 105 | prompt_image2 = gr.Image(type="pil", label="Input Image 2") 106 | with gr.Row(equal_height=True): 107 | num_generation = gr.Slider( 108 | value=12, 109 | minimum=1, 110 | maximum=100, 111 | step=2, 112 | label="Generation Num", 113 | ) 114 | run_btn = gr.Button(value="Run", variant="primary", scale=1) 115 | stop_btn = gr.Button(value="Stop", variant="stop", visible=False) 116 | with gr.Row(equal_height=False): 117 | output_image = gr.Image(value=None, label="Output image") 118 | with gr.Accordion( 119 | "Advanced settings", open=False, elem_id="config-accordion" 120 | ): 121 | with gr.Row(equal_height=False): 122 | num_inference_steps = gr.Slider( 123 | value=20, 124 | minimum=1, 125 | maximum=2000, 126 | step=1, 127 | label="# of steps", 128 | ) 129 | guidance_scale = gr.Slider( 130 | value=4.5, 131 | minimum=0.0, 132 | maximum=50, 133 | step=0.1, 134 | label="Guidance scale", 135 | ) 136 | randomize_seed = gr.Checkbox(label="Randomize seed", value=False) 137 | seed = gr.Slider( 138 | value=137, 139 | minimum=0, 140 | maximum=MAX_SEED, 141 | step=1, 142 | label="Random seed", 143 | ) 144 | 145 | run_event = run_btn.click( 146 | fn=generate, 147 | inputs=[ 148 | prompt_image1, 149 | prompt_image2, 150 | num_generation, 151 | guidance_scale, 152 | num_inference_steps, 153 | seed, 154 | randomize_seed 155 | ], 156 | outputs=[ 157 | output_image, 158 | seed, 159 | run_btn, 160 | stop_btn, 161 | ], 162 | ) 163 | 164 | stop_btn.click( 165 | fn=stop_run, 166 | outputs=[run_btn, stop_btn], 167 | cancels=[run_event], 168 | queue=False, 169 | ) 170 | with gr.Row(equal_height=False): 171 | example_images_1 = ["asset/images/car/image_start.png", "asset/images/cat/image_start.JPG", "asset/images/folwer/image_start.png"] 172 | example_images_2 = ["asset/images/car/image_end.png", "asset/images/cat/image_end.JPG", "asset/images/folwer/image_end.png"] 173 | example = gr.Examples( 174 | examples=[[t[0].strip(), t[-1].strip()] for t in zip(example_images_1, example_images_2)], 175 | inputs=[prompt_image1, prompt_image2], 176 | ) 177 | 178 | launch_args = {"server_port": int(args.port), "server_name": "0.0.0.0"} 179 | demo.queue(default_concurrency_limit=1).launch(**launch_args) 180 | 181 | 182 | if __name__ == "__main__": 183 | parser = simple_parsing.ArgumentParser(description="Lumos Image Interpolation Generation Demo") 184 | parser.add_argument("--vae-pretrained", type=str, default="stabilityai/sd-vae-ft-mse") 185 | parser.add_argument("--dino-type", type=str, default="vit_base") 186 | parser.add_argument("--dino-pretrained", type=str, default="./checkpoints/dino_vitbase16_pretrain.pth") 187 | parser.add_argument("--lumos-i2i-ckpt", type=str, default="./checkpoints/Lumos_I2I.pth") 188 | parser.add_argument("--port", type=int, default=19231) 189 | args = parser.parse_known_args()[0] 190 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 191 | # setting models 192 | models = dict() 193 | ## autoencoder 194 | weight_dtype = torch.float32 195 | vae = AutoencoderKL.from_pretrained(args.vae_pretrained).cuda() 196 | vae.eval() 197 | vae.to(weight_dtype) 198 | models["vae"] = vae 199 | ## vision encoder 200 | dino = vits.__dict__[args.dino_type](patch_size=16, num_classes=0).cuda() 201 | state_dict = torch.load(args.dino_pretrained, map_location="cpu") 202 | # remove `module.` prefix 203 | state_dict = {k.replace("module.", ""): v for k, v in state_dict.items()} 204 | # remove `backbone.` 
prefix induced by multicrop wrapper 205 | state_dict = {k.replace("backbone.", ""): v for k, v in state_dict.items()} 206 | msg = dino.load_state_dict(state_dict, strict=False) 207 | del state_dict 208 | dino.eval() 209 | models["vision_encoder"] = dino 210 | ## transform for vision encoder 211 | transform = [ 212 | T.Lambda(lambda img: img.convert('RGB')), 213 | T.Resize(224), # Image.BICUBIC 214 | T.CenterCrop(224), 215 | T.ToTensor(), 216 | T.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]), 217 | ] 218 | 219 | transform = T.Compose(transform) 220 | models["transform"] = transform 221 | ## diffusion model 222 | model_kwargs={"window_block_indexes": [], "window_size": 0, 223 | "use_rel_pos": False, "lewei_scale": 1.0, 224 | "caption_channels": dino.embed_dim, 'model_max_length': 1} 225 | # build models 226 | image_size = 256 227 | latent_size = int(image_size) // 8 228 | model = LumosI2I_XL_2(input_size=latent_size, **model_kwargs).to(device) 229 | state_dict = find_model(args.lumos_i2i_ckpt) 230 | missing, unexpected = model.load_state_dict(state_dict, strict=False) 231 | model.eval() 232 | model.to(weight_dtype) 233 | models["diffusion"] = model 234 | 235 | demo(args) 236 | -------------------------------------------------------------------------------- /gradio_demos/lumos_T2I.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import simple_parsing 3 | import numpy as np 4 | import torch 5 | from torchvision.utils import make_grid 6 | import random 7 | import torch 8 | from diffusers.models import AutoencoderKL 9 | from lumos_diffusion import DPMS 10 | from utils.download import find_model 11 | from lumos_diffusion.model.t5 import T5Embedder 12 | from lumos_diffusion.model.lumos import LumosT2IMS_XL_2 13 | from utils import find_model, get_closest_ratio, ASPECT_RATIO_1024_TEST 14 | 15 | _TITLE = 'Lumos-T2I: Zero-shot Text to Image' 16 | MAX_SEED = 2147483647 17 | 18 | def randomize_seed_fn(seed: int, randomize_seed: bool) -> int: 19 | if randomize_seed: 20 | seed = random.randint(0, MAX_SEED) 21 | return seed 22 | 23 | def dividable(n): 24 | for i in range(int(np.sqrt(n)), 0, -1): 25 | if n % i == 0: 26 | break 27 | return i, n // i 28 | 29 | 30 | def stop_run(): 31 | return ( 32 | gr.update(value="Run", variant="primary", visible=True), 33 | gr.update(visible=False), 34 | ) 35 | 36 | def generate( 37 | height=1024, 38 | width=1024, 39 | prompt="a chair", 40 | guidance_scale=4.5, 41 | num_inference_steps=250, 42 | seed=10, 43 | randomize_seed=True 44 | ): 45 | seed = int(randomize_seed_fn(seed, randomize_seed)) 46 | np.random.seed(seed) 47 | torch.random.manual_seed(seed) 48 | bsz = 1 49 | vae, t5, model = models["vae"], models["language_encoder"], models["diffusion"] 50 | prompt = prompt.strip() if prompt.endswith('.') else prompt 51 | close_hw, close_ratio = get_closest_ratio(height, width, ratios=ASPECT_RATIO_1024_TEST) 52 | output_comment = f"Convert Height: {height}, Width: {width} to [{close_hw[0]}, {close_hw[1]}]." 
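# Illustrative sketch of the bucket lookup above (assumptions: ASPECT_RATIO_1024_TEST
# maps ratio keys to [H, W] pairs and the keys encode H/W; the real helper is
# utils.get_closest_ratio and its exact return types may differ):
#   def closest_ratio_sketch(h, w, ratios):
#       key = min(ratios, key=lambda r: abs(h / w - float(r)))
#       return ratios[key], key   # -> close_hw == [H, W], close_ratio == key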
53 | hw, ar = torch.tensor([close_hw], dtype=torch.float, device=device), torch.tensor([[float(close_ratio)]], device=device) 54 | latent_size_h, latent_size_w = int(hw[0, 0] // 8), int(hw[0, 1] // 8) 55 | prompts = [prompt] * bsz 56 | with torch.no_grad(): 57 | caption_embs, emb_masks = t5.get_text_embeddings(prompts) 58 | caption_embs = caption_embs.float()[:, None] 59 | null_y = model.y_embedder.y_embedding[None].repeat(bsz, 1, 1)[:, None] 60 | z = torch.randn(bsz, 4, latent_size_h, latent_size_w, device=device) 61 | model_kwargs = dict(data_info={'img_hw': hw, 'aspect_ratio': ar}, mask=emb_masks) 62 | dpm_solver = DPMS(model.forward_with_dpmsolver, 63 | condition=caption_embs, 64 | uncondition=null_y, 65 | cfg_scale=guidance_scale, 66 | model_kwargs=model_kwargs) 67 | output = dpm_solver.sample( 68 | z, 69 | steps=num_inference_steps, 70 | order=2, 71 | skip_type="time_uniform", 72 | method="multistep") 73 | output = vae.decode(output / 0.18215).sample 74 | output = torch.clamp(output * 0.5 + 0.5, min=0, max=1).cpu() 75 | output = ( 76 | make_grid(output, nrow=dividable(bsz)[0]).permute(1, 2, 0).numpy() * 255 77 | ).astype(np.uint8) 78 | step = num_inference_steps 79 | yield output, seed, close_hw[0], close_hw[1], gr.update( 80 | value="Run", 81 | variant="primary", 82 | visible=(step == num_inference_steps), 83 | ), gr.update( 84 | value="Stop", variant="stop", visible=(step != num_inference_steps) 85 | ) 86 | 87 | 88 | def demo(args): 89 | css = """ 90 | #col-container { 91 | margin: 0 auto; 92 | max-width: 640px; 93 | } 94 | """ 95 | example_texts = open("asset/samples.txt").readlines() 96 | demo = gr.Blocks(css=css) 97 | with demo: 98 | with gr.Column(elem_id="col-container"): 99 | gr.Markdown('# ' + _TITLE) 100 | pid = gr.State() 101 | with gr.Row(equal_height=True): 102 | prompt_input = gr.Text( 103 | label="Prompt", 104 | show_label=False, 105 | max_lines=1, 106 | placeholder="Enter your prompt", 107 | container=False, 108 | scale=5 109 | ) 110 | run_btn = gr.Button(value="Run", variant="primary", scale=1) 111 | stop_btn = gr.Button(value="Stop", variant="stop", visible=False) 112 | with gr.Row(equal_height=False): 113 | output_image = gr.Image(value=None, label="Output image") 114 | with gr.Accordion( 115 | "Advanced settings", open=False, elem_id="config-accordion" 116 | ): 117 | with gr.Row(equal_height=False): 118 | num_inference_steps = gr.Slider( 119 | value=20, 120 | minimum=1, 121 | maximum=2000, 122 | step=1, 123 | label="# of steps", 124 | ) 125 | guidance_scale = gr.Slider( 126 | value=4.5, 127 | minimum=0.0, 128 | maximum=50, 129 | step=0.1, 130 | label="Guidance scale", 131 | ) 132 | with gr.Row(equal_height=False): 133 | height = gr.Slider( 134 | value=1024, 135 | minimum=512, 136 | maximum=2048, 137 | step=32, 138 | label="Height", 139 | ) 140 | width = gr.Slider( 141 | value=1024, 142 | minimum=512, 143 | maximum=2048, 144 | step=32, 145 | label="Width", 146 | ) 147 | randomize_seed = gr.Checkbox(label="Randomize seed", value=True) 148 | seed = gr.Slider( 149 | value=10, 150 | minimum=0, 151 | maximum=MAX_SEED, 152 | step=1, 153 | label="Random seed", 154 | ) 155 | 156 | run_event = run_btn.click( 157 | fn=generate, 158 | inputs=[ 159 | height, 160 | width, 161 | prompt_input, 162 | guidance_scale, 163 | num_inference_steps, 164 | seed, 165 | randomize_seed 166 | ], 167 | outputs=[ 168 | output_image, 169 | seed, 170 | height, 171 | width, 172 | run_btn, 173 | stop_btn, 174 | ], 175 | ) 176 | 177 | stop_btn.click( 178 | fn=stop_run, 179 | outputs=[run_btn, 
stop_btn], 180 | cancels=[run_event], 181 | queue=False, 182 | ) 183 | 184 | example0 = gr.Examples( 185 | examples=[[t.strip()] for t in example_texts], 186 | inputs=[prompt_input], 187 | ) 188 | 189 | launch_args = {"server_port": int(args.port), "server_name": "0.0.0.0"} 190 | demo.queue(default_concurrency_limit=1).launch(**launch_args) 191 | 192 | 193 | if __name__ == "__main__": 194 | parser = simple_parsing.ArgumentParser(description="Lumos Text to Image Generation Demo") 195 | parser.add_argument("--vae-pretrained", type=str, default="stabilityai/sd-vae-ft-mse") 196 | parser.add_argument("--t5-path", type=str, default="./checkpoints/") 197 | parser.add_argument("--lumos-t2i-ckpt", type=str, default="./checkpoints/Lumos_T2I.pth") 198 | parser.add_argument("--port", type=int, default=19231) 199 | args = parser.parse_known_args()[0] 200 | if torch.cuda.is_available(): 201 | device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 202 | # setting models 203 | models = dict() 204 | ## autoencoder 205 | weight_dtype = torch.float16 206 | vae = AutoencoderKL.from_pretrained(args.vae_pretrained).cuda() 207 | vae.eval() 208 | models["vae"] = vae 209 | ## language encoder 210 | t5 = T5Embedder(device="cuda", local_cache=True, cache_dir=args.t5_path, torch_dtype=torch.float) 211 | models["language_encoder"] = t5 212 | ## diffusion model 213 | model_kwargs={"window_block_indexes": [], "window_size": 0, 214 | "use_rel_pos": False, "lewei_scale": 2.0} 215 | # build models 216 | image_size = 1024 217 | latent_size = int(image_size) // 8 218 | model = LumosT2IMS_XL_2(input_size=latent_size, **model_kwargs).to(device) 219 | state_dict = find_model(args.lumos_t2i_ckpt) 220 | missing, unexpected = model.load_state_dict(state_dict, strict=False) 221 | model.eval() 222 | model.to(weight_dtype) 223 | models["diffusion"] = model 224 | else: 225 | raise ValueError("This Demo need gpu") 226 | 227 | demo(args) 228 | -------------------------------------------------------------------------------- /lumos_diffusion/__init__.py: -------------------------------------------------------------------------------- 1 | from .dpm_solver import DPMS 2 | from .dpm_solver_inter import DPMS as DPMS_INTER -------------------------------------------------------------------------------- /lumos_diffusion/dpm_solver.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .model import gaussian_diffusion as gd 3 | from .model.dpm_solver import model_wrapper, DPM_Solver, NoiseScheduleVP 4 | 5 | 6 | def DPMS(model, condition, uncondition, cfg_scale, model_type='noise', noise_schedule="linear", guidance_type='classifier-free', model_kwargs=None, diffusion_steps=1000): 7 | if model_kwargs is None: 8 | model_kwargs = {} 9 | betas = torch.tensor(gd.get_named_beta_schedule(noise_schedule, diffusion_steps)) 10 | 11 | ## 1. Define the noise schedule. 12 | noise_schedule = NoiseScheduleVP(schedule='discrete', betas=betas) 13 | 14 | ## 2. Convert your discrete-time `model` to the continuous-time 15 | ## noise prediction model. Here is an example for a diffusion model 16 | ## `model` with the noise prediction type ("noise") . 17 | model_fn = model_wrapper( 18 | model, 19 | noise_schedule, 20 | model_type=model_type, 21 | model_kwargs=model_kwargs, 22 | guidance_type=guidance_type, 23 | condition=condition, 24 | unconditional_condition=uncondition, 25 | guidance_scale=cfg_scale, 26 | ) 27 | ## 3. Define dpm-solver and sample by multistep DPM-Solver. 
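# Usage sketch, mirroring gradio_demos/lumos_T2I.py (the numeric values are the
# demo defaults, not requirements of this wrapper):
#   dpm_solver = DPMS(model.forward_with_dpmsolver,
#                     condition=caption_embs, uncondition=null_y,
#                     cfg_scale=4.5, model_kwargs=model_kwargs)
#   samples = dpm_solver.sample(z, steps=20, order=2,
#                               skip_type="time_uniform", method="multistep")
#   images = vae.decode(samples / 0.18215).sample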
28 | return DPM_Solver(model_fn, noise_schedule, algorithm_type="dpmsolver++") -------------------------------------------------------------------------------- /lumos_diffusion/dpm_solver_inter.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .model import gaussian_diffusion_inter as gd 3 | from .model.dpm_solver_inter import model_wrapper, DPM_Solver, NoiseScheduleVP 4 | 5 | 6 | def DPMS(model, condition, uncondition, cfg_scale, model_type='noise', noise_schedule="linear", guidance_type='classifier-free', model_kwargs=None, diffusion_steps=1000): 7 | if model_kwargs is None: 8 | model_kwargs = {} 9 | betas = torch.tensor(gd.get_named_beta_schedule(noise_schedule, diffusion_steps)) 10 | 11 | ## 1. Define the noise schedule. 12 | noise_schedule = NoiseScheduleVP(schedule='discrete', betas=betas) 13 | 14 | ## 2. Convert your discrete-time `model` to the continuous-time 15 | ## noise prediction model. Here is an example for a diffusion model 16 | ## `model` with the noise prediction type ("noise") . 17 | model_fn = model_wrapper( 18 | model, 19 | noise_schedule, 20 | model_type=model_type, 21 | model_kwargs=model_kwargs, 22 | guidance_type=guidance_type, 23 | condition=condition, 24 | unconditional_condition=uncondition, 25 | guidance_scale=cfg_scale, 26 | ) 27 | ## 3. Define dpm-solver and sample by multistep DPM-Solver. 28 | return DPM_Solver(model_fn, noise_schedule, algorithm_type="dpmsolver++") -------------------------------------------------------------------------------- /lumos_diffusion/model/__init__.py: -------------------------------------------------------------------------------- 1 | from .lumos import * -------------------------------------------------------------------------------- /lumos_diffusion/model/builder.py: -------------------------------------------------------------------------------- 1 | from mmcv import Registry 2 | 3 | from lumos_diffusion.model.utils import set_grad_checkpoint 4 | 5 | MODELS = Registry('models') 6 | 7 | 8 | def build_model(cfg, use_grad_checkpoint=False, use_fp32_attention=False, gc_step=1, **kwargs): 9 | if isinstance(cfg, str): 10 | cfg = dict(type=cfg) 11 | model = MODELS.build(cfg, default_args=kwargs) 12 | if use_grad_checkpoint: 13 | set_grad_checkpoint(model, use_fp32_attention=use_fp32_attention, gc_step=gc_step) 14 | return model 15 | -------------------------------------------------------------------------------- /lumos_diffusion/model/diffusion_utils.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | import numpy as np 7 | import torch as th 8 | 9 | 10 | def normal_kl(mean1, logvar1, mean2, logvar2): 11 | """ 12 | Compute the KL divergence between two gaussians. 13 | Shapes are automatically broadcasted, so batches can be compared to 14 | scalars, among other use cases. 15 | """ 16 | tensor = next( 17 | ( 18 | obj 19 | for obj in (mean1, logvar1, mean2, logvar2) 20 | if isinstance(obj, th.Tensor) 21 | ), 22 | None, 23 | ) 24 | assert tensor is not None, "at least one argument must be a Tensor" 25 | 26 | # Force variances to be Tensors. 
Broadcasting helps convert scalars to 27 | # Tensors, but it does not work for th.exp(). 28 | logvar1, logvar2 = [ 29 | x if isinstance(x, th.Tensor) else th.tensor(x, device=tensor.device) 30 | for x in (logvar1, logvar2) 31 | ] 32 | 33 | return 0.5 * ( 34 | -1.0 35 | + logvar2 36 | - logvar1 37 | + th.exp(logvar1 - logvar2) 38 | + ((mean1 - mean2) ** 2) * th.exp(-logvar2) 39 | ) 40 | 41 | 42 | def approx_standard_normal_cdf(x): 43 | """ 44 | A fast approximation of the cumulative distribution function of the 45 | standard normal. 46 | """ 47 | return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3)))) 48 | 49 | 50 | def continuous_gaussian_log_likelihood(x, *, means, log_scales): 51 | """ 52 | Compute the log-likelihood of a continuous Gaussian distribution. 53 | :param x: the targets 54 | :param means: the Gaussian mean Tensor. 55 | :param log_scales: the Gaussian log stddev Tensor. 56 | :return: a tensor like x of log probabilities (in nats). 57 | """ 58 | centered_x = x - means 59 | inv_stdv = th.exp(-log_scales) 60 | normalized_x = centered_x * inv_stdv 61 | return th.distributions.Normal(th.zeros_like(x), th.ones_like(x)).log_prob( 62 | normalized_x 63 | ) 64 | 65 | 66 | def discretized_gaussian_log_likelihood(x, *, means, log_scales): 67 | """ 68 | Compute the log-likelihood of a Gaussian distribution discretizing to a 69 | given image. 70 | :param x: the target images. It is assumed that this was uint8 values, 71 | rescaled to the range [-1, 1]. 72 | :param means: the Gaussian mean Tensor. 73 | :param log_scales: the Gaussian log stddev Tensor. 74 | :return: a tensor like x of log probabilities (in nats). 75 | """ 76 | assert x.shape == means.shape == log_scales.shape 77 | centered_x = x - means 78 | inv_stdv = th.exp(-log_scales) 79 | plus_in = inv_stdv * (centered_x + 1.0 / 255.0) 80 | cdf_plus = approx_standard_normal_cdf(plus_in) 81 | min_in = inv_stdv * (centered_x - 1.0 / 255.0) 82 | cdf_min = approx_standard_normal_cdf(min_in) 83 | log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12)) 84 | log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12)) 85 | cdf_delta = cdf_plus - cdf_min 86 | log_probs = th.where( 87 | x < -0.999, 88 | log_cdf_plus, 89 | th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))), 90 | ) 91 | assert log_probs.shape == x.shape 92 | return log_probs 93 | -------------------------------------------------------------------------------- /lumos_diffusion/model/dino/vision_transformer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Facebook, Inc. and its affiliates. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | """ 15 | Mostly copy-paste from timm library. 
16 | https://github.com/rwightman/pytorch-image-models/blob/master/timm/models/vision_transformer.py 17 | """ 18 | import math 19 | from functools import partial 20 | import torch 21 | import torch.nn as nn 22 | 23 | def _no_grad_trunc_normal_(tensor, mean, std, a, b): 24 | # Cut & paste from PyTorch official master until it's in a few official releases - RW 25 | # Method based on https://people.sc.fsu.edu/~jburkardt/presentations/truncated_normal.pdf 26 | def norm_cdf(x): 27 | # Computes standard normal cumulative distribution function 28 | return (1. + math.erf(x / math.sqrt(2.))) / 2. 29 | 30 | if (mean < a - 2 * std) or (mean > b + 2 * std): 31 | print("mean is more than 2 std from [a, b] in nn.init.trunc_normal_. " 32 | "The distribution of values may be incorrect.", 33 | stacklevel=2) 34 | 35 | with torch.no_grad(): 36 | # Values are generated by using a truncated uniform distribution and 37 | # then using the inverse CDF for the normal distribution. 38 | # Get upper and lower cdf values 39 | l = norm_cdf((a - mean) / std) 40 | u = norm_cdf((b - mean) / std) 41 | 42 | # Uniformly fill tensor with values from [l, u], then translate to 43 | # [2l-1, 2u-1]. 44 | tensor.uniform_(2 * l - 1, 2 * u - 1) 45 | 46 | # Use inverse cdf transform for normal distribution to get truncated 47 | # standard normal 48 | tensor.erfinv_() 49 | 50 | # Transform to proper mean, std 51 | tensor.mul_(std * math.sqrt(2.)) 52 | tensor.add_(mean) 53 | 54 | # Clamp to ensure it's in the proper range 55 | tensor.clamp_(min=a, max=b) 56 | return tensor 57 | 58 | def trunc_normal_(tensor, mean=0., std=1., a=-2., b=2.): 59 | return _no_grad_trunc_normal_(tensor, mean, std, a, b) 60 | 61 | 62 | def drop_path(x, drop_prob: float = 0., training: bool = False): 63 | if drop_prob == 0. or not training: 64 | return x 65 | keep_prob = 1 - drop_prob 66 | shape = (x.shape[0],) + (1,) * (x.ndim - 1) # work with diff dim tensors, not just 2D ConvNets 67 | random_tensor = keep_prob + torch.rand(shape, dtype=x.dtype, device=x.device) 68 | random_tensor.floor_() # binarize 69 | output = x.div(keep_prob) * random_tensor 70 | return output 71 | 72 | 73 | class DropPath(nn.Module): 74 | """Drop paths (Stochastic Depth) per sample (when applied in main path of residual blocks). 
75 | """ 76 | def __init__(self, drop_prob=None): 77 | super(DropPath, self).__init__() 78 | self.drop_prob = drop_prob 79 | 80 | def forward(self, x): 81 | return drop_path(x, self.drop_prob, self.training) 82 | 83 | 84 | class Mlp(nn.Module): 85 | def __init__(self, in_features, hidden_features=None, out_features=None, act_layer=nn.GELU, drop=0.): 86 | super().__init__() 87 | out_features = out_features or in_features 88 | hidden_features = hidden_features or in_features 89 | self.fc1 = nn.Linear(in_features, hidden_features) 90 | self.act = act_layer() 91 | self.fc2 = nn.Linear(hidden_features, out_features) 92 | self.drop = nn.Dropout(drop) 93 | 94 | def forward(self, x): 95 | x = self.fc1(x) 96 | x = self.act(x) 97 | x = self.drop(x) 98 | x = self.fc2(x) 99 | x = self.drop(x) 100 | return x 101 | 102 | 103 | class Attention(nn.Module): 104 | def __init__(self, dim, num_heads=8, qkv_bias=False, qk_scale=None, attn_drop=0., proj_drop=0.): 105 | super().__init__() 106 | self.num_heads = num_heads 107 | head_dim = dim // num_heads 108 | self.scale = qk_scale or head_dim ** -0.5 109 | 110 | self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias) 111 | self.attn_drop = nn.Dropout(attn_drop) 112 | self.proj = nn.Linear(dim, dim) 113 | self.proj_drop = nn.Dropout(proj_drop) 114 | 115 | def forward(self, x): 116 | B, N, C = x.shape 117 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 118 | q, k, v = qkv[0], qkv[1], qkv[2] 119 | 120 | attn = (q @ k.transpose(-2, -1)) * self.scale 121 | attn = attn.softmax(dim=-1) 122 | attn = self.attn_drop(attn) 123 | 124 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 125 | x = self.proj(x) 126 | x = self.proj_drop(x) 127 | return x, attn 128 | 129 | 130 | class Block(nn.Module): 131 | def __init__(self, dim, num_heads, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop=0., attn_drop=0., 132 | drop_path=0., act_layer=nn.GELU, norm_layer=nn.LayerNorm): 133 | super().__init__() 134 | self.norm1 = norm_layer(dim) 135 | self.attn = Attention( 136 | dim, num_heads=num_heads, qkv_bias=qkv_bias, qk_scale=qk_scale, attn_drop=attn_drop, proj_drop=drop) 137 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 138 | self.norm2 = norm_layer(dim) 139 | mlp_hidden_dim = int(dim * mlp_ratio) 140 | self.mlp = Mlp(in_features=dim, hidden_features=mlp_hidden_dim, act_layer=act_layer, drop=drop) 141 | 142 | def forward(self, x, return_attention=False): 143 | y, attn = self.attn(self.norm1(x)) 144 | if return_attention: 145 | return attn 146 | x = x + self.drop_path(y) 147 | x = x + self.drop_path(self.mlp(self.norm2(x))) 148 | return x 149 | 150 | 151 | class PatchEmbed(nn.Module): 152 | """ Image to Patch Embedding 153 | """ 154 | def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768): 155 | super().__init__() 156 | num_patches = (img_size // patch_size) * (img_size // patch_size) 157 | self.img_size = img_size 158 | self.patch_size = patch_size 159 | self.num_patches = num_patches 160 | 161 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size) 162 | 163 | def forward(self, x): 164 | B, C, H, W = x.shape 165 | x = self.proj(x).flatten(2).transpose(1, 2) 166 | return x 167 | 168 | 169 | class VisionTransformer(nn.Module): 170 | """ Vision Transformer """ 171 | def __init__(self, img_size=[224], patch_size=16, in_chans=3, num_classes=0, embed_dim=768, depth=12, 172 | num_heads=12, mlp_ratio=4., qkv_bias=False, qk_scale=None, drop_rate=0., attn_drop_rate=0., 173 | drop_path_rate=0., norm_layer=nn.LayerNorm, **kwargs): 174 | super().__init__() 175 | self.num_features = self.embed_dim = embed_dim 176 | 177 | self.patch_embed = PatchEmbed( 178 | img_size=img_size[0], patch_size=patch_size, in_chans=in_chans, embed_dim=embed_dim) 179 | num_patches = self.patch_embed.num_patches 180 | 181 | self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim)) 182 | self.pos_embed = nn.Parameter(torch.zeros(1, num_patches + 1, embed_dim)) 183 | self.pos_drop = nn.Dropout(p=drop_rate) 184 | 185 | dpr = [x.item() for x in torch.linspace(0, drop_path_rate, depth)] # stochastic depth decay rule 186 | self.blocks = nn.ModuleList([ 187 | Block( 188 | dim=embed_dim, num_heads=num_heads, mlp_ratio=mlp_ratio, qkv_bias=qkv_bias, qk_scale=qk_scale, 189 | drop=drop_rate, attn_drop=attn_drop_rate, drop_path=dpr[i], norm_layer=norm_layer) 190 | for i in range(depth)]) 191 | self.norm = norm_layer(embed_dim) 192 | 193 | # Classifier head 194 | self.head = nn.Linear(embed_dim, num_classes) if num_classes > 0 else nn.Identity() 195 | 196 | trunc_normal_(self.pos_embed, std=.02) 197 | trunc_normal_(self.cls_token, std=.02) 198 | self.apply(self._init_weights) 199 | 200 | def _init_weights(self, m): 201 | if isinstance(m, nn.Linear): 202 | trunc_normal_(m.weight, std=.02) 203 | if isinstance(m, nn.Linear) and m.bias is not None: 204 | nn.init.constant_(m.bias, 0) 205 | elif isinstance(m, nn.LayerNorm): 206 | nn.init.constant_(m.bias, 0) 207 | nn.init.constant_(m.weight, 1.0) 208 | 209 | def interpolate_pos_encoding(self, x, w, h): 210 | npatch = x.shape[1] - 1 211 | N = self.pos_embed.shape[1] - 1 212 | if npatch == N and w == h: 213 | return self.pos_embed 214 | class_pos_embed = self.pos_embed[:, 0] 215 | patch_pos_embed = self.pos_embed[:, 1:] 216 | dim = x.shape[-1] 217 | w0 = w // self.patch_embed.patch_size 218 | h0 = h // self.patch_embed.patch_size 219 | # we add a small number to avoid floating point error in the interpolation 220 | # see discussion at https://github.com/facebookresearch/dino/issues/8 221 | w0, h0 = w0 + 0.1, h0 + 0.1 222 | patch_pos_embed = nn.functional.interpolate( 223 | patch_pos_embed.reshape(1, int(math.sqrt(N)), int(math.sqrt(N)), 
dim).permute(0, 3, 1, 2), 224 | scale_factor=(w0 / math.sqrt(N), h0 / math.sqrt(N)), 225 | mode='bicubic', 226 | ) 227 | assert int(w0) == patch_pos_embed.shape[-2] and int(h0) == patch_pos_embed.shape[-1] 228 | patch_pos_embed = patch_pos_embed.permute(0, 2, 3, 1).view(1, -1, dim) 229 | return torch.cat((class_pos_embed.unsqueeze(0), patch_pos_embed), dim=1) 230 | 231 | def prepare_tokens(self, x): 232 | B, nc, w, h = x.shape 233 | x = self.patch_embed(x) # patch linear embedding 234 | 235 | # add the [CLS] token to the embed patch tokens 236 | cls_tokens = self.cls_token.expand(B, -1, -1) 237 | x = torch.cat((cls_tokens, x), dim=1) 238 | 239 | # add positional encoding to each token 240 | x = x + self.interpolate_pos_encoding(x, w, h) 241 | 242 | return self.pos_drop(x) 243 | 244 | def forward(self, x): 245 | x = self.prepare_tokens(x) 246 | for blk in self.blocks: 247 | x = blk(x) 248 | x = self.norm(x) 249 | return x[:, 0] 250 | 251 | def get_last_selfattention(self, x): 252 | x = self.prepare_tokens(x) 253 | for i, blk in enumerate(self.blocks): 254 | if i < len(self.blocks) - 1: 255 | x = blk(x) 256 | else: 257 | # return attention of the last block 258 | return blk(x, return_attention=True) 259 | 260 | def get_intermediate_layers(self, x, n=1): 261 | x = self.prepare_tokens(x) 262 | # we return the output tokens from the `n` last blocks 263 | output = [] 264 | for i, blk in enumerate(self.blocks): 265 | x = blk(x) 266 | if len(self.blocks) - i <= n: 267 | output.append(self.norm(x)) 268 | return output 269 | 270 | 271 | def vit_tiny(patch_size=16, **kwargs): 272 | model = VisionTransformer( 273 | patch_size=patch_size, embed_dim=192, depth=12, num_heads=3, mlp_ratio=4, 274 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 275 | return model 276 | 277 | 278 | def vit_small(patch_size=16, **kwargs): 279 | model = VisionTransformer( 280 | patch_size=patch_size, embed_dim=384, depth=12, num_heads=6, mlp_ratio=4, 281 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 282 | return model 283 | 284 | 285 | def vit_base(patch_size=16, **kwargs): 286 | model = VisionTransformer( 287 | patch_size=patch_size, embed_dim=768, depth=12, num_heads=12, mlp_ratio=4, 288 | qkv_bias=True, norm_layer=partial(nn.LayerNorm, eps=1e-6), **kwargs) 289 | return model 290 | 291 | 292 | class DINOHead(nn.Module): 293 | def __init__(self, in_dim, out_dim, use_bn=False, norm_last_layer=True, nlayers=3, hidden_dim=2048, bottleneck_dim=256): 294 | super().__init__() 295 | nlayers = max(nlayers, 1) 296 | if nlayers == 1: 297 | self.mlp = nn.Linear(in_dim, bottleneck_dim) 298 | else: 299 | layers = [nn.Linear(in_dim, hidden_dim)] 300 | if use_bn: 301 | layers.append(nn.BatchNorm1d(hidden_dim)) 302 | layers.append(nn.GELU()) 303 | for _ in range(nlayers - 2): 304 | layers.append(nn.Linear(hidden_dim, hidden_dim)) 305 | if use_bn: 306 | layers.append(nn.BatchNorm1d(hidden_dim)) 307 | layers.append(nn.GELU()) 308 | layers.append(nn.Linear(hidden_dim, bottleneck_dim)) 309 | self.mlp = nn.Sequential(*layers) 310 | self.apply(self._init_weights) 311 | self.last_layer = nn.utils.weight_norm(nn.Linear(bottleneck_dim, out_dim, bias=False)) 312 | self.last_layer.weight_g.data.fill_(1) 313 | if norm_last_layer: 314 | self.last_layer.weight_g.requires_grad = False 315 | 316 | def _init_weights(self, m): 317 | if isinstance(m, nn.Linear): 318 | trunc_normal_(m.weight, std=.02) 319 | if isinstance(m, nn.Linear) and m.bias is not None: 320 | nn.init.constant_(m.bias, 0) 321 | 322 | def 
forward(self, x): 323 | x = self.mlp(x) 324 | x = nn.functional.normalize(x, dim=-1, p=2) 325 | x = self.last_layer(x) 326 | return x 327 | -------------------------------------------------------------------------------- /lumos_diffusion/model/lumos/LumosI2I.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import os 5 | import numpy as np 6 | from timm.models.layers import DropPath 7 | from timm.models.vision_transformer import PatchEmbed, Mlp 8 | 9 | from lumos_diffusion.model.builder import MODELS 10 | from lumos_diffusion.model.utils import auto_grad_checkpoint, to_2tuple 11 | from lumos_diffusion.model.lumos.Lumos_blocks import modulate, CaptionEmbedder, WindowAttention, MultiHeadCrossAttention, T2IFinalLayer, TimestepEmbedder 12 | 13 | class LumosI2IBlock(nn.Module): 14 | """ 15 | A LumosI2I block with adaptive layer norm (adaLN-zero) conditioning. 16 | """ 17 | 18 | def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, drop_path=0., window_size=0, input_size=None, use_rel_pos=False, **block_kwargs): 19 | super().__init__() 20 | self.hidden_size = hidden_size 21 | self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 22 | self.attn = WindowAttention(hidden_size, num_heads=num_heads, qkv_bias=True, 23 | input_size=input_size if window_size == 0 else (window_size, window_size), 24 | use_rel_pos=use_rel_pos, **block_kwargs) 25 | self.cross_attn = MultiHeadCrossAttention(hidden_size, num_heads, **block_kwargs) 26 | self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 27 | # to be compatible with lower version pytorch 28 | approx_gelu = lambda: nn.GELU(approximate="tanh") 29 | self.mlp = Mlp(in_features=hidden_size, hidden_features=int(hidden_size * mlp_ratio), act_layer=approx_gelu, drop=0) 30 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 31 | self.window_size = window_size 32 | self.adaLN_modulation = nn.Sequential( 33 | nn.SiLU(), 34 | nn.Linear(hidden_size, 6 * hidden_size, bias=True) 35 | ) 36 | def forward(self, x, y, t, mask=None, **kwargs): 37 | B, N, C = x.shape 38 | shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(t).chunk(6, dim=1) 39 | x = x + self.drop_path(gate_msa.unsqueeze(1) * self.attn(modulate(self.norm1(x), shift_msa, scale_msa)).reshape(B, N, C)) 40 | x = x + self.cross_attn(x, y, mask) 41 | x = x + self.drop_path(gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp))) 42 | 43 | return x 44 | 45 | ########################################################################### 46 | # Core Lumos Image-to-Image Model # 47 | ########################################################################### 48 | @MODELS.register_module() 49 | class LumosI2I(nn.Module): 50 | """ 51 | Diffusion model with a Transformer backbone. 
52 | """ 53 | def __init__(self, input_size=32, patch_size=2, in_channels=4, hidden_size=1152, depth=28, num_heads=16, mlp_ratio=4.0, class_dropout_prob=0.1, pred_sigma=True, drop_path: float = 0., window_size=0, window_block_indexes=None, use_rel_pos=False, caption_channels=4096, lewei_scale=1.0, config=None, model_max_length=1, **kwargs): 54 | if window_block_indexes is None: 55 | window_block_indexes = [] 56 | super().__init__() 57 | self.pred_sigma = pred_sigma 58 | self.in_channels = in_channels 59 | self.out_channels = in_channels * 2 if pred_sigma else in_channels 60 | self.patch_size = patch_size 61 | self.num_heads = num_heads 62 | self.lewei_scale = lewei_scale, 63 | 64 | self.x_embedder = PatchEmbed(input_size, patch_size, in_channels, hidden_size, bias=True) 65 | self.t_embedder = TimestepEmbedder(hidden_size) 66 | num_patches = self.x_embedder.num_patches 67 | self.base_size = input_size // self.patch_size 68 | # Will use fixed sin-cos embedding: 69 | self.register_buffer("pos_embed", torch.zeros(1, num_patches, hidden_size)) 70 | approx_gelu = lambda: nn.GELU(approximate="tanh") 71 | self.y_embedder = CaptionEmbedder(in_channels=caption_channels, hidden_size=hidden_size, uncond_prob=class_dropout_prob, act_layer=approx_gelu, token_num=model_max_length) 72 | drop_path = [x.item() for x in torch.linspace(0, drop_path, depth)] # stochastic depth decay rule 73 | self.blocks = nn.ModuleList([ 74 | LumosI2IBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i], 75 | input_size=(input_size // patch_size, input_size // patch_size), 76 | window_size=window_size if i in window_block_indexes else 0, 77 | use_rel_pos=use_rel_pos if i in window_block_indexes else False) 78 | for i in range(depth) 79 | ]) 80 | self.final_layer = T2IFinalLayer(hidden_size, patch_size, self.out_channels) 81 | 82 | self.initialize_weights() 83 | 84 | print(f'Warning: lewei scale: {self.lewei_scale}, base size: {self.base_size}') 85 | 86 | def forward(self, x, timestep, y, mask=None, data_info=None, **kwargs): 87 | """ 88 | Forward pass of Lumos-I2I. 
89 | x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) 90 | t: (N,) tensor of diffusion timesteps 91 | y: (N, 1, 120, C) tensor of cond rep 92 | """ 93 | x = x.to(self.dtype) 94 | timestep = timestep.to(self.dtype) 95 | y = y.to(self.dtype) 96 | pos_embed = self.pos_embed.to(self.dtype) 97 | self.h, self.w = x.shape[-2]//self.patch_size, x.shape[-1]//self.patch_size 98 | x = self.x_embedder(x) + pos_embed # (N, T, D), where T = H * W / patch_size ** 2 99 | t = self.t_embedder(timestep.to(x.dtype)) # (N, D) 100 | y = self.y_embedder(y, self.training) # (N, 1, L, D) 101 | if mask is not None: 102 | if mask.shape[0] != y.shape[0]: 103 | mask = mask.repeat(y.shape[0] // mask.shape[0], 1) 104 | mask = mask.squeeze(1).squeeze(1) 105 | y = y.squeeze(1).masked_select(mask.unsqueeze(-1) != 0).view(1, -1, x.shape[-1]) 106 | y_lens = mask.sum(dim=1).tolist() 107 | else: 108 | y_lens = [y.shape[2]] * y.shape[0] 109 | y = y.squeeze(1).view(1, -1, x.shape[-1]) 110 | for block in self.blocks: 111 | x = auto_grad_checkpoint(block, x, y, t, y_lens) # (N, T, D) #support grad checkpoint 112 | x = self.final_layer(x, t) # (N, T, patch_size ** 2 * out_channels) 113 | x = self.unpatchify(x) # (N, out_channels, H, W) 114 | return x 115 | 116 | def forward_with_dpmsolver(self, x, timestep, y, mask=None, **kwargs): 117 | """ 118 | dpm solver donnot need variance prediction 119 | """ 120 | # https://github.com/openai/glide-text2im/blob/main/notebooks/text2im.ipynb 121 | model_out = self.forward(x, timestep, y, mask) 122 | return model_out.chunk(2, dim=1)[0] 123 | 124 | def unpatchify(self, x): 125 | """ 126 | x: (N, T, patch_size**2 * C) 127 | imgs: (N, H, W, C) 128 | """ 129 | c = self.out_channels 130 | p = self.x_embedder.patch_size[0] 131 | h = w = int(x.shape[1] ** 0.5) 132 | assert h * w == x.shape[1] 133 | 134 | x = x.reshape(shape=(x.shape[0], h, w, p, p, c)) 135 | x = torch.einsum('nhwpqc->nchpwq', x) 136 | return x.reshape(shape=(x.shape[0], c, h * p, h * p)) 137 | 138 | def initialize_weights(self): 139 | # Initialize transformer layers: 140 | def _basic_init(module): 141 | if isinstance(module, nn.Linear): 142 | torch.nn.init.xavier_uniform_(module.weight) 143 | if module.bias is not None: 144 | nn.init.constant_(module.bias, 0) 145 | 146 | self.apply(_basic_init) 147 | 148 | # Initialize (and freeze) pos_embed by sin-cos embedding: 149 | pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.x_embedder.num_patches ** 0.5), lewei_scale=self.lewei_scale, base_size=self.base_size) 150 | self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0)) 151 | 152 | # Initialize patch_embed like nn.Linear (instead of nn.Conv2d): 153 | w = self.x_embedder.proj.weight.data 154 | nn.init.xavier_uniform_(w.view([w.shape[0], -1])) 155 | 156 | # Initialize timestep embedding MLP: 157 | nn.init.normal_(self.t_embedder.mlp[0].weight, std=0.02) 158 | nn.init.normal_(self.t_embedder.mlp[2].weight, std=0.02) 159 | for block in self.blocks: 160 | nn.init.constant_(block.adaLN_modulation[-1].weight, 0) 161 | nn.init.constant_(block.adaLN_modulation[-1].bias, 0) 162 | 163 | # Initialize caption embedding MLP: 164 | nn.init.normal_(self.y_embedder.y_proj.fc1.weight, std=0.02) 165 | nn.init.normal_(self.y_embedder.y_proj.fc2.weight, std=0.02) 166 | 167 | # Zero-out adaLN modulation layers in LumosI2I blocks: 168 | for block in self.blocks: 169 | nn.init.constant_(block.cross_attn.proj.weight, 0) 170 | nn.init.constant_(block.cross_attn.proj.bias, 0) 171 | 172 | # 
Zero-out output layers: 173 | nn.init.constant_(self.final_layer.linear.weight, 0) 174 | nn.init.constant_(self.final_layer.linear.bias, 0) 175 | 176 | @property 177 | def dtype(self): 178 | return next(self.parameters()).dtype 179 | 180 | 181 | def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0, lewei_scale=1.0, base_size=16): 182 | """ 183 | grid_size: int of the grid height and width 184 | return: 185 | pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) 186 | """ 187 | if isinstance(grid_size, int): 188 | grid_size = to_2tuple(grid_size) 189 | grid_h = np.arange(grid_size[0], dtype=np.float32) / (grid_size[0]/base_size) / lewei_scale 190 | grid_w = np.arange(grid_size[1], dtype=np.float32) / (grid_size[1]/base_size) / lewei_scale 191 | grid = np.meshgrid(grid_w, grid_h) # here w goes first 192 | grid = np.stack(grid, axis=0) 193 | grid = grid.reshape([2, 1, grid_size[1], grid_size[0]]) 194 | 195 | pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) 196 | if cls_token and extra_tokens > 0: 197 | pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) 198 | return pos_embed 199 | 200 | 201 | def get_2d_sincos_pos_embed_from_grid(embed_dim, grid): 202 | assert embed_dim % 2 == 0 203 | 204 | # use half of dimensions to encode grid_h 205 | emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2) 206 | emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2) 207 | 208 | return np.concatenate([emb_h, emb_w], axis=1) 209 | 210 | 211 | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): 212 | """ 213 | embed_dim: output dimension for each position 214 | pos: a list of positions to be encoded: size (M,) 215 | out: (M, D) 216 | """ 217 | assert embed_dim % 2 == 0 218 | omega = np.arange(embed_dim // 2, dtype=np.float64) 219 | omega /= embed_dim / 2. 220 | omega = 1. / 10000 ** omega # (D/2,) 221 | 222 | pos = pos.reshape(-1) # (M,) 223 | out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product 224 | 225 | emb_sin = np.sin(out) # (M, D/2) 226 | emb_cos = np.cos(out) # (M, D/2) 227 | 228 | return np.concatenate([emb_sin, emb_cos], axis=1) 229 | 230 | 231 | ################################################################################# 232 | # Lumos Image-to-Image Configs # 233 | ################################################################################# 234 | @MODELS.register_module() 235 | def LumosI2I_XL_2(**kwargs): 236 | return LumosI2I(depth=28, hidden_size=1152, patch_size=2, num_heads=16, **kwargs) 237 | -------------------------------------------------------------------------------- /lumos_diffusion/model/lumos/LumosT2I.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import os 5 | import numpy as np 6 | from timm.models.layers import DropPath 7 | from timm.models.vision_transformer import PatchEmbed, Mlp 8 | 9 | from lumos_diffusion.model.builder import MODELS 10 | from lumos_diffusion.model.utils import auto_grad_checkpoint, to_2tuple 11 | from lumos_diffusion.model.lumos.Lumos_blocks import modulate, CaptionEmbedder, WindowAttention, MultiHeadCrossAttention, T2IFinalLayer, TimestepEmbedder 12 | 13 | class LumosT2IBlock(nn.Module): 14 | """ 15 | A LumosT2I block with adaptive layer norm (adaLN-zero) conditioning. 
16 | """ 17 | 18 | def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, drop_path=0., window_size=0, input_size=None, use_rel_pos=False, **block_kwargs): 19 | super().__init__() 20 | self.hidden_size = hidden_size 21 | self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 22 | self.attn = WindowAttention(hidden_size, num_heads=num_heads, qkv_bias=True, 23 | input_size=input_size if window_size == 0 else (window_size, window_size), 24 | use_rel_pos=use_rel_pos, **block_kwargs) 25 | self.cross_attn = MultiHeadCrossAttention(hidden_size, num_heads, **block_kwargs) 26 | self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 27 | # to be compatible with lower version pytorch 28 | approx_gelu = lambda: nn.GELU(approximate="tanh") 29 | self.mlp = Mlp(in_features=hidden_size, hidden_features=int(hidden_size * mlp_ratio), act_layer=approx_gelu, drop=0) 30 | self.drop_path = DropPath(drop_path) if drop_path > 0. else nn.Identity() 31 | self.window_size = window_size 32 | self.adaLN_modulation = nn.Sequential( 33 | nn.SiLU(), 34 | nn.Linear(hidden_size, 6 * hidden_size, bias=True) 35 | ) 36 | def forward(self, x, y, t, mask=None, **kwargs): 37 | B, N, C = x.shape 38 | shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(t).chunk(6, dim=1) 39 | x = x + self.drop_path(gate_msa.unsqueeze(1) * self.attn(modulate(self.norm1(x), shift_msa, scale_msa)).reshape(B, N, C)) 40 | x = x + self.cross_attn(x, y, mask) 41 | x = x + self.drop_path(gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp))) 42 | 43 | return x 44 | 45 | ############################################################################# 46 | # Core Lumos Text-to-Image Model # 47 | ############################################################################# 48 | @MODELS.register_module() 49 | class LumosT2I(nn.Module): 50 | """ 51 | Diffusion model with a Transformer backbone. 
52 | """ 53 | 54 | def __init__(self, input_size=32, patch_size=2, in_channels=4, hidden_size=1152, depth=28, num_heads=16, mlp_ratio=4.0, class_dropout_prob=0.1, pred_sigma=True, drop_path: float = 0., window_size=0, window_block_indexes=None, use_rel_pos=False, caption_channels=4096, lewei_scale=1.0, config=None, model_max_length=120, **kwargs): 55 | if window_block_indexes is None: 56 | window_block_indexes = [] 57 | super().__init__() 58 | self.pred_sigma = pred_sigma 59 | self.in_channels = in_channels 60 | self.out_channels = in_channels * 2 if pred_sigma else in_channels 61 | self.patch_size = patch_size 62 | self.num_heads = num_heads 63 | self.lewei_scale = lewei_scale, 64 | 65 | self.x_embedder = PatchEmbed(input_size, patch_size, in_channels, hidden_size, bias=True) 66 | self.t_embedder = TimestepEmbedder(hidden_size) 67 | num_patches = self.x_embedder.num_patches 68 | self.base_size = input_size // self.patch_size 69 | # Will use fixed sin-cos embedding: 70 | self.register_buffer("pos_embed", torch.zeros(1, num_patches, hidden_size)) 71 | 72 | approx_gelu = lambda: nn.GELU(approximate="tanh") 73 | self.y_embedder = CaptionEmbedder(in_channels=caption_channels, hidden_size=hidden_size, uncond_prob=class_dropout_prob, act_layer=approx_gelu, token_num=model_max_length) 74 | drop_path = [x.item() for x in torch.linspace(0, drop_path, depth)] # stochastic depth decay rule 75 | self.blocks = nn.ModuleList([ 76 | LumosT2IBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i], 77 | input_size=(input_size // patch_size, input_size // patch_size), 78 | window_size=window_size if i in window_block_indexes else 0, 79 | use_rel_pos=use_rel_pos if i in window_block_indexes else False) 80 | for i in range(depth) 81 | ]) 82 | self.final_layer = T2IFinalLayer(hidden_size, patch_size, self.out_channels) 83 | 84 | self.initialize_weights() 85 | 86 | print(f'Warning: lewei scale: {self.lewei_scale}, base size: {self.base_size}') 87 | 88 | def forward(self, x, timestep, y, mask=None, data_info=None, **kwargs): 89 | """ 90 | Forward pass of Lumos-T2I. 
91 | x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) 92 | t: (N,) tensor of diffusion timesteps 93 | y: (N, 1, 120, C) tensor of caption embeddings 94 | """ 95 | x = x.to(self.dtype) 96 | timestep = timestep.to(self.dtype) 97 | y = y.to(self.dtype) 98 | pos_embed = self.pos_embed.to(self.dtype) 99 | self.h, self.w = x.shape[-2]//self.patch_size, x.shape[-1]//self.patch_size 100 | x = self.x_embedder(x) + pos_embed # (N, T, D), where T = H * W / patch_size ** 2 101 | t = self.t_embedder(timestep.to(x.dtype)) # (N, D) 102 | # t0 = self.t_block(t) 103 | y = self.y_embedder(y, self.training) # (N, 1, L, D) 104 | if mask is not None: 105 | if mask.shape[0] != y.shape[0]: 106 | mask = mask.repeat(y.shape[0] // mask.shape[0], 1) 107 | mask = mask.squeeze(1).squeeze(1) 108 | y = y.squeeze(1).masked_select(mask.unsqueeze(-1) != 0).view(1, -1, x.shape[-1]) 109 | y_lens = mask.sum(dim=1).tolist() 110 | else: 111 | y_lens = [y.shape[2]] * y.shape[0] 112 | y = y.squeeze(1).view(1, -1, x.shape[-1]) 113 | for block in self.blocks: 114 | x = auto_grad_checkpoint(block, x, y, t, y_lens) # (N, T, D) #support grad checkpoint 115 | x = self.final_layer(x, t) # (N, T, patch_size ** 2 * out_channels) 116 | x = self.unpatchify(x) # (N, out_channels, H, W) 117 | return x 118 | 119 | def forward_with_dpmsolver(self, x, timestep, y, mask=None, **kwargs): 120 | """ 121 | dpm solver donnot need variance prediction 122 | """ 123 | # https://github.com/openai/glide-text2im/blob/main/notebooks/text2im.ipynb 124 | model_out = self.forward(x, timestep, y, mask) 125 | return model_out.chunk(2, dim=1)[0] 126 | 127 | def unpatchify(self, x): 128 | """ 129 | x: (N, T, patch_size**2 * C) 130 | imgs: (N, H, W, C) 131 | """ 132 | c = self.out_channels 133 | p = self.x_embedder.patch_size[0] 134 | h = w = int(x.shape[1] ** 0.5) 135 | assert h * w == x.shape[1] 136 | 137 | x = x.reshape(shape=(x.shape[0], h, w, p, p, c)) 138 | x = torch.einsum('nhwpqc->nchpwq', x) 139 | return x.reshape(shape=(x.shape[0], c, h * p, h * p)) 140 | 141 | def initialize_weights(self): 142 | # Initialize transformer layers: 143 | def _basic_init(module): 144 | if isinstance(module, nn.Linear): 145 | torch.nn.init.xavier_uniform_(module.weight) 146 | if module.bias is not None: 147 | nn.init.constant_(module.bias, 0) 148 | 149 | self.apply(_basic_init) 150 | 151 | # Initialize (and freeze) pos_embed by sin-cos embedding: 152 | pos_embed = get_2d_sincos_pos_embed(self.pos_embed.shape[-1], int(self.x_embedder.num_patches ** 0.5), lewei_scale=self.lewei_scale, base_size=self.base_size) 153 | self.pos_embed.data.copy_(torch.from_numpy(pos_embed).float().unsqueeze(0)) 154 | 155 | # Initialize patch_embed like nn.Linear (instead of nn.Conv2d): 156 | w = self.x_embedder.proj.weight.data 157 | nn.init.xavier_uniform_(w.view([w.shape[0], -1])) 158 | 159 | # Initialize timestep embedding MLP: 160 | nn.init.normal_(self.t_embedder.mlp[0].weight, std=0.02) 161 | nn.init.normal_(self.t_embedder.mlp[2].weight, std=0.02) 162 | for block in self.blocks: 163 | nn.init.constant_(block.adaLN_modulation[-1].weight, 0) 164 | nn.init.constant_(block.adaLN_modulation[-1].bias, 0) 165 | 166 | # Initialize caption embedding MLP: 167 | nn.init.normal_(self.y_embedder.y_proj.fc1.weight, std=0.02) 168 | nn.init.normal_(self.y_embedder.y_proj.fc2.weight, std=0.02) 169 | 170 | # Zero-out adaLN modulation layers in LumosT2I blocks: 171 | for block in self.blocks: 172 | nn.init.constant_(block.cross_attn.proj.weight, 0) 173 | 
nn.init.constant_(block.cross_attn.proj.bias, 0) 174 | 175 | # Zero-out output layers: 176 | nn.init.constant_(self.final_layer.linear.weight, 0) 177 | nn.init.constant_(self.final_layer.linear.bias, 0) 178 | 179 | @property 180 | def dtype(self): 181 | return next(self.parameters()).dtype 182 | 183 | 184 | def get_2d_sincos_pos_embed(embed_dim, grid_size, cls_token=False, extra_tokens=0, lewei_scale=1.0, base_size=16): 185 | """ 186 | grid_size: int of the grid height and width 187 | return: 188 | pos_embed: [grid_size*grid_size, embed_dim] or [1+grid_size*grid_size, embed_dim] (w/ or w/o cls_token) 189 | """ 190 | if isinstance(grid_size, int): 191 | grid_size = to_2tuple(grid_size) 192 | grid_h = np.arange(grid_size[0], dtype=np.float32) / (grid_size[0]/base_size) / lewei_scale 193 | grid_w = np.arange(grid_size[1], dtype=np.float32) / (grid_size[1]/base_size) / lewei_scale 194 | grid = np.meshgrid(grid_w, grid_h) # here w goes first 195 | grid = np.stack(grid, axis=0) 196 | grid = grid.reshape([2, 1, grid_size[1], grid_size[0]]) 197 | 198 | pos_embed = get_2d_sincos_pos_embed_from_grid(embed_dim, grid) 199 | if cls_token and extra_tokens > 0: 200 | pos_embed = np.concatenate([np.zeros([extra_tokens, embed_dim]), pos_embed], axis=0) 201 | return pos_embed 202 | 203 | 204 | def get_2d_sincos_pos_embed_from_grid(embed_dim, grid): 205 | assert embed_dim % 2 == 0 206 | 207 | # use half of dimensions to encode grid_h 208 | emb_h = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[0]) # (H*W, D/2) 209 | emb_w = get_1d_sincos_pos_embed_from_grid(embed_dim // 2, grid[1]) # (H*W, D/2) 210 | 211 | return np.concatenate([emb_h, emb_w], axis=1) 212 | 213 | 214 | def get_1d_sincos_pos_embed_from_grid(embed_dim, pos): 215 | """ 216 | embed_dim: output dimension for each position 217 | pos: a list of positions to be encoded: size (M,) 218 | out: (M, D) 219 | """ 220 | assert embed_dim % 2 == 0 221 | omega = np.arange(embed_dim // 2, dtype=np.float64) 222 | omega /= embed_dim / 2. 223 | omega = 1. / 10000 ** omega # (D/2,) 224 | 225 | pos = pos.reshape(-1) # (M,) 226 | out = np.einsum('m,d->md', pos, omega) # (M, D/2), outer product 227 | 228 | emb_sin = np.sin(out) # (M, D/2) 229 | emb_cos = np.cos(out) # (M, D/2) 230 | 231 | return np.concatenate([emb_sin, emb_cos], axis=1) 232 | 233 | 234 | ################################################################################# 235 | # LumosT2I Configs # 236 | ################################################################################# 237 | @MODELS.register_module() 238 | def LumosT2I_XL_2(**kwargs): 239 | return LumosT2I(depth=28, hidden_size=1152, patch_size=2, num_heads=16, **kwargs) 240 | -------------------------------------------------------------------------------- /lumos_diffusion/model/lumos/LumosT2IMS.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Meta Platforms, Inc. and affiliates. 2 | # All rights reserved. 3 | 4 | # This source code is licensed under the license found in the 5 | # LICENSE file in the root directory of this source tree. 
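# Construction/conditioning sketch, mirroring gradio_demos/lumos_T2I.py (tensor
# shapes are illustrative; `hw` and `ar` come from get_closest_ratio):
#   model = LumosT2IMS_XL_2(input_size=1024 // 8, window_block_indexes=[],
#                           window_size=0, use_rel_pos=False, lewei_scale=2.0)
#   data_info = {'img_hw': hw, 'aspect_ratio': ar}   # hw: (N, 2), ar: (N, 1)
#   noise_pred = model.forward_with_dpmsolver(z, timestep, caption_embs,
#                                             data_info=data_info, mask=emb_masks)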
6 | # -------------------------------------------------------- 7 | # References: 8 | # GLIDE: https://github.com/openai/glide-text2im 9 | # MAE: https://github.com/facebookresearch/mae/blob/main/models_mae.py 10 | # -------------------------------------------------------- 11 | import torch 12 | import torch.nn as nn 13 | from timm.models.layers import DropPath 14 | from timm.models.vision_transformer import Mlp 15 | 16 | from lumos_diffusion.model.builder import MODELS 17 | from lumos_diffusion.model.utils import auto_grad_checkpoint, to_2tuple 18 | from lumos_diffusion.model.lumos.Lumos_blocks import modulate, CaptionEmbedder, WindowAttention, MultiHeadCrossAttention, T2IFinalLayer, TimestepEmbedder, SizeEmbedder 19 | from lumos_diffusion.model.lumos.LumosT2I import LumosT2I, get_2d_sincos_pos_embed 20 | 21 | 22 | class PatchEmbed(nn.Module): 23 | """ 2D Image to Patch Embedding 24 | """ 25 | def __init__( 26 | self, 27 | patch_size=16, 28 | in_chans=3, 29 | embed_dim=768, 30 | norm_layer=None, 31 | flatten=True, 32 | bias=True, 33 | ): 34 | super().__init__() 35 | patch_size = to_2tuple(patch_size) 36 | self.patch_size = patch_size 37 | self.flatten = flatten 38 | self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size, bias=bias) 39 | self.norm = norm_layer(embed_dim) if norm_layer else nn.Identity() 40 | 41 | def forward(self, x): 42 | x = self.proj(x) 43 | if self.flatten: 44 | x = x.flatten(2).transpose(1, 2) # BCHW -> BNC 45 | x = self.norm(x) 46 | return x 47 | 48 | 49 | class LumosT2IMSBlock(nn.Module): 50 | """ 51 | A LumosT2IMS block with adaptive layer norm zero (adaLN-Zero) conditioning. 52 | """ 53 | 54 | def __init__(self, hidden_size, num_heads, mlp_ratio=4.0, drop_path=0., window_size=0, input_size=None, use_rel_pos=False, **block_kwargs): 55 | super().__init__() 56 | self.hidden_size = hidden_size 57 | self.norm1 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 58 | self.attn = WindowAttention(hidden_size, num_heads=num_heads, qkv_bias=True, 59 | input_size=input_size if window_size == 0 else (window_size, window_size), 60 | use_rel_pos=use_rel_pos, **block_kwargs) 61 | self.cross_attn = MultiHeadCrossAttention(hidden_size, num_heads, **block_kwargs) 62 | self.norm2 = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 63 | # to be compatible with lower version pytorch 64 | approx_gelu = lambda: nn.GELU(approximate="tanh") 65 | self.mlp = Mlp(in_features=hidden_size, hidden_features=int(hidden_size * mlp_ratio), act_layer=approx_gelu, drop=0) 66 | self.drop_path = DropPath(drop_path) if drop_path > 0. 
else nn.Identity() 67 | self.window_size = window_size 68 | # self.scale_shift_table = nn.Parameter(torch.randn(6, hidden_size) / hidden_size ** 0.5) 69 | self.adaLN_modulation = nn.Sequential( 70 | nn.SiLU(), 71 | nn.Linear(hidden_size, 6 * hidden_size, bias=True) 72 | ) 73 | 74 | def forward(self, x, y, t, mask=None, **kwargs): 75 | B, N, C = x.shape 76 | shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = self.adaLN_modulation(t).chunk(6, dim=1) 77 | # shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None] + t.reshape(B, 6, -1)).chunk(6, dim=1) 78 | x = x + self.drop_path(gate_msa.unsqueeze(1) * self.attn(modulate(self.norm1(x), shift_msa, scale_msa)).reshape(B, N, C)) 79 | # x = x + self.drop_path(gate_msa * self.attn(t2i_modulate(self.norm1(x), shift_msa, scale_msa))) 80 | x = x + self.cross_attn(x, y, mask) 81 | # x = x + self.drop_path(gate_mlp * self.mlp(t2i_modulate(self.norm2(x), shift_mlp, scale_mlp))) 82 | x = x + self.drop_path(gate_mlp.unsqueeze(1) * self.mlp(modulate(self.norm2(x), shift_mlp, scale_mlp))) 83 | return x 84 | 85 | 86 | ###################################################################################################### 87 | # Core Lumos Text-to-Image Mluti-Scale Model # 88 | ###################################################################################################### 89 | @MODELS.register_module() 90 | class LumosT2IMS(LumosT2I): 91 | """ 92 | Diffusion model with a Transformer backbone. 93 | """ 94 | 95 | def __init__(self, input_size=32, patch_size=2, in_channels=4, hidden_size=1152, depth=28, num_heads=16, mlp_ratio=4.0, class_dropout_prob=0.1, learn_sigma=True, pred_sigma=True, drop_path: float = 0., window_size=0, window_block_indexes=None, use_rel_pos=False, caption_channels=4096, lewei_scale=1., config=None, model_max_length=120, **kwargs): 96 | if window_block_indexes is None: 97 | window_block_indexes = [] 98 | super().__init__( 99 | input_size=input_size, 100 | patch_size=patch_size, 101 | in_channels=in_channels, 102 | hidden_size=hidden_size, 103 | depth=depth, 104 | num_heads=num_heads, 105 | mlp_ratio=mlp_ratio, 106 | class_dropout_prob=class_dropout_prob, 107 | learn_sigma=learn_sigma, 108 | pred_sigma=pred_sigma, 109 | drop_path=drop_path, 110 | window_size=window_size, 111 | window_block_indexes=window_block_indexes, 112 | use_rel_pos=use_rel_pos, 113 | lewei_scale=lewei_scale, 114 | config=config, 115 | model_max_length=model_max_length, 116 | **kwargs, 117 | ) 118 | self.h = self.w = 0 119 | approx_gelu = lambda: nn.GELU(approximate="tanh") 120 | self.x_embedder = PatchEmbed(patch_size, in_channels, hidden_size, bias=True) 121 | self.y_embedder = CaptionEmbedder(in_channels=caption_channels, hidden_size=hidden_size, uncond_prob=class_dropout_prob, act_layer=approx_gelu, token_num=model_max_length) 122 | self.csize_embedder = SizeEmbedder(hidden_size//3) # c_size embed 123 | self.ar_embedder = SizeEmbedder(hidden_size//3) # aspect ratio embed 124 | drop_path = [x.item() for x in torch.linspace(0, drop_path, depth)] # stochastic depth decay rule 125 | self.blocks = nn.ModuleList([ 126 | LumosT2IMSBlock(hidden_size, num_heads, mlp_ratio=mlp_ratio, drop_path=drop_path[i], 127 | input_size=(input_size // patch_size, input_size // patch_size), 128 | window_size=window_size if i in window_block_indexes else 0, 129 | use_rel_pos=use_rel_pos if i in window_block_indexes else False) 130 | for i in range(depth) 131 | ]) 132 | self.final_layer = T2IFinalLayer(hidden_size, patch_size, 
self.out_channels) 133 | 134 | self.initialize() 135 | 136 | def forward(self, x, timestep, y, mask=None, data_info=None, **kwargs): 137 | """ 138 | Forward pass of Lumos-T2I MS. 139 | x: (N, C, H, W) tensor of spatial inputs (images or latent representations of images) 140 | timestep: (N,) tensor of diffusion timesteps 141 | y: (N, 1, 120, C) tensor of caption embeddings 142 | """ 143 | bs = x.shape[0] 144 | x = x.to(self.dtype) 145 | timestep = timestep.to(self.dtype) 146 | y = y.to(self.dtype) 147 | c_size, ar = data_info['img_hw'].to(self.dtype), data_info['aspect_ratio'].to(self.dtype) 148 | self.h, self.w = x.shape[-2]//self.patch_size, x.shape[-1]//self.patch_size 149 | pos_embed = torch.from_numpy(get_2d_sincos_pos_embed(self.pos_embed.shape[-1], (self.h, self.w), lewei_scale=self.lewei_scale, base_size=self.base_size)).unsqueeze(0).to(x.device).to(self.dtype) 150 | x = self.x_embedder(x) + pos_embed # (N, T, D), where T = H * W / patch_size ** 2 151 | t = self.t_embedder(timestep) # (N, D) 152 | csize = self.csize_embedder(c_size, bs) # (N, D) 153 | ar = self.ar_embedder(ar, bs) # (N, D) 154 | t = t + torch.cat([csize, ar], dim=1) 155 | # t0 = self.t_block(t) 156 | y = self.y_embedder(y, self.training) # (N, 1, L, D) 157 | if mask is not None: 158 | if mask.shape[0] != y.shape[0]: 159 | mask = mask.repeat(y.shape[0] // mask.shape[0], 1) 160 | mask = mask.squeeze(1).squeeze(1) 161 | y = y.squeeze(1).masked_select(mask.unsqueeze(-1) != 0).view(1, -1, x.shape[-1]) 162 | y_lens = mask.sum(dim=1).tolist() 163 | else: 164 | y_lens = [y.shape[2]] * y.shape[0] 165 | y = y.squeeze(1).view(1, -1, x.shape[-1]) 166 | for block in self.blocks: 167 | x = auto_grad_checkpoint(block, x, y, t, y_lens, **kwargs) # (N, T, D) # support grad checkpoint 168 | x = self.final_layer(x, t) # (N, T, patch_size ** 2 * out_channels) 169 | x = self.unpatchify(x) # (N, out_channels, H, W) 170 | return x 171 | 172 | def forward_with_dpmsolver(self, x, timestep, y, data_info, **kwargs): 173 | """ 174 | dpm solver does not need variance prediction 175 | """ 176 | # https://github.com/openai/glide-text2im/blob/main/notebooks/text2im.ipynb 177 | model_out = self.forward(x, timestep, y, data_info=data_info, **kwargs) 178 | return model_out.chunk(2, dim=1)[0] 179 | 180 | def unpatchify(self, x): 181 | """ 182 | x: (N, T, patch_size**2 * C) 183 | imgs: (N, C, H, W) 184 | """ 185 | c = self.out_channels 186 | p = self.x_embedder.patch_size[0] 187 | assert self.h * self.w == x.shape[1] 188 | 189 | x = x.reshape(shape=(x.shape[0], self.h, self.w, p, p, c)) 190 | x = torch.einsum('nhwpqc->nchpwq', x) 191 | return x.reshape(shape=(x.shape[0], c, self.h * p, self.w * p)) 192 | 193 | def initialize(self): 194 | # Initialize transformer layers: 195 | def _basic_init(module): 196 | if isinstance(module, nn.Linear): 197 | torch.nn.init.xavier_uniform_(module.weight) 198 | if module.bias is not None: 199 | nn.init.constant_(module.bias, 0) 200 | 201 | self.apply(_basic_init) 202 | 203 | # Initialize patch_embed like nn.Linear (instead of nn.Conv2d): 204 | w = self.x_embedder.proj.weight.data 205 | nn.init.xavier_uniform_(w.view([w.shape[0], -1])) 206 | 207 | # Initialize timestep embedding MLP: 208 | nn.init.normal_(self.t_embedder.mlp[0].weight, std=0.02) 209 | nn.init.normal_(self.t_embedder.mlp[2].weight, std=0.02) 210 | nn.init.normal_(self.csize_embedder.mlp[0].weight, std=0.02) 211 | nn.init.normal_(self.csize_embedder.mlp[2].weight, std=0.02) 212 | nn.init.normal_(self.ar_embedder.mlp[0].weight, std=0.02) 213 | 
nn.init.normal_(self.ar_embedder.mlp[2].weight, std=0.02) 214 | 215 | for block in self.blocks: 216 | nn.init.constant_(block.adaLN_modulation[-1].weight, 0) 217 | nn.init.constant_(block.adaLN_modulation[-1].bias, 0) 218 | 219 | # Initialize caption embedding MLP: 220 | nn.init.normal_(self.y_embedder.y_proj.fc1.weight, std=0.02) 221 | nn.init.normal_(self.y_embedder.y_proj.fc2.weight, std=0.02) 222 | 223 | # Zero-out adaLN modulation layers in LumosT2IMS blocks: 224 | for block in self.blocks: 225 | nn.init.constant_(block.cross_attn.proj.weight, 0) 226 | nn.init.constant_(block.cross_attn.proj.bias, 0) 227 | 228 | # Zero-out output layers: 229 | nn.init.constant_(self.final_layer.linear.weight, 0) 230 | nn.init.constant_(self.final_layer.linear.bias, 0) 231 | 232 | 233 | ######################################################################################### 234 | # LumosT2IMS Configs # 235 | ######################################################################################### 236 | @MODELS.register_module() 237 | def LumosT2IMS_XL_2(**kwargs): 238 | return LumosT2IMS(depth=28, hidden_size=1152, patch_size=2, num_heads=16, **kwargs) 239 | -------------------------------------------------------------------------------- /lumos_diffusion/model/lumos/Lumos_blocks.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | from timm.models.vision_transformer import Mlp, Attention as Attention_ 5 | from einops import rearrange, repeat 6 | import xformers.ops 7 | 8 | from lumos_diffusion.model.utils import add_decomposed_rel_pos 9 | 10 | 11 | def modulate(x, shift, scale): 12 | return x * (1 + scale.unsqueeze(1)) + shift.unsqueeze(1) 13 | 14 | 15 | def t2i_modulate(x, shift, scale): 16 | return x * (1 + scale) + shift 17 | 18 | 19 | class MultiHeadCrossAttention(nn.Module): 20 | def __init__(self, d_model, num_heads, attn_drop=0., proj_drop=0., **block_kwargs): 21 | super(MultiHeadCrossAttention, self).__init__() 22 | assert d_model % num_heads == 0, "d_model must be divisible by num_heads" 23 | 24 | self.d_model = d_model 25 | self.num_heads = num_heads 26 | self.head_dim = d_model // num_heads 27 | 28 | self.q_linear = nn.Linear(d_model, d_model) 29 | self.kv_linear = nn.Linear(d_model, d_model*2) 30 | self.attn_drop = nn.Dropout(attn_drop) 31 | self.proj = nn.Linear(d_model, d_model) 32 | self.proj_drop = nn.Dropout(proj_drop) 33 | 34 | def forward(self, x, cond, mask=None): 35 | # query: img tokens; key/value: condition; mask: if padding tokens 36 | B, N, C = x.shape 37 | 38 | q = self.q_linear(x).view(1, -1, self.num_heads, self.head_dim) 39 | kv = self.kv_linear(cond).view(1, -1, 2, self.num_heads, self.head_dim) 40 | k, v = kv.unbind(2) 41 | attn_bias = None 42 | if mask is not None: 43 | attn_bias = xformers.ops.fmha.BlockDiagonalMask.from_seqlens([N] * B, mask) 44 | x = xformers.ops.memory_efficient_attention(q, k, v, p=self.attn_drop.p, attn_bias=attn_bias) 45 | x = x.view(B, -1, C) 46 | x = self.proj(x) 47 | x = self.proj_drop(x) 48 | 49 | return x 50 | 51 | 52 | class WindowAttention(Attention_): 53 | """Multi-head Attention block with relative position embeddings.""" 54 | 55 | def __init__( 56 | self, 57 | dim, 58 | num_heads=8, 59 | qkv_bias=True, 60 | use_rel_pos=False, 61 | rel_pos_zero_init=True, 62 | input_size=None, 63 | **block_kwargs, 64 | ): 65 | """ 66 | Args: 67 | dim (int): Number of input channels. 68 | num_heads (int): Number of attention heads. 
69 | qkv_bias (bool: If True, add a learnable bias to query, key, value. 70 | rel_pos (bool): If True, add relative positional embeddings to the attention map. 71 | rel_pos_zero_init (bool): If True, zero initialize relative positional parameters. 72 | input_size (int or None): Input resolution for calculating the relative positional 73 | parameter size. 74 | """ 75 | super().__init__(dim, num_heads=num_heads, qkv_bias=qkv_bias, **block_kwargs) 76 | 77 | self.use_rel_pos = use_rel_pos 78 | if self.use_rel_pos: 79 | # initialize relative positional embeddings 80 | self.rel_pos_h = nn.Parameter(torch.zeros(2 * input_size[0] - 1, self.head_dim)) 81 | self.rel_pos_w = nn.Parameter(torch.zeros(2 * input_size[1] - 1, self.head_dim)) 82 | 83 | if not rel_pos_zero_init: 84 | nn.init.trunc_normal_(self.rel_pos_h, std=0.02) 85 | nn.init.trunc_normal_(self.rel_pos_w, std=0.02) 86 | 87 | def forward(self, x, mask=None): 88 | B, N, C = x.shape 89 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads) 90 | q, k, v = qkv.unbind(2) 91 | if use_fp32_attention := getattr(self, 'fp32_attention', False): 92 | q, k, v = q.float(), k.float(), v.float() 93 | 94 | attn_bias = None 95 | if mask is not None: 96 | attn_bias = torch.zeros([B * self.num_heads, q.shape[1], k.shape[1]], dtype=q.dtype, device=q.device) 97 | attn_bias.masked_fill_(mask.squeeze(1).repeat(self.num_heads, 1, 1) == 0, float('-inf')) 98 | x = xformers.ops.memory_efficient_attention(q, k, v, p=self.attn_drop.p, attn_bias=attn_bias) 99 | 100 | x = x.view(B, N, C) 101 | x = self.proj(x) 102 | x = self.proj_drop(x) 103 | return x 104 | 105 | 106 | ################################################################################# 107 | # AMP attention with fp32 softmax to fix loss NaN problem during training # 108 | ################################################################################# 109 | class Attention(Attention_): 110 | def forward(self, x): 111 | B, N, C = x.shape 112 | qkv = self.qkv(x).reshape(B, N, 3, self.num_heads, C // self.num_heads).permute(2, 0, 3, 1, 4) 113 | q, k, v = qkv.unbind(0) # make torchscript happy (cannot use tensor as tuple) 114 | use_fp32_attention = getattr(self, 'fp32_attention', False) 115 | if use_fp32_attention: 116 | q, k = q.float(), k.float() 117 | with torch.cuda.amp.autocast(enabled=not use_fp32_attention): 118 | attn = (q @ k.transpose(-2, -1)) * self.scale 119 | attn = attn.softmax(dim=-1) 120 | 121 | attn = self.attn_drop(attn) 122 | 123 | x = (attn @ v).transpose(1, 2).reshape(B, N, C) 124 | x = self.proj(x) 125 | x = self.proj_drop(x) 126 | return x 127 | 128 | 129 | class FinalLayer(nn.Module): 130 | """ 131 | The final layer of Lumos. 132 | """ 133 | 134 | def __init__(self, hidden_size, patch_size, out_channels): 135 | super().__init__() 136 | self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 137 | self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True) 138 | self.adaLN_modulation = nn.Sequential( 139 | nn.SiLU(), 140 | nn.Linear(hidden_size, 2 * hidden_size, bias=True) 141 | ) 142 | 143 | def forward(self, x, c): 144 | shift, scale = self.adaLN_modulation(c).chunk(2, dim=1) 145 | x = modulate(self.norm_final(x), shift, scale) 146 | x = self.linear(x) 147 | return x 148 | 149 | 150 | class T2IFinalLayer(nn.Module): 151 | """ 152 | The final layer of Lumos. 
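Unlike FinalLayer above, the shift and scale here come from a learned scale_shift_table added to the timestep embedding (see t2i_modulate) rather than from an adaLN_modulation MLP.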
153 | """ 154 | 155 | def __init__(self, hidden_size, patch_size, out_channels): 156 | super().__init__() 157 | self.norm_final = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 158 | self.linear = nn.Linear(hidden_size, patch_size * patch_size * out_channels, bias=True) 159 | self.scale_shift_table = nn.Parameter(torch.randn(2, hidden_size) / hidden_size ** 0.5) 160 | self.out_channels = out_channels 161 | 162 | def forward(self, x, t): 163 | shift, scale = (self.scale_shift_table[None] + t[:, None]).chunk(2, dim=1) 164 | x = t2i_modulate(self.norm_final(x), shift, scale) 165 | x = self.linear(x) 166 | return x 167 | 168 | 169 | class MaskFinalLayer(nn.Module): 170 | """ 171 | The final layer of Lumos. 172 | """ 173 | 174 | def __init__(self, final_hidden_size, c_emb_size, patch_size, out_channels): 175 | super().__init__() 176 | self.norm_final = nn.LayerNorm(final_hidden_size, elementwise_affine=False, eps=1e-6) 177 | self.linear = nn.Linear(final_hidden_size, patch_size * patch_size * out_channels, bias=True) 178 | self.adaLN_modulation = nn.Sequential( 179 | nn.SiLU(), 180 | nn.Linear(c_emb_size, 2 * final_hidden_size, bias=True) 181 | ) 182 | def forward(self, x, t): 183 | shift, scale = self.adaLN_modulation(t).chunk(2, dim=1) 184 | x = modulate(self.norm_final(x), shift, scale) 185 | x = self.linear(x) 186 | return x 187 | 188 | 189 | class DecoderLayer(nn.Module): 190 | """ 191 | The final layer of Lumos. 192 | """ 193 | 194 | def __init__(self, hidden_size, decoder_hidden_size): 195 | super().__init__() 196 | self.norm_decoder = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) 197 | self.linear = nn.Linear(hidden_size, decoder_hidden_size, bias=True) 198 | self.adaLN_modulation = nn.Sequential( 199 | nn.SiLU(), 200 | nn.Linear(hidden_size, 2 * hidden_size, bias=True) 201 | ) 202 | def forward(self, x, t): 203 | shift, scale = self.adaLN_modulation(t).chunk(2, dim=1) 204 | x = modulate(self.norm_decoder(x), shift, scale) 205 | x = self.linear(x) 206 | return x 207 | 208 | 209 | ################################################################################# 210 | # Embedding Layers for Timesteps and Class Labels # 211 | ################################################################################# 212 | class TimestepEmbedder(nn.Module): 213 | """ 214 | Embeds scalar timesteps into vector representations. 215 | """ 216 | 217 | def __init__(self, hidden_size, frequency_embedding_size=256): 218 | super().__init__() 219 | self.mlp = nn.Sequential( 220 | nn.Linear(frequency_embedding_size, hidden_size, bias=True), 221 | nn.SiLU(), 222 | nn.Linear(hidden_size, hidden_size, bias=True), 223 | ) 224 | self.frequency_embedding_size = frequency_embedding_size 225 | 226 | @staticmethod 227 | def timestep_embedding(t, dim, max_period=10000): 228 | """ 229 | Create sinusoidal timestep embeddings. 230 | :param t: a 1-D Tensor of N indices, one per batch element. 231 | These may be fractional. 232 | :param dim: the dimension of the output. 233 | :param max_period: controls the minimum frequency of the embeddings. 234 | :return: an (N, D) Tensor of positional embeddings. 
235 | """ 236 | # https://github.com/openai/glide-text2im/blob/main/glide_text2im/nn.py 237 | half = dim // 2 238 | freqs = torch.exp( 239 | -math.log(max_period) * torch.arange(start=0, end=half, dtype=torch.float32, device=t.device) / half) 240 | args = t[:, None].float() * freqs[None] 241 | embedding = torch.cat([torch.cos(args), torch.sin(args)], dim=-1) 242 | if dim % 2: 243 | embedding = torch.cat([embedding, torch.zeros_like(embedding[:, :1])], dim=-1) 244 | return embedding 245 | 246 | def forward(self, t): 247 | t_freq = self.timestep_embedding(t, self.frequency_embedding_size).to(self.dtype) 248 | return self.mlp(t_freq) 249 | 250 | @property 251 | def dtype(self): 252 | return next(self.parameters()).dtype 253 | 254 | 255 | class SizeEmbedder(TimestepEmbedder): 256 | """ 257 | Embeds scalar timesteps into vector representations. 258 | """ 259 | 260 | def __init__(self, hidden_size, frequency_embedding_size=256): 261 | super().__init__(hidden_size=hidden_size, frequency_embedding_size=frequency_embedding_size) 262 | self.mlp = nn.Sequential( 263 | nn.Linear(frequency_embedding_size, hidden_size, bias=True), 264 | nn.SiLU(), 265 | nn.Linear(hidden_size, hidden_size, bias=True), 266 | ) 267 | self.frequency_embedding_size = frequency_embedding_size 268 | self.outdim = hidden_size 269 | 270 | def forward(self, s, bs): 271 | if s.ndim == 1: 272 | s = s[:, None] 273 | assert s.ndim == 2 274 | if s.shape[0] != bs: 275 | s = s.repeat(bs//s.shape[0], 1) 276 | assert s.shape[0] == bs 277 | b, dims = s.shape[0], s.shape[1] 278 | s = rearrange(s, "b d -> (b d)") 279 | s_freq = self.timestep_embedding(s, self.frequency_embedding_size).to(self.dtype) 280 | s_emb = self.mlp(s_freq) 281 | s_emb = rearrange(s_emb, "(b d) d2 -> b (d d2)", b=b, d=dims, d2=self.outdim) 282 | return s_emb 283 | 284 | @property 285 | def dtype(self): 286 | return next(self.parameters()).dtype 287 | 288 | 289 | class LabelEmbedder(nn.Module): 290 | """ 291 | Embeds class labels into vector representations. Also handles label dropout for classifier-free guidance. 292 | """ 293 | 294 | def __init__(self, num_classes, hidden_size, dropout_prob): 295 | super().__init__() 296 | use_cfg_embedding = dropout_prob > 0 297 | self.embedding_table = nn.Embedding(num_classes + use_cfg_embedding, hidden_size) 298 | self.num_classes = num_classes 299 | self.dropout_prob = dropout_prob 300 | 301 | def token_drop(self, labels, force_drop_ids=None): 302 | """ 303 | Drops labels to enable classifier-free guidance. 304 | """ 305 | if force_drop_ids is None: 306 | drop_ids = torch.rand(labels.shape[0]).cuda() < self.dropout_prob 307 | else: 308 | drop_ids = force_drop_ids == 1 309 | labels = torch.where(drop_ids, self.num_classes, labels) 310 | return labels 311 | 312 | def forward(self, labels, train, force_drop_ids=None): 313 | use_dropout = self.dropout_prob > 0 314 | if (train and use_dropout) or (force_drop_ids is not None): 315 | labels = self.token_drop(labels, force_drop_ids) 316 | return self.embedding_table(labels) 317 | 318 | 319 | class CaptionEmbedder(nn.Module): 320 | """ 321 | Embeds class labels into vector representations. Also handles label dropout for classifier-free guidance. 
322 | """ 323 | 324 | def __init__(self, in_channels, hidden_size, uncond_prob, act_layer=nn.GELU(approximate='tanh'), token_num=120): 325 | super().__init__() 326 | self.y_proj = Mlp(in_features=in_channels, hidden_features=hidden_size, out_features=hidden_size, act_layer=act_layer, drop=0) 327 | self.register_buffer("y_embedding", nn.Parameter(torch.randn(token_num, in_channels) / in_channels ** 0.5)) 328 | self.uncond_prob = uncond_prob 329 | 330 | def token_drop(self, caption, force_drop_ids=None): 331 | """ 332 | Drops labels to enable classifier-free guidance. 333 | """ 334 | if force_drop_ids is None: 335 | drop_ids = torch.rand(caption.shape[0]).cuda() < self.uncond_prob 336 | else: 337 | drop_ids = force_drop_ids == 1 338 | caption = torch.where(drop_ids[:, None, None, None], self.y_embedding, caption) 339 | return caption 340 | 341 | def forward(self, caption, train, force_drop_ids=None): 342 | if train: 343 | assert caption.shape[2:] == self.y_embedding.shape 344 | use_dropout = self.uncond_prob > 0 345 | if (train and use_dropout) or (force_drop_ids is not None): 346 | caption = self.token_drop(caption, force_drop_ids) 347 | caption = self.y_proj(caption) 348 | return caption 349 | 350 | 351 | class CaptionEmbedderDoubleBr(nn.Module): 352 | """ 353 | Embeds class labels into vector representations. Also handles label dropout for classifier-free guidance. 354 | """ 355 | 356 | def __init__(self, in_channels, hidden_size, uncond_prob, act_layer=nn.GELU(approximate='tanh'), token_num=120): 357 | super().__init__() 358 | self.proj = Mlp(in_features=in_channels, hidden_features=hidden_size, out_features=hidden_size, act_layer=act_layer, drop=0) 359 | self.embedding = nn.Parameter(torch.randn(1, in_channels) / 10 ** 0.5) 360 | self.y_embedding = nn.Parameter(torch.randn(token_num, in_channels) / 10 ** 0.5) 361 | self.uncond_prob = uncond_prob 362 | 363 | def token_drop(self, global_caption, caption, force_drop_ids=None): 364 | """ 365 | Drops labels to enable classifier-free guidance. 
366 | """ 367 | if force_drop_ids is None: 368 | drop_ids = torch.rand(global_caption.shape[0]).cuda() < self.uncond_prob 369 | else: 370 | drop_ids = force_drop_ids == 1 371 | global_caption = torch.where(drop_ids[:, None], self.embedding, global_caption) 372 | caption = torch.where(drop_ids[:, None, None, None], self.y_embedding, caption) 373 | return global_caption, caption 374 | 375 | def forward(self, caption, train, force_drop_ids=None): 376 | assert caption.shape[2: ] == self.y_embedding.shape 377 | global_caption = caption.mean(dim=2).squeeze() 378 | use_dropout = self.uncond_prob > 0 379 | if (train and use_dropout) or (force_drop_ids is not None): 380 | global_caption, caption = self.token_drop(global_caption, caption, force_drop_ids) 381 | y_embed = self.proj(global_caption) 382 | return y_embed, caption -------------------------------------------------------------------------------- /lumos_diffusion/model/lumos/__init__.py: -------------------------------------------------------------------------------- 1 | from .LumosI2I import LumosI2I, LumosI2I_XL_2, LumosI2IBlock 2 | from .LumosT2I import LumosT2I, LumosT2I_XL_2, LumosT2IBlock 3 | from .LumosT2IMS import LumosT2IMS, LumosT2IMS_XL_2, LumosT2IMSBlock -------------------------------------------------------------------------------- /lumos_diffusion/model/t5.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import os 3 | import re 4 | import html 5 | import urllib.parse as ul 6 | 7 | import ftfy 8 | import torch 9 | from bs4 import BeautifulSoup 10 | from transformers import T5EncoderModel, AutoTokenizer 11 | from huggingface_hub import hf_hub_download 12 | 13 | class T5Embedder: 14 | 15 | available_models = ['t5-v1_1-xxl'] 16 | bad_punct_regex = re.compile(r'['+'#®•©™&@·º½¾¿¡§~'+'\)'+'\('+'\]'+'\['+'\}'+'\{'+'\|'+'\\'+'\/'+'\*' + r']{1,}') # noqa 17 | 18 | def __init__(self, device, dir_or_name='t5-v1_1-xxl', *, local_cache=False, cache_dir=None, hf_token=None, use_text_preprocessing=True, 19 | t5_model_kwargs=None, torch_dtype=None, use_offload_folder=None, model_max_length=120): 20 | self.device = torch.device(device) 21 | self.torch_dtype = torch_dtype or torch.bfloat16 22 | if t5_model_kwargs is None: 23 | t5_model_kwargs = {'low_cpu_mem_usage': True, 'torch_dtype': self.torch_dtype} 24 | if use_offload_folder is not None: 25 | t5_model_kwargs['offload_folder'] = use_offload_folder 26 | t5_model_kwargs['device_map'] = { 27 | 'shared': self.device, 28 | 'encoder.embed_tokens': self.device, 29 | 'encoder.block.0': self.device, 30 | 'encoder.block.1': self.device, 31 | 'encoder.block.2': self.device, 32 | 'encoder.block.3': self.device, 33 | 'encoder.block.4': self.device, 34 | 'encoder.block.5': self.device, 35 | 'encoder.block.6': self.device, 36 | 'encoder.block.7': self.device, 37 | 'encoder.block.8': self.device, 38 | 'encoder.block.9': self.device, 39 | 'encoder.block.10': self.device, 40 | 'encoder.block.11': self.device, 41 | 'encoder.block.12': 'disk', 42 | 'encoder.block.13': 'disk', 43 | 'encoder.block.14': 'disk', 44 | 'encoder.block.15': 'disk', 45 | 'encoder.block.16': 'disk', 46 | 'encoder.block.17': 'disk', 47 | 'encoder.block.18': 'disk', 48 | 'encoder.block.19': 'disk', 49 | 'encoder.block.20': 'disk', 50 | 'encoder.block.21': 'disk', 51 | 'encoder.block.22': 'disk', 52 | 'encoder.block.23': 'disk', 53 | 'encoder.final_layer_norm': 'disk', 54 | 'encoder.dropout': 'disk', 55 | } 56 | else: 57 | t5_model_kwargs['device_map'] = {'shared': 
self.device, 'encoder': self.device} 58 | 59 | self.use_text_preprocessing = use_text_preprocessing 60 | self.hf_token = hf_token 61 | self.cache_dir = cache_dir or os.path.expanduser('~/.cache/IF_') 62 | self.dir_or_name = dir_or_name 63 | tokenizer_path, path = dir_or_name, dir_or_name 64 | if local_cache: 65 | cache_dir = os.path.join(self.cache_dir, dir_or_name) 66 | tokenizer_path, path = cache_dir, cache_dir 67 | elif dir_or_name in self.available_models: 68 | cache_dir = os.path.join(self.cache_dir, dir_or_name) 69 | for filename in [ 70 | 'config.json', 'special_tokens_map.json', 'spiece.model', 'tokenizer_config.json', 71 | 'pytorch_model.bin.index.json', 'pytorch_model-00001-of-00002.bin', 'pytorch_model-00002-of-00002.bin' 72 | ]: 73 | hf_hub_download(repo_id=f'DeepFloyd/{dir_or_name}', filename=filename, cache_dir=cache_dir, 74 | force_filename=filename, token=self.hf_token) 75 | tokenizer_path, path = cache_dir, cache_dir 76 | else: 77 | cache_dir = os.path.join(self.cache_dir, 't5-v1_1-xxl') 78 | for filename in [ 79 | 'config.json', 'special_tokens_map.json', 'spiece.model', 'tokenizer_config.json', 80 | ]: 81 | hf_hub_download(repo_id='DeepFloyd/t5-v1_1-xxl', filename=filename, cache_dir=cache_dir, 82 | force_filename=filename, token=self.hf_token) 83 | tokenizer_path = cache_dir 84 | 85 | print(tokenizer_path) 86 | self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_path) 87 | self.model = T5EncoderModel.from_pretrained(path, **t5_model_kwargs).eval() 88 | self.model_max_length = model_max_length 89 | 90 | def get_text_embeddings(self, texts): 91 | texts = [self.text_preprocessing(text) for text in texts] 92 | 93 | text_tokens_and_mask = self.tokenizer( 94 | texts, 95 | max_length=self.model_max_length, 96 | padding='max_length', 97 | truncation=True, 98 | return_attention_mask=True, 99 | add_special_tokens=True, 100 | return_tensors='pt' 101 | ) 102 | 103 | text_tokens_and_mask['input_ids'] = text_tokens_and_mask['input_ids'] 104 | text_tokens_and_mask['attention_mask'] = text_tokens_and_mask['attention_mask'] 105 | 106 | with torch.no_grad(): 107 | text_encoder_embs = self.model( 108 | input_ids=text_tokens_and_mask['input_ids'].to(self.device), 109 | attention_mask=text_tokens_and_mask['attention_mask'].to(self.device), 110 | )['last_hidden_state'].detach() 111 | return text_encoder_embs, text_tokens_and_mask['attention_mask'].to(self.device) 112 | 113 | def text_preprocessing(self, text): 114 | if self.use_text_preprocessing: 115 | # The exact text cleaning as was in the training stage: 116 | text = self.clean_caption(text) 117 | text = self.clean_caption(text) 118 | return text 119 | else: 120 | return text.lower().strip() 121 | 122 | @staticmethod 123 | def basic_clean(text): 124 | text = ftfy.fix_text(text) 125 | text = html.unescape(html.unescape(text)) 126 | return text.strip() 127 | 128 | def clean_caption(self, caption): 129 | caption = str(caption) 130 | caption = ul.unquote_plus(caption) 131 | caption = caption.strip().lower() 132 | caption = re.sub('', 'person', caption) 133 | # urls: 134 | caption = re.sub( 135 | r'\b((?:https?:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))', # noqa 136 | '', caption) # regex for urls 137 | caption = re.sub( 138 | r'\b((?:www:(?:\/{1,3}|[a-zA-Z0-9%])|[a-zA-Z0-9.\-]+[.](?:com|co|ru|net|org|edu|gov|it)[\w/-]*\b\/?(?!@)))', # noqa 139 | '', caption) # regex for urls 140 | # html: 141 | caption = BeautifulSoup(caption, features='html.parser').text 142 | 143 | # @ 144 | caption 
= re.sub(r'@[\w\d]+\b', '', caption) 145 | 146 | # 31C0—31EF CJK Strokes 147 | # 31F0—31FF Katakana Phonetic Extensions 148 | # 3200—32FF Enclosed CJK Letters and Months 149 | # 3300—33FF CJK Compatibility 150 | # 3400—4DBF CJK Unified Ideographs Extension A 151 | # 4DC0—4DFF Yijing Hexagram Symbols 152 | # 4E00—9FFF CJK Unified Ideographs 153 | caption = re.sub(r'[\u31c0-\u31ef]+', '', caption) 154 | caption = re.sub(r'[\u31f0-\u31ff]+', '', caption) 155 | caption = re.sub(r'[\u3200-\u32ff]+', '', caption) 156 | caption = re.sub(r'[\u3300-\u33ff]+', '', caption) 157 | caption = re.sub(r'[\u3400-\u4dbf]+', '', caption) 158 | caption = re.sub(r'[\u4dc0-\u4dff]+', '', caption) 159 | caption = re.sub(r'[\u4e00-\u9fff]+', '', caption) 160 | ####################################################### 161 | 162 | # все виды тире / all types of dash --> "-" 163 | caption = re.sub( 164 | r'[\u002D\u058A\u05BE\u1400\u1806\u2010-\u2015\u2E17\u2E1A\u2E3A\u2E3B\u2E40\u301C\u3030\u30A0\uFE31\uFE32\uFE58\uFE63\uFF0D]+', # noqa 165 | '-', caption) 166 | 167 | # кавычки к одному стандарту 168 | caption = re.sub(r'[`´«»“”¨]', '"', caption) 169 | caption = re.sub(r'[‘’]', "'", caption) 170 | 171 | # " 172 | caption = re.sub(r'"?', '', caption) 173 | # & 174 | caption = re.sub(r'&', '', caption) 175 | 176 | # ip adresses: 177 | caption = re.sub(r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}', ' ', caption) 178 | 179 | # article ids: 180 | caption = re.sub(r'\d:\d\d\s+$', '', caption) 181 | 182 | # \n 183 | caption = re.sub(r'\\n', ' ', caption) 184 | 185 | # "#123" 186 | caption = re.sub(r'#\d{1,3}\b', '', caption) 187 | # "#12345.." 188 | caption = re.sub(r'#\d{5,}\b', '', caption) 189 | # "123456.." 190 | caption = re.sub(r'\b\d{6,}\b', '', caption) 191 | # filenames: 192 | caption = re.sub(r'[\S]+\.(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)', '', caption) 193 | 194 | # 195 | caption = re.sub(r'[\"\']{2,}', r'"', caption) # """AUSVERKAUFT""" 196 | caption = re.sub(r'[\.]{2,}', r' ', caption) # """AUSVERKAUFT""" 197 | 198 | caption = re.sub(self.bad_punct_regex, r' ', caption) # ***AUSVERKAUFT***, #AUSVERKAUFT 199 | caption = re.sub(r'\s+\.\s+', r' ', caption) # " . " 200 | 201 | # this-is-my-cute-cat / this_is_my_cute_cat 202 | regex2 = re.compile(r'(?:\-|\_)') 203 | if len(re.findall(regex2, caption)) > 3: 204 | caption = re.sub(regex2, ' ', caption) 205 | 206 | caption = self.basic_clean(caption) 207 | 208 | caption = re.sub(r'\b[a-zA-Z]{1,3}\d{3,15}\b', '', caption) # jc6640 209 | caption = re.sub(r'\b[a-zA-Z]+\d+[a-zA-Z]+\b', '', caption) # jc6640vc 210 | caption = re.sub(r'\b\d+[a-zA-Z]+\d+\b', '', caption) # 6640vc231 211 | 212 | caption = re.sub(r'(worldwide\s+)?(free\s+)?shipping', '', caption) 213 | caption = re.sub(r'(free\s)?download(\sfree)?', '', caption) 214 | caption = re.sub(r'\bclick\b\s(?:for|on)\s\w+', '', caption) 215 | caption = re.sub(r'\b(?:png|jpg|jpeg|bmp|webp|eps|pdf|apk|mp4)(\simage[s]?)?', '', caption) 216 | caption = re.sub(r'\bpage\s+\d+\b', '', caption) 217 | 218 | caption = re.sub(r'\b\d*[a-zA-Z]+\d+[a-zA-Z]+\d+[a-zA-Z\d]*\b', r' ', caption) # j2d1a2a... 
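# size strings such as "1024x768" are stripped next; the character class [xх×] also matches the Cyrillic х and the × sign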
219 | 220 | caption = re.sub(r'\b\d+\.?\d*[xх×]\d+\.?\d*\b', '', caption) 221 | 222 | caption = re.sub(r'\b\s+\:\s+', r': ', caption) 223 | caption = re.sub(r'(\D[,\./])\b', r'\1 ', caption) 224 | caption = re.sub(r'\s+', ' ', caption) 225 | 226 | caption.strip() 227 | 228 | caption = re.sub(r'^[\"\']([\w\W]+)[\"\']$', r'\1', caption) 229 | caption = re.sub(r'^[\'\_,\-\:;]', r'', caption) 230 | caption = re.sub(r'[\'\_,\-\:\-\+]$', r'', caption) 231 | caption = re.sub(r'^\.\S+$', '', caption) 232 | 233 | return caption.strip() 234 | -------------------------------------------------------------------------------- /lumos_diffusion/model/timestep_sampler.py: -------------------------------------------------------------------------------- 1 | # Modified from OpenAI's diffusion repos 2 | # GLIDE: https://github.com/openai/glide-text2im/blob/main/glide_text2im/gaussian_diffusion.py 3 | # ADM: https://github.com/openai/guided-diffusion/blob/main/guided_diffusion 4 | # IDDPM: https://github.com/openai/improved-diffusion/blob/main/improved_diffusion/gaussian_diffusion.py 5 | 6 | from abc import ABC, abstractmethod 7 | 8 | import numpy as np 9 | import torch as th 10 | import torch.distributed as dist 11 | 12 | 13 | def create_named_schedule_sampler(name, diffusion): 14 | """ 15 | Create a ScheduleSampler from a library of pre-defined samplers. 16 | :param name: the name of the sampler. 17 | :param diffusion: the diffusion object to sample for. 18 | """ 19 | if name == "uniform": 20 | return UniformSampler(diffusion) 21 | elif name == "loss-second-moment": 22 | return LossSecondMomentResampler(diffusion) 23 | else: 24 | raise NotImplementedError(f"unknown schedule sampler: {name}") 25 | 26 | 27 | class ScheduleSampler(ABC): 28 | """ 29 | A distribution over timesteps in the diffusion process, intended to reduce 30 | variance of the objective. 31 | By default, samplers perform unbiased importance sampling, in which the 32 | objective's mean is unchanged. 33 | However, subclasses may override sample() to change how the resampled 34 | terms are reweighted, allowing for actual changes in the objective. 35 | """ 36 | 37 | @abstractmethod 38 | def weights(self): 39 | """ 40 | Get a numpy array of weights, one per diffusion step. 41 | The weights needn't be normalized, but must be positive. 42 | """ 43 | 44 | def sample(self, batch_size, device): 45 | """ 46 | Importance-sample timesteps for a batch. 47 | :param batch_size: the number of timesteps. 48 | :param device: the torch device to save to. 49 | :return: a tuple (timesteps, weights): 50 | - timesteps: a tensor of timestep indices. 51 | - weights: a tensor of weights to scale the resulting losses. 52 | """ 53 | w = self.weights() 54 | p = w / np.sum(w) 55 | indices_np = np.random.choice(len(p), size=(batch_size,), p=p) 56 | indices = th.from_numpy(indices_np).long().to(device) 57 | weights_np = 1 / (len(p) * p[indices_np]) 58 | weights = th.from_numpy(weights_np).float().to(device) 59 | return indices, weights 60 | 61 | 62 | class UniformSampler(ScheduleSampler): 63 | def __init__(self, diffusion): 64 | self.diffusion = diffusion 65 | self._weights = np.ones([diffusion.num_timesteps]) 66 | 67 | def weights(self): 68 | return self._weights 69 | 70 | 71 | class LossAwareSampler(ScheduleSampler): 72 | def update_with_local_losses(self, local_ts, local_losses): 73 | """ 74 | Update the reweighting using losses from a model. 75 | Call this method from each rank with a batch of timesteps and the 76 | corresponding losses for each of those timesteps. 
77 | This method will perform synchronization to make sure all of the ranks 78 | maintain the exact same reweighting. 79 | :param local_ts: an integer Tensor of timesteps. 80 | :param local_losses: a 1D Tensor of losses. 81 | """ 82 | batch_sizes = [ 83 | th.tensor([0], dtype=th.int32, device=local_ts.device) 84 | for _ in range(dist.get_world_size()) 85 | ] 86 | dist.all_gather( 87 | batch_sizes, 88 | th.tensor([len(local_ts)], dtype=th.int32, device=local_ts.device), 89 | ) 90 | 91 | # Pad all_gather batches to be the maximum batch size. 92 | batch_sizes = [x.item() for x in batch_sizes] 93 | max_bs = max(batch_sizes) 94 | 95 | timestep_batches = [th.zeros(max_bs, device=local_ts.device) for _ in batch_sizes] 96 | loss_batches = [th.zeros(max_bs, device=local_losses.device) for _ in batch_sizes] 97 | dist.all_gather(timestep_batches, local_ts) 98 | dist.all_gather(loss_batches, local_losses) 99 | timesteps = [ 100 | x.item() for y, bs in zip(timestep_batches, batch_sizes) for x in y[:bs] 101 | ] 102 | losses = [x.item() for y, bs in zip(loss_batches, batch_sizes) for x in y[:bs]] 103 | self.update_with_all_losses(timesteps, losses) 104 | 105 | @abstractmethod 106 | def update_with_all_losses(self, ts, losses): 107 | """ 108 | Update the reweighting using losses from a model. 109 | Sub-classes should override this method to update the reweighting 110 | using losses from the model. 111 | This method directly updates the reweighting without synchronizing 112 | between workers. It is called by update_with_local_losses from all 113 | ranks with identical arguments. Thus, it should have deterministic 114 | behavior to maintain state across workers. 115 | :param ts: a list of int timesteps. 116 | :param losses: a list of float losses, one per timestep. 117 | """ 118 | 119 | 120 | class LossSecondMomentResampler(LossAwareSampler): 121 | def __init__(self, diffusion, history_per_term=10, uniform_prob=0.001): 122 | self.diffusion = diffusion 123 | self.history_per_term = history_per_term 124 | self.uniform_prob = uniform_prob 125 | self._loss_history = np.zeros( 126 | [diffusion.num_timesteps, history_per_term], dtype=np.float64 127 | ) 128 | self._loss_counts = np.zeros([diffusion.num_timesteps], dtype=np.int) 129 | 130 | def weights(self): 131 | if not self._warmed_up(): 132 | return np.ones([self.diffusion.num_timesteps], dtype=np.float64) 133 | weights = np.sqrt(np.mean(self._loss_history ** 2, axis=-1)) 134 | weights /= np.sum(weights) 135 | weights *= 1 - self.uniform_prob 136 | weights += self.uniform_prob / len(weights) 137 | return weights 138 | 139 | def update_with_all_losses(self, ts, losses): 140 | for t, loss in zip(ts, losses): 141 | if self._loss_counts[t] == self.history_per_term: 142 | # Shift out the oldest loss term. 
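# (each timestep keeps a rolling window of its last `history_per_term` losses; weights() switches from uniform to the RMS of this window once every slot is filled)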
143 | self._loss_history[t, :-1] = self._loss_history[t, 1:] 144 | self._loss_history[t, -1] = loss 145 | else: 146 | self._loss_history[t, self._loss_counts[t]] = loss 147 | self._loss_counts[t] += 1 148 | 149 | def _warmed_up(self): 150 | return (self._loss_counts == self.history_per_term).all() 151 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==0.29.2 2 | beautifulsoup4==4.12.3 3 | diffusers==0.27.2 4 | einops==0.8.0 5 | ftfy==6.3.1 6 | huggingface-hub==0.23.3 7 | mmcv==1.7.0 8 | numpy==1.23.5 9 | protobuf==5.28.3 10 | sentencepiece==0.2.0 11 | tqdm==4.66.4 12 | timm==0.9.16 13 | transformers==4.39.3 14 | gradio==4.40.0 -------------------------------------------------------------------------------- /utils/__init__.py: -------------------------------------------------------------------------------- 1 | from .download import * 2 | from .resolution import * -------------------------------------------------------------------------------- /utils/download.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import os 3 | 4 | def find_model(model_name): 5 | """ 6 | Finds a pre-trained G.pt model, downloading it if necessary. Alternatively, loads a model from a local path. 7 | """ 8 | assert os.path.isfile(model_name), f'Could not find checkpoint at {model_name}' 9 | return torch.load(model_name, map_location=lambda storage, loc: storage) 10 | 11 | -------------------------------------------------------------------------------- /utils/resolution.py: -------------------------------------------------------------------------------- 1 | 2 | 3 | ASPECT_RATIO_1024 = { 4 | '0.25': [512., 2048.], '0.26': [512., 1984.], '0.27': [512., 1920.], '0.28': [512., 1856.], 5 | '0.32': [576., 1792.], '0.33': [576., 1728.], '0.35': [576., 1664.], '0.4': [640., 1600.], 6 | '0.42': [640., 1536.], '0.48': [704., 1472.], '0.5': [704., 1408.], '0.52': [704., 1344.], 7 | '0.57': [768., 1344.], '0.6': [768., 1280.], '0.68': [832., 1216.], '0.72': [832., 1152.], 8 | '0.78': [896., 1152.], '0.82': [896., 1088.], '0.88': [960., 1088.], '0.94': [960., 1024.], 9 | '1.0': [1024., 1024.], '1.07': [1024., 960.], '1.13': [1088., 960.], '1.21': [1088., 896.], 10 | '1.29': [1152., 896.], '1.38': [1152., 832.], '1.46': [1216., 832.], '1.67': [1280., 768.], 11 | '1.75': [1344., 768.], '2.0': [1408., 704.], '2.09': [1472., 704.], '2.4': [1536., 640.], 12 | '2.5': [1600., 640.], '2.89': [1664., 576.], '3.0': [1728., 576.], '3.11': [1792., 576.], 13 | '3.62': [1856., 512.], '3.75': [1920., 512.], '3.88': [1984., 512.], '4.0': [2048., 512.], 14 | } 15 | 16 | ASPECT_RATIO_512 = { 17 | '0.25': [256.0, 1024.0], '0.26': [256.0, 992.0], '0.27': [256.0, 960.0], '0.28': [256.0, 928.0], 18 | '0.32': [288.0, 896.0], '0.33': [288.0, 864.0], '0.35': [288.0, 832.0], '0.4': [320.0, 800.0], 19 | '0.42': [320.0, 768.0], '0.48': [352.0, 736.0], '0.5': [352.0, 704.0], '0.52': [352.0, 672.0], 20 | '0.57': [384.0, 672.0], '0.6': [384.0, 640.0], '0.68': [416.0, 608.0], '0.72': [416.0, 576.0], 21 | '0.78': [448.0, 576.0], '0.82': [448.0, 544.0], '0.88': [480.0, 544.0], '0.94': [480.0, 512.0], 22 | '1.0': [512.0, 512.0], '1.07': [512.0, 480.0], '1.13': [544.0, 480.0], '1.21': [544.0, 448.0], 23 | '1.29': [576.0, 448.0], '1.38': [576.0, 416.0], '1.46': [608.0, 416.0], '1.67': [640.0, 384.0], 24 | '1.75': [672.0, 384.0], '2.0': [704.0, 352.0], '2.09': 
[736.0, 352.0], '2.4': [768.0, 320.0], 25 | '2.5': [800.0, 320.0], '2.89': [832.0, 288.0], '3.0': [864.0, 288.0], '3.11': [896.0, 288.0], 26 | '3.62': [928.0, 256.0], '3.75': [960.0, 256.0], '3.88': [992.0, 256.0], '4.0': [1024.0, 256.0] 27 | } 28 | 29 | ASPECT_RATIO_256 = { 30 | '0.25': [128.0, 512.0], '0.26': [128.0, 496.0], '0.27': [128.0, 480.0], '0.28': [128.0, 464.0], 31 | '0.32': [144.0, 448.0], '0.33': [144.0, 432.0], '0.35': [144.0, 416.0], '0.4': [160.0, 400.0], 32 | '0.42': [160.0, 384.0], '0.48': [176.0, 368.0], '0.5': [176.0, 352.0], '0.52': [176.0, 336.0], 33 | '0.57': [192.0, 336.0], '0.6': [192.0, 320.0], '0.68': [208.0, 304.0], '0.72': [208.0, 288.0], 34 | '0.78': [224.0, 288.0], '0.82': [224.0, 272.0], '0.88': [240.0, 272.0], '0.94': [240.0, 256.0], 35 | '1.0': [256.0, 256.0], '1.07': [256.0, 240.0], '1.13': [272.0, 240.0], '1.21': [272.0, 224.0], 36 | '1.29': [288.0, 224.0], '1.38': [288.0, 208.0], '1.46': [304.0, 208.0], '1.67': [320.0, 192.0], 37 | '1.75': [336.0, 192.0], '2.0': [352.0, 176.0], '2.09': [368.0, 176.0], '2.4': [384.0, 160.0], 38 | '2.5': [400.0, 160.0], '2.89': [416.0, 144.0], '3.0': [432.0, 144.0], '3.11': [448.0, 144.0], 39 | '3.62': [464.0, 128.0], '3.75': [480.0, 128.0], '3.88': [496.0, 128.0], '4.0': [512.0, 128.0] 40 | } 41 | 42 | ASPECT_RATIO_256_TEST = { 43 | '0.25': [128.0, 512.0], '0.28': [128.0, 464.0], 44 | '0.32': [144.0, 448.0], '0.33': [144.0, 432.0], '0.35': [144.0, 416.0], '0.4': [160.0, 400.0], 45 | '0.42': [160.0, 384.0], '0.48': [176.0, 368.0], '0.5': [176.0, 352.0], '0.52': [176.0, 336.0], 46 | '0.57': [192.0, 336.0], '0.6': [192.0, 320.0], '0.68': [208.0, 304.0], '0.72': [208.0, 288.0], 47 | '0.78': [224.0, 288.0], '0.82': [224.0, 272.0], '0.88': [240.0, 272.0], '0.94': [240.0, 256.0], 48 | '1.0': [256.0, 256.0], '1.07': [256.0, 240.0], '1.13': [272.0, 240.0], '1.21': [272.0, 224.0], 49 | '1.29': [288.0, 224.0], '1.38': [288.0, 208.0], '1.46': [304.0, 208.0], '1.67': [320.0, 192.0], 50 | '1.75': [336.0, 192.0], '2.0': [352.0, 176.0], '2.09': [368.0, 176.0], '2.4': [384.0, 160.0], 51 | '2.5': [400.0, 160.0], '3.0': [432.0, 144.0], 52 | '4.0': [512.0, 128.0] 53 | } 54 | 55 | ASPECT_RATIO_512_TEST = { 56 | '0.25': [256.0, 1024.0], '0.28': [256.0, 928.0], 57 | '0.32': [288.0, 896.0], '0.33': [288.0, 864.0], '0.35': [288.0, 832.0], '0.4': [320.0, 800.0], 58 | '0.42': [320.0, 768.0], '0.48': [352.0, 736.0], '0.5': [352.0, 704.0], '0.52': [352.0, 672.0], 59 | '0.57': [384.0, 672.0], '0.6': [384.0, 640.0], '0.68': [416.0, 608.0], '0.72': [416.0, 576.0], 60 | '0.78': [448.0, 576.0], '0.82': [448.0, 544.0], '0.88': [480.0, 544.0], '0.94': [480.0, 512.0], 61 | '1.0': [512.0, 512.0], '1.07': [512.0, 480.0], '1.13': [544.0, 480.0], '1.21': [544.0, 448.0], 62 | '1.29': [576.0, 448.0], '1.38': [576.0, 416.0], '1.46': [608.0, 416.0], '1.67': [640.0, 384.0], 63 | '1.75': [672.0, 384.0], '2.0': [704.0, 352.0], '2.09': [736.0, 352.0], '2.4': [768.0, 320.0], 64 | '2.5': [800.0, 320.0], '3.0': [864.0, 288.0], 65 | '4.0': [1024.0, 256.0] 66 | } 67 | 68 | ASPECT_RATIO_1024_TEST = { 69 | '0.25': [512., 2048.], '0.28': [512., 1856.], 70 | '0.32': [576., 1792.], '0.33': [576., 1728.], '0.35': [576., 1664.], '0.4': [640., 1600.], 71 | '0.42': [640., 1536.], '0.48': [704., 1472.], '0.5': [704., 1408.], '0.52': [704., 1344.], 72 | '0.57': [768., 1344.], '0.6': [768., 1280.], '0.68': [832., 1216.], '0.72': [832., 1152.], 73 | '0.78': [896., 1152.], '0.82': [896., 1088.], '0.88': [960., 1088.], '0.94': [960., 1024.], 74 | '1.0': [1024., 1024.], '1.07': [1024., 
960.], '1.13': [1088., 960.], '1.21': [1088., 896.], 75 | '1.29': [1152., 896.], '1.38': [1152., 832.], '1.46': [1216., 832.], '1.67': [1280., 768.], 76 | '1.75': [1344., 768.], '2.0': [1408., 704.], '2.09': [1472., 704.], '2.4': [1536., 640.], 77 | '2.5': [1600., 640.], '3.0': [1728., 576.], 78 | '4.0': [2048., 512.], 79 | } 80 | 81 | 82 | def get_chunks(lst, n): 83 | for i in range(0, len(lst), n): 84 | yield lst[i:i + n] 85 | 86 | def get_closest_ratio(height: float, width: float, ratios: dict): 87 | aspect_ratio = height / width 88 | closest_ratio = min(ratios.keys(), key=lambda ratio: abs(float(ratio) - aspect_ratio)) 89 | return ratios[closest_ratio], float(closest_ratio) 90 | --------------------------------------------------------------------------------
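Usage note (illustrative, not part of the repository files above): the aspect-ratio tables in utils/resolution.py are bucket lookups keyed by height/width ratio, and get_closest_ratio snaps an arbitrary image size to the nearest supported bucket before resizing. A minimal sketch, assuming it is run from the repository root; the variable names are hypothetical.

from utils.resolution import ASPECT_RATIO_1024_TEST, get_closest_ratio

height, width = 900.0, 1600.0  # e.g. an incoming 16:9-ish image
(target_h, target_w), ratio = get_closest_ratio(height, width, ASPECT_RATIO_1024_TEST)
# 900 / 1600 = 0.5625 -> closest key '0.57' -> bucket [768., 1344.]
print(ratio, target_h, target_w)  # 0.57 768.0 1344.0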