├── .gitignore ├── .gitmodules ├── .vscode └── settings.json ├── Dockerfile ├── Dockerfile.cuda118 ├── Dockerfile.protoc ├── LICENSE ├── README.md ├── build.sh ├── client.py ├── docker_support ├── CMakeFile.txt.diff ├── build_protoc.sh ├── cpp_extension.py └── run.sh ├── environment.yaml ├── nonfree ├── README.md ├── tome_memory_efficient_cross_attention.py ├── tome_patcher.py └── tome_unet.py ├── pyproject.toml ├── requirements.txt ├── sdgrpcserver ├── __init__.py ├── config │ ├── dist_hashes │ ├── engines.yaml │ └── genhashes.sh ├── constants.py ├── debug_recorder.py ├── generated │ ├── __init__.py │ ├── dashboard_pb2.py │ ├── dashboard_pb2_grpc.py │ ├── engines_pb2.py │ ├── engines_pb2_grpc.py │ ├── generation_pb2.py │ ├── generation_pb2_grpc.py │ ├── tensors_pb2.py │ └── tensors_pb2_grpc.py ├── images.py ├── k_diffusion.py ├── manager.py ├── patching.py ├── pipeline │ ├── __init__.py │ ├── attention_replacer.py │ ├── common_scheduler.py │ ├── diffusers_types.py │ ├── easing.py │ ├── kschedulers │ │ ├── __init__.py │ │ ├── scheduling_dpm2_ancestral_discrete.py │ │ ├── scheduling_dpm2_discrete.py │ │ ├── scheduling_euler_ancestral_discrete.py │ │ ├── scheduling_euler_discrete.py │ │ ├── scheduling_heun_discrete.py │ │ └── scheduling_utils.py │ ├── latent_debugger.py │ ├── model_utils.py │ ├── models │ │ ├── __init__.py │ │ ├── memory_efficient_cross_attention.py │ │ └── structured_cross_attention.py │ ├── randtools.py │ ├── safety_checkers.py │ ├── schedulers │ │ ├── sample_dpmpp_2m.py │ │ └── scheduling_ddim.py │ ├── text_embedding │ │ ├── __init__.py │ │ ├── basic_text_embedding.py │ │ ├── lpw_text_embedding.py │ │ ├── structured_text_embedding.py │ │ ├── text_embedding.py │ │ └── text_encoder_alt_layer.py │ ├── unet │ │ ├── cfg.py │ │ ├── clipguided.py │ │ ├── core.py │ │ ├── graft.py │ │ ├── hires_fix.py │ │ ├── hires_fix_other.py │ │ ├── hires_fix_resize.py │ │ └── types.py │ ├── unified_pipeline.py │ ├── upscaler_pipeline.py │ ├── vae_approximator.py │ └── xformers_utils.py ├── ram_monitor.py ├── resize_right.py ├── server.py ├── services │ ├── __init__.py │ ├── dashboard.py │ ├── engines.py │ └── generate.py ├── sonora │ ├── __init__.py │ ├── __version__.py │ ├── aio.py │ ├── asgi.py │ ├── client.py │ ├── echotest.py │ ├── protocol.py │ └── wsgi.py ├── src │ └── __init__.py └── utils.py ├── server.py ├── stablecabal.png ├── tests ├── VRAMUsageMonitor.py ├── batch_independance.py ├── engines.basic.yaml ├── engines.clip.yaml ├── engines.inpaint.yaml ├── engines.sd14.yaml ├── graftedpaint.engine.yaml ├── graftedpaint.image_512_512.png ├── graftedpaint.image_768_600.png ├── graftedpaint.image_768_768.png ├── graftedpaint.py ├── happy_path.engines.yaml ├── happy_path.image_512.png ├── happy_path.image_768.png ├── happy_path.py ├── image.png ├── mask.png ├── prompt_weights.engine.yaml ├── prompt_weights.py ├── schedulers.py └── test_harness.py └── weights └── .gitkeep /.gitignore: -------------------------------------------------------------------------------- 1 | # Initially taken from Github's Python gitignore file 2 | _[A-Za-z]* 3 | weights/* 4 | src/* 5 | tests/out/* 6 | /config.yaml 7 | 8 | # Byte-compiled / optimized / DLL files 9 | __pycache__/ 10 | *.py[cod] 11 | *$py.class 12 | 13 | # C extensions 14 | *.so 15 | 16 | # tests and logs 17 | tests/fixtures/cached_*_text.txt 18 | logs/ 19 | lightning_logs/ 20 | lang_code_data/ 21 | 22 | # Distribution / packaging 23 | .Python 24 | build/ 25 | develop-eggs/ 26 | dist/ 27 | downloads/ 28 | eggs/ 29 | .eggs/ 30 | lib/ 31 | lib64/ 32 | 
parts/ 33 | sdist/ 34 | var/ 35 | wheels/ 36 | *.egg-info/ 37 | .installed.cfg 38 | *.egg 39 | MANIFEST 40 | 41 | # PyInstaller 42 | # Usually these files are written by a python script from a template 43 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 44 | *.manifest 45 | *.spec 46 | 47 | # Installer logs 48 | pip-log.txt 49 | pip-delete-this-directory.txt 50 | 51 | # Unit test / coverage reports 52 | htmlcov/ 53 | .tox/ 54 | .nox/ 55 | .coverage 56 | .coverage.* 57 | .cache 58 | nosetests.xml 59 | coverage.xml 60 | *.cover 61 | .hypothesis/ 62 | .pytest_cache/ 63 | 64 | # Translations 65 | *.mo 66 | *.pot 67 | 68 | # Django stuff: 69 | *.log 70 | local_settings.py 71 | db.sqlite3 72 | 73 | # Flask stuff: 74 | instance/ 75 | .webassets-cache 76 | 77 | # Scrapy stuff: 78 | .scrapy 79 | 80 | # Sphinx documentation 81 | docs/_build/ 82 | 83 | # PyBuilder 84 | target/ 85 | 86 | # Jupyter Notebook 87 | .ipynb_checkpoints 88 | 89 | # IPython 90 | profile_default/ 91 | ipython_config.py 92 | 93 | # pyenv 94 | .python-version 95 | 96 | # celery beat schedule file 97 | celerybeat-schedule 98 | 99 | # SageMath parsed files 100 | *.sage.py 101 | 102 | # Environments 103 | .env 104 | .venv 105 | env/ 106 | venv/ 107 | ENV/ 108 | env.bak/ 109 | venv.bak/ 110 | 111 | # Spyder project settings 112 | .spyderproject 113 | .spyproject 114 | 115 | # Rope project settings 116 | .ropeproject 117 | 118 | # mkdocs documentation 119 | /site 120 | 121 | # mypy 122 | .mypy_cache/ 123 | .dmypy.json 124 | dmypy.json 125 | 126 | # Pyre type checker 127 | .pyre/ 128 | 129 | # vscode 130 | .vs 131 | .vscode/* 132 | 133 | # Pycharm 134 | .idea 135 | 136 | # TF code 137 | tensorflow_code 138 | 139 | # Models 140 | proc_data 141 | 142 | # examples 143 | runs 144 | /runs_old 145 | /wandb 146 | /examples/runs 147 | /examples/**/*.args 148 | /examples/rag/sweep 149 | 150 | # data 151 | /data 152 | serialization_dir 153 | 154 | # emacs 155 | *.*~ 156 | debug.env 157 | 158 | # vim 159 | .*.swp 160 | 161 | #ctags 162 | tags 163 | 164 | # pre-commit 165 | .pre-commit* 166 | 167 | # .lock 168 | *.lock 169 | 170 | # DS_Store (MacOS) 171 | .DS_Store 172 | 173 | # Always include .gitkeep files 174 | !.gitkeep 175 | !.vscode/settings.json -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "api-interfaces"] 2 | path = api-interfaces 3 | url = https://github.com/hafriedlander/api-interfaces.git 4 | [submodule "nonfree/ToMe"] 5 | path = nonfree/ToMe 6 | url = https://github.com/facebookresearch/ToMe.git 7 | [submodule "sdgrpcserver/src/k-diffusion"] 8 | path = sdgrpcserver/src/k-diffusion 9 | url = https://github.com/crowsonkb/k-diffusion.git 10 | [submodule "sdgrpcserver/src/ResizeRight"] 11 | path = sdgrpcserver/src/ResizeRight 12 | url = https://github.com/assafshocher/ResizeRight.git 13 | ignore = dirty 14 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.analysis.typeCheckingMode": "basic", 3 | "python.analysis.diagnosticMode": "workspace", 4 | "python.analysis.include": [ 5 | "sdgrpcserver/pipeline" 6 | ], 7 | "python.analysis.exclude": [ 8 | "sdgrpcserver/pipeline/kschedulers", 9 | "sdgrpcserver/pipeline/schedulers" 10 | ], 11 | "python.analysis.extraPaths": [ 12 | "sdgrpcserver/generated" 13 | ], 14 | 
"python.formatting.provider": "black", 15 | "python.linting.enabled": true, 16 | "python.linting.lintOnSave": true, 17 | "python.linting.flake8Enabled": true, 18 | "editor.rulers": [ 19 | 88, 20 | ], 21 | "editor.formatOnSave": true, 22 | "editor.codeActionsOnSave": { 23 | "source.organizeImports": true 24 | }, 25 | } -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-devel-ubuntu20.04 AS devbase 2 | 3 | # Basic updates. Do super early so we can cache for a long time 4 | RUN apt update 5 | RUN apt install -y curl 6 | RUN apt install -y git 7 | 8 | # Set up core python environment 9 | RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba 10 | 11 | COPY environment.yaml . 12 | RUN /bin/micromamba -r /env -y create -f environment.yaml 13 | 14 | # Install dependancies 15 | ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 16 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install torch~=1.12.1 17 | 18 | 19 | 20 | 21 | FROM devbase AS regularbase 22 | 23 | # Install dependancies 24 | ENV PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 25 | ENV FLIT_ROOT_INSTALL=1 26 | 27 | # We copy only the minimum for flit to run so avoid cache invalidation on code changes 28 | COPY pyproject.toml . 29 | COPY sdgrpcserver/__init__.py sdgrpcserver/ 30 | RUN touch README.md 31 | RUN /bin/micromamba -r /env -n sd-grpc-server run flit install --pth-file 32 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip cache purge 33 | 34 | # Setup NVM & Node for Localtunnel 35 | ENV NVM_DIR=/nvm 36 | ENV NODE_VERSION=16.18.0 37 | 38 | RUN mkdir -p $NVM_DIR 39 | 40 | RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \ 41 | && . $NVM_DIR/nvm.sh \ 42 | && nvm install $NODE_VERSION \ 43 | && nvm alias default $NODE_VERSION \ 44 | && nvm use default 45 | 46 | 47 | 48 | 49 | # Build Xformers 50 | 51 | FROM devbase AS xformersbase 52 | 53 | RUN git clone https://github.com/facebookresearch/xformers.git 54 | WORKDIR /xformers 55 | RUN git submodule update --init --recursive 56 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 57 | 58 | ENV FORCE_CUDA=1 59 | ENV TORCH_CUDA_ARCH_LIST="6.0;6.1;6.2;7.0;7.2;7.5;8.0;8.6" 60 | 61 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 62 | 63 | RUN tar cvjf /xformers.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/xformers* 64 | 65 | 66 | 67 | 68 | FROM nvidia/cuda:11.6.2-base-ubuntu20.04 AS main 69 | 70 | COPY --from=regularbase /bin/micromamba /bin/ 71 | RUN mkdir -p /env/envs 72 | COPY --from=regularbase /env/envs /env/envs/ 73 | RUN mkdir -p /nvm 74 | COPY --from=regularbase /nvm /nvm/ 75 | 76 | # Setup NVM & Node for Localtunnel 77 | ENV NVM_DIR=/nvm 78 | ENV NODE_VERSION=16.18.0 79 | 80 | ENV NODE_PATH $NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules 81 | ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH 82 | 83 | RUN npm install -g localtunnel 84 | 85 | # Now we can copy everything we need 86 | COPY sdgrpcserver /sdgrpcserver/ 87 | COPY server.py . 
88 | 89 | # Set up some config files 90 | RUN mkdir -p /huggingface 91 | RUN mkdir -p /weights 92 | RUN mkdir -p /config 93 | COPY sdgrpcserver/config/engines.yaml /config/engines.yaml 94 | 95 | # Set up some environment files 96 | 97 | ENV HF_HOME=/huggingface 98 | ENV HF_API_TOKEN=mustset 99 | ENV SD_ENGINECFG=/config/engines.yaml 100 | ENV SD_WEIGHT_ROOT=/weights 101 | 102 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 103 | 104 | 105 | 106 | 107 | FROM main as xformers 108 | 109 | COPY --from=xformersbase /xformers/requirements.txt / 110 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 111 | RUN rm requirements.txt 112 | COPY --from=xformersbase /xformers.tbz / 113 | RUN tar xvjf /xformers.tbz 114 | RUN rm /xformers.tbz 115 | 116 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 117 | -------------------------------------------------------------------------------- /Dockerfile.cuda118: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 AS buildpytorch 2 | 3 | # Basic updates. Do super early so we can cache for a long time 4 | RUN apt update 5 | RUN apt install -y curl 6 | RUN apt install -y git 7 | 8 | # Set up core python environment 9 | RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba 10 | 11 | # Set up basic python 12 | RUN /bin/micromamba -r /env -y create -c defaults -n sd-grpc-server python~=3.10.0 pip 13 | # Install MKL from Intel - it's newer than conda-forge one 14 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c intel mkl=2022.2.0 mkl-devel=2022.2.0 15 | # Install dependancies 16 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses 17 | 18 | # Make sure cmake looks in our enviroment 19 | ENV CMAKE_PREFIX_PATH=/env/envs/sd-grpc-server 20 | 21 | # Download magma 22 | RUN curl -OL http://icl.utk.edu/projectsfiles/magma/downloads/magma-2.6.2.tar.gz 23 | RUN tar xvzf magma-2.6.2.tar.gz 24 | RUN mkdir -p /magma-2.6.2/build 25 | 26 | # Modify magma CMakeFile to allow arbitrary CUDA arches 27 | WORKDIR /magma-2.6.2 28 | COPY docker_support/CMakeFile.txt.diff / 29 | RUN patch < /CMakeFile.txt.diff 30 | 31 | # Build it 32 | WORKDIR /magma-2.6.2/build 33 | RUN /bin/micromamba -r /env -n sd-grpc-server run cmake .. -DMAGMA_WITH_MKL=1 -DUSE_FORTRAN=off -DGPU_TARGET="All" -DCUDA_ARCH_LIST="-gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_89,code=sm_89" 34 | #RUN /bin/micromamba -r /env -n sd-grpc-server run cmake .. 
-DMAGMA_WITH_MKL=1 -DUSE_FORTRAN=off -DGPU_TARGET="sm_89" -DCUDA_ARCH_LIST="-gencode arch=compute_89,code=sm_89" 35 | RUN /bin/micromamba -r /env -n sd-grpc-server run make lib 36 | RUN /bin/micromamba -r /env -n sd-grpc-server run make sparse-lib 37 | RUN /bin/micromamba -r /env -n sd-grpc-server run make install 38 | 39 | # Move into env (TODO: just build here in the first place) 40 | RUN mv /usr/local/magma/lib/*.so /env/envs/sd-grpc-server/lib/ 41 | RUN mv /usr/local/magma/lib/pkgconfig/*.pc /env/envs/sd-grpc-server/lib/pkgconfig/ 42 | RUN mv /usr/local/magma/include/* /env/envs/sd-grpc-server/include/ 43 | 44 | # Download pytorch 45 | WORKDIR / 46 | RUN git clone https://github.com/pytorch/pytorch 47 | 48 | WORKDIR /pytorch 49 | RUN git checkout -b v1.12.1 tags/v1.12.1 50 | RUN git submodule update --init --recursive 51 | 52 | # Built and install pytorch 53 | ENV FORCE_CUDA=1 54 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 55 | ENV USE_KINETO=0 56 | ENV USE_CUPTI=0 57 | ENV PYTORCH_BUILD_VERSION=1.12.1+cu118 58 | ENV PYTORCH_BUILD_NUMBER=0 59 | COPY docker_support/cpp_extension.py /pytorch/torch/utils/ 60 | RUN /bin/micromamba -r /env -n sd-grpc-server run python tools/generate_torch_version.py --is_debug false --cuda_version 11.8 61 | RUN /bin/micromamba -r /env -n sd-grpc-server run python setup.py install 62 | 63 | # Download torchvision 64 | WORKDIR / 65 | RUN git clone https://github.com/pytorch/vision 66 | 67 | WORKDIR /vision 68 | RUN git checkout -b v0.13.1 tags/v0.13.1 69 | 70 | # Install torchvision dependancies 71 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults libpng jpeg 72 | 73 | # Built and install torchvision 74 | ENV BUILD_VERSION=0.13.1+cu118 75 | RUN /bin/micromamba -r /env -n sd-grpc-server run python setup.py install 76 | 77 | 78 | 79 | 80 | FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu20.04 AS devbase 81 | 82 | # Basic updates. Do super early so we can cache for a long time 83 | RUN apt update 84 | RUN apt install -y curl 85 | RUN apt install -y git 86 | RUN apt install -y libaio-dev 87 | 88 | # Copy core python environment from pytorchbuild 89 | RUN mkdir -p /env 90 | COPY --from=buildpytorch /bin/micromamba /bin 91 | COPY --from=buildpytorch /env /env/ 92 | 93 | 94 | 95 | 96 | FROM devbase AS regularbase 97 | 98 | # Install dependancies 99 | ENV FLIT_ROOT_INSTALL=1 100 | RUN /bin/micromamba -r /env -n sd-grpc-server install -c defaults flit 101 | 102 | # We copy only the minimum for flit to run so avoid cache invalidation on code changes 103 | COPY pyproject.toml . 104 | COPY sdgrpcserver/__init__.py sdgrpcserver/ 105 | RUN touch README.md 106 | RUN /bin/micromamba -r /env -n sd-grpc-server run flit install --pth-file 107 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip cache purge 108 | 109 | # Setup NVM & Node for Localtunnel 110 | ENV NVM_DIR=/nvm 111 | ENV NODE_VERSION=16.18.0 112 | 113 | RUN mkdir -p $NVM_DIR 114 | 115 | RUN curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash \ 116 | && . 
$NVM_DIR/nvm.sh \ 117 | && nvm install $NODE_VERSION \ 118 | && nvm alias default $NODE_VERSION \ 119 | && nvm use default 120 | 121 | 122 | 123 | 124 | FROM devbase AS tritonbase 125 | 126 | WORKDIR / 127 | ARG TRITON_REF=tags/v1.0 128 | RUN git clone https://github.com/openai/triton.git 129 | 130 | WORKDIR /triton 131 | RUN git checkout $TRITON_REF 132 | 133 | WORKDIR /triton/python 134 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install cmake 135 | RUN apt install -y zlib1g-dev libtinfo-dev 136 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 137 | 138 | RUN tar cvjf /triton.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/triton* 139 | 140 | 141 | 142 | 143 | # Build Xformers 144 | 145 | FROM tritonbase AS xformersbase 146 | 147 | WORKDIR / 148 | ARG XFORMERS_REF=main 149 | RUN git clone https://github.com/facebookresearch/xformers.git 150 | 151 | WORKDIR /xformers 152 | RUN git checkout $XFORMERS_REF 153 | RUN git submodule update --init --recursive 154 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 155 | 156 | ENV FORCE_CUDA=1 157 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 158 | 159 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 160 | 161 | RUN tar cvjf /xformers.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/xformers* 162 | 163 | 164 | 165 | 166 | FROM tritonbase AS deepspeedbase 167 | 168 | WORKDIR / 169 | ARG DEEPSPEED_REF=tags/v0.7.4 170 | RUN git clone https://github.com/microsoft/DeepSpeed.git 171 | 172 | WORKDIR /DeepSpeed 173 | RUN git checkout $DEEPSPEED_REF 174 | RUN apt install -y libaio-dev 175 | 176 | ENV DS_BUILD_OPS=1 177 | ENV TORCH_CUDA_ARCH_LIST="6.0;7.0;7.5;8.0;8.6;8.9" 178 | ENV DS_BUILD_SPARSE_ATTN=0 179 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install . 180 | 181 | RUN tar cvjf /deepspeed.tbz /env/envs/sd-grpc-server/lib/python3.*/site-packages/deepspeed* 182 | 183 | 184 | 185 | 186 | 187 | FROM nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu20.04 AS main 188 | 189 | COPY --from=regularbase /bin/micromamba /bin/ 190 | RUN mkdir -p /env/envs 191 | COPY --from=regularbase /env/envs /env/envs/ 192 | RUN mkdir -p /nvm 193 | COPY --from=regularbase /nvm /nvm/ 194 | 195 | # Setup NVM & Node for Localtunnel 196 | ENV NVM_DIR=/nvm 197 | ENV NODE_VERSION=16.18.0 198 | 199 | ENV NODE_PATH $NVM_DIR/versions/node/v$NODE_VERSION/lib/node_modules 200 | ENV PATH $NVM_DIR/versions/node/v$NODE_VERSION/bin:$PATH 201 | 202 | RUN npm install -g localtunnel 203 | 204 | # Now we can copy everything we need 205 | COPY nonfree /nonfree/ 206 | COPY sdgrpcserver /sdgrpcserver/ 207 | COPY server.py . 
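# Note: unlike the image built from the default Dockerfile, this one also copies
# in the nonfree folder (ToMe etc.), so as described in the README's License
# section it corresponds to the 'noncomm' images and cannot be used commercially.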
208 | 209 | # Set up some config files 210 | RUN mkdir -p /huggingface 211 | RUN mkdir -p /weights 212 | RUN mkdir -p /config 213 | COPY sdgrpcserver/config/engines.yaml /config/engines.yaml 214 | 215 | # Set up some environment files 216 | 217 | ENV HF_HOME=/huggingface 218 | ENV HF_API_TOKEN=mustset 219 | ENV SD_ENGINECFG=/config/engines.yaml 220 | ENV SD_WEIGHT_ROOT=/weights 221 | 222 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 223 | 224 | 225 | 226 | 227 | FROM main as xformers 228 | 229 | COPY --from=xformersbase /xformers/requirements.txt / 230 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 231 | RUN rm requirements.txt 232 | 233 | COPY --from=deepspeedbase /DeepSpeed/requirements/requirements.txt / 234 | RUN /bin/micromamba -r /env -n sd-grpc-server run pip install -r requirements.txt 235 | RUN rm requirements.txt 236 | 237 | COPY --from=tritonbase /triton.tbz / 238 | RUN tar xvjf /triton.tbz 239 | COPY --from=xformersbase /xformers.tbz / 240 | RUN tar xvjf /xformers.tbz 241 | COPY --from=deepspeedbase /deepspeed.tbz / 242 | RUN tar xvjf /deepspeed.tbz 243 | 244 | RUN rm /*.tbz 245 | 246 | CMD [ "/bin/micromamba", "-r", "env", "-n", "sd-grpc-server", "run", "python", "./server.py" ] 247 | 248 | 249 | -------------------------------------------------------------------------------- /Dockerfile.protoc: -------------------------------------------------------------------------------- 1 | FROM python:3 2 | 3 | RUN pip install grpcio==1.49.1 4 | RUN pip install grpcio-tools==1.49.1 5 | 6 | COPY docker_support/build_protoc.sh / 7 | 8 | RUN mkdir /src 9 | WORKDIR /src 10 | 11 | CMD [ "/build_protoc.sh" ] 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | An implementation of a server for the Stability AI API 2 | 3 | # Features 4 | 5 | ## Standard Stable Diffusion features 6 | 7 | - Create an image from just a text prompt (txt2img) 8 | - Create an image from an existing image and a text prompt (img2img) 9 | - Fill in a hole in an image, or extend an image (inpainting) 10 | 11 | ## Enhancements 12 | 13 | - Enhanced inpainting and outpainting, including Grafted Inpainting 14 | - When used with the standard Stable Diffusion V1.5 model, results are more consistent with the existing image 15 | - When used with a model such as Waifu Diffusion that does not have an inpaint model, can either "graft" 16 | the model on top of the Stable Diffusion inpainting model or work in an exclusive model-independent mode 17 | - Custom CLIP guidance allows using newer CLIP models to more accurately follow prompts 18 | - Faster and better results than the standard Diffusers version 19 | - Negative prompting and weighting of parts of a prompt (send multiple `Prompt` objects with `text` and any positive or negative `weight`) 20 | - All K_Diffusion schedulers available, and working correctly (including DPM2, DPM2 Ancestral and Heun) 21 | - Can load multiple pipelines, such as Stable and Waifu Diffusion, and swap between them as needed 22 | - Adjustable NSFW behaviour 23 | - Potentially lower memory requirements using a variety of model offloading techniques 24 | - Cancel over API (using GRPC cancel will abort the currently in-progress generation) 25 | - Various performance optimisations 26 | + XFormers support, if installed 27 | + ToMe support, if nonfree code included (recommend XFormers instead where
available, but ToMe doesn't have complicated dependencies) 28 | 29 | # Installation 30 | 31 | ## Colab (coming soon) 32 | 33 | ## Docker (easiest if you already have Docker, and an Nvidia GPU with 4GB+ VRAM) 34 | 35 | ``` 36 | docker run --gpus all -it -p 50051:50051 \ 37 | -e HF_API_TOKEN={your huggingface token} \ 38 | -e SD_LISTEN_TO_ALL=1 \ 39 | -v $HOME/.cache/huggingface:/huggingface \ 40 | -v `pwd`/weights:/weights \ 41 | hafriedlander/stable-diffusion-grpcserver:xformers-latest 42 | ``` 43 | 44 | #### Localtunnel 45 | 46 | The docker image has built-in support for localtunnel, which 47 | will expose the GRPC-WEB endpoint on an https domain. It will 48 | automatically set an access token key if you don't provide one. 49 | Check your Docker log for the values to use. 50 | 51 | ``` 52 | -e SD_LOCALTUNNEL=1 \ 53 | ``` 54 | 55 | #### Volume mounts 56 | 57 | This will share the weights and huggingface cache, but you can 58 | mount other folders into the container to do other things: 59 | 60 | - You can check out the latest version of the server code and then 61 | mount it into the container to run the very latest code (including 62 | any local edits you make) 63 | 64 | ``` 65 | -v `pwd`/sdgrpcserver:/sdgrpcserver \ 66 | ``` 67 | 68 | - Or override the engines.yaml config by making a config directory, 69 | putting the engines.yaml in there, and mounting it into the container 70 | 71 | ``` 72 | -v `pwd`/config:/config \ 73 | ``` 74 | 75 | All the server arguments can be provided as environment variables, starting 76 | with SD: 77 | 78 | - SD_ENGINECFG 79 | - SD_GRPC_PORT 80 | - SD_HTTP_PORT 81 | - SD_VRAM_OPTIMISATION_LEVEL 82 | - SD_NSFW_BEHAVIOUR 83 | - SD_WEIGHT_ROOT 84 | - SD_HTTP_FILE_ROOT 85 | - SD_ACCESS_TOKEN 86 | - SD_LISTEN_TO_ALL 87 | - SD_ENABLE_MPS 88 | - SD_RELOAD 89 | - SD_LOCALTUNNEL 90 | 91 | #### Building the image locally 92 | 93 | ``` 94 | docker build --target main . 95 | ``` 96 | 97 | Or to build (slowly) with xformers 98 | 99 | ``` 100 | docker build --target xformers .
101 | ``` 102 | 103 | ## Locally (if you have an Nvidia GPU with 4GB+ VRAM, and prefer not to use Docker) 104 | 105 | ### Option 1 (recommended): 106 | 107 | Install Miniconda, then in a Conda console: 108 | 109 | ``` 110 | git clone https://github.com/hafriedlander/stable-diffusion-grpcserver.git 111 | cd stable-diffusion-grpcserver 112 | conda env create -f environment.yaml 113 | conda activate sd-grpc-server 114 | ``` 115 | 116 | Then for Windows: 117 | 118 | ``` 119 | set PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 120 | flit install --pth-file 121 | set HF_API_TOKEN={your huggingface token} 122 | python ./server.py 123 | ``` 124 | 125 | Or for Linux: 126 | 127 | ``` 128 | PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 flit install --pth-file 129 | HF_API_TOKEN={your huggingface token} python ./server.py 130 | ``` 131 | 132 | ### Option 2: 133 | 134 | Create a directory and download https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/main/engines.yaml into it, then: 135 | 136 | ``` 137 | set PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cu116 138 | pip install stable-diffusion-grpcserver 139 | set HF_API_TOKEN={your huggingface token} 140 | sdgrpcserver 141 | ``` 142 | 143 | 144 | # Thanks to / Credits: 145 | 146 | - Seamless outpainting https://github.com/parlance-zz/g-diffuser-bot/tree/g-diffuser-bot-beta2 147 | - Additional schedulers https://github.com/hlky/diffusers 148 | - K-Diffusion integration example https://github.com/Birch-san/diffusers/blob/1472b70194ae6d7e51646c0d6787815a5bc65f75/examples/community/play.py 149 | 150 | # Roadmap 151 | 152 | Core API functions not working yet: 153 | 154 | - ChainGenerate not implemented 155 | 156 | Extra features to add: 157 | 158 | - Progress reporting over the API is included but not exposed yet 159 | - Embedding params in png 160 | - Extra APIs 161 | - Image resizing 162 | - Aspect ratio shifting 163 | - Asset management 164 | - Extension negotiation so we can: 165 | - Ping back progress notices 166 | - Allow cancellation requests 167 | - Specify negative prompts 168 | - Community features: 169 | - Prompt calculation https://github.com/pharmapsychotic/clip-interrogator/blob/main/clip_interrogator.ipynb 170 | - Prompt suggestion https://huggingface.co/spaces/Gustavosta/MagicPrompt-Stable-Diffusion 171 | - Prompt compositing https://github.com/energy-based-model/Compositional-Visual-Generation-with-Composable-Diffusion-Models-PyTorch 172 | - Automasking https://github.com/ThstereforeGames/txt2mask 173 | - Huge seeds 174 | 175 | 176 | # License 177 | 178 | The main codebase is distributed under Apache-2.0. Dependencies are all compatible with that license, except as noted here: 179 | 180 | - The nonfree directory contains code under some license that is more restrictive than Apache-2.0. Check the individual 181 | projects for license details. To fully comply with the Apache-2.0 license, remove this folder before release. 182 | + ToMe 183 | + Codeformer 184 | - The Docker images contain a bunch of software under various open source licenses. The docker images tagged 'noncomm' 185 | include the nonfree folder, and so cannot be used commercially.
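As a rough sketch (not an official client), a running server can be called with the stubs generated into `sdgrpcserver/generated`. The exact `Request` and `Prompt` fields are defined by `generation.proto` in the `api-interfaces` submodule, so the field names below are illustrative only:

```
import sys

import grpc

# The generated modules import each other by bare name, so put the generated
# directory itself on the path rather than importing it as a package
sys.path.append("sdgrpcserver/generated")

import generation_pb2
import generation_pb2_grpc

channel = grpc.insecure_channel("localhost:50051")  # default GRPC port
stub = generation_pb2_grpc.GenerationServiceStub(channel)

# Illustrative field names - check generation.proto for the real schema
request = generation_pb2.Request(
    engine_id="stable-diffusion-v1-5",
    prompt=[generation_pb2.Prompt(text="a lighthouse on a cliff at sunset")],
)

# Generate is a server-streaming RPC, so iterate the stream of Answer messages
for answer in stub.Generate(request):
    ...  # inspect answer.artifacts and save any image data
```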
186 | 187 | [![Stable Cabal Logo](stablecabal.png)](https://www.stablecabal.org/) 188 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | git submodule update --init --recursive 4 | docker run --rm -it -v `pwd`:/src $(docker build -q . -f Dockerfile.protoc) bash /build_protoc.sh 5 | -------------------------------------------------------------------------------- /docker_support/CMakeFile.txt.diff: -------------------------------------------------------------------------------- 1 | --- CMakeLists.txt.orig 2022-04-20 21:35:50.000000000 +0000 2 | +++ CMakeLists.txt 2022-10-19 23:14:02.999699993 +0000 3 | @@ -130,11 +130,11 @@ 4 | # NV_SM accumulates sm_xx for all requested versions 5 | # NV_COMP is compute_xx for highest requested version 6 | set( NV_SM "" ) 7 | set( NV_COMP "" ) 8 | 9 | - set(CUDA_SEPARABLE_COMPILATION ON) 10 | + set(CUDA_SEPARABLE_COMPILATION OFF) 11 | 12 | # nvcc >= 6.5 supports -std=c++11, so propagate CXXFLAGS to NVCCFLAGS. 13 | # Older nvcc didn't support -std=c++11, so previously we disabled propagation. 14 | ##if (${CMAKE_CXX_FLAGS} MATCHES -std=) 15 | ## set( CUDA_PROPAGATE_HOST_FLAGS OFF ) 16 | @@ -292,15 +292,31 @@ 17 | set( NV_SM ${NV_SM} -gencode arch=compute_80,code=sm_80 ) 18 | set( NV_COMP -gencode arch=compute_80,code=compute_80 ) 19 | message( STATUS " compile for CUDA arch 8.0 (Ampere)" ) 20 | endif() 21 | 22 | + if (GPU_TARGET MATCHES sm_89) 23 | + if (NOT MIN_ARCH) 24 | + set( MIN_ARCH 890 ) 25 | + endif() 26 | + set( NV_SM ${NV_SM} -gencode arch=compute_89,code=sm_89 ) 27 | + set( NV_COMP -gencode arch=compute_89,code=compute_89 ) 28 | + message( STATUS " compile for CUDA arch 8.9 (Ada Lovelace)" ) 29 | + endif() 30 | + 31 | + if ( ${GPU_TARGET} MATCHES "All") 32 | + set( MIN_ARCH 600 ) 33 | + SET( NV_SM ${CUDA_ARCH_LIST}) 34 | + SET( NV_COMP "") 35 | + endif() 36 | + 37 | if (NOT MIN_ARCH) 38 | message( FATAL_ERROR "GPU_TARGET must contain one or more of Fermi, Kepler, Maxwell, Pascal, Volta, Turing, Ampere, or valid sm_[0-9][0-9]" ) 39 | endif() 40 | 41 | - set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 42 | + #set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 43 | + set( CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -DHAVE_CUBLAS -Xfatbin -compress-all -Xcompiler -fPIC ${NV_SM} ${NV_COMP} ${FORTRAN_CONVENTION} ) 44 | #add_definitions( "-DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" ) 45 | set(MAGMA_HAVE_CUDA "1") 46 | set(MAGMA_CUDA_ARCH_MIN "${MIN_ARCH}") 47 | message( STATUS "Define -DMAGMA_HAVE_CUDA -DMAGMA_CUDA_ARCH_MIN=${MIN_ARCH}" ) 48 | else() 49 | @@ -749,11 +765,11 @@ 50 | file( GLOB headers include/*.h sparse/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) 51 | else() 52 | file( GLOB headers include/*.h sparse_hip/include/*.h "${CMAKE_BINARY_DIR}/include/*.h" ) 53 | endif() 54 | if (USE_FORTRAN) 55 | - install( FILES ${headers} ${modules} 56 | + install( FILES ${headers} 57 | DESTINATION include ) 58 | else() 59 | install( FILES ${headers} DESTINATION include ) 60 | endif() 61 | -------------------------------------------------------------------------------- /docker_support/build_protoc.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto --python_out=sdgrpcserver/generated 
--grpc_python_out=sdgrpcserver/generated api-interfaces/src/tensorizer/proto/tensors.proto 4 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/generation.proto 5 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/engines.proto 6 | python -m grpc_tools.protoc -Iapi-interfaces/src/tensorizer/proto -Iapi-interfaces/src/proto --python_out=sdgrpcserver/generated --grpc_python_out=sdgrpcserver/generated api-interfaces/src/proto/dashboard.proto 7 | 8 | -------------------------------------------------------------------------------- /docker_support/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | if [ -z ${SD_CLOUDFLARE} ]; then 3 | SERVICE="python" 4 | if pgrep -x "$SERVICE" >/dev/null 5 | then 6 | echo "server is running" 7 | else 8 | /bin/micromamba -r env -n sd-grpc-server run python ./server.py 9 | fi 10 | else 11 | SERVICE="python" 12 | if pgrep -x "$SERVICE" >/dev/null 13 | then 14 | echo "server is running" 15 | else 16 | /bin/micromamba -r env -n sd-grpc-server run python ./server.py & 17 | fi 18 | FILE=./cloudflared-linux-amd64 19 | if [ ! -f "$FILE" ]; then 20 | wget https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64 21 | chmod +x cloudflared-linux-amd64 22 | fi 23 | ./cloudflared-linux-amd64 tunnel --url http://localhost:5000 24 | fi 25 | -------------------------------------------------------------------------------- /environment.yaml: -------------------------------------------------------------------------------- 1 | name: sd-grpc-server 2 | channels: 3 | - pytorch 4 | - defaults 5 | dependencies: 6 | - git 7 | - python=3.10 8 | - pip 9 | - flit 10 | - psutil 11 | -------------------------------------------------------------------------------- /nonfree/README.md: -------------------------------------------------------------------------------- 1 | Projects in this folder have a license that is more restricted that the global project license 2 | 3 | For strict distribution and compliance with Apache-2.0, remove this folder prior to distribution 4 | 5 | stable-diffusion-grpcserver will work fine without these files, and is not a derivative work. 6 | -------------------------------------------------------------------------------- /nonfree/tome_memory_efficient_cross_attention.py: -------------------------------------------------------------------------------- 1 | 2 | # A merge of the CrossAttention blocks from ToMe and MemoryEfficientCrossAttention 3 | # (C) Hamish Friedlander 2022, All Rights Reserved. 
Distributable under the same license as ToMe 4 | 5 | from typing import Tuple, Union 6 | 7 | import torch 8 | from diffusers.models.attention import CrossAttention 9 | from tome.merge import bipartite_soft_matching, merge_source, merge_wavg 10 | from tome.utils import parse_r 11 | 12 | try: 13 | import xformers 14 | import xformers.ops 15 | except: 16 | xformers = None 17 | 18 | def has_xformers(): 19 | return xformers is not None 20 | 21 | class ToMeMemoryEfficientCrossAttention(CrossAttention): 22 | def forward(self, hidden_states, context=None, mask=None): 23 | 24 | # This bit from ToMe 25 | 26 | batch_size, sequence_length, _ = hidden_states.shape 27 | 28 | q = self.to_q(hidden_states) 29 | context = context if context is not None else hidden_states 30 | k = self.to_k(context) 31 | v = self.to_v(context) 32 | dim = q.shape[-1] 33 | r = self._tome_info["r"].pop(0) 34 | if r > 0: 35 | # Apply ToMe here 36 | merge, _ = bipartite_soft_matching( 37 | k, 38 | r, 39 | self._tome_info["class_token"], 40 | self._tome_info["distill_token"], 41 | ) 42 | if self._tome_info["trace_source"]: 43 | self._tome_info["source"] = merge_source( 44 | merge, k, self._tome_info["source"] 45 | ) 46 | self._tome_info["source"] = merge_source( 47 | merge, v, self._tome_info["source"] 48 | ) 49 | k, self._tome_info["size"] = merge_wavg(merge, k) 50 | v, self._tome_info["size"] = merge_wavg(merge, v) 51 | 52 | # This bit from MemoryEfficientCrossAttention 53 | 54 | b, _, _ = q.shape 55 | q, k, v = map( 56 | lambda t: t.unsqueeze(3) 57 | .reshape(b, t.shape[1], self.heads, self.dim_head) 58 | .permute(0, 2, 1, 3) 59 | .reshape(b * self.heads, t.shape[1], self.dim_head) 60 | .contiguous(), 61 | (q, k, v), 62 | ) 63 | 64 | # actually compute the attention, what we cannot get enough of 65 | out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=None) 66 | 67 | # TODO: Use this directly in the attention operation, as a bias 68 | if mask is not None: 69 | raise NotImplementedError 70 | out = ( 71 | out.unsqueeze(0) 72 | .reshape(b, self.heads, out.shape[1], self.dim_head) 73 | .permute(0, 2, 1, 3) 74 | .reshape(b, out.shape[1], self.heads * self.dim_head) 75 | ) 76 | return self.to_out(out) 77 | -------------------------------------------------------------------------------- /nonfree/tome_patcher.py: -------------------------------------------------------------------------------- 1 | from diffusers import UNet2DConditionModel 2 | from diffusers.models.attention import SpatialTransformer, BasicTransformerBlock 3 | 4 | from nonfree.tome_unet import ToMeUNet, ToMeSpatialTransformer, ToMeCrossAttention 5 | from nonfree.tome_memory_efficient_cross_attention import has_xformers, ToMeMemoryEfficientCrossAttention 6 | 7 | from sdgrpcserver.pipeline.models.memory_efficient_cross_attention import MemoryEfficientCrossAttention 8 | 9 | def apply_tome( 10 | model: UNet2DConditionModel, trace_source: bool = False, prop_attn: bool = True 11 | ): 12 | """ 13 | Applies ToMe to this transformer. Afterward, set r using model.r. 14 | 15 | If you want to know the source of each token (e.g., for visualization), set trace_source = true. 16 | The sources will be available at model._tome_info["source"] afterward. 17 | 18 | For proportional attention, set prop_attn to True. This is only necessary when evaluating models off 19 | the shelf. For trianing and for evaluating MAE models off the self set this to be False. 
20 | """ 21 | 22 | model.__class__ = ToMeUNet 23 | model.r = 0 24 | model._tome_info = { 25 | "r": model.r, 26 | "size": None, 27 | "source": None, 28 | "trace_source": trace_source, 29 | "prop_attn": prop_attn, 30 | "class_token": False, 31 | "distill_token": False, 32 | } 33 | 34 | if hasattr(model, "dist_token") and model.dist_token is not None: 35 | model._tome_info["distill_token"] = True 36 | 37 | for module in model.modules(): 38 | if isinstance(module, SpatialTransformer): 39 | module.__class__ = ToMeSpatialTransformer 40 | if isinstance(module, BasicTransformerBlock): 41 | #module.__class__ = ToMeTransformerBlock 42 | #module._tome_info = model._tome_info 43 | if isinstance(module.attn1, MemoryEfficientCrossAttention): 44 | module.attn1.__class__ = ToMeMemoryEfficientCrossAttention 45 | else: 46 | module.attn1.__class__ = ToMeCrossAttention 47 | module.attn1._tome_info = model._tome_info 48 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["flit_core >=3.2,<4"] 3 | build-backend = "flit_core.buildapi" 4 | 5 | [project] 6 | name = "stable-diffusion-grpcserver" 7 | authors = [{ name = "Hamish Friedlander", email = "hafriedlander@gmail.com" }] 8 | readme = "README.md" 9 | license = { file = "LICENSE" } 10 | classifiers = ["License :: OSI Approved :: Apache Software License"] 11 | dynamic = ["version", "description"] 12 | dependencies = [ 13 | # Core pipeline 14 | "torch ~= 1.12.1", 15 | "einops ~= 0.5.0", 16 | "torchvision ~= 0.13.1", 17 | "numpy ~= 1.23.3", 18 | "opencv-python-headless ~= 4.6.0.66", 19 | "scipy ~= 1.9.1", 20 | "ftfy ~= 6.1.1", 21 | "transformers ~= 4.25.1", 22 | "diffusers ~= 0.10.2", 23 | "accelerate ~= 0.13.2", 24 | "easing-functions ~= 1.0.4", 25 | # For ToMe 26 | "timm ~= 0.6.11", 27 | # For Structured Diffusion 28 | "nltk ~= 3.7", 29 | "stanza ~= 1.4.2", 30 | # For K-Diffusion 31 | "torchdiffeq ~= 0.2.3", 32 | "torchsde ~= 0.2.5", 33 | # For Server 34 | "protobuf ~= 3.20", 35 | "grpcio ~= 1.48.1", 36 | "wsgicors ~= 0.7.0", 37 | "Twisted ~= 22.8.0", 38 | "hupper ~= 1.10.3", 39 | "watchdog ~= 2.1.9", 40 | "python-dotenv ~= 0.21.0", 41 | "service_identity ~= 21.1.0", 42 | # For Tests 43 | "pynvml ~= 11.4.1", 44 | ] 45 | 46 | [project.optional-dependencies] 47 | dev = ["black ~= 22.10.0", "flake8 ~= 6.0.0", "flake8-pyproject ~= 1.2.1"] 48 | 49 | [project.urls] 50 | Home = "https://github.com/hafriedlander/stable-diffusion-grpcserver" 51 | 52 | [project.scripts] 53 | sdgrpcserver = "sdgrpcserver.server:main" 54 | 55 | [tool.flit.module] 56 | name = "sdgrpcserver" 57 | 58 | [tool.isort] 59 | profile = "black" 60 | 61 | [tool.flake8] 62 | max-line-length = 88 63 | select = "C,E,F,W,B,B950" 64 | extend-ignore = "E203, E501, W503" 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Use the CUDA-enabled versions of pytorch 2 | --extra-index-url https://download.pytorch.org/whl/cu116 3 | 4 | # Python AI basics 5 | torch~=1.12.1 6 | torchvision~=0.13.1 7 | numpy~=1.23.3 8 | opencv-python~=4.6.0.66 9 | scipy~=1.9.1 10 | 11 | # Transformers 12 | transformers~=4.22.1 13 | -e git+https://github.com/CompVis/taming-transformers.git@master#egg=taming-transformers 14 | -e git+https://github.com/openai/CLIP.git@main#egg=clip 15 | diffusers~=0.4.1 16 | 17 | # Server libraries 18 | 
protobuf~=3.20 19 | grpcio~=1.48.1 20 | Flask~=2.2.2 21 | wsgicors~=0.7.0 22 | waitress~=2.1.2 23 | hupper~=1.10.3 24 | watchdog~=2.1.9 25 | 26 | # Additional AI libraries 27 | # These come from "taming transformers" or "diffusers" environment.yaml 28 | # and probably aren't needed for Stable Diffusion inferance 29 | #albumentations==0.4.3 30 | #pytorch-lightning==1.4.2 31 | #test-tube>=0.7.5 32 | #einops==0.3.0 33 | #torch-fidelity==0.3.0 34 | #torchmetrics==0.6.0 35 | #invisible-watermark 36 | 37 | # Other stuff from the taming transformers or diffusers environment.yaml 38 | #pudb==2019.2 39 | #imageio==2.9.0 40 | #imageio-ffmpeg==0.4.2 41 | #omegaconf==2.1.1 42 | #streamlit>=0.73.1 43 | -------------------------------------------------------------------------------- /sdgrpcserver/__init__.py: -------------------------------------------------------------------------------- 1 | """A local Stable Diffusion AI image generation server compatible with the Stability-AI GRPC protocol'""" 2 | 3 | __version__ = "0.0.1" -------------------------------------------------------------------------------- /sdgrpcserver/config/dist_hashes: -------------------------------------------------------------------------------- 1 | cdb93bad7d27d5825d4dc2925173442b04540d81 2 | 3bf70ef84926776a999f92d2641493942687dcbc 3 | b334b20ec2b037f126bf61b4fc780bbff9004283 4 | f327f51ccbc055a3e50596fd889bfd6abf60ef46 5 | 7fc705e0e387d4fb762098145f72e650cf00fd3a 6 | e4f24033e5ad4063b6b69758119920a389b2df8d 7 | b4e4a1cdb19b459617f5e0c2e670e2079911bf62 8 | 72e706b173461c04cae5607810b31ac425c1e719 9 | 2147a0769fbdd03c2157cc9394542eca089d2f21 10 | 74228d116acbc1ee462eae5635a69d0975952841 11 | 5c2f2449e58a306f937c36bdc97ea039d337aa23 12 | 7dc95fb6dfa8fde48b66052d2044d1a1d8302964 13 | c639b7da0becbd74306ab94a1490e7811322b63c 14 | 88dcc9c93e23dce7723cbdc1bb55b14d80d13aaa 15 | 05e1fb9c1700f0833b8ffb593440f7000dc4cd35 16 | a6ab03a095ba36d06b15af41649989b38cd918b9 17 | c3e31f14155e6db72d2a67e6883ff14f16689332 18 | a21f0ca8937fce9638b729c38cac3ec288903ef7 19 | ed6826bb6a2e86c558c7eda5f65a8bc10bf28438 20 | 606b469ecbe60b74ab0006768b045c353239db9c 21 | -------------------------------------------------------------------------------- /sdgrpcserver/config/genhashes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | (for x in `git log --pretty=format:"%H" --diff-filter=d --reverse -- ../../engines.yaml` ; do git rev-parse "${x}:../../engines.yaml" ; done) > dist_hashes 3 | (for x in `git log --pretty=format:"%H" --diff-filter=d --reverse -- ./engines.yaml` ; do git rev-parse "${x}:./engines.yaml" ; done) >> dist_hashes 4 | 5 | -------------------------------------------------------------------------------- /sdgrpcserver/constants.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | debug_path = os.environ.get("SD_DEBUG_PATH", False) 4 | if not debug_path: debug_path = os.path.join( 5 | os.path.dirname(os.path.dirname(__file__)), 6 | "/tests/out/" 7 | ) 8 | -------------------------------------------------------------------------------- /sdgrpcserver/debug_recorder.py: -------------------------------------------------------------------------------- 1 | import glob, os, tempfile, platform, time 2 | 3 | import yaml 4 | try: 5 | from yaml import CLoader as Loader, CDumper as Dumper 6 | except ImportError: 7 | from yaml import Loader, Dumper 8 | 9 | try: 10 | import gzip 11 | except: 12 | gzip = None 13 | 14 | record_modules = [ 15 | "torch", 
16 | "torchvision", 17 | "numpy", 18 | "opencv-python-headless", 19 | "scipy", 20 | "transformers", 21 | "diffusers", 22 | ] 23 | 24 | try: 25 | from importlib.metadata import version 26 | def get_module_version(module): return version(module) 27 | except: 28 | import pkg_resources 29 | def get_module_version(module): return pkg_resources.get_distribution(module).version 30 | 31 | class DebugContext: 32 | def __init__(self, recorder, label): 33 | self.recorder = recorder 34 | self.events = [] 35 | self.store('label', label) 36 | self.store('uname', platform.uname()) 37 | self.store('python version', platform.python_version()) 38 | self.store('module versions', self.get_module_versions()) 39 | 40 | def get_module_versions(self): 41 | return {module: get_module_version(module) for module in record_modules} 42 | 43 | def store(self, label, data): 44 | self.events.append((label, data)) 45 | 46 | def __enter__(self): 47 | return self 48 | 49 | def __exit__(self, exc_type, exc_value, exc_traceback): 50 | if exc_type: 51 | self.store('exception', [exc_type, exc_value, exc_traceback]) 52 | 53 | self.recorder.store(self.events) 54 | 55 | class DebugRecorder: 56 | def __init__(self, storage_time=10*60): 57 | self.storage_time = storage_time 58 | self.storage_path = os.path.join(tempfile.gettempdir(), "sdgrpcserver_debug") 59 | 60 | if not os.path.exists(self.storage_path): os.mkdir(self.storage_path) 61 | 62 | def garbage_collect(self): 63 | now = time.time() 64 | for path in glob.glob(os.path.join(self.storage_path, "*.dump*")): 65 | mtime = os.path.getmtime(path) 66 | if mtime < now - self.storage_time: 67 | print("Debug record expired: ", path) 68 | os.unlink(path) 69 | 70 | def record(self, label): 71 | return DebugContext(self, label) 72 | 73 | def store(self, events): 74 | now = time.time() 75 | path = f"debug-{now}.dump" 76 | data = yaml.dump(events, Dumper=Dumper) 77 | 78 | if gzip: 79 | path = f"debug-{now}.dump.gz" 80 | data = gzip.compress(bytes(data, "utf8")) 81 | 82 | with open(os.path.join(self.storage_path, path), "wb") as f: 83 | f.write(data) 84 | 85 | self.garbage_collect() 86 | 87 | class DebugNullRecorder: 88 | def __init__(self): 89 | pass 90 | 91 | def record(self, label): 92 | return self 93 | 94 | def __enter__(self): 95 | return self 96 | 97 | def __exit__(self, exc_type, exc_value, exc_traceback): 98 | pass 99 | 100 | def get_module_versions(self): 101 | return {} 102 | 103 | def store(self, label, data): 104 | pass 105 | 106 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/engines_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: engines.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | import generation_pb2 as generation__pb2 15 | 16 | 17 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rengines.proto\x12\x07gooseai\x1a\x10generation.proto\"\xdf\x01\n\rEngineSampler\x12*\n\x07sampler\x18\x01 \x01(\x0e\x32\x19.gooseai.DiffusionSampler\x12\x14\n\x0csupports_eta\x18\n \x01(\x08\x12\x16\n\x0esupports_churn\x18\x0b \x01(\x08\x12\x1d\n\x15supports_sigma_limits\x18\x0c \x01(\x08\x12\x1b\n\x13supports_karras_rho\x18\r \x01(\x08\x12\x38\n\x15supported_noise_types\x18\x14 \x03(\x0e\x32\x19.gooseai.SamplerNoiseType\"\xde\x01\n\nEngineInfo\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05owner\x18\x02 \x01(\t\x12\r\n\x05ready\x18\x03 \x01(\x08\x12!\n\x04type\x18\x04 \x01(\x0e\x32\x13.gooseai.EngineType\x12+\n\ttokenizer\x18\x05 \x01(\x0e\x32\x18.gooseai.EngineTokenizer\x12\x0c\n\x04name\x18\x06 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x07 \x01(\t\x12\x33\n\x12supported_samplers\x18\xf4\x03 \x03(\x0b\x32\x16.gooseai.EngineSampler\"\x14\n\x12ListEnginesRequest\".\n\x07\x45ngines\x12#\n\x06\x65ngine\x18\x01 \x03(\x0b\x32\x13.gooseai.EngineInfo*Z\n\nEngineType\x12\x08\n\x04TEXT\x10\x00\x12\x0b\n\x07PICTURE\x10\x01\x12\t\n\x05\x41UDIO\x10\x02\x12\t\n\x05VIDEO\x10\x03\x12\x12\n\x0e\x43LASSIFICATION\x10\x04\x12\x0b\n\x07STORAGE\x10\x05*%\n\x0f\x45ngineTokenizer\x12\x08\n\x04GPT2\x10\x00\x12\x08\n\x04PILE\x10\x01\x32P\n\x0e\x45nginesService\x12>\n\x0bListEngines\x12\x1b.gooseai.ListEnginesRequest\x1a\x10.gooseai.Engines\"\x00\x42\x0cZ\n./;enginesb\x06proto3') 18 | 19 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 20 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'engines_pb2', globals()) 21 | if _descriptor._USE_C_DESCRIPTORS == False: 22 | 23 | DESCRIPTOR._options = None 24 | DESCRIPTOR._serialized_options = b'Z\n./;engines' 25 | _ENGINETYPE._serialized_start=565 26 | _ENGINETYPE._serialized_end=655 27 | _ENGINETOKENIZER._serialized_start=657 28 | _ENGINETOKENIZER._serialized_end=694 29 | _ENGINESAMPLER._serialized_start=45 30 | _ENGINESAMPLER._serialized_end=268 31 | _ENGINEINFO._serialized_start=271 32 | _ENGINEINFO._serialized_end=493 33 | _LISTENGINESREQUEST._serialized_start=495 34 | _LISTENGINESREQUEST._serialized_end=515 35 | _ENGINES._serialized_start=517 36 | _ENGINES._serialized_end=563 37 | _ENGINESSERVICE._serialized_start=696 38 | _ENGINESSERVICE._serialized_end=776 39 | # @@protoc_insertion_point(module_scope) 40 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/engines_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import engines_pb2 as engines__pb2 6 | 7 | 8 | class EnginesServiceStub(object): 9 | """Missing associated documentation comment in .proto file.""" 10 | 11 | def __init__(self, channel): 12 | """Constructor. 13 | 14 | Args: 15 | channel: A grpc.Channel. 
16 | """ 17 | self.ListEngines = channel.unary_unary( 18 | '/gooseai.EnginesService/ListEngines', 19 | request_serializer=engines__pb2.ListEnginesRequest.SerializeToString, 20 | response_deserializer=engines__pb2.Engines.FromString, 21 | ) 22 | 23 | 24 | class EnginesServiceServicer(object): 25 | """Missing associated documentation comment in .proto file.""" 26 | 27 | def ListEngines(self, request, context): 28 | """Missing associated documentation comment in .proto file.""" 29 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 30 | context.set_details('Method not implemented!') 31 | raise NotImplementedError('Method not implemented!') 32 | 33 | 34 | def add_EnginesServiceServicer_to_server(servicer, server): 35 | rpc_method_handlers = { 36 | 'ListEngines': grpc.unary_unary_rpc_method_handler( 37 | servicer.ListEngines, 38 | request_deserializer=engines__pb2.ListEnginesRequest.FromString, 39 | response_serializer=engines__pb2.Engines.SerializeToString, 40 | ), 41 | } 42 | generic_handler = grpc.method_handlers_generic_handler( 43 | 'gooseai.EnginesService', rpc_method_handlers) 44 | server.add_generic_rpc_handlers((generic_handler,)) 45 | 46 | 47 | # This class is part of an EXPERIMENTAL API. 48 | class EnginesService(object): 49 | """Missing associated documentation comment in .proto file.""" 50 | 51 | @staticmethod 52 | def ListEngines(request, 53 | target, 54 | options=(), 55 | channel_credentials=None, 56 | call_credentials=None, 57 | insecure=False, 58 | compression=None, 59 | wait_for_ready=None, 60 | timeout=None, 61 | metadata=None): 62 | return grpc.experimental.unary_unary(request, target, '/gooseai.EnginesService/ListEngines', 63 | engines__pb2.ListEnginesRequest.SerializeToString, 64 | engines__pb2.Engines.FromString, 65 | options, channel_credentials, 66 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 67 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/generation_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | import generation_pb2 as generation__pb2 6 | 7 | 8 | class GenerationServiceStub(object): 9 | """ 10 | gRPC services 11 | 12 | """ 13 | 14 | def __init__(self, channel): 15 | """Constructor. 16 | 17 | Args: 18 | channel: A grpc.Channel. 
19 | """ 20 | self.Generate = channel.unary_stream( 21 | '/gooseai.GenerationService/Generate', 22 | request_serializer=generation__pb2.Request.SerializeToString, 23 | response_deserializer=generation__pb2.Answer.FromString, 24 | ) 25 | self.ChainGenerate = channel.unary_stream( 26 | '/gooseai.GenerationService/ChainGenerate', 27 | request_serializer=generation__pb2.ChainRequest.SerializeToString, 28 | response_deserializer=generation__pb2.Answer.FromString, 29 | ) 30 | 31 | 32 | class GenerationServiceServicer(object): 33 | """ 34 | gRPC services 35 | 36 | """ 37 | 38 | def Generate(self, request, context): 39 | """Missing associated documentation comment in .proto file.""" 40 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 41 | context.set_details('Method not implemented!') 42 | raise NotImplementedError('Method not implemented!') 43 | 44 | def ChainGenerate(self, request, context): 45 | """Missing associated documentation comment in .proto file.""" 46 | context.set_code(grpc.StatusCode.UNIMPLEMENTED) 47 | context.set_details('Method not implemented!') 48 | raise NotImplementedError('Method not implemented!') 49 | 50 | 51 | def add_GenerationServiceServicer_to_server(servicer, server): 52 | rpc_method_handlers = { 53 | 'Generate': grpc.unary_stream_rpc_method_handler( 54 | servicer.Generate, 55 | request_deserializer=generation__pb2.Request.FromString, 56 | response_serializer=generation__pb2.Answer.SerializeToString, 57 | ), 58 | 'ChainGenerate': grpc.unary_stream_rpc_method_handler( 59 | servicer.ChainGenerate, 60 | request_deserializer=generation__pb2.ChainRequest.FromString, 61 | response_serializer=generation__pb2.Answer.SerializeToString, 62 | ), 63 | } 64 | generic_handler = grpc.method_handlers_generic_handler( 65 | 'gooseai.GenerationService', rpc_method_handlers) 66 | server.add_generic_rpc_handlers((generic_handler,)) 67 | 68 | 69 | # This class is part of an EXPERIMENTAL API. 70 | class GenerationService(object): 71 | """ 72 | gRPC services 73 | 74 | """ 75 | 76 | @staticmethod 77 | def Generate(request, 78 | target, 79 | options=(), 80 | channel_credentials=None, 81 | call_credentials=None, 82 | insecure=False, 83 | compression=None, 84 | wait_for_ready=None, 85 | timeout=None, 86 | metadata=None): 87 | return grpc.experimental.unary_stream(request, target, '/gooseai.GenerationService/Generate', 88 | generation__pb2.Request.SerializeToString, 89 | generation__pb2.Answer.FromString, 90 | options, channel_credentials, 91 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 92 | 93 | @staticmethod 94 | def ChainGenerate(request, 95 | target, 96 | options=(), 97 | channel_credentials=None, 98 | call_credentials=None, 99 | insecure=False, 100 | compression=None, 101 | wait_for_ready=None, 102 | timeout=None, 103 | metadata=None): 104 | return grpc.experimental.unary_stream(request, target, '/gooseai.GenerationService/ChainGenerate', 105 | generation__pb2.ChainRequest.SerializeToString, 106 | generation__pb2.Answer.FromString, 107 | options, channel_credentials, 108 | insecure, call_credentials, compression, wait_for_ready, timeout, metadata) 109 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/tensors_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: tensors.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | # @@protoc_insertion_point(imports) 10 | 11 | _sym_db = _symbol_database.Default() 12 | 13 | 14 | 15 | 16 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rtensors.proto\x12\x07tensors\"\x82\x01\n\x06Tensor\x12\x1d\n\x05\x64type\x18\x01 \x01(\x0e\x32\x0e.tensors.Dtype\x12\r\n\x05shape\x18\x02 \x03(\x03\x12\x0c\n\x04\x64\x61ta\x18\x03 \x01(\x0c\x12.\n\tattr_type\x18\x04 \x01(\x0e\x32\x16.tensors.AttributeTypeH\x00\x88\x01\x01\x42\x0c\n\n_attr_type\"\xac\x01\n\tAttribute\x12\x0c\n\x04name\x18\x01 \x01(\t\x12!\n\x06module\x18\x03 \x01(\x0b\x32\x0f.tensors.ModuleH\x00\x12!\n\x06tensor\x18\x04 \x01(\x0b\x32\x0f.tensors.TensorH\x00\x12\x10\n\x06string\x18\x05 \x01(\tH\x00\x12\x0f\n\x05int64\x18\x06 \x01(\x03H\x00\x12\x0f\n\x05\x66loat\x18\x07 \x01(\x02H\x00\x12\x0e\n\x04\x62ool\x18\x08 \x01(\x08H\x00\x42\x07\n\x05value\"M\n\x06Module\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05names\x18\x02 \x03(\t\x12&\n\nattributes\x18\x03 \x03(\x0b\x32\x12.tensors.Attribute*\x9e\x02\n\x05\x44type\x12\x0e\n\nDT_INVALID\x10\x00\x12\x0e\n\nDT_FLOAT32\x10\x01\x12\x0e\n\nDT_FLOAT64\x10\x02\x12\x0e\n\nDT_FLOAT16\x10\x03\x12\x0f\n\x0b\x44T_BFLOAT16\x10\x04\x12\x10\n\x0c\x44T_COMPLEX32\x10\x05\x12\x10\n\x0c\x44T_COMPLEX64\x10\x06\x12\x11\n\rDT_COMPLEX128\x10\x07\x12\x0c\n\x08\x44T_UINT8\x10\x08\x12\x0b\n\x07\x44T_INT8\x10\t\x12\x0c\n\x08\x44T_INT16\x10\n\x12\x0c\n\x08\x44T_INT32\x10\x0b\x12\x0c\n\x08\x44T_INT64\x10\x0c\x12\x0b\n\x07\x44T_BOOL\x10\r\x12\r\n\tDT_QUINT8\x10\x0e\x12\x0c\n\x08\x44T_QINT8\x10\x0f\x12\r\n\tDT_QINT32\x10\x10\x12\x0f\n\x0b\x44T_QUINT4_2\x10\x11*0\n\rAttributeType\x12\x10\n\x0c\x41T_PARAMETER\x10\x00\x12\r\n\tAT_BUFFER\x10\x01\x42)Z\'github.com/coreweave/tensorizer/tensorsb\x06proto3') 17 | 18 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 19 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'tensors_pb2', globals()) 20 | if _descriptor._USE_C_DESCRIPTORS == False: 21 | 22 | DESCRIPTOR._options = None 23 | DESCRIPTOR._serialized_options = b'Z\'github.com/coreweave/tensorizer/tensors' 24 | _DTYPE._serialized_start=414 25 | _DTYPE._serialized_end=700 26 | _ATTRIBUTETYPE._serialized_start=702 27 | _ATTRIBUTETYPE._serialized_end=750 28 | _TENSOR._serialized_start=27 29 | _TENSOR._serialized_end=157 30 | _ATTRIBUTE._serialized_start=160 31 | _ATTRIBUTE._serialized_end=332 32 | _MODULE._serialized_start=334 33 | _MODULE._serialized_end=411 34 | # @@protoc_insertion_point(module_scope) 35 | -------------------------------------------------------------------------------- /sdgrpcserver/generated/tensors_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | 5 | -------------------------------------------------------------------------------- /sdgrpcserver/images.py: -------------------------------------------------------------------------------- 1 | # Utility functions for handling images as PyTorch Tensors 2 | 3 | # All images in are in BCHW unless specified in the variable name, as floating point 0..1 4 | # All functions will handle RGB or RGBA images 5 | 6 | from math import ceil 7 | 8 | import cv2 as cv 9 | import numpy as np 10 | import PIL 11 | import torch 12 | import torchvision 13 | 14 | 15 | def fromPIL(image): 16 | # Get as numpy HWC 0..1 17 | rgbHWC = np.array(image).astype(np.float32) / 255.0 18 | # Convert to BCHW 19 | rgbBCHW = rgbHWC[None].transpose(0, 3, 1, 2) 20 | # And convert to Tensor 21 | return torch.from_numpy(rgbBCHW) 22 | 23 | 24 | def toPIL(tensor): 25 | # Convert to BCHW if just CHW 26 | if tensor.ndim == 3: 27 | tensor = tensor[None, ...] 28 | # Then convert to BHWC 29 | rgbBHWC = tensor.permute(0, 2, 3, 1) 30 | # Then convert from 0..1 to 0..255 31 | images = (rgbBHWC.to(torch.float32) * 255).round().to(torch.uint8).cpu().numpy() 32 | # And put into PIL image instances 33 | return [PIL.Image.fromarray(image) for image in images] 34 | 35 | 36 | def fromCV(bgrHWC): 37 | bgrBCHW = bgrHWC[None].transpose(0, 3, 1, 2) 38 | channels = [2, 1, 0, 3][bgrBCHW.shape[1]] 39 | return torch.from_numpy(bgrBCHW)[:, channels].to(torch.float32) / 255.0 40 | 41 | 42 | def toCV(tensor): 43 | if tensor.ndim == 3: 44 | tensor = tensor[None, ...] 45 | 46 | bgrBCHW = tensor[:, [2, 1, 0, 3][: tensor.shape[1]]] 47 | bgrBHWC = bgrBCHW.permute(0, 2, 3, 1) 48 | 49 | return (bgrBHWC.to(torch.float32) * 255).round().to(torch.uint8).cpu().numpy() 50 | 51 | 52 | def fromPngBytes(bytes): 53 | intensor = torch.tensor(np.frombuffer(bytes, dtype=np.uint8)) 54 | asuint8 = torchvision.io.decode_image( 55 | intensor, torchvision.io.image.ImageReadMode.RGB_ALPHA 56 | ) 57 | return asuint8[None, ...].to(torch.float32) / 255 58 | 59 | 60 | # Images with alpha will be slow for now. TODO: Move to OpenCV (torchvision does not support encoding alpha images) 61 | def toPngBytes(tensor): 62 | if tensor.ndim == 3: 63 | tensor = tensor[None, ...] 
64 | 65 | if tensor.shape[1] == 1 or tensor.shape[1] == 3: 66 | tensor = (tensor.to(torch.float32) * 255).round().to(torch.uint8) 67 | pngs = [torchvision.io.encode_png(image) for image in tensor] 68 | return [png.numpy().tobytes() for png in pngs] 69 | elif tensor.shape[1] == 4: 70 | images = toCV(tensor) 71 | return [cv.imencode(".png", image)[1].tobytes() for image in images] 72 | else: 73 | print(f"Don't know how to save PNGs with {tensor.shape[1]} channels") 74 | return [] 75 | 76 | 77 | # TODO: This won't work on images with alpha 78 | def levels(tensor, in0, in1, out0, out1): 79 | c = (out1 - out0) / (in1 - in0) 80 | return ((tensor - in0) * c + out0).clamp(0, 1) 81 | 82 | 83 | def invert(tensor): 84 | return 1 - tensor 85 | 86 | 87 | # 0, 1, 2, 3 = r, g, b, a | 4 = 0 | 5 = 1 | 6 = drop 88 | # TODO: These are from generation.proto, but we should be nicer about the mapping 89 | def channelmap(tensor, srcchannels): 90 | # Any that are 6 won't be in final output 91 | outchannels = [x for x in srcchannels if x != 6] 92 | # Any channel request that is higher than channels available, just use channel 0 93 | # (This also deals with channels we will later fill with zero or one) 94 | cpychannels = [x if x < tensor.shape[1] else 0 for x in outchannels] 95 | 96 | # Copy the desired source channel into place (or the first channel if we will replace in the next step) 97 | tensor = tensor[:, cpychannels] 98 | 99 | # Replace any channels with 0 or 1 if requested 100 | for i, c in enumerate(outchannels): 101 | if c == 4: 102 | tensor[:, i] = torch.zeros_like(tensor[0][i]) 103 | elif c == 5: 104 | tensor[:, i] = torch.ones_like(tensor[0][i]) 105 | 106 | return tensor 107 | 108 | 109 | def gaussianblur(tensor, sigma): 110 | if np.isscalar(sigma): 111 | sigma = (sigma, sigma) 112 | kernel = [ceil(sigma[0] * 6), ceil(sigma[1] * 6)] 113 | kernel = [kernel[0] - kernel[0] % 2 + 1, kernel[1] - kernel[1] % 2 + 1] 114 | return torchvision.transforms.functional.gaussian_blur(tensor, kernel, sigma) 115 | 116 | 117 | def crop(tensor, top, left, height, width): 118 | return tensor[:, :, top : top + height, left : left + width] 119 | -------------------------------------------------------------------------------- /sdgrpcserver/k_diffusion.py: -------------------------------------------------------------------------------- 1 | import os, sys, types 2 | 3 | module_path = os.path.join(os.path.dirname(__file__), "src/k-diffusion/k_diffusion") 4 | 5 | import importlib.util 6 | 7 | # We load the k_diffusion files directly rather than relying on Python modules 8 | # This allows us to only install the dependencies of the parts we use 9 | 10 | for name in ['utils', 'sampling', 'external']: 11 | module_name = f"{__name__}.{name}" 12 | file_path = os.path.join(module_path, f"{name}.py") 13 | 14 | # From https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly 15 | spec = importlib.util.spec_from_file_location(module_name, file_path) 16 | module = importlib.util.module_from_spec(spec) 17 | sys.modules[module_name] = module 18 | spec.loader.exec_module(module) 19 | -------------------------------------------------------------------------------- /sdgrpcserver/patching.py: -------------------------------------------------------------------------------- 1 | import functools 2 | import inspect 3 | 4 | 5 | def patch_module_references(item, **patch): 6 | container_module = inspect.getmodule(item) 7 | 8 | # Handle the case of partial or other wrapped callables 9 | # (only for functools - other wrappers will
break this function) 10 | if container_module is functools: 11 | container_module = inspect.getmodule(item.func) 12 | 13 | for k, v in patch.items(): 14 | setattr(container_module, k, v) 15 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/pipeline/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/attention_replacer.py: -------------------------------------------------------------------------------- 1 | # Originally from https://github.com/shunk031/training-free-structured-diffusion-guidance/blob/main/tfsdg/utils/replace_layer.py 2 | 3 | import inspect 4 | from typing import Type 5 | 6 | import torch.nn as nn 7 | from diffusers.models.attention import CrossAttention 8 | 9 | 10 | def replace_cross_attention( 11 | target: nn.Module, crossattention: Type[nn.Module], name: str 12 | ) -> None: 13 | for attr_str in dir(target): 14 | target_attr = getattr(target, attr_str) 15 | 16 | if isinstance(target_attr, CrossAttention): 17 | query_dim = target_attr.to_q.in_features 18 | assert target_attr.to_k.in_features == target_attr.to_v.in_features 19 | context_dim = target_attr.to_k.in_features 20 | heads = target_attr.heads 21 | dim_head = int(target_attr.scale**-2) 22 | dropout = target_attr.to_out[-1].p 23 | 24 | ca_kwargs = { 25 | "query_dim": query_dim, 26 | "context_dim": context_dim, 27 | "heads": heads, 28 | "dim_head": dim_head, 29 | "dropout": dropout, 30 | } 31 | 32 | accepts_struct_attention = "struct_attention" in set( 33 | inspect.signature(crossattention.__init__).parameters.keys() 34 | ) 35 | 36 | if accepts_struct_attention: 37 | ca_kwargs["struct_attention"] = attr_str == "attn2" 38 | 39 | ca = crossattention(**ca_kwargs) 40 | ca.to( 41 | device=target_attr.to_q.weight.device, 42 | dtype=target_attr.to_q.weight.dtype, 43 | ) 44 | 45 | original_params = list(target_attr.parameters()) 46 | proposed_params = list(ca.parameters()) 47 | assert len(original_params) == len(proposed_params) 48 | 49 | for p1, p2 in zip(original_params, proposed_params): 50 | p2.data.copy_(p1.data) 51 | 52 | setattr(target, attr_str, ca) 53 | 54 | for name, immediate_child_module in target.named_children(): 55 | replace_cross_attention( 56 | target=immediate_child_module, crossattention=crossattention, name=name 57 | ) 58 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/diffusers_types.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class VaeConfig: 6 | block_out_channels: list[int] 7 | 8 | 9 | @dataclass 10 | class UnetConfig: 11 | sample_size: int | None 12 | attention_head_dim: int | list[int] 13 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/easing.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Type 2 | 3 | from easing_functions import easing 4 | 5 | EASING_TYPE = Literal[ 6 | "linear", "quad", "cubic", "quartic", "quintic", "sine", "circular", "expo" 7 | ] 8 | 9 | EASINGS: dict[EASING_TYPE, Type[easing.EasingBase]] = { 10 | "linear": easing.LinearInOut, 11 | "quad": 
easing.QuadEaseInOut, 12 | "cubic": easing.CubicEaseInOut, 13 | "quartic": easing.QuarticEaseInOut, 14 | "quintic": easing.QuinticEaseInOut, 15 | "sine": easing.SineEaseInOut, 16 | "circular": easing.CircularEaseInOut, 17 | "expo": easing.ExponentialEaseInOut, 18 | } 19 | 20 | 21 | class Easing: 22 | def __init__( 23 | self, 24 | floor: float, 25 | start: float, 26 | end: float, 27 | easing: EASING_TYPE | Type[easing.EasingBase], 28 | ): 29 | self.floor = floor 30 | self.start = start 31 | self.end = end 32 | 33 | if isinstance(easing, str): 34 | easing = EASINGS[easing] 35 | 36 | self.easing = easing( 37 | end=1 - floor, duration=1 - (start + end) # type: ignore - easing_functions takes floats just fine 38 | ) 39 | 40 | def interp(self, u: float): 41 | if u < self.start: 42 | return self.floor 43 | if u > 1 - self.end: 44 | return 1 45 | 46 | return self.floor + self.easing(u - self.start) 47 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .scheduling_utils import KSchedulerMixin 3 | from .scheduling_dpm2_ancestral_discrete import DPM2AncestralDiscreteScheduler 4 | from .scheduling_dpm2_discrete import DPM2DiscreteScheduler 5 | from .scheduling_euler_ancestral_discrete import EulerAncestralDiscreteScheduler 6 | from .scheduling_euler_discrete import EulerDiscreteScheduler 7 | from .scheduling_heun_discrete import HeunDiscreteScheduler 8 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_dpm2_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class DPM2DiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | A sampler inspired by DPM-Solver-2 and Algorithm 2 from Karras et al. (2022). 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L119 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`.
36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 
92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: float, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | noise_predictor = None, 119 | return_dict: bool = True, 120 | ) -> Union[SchedulerOutput, Tuple]: 121 | """ 122 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 123 | process from the learned model outputs (most often the predicted noise). 124 | 125 | Args: 126 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 127 | timestep (`int`): current discrete timestep in the diffusion chain. 128 | sample (`torch.FloatTensor` or `np.ndarray`): 129 | current instance of sample being created by diffusion process. 130 | s_churn (`float`) 131 | s_tmin (`float`) 132 | s_tmax (`float`) 133 | s_noise (`float`) 134 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 135 | 136 | Returns: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 138 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 139 | returning a tuple, the first element is the sample tensor. 140 | 141 | """ 142 | if not noise_predictor: print("Noise predictor not provided, result will not be correct.") 143 | 144 | index = self.t_to_index(timestep) 145 | 146 | sigma = self.sigmas[index] 147 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 148 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 149 | sigma_hat = sigma * (gamma + 1) 150 | if gamma > 0: 151 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 152 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 153 | pred_original_sample = sample - sigma_hat * model_output 154 | 155 | # 2. 
Convert to an ODE derivative 156 | derivative = (sample - pred_original_sample) / sigma_hat 157 | self.derivatives.append(derivative) 158 | 159 | if self.sigmas[index + 1] == 0: 160 | dt = self.sigmas[index + 1] - sigma_hat 161 | sample = sample + derivative * dt 162 | else: 163 | sigma_mid = sigma_hat.log().lerp(self.sigmas[index + 1].log(), 0.5).exp() 164 | dt_1 = sigma_mid - sigma_hat 165 | dt_2 = self.sigmas[index + 1] - sigma_hat 166 | sample_2 = sample + derivative * dt_1 167 | 168 | if noise_predictor: 169 | model_output_2 = noise_predictor(sample_2, self.sigma_to_t(sigma_mid)) 170 | pred_original_sample_2 = sample_2 - sigma_mid * model_output_2 171 | else: 172 | pred_original_sample_2 = sample_2 - sigma_mid * model_output 173 | 174 | derivative_2 = (sample_2 - pred_original_sample_2) / sigma_mid 175 | sample = sample + derivative_2 * dt_2 176 | 177 | prev_sample = sample 178 | 179 | if not return_dict: 180 | return (prev_sample,) 181 | 182 | return SchedulerOutput(prev_sample=prev_sample) 183 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_euler_ancestral_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class EulerAncestralDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Ancestral sampling with Euler method steps. 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L72 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 
46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: float, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | generator = None, 114 | return_dict: bool = True, 115 | ) -> Union[SchedulerOutput, Tuple]: 116 | """ 117 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 118 | process from the learned model outputs (most often the predicted noise). 119 | 120 | Args: 121 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 122 | timestep (`int`): current discrete timestep in the diffusion chain. 123 | sample (`torch.FloatTensor` or `np.ndarray`): 124 | current instance of sample being created by diffusion process. 
125 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 126 | 127 | Returns: 128 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 129 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 130 | returning a tuple, the first element is the sample tensor. 131 | 132 | """ 133 | index = self.t_to_index(timestep) 134 | 135 | sigma = self.sigmas[index] 136 | 137 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 138 | pred_original_sample = sample - sigma * model_output 139 | sigma_from = self.sigmas[index] 140 | sigma_to = self.sigmas[index + 1] 141 | sigma_up = (sigma_to ** 2 * (sigma_from ** 2 - sigma_to ** 2) / sigma_from ** 2) ** 0.5 142 | sigma_down = (sigma_to ** 2 - sigma_up ** 2) ** 0.5 143 | # 2. Convert to an ODE derivative 144 | derivative = (sample - pred_original_sample) / sigma 145 | self.derivatives.append(derivative) 146 | 147 | dt = sigma_down - sigma 148 | 149 | prev_sample = sample + derivative * dt 150 | 151 | noise = torch.randn(prev_sample.size(), dtype=prev_sample.dtype, layout=prev_sample.layout, device=generator.device, generator=generator).to(prev_sample.device) 152 | prev_sample = prev_sample + noise * sigma_up 153 | 154 | if not return_dict: 155 | return (prev_sample,) 156 | 157 | return SchedulerOutput(prev_sample=prev_sample) 158 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_euler_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class EulerDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Implements Algorithm 2 (Euler steps) from Karras et al. (2022). 30 | for discrete beta schedules. Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L51 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 
41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: int, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | return_dict: bool = True, 119 | ) -> Union[SchedulerOutput, Tuple]: 120 | """ 121 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 122 | process from the learned model outputs (most often the predicted noise). 
123 | 124 | Args: 125 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 126 | timestep (`int`): current discrete timestep in the diffusion chain. 127 | sample (`torch.FloatTensor` or `np.ndarray`): 128 | current instance of sample being created by diffusion process. 129 | s_churn (`float`) 130 | s_tmin (`float`) 131 | s_tmax (`float`) 132 | s_noise (`float`) 133 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 134 | 135 | Returns: 136 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 138 | returning a tuple, the first element is the sample tensor. 139 | 140 | """ 141 | index = self.t_to_index(timestep) 142 | 143 | sigma = self.sigmas[index] 144 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 145 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 146 | sigma_hat = sigma * (gamma + 1) 147 | if gamma > 0: 148 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 149 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 150 | pred_original_sample = sample - sigma_hat * model_output 151 | 152 | # 2. Convert to an ODE derivative 153 | derivative = (sample - pred_original_sample) / sigma_hat 154 | self.derivatives.append(derivative) 155 | 156 | dt = self.sigmas[index + 1] - sigma_hat 157 | 158 | prev_sample = sample + derivative * dt 159 | 160 | if not return_dict: 161 | return (prev_sample,) 162 | 163 | return SchedulerOutput(prev_sample=prev_sample) 164 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_heun_discrete.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 Katherine Crowson, The HuggingFace Team and hlky. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | 15 | from typing import Optional, Tuple, Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | from scipy import integrate 21 | 22 | from diffusers.configuration_utils import ConfigMixin, register_to_config 23 | from diffusers.schedulers.scheduling_utils import SchedulerOutput 24 | from .scheduling_utils import KSchedulerMixin 25 | 26 | 27 | class HeunDiscreteScheduler(KSchedulerMixin, ConfigMixin): 28 | """ 29 | Implements Algorithm 2 (Heun steps) from Karras et al. (2022). 30 | for discrete beta schedules. 
Based on the original k-diffusion implementation by 31 | Katherine Crowson: 32 | https://github.com/crowsonkb/k-diffusion/blob/481677d114f6ea445aa009cf5bd7a9cdee909e47/k_diffusion/sampling.py#L90 33 | 34 | [`~ConfigMixin`] takes care of storing all config attributes that are passed in the scheduler's `__init__` 35 | function, such as `num_train_timesteps`. They can be accessed via `scheduler.config.num_train_timesteps`. 36 | [`~ConfigMixin`] also provides general loading and saving functionality via the [`~ConfigMixin.save_config`] and 37 | [`~ConfigMixin.from_config`] functions. 38 | 39 | Args: 40 | num_train_timesteps (`int`): number of diffusion steps used to train the model. 41 | beta_start (`float`): the starting `beta` value of inference. 42 | beta_end (`float`): the final `beta` value. 43 | beta_schedule (`str`): 44 | the beta schedule, a mapping from a beta range to a sequence of betas for stepping the model. Choose from 45 | `linear` or `scaled_linear`. 46 | trained_betas (`np.ndarray`, optional): 47 | option to pass an array of betas directly to the constructor to bypass `beta_start`, `beta_end` etc. 48 | options to clip the variance used when adding noise to the denoised sample. Choose from `fixed_small`, 49 | `fixed_small_log`, `fixed_large`, `fixed_large_log`, `learned` or `learned_range`. 50 | 51 | """ 52 | 53 | @register_to_config 54 | def __init__( 55 | self, 56 | num_train_timesteps: int = 1000, 57 | beta_start: float = 0.00085, #sensible defaults 58 | beta_end: float = 0.012, 59 | beta_schedule: str = "linear", 60 | trained_betas: Optional[np.ndarray] = None, 61 | ): 62 | if trained_betas is not None: 63 | self.betas = torch.from_numpy(trained_betas) 64 | elif beta_schedule == "linear": 65 | self.betas = torch.linspace(beta_start, beta_end, num_train_timesteps, dtype=torch.float32) 66 | elif beta_schedule == "scaled_linear": 67 | # this schedule is very specific to the latent diffusion model. 68 | self.betas = ( 69 | torch.linspace(beta_start**0.5, beta_end**0.5, num_train_timesteps, dtype=torch.float32) ** 2 70 | ) 71 | else: 72 | raise NotImplementedError(f"{beta_schedule} does is not implemented for {self.__class__}") 73 | 74 | self.alphas = 1.0 - self.betas 75 | self.alphas_cumprod = torch.cumprod(self.alphas, dim=0) 76 | 77 | self.sigmas = ((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5 78 | self.log_sigmas = self.sigmas.log() 79 | 80 | # setable values 81 | self.num_inference_steps = None 82 | self.timesteps = torch.from_numpy(np.arange(0, num_train_timesteps)[::-1].copy().astype(np.int64)) 83 | self.derivatives = [] 84 | 85 | def set_timesteps(self, num_inference_steps: int): 86 | """ 87 | Sets the timesteps used for the diffusion chain. Supporting function to be run before inference. 88 | 89 | Args: 90 | num_inference_steps (`int`): 91 | the number of diffusion steps used when generating samples with a pre-trained model. 
92 | """ 93 | self.num_inference_steps = num_inference_steps 94 | timesteps = np.linspace(self.config.num_train_timesteps - 1, 0, num_inference_steps, dtype=float) 95 | self.timesteps = torch.from_numpy(timesteps) 96 | 97 | low_idx = np.floor(timesteps).astype(int) 98 | high_idx = np.ceil(timesteps).astype(int) 99 | frac = np.mod(timesteps, 1.0) 100 | sigmas = np.array(((1 - self.alphas_cumprod) / self.alphas_cumprod) ** 0.5) 101 | sigmas = (1 - frac) * sigmas[low_idx] + frac * sigmas[high_idx] 102 | sigmas = np.concatenate([sigmas, [0.0]]).astype(np.float32) 103 | self.sigmas = torch.from_numpy(sigmas) 104 | 105 | self.init_noise_sigma = self.sigmas[0] 106 | self.derivatives = [] 107 | 108 | def step( 109 | self, 110 | model_output: Union[torch.FloatTensor, np.ndarray], 111 | timestep: int, 112 | sample: Union[torch.FloatTensor, np.ndarray], 113 | s_churn: float = 0., 114 | s_tmin: float = 0., 115 | s_tmax: float = float('inf'), 116 | s_noise: float = 1., 117 | generator = None, 118 | noise_predictor = None, 119 | return_dict: bool = True, 120 | ) -> Union[SchedulerOutput, Tuple]: 121 | """ 122 | Predict the sample at the previous timestep by reversing the SDE. Core function to propagate the diffusion 123 | process from the learned model outputs (most often the predicted noise). 124 | 125 | Args: 126 | model_output (`torch.FloatTensor` or `np.ndarray`): direct output from learned diffusion model. 127 | timestep (`int`): current discrete timestep in the diffusion chain. 128 | sample (`torch.FloatTensor` or `np.ndarray`): 129 | current instance of sample being created by diffusion process. 130 | s_churn (`float`) 131 | s_tmin (`float`) 132 | s_tmax (`float`) 133 | s_noise (`float`) 134 | return_dict (`bool`): option for returning tuple rather than SchedulerOutput class 135 | 136 | Returns: 137 | [`~schedulers.scheduling_utils.SchedulerOutput`] or `tuple`: 138 | [`~schedulers.scheduling_utils.SchedulerOutput`] if `return_dict` is True, otherwise a `tuple`. When 139 | returning a tuple, the first element is the sample tensor. 140 | 141 | """ 142 | if not noise_predictor: print("Noise predictor not provided, result will not be correct.") 143 | 144 | index = self.t_to_index(timestep) 145 | 146 | sigma = self.sigmas[index] 147 | gamma = min(s_churn / (len(self.sigmas) - 1), 2 ** 0.5 - 1) if s_tmin <= sigma <= s_tmax else 0. 148 | eps = torch.randn(sample.size(), dtype=sample.dtype, layout=sample.layout, device=generator.device, generator=generator).to(sample.device) * s_noise 149 | sigma_hat = sigma * (gamma + 1) 150 | if gamma > 0: 151 | sample = sample + eps * (sigma_hat ** 2 - sigma ** 2) ** 0.5 152 | # 1. compute predicted original sample (x_0) from sigma-scaled predicted noise 153 | pred_original_sample = sample - sigma_hat * model_output 154 | 155 | # 2. 
Convert to an ODE derivative 156 | derivative = (sample - pred_original_sample) / sigma_hat 157 | self.derivatives.append(derivative) 158 | 159 | dt = self.sigmas[index + 1] - sigma_hat 160 | if self.sigmas[index + 1] == 0: 161 | # Euler method 162 | sample = sample + derivative * dt 163 | else: 164 | # Heun's method 165 | sample_2 = sample + derivative * dt 166 | 167 | if noise_predictor: 168 | model_output_2 = noise_predictor(sample_2, self.timesteps[index + 1]) 169 | pred_original_sample_2 = sample_2 - self.sigmas[index + 1] * model_output_2 170 | else: 171 | pred_original_sample_2 = sample_2 - self.sigmas[index + 1] * model_output 172 | 173 | derivative_2 = (sample_2 - pred_original_sample_2) / self.sigmas[index + 1] 174 | d_prime = (derivative + derivative_2) / 2 175 | sample = sample + d_prime * dt 176 | 177 | prev_sample = sample 178 | 179 | if not return_dict: 180 | return (prev_sample,) 181 | 182 | return SchedulerOutput(prev_sample=prev_sample) 183 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/kschedulers/scheduling_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2022 The HuggingFace Team. All rights reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | from dataclasses import dataclass 15 | from typing import Union 16 | 17 | import numpy as np 18 | import torch 19 | 20 | SCHEDULER_CONFIG_NAME = "scheduler_config.json" 21 | 22 | class KSchedulerMixin: 23 | """ 24 | Mixin containing common functions for the schedulers. 25 | """ 26 | 27 | config_name = SCHEDULER_CONFIG_NAME 28 | ignore_for_config = ["tensor_format"] 29 | 30 | def match_shape(self, values: Union[np.ndarray, torch.Tensor], broadcast_array: Union[np.ndarray, torch.Tensor]): 31 | """ 32 | Turns a 1-D array into an array or tensor with len(broadcast_array.shape) dims. 33 | 34 | Args: 35 | values: an array or tensor of values to extract. 36 | broadcast_array: an array with a larger shape of K dimensions with the batch 37 | dimension equal to the length of timesteps. 38 | Returns: 39 | a tensor of shape [batch_size, 1, ...] where the shape has K dims. 40 | """ 41 | 42 | values = values.flatten() 43 | 44 | while len(values.shape) < len(broadcast_array.shape): 45 | values = values[..., None] 46 | 47 | values = values.to(broadcast_array.device) 48 | 49 | return values 50 | 51 | """ 52 | All the K-Schedulers handle these methods in the same way 53 | """ 54 | 55 | def scale_model_input( 56 | self, sample: torch.FloatTensor, timestep: Union[float, torch.FloatTensor] 57 | ) -> torch.FloatTensor: 58 | """ 59 | Scales the denoising model input by `(sigma**2 + 1) ** 0.5` to match the K-LMS algorithm. 
60 | 61 | Args: 62 | sample (`torch.FloatTensor`): input sample 63 | timestep (`float` or `torch.FloatTensor`): the current timestep in the diffusion chain 64 | 65 | Returns: 66 | `torch.FloatTensor`: scaled input sample 67 | """ 68 | sigma = self.t_to_sigma(timestep) 69 | sample = sample / ((sigma**2 + 1) ** 0.5) 70 | return sample 71 | 72 | def add_noise( 73 | self, 74 | original_samples: Union[torch.FloatTensor, np.ndarray], 75 | noise: Union[torch.FloatTensor, np.ndarray], 76 | timesteps: Union[float, torch.FloatTensor], 77 | ) -> Union[torch.FloatTensor, np.ndarray]: 78 | index = self.t_to_index(timesteps) 79 | sigmas = self.match_shape(self.sigmas[index], noise) 80 | noisy_samples = original_samples + noise * sigmas 81 | 82 | return noisy_samples 83 | 84 | def __len__(self): 85 | return self.config.num_train_timesteps 86 | 87 | """ 88 | Taken from https://github.com/crowsonkb/k-diffusion/blob/master/k_diffusion/external.py 89 | 90 | These assume that: 91 | len(self.timesteps) is num_inference_steps (not num_train_timesteps) 92 | len(self.sigmas) is num_inference_steps (not num_train_timesteps) 93 | 94 | BUT 95 | 96 | len(self.log_sigmas) is num_train_timesteps (not num_inference_steps) 97 | """ 98 | 99 | def t_to_index(self, timestep): 100 | self.timesteps = self.timesteps.to(timestep.device) 101 | 102 | dists = timestep - self.timesteps 103 | return dists.abs().argmin().item() 104 | 105 | def sigma_to_t(self, sigma, quantize=True): 106 | self.log_sigmas = self.log_sigmas.to(sigma.device) 107 | 108 | log_sigma = sigma.log() 109 | dists = log_sigma - self.log_sigmas[:, None] 110 | # Stable Diffusion should be quantized 111 | if quantize: 112 | return dists.abs().argmin(dim=0).view(sigma.shape) 113 | # For continuous distributions 114 | low_idx = dists.ge(0).cumsum(dim=0).argmax(dim=0).clamp(max=self.log_sigmas.shape[0] - 2) 115 | high_idx = low_idx + 1 116 | low, high = self.log_sigmas[low_idx], self.log_sigmas[high_idx] 117 | w = (low - log_sigma) / (low - high) 118 | w = w.clamp(0, 1) 119 | t = (1 - w) * low_idx + w * high_idx 120 | return t.view(sigma.shape) 121 | 122 | def t_to_sigma(self, t): 123 | t = t.float() 124 | low_idx, high_idx, w = t.floor().long(), t.ceil().long(), t.frac() 125 | log_sigma = (1 - w) * self.log_sigmas[low_idx] + w * self.log_sigmas[high_idx] 126 | return log_sigma.exp() 127 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/latent_debugger.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from sdgrpcserver import images 4 | 5 | DEFAULT_ENABLED = set( 6 | [ 7 | # "initial", 8 | # "step", 9 | # "mask", 10 | # "shapednoise", 11 | # "initnoise", 12 | # "blendin", 13 | # "blendout", 14 | # "small", 15 | # "hires_lo", 16 | # "hires_hi", 17 | ] 18 | ) 19 | 20 | DEFAULT_OUTPUT_PATH = "/tests/debug-out/" 21 | 22 | 23 | class LatentDebugger: 24 | def __init__(self, vae, output_path=DEFAULT_OUTPUT_PATH, enabled=None, prefix=""): 25 | self.vae = vae 26 | self.output_path = output_path 27 | self.enabled = enabled if enabled is not None else DEFAULT_ENABLED 28 | self.prefix = prefix 29 | 30 | self.counters = {} 31 | 32 | def log(self, label, i, latents): 33 | if label not in self.enabled: 34 | return 35 | 36 | prefix = "debug" if not self.prefix else f"debug-{self.prefix}" 37 | 38 | self.counters[label] = i = self.counters.get(label, 0) + 1 39 | 40 | stage_latents = 1 / 0.18215 * latents 41 | stage_image = self.vae.decode(stage_latents).sample 42 | 
stage_image = (stage_image / 2 + 0.5).clamp(0, 1).cpu() 43 | 44 | for j, pngBytes in enumerate(images.toPngBytes(stage_image)): 45 | path = os.path.join(self.output_path, f"{prefix}-{label}-{j}-{i}.png") 46 | with open(path, "wb") as f: 47 | f.write(pngBytes) 48 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/model_utils.py: -------------------------------------------------------------------------------- 1 | from copy import deepcopy 2 | from typing import Literal 3 | 4 | import torch 5 | from accelerate.hooks import ModelHook, add_hook_to_module 6 | from accelerate.utils import send_to_device, set_module_tensor_to_device 7 | 8 | 9 | class CloneToGPUHook(ModelHook): 10 | def __init__(self, execution_device, exclusion_set, top, params, buffers): 11 | self.execution_device = execution_device 12 | self.exclusion_set = exclusion_set 13 | self.top = top 14 | self.params = params 15 | self.buffers = buffers 16 | 17 | def pre_forward(self, module, *args, **kwargs): 18 | if self.exclusion_set: 19 | self.exclusion_set.activate(self.top) 20 | 21 | dev = self.execution_device 22 | 23 | for name, param in module.named_parameters(recurse=False): 24 | if param.device == torch.device("meta"): 25 | # explicitly copy, as set_module_tensor_to_device won't create 26 | # a copy if the device is already correct 27 | new_param = self.params[name].to(dev, copy=True) 28 | set_module_tensor_to_device(module, name, dev, new_param) 29 | 30 | for name, buffer in module.named_buffers(recurse=False): 31 | if buffer.device == torch.device("meta"): 32 | new_buffer = self.buffers[name].to(dev, copy=True) 33 | set_module_tensor_to_device(module, name, dev, new_buffer) 34 | 35 | return ( 36 | send_to_device(args, dev), 37 | send_to_device(kwargs, dev), 38 | ) 39 | 40 | def reset(self, model): 41 | for name in self.params.keys(): 42 | set_module_tensor_to_device(model, name, "meta") 43 | for name in self.buffers.keys(): 44 | set_module_tensor_to_device(model, name, "meta") 45 | 46 | 47 | class GPUExclusionSet: 48 | def __init__(self, max_activated=-1): 49 | self.sets = [] 50 | self.activated = [] 51 | self.max_activated = max_activated 52 | 53 | def add(self, top): 54 | models = [ 55 | model 56 | for _, model in top.named_modules() 57 | if hasattr(model, "_hf_hook") and isinstance(model._hf_hook, CloneToGPUHook) 58 | ] 59 | 60 | self.sets.append((top, models)) 61 | 62 | def reset(self, exclude=[]): 63 | exclude = list(exclude) 64 | 65 | for top, models in self.sets: 66 | if top in exclude: 67 | continue 68 | 69 | for model in models: 70 | model._hf_hook.reset(model) 71 | 72 | def activate(self, top): 73 | # No-op if top is already the most recently activated 74 | if self.activated and self.activated[0] is top: 75 | return 76 | 77 | # Update the LRU activated queue 78 | self.activated = [model for model in self.activated if model is not top] 79 | self.activated.insert(0, top) 80 | self.activated = self.activated[: self.max_activated] 81 | 82 | self.reset(exclude=self.activated) 83 | 84 | 85 | def clone_model( 86 | model, 87 | clone_tensors: Literal["share"] | str | torch.device = "share", 88 | exclusion_set=None, 89 | ): 90 | """ 91 | Copies a model so you get a different set of instances, but they share 92 | all their parameters and buffers 93 | """ 94 | 95 | # If this isn't actually a model, just return a deepcopy 96 | if not isinstance(model, torch.nn.Module): 97 | clone = deepcopy(model) 98 | if clone_tensors != "share": 99 | clone = clone.to(clone_tensors) 100 | 
return clone 101 | 102 | # Start by pulling all the Tensors out of the model, so they're not copied on deepclone 103 | cache = {} 104 | 105 | for (model_name, source) in model.named_modules(): 106 | model_params = {} 107 | model_buffers = {} 108 | 109 | for name, param in source.named_parameters(recurse=False): 110 | model_params[name] = param 111 | source._parameters[name] = None 112 | 113 | for name, buffer in source.named_buffers(recurse=False): 114 | model_buffers[name] = buffer 115 | source._buffers[name] = None 116 | 117 | cache[model_name] = (model_params, model_buffers) 118 | 119 | # Deep clone the model 120 | clone = deepcopy(model) 121 | 122 | # Put the tensors back into the model 123 | for (model_name, dest) in model.named_modules(): 124 | model_params, model_buffers = cache[model_name] 125 | 126 | for name, param in model_params.items(): 127 | dest._parameters[name] = param 128 | for name, buffer in model_buffers.items(): 129 | dest._buffers[name] = buffer 130 | 131 | # And into the clone 132 | # Even if we're not sharing, set it to shared to start with 133 | for (model_name, dest) in clone.named_modules(): 134 | model_params, model_buffers = cache[model_name] 135 | 136 | for name, param in model_params.items(): 137 | dest.register_parameter(name, param) 138 | for name, buffer in model_buffers.items(): 139 | dest.register_buffer(name, buffer) 140 | 141 | if clone_tensors != "share": 142 | if exclusion_set: 143 | exclusion_set.add(clone) 144 | 145 | for (model_name, dest) in clone.named_modules(): 146 | model_params, model_buffers = cache[model_name] 147 | 148 | if exclusion_set: 149 | for name in model_params.keys(): 150 | set_module_tensor_to_device(dest, name, "meta") 151 | for name in model_buffers.keys(): 152 | set_module_tensor_to_device(dest, name, "meta") 153 | 154 | add_hook_to_module( 155 | dest, 156 | CloneToGPUHook( 157 | clone_tensors, exclusion_set, clone, model_params, model_buffers 158 | ), 159 | ) 160 | else: 161 | for name, param in model_params.items(): 162 | new_param = param.to(clone_tensors, copy=True) 163 | set_module_tensor_to_device(dest, name, clone_tensors, new_param) 164 | for name, buffer in model_buffers.items(): 165 | new_buffer = buffer.to(clone_tensors, copy=True) 166 | set_module_tensor_to_device(dest, name, clone_tensors, new_buffer) 167 | 168 | return clone 169 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/pipeline/models/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/memory_efficient_cross_attention.py: -------------------------------------------------------------------------------- 1 | from typing import Any, Optional 2 | 3 | from torch import nn 4 | 5 | try: 6 | import xformers 7 | import xformers.ops 8 | except: 9 | xformers = None 10 | 11 | def has_xformers(): 12 | return xformers is not None 13 | 14 | # From https://github.com/huggingface/diffusers/pull/532 15 | 16 | class MemoryEfficientCrossAttention(nn.Module): 17 | def __init__(self, query_dim, context_dim=None, heads=8, dim_head=64, dropout=0.0): 18 | super().__init__() 19 | inner_dim = dim_head * heads 20 | context_dim = context_dim if context_dim is not None else query_dim 21 | 22 | self.heads = heads 23 | self.dim_head 
= dim_head 24 | 25 | self.to_q = nn.Linear(query_dim, inner_dim, bias=False) 26 | self.to_k = nn.Linear(context_dim, inner_dim, bias=False) 27 | self.to_v = nn.Linear(context_dim, inner_dim, bias=False) 28 | 29 | self.to_out = nn.Sequential(nn.Linear(inner_dim, query_dim), nn.Dropout(dropout)) 30 | self.attention_op: Optional[Any] = None 31 | 32 | def forward(self, x, context=None, mask=None): 33 | q = self.to_q(x) 34 | context = context if context is not None else x 35 | k = self.to_k(context) 36 | v = self.to_v(context) 37 | 38 | b, _, _ = q.shape 39 | q, k, v = map( 40 | lambda t: t.unsqueeze(3) 41 | .reshape(b, t.shape[1], self.heads, self.dim_head) 42 | .permute(0, 2, 1, 3) 43 | .reshape(b * self.heads, t.shape[1], self.dim_head) 44 | .contiguous(), 45 | (q, k, v), 46 | ) 47 | 48 | # actually compute the attention, what we cannot get enough of 49 | out = xformers.ops.memory_efficient_attention(q, k, v, attn_bias=None, op=self.attention_op) 50 | 51 | # TODO: Use this directly in the attention operation, as a bias 52 | if mask is not None: 53 | raise NotImplementedError 54 | out = ( 55 | out.unsqueeze(0) 56 | .reshape(b, self.heads, out.shape[1], self.dim_head) 57 | .permute(0, 2, 1, 3) 58 | .reshape(b, out.shape[1], self.heads * self.dim_head) 59 | ) 60 | return self.to_out(out) 61 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/models/structured_cross_attention.py: -------------------------------------------------------------------------------- 1 | # Mostly from https://github.com/shunk031/training-free-structured-diffusion-guidance 2 | # 3 | # Changes: 4 | # - _attention changed to _sliced_attention to match Diffusers new(?) argument structure 5 | 6 | 7 | from typing import Optional, Tuple 8 | 9 | import torch as th 10 | from diffusers.models.attention import CrossAttention 11 | 12 | from sdgrpcserver.pipeline.text_embedding.structured_text_embedding import KeyValueTensors 13 | 14 | from einops.layers.torch import Reduce 15 | 16 | class StructuredCrossAttention(CrossAttention): 17 | def __init__( 18 | self, 19 | query_dim: int, 20 | context_dim: Optional[int] = None, 21 | heads: int = 8, 22 | dim_head: int = 64, 23 | dropout: int = 0, 24 | struct_attention: bool = False, 25 | ) -> None: 26 | super().__init__(query_dim, context_dim, heads, dim_head, dropout) 27 | self.struct_attention = struct_attention 28 | 29 | self.max_pooling_layer = Reduce(f"b c h w -> 1 c h w", 'max') 30 | 31 | 32 | def struct_qkv( 33 | self, 34 | q: th.Tensor, 35 | context: Tuple[th.Tensor, KeyValueTensors], 36 | mask: Optional[th.Tensor] = None, 37 | ) -> th.Tensor: 38 | 39 | assert len(context) == 2 and isinstance(context, tuple) 40 | uc_context = context[0] 41 | context_k = context[1].k 42 | context_v = context[1].v 43 | 44 | if isinstance(context_k, list) and isinstance(context_v, list): 45 | return self.multi_qkv( 46 | q=q, 47 | uc_context=uc_context, 48 | context_k=context_k, 49 | context_v=context_v, 50 | mask=mask, 51 | ) 52 | elif isinstance(context_k, th.Tensor) and isinstance(context_v, th.Tensor): 53 | return self.heterogenous_qkv( 54 | q=q, 55 | uc_context=uc_context, 56 | context_k=context_k, 57 | context_v=context_v, 58 | mask=mask, 59 | ) 60 | else: 61 | raise NotImplementedError 62 | 63 | def multi_qkv( 64 | self, 65 | q: th.Tensor, 66 | uc_context: th.Tensor, 67 | context_k: th.Tensor, 68 | context_v: th.Tensor, 69 | mask: Optional[th.Tensor] = None, 70 | ) -> None: 71 | h = self.heads 72 | assert uc_context.size(0) == 
context_k[0].size(0) == context_v[0].size(0) 73 | true_bs = uc_context.size(0)*h 74 | 75 | k_uc = self.to_k(uc_context) 76 | v_uc = self.to_v(uc_context) 77 | 78 | k_c = [self.to_k(c_k) for c_k in context_k] 79 | v_c = [self.to_v(c_v) for c_v in context_v] 80 | 81 | q = self.reshape_heads_to_batch_dim(q) 82 | k_uc = self.reshape_heads_to_batch_dim(k_uc) 83 | v_uc = self.reshape_heads_to_batch_dim(v_uc) 84 | 85 | k_c = [self.reshape_heads_to_batch_dim(k) for k in k_c] 86 | v_c = [self.reshape_heads_to_batch_dim(v) for v in v_c] 87 | 88 | q_uc = q[:true_bs] 89 | q_c = q[true_bs:] 90 | 91 | sim_uc = th.matmul(q_uc, k_uc.transpose(-1, -2)) * self.scale 92 | sim_c = [th.matmul(q_c, k.transpose(-1, -2)) * self.scale for k in k_c] 93 | 94 | attn_uc = sim_uc.softmax(dim=-1) 95 | attn_c = [sim.softmax(dim=-1) for sim in sim_c] 96 | 97 | out_uc = th.matmul(attn_uc, v_uc) 98 | out_c = [th.matmul(attn, v) for attn, v in zip(attn_c, v_c)] 99 | 100 | out_c = sum(out_c) / len(v_c) 101 | 102 | out = th.cat([out_uc, out_c]) 103 | 104 | return self.reshape_batch_dim_to_heads(out) 105 | 106 | def normal_qkv( 107 | self, 108 | q: th.Tensor, 109 | context: th.Tensor, 110 | mask: Optional[th.Tensor] = None, 111 | ) -> th.Tensor: 112 | 113 | batch_size, sequence_length, dim = q.shape 114 | 115 | k = self.to_k(context) 116 | v = self.to_v(context) 117 | 118 | q = self.reshape_heads_to_batch_dim(q) 119 | k = self.reshape_heads_to_batch_dim(k) 120 | v = self.reshape_heads_to_batch_dim(v) 121 | 122 | hidden_states = self._sliced_attention(q, k, v, sequence_length, dim) 123 | 124 | return hidden_states 125 | 126 | def heterogenous_qkv( 127 | self, 128 | q: th.Tensor, 129 | uc_context: th.Tensor, 130 | context_k: th.Tensor, 131 | context_v: th.Tensor, 132 | mask: Optional[th.Tensor] = None, 133 | ) -> th.Tensor: 134 | 135 | batch_size, sequence_length, dim = q.shape 136 | 137 | k = self.to_k(th.cat((uc_context, context_k), dim=0)) 138 | v = self.to_v(th.cat((uc_context, context_v), dim=0)) 139 | 140 | q = self.reshape_heads_to_batch_dim(q) 141 | k = self.reshape_heads_to_batch_dim(k) 142 | v = self.reshape_heads_to_batch_dim(v) 143 | 144 | hidden_states = self._sliced_attention(q, k, v, sequence_length, dim) 145 | 146 | return hidden_states 147 | 148 | def get_kv(self, context: th.Tensor) -> KeyValueTensors: 149 | return KeyValueTensors(k=self.to_k(context), v=self.to_v(context)) 150 | 151 | def forward( 152 | self, 153 | x: th.Tensor, 154 | context: Optional[Tuple[th.Tensor, KeyValueTensors]] = None, 155 | mask: Optional[th.Tensor] = None, 156 | ) -> th.Tensor: 157 | 158 | q = self.to_q(x) 159 | 160 | if isinstance(context, tuple): 161 | assert len(context) == 2 162 | assert isinstance(context[0], th.Tensor) # unconditioned embedding 163 | assert isinstance(context[1], KeyValueTensors) # conditioned embedding 164 | 165 | if self.struct_attention: 166 | out = self.struct_qkv(q=q, context=context, mask=mask) 167 | else: 168 | uc_context = context[0] 169 | c_full_seq = context[1].k[0].unsqueeze(dim=0) 170 | print("n", c_full_seq.shape) 171 | out = self.normal_qkv( 172 | q=q, context=th.cat((uc_context, c_full_seq), dim=0), mask=mask 173 | ) 174 | else: 175 | ctx = context if context is not None else x 176 | out = self.normal_qkv(q=q, context=ctx, mask=mask) 177 | 178 | return self.to_out(out) 179 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/randtools.py: -------------------------------------------------------------------------------- 1 | from typing import 
List, Optional, Sequence 2 | 3 | import torch 4 | 5 | 6 | def batched_rand( 7 | shape: Sequence[int], 8 | generators: List[torch.Generator], 9 | device: torch.device, 10 | dtype: torch.dtype, 11 | ) -> torch.Tensor: 12 | 13 | if shape[0] % len(generators) != 0: 14 | raise ValueError( 15 | f"shape[0] ({shape[0]}) needs to be a multiple of len(generators) ({len(generators)})" 16 | ) 17 | 18 | latents = torch.cat( 19 | [ 20 | torch.rand( 21 | (1, *shape[1:]), 22 | generator=generator, 23 | device=generator.device, 24 | dtype=dtype, 25 | ) 26 | for generator in generators * (shape[0] // len(generators)) 27 | ], 28 | dim=0, 29 | ) 30 | 31 | return latents.to(device) 32 | 33 | 34 | def batched_randn( 35 | shape: Sequence[int], 36 | generators: List[torch.Generator], 37 | device: torch.device, 38 | dtype: torch.dtype, 39 | ) -> torch.Tensor: 40 | 41 | if shape[0] % len(generators) != 0: 42 | raise ValueError( 43 | f"shape[0] ({shape[0]}) needs to be a multiple of len(generators) ({len(generators)})" 44 | ) 45 | 46 | latents = torch.cat( 47 | [ 48 | torch.randn( 49 | (1, *shape[1:]), 50 | generator=generator, 51 | device=generator.device, 52 | dtype=dtype, 53 | ) 54 | for generator in generators * (shape[0] // len(generators)) 55 | ], 56 | dim=0, 57 | ) 58 | 59 | return latents.to(device) 60 | 61 | 62 | class TorchRandOverride: 63 | def __init__(self, generators): 64 | self.generators = generators 65 | 66 | def randn_like( 67 | self, 68 | input: torch.Tensor, 69 | *args, 70 | dtype: Optional[torch.dtype] = None, 71 | device=None, 72 | **kwargs, 73 | ): 74 | if input.shape[0] % len(self.generators) != 0: 75 | if dtype: 76 | kwargs["dtype"] = dtype 77 | if device: 78 | kwargs["device"] = device 79 | return torch.randn_like(input, *args, **kwargs) 80 | 81 | if device is None: 82 | device = input.device 83 | if dtype is None: 84 | dtype = input.dtype 85 | return batched_randn(input.shape, self.generators, device, dtype) 86 | 87 | def randint_like( 88 | self, 89 | input, 90 | *args, 91 | high=None, 92 | low=None, 93 | dtype=None, 94 | layout=torch.strided, 95 | device=None, 96 | **kwargs, 97 | ): 98 | if len(args) == 1: 99 | high = args[0] 100 | elif args: 101 | low = args[0] 102 | high = args[1] 103 | if low is None: 104 | low = 0 105 | 106 | if input.shape[0] % len(self.generators) != 0: 107 | print("Skip") 108 | return torch.randint_like( 109 | input, 110 | low=low, 111 | high=high, 112 | dtype=dtype, 113 | layout=layout, 114 | device=device, 115 | **kwargs, 116 | ) 117 | 118 | latents = torch.cat( 119 | [ 120 | torch.randint( 121 | size=(1, *input.shape[1:]), 122 | low=low, 123 | high=high, 124 | generator=generator, 125 | device=generator.device, 126 | dtype=dtype, 127 | ) 128 | for generator in self.generators 129 | ], 130 | dim=0, 131 | ) 132 | 133 | return latents.to(device) 134 | 135 | def __getattr__(self, item): 136 | return getattr(torch, item) 137 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/safety_checkers.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch 3 | import torch.nn as nn 4 | 5 | from transformers import CLIPConfig, CLIPVisionModel, PreTrainedModel 6 | from transformers.feature_extraction_utils import FeatureExtractionMixin 7 | 8 | def cosine_distance(image_embeds, text_embeds): 9 | normalized_image_embeds = nn.functional.normalize(image_embeds) 10 | normalized_text_embeds = nn.functional.normalize(text_embeds) 11 | return 
torch.mm(normalized_image_embeds, normalized_text_embeds.t()) 12 | 13 | class FlagOnlySafetyChecker(PreTrainedModel): 14 | config_class = CLIPConfig 15 | 16 | def __init__(self, config: CLIPConfig): 17 | super().__init__(config) 18 | 19 | self.vision_model = CLIPVisionModel(config.vision_config) 20 | self.visual_projection = nn.Linear(config.vision_config.hidden_size, config.projection_dim, bias=False) 21 | 22 | self.concept_embeds = nn.Parameter(torch.ones(17, config.projection_dim), requires_grad=False) 23 | self.special_care_embeds = nn.Parameter(torch.ones(3, config.projection_dim), requires_grad=False) 24 | 25 | self.register_buffer("concept_embeds_weights", torch.ones(17)) 26 | self.register_buffer("special_care_embeds_weights", torch.ones(3)) 27 | 28 | def __str__(self): 29 | return "FlagOnlySafetyChecker" 30 | 31 | @torch.no_grad() 32 | def forward(self, clip_input, images): 33 | pooled_output = self.vision_model(clip_input)[1] # pooled_output 34 | image_embeds = self.visual_projection(pooled_output) 35 | 36 | special_cos_dist = cosine_distance(image_embeds, self.special_care_embeds).cpu().numpy() 37 | cos_dist = cosine_distance(image_embeds, self.concept_embeds).cpu().numpy() 38 | 39 | result = [] 40 | batch_size = image_embeds.shape[0] 41 | for i in range(batch_size): 42 | result_img = {"special_scores": {}, "special_care": [], "concept_scores": {}, "bad_concepts": []} 43 | 44 | # increase this value to create a stronger `nfsw` filter 45 | # at the cost of increasing the possibility of filtering benign images 46 | adjustment = 0.0 47 | 48 | for concet_idx in range(len(special_cos_dist[0])): 49 | concept_cos = special_cos_dist[i][concet_idx] 50 | concept_threshold = self.special_care_embeds_weights[concet_idx].item() 51 | result_img["special_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 52 | if result_img["special_scores"][concet_idx] > 0: 53 | result_img["special_care"].append({concet_idx, result_img["special_scores"][concet_idx]}) 54 | adjustment = 0.01 55 | 56 | for concet_idx in range(len(cos_dist[0])): 57 | concept_cos = cos_dist[i][concet_idx] 58 | concept_threshold = self.concept_embeds_weights[concet_idx].item() 59 | result_img["concept_scores"][concet_idx] = round(concept_cos - concept_threshold + adjustment, 3) 60 | if result_img["concept_scores"][concet_idx] > 0: 61 | result_img["bad_concepts"].append(concet_idx) 62 | 63 | result.append(result_img) 64 | 65 | has_nsfw_concepts = [len(res["bad_concepts"]) > 0 for res in result] 66 | return images, has_nsfw_concepts 67 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/schedulers/sample_dpmpp_2m.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from tqdm import trange 3 | 4 | 5 | @torch.no_grad() 6 | def sample_dpmpp_2m( 7 | model, 8 | x, 9 | sigmas, 10 | extra_args=None, 11 | callback=None, 12 | disable=None, 13 | warmup_lms=False, 14 | ddim_cutoff=0.0, 15 | ): 16 | """DPM-Solver++(2M).""" 17 | extra_args = {} if extra_args is None else extra_args 18 | s_in = x.new_ones([x.shape[0]]) 19 | sigma_fn = lambda t: t.neg().exp() 20 | t_fn = lambda sigma: sigma.log().neg() 21 | old_denoised = None 22 | 23 | for i in trange(len(sigmas) - 1, disable=disable): 24 | denoised = model(x, sigmas[i] * s_in, **extra_args) 25 | if callback is not None: 26 | callback( 27 | { 28 | "x": x, 29 | "i": i, 30 | "sigma": sigmas[i], 31 | "sigma_hat": sigmas[i], 32 | "denoised": denoised, 33 | } 34 | ) 35 | 
t, t_next = t_fn(sigmas[i]), t_fn(sigmas[i + 1]) 36 | h = t_next - t 37 | if old_denoised is None and warmup_lms: 38 | r = 1 / 2 39 | s = t + r * h 40 | x_2 = (sigma_fn(s) / sigma_fn(t)) * x - (-h * r).expm1() * denoised 41 | denoised_i = model(x_2, sigma_fn(s) * s_in, **extra_args) 42 | elif sigmas[i + 1] <= ddim_cutoff or old_denoised is None: 43 | denoised_i = denoised 44 | else: 45 | h_last = t - t_fn(sigmas[i - 1]) 46 | r = h_last / h 47 | denoised_i = (1 + 1 / (2 * r)) * denoised - (1 / (2 * r)) * old_denoised 48 | x = (sigma_fn(t_next) / sigma_fn(t)) * x - (-h).expm1() * denoised_i 49 | old_denoised = denoised 50 | return x 51 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from .text_embedding import TextEmbedding 3 | from .basic_text_embedding import BasicTextEmbedding 4 | from .lpw_text_embedding import LPWTextEmbedding 5 | from .structured_text_embedding import StructuredTextEmbedding, KeyValueTensors 6 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/basic_text_embedding.py: -------------------------------------------------------------------------------- 1 | from diffusers.utils import logging 2 | 3 | logger = logging.get_logger(__name__) # pylint: disable=invalid-name 4 | 5 | from .text_embedding import TextEmbedding 6 | 7 | 8 | class BasicTextEmbedding(TextEmbedding): 9 | def __init__(self, pipe, text_encoder, **kwargs): 10 | super().__init__(pipe, text_encoder, **kwargs) 11 | 12 | def _get_embeddedings(self, strings, label): 13 | tokenizer = self.tokenizer 14 | 15 | max_length = min( 16 | tokenizer.model_max_length, 17 | self.text_encoder.config.max_position_embeddings, 18 | ) 19 | 20 | # get prompt text embeddings 21 | text_inputs = tokenizer( 22 | strings, 23 | padding="max_length", 24 | max_length=max_length, 25 | return_tensors="pt", 26 | ) 27 | text_input_ids = text_inputs.input_ids 28 | 29 | if text_input_ids.shape[-1] > max_length: 30 | removed_text = tokenizer.batch_decode(text_input_ids[:, max_length:]) 31 | logger.warning( 32 | f"The following part of your {label} input was truncated because CLIP can only handle sequences up to " 33 | f"{max_length} tokens: {removed_text}" 34 | ) 35 | text_input_ids = text_input_ids[:, :max_length] 36 | 37 | text_embeddings = self.text_encoder(text_input_ids.to(self.device)) 38 | 39 | return text_embeddings[0] 40 | 41 | def get_text_embeddings(self, prompt): 42 | return self._get_embeddedings(prompt.as_unweighted_string(), "prompt") 43 | 44 | def get_uncond_embeddings(self, prompt): 45 | return self._get_embeddedings(prompt.as_unweighted_string(), "negative prompt") 46 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/text_embedding.py: -------------------------------------------------------------------------------- 1 | class TextEmbedding: 2 | def __init__(self, pipe, text_encoder, **kwargs): 3 | self.pipe = pipe 4 | self.tokenizer = pipe.tokenizer 5 | self.text_encoder = text_encoder 6 | self.device = pipe.execution_device 7 | 8 | def get_text_embeddings(self, prompt): 9 | raise NotImplementedError("Not implemented") 10 | 11 | def get_uncond_embeddings(self, prompt): 12 | raise NotImplementedError("Not implemented") 13 | 14 | def get_embeddings(self, prompt, uncond_prompt=None): 15 | """Prompt and negative a both 
expected to be lists of strings, and matching in length""" 16 | text_embeddings = self.get_text_embeddings(prompt) 17 | uncond_embeddings = ( 18 | self.get_uncond_embeddings(uncond_prompt) 19 | if uncond_prompt is not None 20 | else None 21 | ) 22 | 23 | return (text_embeddings, uncond_embeddings) 24 | 25 | def repeat(self, embedding, count): 26 | bs_embed, seq_len, _ = embedding.shape 27 | embedding = embedding.repeat(1, count, 1) 28 | embedding = embedding.view(bs_embed * count, seq_len, -1) 29 | 30 | return embedding 31 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/text_embedding/text_encoder_alt_layer.py: -------------------------------------------------------------------------------- 1 | from typing import Literal 2 | 3 | 4 | class TextEncoderAltLayer: 5 | def __init__( 6 | self, 7 | text_encoder, 8 | layer: Literal["final", "penultimate"] | int = "final", 9 | ): 10 | self.text_encoder = text_encoder 11 | self.layer = layer 12 | 13 | def __call__(self, input_ids): 14 | text_embeddings = self.text_encoder( 15 | input_ids, 16 | output_hidden_states=(self.layer != "final"), 17 | return_dict=True, 18 | ) 19 | 20 | if self.layer == "final": 21 | res = text_embeddings.last_hidden_state 22 | elif self.layer == "penultimate": 23 | res = self.text_encoder.text_model.final_layer_norm( 24 | text_embeddings.hidden_states[-2] 25 | ) 26 | else: 27 | res = self.text_encoder.text_model.final_layer_norm( 28 | text_embeddings.hidden_states[self.layer] 29 | ) 30 | 31 | # text_encoder clients expect tuple of (final layer, pool) 32 | return (res, None) 33 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/cfg.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | import torch 4 | 5 | from sdgrpcserver.pipeline.unet.types import ( 6 | EpsTensor, 7 | NoisePredictionUNet, 8 | ScheduleTimestep, 9 | XtTensor, 10 | ) 11 | 12 | 13 | @dataclass 14 | class CFGChildUnets: 15 | g: NoisePredictionUNet 16 | u: NoisePredictionUNet 17 | f: NoisePredictionUNet 18 | 19 | def wrap_all(self, wrapper, *args, **kwargs): 20 | return CFGChildUnets( 21 | g=wrapper(self.g, *args, **kwargs), 22 | u=wrapper(self.u, *args, **kwargs), 23 | f=wrapper(self.f, *args, **kwargs), 24 | ) 25 | 26 | 27 | class CFGUnet_Seperated: 28 | def __init__(self, cfg_unets: CFGChildUnets, guidance_scale, batch_total): 29 | self.cfg_unets = cfg_unets 30 | self.guidance_scale = guidance_scale 31 | self.batch_total = batch_total 32 | 33 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 34 | noise_pred_g = self.cfg_unets.g(latents, t) 35 | noise_pred_u = self.cfg_unets.u(latents, t) 36 | 37 | noise_pred = noise_pred_u + self.guidance_scale * (noise_pred_g - noise_pred_u) 38 | return noise_pred 39 | 40 | 41 | class CFGUnet: 42 | def __init__(self, cfg_unets: CFGChildUnets, guidance_scale, batch_total): 43 | self.cfg_unets = cfg_unets 44 | self.guidance_scale = guidance_scale 45 | self.batch_total = batch_total 46 | 47 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 48 | latents = torch.cat([latents, latents]) 49 | 50 | if isinstance(t, torch.Tensor) and t.shape: 51 | t = torch.cat([t, t]) 52 | 53 | noise_pred = self.cfg_unets.f(latents, t) 54 | noise_pred_u, noise_pred_g = noise_pred.chunk(2) 55 | 56 | noise_pred = noise_pred_u + self.guidance_scale * (noise_pred_g - noise_pred_u) 57 | return noise_pred 58 | 
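A minimal usage sketch (not part of the repository) of the two CFG wrappers defined in cfg.py above, assuming the classes from sdgrpcserver/pipeline/unet/cfg.py are importable; the toy eps-prediction callables are hypothetical stand-ins for real NoisePredictionUNet instances. Both wrappers should produce the same classifier-free-guidance result, noise_pred_u + guidance_scale * (noise_pred_g - noise_pred_u); CFGUnet_Seperated uses two forward passes, while CFGUnet doubles the batch and does one.

import torch

from sdgrpcserver.pipeline.unet.cfg import CFGChildUnets, CFGUnet, CFGUnet_Seperated


def toy_guided(latents, t):
    # stand-in for the conditioned (guided) eps prediction
    return latents * 0.9


def toy_uncond(latents, t):
    # stand-in for the unconditioned eps prediction
    return latents * 0.5


def toy_batched(latents, t):
    # batched stand-in: first half of the batch is unconditioned, second half conditioned
    uncond, guided = latents.chunk(2)
    return torch.cat([toy_uncond(uncond, t), toy_guided(guided, t)])


children = CFGChildUnets(g=toy_guided, u=toy_uncond, f=toy_batched)
latents = torch.randn(1, 4, 64, 64)

eps_separate = CFGUnet_Seperated(children, guidance_scale=7.5, batch_total=1)(latents, 999)
eps_batched = CFGUnet(children, guidance_scale=7.5, batch_total=1)(latents, 999)

# Both paths apply the same guidance formula, so the results should match
assert torch.allclose(eps_separate, eps_batched)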
-------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/core.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | from sdgrpcserver.pipeline.unet.types import ( 4 | DiffusersUNet, 5 | EpsTensor, 6 | ScheduleTimestep, 7 | XtTensor, 8 | ) 9 | 10 | 11 | class UNetWithEmbeddings: 12 | def __init__(self, unet: DiffusersUNet, text_embeddings: torch.Tensor): 13 | self.unet = unet 14 | self.text_embeddings = text_embeddings 15 | 16 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 17 | return self.unet(latents, t, encoder_hidden_states=self.text_embeddings).sample 18 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/graft.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | 5 | from sdgrpcserver.pipeline.easing import Easing 6 | from sdgrpcserver.pipeline.randtools import batched_rand 7 | from sdgrpcserver.pipeline.unet.types import ( 8 | DiffusersSchedulerUNet, 9 | GenericSchedulerUNet, 10 | KDiffusionSchedulerUNet, 11 | PX0Tensor, 12 | XtTensor, 13 | ) 14 | 15 | 16 | class GraftUnets(GenericSchedulerUNet): 17 | def __init__( 18 | self, 19 | unet_root: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 20 | unet_top: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 21 | generators: list[torch.Generator], 22 | ): 23 | self.unet_root = unet_root 24 | self.unet_top = unet_top 25 | self.generators = generators 26 | 27 | self.easing = Easing(floor=0, start=0.1, end=0.3, easing="sine") 28 | 29 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 30 | p = self.easing.interp(u) 31 | 32 | if p <= 0: 33 | return self.unet_root(latents, __step, u=u) 34 | elif p >= 1: 35 | return self.unet_top(latents, __step, u=u) 36 | 37 | root = self.unet_root(latents, __step, u=u) 38 | top = self.unet_top(latents, __step, u=u) 39 | 40 | # Build a map of 0..1 like latents 41 | randmap = batched_rand(top.shape, self.generators, top.device, top.dtype) 42 | 43 | # Linear blend between base and graft 44 | res = cast(type(top), torch.where(randmap >= p, root, top)) 45 | 46 | return res 47 | 48 | @classmethod 49 | def merge_initial_latents(cls, left, right): 50 | return left 51 | 52 | @classmethod 53 | def split_result(cls, left, right): 54 | return right 55 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | import torchvision.transforms as T 5 | 6 | from sdgrpcserver import resize_right 7 | from sdgrpcserver.pipeline.easing import Easing 8 | from sdgrpcserver.pipeline.randtools import batched_rand 9 | from sdgrpcserver.pipeline.unet.types import ( 10 | DiffusersSchedulerUNet, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | XtTensor, 15 | ) 16 | 17 | # Indexes into a shape for the height and width dimensions 18 | # Negative indexed to work for any number of dimensions 19 | Hi, Wi = -2, -1 20 | 21 | 22 | def pad_like(latents, like, mode="replicate"): 23 | wd = like.shape[Wi] - latents.shape[Wi] 24 | hd = like.shape[Hi] - latents.shape[Hi] 25 | l = wd // 2 26 | r = wd - l 27 | t = hd // 2 28 | b = hd - t 29 | 30 | pad = torch.nn.functional.pad 31 | 32 | if isinstance(mode, int | float): 33 | 
return pad(latents, pad=(l, r, t, b), mode="constant", value=mode) 34 | else: 35 | return pad(latents, pad=(l, r, t, b), mode=mode) 36 | 37 | 38 | def resize_nearest(latents, scale_factor=1): 39 | hs = int(latents.shape[Hi] * scale_factor) 40 | ws = int(latents.shape[Wi] * scale_factor) 41 | 42 | return T.functional.resize(latents, [hs, ws], T.InterpolationMode.NEAREST) 43 | 44 | 45 | def scale_into(latents, target, scale): 46 | if scale >= 1: 47 | # latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 48 | latents = resize_nearest(latents, scale) 49 | else: 50 | latents = resize_nearest(latents, scale) 51 | 52 | # Now crop off anything that's outside target shape, and offset if it's inside target shape 53 | 54 | # Positive is offset into the shape, negative is crop amount 55 | offh = (target.shape[Hi] - latents.shape[Hi]) // 2 56 | offw = (target.shape[Wi] - latents.shape[Wi]) // 2 57 | 58 | if offh < 0: 59 | latents = latents[:, :, -offh : -offh + target.shape[Hi], :] 60 | offh = 0 61 | 62 | if offw < 0: 63 | latents = latents[:, :, :, -offw : -offw + target.shape[Wi]] 64 | offw = 0 65 | 66 | target[ 67 | :, :, offh : offh + latents.shape[Hi], offw : offw + latents.shape[Wi] 68 | ] = latents 69 | return target 70 | 71 | 72 | def downscale_into(latents, target, oos_fraction): 73 | scale_min = min( 74 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 75 | ) 76 | scale_max = max( 77 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 78 | ) 79 | 80 | # At oos_fraction == 1, we want to downscale to completely contain the latent within 81 | # the square target - i.e. scale_min. At oos_fraction == 0 we want to downscale to 82 | # completely cover the square target - i.e. scale_max 83 | 84 | scale = scale_min * oos_fraction + scale_max * (1 - oos_fraction) 85 | return scale_into(latents, target, scale) 86 | 87 | 88 | def upscale_into(latents, target, oos_fraction): 89 | scale_min = min( 90 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 91 | ) 92 | scale_max = max( 93 | target.shape[Hi] / latents.shape[Hi], target.shape[Wi] / latents.shape[Wi] 94 | ) 95 | 96 | # At oos_fraction == 1, we want to upscale to completely cover the 97 | # target - i.e. scale_max. At oos_fraction = 0 we want to completely 98 | # fit square latent into OOS targe, i.e. 
scale_min 99 | 100 | scale = scale_max * oos_fraction + scale_min * (1 - oos_fraction) 101 | return scale_into(latents, target, scale) 102 | 103 | 104 | class HiresUnetWrapper(GenericSchedulerUNet): 105 | def __init__( 106 | self, 107 | unet_natural: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 108 | unet_hires: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 109 | generators: list[torch.Generator], 110 | natural_size: torch.Size, 111 | oos_fraction: float, 112 | latent_debugger, 113 | ): 114 | self.unet_natural = unet_natural 115 | self.unet_hires = unet_hires 116 | self.generators = generators 117 | self.natural_size = natural_size 118 | self.oos_fraction = oos_fraction 119 | 120 | self.easing = Easing(floor=0, start=0, end=0.4, easing="sine") 121 | self.latent_debugger = latent_debugger 122 | 123 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 124 | # Linear blend between base and graft 125 | p = self.easing.interp(u) 126 | 127 | lo_in, hi_in = latents.chunk(2) 128 | 129 | if isinstance(__step, torch.Tensor) and __step.shape: 130 | lo_t, hi_t = __step.chunk(2) 131 | else: 132 | lo_t = hi_t = __step 133 | 134 | hi = self.unet_hires(hi_in, hi_t, u=u) 135 | 136 | # Early out if we're passed the graft stage 137 | if p >= 0.999: 138 | return cast(type(hi), torch.concat([lo_in, hi])) 139 | 140 | *_, h, w = latents.shape 141 | th, tw = self.natural_size 142 | 143 | offseth = (h - th) // 2 144 | offsetw = (w - tw) // 2 145 | 146 | lo_in = lo_in[:, :, offseth : offseth + th, offsetw : offsetw + tw] 147 | lo = self.unet_natural(lo_in, lo_t, u=u) 148 | 149 | # Downscale hi and merge into lo 150 | hi_downscaled = torch.zeros_like(lo) # Un-overlapped space is zero 151 | hi_downscaled = downscale_into(hi, hi_downscaled, self.oos_fraction) 152 | 153 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 154 | lo_merged = torch.where(randmap >= p, lo, hi_downscaled) 155 | 156 | # Upscale lo and merge it back into hi 157 | lo_upscaled = hi.clone() # Un-overlapped space copied from hi 158 | lo_upscaled = upscale_into(lo, lo_upscaled, self.oos_fraction) 159 | 160 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 161 | hi_merged = torch.where(randmap >= p, lo_upscaled, hi) 162 | 163 | # Expand lo back to full tensor size by wrapping with 0 164 | lo_expanded = torch.zeros_like(hi_merged) 165 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 166 | 167 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 168 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 169 | 170 | res = torch.concat([lo_expanded, hi_merged]) 171 | return cast(type(hi), res) 172 | 173 | @classmethod 174 | def image_to_natural( 175 | cls, 176 | natural_size: int, 177 | image: torch.Tensor, 178 | oos_fraction: float, 179 | fill=torch.zeros, 180 | ): 181 | natural_image_size = (*image.shape[:-2], natural_size, natural_size) 182 | natural_image = fill(natural_image_size, device=image.device, dtype=image.dtype) 183 | 184 | downscale_into(image, natural_image, oos_fraction) 185 | return natural_image 186 | 187 | @classmethod 188 | def merge_initial_latents(cls, left, right): 189 | left_resized = torch.zeros_like(right) 190 | 191 | *_, th, tw = left.shape 192 | *_, h, w = right.shape 193 | 194 | offseth = (h - th) // 2 195 | offsetw = (w - tw) // 2 196 | 197 | left_resized[:, :, offseth : offseth + th, offsetw : offsetw + tw] = left 198 | right[:, :, offseth : offseth + th, offsetw : offsetw + 
tw] = left 199 | return torch.concat([left_resized, right]) 200 | 201 | @classmethod 202 | def split_result(cls, left, right): 203 | return right.chunk(2)[1] 204 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix_other.py: -------------------------------------------------------------------------------- 1 | from typing import Literal, Sequence, cast 2 | 3 | import torch 4 | 5 | from sdgrpcserver import resize_right 6 | from sdgrpcserver.pipeline.easing import Easing 7 | from sdgrpcserver.pipeline.randtools import batched_rand 8 | from sdgrpcserver.pipeline.unet.types import ( 9 | DiffusersSchedulerUNet, 10 | EpsTensor, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | ScheduleTimestep, 15 | XtTensor, 16 | ) 17 | 18 | # from sdgrpcserver.pipeline.unet.types import * 19 | 20 | 21 | def match_shape(latents: torch.Tensor, target: torch.Size): 22 | # If it's already the right size, just return it 23 | if latents.shape[-len(target) :] == target: 24 | return latents 25 | 26 | # Maybe scale it? 27 | scale = max(target[0] / latents.shape[2], target[1] / latents.shape[3]) 28 | if scale != 1: 29 | latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 30 | 31 | # If we don't need to crop, skip that bit 32 | if latents.shape[-len(target) :] == target: 33 | return latents 34 | 35 | offset2 = (latents.shape[2] - target[0]) // 2 36 | offset3 = (latents.shape[3] - target[1]) // 2 37 | 38 | return latents[:, :, offset2 : offset2 + target[0], offset3 : offset3 + target[1]] 39 | 40 | 41 | class HiresUnetEpsWrapper: 42 | def __init__( 43 | self, 44 | parent: "HiresUnetWrapper", 45 | unet: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 46 | target: torch.Size, 47 | ): 48 | self.parent = parent 49 | self.unet = unet 50 | self.target = target 51 | 52 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 53 | if self.parent.mode == "lo": 54 | *_, h, w = latents.shape 55 | th, tw = self.target 56 | 57 | offseth = (h - th) // 2 58 | offsetw = (w - tw) // 2 59 | 60 | in_latents = latents[:, :, offseth : offseth + th, offsetw : offsetw + tw] 61 | 62 | res = self.unet(in_latents, t) 63 | 64 | expanded = torch.zeros( 65 | (*res.shape[:2], *latents.shape[2:]), 66 | dtype=latents.dtype, 67 | device=latents.device, 68 | ) 69 | expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = res 70 | 71 | return expanded 72 | 73 | else: 74 | return self.unet(latents, t) 75 | 76 | 77 | class HiresUnetGenericWrapper(GenericSchedulerUNet): 78 | def __init__( 79 | self, 80 | parent: "HiresUnetWrapper", 81 | unet: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 82 | generators: list[torch.Generator], 83 | target: torch.Size, 84 | latent_debugger, 85 | ): 86 | self.parent = parent 87 | self.unet = unet 88 | self.generators = generators 89 | self.target = target 90 | 91 | self.easing = Easing(floor=0, start=0, end=0.3, easing="quartic") 92 | self.latent_debugger = latent_debugger 93 | 94 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 95 | # Linear blend between base and graft 96 | p = self.easing.interp(u) 97 | 98 | lo_in, hi_in = latents.chunk(2) 99 | 100 | if isinstance(__step, torch.Tensor) and __step.shape: 101 | lo_t, hi_t = __step.chunk(2) 102 | else: 103 | lo_t = hi_t = __step 104 | 105 | self.parent.mode = "hi" 106 | hi = self.unet(hi_in, hi_t, u=u) 107 | 108 | # Early out if we're passed the graft stage 109 | if p >= 0.999: 110 | return 
cast(type(hi), torch.concat([lo_in, hi])) 111 | 112 | *_, h, w = latents.shape 113 | th, tw = self.target 114 | 115 | offseth = (h - th) // 2 116 | offsetw = (w - tw) // 2 117 | 118 | self.parent.mode = "lo" 119 | lo = self.unet(lo_in, lo_t, u=u)[ 120 | :, :, offseth : offseth + th, offsetw : offsetw + tw 121 | ] 122 | 123 | # Crop hi and merge it back into lo 124 | hi_crop = hi[:, :, offseth : offseth + th, offsetw : offsetw + tw] 125 | 126 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 127 | lo_merged = torch.where(randmap >= p, lo, hi_crop) 128 | 129 | # Scale lo and merge it back into hi 130 | lo_scaled = match_shape(lo, hi.shape[-2:]) 131 | 132 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 133 | hi_merged = torch.where(randmap >= p, lo_scaled, hi) 134 | 135 | # Expand lo back to full tensor size by wrapping with 0 136 | lo_expanded = torch.zeros_like(hi_merged) 137 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 138 | 139 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 140 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 141 | 142 | res = torch.concat([lo_expanded, hi_merged]) 143 | return cast(type(hi), res) 144 | 145 | 146 | class HiresUnetWrapper: 147 | def __init__( 148 | self, 149 | generators: list[torch.Generator], 150 | target: torch.Size, 151 | latent_debugger, 152 | ): 153 | self.generators = generators 154 | self.target = target 155 | self.latent_debugger = latent_debugger 156 | 157 | self.mode: Literal["lo", "hi"] = "lo" 158 | 159 | def get_eps_wrapper(self, unet): 160 | return HiresUnetEpsWrapper(self, unet, self.target) 161 | 162 | def get_generic_wrapper(self, unet): 163 | return HiresUnetGenericWrapper( 164 | self, unet, self.generators, self.target, self.latent_debugger 165 | ) 166 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/hires_fix_resize.py: -------------------------------------------------------------------------------- 1 | from typing import cast 2 | 3 | import torch 4 | import torchvision.transforms as T 5 | 6 | from sdgrpcserver import resize_right 7 | from sdgrpcserver.pipeline.easing import Easing 8 | from sdgrpcserver.pipeline.randtools import batched_rand 9 | from sdgrpcserver.pipeline.unet.types import ( 10 | DiffusersSchedulerUNet, 11 | GenericSchedulerUNet, 12 | KDiffusionSchedulerUNet, 13 | PX0Tensor, 14 | XtTensor, 15 | ) 16 | 17 | 18 | def match_shape(latents: torch.Tensor, target: torch.Size): 19 | # If it's already the right size, just return it 20 | if latents.shape[-len(target) :] == target: 21 | return latents 22 | 23 | # Maybe scale it? 
24 | scale = max(target[0] / latents.shape[2], target[1] / latents.shape[3]) 25 | if scale != 1: 26 | latents = resize_right.resize(latents, scale_factors=scale, pad_mode="reflect") 27 | 28 | # If we don't need to crop, skip that bit 29 | if latents.shape[-len(target) :] == target: 30 | return latents 31 | 32 | offset2 = (latents.shape[2] - target[0]) // 2 33 | offset3 = (latents.shape[3] - target[1]) // 2 34 | 35 | return latents[:, :, offset2 : offset2 + target[0], offset3 : offset3 + target[1]] 36 | 37 | 38 | class HiresUnetWrapper(GenericSchedulerUNet): 39 | def __init__( 40 | self, 41 | unet_natural: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 42 | unet_hires: DiffusersSchedulerUNet | KDiffusionSchedulerUNet, 43 | generators: list[torch.Generator], 44 | target: torch.Size, 45 | latent_debugger, 46 | ): 47 | self.unet_natural = unet_natural 48 | self.unet_hires = unet_hires 49 | self.generators = generators 50 | self.target = target 51 | 52 | self.easing = Easing(floor=0, start=0, end=0.4, easing="sine") 53 | self.latent_debugger = latent_debugger 54 | 55 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 56 | # Linear blend between base and graft 57 | p = self.easing.interp(u) 58 | 59 | lo_in, hi_in = latents.chunk(2) 60 | 61 | if isinstance(__step, torch.Tensor) and __step.shape: 62 | lo_t, hi_t = __step.chunk(2) 63 | else: 64 | lo_t = hi_t = __step 65 | 66 | hi = self.unet_hires(hi_in, hi_t, u=u) 67 | 68 | # Early out if we're passed the graft stage 69 | if p >= 0.999: 70 | return cast(type(hi), torch.concat([lo_in, hi])) 71 | 72 | *_, h, w = latents.shape 73 | th, tw = self.target 74 | 75 | offseth = (h - th) // 2 76 | offsetw = (w - tw) // 2 77 | 78 | lo_in = lo_in[:, :, offseth : offseth + th, offsetw : offsetw + tw] 79 | lo = self.unet_natural(lo_in, lo_t, u=u) 80 | 81 | # Crop hi and merge it back into lo 82 | scale = min(tw / w, th / h) 83 | 84 | h_s = int(h * scale) 85 | w_s = int(w * scale) 86 | 87 | offseth2 = (th - h_s) // 2 88 | offsetw2 = (tw - w_s) // 2 89 | 90 | image_slice = ( 91 | slice(0, None), 92 | slice(0, None), 93 | slice(offseth2, offseth2 + h_s), 94 | slice(offsetw2, offsetw2 + w_s), 95 | ) 96 | 97 | hi_crop = torch.zeros_like(lo) 98 | # T.functional.resize(hi, [th, tw], T.InterpolationMode.NEAREST) 99 | hi_crop[image_slice] = T.functional.resize( 100 | hi, [h_s, w_s], T.InterpolationMode.NEAREST 101 | ) 102 | 103 | # hi_crop = hi[:, :, offseth : offseth + th, offsetw : offsetw + tw] 104 | 105 | randmap = batched_rand(lo.shape, self.generators, lo.device, lo.dtype) 106 | lo_merged = torch.where(randmap >= p, lo, hi_crop) 107 | 108 | # Scale lo and merge it back into hi 109 | lo_scaled = match_shape(lo, hi.shape[-2:]) 110 | 111 | randmap = batched_rand(hi.shape, self.generators, hi.device, hi.dtype) 112 | hi_merged = torch.where(randmap >= p, lo_scaled, hi) 113 | 114 | # Expand lo back to full tensor size by wrapping with 0 115 | lo_expanded = torch.zeros_like(hi_merged) 116 | lo_expanded[:, :, offseth : offseth + th, offsetw : offsetw + tw] = lo_merged 117 | 118 | self.latent_debugger.log("hires_lo", int(u * 1000), lo_expanded[0:1]) 119 | self.latent_debugger.log("hires_hi", int(u * 1000), hi_merged[0:1]) 120 | 121 | res = torch.concat([lo_expanded, hi_merged]) 122 | return cast(type(hi), res) 123 | 124 | @classmethod 125 | def image_to_natural(cls, natural_size: int, image: torch.Tensor, fill=torch.zeros): 126 | *_, height, width = image.shape 127 | scale = min(natural_size / width, natural_size / height) 128 | 129 | 
height_scaled = int(height * scale) 130 | width_scaled = int(width * scale) 131 | 132 | offseth = (natural_size - height_scaled) // 2 133 | offsetw = (natural_size - width_scaled) // 2 134 | 135 | image_slice = ( 136 | slice(0, None), 137 | slice(0, None), 138 | slice(offseth, offseth + height_scaled), 139 | slice(offsetw, offsetw + width_scaled), 140 | ) 141 | 142 | natural_image_size = (*image.shape[:-2], natural_size, natural_size) 143 | 144 | natural_image = fill(natural_image_size, device=image.device, dtype=image.dtype) 145 | 146 | natural_image[image_slice] = resize_right.resize( 147 | image, scale_factors=scale, pad_mode="reflect" 148 | ) 149 | 150 | return natural_image 151 | 152 | @classmethod 153 | def merge_initial_latents(cls, left, right): 154 | left_resized = torch.zeros_like(right) 155 | 156 | *_, th, tw = left.shape 157 | *_, h, w = right.shape 158 | 159 | offseth = (h - th) // 2 160 | offsetw = (w - tw) // 2 161 | 162 | left_resized[:, :, offseth : offseth + th, offsetw : offsetw + tw] = left 163 | return torch.concat([left_resized, right]) 164 | 165 | @classmethod 166 | def split_result(cls, left, right): 167 | return right.chunk(2)[1] 168 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/unet/types.py: -------------------------------------------------------------------------------- 1 | from abc import abstractmethod 2 | from typing import NewType, Protocol, overload 3 | 4 | from torch import Tensor 5 | 6 | # Some types to describe the various structures of unet. First some return types 7 | 8 | # An Xt (ie a sample that includes some amount of noise) 9 | XtTensor = Tensor 10 | # The predicted noise in a sample (eps) 11 | EpsTensor = Tensor 12 | # The predicted X0 (i.e Xt - PredictedNoise) 13 | PX0Tensor = Tensor 14 | 15 | # Sigma 16 | ScheduleSigma = float | Tensor 17 | # Timestep (from 1000 to 0 usually) 18 | ScheduleTimestep = int | Tensor 19 | # Progress float, range [0..1) 20 | ScheduleProgress = NewType("Progress", float) 21 | 22 | 23 | # The Core Diffusers UNet 24 | class DiffusersUNetOutput(Protocol): 25 | sample: EpsTensor 26 | 27 | 28 | class DiffusersUNet(Protocol): 29 | @abstractmethod 30 | def __call__( 31 | self, latents: XtTensor, t: ScheduleTimestep, encoder_hidden_states: Tensor 32 | ) -> DiffusersUNetOutput: 33 | raise NotImplementedError 34 | 35 | 36 | # A Wrapped UNet where the hidden_state argument inside the wrapping 37 | class NoisePredictionUNet(Protocol): 38 | @abstractmethod 39 | def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor: 40 | raise NotImplementedError 41 | 42 | 43 | # A KDiffusion wrapped UNet 44 | class KDiffusionSchedulerUNet(Protocol): 45 | @abstractmethod 46 | def __call__(self, latents: XtTensor, sigma: ScheduleSigma, u: float) -> PX0Tensor: 47 | raise NotImplementedError 48 | 49 | 50 | class DiffusersSchedulerUNet(Protocol): 51 | @abstractmethod 52 | def __call__(self, latents: XtTensor, t: ScheduleTimestep, u: float) -> XtTensor: 53 | raise NotImplementedError 54 | 55 | 56 | class GenericSchedulerUNet: 57 | @overload 58 | @abstractmethod 59 | def __call__(self, latents: XtTensor, sigma: ScheduleSigma, u: float) -> PX0Tensor: 60 | pass 61 | 62 | @overload 63 | @abstractmethod 64 | def __call__(self, latents: XtTensor, t: ScheduleTimestep, u: float) -> XtTensor: 65 | pass 66 | 67 | @abstractmethod 68 | def __call__(self, latents: XtTensor, __step, u: float) -> PX0Tensor | XtTensor: 69 | raise NotImplementedError 70 | 
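To make the protocol layering in types.py concrete, here is a minimal sketch (not part of the repository) of a class that structurally satisfies the NoisePredictionUNet protocol, plus a helper that accepts anything matching it; the constant-eps behaviour and the apply_unet helper are purely illustrative and not how the pipeline denoises.

import torch
from torch import Tensor

from sdgrpcserver.pipeline.unet.types import (
    EpsTensor,
    NoisePredictionUNet,
    ScheduleTimestep,
    XtTensor,
)


class ConstantNoiseUNet:
    """Predicts the same eps at every step; a static checker accepts it as a NoisePredictionUNet."""

    def __init__(self, eps: Tensor):
        self.eps = eps

    def __call__(self, latents: XtTensor, t: ScheduleTimestep) -> EpsTensor:
        # broadcast the stored eps to the latent shape
        return self.eps.expand_as(latents)


def apply_unet(unet: NoisePredictionUNet, latents: XtTensor, t: ScheduleTimestep) -> Tensor:
    # Any callable matching the protocol (e.g. UNetWithEmbeddings or CFGUnet from this
    # package) can be passed here; this toy step just subtracts the predicted noise.
    return latents - unet(latents, t)


latents = torch.randn(1, 4, 8, 8)
x0_ish = apply_unet(ConstantNoiseUNet(torch.zeros(1, 4, 1, 1)), latents, 999)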
-------------------------------------------------------------------------------- /sdgrpcserver/pipeline/vae_approximator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | 3 | 4 | class VaeApproximator: 5 | """Decodes latent data to an approximate representation in RGB. 6 | Values determined experimentally for Stable Diffusion 1.4. 7 | See https://discuss.huggingface.co/t/decoding-latents-to-rgb-without-upscaling/23204/2 8 | """ 9 | 10 | # grayscale_factors = torch.tensor([ 11 | # # R G B 12 | # [ 0.342, 0.341, 0.343 ], # L1 13 | # [ 0.342, 0.342, 0.340 ], # L2 14 | # [-0.110, -0.110, -0.113 ], # L3 15 | # [-0.208, -0.209, -0.208 ] # L4 16 | # ]) 17 | 18 | def __init__( 19 | self, device: torch.device | None = None, dtype: torch.dtype | None = None 20 | ): 21 | self.latent_rgb_factors = torch.tensor( 22 | [ 23 | # R G B 24 | [0.298, 0.207, 0.208], # L1 25 | [0.187, 0.286, 0.173], # L2 26 | [-0.158, 0.189, 0.264], # L3 27 | [-0.184, -0.271, -0.473], # L4 28 | ], 29 | dtype=dtype, 30 | device=device, 31 | ) 32 | 33 | @classmethod 34 | def for_pipeline(cls, pipeline): 35 | return cls(device=pipeline.execution_device, dtype=pipeline.unet.dtype) 36 | 37 | def __call__(self, latents): 38 | """Get an RGB JPEG representation of the latent data.""" 39 | self.to(latents.device, latents.dtype) 40 | return torch.einsum("...lhw,lr -> ...rhw", latents, self.latent_rgb_factors) 41 | 42 | def to(self, device, dtype): 43 | self.latent_rgb_factors = self.latent_rgb_factors.to(device, dtype) 44 | -------------------------------------------------------------------------------- /sdgrpcserver/pipeline/xformers_utils.py: -------------------------------------------------------------------------------- 1 | import functools 2 | 3 | import torch 4 | from diffusers.utils.import_utils import is_xformers_available 5 | 6 | 7 | @functools.cache 8 | def xformers_mea_available(): 9 | available = False 10 | 11 | if is_xformers_available(): 12 | try: 13 | from xformers.ops import memory_efficient_attention 14 | 15 | # Make sure we can run the memory efficient attention 16 | _ = memory_efficient_attention( 17 | torch.randn((1, 2, 40), device="cuda"), 18 | torch.randn((1, 2, 40), device="cuda"), 19 | torch.randn((1, 2, 40), device="cuda"), 20 | ) 21 | except Exception: 22 | pass 23 | else: 24 | available = True 25 | 26 | return available 27 | -------------------------------------------------------------------------------- /sdgrpcserver/ram_monitor.py: -------------------------------------------------------------------------------- 1 | import threading 2 | import time 3 | 4 | import psutil 5 | import pynvml 6 | 7 | 8 | def mb(v): 9 | return f"{v / 1024 / 1024 :.2f}MB" 10 | 11 | 12 | UPDATE_PERIOD = 0.001 13 | 14 | 15 | class RamMonitor(threading.Thread): 16 | stop_flag = False 17 | ram_current = 0 18 | ram_max_usage = 0 19 | vram_current = 0 20 | vram_max_usage = 0 21 | 22 | total = -1 23 | 24 | def __init__(self): 25 | threading.Thread.__init__(self) 26 | 27 | def run(self): 28 | ps = psutil.Process() 29 | 30 | self.loop_lock = threading.Lock() 31 | 32 | self.vram = False 33 | try: 34 | pynvml.nvmlInit() 35 | self.vram = True 36 | except: 37 | print("Unable to initialize NVIDIA management. No VRAM stats. 
\n") 38 | return 39 | 40 | print("Recording max memory usage...") 41 | 42 | self.ram_total = psutil.virtual_memory().total 43 | 44 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 45 | self.vram_total = pynvml.nvmlDeviceGetMemoryInfo(handle).total 46 | 47 | print(f"Total available RAM: {mb(self.ram_total)}, VRAM: {mb(self.vram_total)}") 48 | 49 | while not self.stop_flag: 50 | self.ram_current = ps.memory_info().rss 51 | self.ram_max_usage = max(self.ram_max_usage, self.ram_current) 52 | 53 | self.vram_current = pynvml.nvmlDeviceGetMemoryInfo(handle).used 54 | self.vram_max_usage = max(self.vram_max_usage, self.vram_current) 55 | 56 | if self.loop_lock.locked(): 57 | self.loop_lock.release() 58 | 59 | time.sleep(UPDATE_PERIOD) 60 | 61 | print("Stopped recording.") 62 | pynvml.nvmlShutdown() 63 | 64 | def print(self): 65 | # Wait for the update loop to run at least once 66 | self.loop_lock.acquire(timeout=0.5) 67 | print( 68 | f"Current RAM: {mb(self.ram_current)}, VRAM: {mb(self.vram_current)} | " 69 | f"Peak RAM: {mb(self.ram_max_usage)}, VRAM: {mb(self.vram_max_usage)}" 70 | ) 71 | 72 | def read(self): 73 | return dict( 74 | ram_max=self.ram_max_usage, 75 | ram_total=self.ram_total, 76 | vram_max=self.vram_max_usage, 77 | vram_total=self.vram_total, 78 | ) 79 | 80 | def read_and_reset(self): 81 | result = self.read() 82 | self.vram_current = self.ram_current = 0 83 | self.vram_max_usage = self.ram_max_usage = 0 84 | return result 85 | 86 | def stop(self): 87 | self.stop_flag = True 88 | 89 | def read_and_stop(self): 90 | self.stop() 91 | return self.read() 92 | -------------------------------------------------------------------------------- /sdgrpcserver/resize_right.py: -------------------------------------------------------------------------------- 1 | # Redirect to the embedded git submodule 2 | 3 | import sys 4 | from typing import TypeVar, cast 5 | 6 | import numpy as np 7 | import torch 8 | 9 | from sdgrpcserver.src.ResizeRight import interp_methods 10 | 11 | sys.modules["interp_methods"] = interp_methods 12 | from sdgrpcserver.src.ResizeRight import resize_right # noqa: E402 13 | 14 | T = TypeVar("T", bound=torch.Tensor | np.ndarray) 15 | 16 | 17 | def resize( 18 | input: T, 19 | scale_factors=None, 20 | out_shape=None, 21 | interp_method=interp_methods.cubic, 22 | support_sz=None, 23 | antialiasing=True, 24 | by_convs=False, 25 | scale_tolerance=None, 26 | max_numerator=10, 27 | pad_mode="constant", 28 | ) -> T: 29 | result = resize_right.resize( 30 | input, 31 | scale_factors=scale_factors, 32 | out_shape=out_shape, 33 | interp_method=interp_method, 34 | support_sz=support_sz, 35 | antialiasing=antialiasing, 36 | by_convs=by_convs, 37 | scale_tolerance=scale_tolerance, 38 | max_numerator=max_numerator, 39 | pad_mode=pad_mode, 40 | ) 41 | 42 | if isinstance(result, torch.Tensor): 43 | result = result.to(cast(torch.Tensor, input).dtype) 44 | else: 45 | result = result.astype(cast(np.ndarray, input).dtype) 46 | 47 | return cast(T, result) 48 | -------------------------------------------------------------------------------- /sdgrpcserver/services/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/services/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/services/dashboard.py: -------------------------------------------------------------------------------- 1 
| import dashboard_pb2, dashboard_pb2_grpc 2 | 3 | class DashboardServiceServicer(dashboard_pb2_grpc.DashboardServiceServicer): 4 | def __init__(self): 5 | pass 6 | 7 | def GetMe(self, request, context): 8 | user = dashboard_pb2.User() 9 | user.id="0000-0000-0000-0001" 10 | return user 11 | -------------------------------------------------------------------------------- /sdgrpcserver/services/engines.py: -------------------------------------------------------------------------------- 1 | import inspect 2 | 3 | import engines_pb2 4 | import engines_pb2_grpc 5 | import generation_pb2 6 | 7 | 8 | class EnginesServiceServicer(engines_pb2_grpc.EnginesServiceServicer): 9 | def __init__(self, manager): 10 | self._manager = manager 11 | 12 | def ListEngines(self, request, context): 13 | engines = engines_pb2.Engines() 14 | 15 | all_noise_types = [ 16 | generation_pb2.SAMPLER_NOISE_NORMAL, 17 | generation_pb2.SAMPLER_NOISE_BROWNIAN, 18 | ] 19 | normal_only = [generation_pb2.SAMPLER_NOISE_NORMAL] 20 | 21 | status = self._manager.getStatus() 22 | for engine in self._manager.engines: 23 | if not ( 24 | engine.get("id", False) 25 | and engine.get("enabled", False) 26 | and engine.get("visible", False) 27 | ): 28 | continue 29 | 30 | info = engines_pb2.EngineInfo() 31 | info.id = engine["id"] 32 | info.name = engine["name"] 33 | info.description = engine["description"] 34 | info.owner = "stable-diffusion-grpcserver" 35 | info.ready = status.get(engine["id"], False) 36 | info.type = engines_pb2.EngineType.PICTURE 37 | 38 | if info.ready: 39 | pipeline = self._manager._pipelines[engine["id"]] 40 | for k, v in pipeline.get_samplers().items(): 41 | if callable(v): 42 | args = set(inspect.signature(v).parameters.keys()) 43 | 44 | info.supported_samplers.append( 45 | engines_pb2.EngineSampler( 46 | sampler=k, 47 | supports_eta="eta" in args, 48 | supports_churn="churn" in args, 49 | supports_sigma_limits="sigmas" in args 50 | or "sigma_min" in args, 51 | supports_karras_rho="sigmas" in args, 52 | supported_noise_types=all_noise_types 53 | if "noise_sampler" in args 54 | else normal_only, 55 | ) 56 | ) 57 | else: 58 | args = set(inspect.signature(v.step).parameters.keys()) 59 | 60 | info.supported_samplers.append( 61 | engines_pb2.EngineSampler( 62 | sampler=k, supports_eta="eta" in args 63 | ) 64 | ) 65 | 66 | engines.engine.append(info) 67 | 68 | return engines 69 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/sonora/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/sonora/__version__.py: -------------------------------------------------------------------------------- 1 | VERSION = (0, 1, 1) 2 | 3 | __version__ = ".".join(map(str, VERSION)) 4 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/aio.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import io 3 | 4 | import aiohttp 5 | import grpc.experimental.aio 6 | 7 | from sdgrpcserver.sonora import client 8 | from sdgrpcserver.sonora import protocol 9 | 10 | def insecure_web_channel(url): 11 | return WebChannel(url) 12 | 13 | 14 | class WebChannel: 15 | def __init__(self, url): 16 | if not url.startswith("http") and 
"://" not in url: 17 | url = f"http://{url}" 18 | 19 | self._url = url 20 | 21 | self._session = aiohttp.ClientSession() 22 | 23 | async def __aenter__(self): 24 | return self 25 | 26 | async def __aexit__(self, exception_type, exception_value, traceback): 27 | await self._session.close() 28 | 29 | def __await__(self): 30 | yield self 31 | 32 | def unary_unary(self, path, request_serializer, response_deserializer): 33 | return UnaryUnaryMulticallable( 34 | self._session, self._url, path, request_serializer, response_deserializer 35 | ) 36 | 37 | def unary_stream(self, path, request_serializer, response_deserializer): 38 | return UnaryStreamMulticallable( 39 | self._session, self._url, path, request_serializer, response_deserializer 40 | ) 41 | 42 | def stream_unary(self, path, request_serializer, response_deserializer): 43 | return client.NotImplementedMulticallable() 44 | 45 | def stream_stream(self, path, request_serializer, response_deserializer): 46 | return client.NotImplementedMulticallable() 47 | 48 | 49 | class UnaryUnaryMulticallable(client.Multicallable): 50 | def __call__(self, request, timeout=None, metadata=None): 51 | call_metadata = self._metadata.copy() 52 | if metadata is not None: 53 | call_metadata.extend(protocol.encode_headers(metadata)) 54 | 55 | return UnaryUnaryCall( 56 | request, 57 | timeout, 58 | call_metadata, 59 | self._rpc_url, 60 | self._session, 61 | self._serializer, 62 | self._deserializer, 63 | ) 64 | 65 | 66 | class UnaryStreamMulticallable(client.Multicallable): 67 | def __call__(self, request, timeout=None, metadata=None): 68 | call_metadata = self._metadata.copy() 69 | if metadata is not None: 70 | call_metadata.extend(protocol.encode_headers(metadata)) 71 | 72 | return UnaryStreamCall( 73 | request, 74 | timeout, 75 | call_metadata, 76 | self._rpc_url, 77 | self._session, 78 | self._serializer, 79 | self._deserializer, 80 | ) 81 | 82 | 83 | class Call(client.Call): 84 | def __enter__(self): 85 | return self 86 | 87 | def __exit__(self, exception_type, exception_value, traceback): 88 | if self._response and not self._response.closed: 89 | self._response.close() 90 | 91 | def __del__(self): 92 | if self._response and not self._response.closed: 93 | self._response.close() 94 | 95 | async def _get_response(self): 96 | if self._response is None: 97 | timeout = aiohttp.ClientTimeout(total=self._timeout) 98 | 99 | self._response = await self._session.post( 100 | self._url, 101 | data=protocol.wrap_message( 102 | False, False, self._serializer(self._request) 103 | ), 104 | headers=dict(self._metadata), 105 | timeout=timeout, 106 | ) 107 | 108 | protocol.raise_for_status(self._response.headers) 109 | 110 | return self._response 111 | 112 | async def initial_metadata(self): 113 | response = await self._get_response() 114 | return response.headers.items() 115 | 116 | async def trailing_metadata(self): 117 | return self._trailers 118 | 119 | 120 | class UnaryUnaryCall(Call): 121 | @Call._raise_timeout(asyncio.TimeoutError) 122 | def __await__(self): 123 | response = yield from self._get_response().__await__() 124 | 125 | data = yield from response.read().__await__() 126 | 127 | response.release() 128 | 129 | if not data: 130 | return 131 | 132 | buffer = io.BytesIO(data) 133 | 134 | messages = protocol.unwrap_message_stream(buffer) 135 | 136 | trailers, _, message = next(messages) 137 | 138 | if trailers: 139 | self._trailers = protocol.unpack_trailers(message) 140 | return 141 | else: 142 | result = self._deserializer(message) 143 | 144 | try: 145 | trailers, 
_, message = next(messages) 146 | except StopIteration: 147 | pass 148 | else: 149 | if trailers: 150 | self._trailers = protocol.unpack_trailers(message) 151 | else: 152 | raise ValueError("UnaryUnary should only return a single message") 153 | 154 | protocol.raise_for_status(response.headers) 155 | 156 | return result 157 | 158 | 159 | class UnaryStreamCall(Call): 160 | @Call._raise_timeout(asyncio.TimeoutError) 161 | async def read(self): 162 | response = await self._get_response() 163 | 164 | async for trailers, _, message in protocol.unwrap_message_stream_async( 165 | response.content 166 | ): 167 | if trailers: 168 | self._trailers = protocol.unpack_trailers(message) 169 | break 170 | else: 171 | return self._deserializer(message) 172 | 173 | response.release() 174 | 175 | protocol.raise_for_status(response.headers, self._trailers) 176 | 177 | return grpc.experimental.aio.EOF 178 | 179 | @Call._raise_timeout(asyncio.TimeoutError) 180 | async def __aiter__(self): 181 | response = await self._get_response() 182 | 183 | async for trailers, _, message in protocol.unwrap_message_stream_async( 184 | response.content 185 | ): 186 | if trailers: 187 | self._trailers = protocol.unpack_trailers(message) 188 | break 189 | else: 190 | yield self._deserializer(message) 191 | 192 | response.release() 193 | 194 | protocol.raise_for_status(response.headers, self._trailers) 195 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/echotest.py: -------------------------------------------------------------------------------- 1 | from google.protobuf.duration_pb2 import Duration 2 | from sdgrpcserver.sonora import client 3 | from echo.echo import echo_pb2_grpc, echo_pb2 4 | 5 | c = client.insecure_web_channel("http://localhost:8888") 6 | x = echo_pb2_grpc.EchoServiceStub(c) 7 | d = Duration(seconds=1) 8 | 9 | for r in x.ServerStreamingEcho( 10 | echo_pb2.ServerStreamingEchoRequest( 11 | message="honk", message_count=10, message_interval=d 12 | ) 13 | ): 14 | print(r) 15 | -------------------------------------------------------------------------------- /sdgrpcserver/sonora/protocol.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import functools 3 | import struct 4 | from urllib.parse import unquote 5 | 6 | import grpc 7 | 8 | 9 | _HEADER_FORMAT = ">BI" 10 | _HEADER_LENGTH = struct.calcsize(_HEADER_FORMAT) 11 | 12 | 13 | def _pack_header_flags(trailers, compressed): 14 | return (trailers << 7) | (compressed) 15 | 16 | 17 | def _unpack_header_flags(flags): 18 | trailers = 1 << 7 19 | compressed = 1 20 | 21 | return bool(trailers & flags), bool(compressed & flags) 22 | 23 | 24 | def wrap_message(trailers, compressed, message): 25 | return ( 26 | struct.pack( 27 | _HEADER_FORMAT, _pack_header_flags(trailers, compressed), len(message) 28 | ) 29 | + message 30 | ) 31 | 32 | 33 | def b64_wrap_message(trailers, compressed, message): 34 | return base64.b64encode(wrap_message(trailers, compressed, message)) 35 | 36 | 37 | def unwrap_message(message): 38 | flags, length = struct.unpack(_HEADER_FORMAT, message[:_HEADER_LENGTH]) 39 | data = message[_HEADER_LENGTH : _HEADER_LENGTH + length] 40 | 41 | if length != len(data): 42 | raise ValueError() 43 | 44 | trailers, compressed = _unpack_header_flags(flags) 45 | 46 | return trailers, compressed, data 47 | 48 | 49 | def b64_unwrap_message(message): 50 | return unwrap_message(base64.b64decode(message)) 51 | 52 | 53 | def unwrap_message_stream(stream): 54 | data 
= stream.read(_HEADER_LENGTH) 55 | 56 | while data: 57 | flags, length = struct.unpack(_HEADER_FORMAT, data) 58 | trailers, compressed = _unpack_header_flags(flags) 59 | 60 | yield trailers, compressed, stream.read(length) 61 | 62 | if trailers: 63 | break 64 | 65 | data = stream.read(_HEADER_LENGTH) 66 | 67 | 68 | async def unwrap_message_stream_async(stream): 69 | data = await stream.readexactly(_HEADER_LENGTH) 70 | 71 | while data: 72 | flags, length = struct.unpack(_HEADER_FORMAT, data) 73 | trailers, compressed = _unpack_header_flags(flags) 74 | 75 | yield trailers, compressed, await stream.readexactly(length) 76 | 77 | if trailers: 78 | break 79 | 80 | data = await stream.readexactly(_HEADER_LENGTH) 81 | 82 | 83 | async def unwrap_message_asgi(receive, decoder=None): 84 | buffer = bytearray() 85 | waiting = False 86 | flags = None 87 | length = None 88 | 89 | while True: 90 | event = await receive() 91 | assert event["type"].startswith("http.") 92 | 93 | if decoder: 94 | chunk = decoder(event["body"]) 95 | else: 96 | chunk = event["body"] 97 | 98 | buffer += chunk 99 | 100 | if len(buffer) >= _HEADER_LENGTH: 101 | if not waiting: 102 | flags, length = struct.unpack(_HEADER_FORMAT, buffer[:_HEADER_LENGTH]) 103 | 104 | if len(buffer) >= _HEADER_LENGTH + length: 105 | waiting = False 106 | data = buffer[_HEADER_LENGTH : _HEADER_LENGTH + length] 107 | trailers, compressed = _unpack_header_flags(flags) 108 | 109 | yield trailers, compressed, data 110 | buffer = buffer[_HEADER_LENGTH + length :] 111 | else: 112 | waiting = True 113 | 114 | if not event.get("more_body"): 115 | break 116 | 117 | 118 | b64_unwrap_message_asgi = functools.partial( 119 | unwrap_message_asgi, decoder=base64.b64decode 120 | ) 121 | 122 | 123 | def pack_trailers(trailers): 124 | message = [] 125 | for k, v in trailers: 126 | k = k.lower() 127 | message.append(f"{k}: {v}\r\n".encode("ascii")) 128 | return b"".join(message) 129 | 130 | 131 | def unpack_trailers(message): 132 | trailers = [] 133 | for line in message.decode("ascii").splitlines(): 134 | k, v = line.split(":", 1) 135 | v = v.strip() 136 | 137 | trailers.append((k, v)) 138 | return trailers 139 | 140 | 141 | def encode_headers(metadata): 142 | for header, value in metadata: 143 | if isinstance(value, bytes): 144 | if not header.endswith("-bin"): 145 | raise ValueError("binary headers must have the '-bin' suffix") 146 | 147 | value = base64.b64encode(value).decode("ascii") 148 | 149 | if isinstance(header, bytes): 150 | header = header.decode("ascii") 151 | 152 | yield header, value 153 | 154 | 155 | class WebRpcError(grpc.RpcError): 156 | _code_to_enum = {code.value[0]: code for code in grpc.StatusCode} # type: ignore 157 | 158 | def __init__(self, code, details, *args, **kwargs): 159 | super(WebRpcError, self).__init__(*args, **kwargs) 160 | 161 | self._code = code 162 | self._details = details 163 | 164 | @classmethod 165 | def from_metadata(cls, trailers): 166 | status = int(trailers["grpc-status"]) 167 | details = trailers.get("grpc-message") 168 | 169 | code = cls._code_to_enum[status] 170 | 171 | return cls(code, details) 172 | 173 | def __str__(self): 174 | return "WebRpcError(status_code={}, details='{}')".format( 175 | self._code, self._details 176 | ) 177 | 178 | def code(self): 179 | return self._code 180 | 181 | def details(self): 182 | return self._details 183 | 184 | 185 | def raise_for_status(headers, trailers=None): 186 | if trailers: 187 | metadata = dict(trailers) 188 | else: 189 | metadata = headers 190 | 191 | if "grpc-status" in 
metadata and metadata["grpc-status"] != "0": 192 | metadata = metadata.copy() 193 | 194 | if "grpc-message" in metadata: 195 | metadata["grpc-message"] = unquote(metadata["grpc-message"]) 196 | 197 | raise WebRpcError.from_metadata(metadata) 198 | 199 | 200 | _timeout_units = { 201 | b"H": 3600.0, 202 | b"M": 60.0, 203 | b"S": 1.0, 204 | b"m": 1 / 1000.0, 205 | b"u": 1 / 1000000.0, 206 | b"n": 1 / 1000000000.0, 207 | } 208 | 209 | 210 | def parse_timeout(value): 211 | units = value[-1:] 212 | coef = _timeout_units[units] 213 | count = int(value[:-1]) 214 | return count * coef 215 | 216 | 217 | def serialize_timeout(seconds): 218 | if seconds % 3600 == 0: 219 | value = seconds // 3600 220 | units = "H" 221 | elif seconds % 60 == 0: 222 | value = seconds // 60 223 | units = "M" 224 | elif seconds % 1 == 0: 225 | value = seconds 226 | units = "S" 227 | elif seconds * 1000 % 1 == 0: 228 | value = seconds * 1000 229 | units = "m" 230 | elif seconds * 1000000 % 1 == 0: 231 | value = seconds * 1000000 232 | units = "u" 233 | else: 234 | value = seconds * 1000000000 235 | units = "n" 236 | 237 | return f"{int(value)}{units}" 238 | -------------------------------------------------------------------------------- /sdgrpcserver/src/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/sdgrpcserver/src/__init__.py -------------------------------------------------------------------------------- /sdgrpcserver/utils.py: -------------------------------------------------------------------------------- 1 | 2 | from array import ArrayType 3 | import io 4 | import PIL 5 | from PIL import PngImagePlugin 6 | import numpy as np 7 | import cv2 as cv 8 | import torch 9 | 10 | import generation_pb2 11 | 12 | from sdgrpcserver import images 13 | from sdgrpcserver.pipeline.vae_approximator import VaeApproximator 14 | 15 | def artifact_to_image(artifact): 16 | if artifact.type == generation_pb2.ARTIFACT_IMAGE or artifact.type == generation_pb2.ARTIFACT_MASK: 17 | img = PIL.Image.open(io.BytesIO(artifact.binary)) 18 | return img 19 | else: 20 | raise NotImplementedError("Can't convert that artifact to an image") 21 | 22 | def image_to_artifact(im, artifact_type=generation_pb2.ARTIFACT_IMAGE, meta=None): 23 | binary=None 24 | 25 | if isinstance(im, torch.Tensor): 26 | im = images.toPIL(im)[0] 27 | 28 | if isinstance(im, PIL.Image.Image): 29 | buf = io.BytesIO() 30 | info = PngImagePlugin.PngInfo() 31 | if meta: 32 | for k, v in meta.items(): info.add_text(k, v) 33 | im.save(buf, format='PNG', pnginfo=info) 34 | buf.seek(0) 35 | binary=buf.getvalue() 36 | else: 37 | binary=cv.imencode(".png", im)[1] 38 | 39 | return generation_pb2.Artifact( 40 | type=artifact_type, 41 | binary=binary, 42 | mime="image/png" 43 | ) 44 | 45 | class CallbackImageWrapper: 46 | def __init__(self, callback, device, dtype): 47 | self.callback = callback 48 | self.vae_approximator = VaeApproximator() 49 | 50 | def __call__(self, i, t, latents): 51 | pixels = self.vae_approximator(latents) 52 | pixels = (pixels / 2 + 0.5).clamp(0, 1) 53 | self.callback(i, t, pixels) 54 | -------------------------------------------------------------------------------- /server.py: -------------------------------------------------------------------------------- 1 | from sdgrpcserver import server 2 | 3 | if __name__ == "__main__": 4 | server.main() 5 | 
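An illustrative aside before the test fixtures below (this sketch is not part of the repository snapshot; it only assumes the package is importable under the sdgrpcserver.sonora path shown in the listing): the gRPC-web framing implemented in sdgrpcserver/sonora/protocol.py is a one-byte flag field (bit 7 marks a trailers frame, bit 0 a compressed frame) followed by a big-endian 32-bit length and the payload, and timeouts are carried as an integer count plus a unit suffix.

import io
from sdgrpcserver.sonora import protocol

# A data frame: flags byte 0x00, four-byte big-endian length, then the payload.
frame = protocol.wrap_message(False, False, b"hello")
assert frame == b"\x00\x00\x00\x00\x05hello"
assert protocol.unwrap_message(frame) == (False, False, b"hello")

# Trailers travel as a final frame with the high bit of the flags byte set.
trailer_frame = protocol.wrap_message(True, False, protocol.pack_trailers([("grpc-status", "0")]))
assert trailer_frame[0] == 0x80

# unwrap_message_stream yields frames in order and stops after the trailers frame.
for is_trailers, _, message in protocol.unwrap_message_stream(io.BytesIO(frame + trailer_frame)):
    if is_trailers:
        print(protocol.unpack_trailers(message))  # [('grpc-status', '0')]

# grpc-timeout header values: count plus unit letter (H, M, S, m, u, n); parsing expects bytes.
assert protocol.serialize_timeout(90) == "90S"
assert protocol.parse_timeout(b"500m") == 0.5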
-------------------------------------------------------------------------------- /stablecabal.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/stablecabal.png -------------------------------------------------------------------------------- /tests/VRAMUsageMonitor.py: -------------------------------------------------------------------------------- 1 | 2 | import pynvml 3 | import threading 4 | import time 5 | 6 | class VRAMUsageMonitor(threading.Thread): 7 | stop_flag = False 8 | max_usage = 0 9 | total = -1 10 | 11 | def __init__(self): 12 | threading.Thread.__init__(self) 13 | 14 | def run(self): 15 | try: 16 | pynvml.nvmlInit() 17 | except: 18 | print(f"Unable to initialize NVIDIA management. No memory stats. \n") 19 | return 20 | print(f"Recording max memory usage...\n") 21 | handle = pynvml.nvmlDeviceGetHandleByIndex(0) 22 | self.total = pynvml.nvmlDeviceGetMemoryInfo(handle).total 23 | print(f"Total memory available {self.total}") 24 | 25 | while not self.stop_flag: 26 | m = pynvml.nvmlDeviceGetMemoryInfo(handle) 27 | self.max_usage = max(self.max_usage, m.used) 28 | # print(self.max_usage) 29 | time.sleep(0.1) 30 | print(f"Stopped recording.\n") 31 | pynvml.nvmlShutdown() 32 | 33 | def read(self): 34 | return self.max_usage, self.total 35 | 36 | def read_and_reset(self): 37 | max_usage = self.max_usage 38 | self.max_usage = 0 39 | return max_usage, self.total 40 | 41 | def stop(self): 42 | self.stop_flag = True 43 | 44 | def read_and_stop(self): 45 | self.stop_flag = True 46 | return self.max_usage, self.total 47 | -------------------------------------------------------------------------------- /tests/batch_independance.py: -------------------------------------------------------------------------------- 1 | from test_harness import TestHarness, VRAMUsageMonitor, ALGORITHMS 2 | import os, sys, re, time 3 | from types import SimpleNamespace as SN 4 | 5 | import torch 6 | 7 | from sdgrpcserver import images 8 | import generation_pb2, generation_pb2_grpc 9 | 10 | class TestRunner(TestHarness): 11 | """ 12 | Tests to make sure that the unified pipeline is batch-independent. 13 | 14 | Batch-independent means that we should get the same results for a single batch of four images, 15 | two batches of two images each, or four batches of a single image each, so long as the seeds are the same.
16 | 17 | e.g. [1,2,3,4] == [1,2], [3,4] == [1], [2], [3], [4] 18 | 19 | This should be true both for batches created by a single prompt with num_images_per_prompt, 20 | and for multiple prompts. 21 | """ 22 | 23 | 24 | 25 | def params(self, **extra): 26 | return { 27 | "height": 512, 28 | "width": 512, 29 | "guidance_scale": 7.5, 30 | "sampler": ALGORITHMS["plms"], 31 | "eta": 0, 32 | "num_inference_steps": 50, 33 | "seed": -1, 34 | "strength": 0.8, 35 | **extra 36 | } 37 | 38 | def test(self): 39 | with open("image.png", "rb") as file: 40 | test_image = file.read() 41 | image = images.fromPngBytes(test_image).to(self.manager.mode.device) 42 | 43 | with open("mask.png", "rb") as file: 44 | test_mask = file.read() 45 | mask = images.fromPngBytes(test_mask).to(self.manager.mode.device) 46 | 47 | def gen(args, prompts, seeds, tag): 48 | args = { 49 | **args, 50 | "prompt": prompts, 51 | "seed": seeds, 52 | "num_images_per_prompt": len(seeds) // len(prompts) 53 | } 54 | 55 | suffix=f"{mode}{'_clip' if args['clip_guidance_scale'] > 0 else ''}_{tag}_" 56 | 57 | self.save_output(suffix, self.get_pipeline().generate(**args)[0]) 58 | 59 | for mode in ["txt2img", "img2img", "inpaint"]: 60 | for clip_guidance in [1.0, 0]: 61 | 62 | kwargs = self.params(clip_guidance_scale = clip_guidance) 63 | if mode == "img2img" or mode == "inpaint": kwargs["init_image"] = image 64 | if mode == "inpaint": kwargs["mask_image"] = mask 65 | 66 | # Most common is going to be num_images_per_prompt, so check that first 67 | 68 | for i, seed in enumerate([[420420420, 420420421], [420420420], [420420421]]): 69 | gen(kwargs, ["A Crocodile"], seed, f"croc{i}") 70 | 71 | # Then check 2 prompts and 2 images per prompt - first all four 72 | 73 | seed = [420420420, 420420421, 520520520, 520520521] 74 | gen(kwargs, ["A Crocodile", "A Shark"], seed, f"both") 75 | 76 | # Then create the two sharks independently 77 | 78 | for i, seed in enumerate([[520520520], [520520521]]): 79 | gen(kwargs, ["A Shark"], seed, f"shark{i}") 80 | 81 | 82 | runner = TestRunner(engine_path="engines.clip.yaml", prefix=f"bi", vramO=2) 83 | runner.run() 84 | -------------------------------------------------------------------------------- /tests/engines.basic.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | -------------------------------------------------------------------------------- /tests/engines.clip.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | overrides: 13 | inpaint_unet: 14 | model: "runwayml/stable-diffusion-inpainting" 15 | subfolder: "unet" 16 | clip_model: laion/CLIP-ViT-B-32-laion2B-s34B-b79K 17 | feature_extractor: laion/CLIP-ViT-B-32-laion2B-s34B-b79K 18 | -------------------------------------------------------------------------------- /tests/engines.inpaint.yaml: 
-------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "runwayml/stable-diffusion-v1-5" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-5" 11 | local_model_fp16: "./stable-diffusion-v1-5-fp16" 12 | overrides: 13 | inpaint_unet: 14 | model: "runwayml/stable-diffusion-inpainting" 15 | local_model: "./stable-diffusion-inpainting" 16 | local_model_fp16: "./stable-diffusion-inpainting-fp16" 17 | subfolder: "unet" 18 | 19 | -------------------------------------------------------------------------------- /tests/engines.sd14.yaml: -------------------------------------------------------------------------------- 1 | - id: "testengine" 2 | default: True 3 | enabled: True 4 | visible: True 5 | name: "Test Engine" 6 | description: "Test Engine" 7 | class: "UnifiedPipeline" 8 | model: "CompVis/stable-diffusion-v1-4" 9 | use_auth_token: True 10 | local_model: "./stable-diffusion-v1-4" 11 | local_model_fp16: "./stable-diffusion-v1-4-fp16" 12 | -------------------------------------------------------------------------------- /tests/graftedpaint.engine.yaml: -------------------------------------------------------------------------------- 1 | - model_id: 'inkpunk-diffusion' 2 | model: "Envvi/Inkpunk-Diffusion" 3 | has_fp16: False 4 | 5 | - model_id: 'sd2-inpainting' 6 | model: 'stabilityai/stable-diffusion-2-inpainting' 7 | whitelist: ["unet", "text_encoder"] 8 | 9 | - model_id: "laion-clip-b" 10 | type: "clip" 11 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 12 | has_fp16: False 13 | - model_id: "laion-clip-l" 14 | type: "clip" 15 | model: "laion/CLIP-ViT-L-14-laion2B-s32B-b82K" 16 | has_fp16: False 17 | 18 | - id: "justinkpunk" 19 | default: True 20 | enabled: True 21 | name: "Just Inkpunk" 22 | description: "Just Inkpunk" 23 | class: "UnifiedPipeline" 24 | model: "@inkpunk-diffusion" 25 | overrides: 26 | clip: 27 | model: "@laion-clip-b" 28 | 29 | - id: "withsd2inpaint" 30 | default: True 31 | enabled: True 32 | name: "Test Engine" 33 | description: "Test Engine" 34 | class: "UnifiedPipeline" 35 | model: "@inkpunk-diffusion" 36 | overrides: 37 | inpaint_unet: 38 | model: "@sd2-inpainting/unet" 39 | inpaint_text_encoder: 40 | model: "@sd2-inpainting/text_encoder" 41 | clip: 42 | model: "@laion-clip-b" 43 | 44 | -------------------------------------------------------------------------------- /tests/graftedpaint.image_512_512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_512_512.png -------------------------------------------------------------------------------- /tests/graftedpaint.image_768_600.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_768_600.png -------------------------------------------------------------------------------- /tests/graftedpaint.image_768_768.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/graftedpaint.image_768_768.png 
-------------------------------------------------------------------------------- /tests/graftedpaint.py: -------------------------------------------------------------------------------- 1 | """ 2 | isort:skip_file 3 | """ 4 | from test_harness import TestHarness, ALGORITHMS 5 | from sdgrpcserver import images 6 | from PIL import Image, ImageOps 7 | 8 | 9 | class TestRunner(TestHarness): 10 | """ 11 | Tests to ensure grafted inpaint works 12 | """ 13 | 14 | def params(self, **extra): 15 | return { 16 | "height": 768, 17 | "width": 768, 18 | "guidance_scale": 7.5, 19 | "sampler": ALGORITHMS["k_euler_ancestral"], 20 | "churn": 0.4, 21 | "karras_rho": 7, 22 | "num_inference_steps": 64, 23 | "seed": 420420420, 24 | "strength": 1.0, 25 | **extra, 26 | } 27 | 28 | def testres(self, width, height): 29 | test_image = Image.open(f"graftedpaint.image_{width}_{height}.png") 30 | 31 | # Split into 3 channels 32 | r, g, b, a = test_image.split() 33 | # Recombine back to RGB image 34 | test_image = Image.merge("RGB", (r, g, b)) 35 | test_mask = Image.merge("RGB", (a, a, a)) 36 | test_mask = ImageOps.invert(test_mask) 37 | 38 | image = images.fromPIL(test_image).to(self.manager.mode.device) 39 | mask = images.fromPIL(test_mask).to(self.manager.mode.device) 40 | 41 | def gen(args, engine, grafted, tag): 42 | pipeline = self.get_pipeline(engine) 43 | pipeline._pipeline.set_options({"grafted_inpaint": grafted}) 44 | self.save_output(f"{tag}_{width}_{height}", pipeline.generate(**args)[0]) 45 | 46 | kwargs = self.params( 47 | width=width, 48 | height=height, 49 | init_image=image, 50 | mask_image=mask, 51 | prompt=["An nvinkpunk cat wearing a spacesuit stares at a large moon"], 52 | seed=[420420420, 420420421, 420420422, 420420423], 53 | num_images_per_prompt=4, 54 | ) 55 | 56 | gen(kwargs, "justinkpunk", False, "ink") 57 | gen(kwargs, "withsd2inpaint", False, "sd2") 58 | gen(kwargs, "withsd2inpaint", True, "graft") 59 | 60 | clipargs = dict(**kwargs, clip_guidance_scale=0.5) 61 | clipargs["num_inference_steps"] = 96 62 | 63 | gen(clipargs, "justinkpunk", False, "clip_ink") 64 | gen(clipargs, "withsd2inpaint", False, "clip_sd2") 65 | gen(clipargs, "withsd2inpaint", True, "clip_graft") 66 | 67 | def test(self): 68 | self.testres(512, 512) 69 | self.testres(768, 600) 70 | self.testres(768, 768) 71 | 72 | 73 | runner = TestRunner( 74 | engine_path="graftedpaint.engine.yaml", prefix="graftedpaint", vramO=3 75 | ) 76 | runner.run() 77 | -------------------------------------------------------------------------------- /tests/happy_path.engines.yaml: -------------------------------------------------------------------------------- 1 | 2 | - model_id: "sd1" 3 | model: "runwayml/stable-diffusion-v1-5" 4 | local_model: "/weights/stable-diffusion-v1-5" 5 | local_model_fp16: "/weights/stable-diffusion-v1-5-fp16" 6 | use_auth_token: True 7 | 8 | - model_id: "sd1-inpainting" 9 | whitelist: "unet" 10 | model: "runwayml/stable-diffusion-inpainting" 11 | local_model: "/weights/stable-diffusion-inpainting" 12 | local_model_fp16: "/weights/stable-diffusion-inpainting-fp16" 13 | whitelist: ["unet", "text_encoder"] 14 | use_auth_token: True 15 | 16 | - model_id: 'sd2' 17 | model: 'stabilityai/stable-diffusion-2-base' 18 | 19 | - model_id: 'sd2-inpainting' 20 | model: 'stabilityai/stable-diffusion-2-inpainting' 21 | whitelist: ["unet", "text_encoder"] 22 | 23 | - model_id: 'sd2v' 24 | model: 'stabilityai/stable-diffusion-2' 25 | 26 | - model_id: "laion-clip-b" 27 | type: "clip" 28 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 29 
| has_fp16: False 30 | 31 | - id: "sd1" 32 | enabled: True 33 | name: "Stable Diffusion 1.5" 34 | description: "Stable Diffusion 1.5" 35 | class: "UnifiedPipeline" 36 | model: "@sd1" 37 | overrides: 38 | inpaint_unet: 39 | model: "@sd1-inpainting/unet" 40 | clip: 41 | model: "@laion-clip-b" 42 | 43 | - id: "sd2" 44 | enabled: True 45 | name: "Stable Diffusion 2" 46 | description: "Stable Diffusion 2" 47 | class: "UnifiedPipeline" 48 | model: "@sd2" 49 | overrides: 50 | inpaint_unet: 51 | model: "@sd2-inpainting/unet" 52 | clip: 53 | model: "@laion-clip-b" 54 | 55 | - id: "sd2v" 56 | enabled: True 57 | name: "Stable Diffusion 2 VPred" 58 | description: "Stable Diffusion 2" 59 | class: "UnifiedPipeline" 60 | model: "@sd2v" 61 | overrides: 62 | clip: 63 | model: "@laion-clip-b" 64 | 65 | -------------------------------------------------------------------------------- /tests/happy_path.image_512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/happy_path.image_512.png -------------------------------------------------------------------------------- /tests/happy_path.image_768.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/happy_path.image_768.png -------------------------------------------------------------------------------- /tests/happy_path.py: -------------------------------------------------------------------------------- 1 | import io 2 | import re 3 | from collections import OrderedDict 4 | 5 | from PIL import Image, ImageOps 6 | from test_harness import ALGORITHMS, TestHarness, VRAMUsageMonitor, generation_pb2 7 | 8 | from sdgrpcserver import images 9 | 10 | 11 | def load_masked_image(path): 12 | test_image = Image.open(path) 13 | 14 | # Split into 3 channels 15 | r, g, b, a = test_image.split() 16 | # Recombine back to RGB image 17 | test_image = Image.merge("RGB", (r, g, b)) 18 | test_mask = Image.merge("RGB", (a, a, a)) 19 | test_mask = ImageOps.invert(test_mask) 20 | 21 | with io.BytesIO() as output: 22 | test_image.save(output, format="PNG") 23 | test_image_png = output.getvalue() 24 | 25 | with io.BytesIO() as output: 26 | test_mask.save(output, format="PNG") 27 | test_mask_png = output.getvalue() 28 | 29 | return test_image_png, test_mask_png 30 | 31 | 32 | args = OrderedDict() 33 | args["sampler"] = [ 34 | {"sampler": "ddim", "eta": 0}, 35 | {"sampler": "ddim", "eta": 0.8}, 36 | {"sampler": "plms"}, 37 | {"sampler": "k_lms"}, 38 | {"sampler": "k_euler"}, 39 | {"sampler": "k_euler_ancestral"}, 40 | {"sampler": "k_heun"}, 41 | {"sampler": "k_dpm_2"}, 42 | {"sampler": "k_dpm_2_ancestral"}, 43 | {"sampler": "dpm_fast"}, 44 | {"sampler": "dpm_adaptive"}, 45 | {"sampler": "dpmspp_1"}, 46 | {"sampler": "dpmspp_2"}, 47 | {"sampler": "dpmspp_3"}, 48 | {"sampler": "dpmspp_2s_ancestral"}, 49 | {"sampler": "dpmspp_sde"}, 50 | {"sampler": "dpmspp_2m"}, 51 | ] 52 | args["image"] = [ 53 | {}, 54 | {"image": True, "strength": 0.25}, 55 | {"image": True, "strength": 0.5}, 56 | {"image": True, "strength": 0.75}, 57 | {"image": True, "mask": True, "strength": 0.5}, 58 | {"image": True, "mask": True, "strength": 1}, 59 | { 60 | "image": True, 61 | "mask": True, 62 | "strength": 1.5, 63 | }, 64 | ] 65 | args["engine"] = [{"engine": "sd1"}, {"engine": "sd2"}, {"engine": "sd2v"}] 66 | 
67 | 68 | image_by_size = { 69 | 512: load_masked_image("happy_path.image_512.png"), 70 | 768: load_masked_image("happy_path.image_768.png"), 71 | } 72 | 73 | 74 | class TestRunner(TestHarness): 75 | def __init__(self, combos, *args, **kwargs): 76 | super().__init__(*args, **kwargs) 77 | self.combos = combos 78 | 79 | def engine(self, item, request, prompt, parameters): 80 | request.engine_id = item["engine"] 81 | 82 | if item["engine"] == "sd2v": 83 | request.image.width = 768 84 | request.image.height = 768 85 | 86 | def sampler(self, item, request, prompt, parameters): 87 | request.image.transform.diffusion = ALGORITHMS[item["sampler"]] 88 | 89 | eta = item.get("eta", None) 90 | if eta != None: 91 | parameters.sampler.eta = eta 92 | 93 | def image(self, item, request, prompt, parameters): 94 | image, mask = image_by_size[request.image.height] 95 | 96 | if item.get("image", False): 97 | prompt.append( 98 | generation_pb2.Prompt( 99 | parameters=generation_pb2.PromptParameters(init=True), 100 | artifact=generation_pb2.Artifact( 101 | type=generation_pb2.ARTIFACT_IMAGE, binary=image 102 | ), 103 | ) 104 | ) 105 | 106 | parameters.schedule.start = item["strength"] 107 | parameters.schedule.end = 0.01 108 | 109 | if item.get("mask", False): 110 | prompt.append( 111 | generation_pb2.Prompt( 112 | artifact=generation_pb2.Artifact( 113 | type=generation_pb2.ARTIFACT_MASK, binary=mask 114 | ) 115 | ) 116 | ) 117 | 118 | def build_combinations(self, args, idx): 119 | if idx == len(args.keys()) - 1: 120 | key = list(args.keys())[idx] 121 | return [{key: item} for item in args[key]] 122 | 123 | key = list(args.keys())[idx] 124 | result = [] 125 | 126 | for item in args[key]: 127 | result += [ 128 | {**combo, key: item} for combo in self.build_combinations(args, idx + 1) 129 | ] 130 | 131 | return result 132 | 133 | def test(self): 134 | combinations = self.build_combinations(self.combos, 0) 135 | 136 | for combo in combinations: 137 | request_id = re.sub("[^\w]+", "_", repr(combo)) 138 | request_id = request_id.strip("_") 139 | 140 | prompt = [ 141 | generation_pb2.Prompt( 142 | text="Award wining DSLR photo of a shark in the deep ocean, f2/8 35mm Portra 400, highly detailed, trending on artstation" 143 | ) 144 | ] 145 | 146 | parameters = generation_pb2.StepParameter() 147 | 148 | request = generation_pb2.Request( 149 | engine_id="testengine", 150 | request_id=request_id, 151 | prompt=[], 152 | image=generation_pb2.ImageParameters( 153 | height=512, 154 | width=512, 155 | seed=[420420420], # It's the funny number 156 | steps=50, 157 | samples=1, 158 | parameters=[], 159 | ), 160 | ) 161 | 162 | if ( 163 | combo["sampler"]["sampler"] == "plms" 164 | and combo["engine"]["engine"] == "sd2v" 165 | ): 166 | continue 167 | 168 | for key, item in combo.items(): 169 | getattr(self, key)(item, request, prompt, parameters) 170 | 171 | for part in prompt: 172 | request.prompt.append(part) 173 | 174 | request.image.parameters.append(parameters) 175 | 176 | self.save_output(request_id, self.call_generator(request)) 177 | 178 | 179 | monitor = VRAMUsageMonitor() 180 | monitor.start() 181 | 182 | stats = {} 183 | 184 | for vramO in range(4): 185 | instance = TestRunner( 186 | engine_path="happy_path.engines.yaml", 187 | combos=args, 188 | prefix=f"hp_{vramO}", 189 | vramO=vramO, 190 | monitor=monitor, 191 | ) 192 | stats[f"run vram-optimisation-level={vramO}"] = instance.run() 193 | 194 | monitor.stop() 195 | 196 | print("Stats") 197 | print(repr(stats)) 198 | 
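An illustrative aside (not part of the repository snapshot): build_combinations in tests/happy_path.py above walks the OrderedDict of option lists and returns one dict per combination, keyed by option group, which is the same set of combinations itertools.product would give. A minimal sketch with a reduced option set, reusing names that appear in happy_path.py:

from collections import OrderedDict
from itertools import product

# Reduced option set; the keys and values mirror the ones used in happy_path.py.
options = OrderedDict()
options["sampler"] = [{"sampler": "ddim", "eta": 0}, {"sampler": "k_lms"}]
options["engine"] = [{"engine": "sd1"}, {"engine": "sd2"}]

# One dict per combination, keyed by option group: 2 x 2 = 4 combinations here.
combos = [dict(zip(options.keys(), values)) for values in product(*options.values())]
assert len(combos) == 4
assert combos[0] == {"sampler": {"sampler": "ddim", "eta": 0}, "engine": {"engine": "sd1"}}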
-------------------------------------------------------------------------------- /tests/image.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/image.png -------------------------------------------------------------------------------- /tests/mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/tests/mask.png -------------------------------------------------------------------------------- /tests/prompt_weights.engine.yaml: -------------------------------------------------------------------------------- 1 | - model_id: 'sd2' 2 | model: 'stabilityai/stable-diffusion-2-base' 3 | 4 | - model_id: "laion-clip-b" 5 | type: "clip" 6 | model: "laion/CLIP-ViT-B-32-laion2B-s34B-b79K" 7 | has_fp16: False 8 | - model_id: "laion-clip-h" 9 | type: "clip" 10 | model: "laion/CLIP-ViT-H-14-laion2B-s32B-b79K" 11 | has_fp16: False 12 | 13 | - id: "testengine" 14 | default: True 15 | enabled: True 16 | name: "Test Engine" 17 | description: "Test Engine" 18 | class: "UnifiedPipeline" 19 | model: "@sd2" 20 | overrides: 21 | clip: 22 | model: "@laion-clip-h" 23 | -------------------------------------------------------------------------------- /tests/prompt_weights.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from test_harness import ALGORITHMS, TestHarness 3 | 4 | 5 | class TestRunner(TestHarness): 6 | """ 7 | Tests to ensure grafted inpaint works 8 | """ 9 | 10 | def params(self, **extra): 11 | return { 12 | "height": 512, 13 | "width": 512, 14 | "guidance_scale": 7.5, 15 | "sampler": ALGORITHMS["k_euler"], 16 | "num_inference_steps": 64, 17 | "seed": 420420420, 18 | **extra, 19 | } 20 | 21 | def test(self): 22 | def gen(args, tag): 23 | pipeline = self.get_pipeline() 24 | self.save_output(f"{tag}", pipeline.generate(**args)[0]) 25 | 26 | for i in np.linspace(-0.5, 0.5, 5): 27 | prompt_tokens = [ 28 | ( 29 | "So let me tell you a story. One day I was walking under the summer sun. " 30 | "I had decided I wanted to take the evening air, and so had left the house around 5pm. " 31 | "It had been raining earlier, but in this golden hour the sun was gently warm against my skin. " 32 | "As I rounded a corner I had not walked around before I came across a wonderful sight. 
", 33 | 1.0, 34 | ), 35 | ("A DSLR photo of a meadow filled with ", 1.0), 36 | ("daisies", 1.0 + i), 37 | (" and ", 1.0), 38 | ("tulips", 1.0 - i), 39 | (", f/2.8 35mm Portra 400", 1.0), 40 | ] 41 | 42 | kwargs = self.params( 43 | prompt=[prompt_tokens], 44 | ) 45 | 46 | gen(kwargs, f"{i}") 47 | 48 | clipargs = dict(**kwargs, clip_guidance_scale=0.5) 49 | clipargs["num_inference_steps"] = 96 50 | 51 | gen(clipargs, f"clip_{i}") 52 | 53 | 54 | runner = TestRunner( 55 | engine_path="prompt_weights.engine.yaml", prefix="prompt_weights", vramO=3 56 | ) 57 | runner.run() 58 | -------------------------------------------------------------------------------- /tests/schedulers.py: -------------------------------------------------------------------------------- 1 | from test_harness import TestHarness, VRAMUsageMonitor, ALGORITHMS 2 | import os, sys, re, time 3 | from types import SimpleNamespace as SN 4 | 5 | import torch 6 | 7 | import generation_pb2, generation_pb2_grpc 8 | 9 | class TestRunner(TestHarness): 10 | 11 | def params(self, **extra): 12 | return { 13 | "height": 512, 14 | "width": 512, 15 | "guidance_scale": 7.5, 16 | "sampler": ALGORITHMS["k_euler_ancestral"], 17 | "eta": 0, 18 | "num_inference_steps": 50, 19 | "seed": -1, 20 | "strength": 0.8, 21 | **extra 22 | } 23 | 24 | def test(self): 25 | prompt = 'anime girl holding a giant NVIDIA Tesla A100 GPU graphics card, Anime Blu-Ray boxart, super high detail' 26 | seed = self.string_to_seed('hlky') 27 | 28 | for name, sampler in ALGORITHMS.items(): 29 | kwargs = self.params(sampler=sampler, seed=seed) 30 | self.save_output(name, self.get_pipeline('testengine').generate(prompt=prompt, **kwargs)[0]) 31 | 32 | runner = TestRunner(engine_path="engines.sd14.yaml", prefix=f"seed", vramO=2) 33 | runner.run() 34 | -------------------------------------------------------------------------------- /tests/test_harness.py: -------------------------------------------------------------------------------- 1 | """ 2 | isort:skip_file 3 | """ 4 | 5 | import os, sys, re, time, inspect, random 6 | 7 | import yaml 8 | 9 | try: 10 | from yaml import CLoader as Loader, CDumper as Dumper 11 | except ImportError: 12 | from yaml import Loader, Dumper 13 | 14 | import torch 15 | 16 | basePath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(basePath) 18 | 19 | # This line adds the various other module paths into the import searchpath 20 | from sdgrpcserver.server import main 21 | 22 | from sdgrpcserver.services.generate import GenerationServiceServicer 23 | from sdgrpcserver.manager import EngineMode, EngineManager 24 | from sdgrpcserver import images 25 | 26 | import generation_pb2 27 | 28 | from VRAMUsageMonitor import VRAMUsageMonitor 29 | 30 | ALGORITHMS = { 31 | "ddim": generation_pb2.SAMPLER_DDIM, 32 | "plms": generation_pb2.SAMPLER_DDPM, 33 | "k_euler": generation_pb2.SAMPLER_K_EULER, 34 | "k_euler_ancestral": generation_pb2.SAMPLER_K_EULER_ANCESTRAL, 35 | "k_heun": generation_pb2.SAMPLER_K_HEUN, 36 | "k_dpm_2": generation_pb2.SAMPLER_K_DPM_2, 37 | "k_dpm_2_ancestral": generation_pb2.SAMPLER_K_DPM_2_ANCESTRAL, 38 | "k_lms": generation_pb2.SAMPLER_K_LMS, 39 | "dpm_fast": generation_pb2.SAMPLER_DPM_FAST, 40 | "dpm_adaptive": generation_pb2.SAMPLER_DPM_ADAPTIVE, 41 | "dpmspp_1": generation_pb2.SAMPLER_DPMSOLVERPP_1ORDER, 42 | "dpmspp_2": generation_pb2.SAMPLER_DPMSOLVERPP_2ORDER, 43 | "dpmspp_3": generation_pb2.SAMPLER_DPMSOLVERPP_3ORDER, 44 | "dpmspp_2s_ancestral": generation_pb2.SAMPLER_DPMSOLVERPP_2S_ANCESTRAL, 45 | 
"dpmspp_sde": generation_pb2.SAMPLER_DPMSOLVERPP_SDE, 46 | "dpmspp_2m": generation_pb2.SAMPLER_DPMSOLVERPP_2M, 47 | } 48 | 49 | 50 | class FakeContext: 51 | def __init__(self, monitor): 52 | self.monitor = monitor 53 | 54 | def add_callback(self, callback): 55 | pass 56 | 57 | def set_code(self, code): 58 | print("Test failed") 59 | self.monitor.stop() 60 | sys.exit(-1) 61 | 62 | def set_details(self, code): 63 | pass 64 | 65 | 66 | class TestHarness: 67 | def __init__(self, engine_path, vramO=2, monitor=None, prefix=None): 68 | self.monitor_is_ours = False 69 | 70 | if monitor is None: 71 | self.monitor_is_ours = True 72 | monitor = VRAMUsageMonitor() 73 | 74 | self.monitor = monitor 75 | 76 | self.prefix = self.__class__ if prefix is None else prefix 77 | 78 | with open(os.path.normpath(engine_path), "r") as cfg: 79 | engines = yaml.load(cfg, Loader=Loader) 80 | 81 | self.manager = EngineManager( 82 | engines, 83 | weight_root="../weights/", 84 | mode=EngineMode( 85 | vram_optimisation_level=vramO, enable_cuda=True, enable_mps=False 86 | ), 87 | nsfw_behaviour="ignore", 88 | refresh_on_error=True, 89 | ) 90 | 91 | self.manager.loadPipelines() 92 | 93 | def get_pipeline(self, id="testengine"): 94 | return self.manager.getPipe(id) 95 | 96 | def call_generator(self, request): 97 | generator = GenerationServiceServicer(self.manager) 98 | context = FakeContext(self.monitor) 99 | 100 | return generator.Generate(request, context) 101 | 102 | def string_to_seed(self, string): 103 | return random.Random(string).randint(0, 2**32 - 1) 104 | 105 | def _flatten_outputs(self, output): 106 | if isinstance(output, list) or inspect.isgenerator(output): 107 | for item in output: 108 | yield from self._flatten_outputs(item) 109 | 110 | elif isinstance(output, torch.Tensor): 111 | if len(output.shape) == 4 and output.shape[0] > 1: 112 | yield from output.chunk(output.shape[0], dim=0) 113 | else: 114 | yield output 115 | 116 | elif isinstance(output, generation_pb2.Answer): 117 | yield from self._flatten_outputs( 118 | [ 119 | artifact 120 | for artifact in output.artifacts 121 | if artifact.type == generation_pb2.ARTIFACT_IMAGE 122 | ] 123 | ) 124 | 125 | else: 126 | yield output 127 | 128 | def save_output(self, suffix, output): 129 | 130 | for i, output in enumerate(self._flatten_outputs(output)): 131 | path = ( 132 | f"out/{self.prefix}_{suffix}_{i}.png" 133 | if i is not None 134 | else f"out/{self.prefix}_{suffix}.png" 135 | ) 136 | 137 | if isinstance(output, torch.Tensor): 138 | binary = images.toPngBytes(output)[0] 139 | with open(path, "wb") as f: 140 | f.write(binary) 141 | 142 | elif isinstance(output, generation_pb2.Artifact): 143 | with open(path, "wb") as f: 144 | f.write(output.binary) 145 | 146 | else: 147 | raise ValueError( 148 | f"Don't know how to handle output of class {output.__class__}" 149 | ) 150 | 151 | def run(self): 152 | if self.monitor_is_ours: 153 | self.monitor.start() 154 | 155 | self.monitor.read_and_reset() 156 | start_time = time.monotonic() 157 | print("Running....") 158 | self.test() 159 | end_time = time.monotonic() 160 | used, total = self.monitor.read_and_reset() 161 | 162 | if self.monitor_is_ours: 163 | self.monitor.stop() 164 | 165 | runstats = {"vramused": used, "time": end_time - start_time} 166 | print("Run complete", repr(runstats)) 167 | 168 | return runstats 169 | -------------------------------------------------------------------------------- /weights/.gitkeep: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/hafriedlander/stable-diffusion-grpcserver/903d966a3ae565811865b5c260497f4d4ed06e17/weights/.gitkeep --------------------------------------------------------------------------------