├── .dockerignore ├── .github ├── banner.svg ├── client-dalle.png ├── client-glid.png ├── client-select1.png ├── client-select2.png ├── client-select3.png ├── deprecation-banner.svg ├── docker-run.png ├── flow.svg ├── server-onstart.png ├── server-success.png └── server-wait.png ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── client.ipynb ├── executors ├── clipseg │ ├── Dockerfile │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt ├── dalle │ ├── Dockerfile │ ├── config.yml │ ├── executor │ │ ├── __init__.py │ │ ├── dalle.py │ │ └── dm_helper.py │ ├── manifest.yml │ └── requirements.txt ├── glid3 │ ├── Dockerfile │ ├── clip_blank_encoding.json │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt ├── realesrgan │ ├── Dockerfile │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt ├── stable │ ├── Dockerfile │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt ├── store │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt ├── swinir │ ├── Dockerfile │ ├── config.yml │ ├── executor.py │ ├── manifest.yml │ └── requirements.txt └── waifu │ └── executor.py ├── flow-jcloud.yml ├── flow.yml ├── flow_parser.py ├── k8s_flow ├── dalle │ └── dalle.yml ├── diffusion │ └── diffusion.yml ├── gateway │ └── gateway.yml ├── ingress.yml ├── store │ └── store.yml └── upscaler │ └── upscaler.yml ├── requirements.txt └── start.sh /.dockerignore: -------------------------------------------------------------------------------- 1 | # ignore everything 2 | ** 3 | .* 4 | 5 | # Allow files and directories 6 | !executors/** 7 | !flow.yml 8 | !flow_parser.py 9 | !start.sh 10 | !*requirements.txt 11 | !README.md 12 | -------------------------------------------------------------------------------- /.github/client-dalle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/client-dalle.png -------------------------------------------------------------------------------- /.github/client-glid.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/client-glid.png -------------------------------------------------------------------------------- /.github/client-select1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/client-select1.png -------------------------------------------------------------------------------- /.github/client-select2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/client-select2.png -------------------------------------------------------------------------------- /.github/client-select3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/client-select3.png -------------------------------------------------------------------------------- /.github/deprecation-banner.svg: -------------------------------------------------------------------------------- 1 | <svg fill="none" viewBox="0 0 1000 100" 
xmlns="http://www.w3.org/2000/svg"> 2 | <foreignObject width="100%" height="100%"> 3 | <div xmlns="http://www.w3.org/1999/xhtml" 4 | style="background-color: #f8d7da; text-align:center; margin-left: auto; margin-right: auto; border-radius: 6px; border-style: solid; border-width: 1px; border-color: #f5c6cb; color: #24292e;"> 5 | <style> p { font-family: sans-serif; font-size: max(1em, 12px); ation: mymove 2s infinite;--> 6 | } @keyframes mymove { from { 7 | color: red; } to { color: yellow; } } </style> 8 | <p> 9 | <a href="fast"> 10 | <!--startmsg--> ⚠️ For cost-efficiency, we no longer host a DALL-E Flow demo server. Click 11 | here to see how you can deploy it for yourself! </a> 12 | </p> 13 | </div> 14 | </foreignObject> 15 | </svg> 16 | -------------------------------------------------------------------------------- /.github/docker-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/docker-run.png -------------------------------------------------------------------------------- /.github/flow.svg: -------------------------------------------------------------------------------- 1 | <svg viewBox="0 0 1191.96875 274" style="max-width: 1191.96875px;" height="274" aria-labelledby="chart-title-mermaid-svg chart-desc-mermaid-svg" role="img" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns="http://www.w3.org/2000/svg" width="100%" id="mermaid-svg"><title id="chart-title-mermaid-svg"></title><desc id="chart-desc-mermaid-svg"></desc><style>#mermaid-svg {font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;fill:#333;}#mermaid-svg .error-icon{fill:hsl(180, 0%, 100%);}#mermaid-svg .error-text{fill:rgb(0, 0, 0);stroke:rgb(0, 0, 0);}#mermaid-svg .edge-thickness-normal{stroke-width:2px;}#mermaid-svg .edge-thickness-thick{stroke-width:3.5px;}#mermaid-svg .edge-pattern-solid{stroke-dasharray:0;}#mermaid-svg .edge-pattern-dashed{stroke-dasharray:3;}#mermaid-svg .edge-pattern-dotted{stroke-dasharray:2;}#mermaid-svg .marker{fill:#a6d8da;stroke:#a6d8da;}#mermaid-svg .marker.cross{stroke:#a6d8da;}#mermaid-svg svg{font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:16px;}#mermaid-svg .label{font-family:"trebuchet ms",verdana,arial,sans-serif;color:#333;}#mermaid-svg .cluster-label text{fill:rgb(0, 0, 0);}#mermaid-svg .cluster-label span{color:rgb(0, 0, 0);}#mermaid-svg .label text,#mermaid-svg span{fill:#333;color:#333;}#mermaid-svg .node rect,#mermaid-svg .node circle,#mermaid-svg .node ellipse,#mermaid-svg .node polygon,#mermaid-svg .node path{fill:#32C8CD;stroke:#fff;stroke-width:1px;}#mermaid-svg .node .label{text-align:center;}#mermaid-svg .node.clickable{cursor:pointer;}#mermaid-svg .arrowheadPath{fill:undefined;}#mermaid-svg .edgePath .path{stroke:#a6d8da;stroke-width:2.0px;}#mermaid-svg .flowchart-link{stroke:#a6d8da;fill:none;}#mermaid-svg .edgeLabel{background-color:hsl(-120, 0%, 100%);text-align:center;}#mermaid-svg .edgeLabel rect{opacity:0.5;background-color:hsl(-120, 0%, 100%);fill:hsl(-120, 0%, 100%);}#mermaid-svg .cluster rect{fill:#EEEDE78C;stroke:none;stroke-width:1px;}#mermaid-svg .cluster text{fill:rgb(0, 0, 0);}#mermaid-svg .cluster span{color:rgb(0, 0, 0);}#mermaid-svg div.mermaidTooltip{position:absolute;text-align:center;max-width:200px;padding:2px;font-family:"trebuchet ms",verdana,arial,sans-serif;font-size:12px;background:hsl(180, 0%, 100%);border:1px solid 
undefined;border-radius:2px;pointer-events:none;z-index:100;}#mermaid-svg :root{--mermaid-font-family:"trebuchet ms",verdana,arial,sans-serif;}#mermaid-svg .INSPECT>*{stroke:#F29C9F!important;}#mermaid-svg .INSPECT span{stroke:#F29C9F!important;}#mermaid-svg .JOIN_INSPECT>*{stroke:#F29C9F!important;}#mermaid-svg .JOIN_INSPECT span{stroke:#F29C9F!important;}#mermaid-svg .GATEWAY>*{fill:none!important;color:#000!important;stroke:none!important;}#mermaid-svg .GATEWAY span{fill:none!important;color:#000!important;stroke:none!important;}#mermaid-svg .INSPECT_AUX_PASS>*{stroke-dasharray:2 2!important;}#mermaid-svg .INSPECT_AUX_PASS span{stroke-dasharray:2 2!important;}#mermaid-svg .HEADTAIL>*{fill:#32C8CD1D!important;}#mermaid-svg .HEADTAIL span{fill:#32C8CD1D!important;}#mermaid-svg .EXTERNAL>*{fill:#fff!important;stroke:#32C8CD!important;}#mermaid-svg .EXTERNAL span{fill:#fff!important;stroke:#32C8CD!important;}</style><g transform="translate(0, 0)"><marker orient="auto" markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="5" refX="9" viewBox="0 0 10 10" class="marker flowchart" id="flowchart-pointEnd"><path style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 0 L 10 5 L 0 10 z"></path></marker><marker orient="auto" markerHeight="12" markerWidth="12" markerUnits="userSpaceOnUse" refY="5" refX="0" viewBox="0 0 10 10" class="marker flowchart" id="flowchart-pointStart"><path style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 0 5 L 10 10 L 10 0 z"></path></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5" refX="11" viewBox="0 0 10 10" class="marker flowchart" id="flowchart-circleEnd"><circle style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"></circle></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5" refX="-1" viewBox="0 0 10 10" class="marker flowchart" id="flowchart-circleStart"><circle style="stroke-width: 1; stroke-dasharray: 1, 0;" class="arrowMarkerPath" r="5" cy="5" cx="5"></circle></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5.2" refX="12" viewBox="0 0 11 11" class="marker cross flowchart" id="flowchart-crossEnd"><path style="stroke-width: 2; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 l 9,9 M 10,1 l -9,9"></path></marker><marker orient="auto" markerHeight="11" markerWidth="11" markerUnits="userSpaceOnUse" refY="5.2" refX="-1" viewBox="0 0 11 11" class="marker cross flowchart" id="flowchart-crossStart"><path style="stroke-width: 2; stroke-dasharray: 1, 0;" class="arrowMarkerPath" d="M 1,1 l 9,9 M 10,1 l -9,9"></path></marker><g class="root"><g class="clusters"></g><g class="edgePaths"><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-gatewaystart LE-clip_encoder" id="L-gatewaystart-clip_encoder-0" d="M59.12702922077922,154L67.22564935064935,164C75.32426948051948,174,91.52150974025973,194,103.78679653679653,204C116.05208333333333,214,124.38541666666667,214,128.55208333333334,214L132.71875,214"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-gatewaystart LE-dalle" id="L-gatewaystart-dalle-0" 
d="M59.12702922077922,120L67.22564935064935,110C75.32426948051948,100,91.52150974025973,80,112.73210903679653,70C133.94270833333334,60,160.16666666666666,60,186.390625,60C212.61458333333334,60,238.83854166666666,60,256.1171875,60C273.3958333333333,60,281.7291666666667,60,285.8958333333333,60L290.0625,60"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-clip_encoder LE-diffusion" id="L-clip_encoder-diffusion-0" d="M240.0625,214L244.22916666666666,214C248.39583333333334,214,256.7291666666667,214,272.7838541666667,214C288.8385416666667,214,312.6145833333333,214,324.5026041666667,214L336.390625,214"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-dalle LE-rerank" id="L-dalle-rerank-0" d="M552.609375,60L556.7760416666666,60C560.9427083333334,60,569.2760416666666,60,580.6758658008658,70C592.0756899350649,80,606.5420048701299,100,613.7751623376623,110L621.0083198051948,120"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-diffusion LE-rerank" id="L-diffusion-rerank-0" d="M506.28125,214L518.1692708333334,214C530.0572916666666,214,553.8333333333334,214,572.9545116341991,204C592.0756899350649,194,606.5420048701299,174,613.7751623376623,164L621.0083198051948,154"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-rerank LE-upscaler" id="L-rerank-upscaler-0" d="M664,137L668.1666666666666,137C672.3333333333334,137,680.6666666666666,137,689,137C697.3333333333334,137,705.6666666666666,137,709.8333333333334,137L714,137"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-upscaler LE-store" id="L-upscaler-store-0" d="M887.4375,137L891.6041666666666,137C895.7708333333334,137,904.1041666666666,137,912.4375,137C920.7708333333334,137,929.1041666666666,137,933.2708333333334,137L937.4375,137"></path><path marker-end="url(#flowchart-pointEnd)" style="fill:none;" class="edge-thickness-normal edge-pattern-solid flowchart-link LS-store LE-gatewayend" id="L-store-gatewayend-0" d="M1059.25,137L1063.4166666666667,137C1067.5833333333333,137,1075.9166666666667,137,1084.25,137C1092.5833333333333,137,1100.9166666666667,137,1105.0833333333333,137L1109.25,137"></path></g><g class="edgeLabels"><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span 
class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g><g class="edgeLabel"><g transform="translate(0, 0)" class="label"><foreignObject height="0" width="0"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="edgeLabel"></span></div></foreignObject></g></g></g><g class="nodes"><g transform="translate(929.9375, 77)" class="root"><g class="clusters"><g id="store" class="cluster DEPLOYMENT DEPLOYMENT DEPLOYMENT DEPLOYMENT"><rect height="104" width="121.8125" y="8" x="8" ry="0" rx="0" style=""></rect><g transform="translate(50.7265625, 13)" class="cluster-label"><foreignObject height="19" width="36.359375"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">store</span></div></foreignObject></g></g></g><g class="edgePaths"></g><g class="edgeLabels"></g><g class="nodes"><g transform="translate(68.90625, 60)" id="flowchart-store/rep-0-23" class="node default pod"><rect height="34" width="71.8125" y="-17" x="-35.90625" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-28.40625, -9.5)" style="" class="label"><foreignObject height="19" width="56.8125"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">MyStore</span></div></foreignObject></g></g></g></g><g transform="translate(706.5, 77)" class="root"><g class="clusters"><g id="upscaler" class="cluster DEPLOYMENT DEPLOYMENT DEPLOYMENT DEPLOYMENT"><rect height="104" width="173.4375" y="8" x="8" ry="0" rx="0" style=""></rect><g transform="translate(64.65625, 13)" class="cluster-label"><foreignObject height="19" width="60.125"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">upscaler</span></div></foreignObject></g></g></g><g class="edgePaths"></g><g class="edgeLabels"></g><g class="nodes"><g transform="translate(94.71875, 60)" id="flowchart-upscaler/rep-0-22" class="node default pod"><rect height="34" width="123.4375" y="-17" x="-61.71875" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-54.21875, -9.5)" style="" class="label"><foreignObject height="19" width="108.4375"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">SwinIRUpscaler</span></div></foreignObject></g></g></g></g><g transform="translate(328.890625, 154)" class="root"><g class="clusters"><g id="diffusion" class="cluster DEPLOYMENT DEPLOYMENT DEPLOYMENT DEPLOYMENT"><rect height="104" width="169.890625" y="8" x="8" ry="0" rx="0" style=""></rect><g transform="translate(61.734375, 13)" class="cluster-label"><foreignObject height="19" 
width="62.421875"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">diffusion</span></div></foreignObject></g></g></g><g class="edgePaths"></g><g class="edgeLabels"></g><g class="nodes"><g transform="translate(92.9453125, 60)" id="flowchart-diffusion/rep-0-21" class="node default pod"><rect height="34" width="119.890625" y="-17" x="-59.9453125" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-52.4453125, -9.5)" style="" class="label"><foreignObject height="19" width="104.890625"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">GLID3Diffusion</span></div></foreignObject></g></g></g></g><g transform="translate(282.5625, 0)" class="root"><g class="clusters"><g id="dalle" class="cluster DEPLOYMENT DEPLOYMENT DEPLOYMENT DEPLOYMENT"><rect height="104" width="262.546875" y="8" x="8" ry="0" rx="0" style=""></rect><g transform="translate(121.53125, 13)" class="cluster-label"><foreignObject height="19" width="35.484375"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">dalle</span></div></foreignObject></g></g></g><g class="edgePaths"></g><g class="edgeLabels"></g><g class="nodes"><g transform="translate(139.2734375, 60)" id="flowchart-dalle/rep-0-20" class="node default pod"><rect height="34" width="212.546875" y="-17" x="-106.2734375" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-98.7734375, -9.5)" style="" class="label"><foreignObject height="19" width="197.546875"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">executors/dalle/config.yml</span></div></foreignObject></g></g></g></g><g transform="translate(45.359375, 137)" id="flowchart-gatewaystart-24" class="node default GATEWAY GATEWAY"><rect height="34" width="74.71875" y="-17" x="-37.359375" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-29.859375, -9.5)" style="" class="label"><foreignObject height="19" width="59.71875"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">gateway</span></div></foreignObject></g></g><g transform="translate(186.390625, 214)" id="flowchart-clip_encoder-27" class="node default EXTERNAL EXTERNAL"><rect height="34" width="107.34375" y="-17" x="-53.671875" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-46.171875, -9.5)" style="" class="label"><foreignObject height="19" width="92.34375"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">clip_encoder</span></div></foreignObject></g></g><g transform="translate(633.3046875, 137)" id="flowchart-rerank-31" class="node default EXTERNAL EXTERNAL EXTERNAL"><rect height="34" width="61.390625" y="-17" x="-30.6953125" ry="0" rx="0" style="" class="basic label-container"></rect><g transform="translate(-23.1953125, -9.5)" style="" class="label"><foreignObject height="19" width="46.390625"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">rerank</span></div></foreignObject></g></g><g transform="translate(1146.609375, 137)" id="flowchart-gatewayend-39" class="node default GATEWAY"><rect height="34" width="74.71875" y="-17" x="-37.359375" ry="0" rx="0" style="" 
class="basic label-container"></rect><g transform="translate(-29.859375, -9.5)" style="" class="label"><foreignObject height="19" width="59.71875"><div style="display: inline-block; white-space: nowrap;" xmlns="http://www.w3.org/1999/xhtml"><span class="nodeLabel">gateway</span></div></foreignObject></g></g></g></g></g><style>@import url("https://cdnjs.cloudflare.com/ajax/libs/font-awesome/5.15.2/css/all.min.css");</style></svg> -------------------------------------------------------------------------------- /.github/server-onstart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/server-onstart.png -------------------------------------------------------------------------------- /.github/server-success.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/server-success.png -------------------------------------------------------------------------------- /.github/server-wait.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jina-ai/dalle-flow/8539f97995f0d6d139edfefcd670c914cd06c366/.github/server-wait.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # OS 2 | .DS_Store 3 | 4 | # Byte-compiled / optimized / DLL files 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | pip-wheel-metadata/ 27 | share/python-wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | MANIFEST 32 | 33 | # PyInstaller 34 | # Usually these files are written by a python script from a template 35 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 36 | *.manifest 37 | *.spec 38 | 39 | # Installer logs 40 | pip-log.txt 41 | pip-delete-this-directory.txt 42 | 43 | # Unit test / coverage reports 44 | htmlcov/ 45 | .tox/ 46 | .nox/ 47 | .coverage 48 | .coverage.* 49 | .cache 50 | nosetests.xml 51 | coverage.xml 52 | *.cover 53 | *.py,cover 54 | .hypothesis/ 55 | .pytest_cache/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | target/ 79 | 80 | # Jupyter Notebook 81 | .ipynb_checkpoints 82 | 83 | # IPython 84 | profile_default/ 85 | ipython_config.py 86 | 87 | # pyenv 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # Jina 135 | .jina/ 136 | 137 | # Temporary flow files 138 | flow.tmp.yml -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04 2 | 3 | # given by builder 4 | ARG PIP_TAG 5 | # something like "gcc libc-dev make libatlas-base-dev ruby-dev" 6 | ARG APT_PACKAGES="git wget" 7 | 8 | WORKDIR /dalle 9 | 10 | ADD requirements.txt dalle-flow/ 11 | ADD flow.yml dalle-flow/ 12 | ADD flow_parser.py dalle-flow/ 13 | ADD start.sh dalle-flow/ 14 | 15 | RUN chmod +x dalle-flow/start.sh 16 | 17 | ENV PIP_NO_CACHE_DIR=1 \ 18 | PIP_DISABLE_PIP_VERSION_CHECK=1 19 | ENV DEBIAN_FRONTEND=noninteractive 20 | ENV TZ=Etc/UTC 21 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 22 | 23 | RUN apt-get update \ 24 | && apt-get install software-properties-common -y \ 25 | && add-apt-repository ppa:deadsnakes/ppa \ 26 | && apt-get install python3.10 python3.10-dev -y \ 27 | && apt-get install -y --no-install-recommends sudo python3 python3-pip wget apt-utils libglib2.0-0 libsm6 libxrender1 libxext6 libgl1 \ 28 | && ln -sf python3 /usr/bin/python \ 29 | && ln -sf pip3 /usr/bin/pip \ 30 | && pip install --upgrade pip \ 31 | && pip install --upgrade virtualenv \ 32 | && pip install wheel setuptools 33 | 34 | RUN if [ -n "${APT_PACKAGES}" ]; then apt-get update && apt-get install --no-install-recommends -y ${APT_PACKAGES}; fi && \ 35 | git clone --depth=1 https://github.com/jina-ai/SwinIR.git && \ 36 | git clone --depth=1 https://github.com/CompVis/latent-diffusion.git && \ 37 | git clone --depth=1 https://github.com/jina-ai/glid-3-xl.git && \ 38 | git clone --depth=1 --branch v0.0.15 https://github.com/AmericanPresidentJimmyCarter/stable-diffusion.git && \ 39 | cd dalle-flow && python3 -m virtualenv --python=/usr/bin/python3.10 env && . env/bin/activate && cd - && \ 40 | pip install --upgrade cython && \ 41 | pip install --upgrade pyyaml && \ 42 | git clone --depth=1 https://github.com/timojl/clipseg.git && \ 43 | pip install jax[cuda11_cudnn82]~=0.3.24 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html && \ 44 | pip uninstall -y torch torchvision torchaudio && \ 45 | pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 && \ 46 | pip install PyYAML numpy tqdm pytorch_lightning einops numpy omegaconf && \ 47 | pip install https://github.com/crowsonkb/k-diffusion/archive/master.zip && \ 48 | pip install basicsr facexlib gfpgan && \ 49 | pip install realesrgan && \ 50 | pip install https://github.com/AmericanPresidentJimmyCarter/xformers-builds/raw/master/cu116/xformers-0.0.14.dev0-cp310-cp310-linux_x86_64.whl && \ 51 | cd latent-diffusion && pip install --timeout=1000 -e . && cd - && \ 52 | cd glid-3-xl && pip install --timeout=1000 -e . 
&& cd - && \ 53 | cd dalle-flow && pip install --timeout=1000 --compile -r requirements.txt && cd - && \ 54 | cd stable-diffusion && pip install --timeout=1000 -e . && cd - && \ 55 | cd SwinIR && pip install --timeout=1000 -e . && cd - && \ 56 | cd clipseg && pip install --timeout=1000 -e . && cd - && \ 57 | cd glid-3-xl && \ 58 | # now remove apt packages 59 | if [ -n "${APT_PACKAGES}" ]; then apt-get remove -y --auto-remove ${APT_PACKAGES} && apt-get autoremove && apt-get clean && rm -rf /var/lib/apt/lists/*; fi 60 | 61 | COPY executors dalle-flow/executors 62 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 63 | 64 | ARG USER_ID=1000 65 | ARG GROUP_ID=1000 66 | 67 | ARG USER_NAME=dalle 68 | ARG GROUP_NAME=dalle 69 | 70 | RUN groupadd -g ${GROUP_ID} ${USER_NAME} && \ 71 | useradd -l -u ${USER_ID} -g ${USER_NAME} ${GROUP_NAME} | chpasswd && \ 72 | adduser ${USER_NAME} sudo && \ 73 | echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers && \ 74 | mkdir /home/${USER_NAME} && \ 75 | chown ${USER_NAME}:${GROUP_NAME} /home/${USER_NAME} && \ 76 | chown -R ${USER_NAME}:${GROUP_NAME} /dalle/ 77 | 78 | USER ${USER_NAME} 79 | 80 | WORKDIR /dalle/dalle-flow 81 | 82 | ENTRYPOINT ["./start.sh"] 83 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. 
For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | <p align="center">
2 | <!--startmsg-->
3 | <a href="#server"><img src="./.github/deprecation-banner.svg?raw=true"></a>
4 | <!--endmsg--><p align="center">
5 | 
6 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/banner.svg?raw=true" alt="DALL·E Flow: A Human-in-the-loop workflow for creating HD images from text" width="60%">
7 | <br>
8 | <b>A Human-in-the-loop<sup><a href="https://en.wikipedia.org/wiki/Human-in-the-loop">?</a></sup> workflow for creating HD images from text</b>
9 | </p>
10 | 
11 | <p align=center>
12 | <a href="https://discord.jina.ai"><img src="https://img.shields.io/discord/1106542220112302130?logo=discord&logoColor=white&style=flat-square"></a>
13 | <a href="https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb"><img src="https://img.shields.io/badge/Open-in%20Colab-brightgreen?logo=google-colab&style=flat-square" alt="Open in Google Colab"/></a>
14 | <a href="https://hub.docker.com/r/jinaai/dalle-flow"><img alt="Docker Image Size (latest by date)" src="https://img.shields.io/docker/image-size/jinaai/dalle-flow?logo=docker&logoColor=white&style=flat-square"></a>
15 | 
16 | </p>
17 | 
18 | 
19 | 
20 | 
21 | DALL·E Flow is an interactive workflow for generating high-definition images from a text prompt. First, it leverages [DALL·E-Mega](https://github.com/borisdayma/dalle-mini), [GLID-3 XL](https://github.com/Jack000/glid-3-xl), and [Stable Diffusion](https://github.com/CompVis/stable-diffusion) to generate image candidates, and then calls [CLIP-as-service](https://github.com/jina-ai/clip-as-service) to rank the candidates w.r.t. the prompt. The preferred candidate is fed to [GLID-3 XL](https://github.com/Jack000/glid-3-xl) for diffusion, which often enriches the texture and background. Finally, the candidate is upscaled to 1024x1024 via [SwinIR](https://github.com/JingyunLiang/SwinIR).
22 | 
23 | DALL·E Flow is built with [Jina](https://github.com/jina-ai/jina) in a client-server architecture, which gives it high scalability, non-blocking streaming, and a modern Pythonic interface. Clients can interact with the server via gRPC/WebSocket/HTTP with TLS.
24 | 
25 | **Why Human-in-the-loop?** Generative art is a creative process. While recent advances such as DALL·E unleash people's creativity, a single-prompt-single-output UX/UI locks the imagination to a _single_ possibility, which is limiting no matter how good that single result is. DALL·E Flow is an alternative to the one-liner: it formalizes generative art as an iterative procedure.
26 | 
27 | ## Usage
28 | 
29 | DALL·E Flow uses a client-server architecture.
30 | - [Client usage](#Client)
31 | - [Server usage, i.e. deploy your own server](#Server)
32 | 
33 | 
34 | ## Updates
35 | 
36 | - 🌟 **2022/10/27** [RealESRGAN upscalers](https://github.com/xinntao/Real-ESRGAN) have been added.
37 | - ⚠️ **2022/10/26** To use CLIP-as-service available at `grpcs://api.clip.jina.ai:2096` (requires `jina >= v3.11.0`), you first need to get an access token from [here](https://console.clip.jina.ai/get_started). See [Use the CLIP-as-service](#use-the-clip-as-service) for more details.
38 | - 🌟 **2022/9/25** Automated [CLIP-based segmentation](https://github.com/timojl/clipseg) from a prompt has been added.
39 | - 🌟 **2022/8/17** Text-to-image via [Stable Diffusion](https://github.com/CompVis/stable-diffusion) has been added. To use it, you will need to agree to their ToS, download the weights, and then enable the flag in Docker or `flow_parser.py`.
40 | - ⚠️ **2022/8/8** Started using CLIP-as-service as an [external executor](https://docs.jina.ai/fundamentals/flow/add-executors/#external-executors). Now you can easily [deploy your own CLIP executor](#run-your-own-clip) if you want. There is [a small breaking change](https://github.com/jina-ai/dalle-flow/pull/74/files#diff-b335630551682c19a781afebcf4d07bf978fb1f8ac04c6bf87428ed5106870f5R103) as a result of this improvement, so [please _reopen_ the notebook in Google Colab](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb).
41 | - ⚠️ **2022/7/6** The demo server has migrated to AWS EKS for better availability and robustness; **the server URL has changed to `grpcs://dalle-flow.dev.jina.ai`**. All connections now use TLS encryption, so [please _reopen_ the notebook in Google Colab](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb).
42 | - ⚠️ **2022/6/25** Unexpected downtime between 6/25 0:00 - 12:00 CET due to exhausted GPU quotas. The new server now has 2 GPUs, and a healthcheck has been added to the client notebook (see the sketch after this list).
43 | - **2022/6/3** Reduced the default number of images to 2 per pathway, 4 for diffusion.
44 | - 🐳 **2022/6/21** [A prebuilt image is now available on Docker Hub!](https://hub.docker.com/r/jinaai/dalle-flow) This image can be run out-of-the-box on CUDA 11.6. Fixed an upstream bug in CLIP-as-service.
45 | - ⚠️ **2022/5/23** Fixed an upstream bug in CLIP-as-service that made the 2nd diffusion step irrelevant to the given texts. The new Dockerfile proved to be reproducible on an AWS EC2 `p2.8xlarge` instance.
46 | - **2022/5/13b** Removed TLS, as Cloudflare imposes a 100s timeout that made DALL·E Flow unusable. [Please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb)
47 | - 🔐 **2022/5/13** New Mega checkpoint! All connections now use TLS; [please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb)
48 | - 🐳 **2022/5/10** [A Dockerfile is added! Now you can easily deploy your own DALL·E Flow](#run-in-docker). New Mega checkpoint! Smaller memory footprint; the whole Flow can now fit into **one GPU with 21GB memory**.
49 | - 🌟 **2022/5/7** New Mega checkpoint & multiple optimizations on GLID3: smaller memory footprint, `ViT-L/14@336px` from CLIP-as-service, `steps 100->200`.
50 | - 🌟 **2022/5/6** DALL·E Flow just got updated! [Please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb)
51 |   - Revised the first step: 16 candidates are generated, 8 from DALL·E Mega, 8 from GLID3-XL; then ranked by CLIP-as-service.
52 |   - Improved the flow efficiency: the overall speed, including diffusion and upscaling, is much faster now!
53 | 54 | 55 | ## Gallery 56 | 57 | <img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20realistic%20photo%20of%20a%20muddy%20dog.png?raw=true" width="32%" alt="a realistic photo of a muddy dog" title="a realistic photo of a muddy dog"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20scientist%20comparing%20apples%20and%20oranges%2C%20by%20Norman%20Rockwell.png?raw=true" width="32%" alt="A scientist comparing apples and oranges, by Norman Rockwell" title="A scientist comparing apples and oranges, by Norman Rockwell"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20portrait%20of%20the%20regal%20Burger%20King%20posing%20with%20a%20Whopper.png?raw=true" width="32%" alt="an oil painting portrait of the regal Burger King posing with a Whopper" title="an oil painting portrait of the regal Burger King posing with a Whopper"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/Eternal%20clock%20powered%20by%20a%20human%20cranium%2C%20artstation.png?raw=true" width="32%" alt="Eternal clock powered by a human cranium, artstation" title="Eternal clock powered by a human cranium, artstation"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/another%20planet%20amazing%20landscape.png?raw=true" width="32%" alt="another planet amazing landscape" title="another planet amazing landscape"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/The%20Decline%20and%20Fall%20of%20the%20Roman%20Empire%20board%20game%20kickstarter.png?raw=true" width="32%" alt="The Decline and Fall of the Roman Empire board game kickstarter" title="The Decline and Fall of the Roman Empire board game kickstarter"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20raccoon%20astronaut%20with%20the%20cosmos%20reflecting%20on%20the%20glass%20of%20his%20helmet%20dreaming%20of%20the%20stars%2C%20digital%20art.png?raw=true" width="32%" alt="A raccoon astronaut with the cosmos reflecting on the glass of his helmet dreaming of the stars, digital art" title="A raccoon astronaut with the cosmos reflecting on the glass of his helmet dreaming of the stars, digital art"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20photograph%20of%20an%20apple%20that%20is%20a%20disco%20ball%2C%2085%20mm%20lens%2C%20studio%20lighting.png?raw=true" width="32%" alt="A photograph of an apple that is a disco ball, 85 mm lens, studio lighting" title="A photograph of an apple that is a disco ball, 85 mm lens, studio lighting"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20cubism%20painting%20Donald%20trump%20happy%20cyberpunk.png?raw=true" width="32%" alt="a cubism painting Donald trump happy cyberpunk" title="a cubism painting Donald trump happy cyberpunk"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/oil%20painting%20of%20a%20hamster%20drinking%20tea%20outside.png?raw=true" width="32%" alt="oil painting of a hamster drinking tea outside" title="oil painting of a hamster drinking tea outside"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/Colossus%20of%20Rhodes%20by%20Max%20Ernst.png?raw=true" width="32%" alt="Colossus of Rhodes by Max Ernst" title="Colossus of Rhodes by Max Ernst"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/landscape%20with%20great%20castle%20in%20middle%20of%20forest.png?raw=true" width="32%" alt="landscape with great castle in middle of 
forest" title="landscape with great castle in middle of forest"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20medieval%20oil%20painting%20of%20Kanye%20west%20feels%20satisfied%20while%20playing%20chess%20in%20the%20style%20of%20Expressionism.png?raw=true" width="32%" alt="an medieval oil painting of Kanye west feels satisfied while playing chess in the style of Expressionism" title="an medieval oil painting of Kanye west feels satisfied while playing chess in the style of Expressionism"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/An%20oil%20pastel%20painting%20of%20an%20annoyed%20cat%20in%20a%20spaceship.png?raw=true" width="32%" alt="An oil pastel painting of an annoyed cat in a spaceship" title="An oil pastel painting of an annoyed cat in a spaceship"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/dinosaurs%20at%20the%20brink%20of%20a%20nuclear%20disaster.png?raw=true" width="32%" alt="dinosaurs at the brink of a nuclear disaster" title="dinosaurs at the brink of a nuclear disaster"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/fantasy%20landscape%20with%20medieval%20city.png?raw=true" width="32%" alt="fantasy landscape with medieval city" title="fantasy landscape with medieval city"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/GPU%20chip%20in%20the%20form%20of%20an%20avocado%2C%20digital%20art.png?raw=true" width="32%" alt="GPU chip in the form of an avocado, digital art" title="GPU chip in the form of an avocado, digital art"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20giant%20rubber%20duck%20in%20the%20ocean.png?raw=true" width="32%" alt="a giant rubber duck in the ocean" title="a giant rubber duck in the ocean"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/Paddington%20bear%20as%20austrian%20emperor%20in%20antique%20black%20%26%20white%20photography.png?raw=true" width="32%" alt="Paddington bear as austrian emperor in antique black & white photography" title="Paddington bear as austrian emperor in antique black & white photography"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20rainy%20night%20with%20a%20superhero%20perched%20above%20a%20city%2C%20in%20the%20style%20of%20a%20comic%20book.png?raw=true" width="32%" alt="a rainy night with a superhero perched above a city, in the style of a comic book" title="a rainy night with a superhero perched above a city, in the style of a comic book"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20synthwave%20style%20sunset%20above%20the%20reflecting%20water%20of%20the%20sea%2C%20digital%20art.png?raw=true" width="32%" alt="A synthwave style sunset above the reflecting water of the sea, digital art" title="A synthwave style sunset above the reflecting water of the sea, digital art"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20of%20ocean%20beach%20front%20in%20the%20style%20of%20Titian.png?raw=true" width="32%" alt="an oil painting of ocean beach front in the style of Titian" title="an oil painting of ocean beach front in the style of Titian"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20of%20Klingon%20general%20in%20the%20style%20of%20Rubens.png?raw=true" width="32%" alt="an oil painting of Klingon general in the style of Rubens" title="an oil painting of Klingon general in the style of Rubens"><img 
src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/city%2C%20top%20view%2C%20cyberpunk%2C%20digital%20realistic%20art.png?raw=true" width="32%" alt="city, top view, cyberpunk, digital realistic art" title="city, top view, cyberpunk, digital realistic art"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20of%20a%20medieval%20cyborg%20automaton%20made%20of%20magic%20parts%20and%20old%20steampunk%20mechanics.png?raw=true" width="32%" alt="an oil painting of a medieval cyborg automaton made of magic parts and old steampunk mechanics" title="an oil painting of a medieval cyborg automaton made of magic parts and old steampunk mechanics"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20watercolour%20painting%20of%20a%20top%20view%20of%20a%20pirate%20ship%20sailing%20on%20the%20clouds.png?raw=true" width="32%" alt="a watercolour painting of a top view of a pirate ship sailing on the clouds" title="a watercolour painting of a top view of a pirate ship sailing on the clouds"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20knight%20made%20of%20beautiful%20flowers%20and%20fruits%20by%20Rachel%20ruysch%20in%20the%20style%20of%20Syd%20brak.png?raw=true" width="32%" alt="a knight made of beautiful flowers and fruits by Rachel ruysch in the style of Syd brak" title="a knight made of beautiful flowers and fruits by Rachel ruysch in the style of Syd brak"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%203D%20render%20of%20a%20rainbow%20colored%20hot%20air%20balloon%20flying%20above%20a%20reflective%20lake.png?raw=true" width="32%" alt="a 3D render of a rainbow colored hot air balloon flying above a reflective lake" title="a 3D render of a rainbow colored hot air balloon flying above a reflective lake"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20teddy%20bear%20on%20a%20skateboard%20in%20Times%20Square%20.png?raw=true" width="32%" alt="a teddy bear on a skateboard in Times Square " title="a teddy bear on a skateboard in Times Square "><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/cozy%20bedroom%20at%20night.png?raw=true" width="32%" alt="cozy bedroom at night" title="cozy bedroom at night"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20of%20monkey%20using%20computer.png?raw=true" width="32%" alt="an oil painting of monkey using computer" title="an oil painting of monkey using computer"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/the%20diagram%20of%20a%20search%20machine%20invented%20by%20Leonardo%20da%20Vinci.png?raw=true" width="32%" alt="the diagram of a search machine invented by Leonardo da Vinci" title="the diagram of a search machine invented by Leonardo da Vinci"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20stained%20glass%20window%20of%20toucans%20in%20outer%20space.png?raw=true" width="32%" alt="A stained glass window of toucans in outer space" title="A stained glass window of toucans in outer space"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20campfire%20in%20the%20woods%20at%20night%20with%20the%20milky-way%20galaxy%20in%20the%20sky.png?raw=true" width="32%" alt="a campfire in the woods at night with the milky-way galaxy in the sky" title="a campfire in the woods at night with the milky-way galaxy in the sky"><img 
src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/Bionic%20killer%20robot%20made%20of%20AI%20scarab%20beetles.png?raw=true" width="32%" alt="Bionic killer robot made of AI scarab beetles" title="Bionic killer robot made of AI scarab beetles"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/The%20Hanging%20Gardens%20of%20Babylon%20in%20the%20middle%20of%20a%20city%2C%20in%20the%20style%20of%20Dal%C3%AD.png?raw=true" width="32%" alt="The Hanging Gardens of Babylon in the middle of a city, in the style of Dalí" title="The Hanging Gardens of Babylon in the middle of a city, in the style of Dalí"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/painting%20oil%20of%20Izhevsk.png?raw=true" width="32%" alt="painting oil of Izhevsk" title="painting oil of Izhevsk"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20hyper%20realistic%20photo%20of%20a%20marshmallow%20office%20chair.png?raw=true" width="32%" alt="a hyper realistic photo of a marshmallow office chair" title="a hyper realistic photo of a marshmallow office chair"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/fantasy%20landscape%20with%20city.png?raw=true" width="32%" alt="fantasy landscape with city" title="fantasy landscape with city"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/ocean%20beach%20front%20view%20in%20Van%20Gogh%20style.png?raw=true" width="32%" alt="ocean beach front view in Van Gogh style" title="ocean beach front view in Van Gogh style"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/An%20oil%20painting%20of%20a%20family%20reunited%20inside%20of%20an%20airport%2C%20digital%20art.png?raw=true" width="32%" alt="An oil painting of a family reunited inside of an airport, digital art" title="An oil painting of a family reunited inside of an airport, digital art"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/antique%20photo%20of%20a%20knight%20riding%20a%20T-Rex.png?raw=true" width="32%" alt="antique photo of a knight riding a T-Rex" title="antique photo of a knight riding a T-Rex"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20top%20view%20of%20a%20pirate%20ship%20sailing%20on%20the%20clouds.png?raw=true" width="32%" alt="a top view of a pirate ship sailing on the clouds" title="a top view of a pirate ship sailing on the clouds"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/an%20oil%20painting%20of%20a%20humanoid%20robot%20playing%20chess%20in%20the%20style%20of%20Matisse.png?raw=true" width="32%" alt="an oil painting of a humanoid robot playing chess in the style of Matisse" title="an oil painting of a humanoid robot playing chess in the style of Matisse"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20cubism%20painting%20of%20a%20cat%20dressed%20as%20French%20emperor%20Napoleon.png?raw=true" width="32%" alt="a cubism painting of a cat dressed as French emperor Napoleon" title="a cubism painting of a cat dressed as French emperor Napoleon"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/a%20husky%20dog%20wearing%20a%20hat%20with%20sunglasses.png?raw=true" width="32%" alt="a husky dog wearing a hat with sunglasses" title="a husky dog wearing a hat with sunglasses"><img 
src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/A%20mystical%20castle%20appears%20between%20the%20clouds%20in%20the%20style%20of%20Vincent%20di%20Fate.png?raw=true" width="32%" alt="A mystical castle appears between the clouds in the style of Vincent di Fate" title="A mystical castle appears between the clouds in the style of Vincent di Fate"><img src="https://github.com/hanxiao/dalle/blob/gallery/.github/gallery/golden%20gucci%20airpods%20realistic%20photo.png?raw=true" width="32%" alt="golden gucci airpods realistic photo" title="golden gucci airpods realistic photo"> 58 | 59 | ## Client 60 | 61 | <a href="https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb"><img src="https://img.shields.io/badge/Open-in%20Colab-orange?logo=google-colab&style=flat-square" alt="Open in Google Colab"/></a> 62 | 63 | Using client is super easy. The following steps are best run in [Jupyter notebook](./client.ipynb) or [Google Colab](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb). 64 | 65 | You will need to install [DocArray](https://github.com/jina-ai/docarray) and [Jina](https://github.com/jina-ai/jina) first: 66 | 67 | ```bash 68 | pip install "docarray[common]>=0.13.5" jina 69 | ``` 70 | 71 | We have provided a demo server for you to play: 72 | > ⚠️ **Due to the massive requests, our server may be delay in response. Yet we are _very_ confident on keeping the uptime high.** You can also deploy your own server by [following the instruction here](#server). 73 | 74 | ```python 75 | server_url = 'grpcs://dalle-flow.dev.jina.ai' 76 | ``` 77 | 78 | 79 | ### Step 1: Generate via DALL·E Mega 80 | 81 | Now let's define the prompt: 82 | 83 | ```python 84 | prompt = 'an oil painting of a humanoid robot playing chess in the style of Matisse' 85 | ``` 86 | 87 | Let's submit it to the server and visualize the results: 88 | 89 | ```python 90 | from docarray import Document 91 | 92 | doc = Document(text=prompt).post(server_url, parameters={'num_images': 8}) 93 | da = doc.matches 94 | 95 | da.plot_image_sprites(fig_size=(10,10), show_index=True) 96 | ``` 97 | 98 | Here we generate 24 candidates, 8 from DALLE-mega, 8 from GLID3 XL, and 8 from Stable Diffusion, this is as defined in `num_images`, which takes about ~2 minutes. You can use a smaller value if it is too long for you. 99 | 100 | 101 | <p align="center"> 102 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/client-dalle.png?raw=true" width="70%"> 103 | </p> 104 | 105 | ### Step 2: Select and refinement via GLID3 XL 106 | 107 | The 24 candidates are sorted by [CLIP-as-service](https://github.com/jina-ai/clip-as-service), with index-`0` as the best candidate judged by CLIP. Of course, you may think differently. Notice the number in the top-left corner? Select the one you like the most and get a better view: 108 | 109 | ```python 110 | fav_id = 3 111 | fav = da[fav_id] 112 | fav.embedding = doc.embedding 113 | fav.display() 114 | ``` 115 | 116 | <p align="center"> 117 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/client-select1.png?raw=true" width="30%"> 118 | </p> 119 | 120 | Now let's submit the selected candidates to the server for diffusion. 121 | 122 | ```python 123 | diffused = fav.post(f'{server_url}', parameters={'skip_rate': 0.5, 'num_images': 36}, target_executor='diffusion').matches 124 | 125 | diffused.plot_image_sprites(fig_size=(10,10), show_index=True) 126 | ``` 127 | 128 | This will give 36 images based on the selected image. 
You may allow the model to improvise more by giving `skip_rate` a near-zero value, or a near-one value to force its closeness to the given image. The whole procedure takes about ~2 minutes. 129 | 130 | <p align="center"> 131 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/client-glid.png?raw=true" width="60%"> 132 | </p> 133 | 134 | ### Step 3: Select and upscale via SwinIR 135 | 136 | Select the image you like the most, and give it a closer look: 137 | 138 | ```python 139 | dfav_id = 34 140 | fav = diffused[dfav_id] 141 | fav.display() 142 | ``` 143 | 144 | <p align="center"> 145 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/client-select2.png?raw=true" width="30%"> 146 | </p> 147 | 148 | 149 | Finally, submit to the server for the last step: upscaling to 1024 x 1024px. 150 | 151 | ```python 152 | fav = fav.post(f'{server_url}/upscale') 153 | fav.display() 154 | ``` 155 | 156 | That's it! It is _the one_. If not satisfied, please repeat the procedure. 157 | 158 | <p align="center"> 159 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/client-select3.png?raw=true" width="50%"> 160 | </p> 161 | 162 | Btw, DocArray is a powerful and easy-to-use data structure for unstructured data. It is super productive for data scientists who work in cross-/multi-modal domains. To learn more about DocArray, [please check out the docs](https://docs.jina.ai). 163 | 164 | ## Server 165 | 166 | You can host your own server by following the instructions below. 167 | 168 | ### Hardware requirements 169 | 170 | DALL·E Flow needs one GPU with 21GB VRAM at its peak. All services are squeezed into this one GPU; this includes (roughly): 171 | - DALLE ~9GB 172 | - GLID Diffusion ~6GB 173 | - Stable Diffusion ~8GB (batch_size=4 in `config.yml`, 512x512) 174 | - SwinIR ~3GB 175 | - CLIP ViT-L/14-336px ~3GB 176 | 177 | The following reasonable tricks can be used to further reduce VRAM: 178 | - SwinIR can be moved to CPU (-3GB) 179 | - CLIP can be delegated to the [CLIP-as-service free server](https://console.clip.jina.ai/get_started) (-3GB) 180 | 181 | 182 | It requires at least 50GB of free space on the hard drive, mostly for downloading pretrained models. 183 | 184 | High-speed internet is required. Slow/unstable internet may cause frustrating timeouts when downloading models. 185 | 186 | A CPU-only environment is not tested and likely won't work. Google Colab will likely throw OOM errors and hence also won't work. 187 | 188 | 189 | ### Server architecture 190 | 191 | <p align="center"> 192 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/flow.svg?raw=true" width="70%"> 193 | </p> 194 | 195 | If you have installed Jina, the above flowchart can be generated via: 196 | 197 | ```bash 198 | # pip install jina 199 | jina export flowchart flow.yml flow.svg 200 | ``` 201 | 202 | 203 | ### Stable Diffusion weights 204 | 205 | If you want to use Stable Diffusion, you will first need to register an account on [Hugging Face](https://huggingface.co/) and agree to the terms and conditions for the model. After logging in, you can find the required version of the model here: 206 | 207 | [CompVis / sd-v1-5-inpainting.ckpt](https://huggingface.co/runwayml/stable-diffusion-inpainting/blob/main/sd-v1-5-inpainting.ckpt) 208 | 209 | Under the **Download the Weights** section, click the link for `sd-v1-x.ckpt`. The latest weights at the time of writing are `sd-v1-5.ckpt`.
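If you prefer to script the download, below is a minimal sketch (our own addition, not part of the official instructions) using the `huggingface_hub` Python client. It assumes you have installed `huggingface_hub`, accepted the model license on the Hugging Face page above, and logged in via `huggingface-cli login`; the target folder matches the Docker layout described below.

```python
from pathlib import Path
from shutil import copyfile

from huggingface_hub import hf_hub_download  # pip install huggingface_hub

# Fetch the checkpoint linked above; it is cached under ~/.cache/huggingface.
ckpt = hf_hub_download(
    repo_id='runwayml/stable-diffusion-inpainting',
    filename='sd-v1-5-inpainting.ckpt',
)

# Copy it to the folder and file name DALL·E Flow expects (see the notes below).
target = Path('ldm/stable-diffusion-v1/model.ckpt')
target.parent.mkdir(parents=True, exist_ok=True)
copyfile(ckpt, target)
print(f'weights ready at {target.resolve()}')
```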
210 | 211 | **DOCKER USERS**: Put this file into a folder named `ldm/stable-diffusion-v1` and rename it to `model.ckpt`. Follow the instructions below carefully because SD is not enabled by default. 212 | 213 | **NATIVE USERS**: Put this file into `dalle/stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt` after finishing the rest of the steps under "Run natively". Follow the instructions below carefully because SD is not enabled by default. 214 | 215 | 216 | ### Run in Docker 217 | 218 | #### Prebuilt image 219 | 220 | We have provided [a prebuilt Docker image](https://hub.docker.com/r/jinaai/dalle-flow) that can be pulled directly. 221 | 222 | ```bash 223 | docker pull jinaai/dalle-flow:latest 224 | ``` 225 | 226 | #### Build it yourself 227 | 228 | We have provided [a Dockerfile](https://github.com/jina-ai/dalle-flow/blob/main/Dockerfile) which allows you to run a server out of the box. 229 | 230 | Our Dockerfile uses CUDA 11.6 as the base image; you may want to adjust it according to your system. 231 | 232 | ```bash 233 | git clone https://github.com/jina-ai/dalle-flow.git 234 | cd dalle-flow 235 | 236 | docker build --build-arg GROUP_ID=$(id -g ${USER}) --build-arg USER_ID=$(id -u ${USER}) -t jinaai/dalle-flow . 237 | ``` 238 | 239 | The build takes about 10 minutes with average internet speed and results in an 18GB Docker image. 240 | 241 | #### Run container 242 | 243 | To run it, simply do: 244 | 245 | ```bash 246 | docker run -p 51005:51005 \ 247 | -it \ 248 | -v $HOME/.cache:/home/dalle/.cache \ 249 | --gpus all \ 250 | jinaai/dalle-flow 251 | ``` 252 | 253 | Alternatively, you may also run with some workflows enabled or disabled to prevent out-of-memory crashes. To do that, pass one of these environment variables: 254 | ``` 255 | DISABLE_DALLE_MEGA 256 | DISABLE_GLID3XL 257 | DISABLE_SWINIR 258 | ENABLE_STABLE_DIFFUSION 259 | ENABLE_CLIPSEG 260 | ENABLE_REALESRGAN 261 | ``` 262 | 263 | For example, if you would like to disable GLID3XL workflows, run: 264 | 265 | ```bash 266 | docker run -e DISABLE_GLID3XL='1' \ 267 | -p 51005:51005 \ 268 | -it \ 269 | -v $HOME/.cache:/home/dalle/.cache \ 270 | --gpus all \ 271 | jinaai/dalle-flow 272 | ``` 273 | 274 | - The first run will take ~10 minutes with average internet speed. 275 | - `-v $HOME/.cache:/home/dalle/.cache` avoids repeated model downloading on every docker run. 276 | - The first part of `-p 51005:51005` is your host public port. Make sure people can access this port if you are serving publicly. The second part of it is [the port defined in flow.yml](https://github.com/jina-ai/dalle-flow/blob/e7e313522608668daeec1b7cd84afe56e5b19f1e/flow.yml#L4). 277 | - If you want to use Stable Diffusion, it must be enabled manually with the `ENABLE_STABLE_DIFFUSION` flag. 278 | - If you want to use clipseg, it must be enabled manually with the `ENABLE_CLIPSEG` flag. 279 | - If you want to use RealESRGAN, it must be enabled manually with the `ENABLE_REALESRGAN` flag. 280 | 281 | #### Special instructions for Stable Diffusion and Docker 282 | 283 | **Stable Diffusion may only be enabled if you have downloaded the weights and made them available as a mounted volume while setting the environment variable (`ENABLE_STABLE_DIFFUSION`) for SD**. 284 | 285 | You should have previously put the weights into a folder named `ldm/stable-diffusion-v1` and renamed the file to `model.ckpt`. Replace `YOUR_MODEL_PATH/ldm` below with the path on your own system to pipe the weights into the docker image.
286 | 287 | ```bash 288 | docker run -e ENABLE_STABLE_DIFFUSION="1" \ 289 | -e DISABLE_DALLE_MEGA="1" \ 290 | -e DISABLE_GLID3XL="1" \ 291 | -p 51005:51005 \ 292 | -it \ 293 | -v YOUR_MODEL_PATH/ldm:/dalle/stable-diffusion/models/ldm/ \ 294 | -v $HOME/.cache:/home/dalle/.cache \ 295 | --gpus all \ 296 | jinaai/dalle-flow 297 | ``` 298 | 299 | You should see a screen like the following once it is running: 300 | 301 | <p align="center"> 302 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/docker-run.png?raw=true" width="50%"> 303 | </p> 304 | 305 | Note that unlike running natively, running inside Docker may give a less vivid progress bar, color logs, and prints. This is due to the limitations of the terminal in a Docker container. It does not affect the actual usage. 306 | 307 | ### Run natively 308 | 309 | Running natively requires some manual steps, but it is often easier to debug. 310 | 311 | #### Clone repos 312 | 313 | ```bash 314 | mkdir dalle && cd dalle 315 | git clone https://github.com/jina-ai/dalle-flow.git 316 | git clone https://github.com/jina-ai/SwinIR.git 317 | git clone --branch v0.0.15 https://github.com/AmericanPresidentJimmyCarter/stable-diffusion.git 318 | git clone https://github.com/CompVis/latent-diffusion.git 319 | git clone https://github.com/jina-ai/glid-3-xl.git 320 | git clone https://github.com/timojl/clipseg.git 321 | ``` 322 | 323 | You should have the following folder structure: 324 | 325 | ```text 326 | dalle/ 327 | | 328 | |-- Real-ESRGAN/ 329 | |-- SwinIR/ 330 | |-- clipseg/ 331 | |-- dalle-flow/ 332 | |-- glid-3-xl/ 333 | |-- latent-diffusion/ 334 | |-- stable-diffusion/ 335 | ``` 336 | 337 | #### Install auxiliary repos 338 | 339 | ```bash 340 | cd dalle-flow 341 | python3 -m virtualenv env 342 | source env/bin/activate && cd - 343 | pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 344 | pip install numpy tqdm pytorch_lightning einops omegaconf 345 | pip install https://github.com/crowsonkb/k-diffusion/archive/master.zip 346 | pip install git+https://github.com/AmericanPresidentJimmyCarter/stable-diffusion.git@v0.0.15 347 | pip install basicsr facexlib gfpgan 348 | pip install realesrgan 349 | pip install https://github.com/AmericanPresidentJimmyCarter/xformers-builds/raw/master/cu116/xformers-0.0.14.dev0-cp310-cp310-linux_x86_64.whl && \ 350 | cd latent-diffusion && pip install -e . && cd - 351 | cd stable-diffusion && pip install -e . && cd - 352 | cd SwinIR && pip install -e . && cd - 353 | cd glid-3-xl && pip install -e . && cd - 354 | cd clipseg && pip install -e . && cd - 355 | ``` 356 | 357 | There are a couple of models we need to download for GLID-3-XL if you are using it: 358 | 359 | ```bash 360 | cd glid-3-xl 361 | wget https://dall-3.com/models/glid-3-xl/bert.pt 362 | wget https://dall-3.com/models/glid-3-xl/kl-f8.pt 363 | wget https://dall-3.com/models/glid-3-xl/finetune.pt 364 | cd - 365 | ``` 366 | 367 | Both `clipseg` and `RealESRGAN` require you to set a correct cache folder path, 368 | typically something like $HOME/.
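Before installing the flow, you can optionally sanity-check the layout with a short script like the one below. This is our own sketch rather than part of the official steps; it assumes you run it from the top-level `dalle/` folder created above.

```python
from pathlib import Path

# Sibling repos cloned in the step above.
repos = ['dalle-flow', 'SwinIR', 'stable-diffusion',
         'latent-diffusion', 'glid-3-xl', 'clipseg']
# GLID-3-XL checkpoints downloaded above (only needed if GLID-3-XL is enabled).
weights = [Path('glid-3-xl') / f for f in ('bert.pt', 'kl-f8.pt', 'finetune.pt')]

for repo in repos:
    print(f'{repo:25s}', 'OK' if Path(repo).is_dir() else 'MISSING')
for w in weights:
    print(f'{str(w):25s}', 'OK' if w.is_file() else 'MISSING')
```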
369 | 370 | #### Install flow 371 | 372 | ```bash 373 | cd dalle-flow 374 | pip install -r requirements.txt 375 | pip install jax~=0.3.24 376 | ``` 377 | 378 | ### Start the server 379 | 380 | Now that you are under `dalle-flow/`, run the following command: 381 | 382 | ```bash 383 | # Optionally disable some generative models with the following flags when 384 | # using flow_parser.py: 385 | # --disable-dalle-mega 386 | # --disable-glid3xl 387 | # --disable-swinir 388 | # --enable-stable-diffusion 389 | python flow_parser.py 390 | jina flow --uses flow.tmp.yml 391 | ``` 392 | 393 | You should see this screen immediately: 394 | 395 | <p align="center"> 396 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/server-onstart.png?raw=true" width="50%"> 397 | </p> 398 | 399 | On the first start it will take ~8 minutes to download the DALL·E Mega model and other necessary models. Subsequent runs should only take ~1 minute to reach the success message. 400 | 401 | <p align="center"> 402 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/server-wait.png?raw=true" width="50%"> 403 | </p> 404 | 405 | 406 | When everything is ready, you will see: 407 | 408 | <p align="center"> 409 | <img src="https://github.com/jina-ai/dalle-flow/blob/main/.github/server-success.png?raw=true" width="50%"> 410 | </p> 411 | 412 | 413 | Congrats! Now you should be able to [run the client](#client). 414 | 415 | You can modify and extend the server flow as you like, e.g. changing the model, adding persistence, or even auto-posting to Instagram/OpenSea. With Jina and DocArray, you can easily make DALL·E Flow [cloud-native and ready for production](https://github.com/jina-ai/jina). 416 | 417 | 418 | ### Use the CLIP-as-service 419 | 420 | To reduce VRAM usage, you can use `CLIP-as-service` as an external executor, freely available at `grpcs://api.clip.jina.ai:2096`. 421 | First, make sure you have created an access token from the [console website](https://console.clip.jina.ai/get_started), or via the CLI as follows: 422 | 423 | ```bash 424 | jina auth token create <name of PAT> -e <expiration days> 425 | ``` 426 | 427 | Then, you need to change the executor-related configs (`host`, `port`, `external`, `tls` and `grpc_metadata`) in [`flow.yml`](./flow.yml). 428 | 429 | ```yaml 430 | ... 431 | - name: clip_encoder 432 | uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 433 | host: 'api.clip.jina.ai' 434 | port: 2096 435 | tls: true 436 | external: true 437 | grpc_metadata: 438 | authorization: "<your access token>" 439 | needs: [gateway] 440 | ... 441 | - name: rerank 442 | uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 443 | host: 'api.clip.jina.ai' 444 | port: 2096 445 | uses_requests: 446 | '/': rank 447 | tls: true 448 | external: true 449 | grpc_metadata: 450 | authorization: "<your access token>" 451 | needs: [dalle, diffusion] 452 | ``` 453 | 454 | You can also use `flow_parser.py` to automatically generate and run the flow using `CLIP-as-service` as an external executor: 455 | 456 | ```bash 457 | python flow_parser.py --cas-token "<your access token>" 458 | jina flow --uses flow.tmp.yml 459 | ``` 460 | 461 | > ⚠️ `grpc_metadata` is only available after Jina `v3.11.0`. If you are using an older version, please upgrade to the latest version. 462 | 463 | Now, you can use the free `CLIP-as-service` in your flow.
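Once the flow is up, with or without the external CLIP executor, you can run a quick end-to-end smoke test from the client side. This is our own sketch rather than part of the official docs; `server_url` is whatever address your deployment exposes, and the readiness check assumes a recent Jina 3.x client.

```python
from docarray import Document
from jina import Client

server_url = 'grpc://localhost:51005'  # adjust to your own deployment

# Readiness probe: returns True once all executors are reachable.
print('flow ready:', Client(host=server_url).is_flow_ready())

# One tiny request through the whole pipeline.
doc = Document(text='a smoke-test prompt').post(server_url, parameters={'num_images': 1})
print('received', len(doc.matches), 'candidate image(s)')
```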
464 | 465 | <!-- start support-pitch --> 466 | ## Support 467 | 468 | - To extend DALL·E Flow you will need to get familiar with [Jina](https://github.com/jina-ai/jina) and [DocArray](https://github.com/jina-ai/docarray). 469 | - Join our [Discord community](https://discord.jina.ai) and chat with other community members about ideas. 470 | - Join our [Engineering All Hands](https://youtube.com/playlist?list=PL3UBBWOUVhFYRUa_gpYYKBqEAkO4sxmne) meet-up to discuss your use case and learn Jina's new features. 471 | - **When?** The second Tuesday of every month 472 | - **Where?** 473 | Zoom ([see our public events calendar](https://calendar.google.com/calendar/embed?src=c_1t5ogfp2d45v8fit981j08mcm4%40group.calendar.google.com&ctz=Europe%2FBerlin)/[.ical](https://calendar.google.com/calendar/ical/c_1t5ogfp2d45v8fit981j08mcm4%40group.calendar.google.com/public/basic.ics)) 474 | and [live stream on YouTube](https://youtube.com/c/jina-ai) 475 | - Subscribe to the latest video tutorials on our [YouTube channel](https://youtube.com/c/jina-ai) 476 | 477 | ## Join Us 478 | 479 | DALL·E Flow is backed by [Jina AI](https://jina.ai) and licensed under [Apache-2.0](./LICENSE). [We are actively hiring](https://jobs.jina.ai) AI engineers, solution engineers to build the next neural search ecosystem in open-source. 480 | 481 | <!-- end support-pitch --> 482 | -------------------------------------------------------------------------------- /client.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "318c88c4-3b67-4a8a-9200-7e88ff9f08b2", 6 | "metadata": {}, 7 | "source": [ 8 | "# Warning\n", 9 | "\n", 10 | "[⚠️ For cost-efficiency, we no longer host a DALL-E Flow demo server. Click here to see how you can deploy it for yourself!](https://github.com/jina-ai/dalle-flow#server)\n" 11 | ] 12 | }, 13 | { 14 | "cell_type": "markdown", 15 | "id": "e13ed1c4", 16 | "metadata": { 17 | "pycharm": { 18 | "name": "#%% md\n" 19 | } 20 | }, 21 | "source": [ 22 | "<p align=\"center\">\n", 23 | "<a href=\"https://github.com/jina-ai/dalle\"><img src=\"https://res.cloudinary.com/startup-grind/image/upload/c_fill,dpr_2.0,f_auto,g_xy_center,h_650,q_auto:good,w_1440,x_w_mul_0.5,y_h_mul_0.0/v1/gcs/platform-data-dsc/event_banners/banner_8XSoAdr.png?md\" alt=\"DALL·E Flow: A Human-in-the-Loop workflow for creating HD images from text\" width=\"100%\"></a>\n", 24 | "<br>\n", 25 | "</p>\n", 26 | "\n", 27 | "\n", 28 | "<b>A Human-in-the-loop<sup><a href=\"https://en.wikipedia.org/wiki/Human-in-the-loop\">?</a></sup> workflow for creating HD images from text</b>" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "id": "0836667a", 34 | "metadata": { 35 | "pycharm": { 36 | "name": "#%% md\n" 37 | } 38 | }, 39 | "source": [ 40 | "[](https://github.com/jina-ai/dalle-flow) [](https://slack.jina.ai) [](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb)" 41 | ] 42 | }, 43 | { 44 | "cell_type": "markdown", 45 | "id": "f11f924b", 46 | "metadata": { 47 | "pycharm": { 48 | "name": "#%% md\n" 49 | } 50 | }, 51 | "source": [ 52 | "\n", 53 | "\n", 54 | "🚧 If you find your request fails, it is possible the server is occasionally down for maintaince. Please give it a try in 5 minutes.\n", 55 | "\n", 56 | "- ⚠️ **2022/8/8** Started using CLIP-as-service as an [external executor](https://docs.jina.ai/fundamentals/flow/add-executors/#external-executors). 
Now you can easily [deploy your own CLIP executor](#run-your-own-clip) if you want. There is [a small breaking change](https://github.com/jina-ai/dalle-flow/pull/74/files#diff-b335630551682c19a781afebcf4d07bf978fb1f8ac04c6bf87428ed5106870f5R103) as a result of this improvement, so [please _reopen_ the notebook in Google Colab](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb).\n", 57 | "- ⚠️ **2022/7/6** Due to server migration to AWS EKS, server url changed to `grpcs://dalle-flow.dev.jina.ai`. All connections are now with TLS again, [Please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb)\n", 58 | "- ⚠️ **2022/6/25** Unexpected downtime between 6/25 0:00 - 12:00 CET due to out of GPU quotas. The new server now has 2 GPUs, add healthcheck.\n", 59 | "- 2022/6/3 Reduce default number of images to 2 per pathway, 4 for diffusion.\n", 60 | "- ⚠️ 2022/5/23 Fix an upstream bug in CLIP-as-service. This bug makes the 2nd diffusion step irrelevant to the given texts. New Dockerfile proved to be reproducible on a AWS EC2 `p2.x8large` instance.\n", 61 | "- 2022/5/13b Removing TLS as Cloudflare gives 100s timeout, making DALLE Flow in usable [Please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb).\n", 62 | "- 🔐 2022/5/13 New Mega checkpoint! All connections are now with TLS, [Please _reopen_ the notebook in Google Colab!](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb).\n", 63 | "- 🌟 2022/5/10 [A Dockerfile is added! Now you can easily deploy your own DALL·E Flow](https://github.com/jina-ai/dalle-flow). New Mega checkpoint! Smaller memory-footprint, the whole Flow can now fit into **one GPU with 21GB memory**.\n", 64 | "- 🌟 2022/5/9 Less memory-footprint overall, the whole Flow can now fit into one GPU with 18GB memory!\n", 65 | "- 🌟 2022/5/7 DALL·E Flow just got updated!\n", 66 | " - New DALL·E Mega checkpoint\n", 67 | " - Improved GLID3 memory-efficiency and parameters\n", 68 | "- 🌟 2022/5/6 DALL·E Flow just got updated!\n", 69 | " - The first step will generate 16 candidates: **8 from DALL·E Mega, 8 from GLID3-XL**; ranked by CLIP-as-service.\n", 70 | " - Optimized the flow efficiency, diffusion and upscaling is much faster now!\n", 71 | "- ~~⚠️ 2022/5/3 **The number of images is restrict to 9 for DALL·E Mega, and 16 for GLID3-XL**~~\n", 72 | "- ⚠️ 2022/5/2 **Due to the massive requests now, the server is super busy.** You can deploy your own server by following [the instruction here](https://github.com/jina-ai/dalle-flow#server).\n" 73 | ] 74 | }, 75 | { 76 | "cell_type": "markdown", 77 | "id": "e6af8cfd", 78 | "metadata": { 79 | "pycharm": { 80 | "name": "#%% md\n" 81 | } 82 | }, 83 | "source": [ 84 | "Using client is super easy. The following steps are best run in Jupyter notebook or [Google Colab](https://colab.research.google.com/github/jina-ai/dalle-flow/blob/main/client.ipynb). \n", 85 | "\n", 86 | "The only dependency you will need are [DocArray](https://github.com/jina-ai/docarray) and [Jina](https://github.com/jina-ai/jina), as DocArray is already included in Jina you only need to install `jina`.\n", 87 | "\n", 88 | "> On Google Colab, you will be asked to restart the kernel. Go ahead and restart." 
89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "id": "c95e266f", 95 | "metadata": { 96 | "pycharm": { 97 | "name": "#%%\n" 98 | } 99 | }, 100 | "outputs": [], 101 | "source": [ 102 | "!pip install jina" 103 | ] 104 | }, 105 | { 106 | "cell_type": "markdown", 107 | "id": "3059e59c", 108 | "metadata": { 109 | "pycharm": { 110 | "name": "#%%\n" 111 | } 112 | }, 113 | "source": [ 114 | "We have provided a demo server for you to play:" 115 | ] 116 | }, 117 | { 118 | "cell_type": "code", 119 | "execution_count": null, 120 | "id": "3f1bb189", 121 | "metadata": { 122 | "pycharm": { 123 | "name": "#%%\n" 124 | } 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "server_url = 'grpcs://dalle-flow.dev.jina.ai'" 129 | ] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "id": "72aacbed", 134 | "metadata": { 135 | "pycharm": { 136 | "name": "#%% md\n" 137 | } 138 | }, 139 | "source": [ 140 | "### Step 1: Generate via DALL·E Mega\n", 141 | "\n", 142 | "Now let's define the prompt:" 143 | ] 144 | }, 145 | { 146 | "cell_type": "code", 147 | "execution_count": null, 148 | "id": "ac1ec094", 149 | "metadata": { 150 | "pycharm": { 151 | "name": "#%%\n" 152 | } 153 | }, 154 | "outputs": [], 155 | "source": [ 156 | "prompt = 'an oil painting of a humanoid robot playing chess in the style of Matisse'" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "id": "de6d7244", 162 | "metadata": {}, 163 | "source": [ 164 | "Do you need some hints and suggestions on the prompt? Check out those tricks:\n", 165 | " - [A Guide to Writing Prompts for Text-to-image AI](https://docs.google.com/document/d/17VPu3U2qXthOpt2zWczFvf-AH6z37hxUbvEe1rJTsEc/edit?usp=sharing)\n", 166 | " - [CLIP Templates](https://docs.google.com/document/d/1j2IAumYz4iZopOTAAOcCUKbFXP0jHK8mRgD4NLFKkaw/edit?usp=sharing)" 167 | ] 168 | }, 169 | { 170 | "cell_type": "markdown", 171 | "id": "69d0b10b", 172 | "metadata": { 173 | "pycharm": { 174 | "name": "#%% md\n" 175 | } 176 | }, 177 | "source": [ 178 | "Let's submit it to the server and visualize the results:" 179 | ] 180 | }, 181 | { 182 | "cell_type": "code", 183 | "execution_count": null, 184 | "id": "da672930", 185 | "metadata": { 186 | "pycharm": { 187 | "name": "#%%\n" 188 | } 189 | }, 190 | "outputs": [], 191 | "source": [ 192 | "%%time\n", 193 | "\n", 194 | "from docarray import Document\n", 195 | "\n", 196 | "doc = Document(text=prompt).post(server_url, parameters={'num_images': 2})\n", 197 | "da = doc.matches\n", 198 | "\n", 199 | "da.plot_image_sprites(fig_size=(10,10), show_index=True)" 200 | ] 201 | }, 202 | { 203 | "cell_type": "markdown", 204 | "id": "9298f59f", 205 | "metadata": { 206 | "pycharm": { 207 | "name": "#%% md\n" 208 | } 209 | }, 210 | "source": [ 211 | "Here we generate 4 candidates, 2 from DALLE-mega and 2 from GLID3 XL, this is as defined in `num_images`, which takes about ~2 minutes. You can use a smaller value if it is too long for you. The results are sorted by [CLIP-as-service](https://github.com/jina-ai/clip-as-service), with index-`0` as the best candidate judged by CLIP. \n", 212 | "\n", 213 | "> You could generate up to 8 images per pathway via `num_images`, resulting 16 candidate images in total. But it will be much slower." 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "3bf3a62b", 219 | "metadata": { 220 | "pycharm": { 221 | "name": "#%% md\n" 222 | } 223 | }, 224 | "source": [ 225 | "### Step 2: Select and refinement via GLID3 XL\n", 226 | "\n", 227 | "Of course, you may think differently. 
Notice the number in the top-left corner? Select the one you like the most and get a better view:" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "id": "8e106718", 234 | "metadata": { 235 | "pycharm": { 236 | "name": "#%%\n" 237 | } 238 | }, 239 | "outputs": [], 240 | "source": [ 241 | "fav_id = 3\n", 242 | "\n", 243 | "fav = da[fav_id]\n", 244 | "fav.embedding = doc.embedding\n", 245 | "\n", 246 | "fav.display()" 247 | ] 248 | }, 249 | { 250 | "cell_type": "markdown", 251 | "id": "eec3e4ea", 252 | "metadata": { 253 | "pycharm": { 254 | "name": "#%% md\n" 255 | } 256 | }, 257 | "source": [ 258 | "Now let's submit the selected candidates to the server for diffusion." 259 | ] 260 | }, 261 | { 262 | "cell_type": "code", 263 | "execution_count": null, 264 | "id": "13cf87ce", 265 | "metadata": { 266 | "pycharm": { 267 | "name": "#%%\n" 268 | } 269 | }, 270 | "outputs": [], 271 | "source": [ 272 | "%%time\n", 273 | "\n", 274 | "diffused = fav.post(f'{server_url}', parameters={'skip_rate': 0.6, 'num_images': 4}, target_executor='diffusion').matches\n", 275 | "\n", 276 | "diffused.plot_image_sprites(fig_size=(10,10), show_index=True)" 277 | ] 278 | }, 279 | { 280 | "cell_type": "markdown", 281 | "id": "fbb54ed9", 282 | "metadata": { 283 | "pycharm": { 284 | "name": "#%% md\n" 285 | } 286 | }, 287 | "source": [ 288 | "This will give 4 images based on the given image. You may allow the model to improvise more by giving `skip_rate` a near-zero value, or a near-one value to force its closeness to the given image. The whole procedure takes about ~1 minutes.\n", 289 | "\n", 290 | "> You could generate upto 8 images via `num_images`. But it will be slower." 291 | ] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "id": "978ccf66", 296 | "metadata": { 297 | "pycharm": { 298 | "name": "#%% md\n" 299 | } 300 | }, 301 | "source": [ 302 | "### Step 3: Select and upscale via SwinIR\n", 303 | "\n", 304 | "Select the image you like the most, and give it a closer look:\n" 305 | ] 306 | }, 307 | { 308 | "cell_type": "code", 309 | "execution_count": null, 310 | "id": "7f00f44e", 311 | "metadata": { 312 | "pycharm": { 313 | "name": "#%%\n" 314 | } 315 | }, 316 | "outputs": [], 317 | "source": [ 318 | "dfav_id = 2\n", 319 | "\n", 320 | "fav = diffused[dfav_id]\n", 321 | "\n", 322 | "fav.display()" 323 | ] 324 | }, 325 | { 326 | "cell_type": "markdown", 327 | "id": "ab54a790", 328 | "metadata": { 329 | "pycharm": { 330 | "name": "#%% md\n" 331 | } 332 | }, 333 | "source": [ 334 | "If not satisfied, you can rerun the last cell in step 2 with this new `fav`.\n", 335 | "\n", 336 | "Finally, submit to the server for the last step: upscaling to 1024 x 1024px.\n", 337 | "\n", 338 | "> This step should be much faster (~10s) as the Flow is designed in unblocked manner." 339 | ] 340 | }, 341 | { 342 | "cell_type": "code", 343 | "execution_count": null, 344 | "id": "b60e8bfc", 345 | "metadata": { 346 | "pycharm": { 347 | "name": "#%%\n" 348 | } 349 | }, 350 | "outputs": [], 351 | "source": [ 352 | "%%time\n", 353 | "\n", 354 | "fav = fav.post(f'{server_url}/upscale')\n", 355 | "fav.display()" 356 | ] 357 | }, 358 | { 359 | "cell_type": "markdown", 360 | "id": "e910e8aa", 361 | "metadata": { 362 | "pycharm": { 363 | "name": "#%%\n" 364 | } 365 | }, 366 | "source": [ 367 | "> 💁♂️ On Google colab this image may render exactly the same size as before. But it is in 1024x1024 already. Right click on the image and copy/save it. You will see.\n", 368 | "\n", 369 | "That's it! 
It is _the one_.\n", 370 | "\n", 371 | "\n", 372 | "Btw, [DocArray is a powerful and easy-to-use data structure for unstructured data](https://github.com/jina-ai/docarray), which makes all of this possible. It is super productive for data scientists who work in cross-/multi-modal domains. To learn more about DocArray, [please check out the docs](https://docarray.jina.ai)." 373 | ] 374 | } 375 | ], 376 | "metadata": { 377 | "kernelspec": { 378 | "display_name": "Python 3 (ipykernel)", 379 | "language": "python", 380 | "name": "python3" 381 | }, 382 | "language_info": { 383 | "codemirror_mode": { 384 | "name": "ipython", 385 | "version": 3 386 | }, 387 | "file_extension": ".py", 388 | "mimetype": "text/x-python", 389 | "name": "python", 390 | "nbconvert_exporter": "python", 391 | "pygments_lexer": "ipython3", 392 | "version": "3.10.8" 393 | } 394 | }, 395 | "nbformat": 4, 396 | "nbformat_minor": 5 397 | } 398 | -------------------------------------------------------------------------------- /executors/clipseg/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu22.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends wget git python3.10 python3-pip ffmpeg libsm6 libxext6 \ 5 | && ln -sf python3.10 /usr/bin/python \ 6 | && ln -sf pip3 /usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | WORKDIR /clipseg 14 | 15 | COPY . /clipseg/executor 16 | 17 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 18 | RUN pip install --no-cache-dir --use-deprecated=legacy-resolver -r ./executor/requirements.txt 19 | 20 | RUN git clone --depth=1 https://github.com/timojl/clipseg.git 21 | # Each RUN starts a fresh shell, so the cd must be chained with the install 22 | RUN cd clipseg && pip install . 23 |
24 | 25 | # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 26 | ENV XLA_PYTHON_CLIENT_ALLOCATOR=platform 27 | 28 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 29 | 30 | ARG USER_ID=1000 31 | ARG GROUP_ID=1000 32 | 33 | ARG USER_NAME=clipseg 34 | ARG GROUP_NAME=clipseg 35 | 36 | RUN groupadd -g ${GROUP_ID} ${USER_NAME} && \ 37 | useradd -l -u ${USER_ID} -g ${USER_NAME} ${GROUP_NAME} | chpasswd && \ 38 | mkdir /home/${USER_NAME} && \ 39 | chown ${USER_NAME}:${GROUP_NAME} /home/${USER_NAME} && \ 40 | chown -R ${USER_NAME}:${GROUP_NAME} /clipseg/ 41 | 42 | USER ${USER_NAME} 43 | 44 | WORKDIR /clipseg/executor 45 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/clipseg/config.yml: -------------------------------------------------------------------------------- 1 | jtype: ClipSegmentation 2 | with: 3 | cache_path: ~/.cache 4 | metas: 5 | py_modules: 6 | - executor.py 7 | -------------------------------------------------------------------------------- /executors/clipseg/executor.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import os 3 | import sys 4 | import shutil 5 | import time 6 | 7 | from PIL import Image, ImageOps 8 | from io import BytesIO 9 | from pathlib import Path 10 | from typing import Dict 11 | from urllib.request import urlopen 12 | 13 | import cv2 14 | import numpy as np 15 | import torch 16 | 17 | from models.clipseg import CLIPDensePredT 18 | from jina import Executor, DocumentArray, Document, requests 19 | from torchvision import transforms 20 | 21 | 22 | class THRESHOLDING_METHODS(str, enum.Enum): 23 | NONE = 'none' # Do not threshold 24 | BINARY = 'binary' 25 | ADAPTIVE_MEAN = 'adaptive_mean' 26 | ADAPTIVE_GAUSSIAN = 'adaptive_gaussian' 27 | 28 | 29 | THRESHOLD_ADAPTIVE_DEFAULT_BLOCK_SIZE = 11 30 | THRESHOLD_ADAPTIVE_DEFAULT_C = 2. 31 | THRESHOLD_BINARY_DEFAULT_STRENGTH_VALUE = 85 32 | 33 | WEIGHT_FOLDER_NAME = 'clipseg_weights' 34 | WEIGHT_URL_DEFAULT = 'https://owncloud.gwdg.de/index.php/s/ioHbRzFx6th32hn/download' 35 | WEIGHT_ZIP_FILE_NAME = 'clipseg_weights.zip' 36 | 37 | class ClipSegmentation(Executor): 38 | model = None 39 | transformation = None 40 | 41 | def __init__(self, 42 | cache_path: str|Path, 43 | weights_url: str=WEIGHT_URL_DEFAULT, 44 | **kwargs, 45 | ): 46 | super().__init__(**kwargs) 47 | 48 | if '~' in str(Path(cache_path)): 49 | cache_path = Path(cache_path).expanduser() 50 | 51 | weights_path = Path('/') 52 | if Path(cache_path).is_dir(): 53 | weights_path = Path(cache_path) / WEIGHT_ZIP_FILE_NAME 54 | else: 55 | # Assume we're working locally, use local home. 
56 | weights_path = Path.home() / WEIGHT_ZIP_FILE_NAME 57 | 58 | if not weights_path.is_file(): 59 | response = urlopen(weights_url) 60 | weights_bytes = response.read() 61 | with open(weights_path, 'wb') as w_f: 62 | w_f.write(weights_bytes) 63 | 64 | shutil.unpack_archive(weights_path, Path(cache_path).resolve()) 65 | 66 | model = CLIPDensePredT(version='ViT-B/16', reduce_dim=64) 67 | model.eval() 68 | model.load_state_dict( 69 | torch.load( 70 | f'{cache_path}/{WEIGHT_FOLDER_NAME}/rd64-uni.pth', 71 | map_location=torch.device('cuda'), 72 | ), 73 | strict=False, 74 | ) 75 | self.model = model 76 | 77 | self.transformation = self.default_transformation() 78 | 79 | @staticmethod 80 | def document_to_pil(doc: Document) -> Image: 81 | uri_data = urlopen(doc.uri) 82 | return Image.open(BytesIO(uri_data.read())) 83 | 84 | @staticmethod 85 | def default_transformation() -> transforms.Compose: 86 | return transforms.Compose([ 87 | transforms.ToTensor(), 88 | transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), 89 | transforms.Resize((512, 512)), 90 | ]) 91 | 92 | @requests(on='/segment') 93 | def segment(self, docs: DocumentArray, parameters: Dict, **kwargs): 94 | ''' 95 | Parameters for CLIP segmentation: 96 | 97 | Document.text: Prompt for segmentation. 98 | @parameters.adaptive_thresh_block_size: Adaptive thresholding blocksize, 99 | as integer. 100 | @parameters.adaptive_thresh_c: Adaptive thresholding c value, as float. 101 | @parameters.binary_thresh_strength: Strength of binary thresholding, 102 | lower = more promiscuous. 103 | @parameters.thresholding_type: Type of thresholding, default binary 104 | method. 105 | ''' 106 | request_time = time.time() 107 | 108 | # Parse parameters. 109 | invert = parameters.get('invert', False) 110 | try: 111 | thresholding_type = parameters.get('thresholding_type', 112 | THRESHOLDING_METHODS.BINARY.value) 113 | thresholding_type = THRESHOLDING_METHODS(thresholding_type) 114 | except ValueError: 115 | thresholding_type = THRESHOLDING_METHODS.BINARY 116 | 117 | adaptive_thresh_block_size = None 118 | adaptive_thresh_c = None 119 | binary_thresh_strength = None 120 | if thresholding_type == THRESHOLDING_METHODS.BINARY: 121 | binary_thresh_strength = parameters.get('binary_thresh_strength', 122 | THRESHOLD_BINARY_DEFAULT_STRENGTH_VALUE) 123 | try: 124 | binary_thresh_strength = int(binary_thresh_strength) 125 | except Exception: 126 | pass 127 | if not isinstance(binary_thresh_strength, int): 128 | binary_thresh_strength = THRESHOLD_BINARY_DEFAULT_STRENGTH_VALUE 129 | if thresholding_type == THRESHOLDING_METHODS.ADAPTIVE_MEAN or \ 130 | thresholding_type == THRESHOLDING_METHODS.ADAPTIVE_GAUSSIAN: 131 | adaptive_thresh_block_size = parameters.get( 132 | 'adaptive_thresh_block_size', 133 | THRESHOLD_ADAPTIVE_DEFAULT_BLOCK_SIZE) 134 | try: 135 | adaptive_thresh_block_size = int(adaptive_thresh_block_size) 136 | except Exception: 137 | pass 138 | if not isinstance(adaptive_thresh_block_size, int): 139 | adaptive_thresh_block_size = \ 140 | THRESHOLD_ADAPTIVE_DEFAULT_BLOCK_SIZE 141 | if adaptive_thresh_block_size % 2 != 1: 142 | adaptive_thresh_block_size -= 1 143 | adaptive_thresh_c = parameters.get( 144 | 'adaptive_thresh_c', 145 | THRESHOLD_ADAPTIVE_DEFAULT_C) 146 | if not isinstance(adaptive_thresh_c, float): 147 | adaptive_thresh_c = THRESHOLD_ADAPTIVE_DEFAULT_C 148 | 149 | with torch.no_grad(): 150 | for doc in docs: 151 | prompts = [doc.text] 152 | image_in = self.document_to_pil(doc) 153 | image_in = image_in.convert('RGB') 154 | 
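                # The default transformation normalizes with ImageNet statistics and resizes to 512x512; unsqueeze adds the batch dimension the model expects.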
image_unsqueezed = self.transformation(image_in).unsqueeze(0) 155 | 156 | mask_preds = self.model(image_unsqueezed.repeat(1,1,1,1), 157 | prompts)[0] 158 | sigmoidy = torch.sigmoid(mask_preds[0][0]).cpu().detach().numpy() 159 | mask_as_arr = (sigmoidy * 255 / np.max(sigmoidy)).astype('uint8') 160 | image_mask_init = Image.fromarray(mask_as_arr) 161 | mask_cv = cv2.cvtColor(np.array(image_mask_init), 162 | cv2.COLOR_RGB2BGR) 163 | gray_image = cv2.cvtColor(mask_cv, cv2.COLOR_BGR2GRAY) 164 | 165 | # Fallthrough (THRESHOLDING_METHODS.NONE) is just the gray 166 | # image. 167 | bw_image = gray_image 168 | if thresholding_type == THRESHOLDING_METHODS.BINARY: 169 | (_, bw_image) = cv2.threshold( 170 | gray_image, 171 | binary_thresh_strength, 172 | 255, 173 | cv2.THRESH_BINARY, 174 | ) 175 | if thresholding_type == THRESHOLDING_METHODS.ADAPTIVE_MEAN or \ 176 | thresholding_type == THRESHOLDING_METHODS.ADAPTIVE_GAUSSIAN: 177 | a_method = cv2.ADAPTIVE_THRESH_MEAN_C 178 | if thresholding_type == THRESHOLDING_METHODS.ADAPTIVE_GAUSSIAN: 179 | a_method = cv2.ADAPTIVE_THRESH_GAUSSIAN_C 180 | bw_image = cv2.adaptiveThreshold( 181 | gray_image, 182 | 255, 183 | a_method, 184 | cv2.THRESH_BINARY, 185 | adaptive_thresh_block_size, 186 | adaptive_thresh_c, 187 | ) 188 | 189 | cv2.cvtColor(bw_image, cv2.COLOR_BGR2RGB) 190 | image_mask = Image.fromarray(bw_image) \ 191 | .convert('L') \ 192 | .resize(image_in.size, Image.NEAREST) 193 | # Normally the mask "selects" the query with the alpha layer, 194 | # but if invert is on it selects the opposite. 195 | if not invert: 196 | image_mask = ImageOps.invert(image_mask) 197 | image_rgba = image_in.copy() 198 | image_rgba.putalpha(image_mask) 199 | 200 | buffered = BytesIO() 201 | image_rgba.save(buffered, format='PNG') 202 | _d = Document( 203 | blob=buffered.getvalue(), 204 | mime_type='image/png', 205 | tags={ 206 | 'request': { 207 | 'api': 'segment', 208 | 'adaptive_thresh_block_size': adaptive_thresh_block_size, 209 | 'adaptive_thresh_c': adaptive_thresh_c, 210 | 'binary_thresh_strength': binary_thresh_strength, 211 | 'invert': invert, 212 | 'thresholding_type': thresholding_type.value, 213 | }, 214 | 'text': doc.text, 215 | 'generator': 'clipseg', 216 | 'request_time': request_time, 217 | 'created_time': time.time(), 218 | }, 219 | ).convert_blob_to_datauri() 220 | _d.text = doc.text 221 | doc.matches.append(_d) 222 | -------------------------------------------------------------------------------- /executors/clipseg/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: ClipSegmentation 3 | -------------------------------------------------------------------------------- /executors/clipseg/requirements.txt: -------------------------------------------------------------------------------- 1 | jina 2 | docarray 3 | numpy 4 | scipy 5 | matplotlib 6 | opencv-python 7 | CLIP @ git+https://github.com/openai/CLIP.git 8 | -------------------------------------------------------------------------------- /executors/dalle/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-cudnn8-devel-ubuntu20.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends wget git python3.8 python3-pip \ 5 | && ln -sf python3.8 /usr/bin/python \ 6 | && ln -sf pip3 /usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | 
WORKDIR /workdir 14 | 15 | COPY . /workdir/executor 16 | RUN pip install --no-cache-dir -r ./executor/requirements.txt 17 | RUN pip install jax[cuda11_cudnn82]==0.3.25 -f https://storage.googleapis.com/jax-releases/jax_cuda_releases.html 18 | 19 | # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 20 | ENV XLA_PYTHON_CLIENT_ALLOCATOR=platform 21 | 22 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 23 | 24 | WORKDIR /workdir/executor 25 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/dalle/config.yml: -------------------------------------------------------------------------------- 1 | jtype: DalleGenerator 2 | metas: 3 | py_modules: 4 | - executor/__init__.py 5 | -------------------------------------------------------------------------------- /executors/dalle/executor/__init__.py: -------------------------------------------------------------------------------- 1 | from .dalle import DalleGenerator 2 | -------------------------------------------------------------------------------- /executors/dalle/executor/dalle.py: -------------------------------------------------------------------------------- 1 | import time 2 | from io import BytesIO 3 | from typing import Dict 4 | 5 | from jina import Executor, requests, DocumentArray, Document 6 | 7 | from . import dm_helper 8 | 9 | 10 | class DalleGenerator(Executor): 11 | @requests(on='/') 12 | def generate(self, docs: DocumentArray, parameters: Dict, **kwargs): 13 | 14 | # can be of course larger but to save time and reduce the queue when serving public 15 | num_images = max(1, min(9, int(parameters.get('num_images', 1)))) 16 | request_time = time.time() 17 | for d in docs: 18 | self.logger.info(f'dalle {num_images} [{d.text}]...') 19 | generated_imgs = dm_helper.generate_images(d.text, num_images) 20 | 21 | for img in generated_imgs: 22 | buffered = BytesIO() 23 | img.save(buffered, format='PNG') 24 | _d = Document( 25 | blob=buffered.getvalue(), 26 | mime_type='image/png', 27 | tags={ 28 | 'text': d.text, 29 | 'generator': 'DALLE-mega', 30 | 'request_time': request_time, 31 | 'created_time': time.time(), 32 | }, 33 | ).convert_blob_to_datauri() 34 | _d.text = d.text 35 | d.matches.append(_d) 36 | 37 | self.logger.info(f'done with [{d.text}]') 38 | -------------------------------------------------------------------------------- /executors/dalle/executor/dm_helper.py: -------------------------------------------------------------------------------- 1 | import random 2 | import time 3 | from functools import partial 4 | 5 | import jax 6 | import jax.numpy as jnp 7 | import numpy as np 8 | import wandb 9 | from PIL import Image 10 | from dalle_mini import DalleBart, DalleBartProcessor 11 | from flax.jax_utils import replicate 12 | from flax.training.common_utils import shard_prng_key 13 | from vqgan_jax.modeling_flax_vqgan import VQModel 14 | 15 | # # dalle-mini 16 | # DALLE_MODEL = "dalle-mini/dalle-mini/kvwti2c9:latest" 17 | # dtype = jnp.float32 18 | # 19 | # # dalle-mega 20 | # DALLE_MODEL = 'dalle-mini/dalle-mini/mega-1:latest' 21 | # dtype=jnp.float32 22 | 23 | # dall-mega-fp16 24 | DALLE_MODEL = "dalle-mini/dalle-mini/mega-1-fp16:latest" 25 | dtype = jnp.float16 26 | 27 | DALLE_COMMIT_ID = None 28 | 29 | VQGAN_REPO = 'dalle-mini/vqgan_imagenet_f16_16384' 30 | VQGAN_COMMIT_ID = 'e93a26e7707683d349bf5d5c41c5b0ef69b677a9' 31 | 32 | gen_top_k = None 33 | gen_top_p = 0.9 34 | temperature = None 35 | cond_scale = 10.0 36 | 37 | 
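# dalle-mini resolves the checkpoint above from a Weights & Biases artifact, so a (here anonymous) wandb session must exist before from_pretrained runs below.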
time.sleep(int(random.random() * 10)) # hack, to avoid concurrent wandb.init, which may cause error when replicas>1 38 | wandb.init(anonymous='must') 39 | 40 | # Load models & tokenizer 41 | 42 | model, params = DalleBart.from_pretrained( 43 | DALLE_MODEL, revision=DALLE_COMMIT_ID, dtype=dtype, _do_init=False 44 | ) 45 | vqgan, vqgan_params = VQModel.from_pretrained( 46 | VQGAN_REPO, revision=VQGAN_COMMIT_ID, dtype=jnp.float32, _do_init=False 47 | ) 48 | 49 | print('device count:', jax.device_count()) 50 | params = replicate(params) 51 | vqgan_params = replicate(vqgan_params) 52 | 53 | 54 | # model inference 55 | @partial(jax.pmap, axis_name='batch', static_broadcasted_argnums=(3, 4, 5, 6)) 56 | def p_generate( 57 | tokenized_prompt, key, params, top_k, top_p, temperature, condition_scale 58 | ): 59 | return model.generate( 60 | **tokenized_prompt, 61 | prng_key=key, 62 | params=params, 63 | top_k=top_k, 64 | top_p=top_p, 65 | temperature=temperature, 66 | condition_scale=condition_scale, 67 | ) 68 | 69 | 70 | # decode images 71 | @partial(jax.pmap, axis_name='batch') 72 | def p_decode(indices, params): 73 | return vqgan.decode_code(indices, params=params) 74 | 75 | 76 | processor = DalleBartProcessor.from_pretrained(DALLE_MODEL, revision=DALLE_COMMIT_ID) 77 | 78 | 79 | def tokenize_prompt(prompt: str): 80 | tokenized_prompt = processor([prompt]) 81 | return replicate(tokenized_prompt) 82 | 83 | 84 | def generate_images(prompt: str, num_predictions: int): 85 | tokenized_prompt = tokenize_prompt(prompt) 86 | 87 | # create a random key 88 | seed = random.randint(0, 2**32 - 1) 89 | key = jax.random.PRNGKey(seed) 90 | 91 | # generate images 92 | images = [] 93 | for i in range(max(1, num_predictions // jax.device_count())): 94 | # get a new key 95 | key, subkey = jax.random.split(key) 96 | 97 | # generate images 98 | encoded_images = p_generate( 99 | tokenized_prompt, 100 | shard_prng_key(subkey), 101 | params, 102 | gen_top_k, 103 | gen_top_p, 104 | temperature, 105 | cond_scale, 106 | ) 107 | 108 | # remove BOS 109 | encoded_images = encoded_images.sequences[..., 1:] 110 | 111 | # decode images 112 | decoded_images = p_decode(encoded_images, vqgan_params) 113 | decoded_images = decoded_images.clip(0.0, 1.0).reshape((-1, 256, 256, 3)) 114 | for img in decoded_images: 115 | images.append(Image.fromarray(np.asarray(img * 255, dtype=np.uint8))) 116 | 117 | return images 118 | -------------------------------------------------------------------------------- /executors/dalle/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: DalleGenerator -------------------------------------------------------------------------------- /executors/dalle/requirements.txt: -------------------------------------------------------------------------------- 1 | jina==3.12.0 2 | docarray==0.19.1 3 | 4 | jax~=0.3.24 5 | flax 6 | 7 | git+https://github.com/openai/CLIP.git 8 | git+https://github.com/huggingface/transformers.git 9 | git+https://github.com/patil-suraj/vqgan-jax.git 10 | git+https://github.com/borisdayma/dalle-mini.git 11 | -------------------------------------------------------------------------------- /executors/glid3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-runtime-ubuntu20.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends wget git python3.8 python3-pip \ 5 | && ln -sf python3.8 /usr/bin/python \ 6 | && ln -sf pip3 
/usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | WORKDIR /workdir 14 | 15 | RUN git clone --depth=1 https://github.com/CompVis/latent-diffusion.git && \ 16 | git clone --depth=1 https://github.com/jina-ai/glid-3-xl.git && \ 17 | pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 && \ 18 | cd latent-diffusion && pip install --timeout=1000 -e . && cd - && \ 19 | cd glid-3-xl && pip install --timeout=1000 -e . && cd - && \ 20 | cd glid-3-xl && \ 21 | wget -q https://dall-3.com/models/glid-3-xl/bert.pt && \ 22 | wget -q https://dall-3.com/models/glid-3-xl/kl-f8.pt && \ 23 | wget -q https://dall-3.com/models/glid-3-xl/finetune.pt && cd - 24 | 25 | COPY . /workdir/executor 26 | RUN pip install --no-cache-dir -r ./executor/requirements.txt 27 | 28 | WORKDIR /workdir/executor 29 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/glid3/clip_blank_encoding.json: -------------------------------------------------------------------------------- 1 | [ 2 | 0.4267578125, 3 | 0.09423828125, 4 | 0.319091796875, 5 | -0.272705078125, 6 | -0.12139892578125, 7 | -0.0032444000244140625, 8 | -0.314208984375, 9 | 0.41796875, 10 | 0.1824951171875, 11 | -0.464111328125, 12 | 0.1988525390625, 13 | -0.0894775390625, 14 | -0.4609375, 15 | 0.402099609375, 16 | -0.326416015625, 17 | 0.286376953125, 18 | 0.1968994140625, 19 | 0.2425537109375, 20 | 0.005565643310546875, 21 | -0.478759765625, 22 | 0.1920166015625, 23 | -0.54150390625, 24 | 0.09613037109375, 25 | 0.265869140625, 26 | 0.2330322265625, 27 | 0.2034912109375, 28 | 0.16455078125, 29 | -0.1453857421875, 30 | 0.59033203125, 31 | 0.31640625, 32 | -0.0650634765625, 33 | -0.1490478515625, 34 | -0.52197265625, 35 | 0.09747314453125, 36 | -0.6328125, 37 | 0.2423095703125, 38 | 0.76904296875, 39 | -0.34130859375, 40 | -0.294921875, 41 | -0.71728515625, 42 | -0.1629638671875, 43 | -0.2763671875, 44 | 0.349853515625, 45 | -0.12384033203125, 46 | 0.31591796875, 47 | -0.1033935546875, 48 | -0.1754150390625, 49 | 0.0222320556640625, 50 | -0.276123046875, 51 | -0.80126953125, 52 | 0.07586669921875, 53 | -0.1688232421875, 54 | 0.166748046875, 55 | 0.043365478515625, 56 | 0.5517578125, 57 | -0.2442626953125, 58 | 0.1434326171875, 59 | -0.322021484375, 60 | 0.2464599609375, 61 | 0.013641357421875, 62 | -0.369384765625, 63 | -0.0181884765625, 64 | -0.041259765625, 65 | -0.0828857421875, 66 | 0.2039794921875, 67 | 0.8564453125, 68 | 0.1837158203125, 69 | 0.13818359375, 70 | -0.338134765625, 71 | -0.6181640625, 72 | -0.00714874267578125, 73 | 0.389892578125, 74 | -0.01666259765625, 75 | -0.40869140625, 76 | -0.20849609375, 77 | 0.36474609375, 78 | 0.26416015625, 79 | 0.202880859375, 80 | -0.26904296875, 81 | -0.9296875, 82 | -0.205078125, 83 | -0.1761474609375, 84 | 0.30126953125, 85 | -0.094970703125, 86 | -0.035430908203125, 87 | 0.155029296875, 88 | 0.0236663818359375, 89 | -0.033538818359375, 90 | -0.5654296875, 91 | 0.46044921875, 92 | -0.08294677734375, 93 | 0.09405517578125, 94 | -0.2406005859375, 95 | -0.04327392578125, 96 | 0.050079345703125, 97 | -0.2078857421875, 98 | 0.79541015625, 99 | -0.27294921875, 100 | 0.477783203125, 101 | -0.234619140625, 102 | 0.2376708984375, 103 | -0.2392578125, 104 | -0.501953125, 105 | -0.09136962890625, 106 | 0.251220703125, 107 | -0.01557159423828125, 108 | 0.16455078125, 109 | 
-0.177001953125, 110 | 0.045135498046875, 111 | 0.01027679443359375, 112 | -0.045440673828125, 113 | 0.04107666015625, 114 | -0.070556640625, 115 | 0.325927734375, 116 | 0.11676025390625, 117 | 0.29052734375, 118 | -0.8583984375, 119 | -0.19677734375, 120 | -0.0970458984375, 121 | -0.0728759765625, 122 | -0.28662109375, 123 | -0.213134765625, 124 | 0.01155853271484375, 125 | 0.55859375, 126 | -0.13916015625, 127 | -0.1534423828125, 128 | 0.226318359375, 129 | 0.128173828125, 130 | -0.330322265625, 131 | -0.1881103515625, 132 | -0.05975341796875, 133 | -0.7568359375, 134 | 0.2724609375, 135 | -0.65966796875, 136 | 0.1209716796875, 137 | 0.5615234375, 138 | 0.218017578125, 139 | 0.51953125, 140 | 0.1649169921875, 141 | -0.0517578125, 142 | 0.2022705078125, 143 | -0.08172607421875, 144 | -0.0079498291015625, 145 | -0.2626953125, 146 | 0.0416259765625, 147 | 0.188720703125, 148 | 0.62744140625, 149 | -0.599609375, 150 | -0.7734375, 151 | -0.44580078125, 152 | 0.08416748046875, 153 | -0.58349609375, 154 | 0.283447265625, 155 | -0.4384765625, 156 | 0.57080078125, 157 | -0.223388671875, 158 | 0.273193359375, 159 | 0.35595703125, 160 | 0.1824951171875, 161 | 0.034271240234375, 162 | 0.33447265625, 163 | 0.369384765625, 164 | 0.2498779296875, 165 | 0.2017822265625, 166 | -0.314208984375, 167 | -0.61376953125, 168 | 0.6328125, 169 | -0.00018024444580078125, 170 | 0.487548828125, 171 | -0.39794921875, 172 | 0.95849609375, 173 | -0.1937255859375, 174 | 0.361328125, 175 | -0.0301513671875, 176 | 0.183349609375, 177 | -0.059173583984375, 178 | 0.1392822265625, 179 | -1.455078125, 180 | 0.44287109375, 181 | 0.5126953125, 182 | 1.1103515625, 183 | -0.0845947265625, 184 | -0.2427978515625, 185 | 0.52734375, 186 | -0.00655364990234375, 187 | -0.544921875, 188 | -0.310791015625, 189 | -0.040618896484375, 190 | -0.1861572265625, 191 | -0.27490234375, 192 | -0.57666015625, 193 | -0.1861572265625, 194 | 0.31005859375, 195 | 0.40771484375, 196 | 0.1600341796875, 197 | -2.431640625, 198 | -0.138671875, 199 | 0.490234375, 200 | 0.0030040740966796875, 201 | 0.12548828125, 202 | 0.29736328125, 203 | 0.042999267578125, 204 | -0.1131591796875, 205 | -0.0635986328125, 206 | 0.2493896484375, 207 | -0.268798828125, 208 | -0.599609375, 209 | -0.2376708984375, 210 | 0.492431640625, 211 | 0.02752685546875, 212 | 0.4111328125, 213 | -0.03717041015625, 214 | 0.10552978515625, 215 | 0.24169921875, 216 | 0.294921875, 217 | 0.5185546875, 218 | 0.0855712890625, 219 | 0.0254669189453125, 220 | 0.138671875, 221 | 0.13427734375, 222 | -0.06884765625, 223 | -0.394287109375, 224 | 0.1759033203125, 225 | -0.24560546875, 226 | 0.12469482421875, 227 | 0.367919921875, 228 | -0.1817626953125, 229 | -0.09893798828125, 230 | 0.576171875, 231 | -0.1810302734375, 232 | 0.03875732421875, 233 | 0.496337890625, 234 | 0.09124755859375, 235 | -0.437744140625, 236 | 0.19189453125, 237 | 0.135498046875, 238 | -0.379150390625, 239 | 0.28173828125, 240 | 0.257080078125, 241 | -0.1134033203125, 242 | 0.350830078125, 243 | 0.10089111328125, 244 | 0.482177734375, 245 | 0.6015625, 246 | -0.07183837890625, 247 | 0.67578125, 248 | -0.006755828857421875, 249 | -0.0211334228515625, 250 | 0.4423828125, 251 | 0.373779296875, 252 | 0.151611328125, 253 | 0.1829833984375, 254 | -0.51025390625, 255 | -0.412841796875, 256 | 0.482666015625, 257 | 0.6015625, 258 | 0.023101806640625, 259 | 0.7373046875, 260 | -0.0750732421875, 261 | 0.548828125, 262 | 0.310302734375, 263 | 0.9140625, 264 | -0.1444091796875, 265 | -0.2294921875, 266 | -0.05206298828125, 267 | 
0.049163818359375, 268 | -0.2274169921875, 269 | -0.81298828125, 270 | -0.1673583984375, 271 | -0.0038909912109375, 272 | -0.53759765625, 273 | 0.435302734375, 274 | 0.29931640625, 275 | -0.62890625, 276 | 0.031890869140625, 277 | -0.16650390625, 278 | -0.2264404296875, 279 | 0.2166748046875, 280 | -0.094482421875, 281 | -0.0994873046875, 282 | 0.1805419921875, 283 | -0.69140625, 284 | -0.42333984375, 285 | 0.0278167724609375, 286 | 0.127685546875, 287 | -0.2078857421875, 288 | 0.046356201171875, 289 | 0.39306640625, 290 | -2.359375, 291 | 0.1492919921875, 292 | -0.365234375, 293 | -0.125732421875, 294 | -0.76416015625, 295 | -0.093994140625, 296 | -0.1043701171875, 297 | 0.2237548828125, 298 | -0.26123046875, 299 | 0.76904296875, 300 | -0.248046875, 301 | -0.1484375, 302 | -0.341552734375, 303 | -0.224853515625, 304 | 0.02294921875, 305 | -0.1273193359375, 306 | 0.32666015625, 307 | -0.107421875, 308 | -0.10931396484375, 309 | 0.031494140625, 310 | -0.043121337890625, 311 | -0.181640625, 312 | -0.501953125, 313 | -0.07122802734375, 314 | 0.0350341796875, 315 | -0.2139892578125, 316 | 0.059356689453125, 317 | -0.1104736328125, 318 | -12.5390625, 319 | -0.17724609375, 320 | -0.51806640625, 321 | 0.52099609375, 322 | -0.275634765625, 323 | -0.415771484375, 324 | 0.08636474609375, 325 | 0.6279296875, 326 | 0.042327880859375, 327 | 0.91357421875, 328 | -0.7451171875, 329 | -0.353515625, 330 | -0.0435791015625, 331 | -0.34814453125, 332 | 0.1463623046875, 333 | 0.279296875, 334 | -0.006053924560546875, 335 | -0.175048828125, 336 | -0.07208251953125, 337 | 0.28857421875, 338 | 0.0623779296875, 339 | -0.170654296875, 340 | -0.57373046875, 341 | -0.307861328125, 342 | -0.61376953125, 343 | 0.064453125, 344 | 0.10040283203125, 345 | -0.12249755859375, 346 | 0.053375244140625, 347 | -0.0155029296875, 348 | 0.9970703125, 349 | -0.0170745849609375, 350 | 0.08184814453125, 351 | 0.33642578125, 352 | 0.2342529296875, 353 | -0.2318115234375, 354 | -0.08251953125, 355 | -0.39013671875, 356 | 0.048095703125, 357 | -0.298583984375, 358 | 1.205078125, 359 | -0.1558837890625, 360 | -0.28466796875, 361 | -0.54248046875, 362 | -0.0079498291015625, 363 | -0.29638671875, 364 | 0.66357421875, 365 | 0.19921875, 366 | 0.44189453125, 367 | 0.34619140625, 368 | 0.2890625, 369 | 0.20458984375, 370 | 0.12445068359375, 371 | 0.263427734375, 372 | -0.287353515625, 373 | 0.1959228515625, 374 | 0.08648681640625, 375 | -0.07989501953125, 376 | -0.23046875, 377 | -0.048980712890625, 378 | -0.0234527587890625, 379 | 0.0308685302734375, 380 | 0.50927734375, 381 | 0.269287109375, 382 | 0.470947265625, 383 | -0.3515625, 384 | -0.1947021484375, 385 | -0.2296142578125, 386 | 0.1258544921875, 387 | -0.046051025390625, 388 | 0.4296875, 389 | 0.064453125, 390 | 0.272216796875, 391 | 0.072265625, 392 | 0.256103515625, 393 | -0.018341064453125, 394 | -0.011199951171875, 395 | 0.287109375, 396 | -0.187744140625, 397 | -0.2470703125, 398 | 0.0248260498046875, 399 | 0.18798828125, 400 | 0.369140625, 401 | 0.1859130859375, 402 | 0.51318359375, 403 | -0.1571044921875, 404 | 0.24658203125, 405 | 0.418701171875, 406 | 0.91455078125, 407 | 0.432861328125, 408 | -0.140869140625, 409 | -0.0196990966796875, 410 | -0.244140625, 411 | 0.342529296875, 412 | 0.338623046875, 413 | -0.609375, 414 | 0.14404296875, 415 | -1.466796875, 416 | 0.10284423828125, 417 | 0.5791015625, 418 | -0.488525390625, 419 | -0.18310546875, 420 | -0.5390625, 421 | -0.41357421875, 422 | -0.62451171875, 423 | 0.29736328125, 424 | 0.037872314453125, 425 | -0.39453125, 426 | 
-0.12164306640625, 427 | -0.1346435546875, 428 | -0.375244140625, 429 | -0.0999755859375, 430 | 0.3896484375, 431 | -0.165283203125, 432 | -0.517578125, 433 | 0.06317138671875, 434 | 0.42041015625, 435 | 0.1961669921875, 436 | 0.12164306640625, 437 | 0.56640625, 438 | -0.078369140625, 439 | -0.1783447265625, 440 | -0.350341796875, 441 | 0.08612060546875, 442 | 11.09375, 443 | -0.193115234375, 444 | 0.638671875, 445 | 0.49609375, 446 | -0.498779296875, 447 | 0.16259765625, 448 | 0.32470703125, 449 | 0.5302734375, 450 | 0.49853515625, 451 | -0.65966796875, 452 | -0.94091796875, 453 | -0.041412353515625, 454 | 0.463623046875, 455 | 0.11834716796875, 456 | -0.47900390625, 457 | 0.25244140625, 458 | 0.46826171875, 459 | 0.0777587890625, 460 | -0.05560302734375, 461 | 0.5419921875, 462 | -0.48583984375, 463 | 0.630859375, 464 | 0.277099609375, 465 | 0.313232421875, 466 | -0.09954833984375, 467 | 0.07550048828125, 468 | 0.01261138916015625, 469 | 0.65087890625, 470 | -0.01079559326171875, 471 | -0.077392578125, 472 | 0.11322021484375, 473 | -0.227783203125, 474 | 0.1246337890625, 475 | -0.431884765625, 476 | 0.1763916015625, 477 | -0.60302734375, 478 | 0.2264404296875, 479 | 0.296630859375, 480 | -0.3349609375, 481 | -0.0023040771484375, 482 | -0.1368408203125, 483 | -0.4296875, 484 | 0.03179931640625, 485 | -1.041015625, 486 | -0.849609375, 487 | 0.08282470703125, 488 | 0.62939453125, 489 | -0.326416015625, 490 | 0.18896484375, 491 | 0.40869140625, 492 | 0.05841064453125, 493 | -0.07391357421875, 494 | -0.470458984375, 495 | -0.14501953125, 496 | -0.65087890625, 497 | -0.150634765625, 498 | 0.06378173828125, 499 | -0.7314453125, 500 | 0.06561279296875, 501 | 0.76025390625, 502 | 0.22998046875, 503 | -0.399658203125, 504 | 0.1884765625, 505 | 0.29443359375, 506 | -0.6240234375, 507 | -0.306396484375, 508 | 0.1612548828125, 509 | -0.28955078125, 510 | -0.6435546875, 511 | -0.01000213623046875, 512 | 0.564453125, 513 | -0.0131988525390625, 514 | 0.299072265625, 515 | -0.0295562744140625, 516 | -0.40185546875, 517 | 0.25830078125, 518 | 6.580352783203125e-05, 519 | -0.3115234375, 520 | 0.033935546875, 521 | -0.0880126953125, 522 | 0.223876953125, 523 | -0.328857421875, 524 | -0.11358642578125, 525 | 0.06439208984375, 526 | 0.14013671875, 527 | 0.48095703125, 528 | 0.556640625, 529 | -0.271728515625, 530 | -1.8212890625, 531 | -0.12078857421875, 532 | -0.1739501953125, 533 | -0.91650390625, 534 | -0.447265625, 535 | 0.180419921875, 536 | 0.6494140625, 537 | 0.00035500526428222656, 538 | -0.23095703125, 539 | -0.005664825439453125, 540 | -0.787109375, 541 | -0.2117919921875, 542 | 0.238037109375, 543 | 0.053955078125, 544 | -0.00911712646484375, 545 | 0.09051513671875, 546 | 0.057525634765625, 547 | 0.199462890625, 548 | 0.1065673828125, 549 | -0.037811279296875, 550 | 0.2724609375, 551 | -0.0565185546875, 552 | 0.4453125, 553 | -0.1824951171875, 554 | -0.07171630859375, 555 | 0.1258544921875, 556 | -0.05780029296875, 557 | 0.3310546875, 558 | -0.1885986328125, 559 | 0.223876953125, 560 | -0.11669921875, 561 | -0.09320068359375, 562 | 0.6796875, 563 | 0.463134765625, 564 | -0.1151123046875, 565 | -0.10345458984375, 566 | 0.9521484375, 567 | 0.480224609375, 568 | 0.0926513671875, 569 | 0.2548828125, 570 | 0.12841796875, 571 | 0.353759765625, 572 | 0.53076171875, 573 | -0.4404296875, 574 | -0.52880859375, 575 | -0.1378173828125, 576 | 0.029510498046875, 577 | -0.407958984375, 578 | 0.1527099609375, 579 | -0.1304931640625, 580 | 0.312744140625, 581 | -0.016143798828125, 582 | -0.10784912109375, 583 | 
-0.0928955078125, 584 | -0.433837890625, 585 | 0.2420654296875, 586 | 0.1614990234375, 587 | -0.5810546875, 588 | 0.0665283203125, 589 | -0.218017578125, 590 | -0.06744384765625, 591 | -0.31689453125, 592 | -0.06787109375, 593 | -0.063720703125, 594 | -0.420654296875, 595 | 0.1513671875, 596 | -0.1632080078125, 597 | 0.75390625, 598 | 0.317138671875, 599 | -0.4560546875, 600 | -0.145263671875, 601 | 0.55615234375, 602 | -0.6171875, 603 | 0.277099609375, 604 | 0.0941162109375, 605 | 0.182861328125, 606 | -0.2030029296875, 607 | 0.38134765625, 608 | 0.1688232421875, 609 | 0.1578369140625, 610 | 0.1190185546875, 611 | 0.347412109375, 612 | 0.3486328125, 613 | -0.359375, 614 | -0.9091796875, 615 | 0.0548095703125, 616 | 0.10174560546875, 617 | -0.3486328125, 618 | 0.264404296875, 619 | -0.798828125, 620 | 0.08587646484375, 621 | 0.08038330078125, 622 | 0.35595703125, 623 | 0.33544921875, 624 | -0.18505859375, 625 | 0.09552001953125, 626 | -0.71923828125, 627 | 0.30908203125, 628 | 0.2337646484375, 629 | -0.20703125, 630 | 0.54150390625, 631 | -0.114501953125, 632 | 0.013885498046875, 633 | 0.1988525390625, 634 | -0.1873779296875, 635 | 0.0638427734375, 636 | -0.62939453125, 637 | -0.09820556640625, 638 | 0.301025390625, 639 | -0.6513671875, 640 | 0.1280517578125, 641 | -0.416015625, 642 | 0.1966552734375, 643 | 0.45263671875, 644 | 0.27490234375, 645 | -0.421630859375, 646 | 1.4736328125, 647 | 0.0106353759765625, 648 | 0.07696533203125, 649 | -0.281494140625, 650 | 0.5302734375, 651 | -0.54345703125, 652 | 0.226806640625, 653 | -0.1661376953125, 654 | -0.211669921875, 655 | 0.042388916015625, 656 | 0.328369140625, 657 | 0.693359375, 658 | 0.2080078125, 659 | -0.451416015625, 660 | 0.036102294921875, 661 | -0.2225341796875, 662 | -0.371826171875, 663 | -0.6337890625, 664 | 0.072998046875, 665 | 0.1712646484375, 666 | 0.28955078125, 667 | -0.045562744140625, 668 | -0.436767578125, 669 | 0.2841796875, 670 | -0.88525390625, 671 | 0.417236328125, 672 | -0.57373046875, 673 | -0.05206298828125, 674 | 0.119384765625, 675 | 0.08599853515625, 676 | -0.10137939453125, 677 | -0.5302734375, 678 | 0.08843994140625, 679 | -0.141845703125, 680 | -0.06201171875, 681 | -0.440673828125, 682 | -0.25830078125, 683 | -0.1640625, 684 | -0.25634765625, 685 | -0.2021484375, 686 | 0.446533203125, 687 | 0.1524658203125, 688 | -0.126708984375, 689 | 0.352783203125, 690 | 0.0272369384765625, 691 | 0.34375, 692 | 0.211181640625, 693 | 0.042327880859375, 694 | -0.07745361328125, 695 | -0.051666259765625, 696 | 0.34765625, 697 | -0.0138702392578125, 698 | -0.1356201171875, 699 | 0.049652099609375, 700 | -0.0168914794921875, 701 | -0.33837890625, 702 | -0.05548095703125, 703 | 0.2218017578125, 704 | 0.07037353515625, 705 | -0.91015625, 706 | 0.240966796875, 707 | 0.06134033203125, 708 | -0.200927734375, 709 | -0.18310546875, 710 | 0.2489013671875, 711 | -0.2152099609375, 712 | -0.131591796875, 713 | 0.1297607421875, 714 | -0.4375, 715 | 0.693359375, 716 | 0.271484375, 717 | 0.05218505859375, 718 | 0.057647705078125, 719 | -0.196533203125, 720 | 0.1334228515625, 721 | 0.304443359375, 722 | -0.11651611328125, 723 | 0.2276611328125, 724 | 0.327880859375, 725 | -0.849609375, 726 | 0.8544921875, 727 | 0.274658203125, 728 | 0.29345703125, 729 | 0.11578369140625, 730 | -0.2430419921875, 731 | 0.195556640625, 732 | 0.195068359375, 733 | 0.279541015625, 734 | 0.33740234375, 735 | -0.49267578125, 736 | 0.890625, 737 | 0.43603515625, 738 | 0.08782958984375, 739 | 0.1571044921875, 740 | 0.323974609375, 741 | 0.60498046875, 742 | 
0.57958984375, 743 | 0.404052734375, 744 | 0.0965576171875, 745 | -0.01528167724609375, 746 | 0.515625, 747 | -0.06396484375, 748 | 0.5869140625, 749 | 0.123291015625, 750 | -0.05450439453125, 751 | -0.78369140625, 752 | -0.040252685546875, 753 | -0.00777435302734375, 754 | 0.1846923828125, 755 | 0.2239990234375, 756 | -0.337890625, 757 | -0.322265625, 758 | 0.402099609375, 759 | 0.466552734375, 760 | 0.10711669921875, 761 | -0.391357421875, 762 | 0.098876953125, 763 | -0.6328125, 764 | 0.77685546875, 765 | 0.260986328125, 766 | 0.63916015625, 767 | -0.020294189453125, 768 | -0.267822265625, 769 | 0.18408203125 770 | ] -------------------------------------------------------------------------------- /executors/glid3/config.yml: -------------------------------------------------------------------------------- 1 | jtype: GLID3Diffusion 2 | with: 3 | glid3_path: ../glid-3-xl 4 | steps: 100 5 | metas: 6 | py_modules: 7 | - executor.py -------------------------------------------------------------------------------- /executors/glid3/executor.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import os 3 | import shutil 4 | import tempfile 5 | import time 6 | from typing import Dict 7 | import json 8 | 9 | from jina import Executor, DocumentArray, Document, requests 10 | 11 | 12 | class GLID3Diffusion(Executor): 13 | def __init__(self, glid3_path: str, steps: int = 100, **kwargs): 14 | super().__init__(**kwargs) 15 | os.environ['GLID_MODEL_PATH'] = glid3_path 16 | os.environ['GLID3_STEPS'] = str(steps) 17 | self.diffusion_steps = steps 18 | from dalle_flow_glid3.model import static_args 19 | from dalle_flow_glid3.blank_encoding import generate_blank_embeddings 20 | 21 | assert static_args 22 | 23 | self.logger.info('Generating blank embeddings') 24 | with open(os.path.join(os.path.dirname(__file__), 'clip_blank_encoding.json')) as f: 25 | self.blank_bert_embedding, self.blank_clip_embedding = generate_blank_embeddings('a', json.load(f)) 26 | 27 | def run_glid3(self, d: Document, text: str, skip_rate: float, num_images: int): 28 | request_time = time.time() 29 | 30 | with tempfile.NamedTemporaryFile( 31 | suffix='.png', 32 | ) as f_in: 33 | self.logger.info(f'diffusion [{text}] ...') 34 | from dalle_flow_glid3.cli_parser import parser 35 | 36 | kw = { 37 | 'init_image': f_in.name if d.uri else None, 38 | 'skip_timesteps': int(self.diffusion_steps * skip_rate) if d.uri else 0, 39 | 'steps': self.diffusion_steps, 40 | 'batch_size': num_images, 41 | 'num_batches': 1, 42 | 'text': f'"{text}"', 43 | 'output_path': d.id 44 | } 45 | kw_str_list = [] 46 | for k, v in kw.items(): 47 | if v is not None: 48 | kw_str_list.extend([f'--{k}', str(v)]) 49 | if d.uri: 50 | d.save_uri_to_file(f_in.name) 51 | 52 | from dalle_flow_glid3.sample import do_run 53 | 54 | args = parser.parse_args(kw_str_list) 55 | do_run(args, d.embedding, self.blank_bert_embedding, self.blank_clip_embedding) 56 | 57 | kw.update({ 58 | 'generator': 'GLID3-XL', 59 | 'request_time': request_time, 60 | 'created_time': time.time(), 61 | }) 62 | for f in glob.glob(f'{args.output_path}/*.png'): 63 | _d = Document(uri=f, text=d.text, tags=kw).convert_uri_to_datauri() 64 | d.matches.append(_d) 65 | 66 | # remove all outputs 67 | shutil.rmtree(args.output_path, ignore_errors=True) 68 | 69 | self.logger.info(f'done with [{text}]!') 70 | 71 | @requests(on='/') 72 | def diffusion(self, docs: DocumentArray, parameters: Dict, **kwargs): 73 | skip_rate = float(parameters.get('skip_rate', 0.5)) 74 | 
num_images = max(1, min(9, int(parameters.get('num_images', 1)))) 75 | for d in docs: 76 | self.run_glid3(d, d.text, skip_rate=skip_rate, num_images=num_images) 77 | -------------------------------------------------------------------------------- /executors/glid3/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: GLID3Diffusion -------------------------------------------------------------------------------- /executors/glid3/requirements.txt: -------------------------------------------------------------------------------- 1 | jina==3.12.0 2 | docarray==0.19.1 3 | 4 | dalle_pytorch 5 | -------------------------------------------------------------------------------- /executors/realesrgan/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu22.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends wget git python3.10 python3-pip ffmpeg libsm6 libxext6 \ 5 | && ln -sf python3.10 /usr/bin/python \ 6 | && ln -sf pip3 /usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 14 | 15 | WORKDIR /realesrgan 16 | 17 | COPY . /realesrgan/executor 18 | 19 | RUN pip install --no-cache-dir --use-deprecated=legacy-resolver -r ./executor/requirements.txt 20 | RUN pip uninstall -y torch torchvision torchaudio 21 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 22 | RUN pip install realesrgan 23 | 24 | ARG USER_ID=1000 25 | ARG GROUP_ID=1000 26 | 27 | ARG USER_NAME=realesrgan 28 | ARG GROUP_NAME=realesrgan 29 | 30 | RUN groupadd -g ${GROUP_ID} ${USER_NAME} && \ 31 | useradd -l -u ${USER_ID} -g ${USER_NAME} ${GROUP_NAME} | chpasswd && \ 32 | mkdir /home/${USER_NAME} && \ 33 | chown ${USER_NAME}:${GROUP_NAME} /home/${USER_NAME} && \ 34 | chown -R ${USER_NAME}:${GROUP_NAME} /realesrgan/ 35 | 36 | USER ${USER_NAME} 37 | 38 | # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 39 | ENV XLA_PYTHON_CLIENT_ALLOCATOR=platform 40 | 41 | WORKDIR /realesrgan/executor 42 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/realesrgan/config.yml: -------------------------------------------------------------------------------- 1 | jtype: RealESRGANUpscaler 2 | with: 3 | cache_path: ~/.cache 4 | models_to_load: 5 | - RealESRGAN_x4plus 6 | - RealESRGAN_x4plus_anime_6B 7 | pre_pad: 10 8 | tile: 0 9 | tile_pad: 10 10 | use_half: True 11 | 12 | metas: 13 | py_modules: 14 | - executor.py 15 | -------------------------------------------------------------------------------- /executors/realesrgan/executor.py: -------------------------------------------------------------------------------- 1 | import enum 2 | import time 3 | 4 | from PIL import Image 5 | from io import BytesIO 6 | from pathlib import Path 7 | from typing import Any, Dict, List, Union 8 | from urllib.request import urlopen 9 | 10 | import numpy as np 11 | import torch 12 | import cv2 13 | 14 | from basicsr.archs.rrdbnet_arch import RRDBNet 15 | from basicsr.utils.download_util import load_file_from_url 16 | from gfpgan import GFPGANer 17 | from jina import Executor, DocumentArray, Document, requests 18 | from realesrgan import RealESRGANer 19 | from 
realesrgan.archs.srvgg_arch import SRVGGNetCompact 20 | 21 | 22 | GFPGAN_MODEL_NAME = "GFPGANv1.4.pth" 23 | GFPGAN_MODEL_URL = ( 24 | "https://github.com/TencentARC/GFPGAN/releases/download/v1.3.0/GFPGANv1.4.pth" 25 | ) 26 | 27 | 28 | class RESRGAN_MODELS(str, enum.Enum): 29 | RealESRGAN_x4plus = "RealESRGAN_x4plus" 30 | RealESRNet_x4plus = "RealESRNet_x4plus" 31 | RealESRGAN_x4plus_anime_6B = "RealESRGAN_x4plus_anime_6B" 32 | RealESRGAN_x2plus = "RealESRGAN_x2plus" 33 | RealESR_animevideov3 = "realesr-animevideov3" 34 | RealESR_general_x4v3 = "realesr-general-x4v3" 35 | 36 | 37 | class RealESRGANUpscaler(Executor): 38 | """ 39 | This is a module that provides access to the RealESRGAN models and API which 40 | upscale images and video. It also supports using GFPGAN to fix faces within 41 | photographic images. 42 | 43 | The module source code is available at: 44 | https://github.com/xinntao/Real-ESRGAN 45 | 46 | All models that are included in the config.yml file will be available for 47 | upscaling. 48 | """ 49 | cache_path: Union[str, Path] = '' 50 | gfpgan_weights_path: Union[str, Path] = '' 51 | models_to_load: List[str] = [] 52 | pre_pad = 10 53 | tile = 0 54 | tile_pad = 10 55 | use_half = True 56 | 57 | def __init__( 58 | self, 59 | cache_path: Union[str, Path], 60 | models_to_load: List[str], 61 | pre_pad: int = 10, 62 | tile: int = 0, 63 | tile_pad: int = 10, 64 | use_half: bool = True, 65 | **kwargs, 66 | ): 67 | """ 68 | Args: 69 | 70 | cache_path: path to the cache directory. 71 | models_to_load: list[str], list of the models to load into memory. 72 | 73 | tile (int): As too large images result in the out of GPU memory issue, 74 | so this tile option will first crop input images into tiles, and 75 | then process each of them. Finally, they will be merged into one 76 | image. 77 | 0 denotes for do not use tile. Default: 0. 78 | tile_pad (int): The pad size for each tile, to remove border artifacts. 79 | Default: 10. 80 | pre_pad (int): Pad the input images to avoid border artifacts. 81 | Default: 10. 82 | half (float): Whether to use half precision during inference. 83 | Default: True. 84 | """ 85 | super().__init__(**kwargs) 86 | if "~" in str(Path(cache_path)): 87 | cache_path = Path(cache_path).expanduser() 88 | 89 | # Download/find weights for GFPGAN. 90 | gfpgan_weights_path = Path.home() / str(GFPGAN_MODEL_NAME + ".pth") 91 | if Path(cache_path).is_dir(): 92 | gfpgan_weights_path = Path(cache_path) / str(GFPGAN_MODEL_NAME + ".pth") 93 | 94 | if not gfpgan_weights_path.is_file(): 95 | # Assume we're working locally, use local home. 
96 | gfpgan_weights_dir = Path.home() 97 | gfpgan_weights_path = Path.home() / str(GFPGAN_MODEL_NAME + ".pth") 98 | # weights_path will be updated 99 | gfpgan_weights_path = load_file_from_url( 100 | url=GFPGAN_MODEL_URL, 101 | model_dir=str(gfpgan_weights_dir.absolute()), 102 | progress=True, 103 | file_name=None, 104 | ) 105 | 106 | self.cache_path = cache_path 107 | self.gfpgan_weights_path = gfpgan_weights_path 108 | self.models_to_load = models_to_load 109 | self.pre_pad = pre_pad 110 | self.tile = tile 111 | self.tile_pad = tile_pad 112 | self.use_half = use_half 113 | 114 | def load_model(self) -> Dict[str, Any]: 115 | ''' 116 | return a dictionary organized as: 117 | { 118 | model_name: { 119 | 'name': str, 120 | 'netscale': int, (scaling strength eg 4=4x) 121 | 'model': initialized RealESRGAN model, 122 | 'model_face_fix': initialized GFPGAN model, [optional, non-anime only] 123 | } 124 | ''' 125 | def gfpgan_wrapper(model_upscaler: Any, outscale: int): 126 | return GFPGANer( 127 | model_path=str(self.gfpgan_weights_path.absolute()) 128 | if isinstance(self.gfpgan_weights_path, Path) 129 | else self.gfpgan_weights_path, 130 | upscale=outscale, 131 | arch="clean", 132 | channel_multiplier=2, 133 | bg_upsampler=model_upscaler, 134 | ) 135 | 136 | resrgan_models: Dict[str, Any] = {} 137 | for model_name in self.models_to_load: 138 | model_type = None 139 | try: 140 | model_type = RESRGAN_MODELS(model_name) 141 | except ValueError: 142 | raise ValueError( 143 | f"Unknown model name '{model_name}', " 144 | + "please ensure all models in models_to_load configuration " 145 | + "option are valid" 146 | ) 147 | 148 | model = None 149 | netscale = 4 150 | file_url = [] 151 | if model_type == RESRGAN_MODELS.RealESRGAN_x4plus: # x4 RRDBNet model 152 | model = RRDBNet( 153 | num_in_ch=3, 154 | num_out_ch=3, 155 | num_feat=64, 156 | num_block=23, 157 | num_grow_ch=32, 158 | scale=4, 159 | ) 160 | netscale = 4 161 | file_url = [ 162 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.0/RealESRGAN_x4plus.pth" 163 | ] 164 | if model_type == RESRGAN_MODELS.RealESRNet_x4plus: # x4 RRDBNet model 165 | model = RRDBNet( 166 | num_in_ch=3, 167 | num_out_ch=3, 168 | num_feat=64, 169 | num_block=23, 170 | num_grow_ch=32, 171 | scale=4, 172 | ) 173 | netscale = 4 174 | file_url = [ 175 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.1.1/RealESRNet_x4plus.pth" 176 | ] 177 | if ( 178 | model_type == RESRGAN_MODELS.RealESRGAN_x4plus_anime_6B 179 | ): # x4 RRDBNet model with 6 blocks 180 | model = RRDBNet( 181 | num_in_ch=3, 182 | num_out_ch=3, 183 | num_feat=64, 184 | num_block=6, 185 | num_grow_ch=32, 186 | scale=4, 187 | ) 188 | netscale = 4 189 | file_url = [ 190 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.2.4/RealESRGAN_x4plus_anime_6B.pth" 191 | ] 192 | if model_type == RESRGAN_MODELS.RealESRGAN_x2plus: # x2 RRDBNet model 193 | model = RRDBNet( 194 | num_in_ch=3, 195 | num_out_ch=3, 196 | num_feat=64, 197 | num_block=23, 198 | num_grow_ch=32, 199 | scale=2, 200 | ) 201 | netscale = 2 202 | file_url = [ 203 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.1/RealESRGAN_x2plus.pth" 204 | ] 205 | if ( 206 | model_type == RESRGAN_MODELS.RealESR_animevideov3 207 | ): # x4 VGG-style model (XS size) 208 | model = SRVGGNetCompact( 209 | num_in_ch=3, 210 | num_out_ch=3, 211 | num_feat=64, 212 | num_conv=16, 213 | upscale=4, 214 | act_type="prelu", 215 | ) 216 | netscale = 4 217 | file_url = [ 218 | 
"https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-animevideov3.pth" 219 | ] 220 | if ( 221 | model_type == RESRGAN_MODELS.RealESR_general_x4v3 222 | ): # x4 VGG-style model (S size) 223 | model = SRVGGNetCompact( 224 | num_in_ch=3, 225 | num_out_ch=3, 226 | num_feat=64, 227 | num_conv=32, 228 | upscale=4, 229 | act_type="prelu", 230 | ) 231 | netscale = 4 232 | file_url = [ 233 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-wdn-x4v3.pth", 234 | "https://github.com/xinntao/Real-ESRGAN/releases/download/v0.2.5.0/realesr-general-x4v3.pth", 235 | ] 236 | 237 | # determine model paths 238 | weights_path = Path.home() / str(model_name + ".pth") 239 | if Path(self.cache_path).is_dir(): 240 | weights_path = Path(self.cache_path) / str(model_name + ".pth") 241 | 242 | if not weights_path.is_file(): 243 | # Assume we're working locally, use local home. 244 | weights_dir = Path.home() 245 | weights_path = Path.home() / str(model_name + ".pth") 246 | for url in file_url: 247 | # weights_path will be updated 248 | weights_path = load_file_from_url( 249 | url=url, 250 | model_dir=str(weights_dir.absolute()), 251 | progress=True, 252 | file_name=None, 253 | ) 254 | 255 | # restorer 256 | upsampler = RealESRGANer( 257 | scale=netscale, 258 | model_path=str(weights_path.absolute()) 259 | if isinstance(weights_path, Path) 260 | else weights_path, 261 | model=model, 262 | tile=self.tile, 263 | tile_pad=self.tile_pad, 264 | pre_pad=self.pre_pad, 265 | half=self.use_half, 266 | ) 267 | 268 | model_face_fix = None 269 | if model_type != RESRGAN_MODELS.RealESRGAN_x4plus_anime_6B: 270 | model_face_fix = gfpgan_wrapper(upsampler, netscale) 271 | 272 | resrgan_models[model_name] = { 273 | "name": model_name, 274 | "netscale": netscale, 275 | "model": upsampler, 276 | "model_face_fix": model_face_fix, 277 | } 278 | 279 | return resrgan_models 280 | 281 | def document_to_pil(self, doc): 282 | uri_data = urlopen(doc.uri) 283 | return Image.open(BytesIO(uri_data.read())) 284 | 285 | @requests(on="/realesrgan") 286 | def realesrgan(self, docs: DocumentArray, parameters: Dict, **kwargs): 287 | """ 288 | Upscale using RealESRGAN, with or without face fix. 289 | 290 | @parameters.face_enhance: Whether or not to attempt to fix a human face. 291 | Not applicable to anime model. bool. 292 | @parameters.model_name: Which model to use, see RESRGAN_MODELS enum. 293 | str. 
294 | """ 295 | request_time = time.time() 296 | resrgan_models = self.load_model() 297 | 298 | face_enhance = parameters.get("face_enhance", False) 299 | model_name = parameters.get( 300 | "model_name", list(resrgan_models.values())[0]["name"] 301 | ) 302 | 303 | for doc in docs: 304 | img = self.document_to_pil(doc) 305 | img_arr = np.asarray(img) 306 | img_arr = cv2.cvtColor(img_arr, cv2.COLOR_RGB2BGR) 307 | 308 | model_dict = resrgan_models.get(model_name, None) 309 | if model_dict is None: 310 | raise ValueError(f"Unknown RealESRGAN upscaler specified: {model_name}") 311 | upsampler = model_dict.get("model", None) 312 | face_enhancer = model_dict.get("model_face_fix", None) 313 | if face_enhance is True and face_enhancer is not None: 314 | _, _, output = face_enhancer.enhance( 315 | img_arr, 316 | has_aligned=False, 317 | only_center_face=False, 318 | paste_back=True, 319 | ) 320 | else: 321 | output, _ = upsampler.enhance(img_arr, model_dict["netscale"]) 322 | output = cv2.cvtColor(output, cv2.COLOR_BGR2RGB) 323 | image_big = Image.fromarray(output) 324 | 325 | buffered = BytesIO() 326 | image_big.save(buffered, format="PNG") 327 | _d = Document( 328 | blob=buffered.getvalue(), 329 | mime_type="image/png", 330 | tags={ 331 | "request": { 332 | "api": "realesrgan", 333 | "face_enhance": face_enhance, 334 | "model_name": model_name, 335 | }, 336 | "text": doc.text, 337 | "generator": "realesrgan", 338 | "request_time": request_time, 339 | "created_time": time.time(), 340 | }, 341 | ).convert_blob_to_datauri() 342 | _d.text = doc.text 343 | doc.matches.append(_d) 344 | 345 | torch.cuda.empty_cache() 346 | -------------------------------------------------------------------------------- /executors/realesrgan/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: RealESRGANUpscaler 3 | -------------------------------------------------------------------------------- /executors/realesrgan/requirements.txt: -------------------------------------------------------------------------------- 1 | jina 2 | docarray 3 | scipy 4 | matplotlib 5 | opencv-python 6 | basicsr>=1.4.2 7 | facexlib>=0.2.5 8 | gfpgan>=1.3.5 9 | numpy 10 | Pillow 11 | tqdm -------------------------------------------------------------------------------- /executors/stable/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.7.0-cudnn8-devel-ubuntu22.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends wget git python3.10 python3-pip \ 5 | && ln -sf python3.10 /usr/bin/python \ 6 | && ln -sf pip3 /usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | WORKDIR /workdir 14 | 15 | COPY . 
/workdir/executor 16 | RUN pip install --no-cache-dir --use-deprecated=legacy-resolver -r ./executor/requirements.txt 17 | RUN pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu116 18 | 19 | # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 20 | ENV XLA_PYTHON_CLIENT_ALLOCATOR=platform 21 | 22 | ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64 23 | 24 | WORKDIR /workdir/executor 25 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/stable/config.yml: -------------------------------------------------------------------------------- 1 | jtype: StableDiffusionGenerator 2 | with: 3 | batch_size: 4 4 | height: 512 5 | max_n_subprompts: 8 6 | max_resolution: 589824 # 768x768 7 | n_iter: 1 8 | use_half: True 9 | weights_path: ../stable-diffusion/models/ldm/stable-diffusion-v1/model.ckpt 10 | width: 512 11 | metas: 12 | py_modules: 13 | - executor.py -------------------------------------------------------------------------------- /executors/stable/executor.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import time 3 | import torch 4 | 5 | from io import BytesIO 6 | from operator import itemgetter 7 | from random import randint 8 | from typing import Dict, Optional 9 | from urllib.request import urlopen 10 | 11 | import numpy as np 12 | 13 | from PIL import Image 14 | from stable_inference import StableDiffusionInference 15 | from stable_inference.util import ( 16 | combine_weighted_subprompts, 17 | slerp, 18 | ) 19 | 20 | from tqdm import trange 21 | 22 | from jina import Executor, DocumentArray, Document, requests 23 | 24 | 25 | INPAINTING_CONFIG_NAME = 'v1-inpainting.yaml' 26 | K_DIFF_SAMPLERS = {'k_lms', 'dpm2', 'dpm2_ancestral', 'heun', 27 | 'euler', 'euler_ancestral', 'dpm_fast', 'dpm_adaptive', 28 | 'dpmpp_2s_ancestral', 'dpmpp_2m'} 29 | 30 | 31 | MAX_STEPS = 250 32 | MIN_HEIGHT = 384 33 | MIN_WIDTH = 384 34 | 35 | 36 | def document_to_pil(doc): 37 | uri_data = urlopen(doc.uri) 38 | return Image.open(BytesIO(uri_data.read())) 39 | 40 | 41 | class StableDiffusionGenerator(Executor): 42 | ''' 43 | Executor generator for all stable diffusion API paths. 44 | ''' 45 | batch_size = 4 46 | stable_diffusion_module = None 47 | 48 | def __init__(self, 49 | batch_size: int=4, 50 | height: int=512, 51 | max_n_subprompts=8, 52 | max_resolution=589824, 53 | n_iter: int=1, 54 | use_half: bool=False, 55 | weights_path='', 56 | width: int=512, 57 | config_path: Optional[str]=None, 58 | **kwargs, 59 | ): 60 | ''' 61 | @batch_size: The number of images to create at the same time. It only 62 | slightly speeds up inference while dramatically increasing memory 63 | usage. 64 | @height: Default height of image in pixels. 65 | @max_n_subprompts: Maximum number of subprompts you can add to an image 66 | in the denoising step. More subprompts = slower denoising. 67 | @max_resolution: The maximum resolution for images in pixels, to keep 68 | your GPU from OOMing in server applications. 69 | @n_iter: Default number of iterations for the sampler. 70 | @use_half: Sample with FP16 instead of FP32. Saves some memory for 71 | approximately the same results. 72 | @weights_path: Location of the Stable Diffusion weights checkpoint file. 73 | @width: Default width of image in pixels. 74 | @config_path: Location for the YAML configuration file for the model. 
75 | ''' 76 | super().__init__(**kwargs) 77 | self.batch_size = batch_size 78 | self.stable_diffusion_module = StableDiffusionInference( 79 | checkpoint_loc=weights_path, 80 | height=height, 81 | max_n_subprompts=max_n_subprompts, 82 | max_resolution=max_resolution, 83 | n_iter=n_iter, 84 | use_half=use_half, 85 | width=width, 86 | config_loc=config_path, 87 | ) 88 | 89 | def _h_and_w_from_parameters(self, parameters, opt): 90 | height = parameters.get('height', opt.height) 91 | if height is not None: 92 | height = int(height) 93 | else: 94 | height = opt.height 95 | width = parameters.get('width', opt.width) 96 | if width is not None: 97 | width = int(width) 98 | else: 99 | width = opt.width 100 | 101 | return height, width 102 | 103 | @requests(on='/') 104 | def txt2img(self, docs: DocumentArray, parameters: Dict, **kwargs): 105 | request_time = time.time() 106 | 107 | # Default options for inherence engine. 108 | opt = self.stable_diffusion_module.opt 109 | 110 | sampler = parameters.get('sampler', 'k_lms') 111 | if sampler not in K_DIFF_SAMPLERS: 112 | raise ValueError(f'sampler must be in {K_DIFF_SAMPLERS}, got {sampler}') 113 | scale = parameters.get('scale', opt.scale) 114 | noiser = parameters.get('noiser', None) 115 | num_images = max(1, min(8, int(parameters.get('num_images', 1)))) 116 | seed = int(parameters.get('seed', randint(0, 2 ** 32 - 1))) 117 | steps = min(int(parameters.get('steps', opt.ddim_steps)), MAX_STEPS) 118 | height, width = self._h_and_w_from_parameters(parameters, opt) 119 | 120 | # If the number of samples we have is more than would currently be 121 | # given for batch_size * n_iter, increase n_iter to yield more images. 122 | n_samples = self.batch_size 123 | n_iter = opt.n_iter 124 | if num_images < n_samples: 125 | n_samples = num_images 126 | if num_images // n_samples > n_iter: 127 | n_iter = num_images // n_samples 128 | 129 | for d in docs: 130 | batch_size = n_samples 131 | prompt = d.text 132 | assert prompt is not None 133 | 134 | self.logger.info(f'stable diffusion start {num_images} images, prompt "{prompt}"...') 135 | for i in trange(n_iter, desc="Sampling"): 136 | samples, extra_data = self.stable_diffusion_module.sample( 137 | prompt, 138 | batch_size, 139 | sampler, 140 | seed + i, 141 | steps, 142 | height=height, 143 | noiser=noiser, 144 | scale=scale, 145 | width=width, 146 | ) 147 | 148 | ( 149 | conditioning, 150 | images, 151 | ) = itemgetter('conditioning', 'images')(extra_data) 152 | 153 | image_conditioning = None 154 | if isinstance(conditioning, dict): 155 | image_conditioning = conditioning['c_concat'] 156 | conditioning = conditioning['c_crossattn'] 157 | 158 | for img in images: 159 | buffered = BytesIO() 160 | img.save(buffered, format='PNG') 161 | 162 | samples_buffer = BytesIO() 163 | torch.save(samples, samples_buffer) 164 | samples_buffer.seek(0) 165 | 166 | if image_conditioning is not None: 167 | image_conditioning_buffer = BytesIO() 168 | torch.save(image_conditioning, image_conditioning_buffer) 169 | image_conditioning_buffer.seek(0) 170 | 171 | _d = Document( 172 | embedding=conditioning, 173 | blob=buffered.getvalue(), 174 | mime_type='image/png', 175 | tags={ 176 | 'latent_repr': base64.b64encode( 177 | samples_buffer.getvalue()).decode(), 178 | 'image_conditioning': base64.b64encode( 179 | image_conditioning_buffer.getvalue()).decode() 180 | if image_conditioning is not None else None, 181 | 'request': { 182 | 'api': 'txt2img', 183 | 'height': height, 184 | 'noiser': noiser, 185 | 'num_images': num_images, 186 | 
'sampler': sampler, 187 | 'scale': scale, 188 | 'seed': seed, 189 | 'steps': steps, 190 | 'width': width, 191 | }, 192 | 'text': prompt, 193 | 'generator': 'stable-diffusion', 194 | 'request_time': request_time, 195 | 'created_time': time.time(), 196 | }, 197 | ).convert_blob_to_datauri() 198 | _d.text = prompt 199 | d.matches.append(_d) 200 | 201 | torch.cuda.empty_cache() 202 | 203 | @requests(on='/stablediffuse') 204 | def stablediffuse(self, docs: DocumentArray, parameters: Dict, **kwargs): 205 | ''' 206 | Called "img2img" in the scripts of the stable-diffusion repo. 207 | ''' 208 | request_time = time.time() 209 | 210 | # Default options for inherence engine. 211 | opt = self.stable_diffusion_module.opt 212 | 213 | latentless = parameters.get('latentless', False) 214 | noiser = parameters.get('noiser', None) 215 | num_images = max(1, min(8, int(parameters.get('num_images', 1)))) 216 | prompt_override = parameters.get('prompt', None) 217 | sampler = parameters.get('sampler', 'k_lms') 218 | scale = parameters.get('scale', opt.scale) 219 | seed = int(parameters.get('seed', randint(0, 2 ** 32 - 1))) 220 | strength = parameters.get('strength', 0.75) 221 | 222 | if sampler not in K_DIFF_SAMPLERS: 223 | raise ValueError(f'sampler must be in {K_DIFF_SAMPLERS}, got {sampler}') 224 | 225 | steps = min(int(parameters.get('steps', opt.ddim_steps)), MAX_STEPS) 226 | 227 | # If the number of samples we have is more than would currently be 228 | # given for n_samples * n_iter, increase n_iter to yield more images. 229 | n_samples = self.batch_size 230 | n_iter = opt.n_iter 231 | if num_images < n_samples: 232 | n_samples = num_images 233 | if num_images // n_samples > n_iter: 234 | n_iter = num_images // n_samples 235 | 236 | assert 0. < strength < 1., 'can only work with strength in (0.0, 1.0)' 237 | 238 | for d in docs: 239 | batch_size = n_samples 240 | prompt = d.text 241 | if prompt_override is not None: 242 | prompt = prompt_override 243 | assert prompt is not None 244 | 245 | for i in trange(n_iter, desc="Sampling"): 246 | samples, extra_data = self.stable_diffusion_module.sample( 247 | prompt, 248 | batch_size, 249 | sampler, 250 | seed + i, 251 | steps, 252 | init_pil_image=document_to_pil(d), 253 | init_pil_image_as_random_latent=latentless, 254 | noiser=noiser, 255 | scale=scale, 256 | strength=strength, 257 | ) 258 | 259 | ( 260 | conditioning, 261 | images, 262 | ) = itemgetter('conditioning', 'images')(extra_data) 263 | 264 | image_conditioning = None 265 | if isinstance(conditioning, dict): 266 | image_conditioning = conditioning['c_concat'] 267 | conditioning = conditioning['c_crossattn'] 268 | 269 | for img in images: 270 | buffered = BytesIO() 271 | img.save(buffered, format='PNG') 272 | 273 | samples_buffer = BytesIO() 274 | torch.save(samples, samples_buffer) 275 | samples_buffer.seek(0) 276 | 277 | if image_conditioning is not None: 278 | image_conditioning_buffer = BytesIO() 279 | torch.save(image_conditioning, image_conditioning_buffer) 280 | image_conditioning_buffer.seek(0) 281 | 282 | _d = Document( 283 | embedding=conditioning, 284 | blob=buffered.getvalue(), 285 | mime_type='image/png', 286 | tags={ 287 | 'latent_repr': base64.b64encode( 288 | samples_buffer.getvalue()).decode(), 289 | 'image_conditioning': base64.b64encode( 290 | image_conditioning_buffer.getvalue()).decode() 291 | if image_conditioning is not None else None, 292 | 'request': { 293 | 'api': 'stablediffuse', 294 | 'latentless': latentless, 295 | 'noiser': noiser, 296 | 'num_images': num_images, 297 | 
'sampler': sampler, 298 | 'scale': scale, 299 | 'seed': seed, 300 | 'steps': steps, 301 | 'strength': strength, 302 | }, 303 | 'text': prompt, 304 | 'generator': 'stable-diffusion', 305 | 'request_time': request_time, 306 | 'created_time': time.time(), 307 | }, 308 | ).convert_blob_to_datauri() 309 | _d.text = prompt 310 | d.matches.append(_d) 311 | 312 | torch.cuda.empty_cache() 313 | 314 | @requests(on='/stableinterpolate') 315 | def stableinterpolate(self, docs: DocumentArray, parameters: Dict, **kwargs): 316 | ''' 317 | Create a series of images that are interpolations between two prompts. 318 | ''' 319 | request_time = time.time() 320 | 321 | # Default options for inherence engine. 322 | opt = self.stable_diffusion_module.opt 323 | 324 | noiser = parameters.get('noiser', None) 325 | num_images = max(1, min(16, int(parameters.get('num_images', 1)))) 326 | resample_prior = parameters.get('resample_prior', True) 327 | sampler = parameters.get('sampler', 'k_lms') 328 | scale = parameters.get('scale', opt.scale) 329 | seed = int(parameters.get('seed', randint(0, 2 ** 32 - 1))) 330 | strength = parameters.get('strength', 0.75) 331 | 332 | if sampler not in K_DIFF_SAMPLERS: 333 | raise ValueError(f'sampler must be in {K_DIFF_SAMPLERS}, got {sampler}') 334 | 335 | steps = min(int(parameters.get('steps', opt.ddim_steps)), MAX_STEPS) 336 | height, width = self._h_and_w_from_parameters(parameters, opt) 337 | 338 | assert 0.5 <= strength <= 1., 'can only work with strength in [0.5, 1.0]' 339 | 340 | for d in docs: 341 | batch_size = 1 342 | prompt = d.text 343 | assert prompt is not None 344 | 345 | prompts = prompt.split('|') 346 | 347 | ( 348 | conditioning_start, 349 | unconditioning, # Reuse this as it's the same for both 350 | weighted_subprompts_start, 351 | _, # Don't need the individual embedding managers 352 | ) = self.stable_diffusion_module.compute_conditioning_and_weights( 353 | prompts[0].strip(), 354 | batch_size) 355 | 356 | ( 357 | conditioning_end, 358 | _, 359 | weighted_subprompts_end, 360 | _, # Don't need the individual embedding managers 361 | ) = self.stable_diffusion_module.compute_conditioning_and_weights( 362 | prompts[1].strip(), 363 | batch_size) 364 | 365 | assert len(weighted_subprompts_start) == len(weighted_subprompts_end), \ 366 | 'Weighted subprompts for interpolation must be equal in number' 367 | 368 | to_iterate = list(enumerate(np.linspace(0, 1, num_images))) 369 | 370 | # Interate over interpolation percentages. 
371 | samples_last = None 372 | for i, percent in to_iterate: 373 | c = None 374 | if i < 1: 375 | c = conditioning_start 376 | elif i == len(to_iterate) - 1: 377 | c = conditioning_end 378 | else: 379 | c = conditioning_start.clone().detach() 380 | for embedding_i, _ in enumerate(conditioning_start): 381 | c[embedding_i] = slerp( 382 | percent, 383 | conditioning_start[embedding_i], 384 | conditioning_end[embedding_i], 385 | ) 386 | weighted_subprompts = combine_weighted_subprompts(percent, 387 | weighted_subprompts_start, 388 | weighted_subprompts_end) 389 | 390 | image = None 391 | if i == 0 or not resample_prior: 392 | samples_last, extra_data = self.stable_diffusion_module.sample( 393 | prompt, 394 | batch_size, 395 | sampler, 396 | seed, 397 | steps, 398 | conditioning=c, 399 | height=height, 400 | noiser=noiser, 401 | prompt_concept_injection_required=False, 402 | scale=scale, 403 | weighted_subprompts=weighted_subprompts, 404 | width=width, 405 | unconditioning=unconditioning, 406 | ) 407 | 408 | ( 409 | image, 410 | ) = itemgetter('images')(extra_data) 411 | else: 412 | samples_last, extra_data = self.stable_diffusion_module.sample( 413 | prompt, 414 | batch_size, 415 | sampler, 416 | seed + i, 417 | steps, 418 | conditioning=c, 419 | height=height, 420 | init_latent=samples_last, 421 | noiser=noiser, 422 | prompt_concept_injection_required=False, 423 | scale=scale, 424 | strength=strength, 425 | weighted_subprompts=weighted_subprompts, 426 | width=width, 427 | unconditioning=unconditioning, 428 | ) 429 | 430 | ( 431 | image, 432 | ) = itemgetter('images')(extra_data) 433 | 434 | torch.cuda.empty_cache() 435 | 436 | buffered = BytesIO() 437 | image.save(buffered, format='PNG') 438 | 439 | samples_buffer = BytesIO() 440 | torch.save(samples_last, samples_buffer) 441 | samples_buffer.seek(0) 442 | 443 | image_conditioning = None 444 | if isinstance(c, dict): 445 | image_conditioning = c['c_concat'] 446 | c = c['c_crossattn'] 447 | 448 | if image_conditioning is not None: 449 | image_conditioning_buffer = BytesIO() 450 | torch.save(image_conditioning, image_conditioning_buffer) 451 | image_conditioning_buffer.seek(0) 452 | 453 | _d = Document( 454 | embedding=c, 455 | blob=buffered.getvalue(), 456 | mime_type='image/png', 457 | tags={ 458 | 'latent_repr': base64.b64encode( 459 | samples_buffer.getvalue()).decode(), 460 | 'image_conditioning': base64.b64encode( 461 | image_conditioning_buffer.getvalue()).decode() 462 | if image_conditioning is not None else None, 463 | 'request': { 464 | 'api': 'stableinterpolate', 465 | 'height': height, 466 | 'noiser': noiser, 467 | 'num_images': num_images, 468 | 'resample_prior': resample_prior, 469 | 'sampler': sampler, 470 | 'scale': scale, 471 | 'seed': seed, 472 | 'steps': steps, 473 | 'strength': strength, 474 | 'width': width, 475 | }, 476 | 'text': prompt, 477 | 'percent': percent, 478 | 'generator': 'stable-diffusion', 479 | 'request_time': request_time, 480 | 'created_time': time.time(), 481 | }, 482 | ).convert_blob_to_datauri() 483 | _d.text = prompt 484 | d.matches.append(_d) 485 | -------------------------------------------------------------------------------- /executors/stable/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: StableDiffusionGenerator -------------------------------------------------------------------------------- /executors/stable/requirements.txt: -------------------------------------------------------------------------------- 1 | 
numpy==1.23.3 2 | tqdm==4.64.1 3 | jina 4 | docarray 5 | kornia==0.6.7 6 | einops==0.4.1 7 | transformers==4.22.2 8 | pytorch_lightning==1.7.7 9 | omegaconf==2.2.3 10 | protobuf==3.20.0 11 | k-diffusion @ git+https://github.com/crowsonkb/k-diffusion.git 12 | stable-inference @ git+https://github.com/AmericanPresidentJimmyCarter/stable-diffusion.git@v0.0.15 13 | https://github.com/AmericanPresidentJimmyCarter/xformers-builds/raw/master/cu116/xformers-0.0.14.dev0-cp310-cp310-linux_x86_64.whl 14 | CLIP @ git+https://github.com/openai/CLIP 15 | -------------------------------------------------------------------------------- /executors/store/config.yml: -------------------------------------------------------------------------------- 1 | jtype: DalleFlowStore 2 | metas: 3 | py_modules: 4 | - executor.py -------------------------------------------------------------------------------- /executors/store/executor.py: -------------------------------------------------------------------------------- 1 | import time 2 | import string 3 | import random 4 | 5 | from jina import Executor, requests, DocumentArray 6 | 7 | 8 | class DalleFlowStore(Executor): 9 | 10 | @requests(on='/upscale') 11 | def store(self, docs: DocumentArray, **kwargs): 12 | random_str = ''.join(random.choices(string.ascii_letters + string.digits, k=10)) 13 | docs[...].blobs = None # remove all blobs from anywhere to save space 14 | docs[...].embeddings = None 15 | for d in docs.find({'$and': [{'tags__upscaled': {'$exists': True}}, {'tags__generator': {'$exists': True}}]}): 16 | d.tags['finish_time'] = time.time() 17 | DocumentArray([d]).push(f'dalle-flow-{d.id}-{random_str}') 18 | -------------------------------------------------------------------------------- /executors/store/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: DalleFlowStore -------------------------------------------------------------------------------- /executors/store/requirements.txt: -------------------------------------------------------------------------------- 1 | requests -------------------------------------------------------------------------------- /executors/swinir/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nvidia/cuda:11.6.2-runtime-ubuntu20.04 2 | 3 | RUN apt-get update \ 4 | && apt-get install -y --no-install-recommends libgl1 libglib2.0-0 wget git python3.8 python3-pip \ 5 | && ln -sf python3.8 /usr/bin/python \ 6 | && ln -sf pip3 /usr/bin/pip \ 7 | && pip install --upgrade pip \ 8 | && pip install wheel setuptools 9 | 10 | ENV PIP_NO_CACHE_DIR=1 \ 11 | PIP_DISABLE_PIP_VERSION_CHECK=1 12 | 13 | WORKDIR /workdir 14 | 15 | RUN git clone --depth=1 https://github.com/jina-ai/SwinIR.git && \ 16 | pip install torch torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cu113 && \ 17 | cd SwinIR && pip install --timeout=1000 -e . && cd - 18 | 19 | COPY . 
/workdir/executor 20 | RUN pip install --no-cache-dir -r ./executor/requirements.txt 21 | 22 | WORKDIR /workdir/executor 23 | ENTRYPOINT ["jina", "executor", "--uses", "config.yml"] -------------------------------------------------------------------------------- /executors/swinir/config.yml: -------------------------------------------------------------------------------- 1 | jtype: SwinIRUpscaler 2 | with: 3 | swinir_path: ../SwinIR 4 | metas: 5 | py_modules: 6 | - executor.py -------------------------------------------------------------------------------- /executors/swinir/executor.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import torch 3 | import os 4 | import shutil 5 | from pathlib import Path 6 | 7 | from jina import Executor, DocumentArray, Document, requests 8 | 9 | from dalle_flow_swin_ir.main_test_swinir import main as swin_ir_main, get_model 10 | 11 | class SwinIRUpscaler(Executor): 12 | def __init__(self, swinir_path: str, **kwargs): 13 | super().__init__(**kwargs) 14 | self.swinir_path = swinir_path 15 | self.input_path = f'{swinir_path}/input/' 16 | self.output_path = f'{swinir_path}/results/swinir_real_sr_x4_large/' 17 | self.failover = 0 18 | 19 | self.swin_ir_kwargs = { 20 | 'task': 'real_sr', 21 | 'scale': 4, 22 | 'model_path': f'{self.swinir_path}/model_zoo/swinir/003_realSR_BSRGAN_DFOWMFC_s64w8_SwinIR-L_x4_GAN.pth', 23 | 'save_dir': self.output_path, 24 | } 25 | args_str = ';'.join(f'--{k};{str(v)}' for k, v in self.swin_ir_kwargs.items()) + ';--large_model' 26 | self.swin_ir_args = args_str.split(';') 27 | self.swin_ir_model = get_model(self.swin_ir_args[:]) 28 | 29 | def _upscale(self, d: Document): 30 | self.logger.info(f'upscaling [{d.text}]...') 31 | 32 | input_path = os.path.join(self.input_path, f'{d.id}/') 33 | 34 | Path(input_path).mkdir(parents=True, exist_ok=True) 35 | Path(self.output_path).mkdir(parents=True, exist_ok=True) 36 | 37 | d.save_uri_to_file(os.path.join(input_path, f'{d.id}.png')) 38 | 39 | swin_ir_main([*self.swin_ir_args, '--folder_lq', input_path], self.swin_ir_model) 40 | 41 | d.uri = os.path.join(self.output_path, f'{d.id}_SwinIR.png') 42 | d.convert_uri_to_datauri() 43 | d.tags['upscaled'] = True 44 | d.tags.update({**self.swin_ir_kwargs, 'folder_lq': input_path}) 45 | 46 | self.logger.info('cleaning...') 47 | # remove input 48 | shutil.rmtree(input_path, ignore_errors=True) 49 | 50 | # remove all outputs 51 | for f in glob.glob(f'{self.output_path}/{d.id}*.png'): 52 | if os.path.isfile(f): 53 | os.remove(f) 54 | 55 | self.logger.info('done!') 56 | 57 | # Dump memory here because SWINIR uses loads of it 58 | torch.cuda.empty_cache() 59 | 60 | @requests(on='/upscale') 61 | async def upscale(self, docs: DocumentArray, **kwargs): 62 | for d in docs.find({'$and': [{'tags__upscaled': {'$exists': False}}, {'tags__generator': {'$exists': True}}]}): 63 | self._upscale(d) 64 | d.blob = None 65 | d.embedding = None -------------------------------------------------------------------------------- /executors/swinir/manifest.yml: -------------------------------------------------------------------------------- 1 | manifest_version: 1 2 | name: SwinIRUpscaler -------------------------------------------------------------------------------- /executors/swinir/requirements.txt: -------------------------------------------------------------------------------- 1 | jina==3.12.0 2 | 3 | opencv-python 4 | timm 5 | Cython 6 | -------------------------------------------------------------------------------- 
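Note: the upscaler executors above are not meant to be called directly; they are reached through the Jina Flow defined in flow.yml (gRPC, port 51005). The snippet below is a minimal, hypothetical client sketch and is not part of the repository: it assumes the Flow is already running locally, that the input file name is a placeholder, and that RealESRGAN was enabled via flow_parser.py if that endpoint is used. The `generator` tag is set because the SwinIR upscaler only processes Documents that carry a `generator` tag and no `upscaled` tag, and `model_name`/`face_enhance` mirror the parameters read by the RealESRGAN executor's `/realesrgan` endpoint.

from jina import Client, Document, DocumentArray

# Assumes the Flow from flow.yml is serving gRPC on localhost:51005.
client = Client(host='grpc://127.0.0.1:51005')

# Placeholder image; converting to a data URI lets the executors read it back,
# and the 'generator' tag satisfies the SwinIR upscaler's tag filter.
doc = Document(uri='some-output.png', tags={'generator': 'stable-diffusion'})
doc.convert_uri_to_datauri()

# 4x upscale via the SwinIR executor bound to '/upscale' in flow.yml.
upscaled = client.post(on='/upscale', inputs=DocumentArray([doc]))
print(upscaled[0].tags.get('upscaled'))

# Alternative path, only if ENABLE_REALESRGAN was set when parsing the Flow:
# esrgan = client.post(
#     on='/realesrgan',
#     inputs=DocumentArray([doc]),
#     parameters={'model_name': 'RealESRGAN_x4plus', 'face_enhance': False},
# )
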
/executors/waifu/executor.py: -------------------------------------------------------------------------------- 1 | import os 2 | import stat 3 | import subprocess 4 | import tempfile 5 | from io import BytesIO 6 | from urllib.request import urlopen 7 | from zipfile import ZipFile 8 | 9 | from jina import Executor, requests, DocumentArray, Document 10 | 11 | 12 | class WaifuUpscaler(Executor): 13 | def __init__(self, waifu_url: str, top_k: int = 3, **kwargs): 14 | super().__init__(**kwargs) 15 | print('downloading...') 16 | resp = urlopen(waifu_url) 17 | zipfile = ZipFile(BytesIO(resp.read())) 18 | bin_path = './waifu-bin' 19 | zipfile.extractall(bin_path) 20 | print('complete') 21 | self.waifu_path = os.path.realpath( 22 | f'{bin_path}/waifu2x-ncnn-vulkan-20220419-ubuntu/waifu2x-ncnn-vulkan' 23 | ) 24 | self.top_k = top_k 25 | 26 | st = os.stat(self.waifu_path) 27 | os.chmod(self.waifu_path, st.st_mode | stat.S_IEXEC) 28 | print(self.waifu_path) 29 | 30 | def _upscale(self, d: Document): 31 | with tempfile.NamedTemporaryFile( 32 | suffix='.png', 33 | ) as f_in, tempfile.NamedTemporaryFile( 34 | suffix='.png', 35 | ) as f_out: 36 | d.save_uri_to_file(f_in.name) 37 | print( 38 | subprocess.getoutput( 39 | f'{self.waifu_path} -i {f_in.name} -o {f_out.name} -s 4 -n 0 -g -1' 40 | ) 41 | ) 42 | print(f'{f_in.name} done') 43 | d.uri = f_out.name 44 | d.convert_uri_to_datauri() 45 | d.blob = None 46 | d.tags['upscaled'] = 'true' 47 | return d 48 | 49 | @requests(on='/upscale') 50 | async def upscale(self, docs: DocumentArray, **kwargs): 51 | docs.apply(self._upscale) 52 | -------------------------------------------------------------------------------- /flow-jcloud.yml: -------------------------------------------------------------------------------- 1 | jtype: Flow 2 | with: 3 | protocol: grpc 4 | port: 51005 5 | monitoring: true # enable prometheus & grafana 6 | env: 7 | JINA_LOG_LEVEL: debug 8 | exit_on_exceptions: 9 | - RuntimeError 10 | - ValueError 11 | jcloud: 12 | gateway: 13 | ingress: kong 14 | timeout: 6000 15 | executors: 16 | - name: dalle 17 | uses: jinahub+docker://DalleGenerator/latest 18 | timeout_ready: -1 # slow download speed often leads to timeout 19 | env: 20 | XLA_PYTHON_CLIENT_ALLOCATOR: platform # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 21 | replicas: 1 # change this if you have larger VRAM 22 | gpus: all 23 | jcloud: 24 | resources: 25 | memory: 10G 26 | gpu: shared 27 | - name: clip_encoder 28 | # uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 29 | uses: jinahub+docker://CLIPOnnxEncoder/latest-gpu 30 | uses_with: 31 | name: ViT-L-14-336::openai 32 | replicas: 1 33 | needs: [gateway] 34 | gpus: all 35 | jcloud: 36 | resources: 37 | memory: 5G 38 | gpu: shared 39 | - name: diffusion 40 | uses: jinahub+docker://GLID3Diffusion/latest 41 | timeout_ready: -1 # slow download speed often leads to timeout 42 | replicas: 1 # change this if you have larger VRAM 43 | needs: [clip_encoder] 44 | gpus: all 45 | jcloud: 46 | resources: 47 | memory: 10G 48 | gpu: shared 49 | - name: rerank 50 | # uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 51 | uses: jinahub+docker://CLIPOnnxEncoder/latest-gpu 52 | uses_with: 53 | name: ViT-L-14-336::openai 54 | uses_requests: 55 | '/': rank 56 | replicas: 1 57 | needs: [dalle, diffusion] 58 | gpus: all 59 | jcloud: 60 | resources: 61 | memory: 5G 62 | gpu: shared 63 | - name: upscaler 64 | uses: jinahub+docker://SwinIRUpscaler/latest 65 | replicas: 1 66 | gpus: all 67 | jcloud: 68 | resources: 69 | memory: 5G 70 | gpu: 
shared 71 | - name: store 72 | uses: jinahub+docker://DalleFlowStore/latest 73 | floating: true 74 | replicas: 1 75 | jcloud: 76 | resources: 77 | memory: 500M 78 | -------------------------------------------------------------------------------- /flow.yml: -------------------------------------------------------------------------------- 1 | jtype: Flow 2 | with: 3 | protocol: grpc 4 | port: 51005 5 | monitoring: true # enable prometheus & grafana 6 | env: 7 | JINA_LOG_LEVEL: debug 8 | executors: 9 | - name: dalle 10 | uses: executors/dalle/config.yml 11 | timeout_ready: -1 # slow download speed often leads to timeout 12 | env: 13 | CUDA_VISIBLE_DEVICES: 0 # change this if you have multiple GPU 14 | XLA_PYTHON_CLIENT_ALLOCATOR: platform # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 15 | replicas: 1 # change this if you have larger VRAM 16 | - name: clip_encoder 17 | uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 18 | needs: [gateway] 19 | - name: diffusion 20 | uses: executors/glid3/config.yml 21 | timeout_ready: -1 # slow download speed often leads to timeout 22 | env: 23 | CUDA_VISIBLE_DEVICES: 0 # change this if you have multiple GPU 24 | XLA_PYTHON_CLIENT_ALLOCATOR: platform # https://jax.readthedocs.io/en/latest/gpu_memory_allocation.html 25 | replicas: 1 # change this if you have larger VRAM 26 | needs: [clip_encoder] 27 | - name: rerank 28 | uses: jinahub+docker://CLIPTorchEncoder/latest-gpu 29 | uses_requests: 30 | '/': rank 31 | needs: [dalle, diffusion] 32 | - name: upscaler 33 | uses: executors/swinir/config.yml 34 | env: 35 | CUDA_VISIBLE_DEVICES: 0 # change this if you have multiple GPU 36 | - name: store 37 | uses: executors/store/config.yml 38 | floating: true -------------------------------------------------------------------------------- /flow_parser.py: -------------------------------------------------------------------------------- 1 | ''' 2 | A simple python script that parses the flow.yml and removes any undesired 3 | executors based on environmental variables that are present, then creates 4 | flow*.tmp.yml. 5 | 6 | Environmental flags available: 7 | 8 | DISABLE_CLIP 9 | DISABLE_DALLE_MEGA 10 | DISABLE_GLID3XL 11 | DISABLE_SWINIR 12 | ENABLE_STABLE_DIFFUSION 13 | ENABLE_CLIPSEG 14 | 15 | TODO Support jcloud and k8s configurations? 
16 | ''' 17 | import argparse 18 | import os 19 | import sys 20 | import yaml 21 | 22 | from collections import OrderedDict 23 | 24 | ENV_DISABLE_CLIP = 'DISABLE_CLIP' 25 | ENV_DISABLE_DALLE_MEGA = 'DISABLE_DALLE_MEGA' 26 | ENV_DISABLE_GLID3XL = 'DISABLE_GLID3XL' 27 | ENV_DISABLE_SWINIR = 'DISABLE_SWINIR' 28 | ENV_ENABLE_CLIPSEG = 'ENABLE_CLIPSEG' 29 | ENV_ENABLE_REALESRGAN = 'ENABLE_REALESRGAN' 30 | ENV_ENABLE_STABLE_DIFFUSION = 'ENABLE_STABLE_DIFFUSION' 31 | 32 | ENV_GPUS_DALLE_MEGA = 'GPUS_DALLE_MEGA' 33 | ENV_GPUS_GLID3XL = 'GPUS_GLID3XL' 34 | ENV_GPUS_REALESRGAN = 'GPUS_REALESRGAN' 35 | ENV_GPUS_SWINIR = 'GPUS_SWINIR' 36 | ENV_GPUS_STABLE_DIFFUSION = 'GPUS_STABLE_DIFFUSION' 37 | ENV_CAS_TOKEN = 'CAS_TOKEN' 38 | 39 | ENV_REPLICAS_DALLE_MEGA = 'REPLICAS_DALLE_MEGA' 40 | ENV_REPLICAS_GLID3XL = 'REPLICAS_GLID3XL' 41 | ENV_REPLICAS_REALESRGAN = 'REPLICAS_REALESRGAN' 42 | ENV_REPLICAS_SWINIR = 'REPLICAS_SWINIR' 43 | ENV_REPLICAS_STABLE_DIFFUSION = 'REPLICAS_STABLE_DIFFUSION' 44 | 45 | FLOW_KEY_ENV = 'env' 46 | FLOW_KEY_ENV_CUDA_DEV = 'CUDA_VISIBLE_DEVICES' 47 | FLOW_KEY_REPLICAS = 'replicas' 48 | 49 | CAS_FLOW_NAME = 'clip_encoder' 50 | CLIPSEG_FLOW_NAME = 'clipseg' 51 | DALLE_MEGA_FLOW_NAME = 'dalle' 52 | GLID3XL_FLOW_NAME = 'diffusion' 53 | REALESRGAN_FLOW_NAME = 'realesrgan' 54 | RERANK_FLOW_NAME = 'rerank' 55 | SWINIR_FLOW_NAME = 'upscaler' 56 | STABLE_DIFFUSION_FLOW_NAME = 'stable' 57 | 58 | CLIP_AS_SERVICE_HOST = os.environ.get('CLIP_AS_SERVICE_HOST', 'api.clip.jina.ai') 59 | CLIP_AS_SERVICE_PORT = os.environ.get('CLIP_AS_SERVICE_PORT', '2096') 60 | 61 | 62 | def represent_ordereddict(dumper, data): 63 | ''' 64 | Used to dump the edited YAML with its original key order preserved so that 65 | jina doesn't freak out when we use the newly parsed file. Otherwise the 66 | new YAML is sorted by keys and that breaks jina. 
67 | ''' 68 | value = [] 69 | 70 | for item_key, item_value in data.items(): 71 | node_key = dumper.represent_data(item_key) 72 | node_value = dumper.represent_data(item_value) 73 | 74 | value.append((node_key, node_value)) 75 | 76 | return yaml.nodes.MappingNode(u'tag:yaml.org,2002:map', value) 77 | 78 | yaml.add_representer(OrderedDict, represent_ordereddict) 79 | 80 | parser = argparse.ArgumentParser() 81 | 82 | parser.add_argument('-fn','--filename', 83 | dest='filename', 84 | help='YAML file to use (default is flow.yml)', 85 | required=False) 86 | parser.add_argument('-o','--output', 87 | dest='output', 88 | help='YAML file to output (default is flow.tmp.yml)', 89 | required=False) 90 | parser.add_argument('--disable-clip', 91 | dest='no_clip', 92 | action='store_true', 93 | help="Disable clip-as-a-service executor (default false)", 94 | required=False) 95 | parser.add_argument('--disable-dalle-mega', 96 | dest='no_dalle_mega', 97 | action='store_true', 98 | help="Disable DALLE-MEGA executor (default false)", 99 | required=False) 100 | parser.add_argument('--disable-glid3xl', 101 | dest='no_glid3xl', 102 | action='store_true', 103 | help="Disable GLID3XL executor (default false)", 104 | required=False) 105 | parser.add_argument('--disable-swinir', 106 | dest='no_swinir', 107 | action='store_true', 108 | help="Disable SWINIR upscaler executor (default false)", 109 | required=False) 110 | parser.add_argument('--enable-clipseg', 111 | dest='yes_clipseg', 112 | action='store_true', 113 | help="Enable CLIP segmentation executor (default false)", 114 | required=False) 115 | parser.add_argument('--enable-realesrgan', 116 | dest='yes_realesrgan', 117 | action='store_true', 118 | help="Enable RealESRGAN upscaler (default false)", 119 | required=False) 120 | parser.add_argument('--enable-stable-diffusion', 121 | dest='yes_stable_diffusion', 122 | action='store_true', 123 | help="Enable Stable Diffusion executor (default false)", 124 | required=False) 125 | parser.add_argument('--cas-token', 126 | dest='cas_token', 127 | help="Token to authenticate with the CAS service (default ''). 
If not set, the CAS service will not be used.", 128 | default='', 129 | required=False) 130 | parser.add_argument('--gpus-dalle-mega', 131 | dest='gpus_dalle_mega', 132 | help="GPU device ID(s) for DALLE-MEGA (default 0)", 133 | default=0, 134 | required=False) 135 | parser.add_argument('--gpus-glid3xl', 136 | dest='gpus_glid3xl', 137 | help="GPU device ID(s) for GLID3XL (default 0)", 138 | default=0, 139 | required=False) 140 | parser.add_argument('--gpus-realesrgan', 141 | dest='gpus_realesrgan', 142 | help="GPU device ID(s) for RealESRGAN (default 0)", 143 | default=0, 144 | required=False) 145 | parser.add_argument('--gpus-stable-diffusion', 146 | dest='gpus_stable_diffusion', 147 | help="GPU device ID(s) for Stable Diffusion (default 0)", 148 | default=0, 149 | required=False) 150 | parser.add_argument('--gpus-swinir', 151 | dest='gpus_swinir', 152 | help="GPU device ID(s) for SWINIR (default 0)", 153 | default=0, 154 | required=False) 155 | 156 | parser.add_argument('--replicas-dalle-mega', 157 | dest='replicas_dalle_mega', 158 | help="Replica number for DALLE-MEGA (default 1)", 159 | default=1, 160 | required=False) 161 | parser.add_argument('--replicas-glid3xl', 162 | dest='replicas_glid3xl', 163 | help="Replica number for GLID3XL (default 1)", 164 | default=1, 165 | required=False) 166 | parser.add_argument('--replicas-realesrgan', 167 | dest='replicas_realesrgan', 168 | help="Replica number for RealESRGAN (default 1)", 169 | default=1, 170 | required=False) 171 | parser.add_argument('--replicas-stable-diffusion', 172 | dest='replicas_stable_diffusion', 173 | help="Replica number for Stable Diffusion (default 1)", 174 | default=1, 175 | required=False) 176 | parser.add_argument('--replicas-swinir', 177 | dest='replicas_swinir', 178 | help="Replica number for SWINIR (default 1)", 179 | default=1, 180 | required=False) 181 | 182 | args = vars(parser.parse_args()) 183 | 184 | flow_to_use = 'flow.yml' 185 | if args.get('filename', None) is not None: 186 | flow_to_use = args['filename'] 187 | 188 | output_flow = 'flow.tmp.yml' 189 | if args.get('output', None) is not None: 190 | output_flow = args['output'] 191 | 192 | no_clip = args.get('no_clip') or \ 193 | os.environ.get(ENV_DISABLE_CLIP, False) 194 | no_dalle_mega = args.get('no_dalle_mega') or \ 195 | os.environ.get(ENV_DISABLE_DALLE_MEGA, False) 196 | no_glid3xl = args.get('no_glid3xl') or os.environ.get(ENV_DISABLE_GLID3XL, False) 197 | no_swinir = args.get('no_swinir') or os.environ.get(ENV_DISABLE_SWINIR, False) 198 | yes_clipseg = args.get('yes_clipseg') or \ 199 | os.environ.get(ENV_ENABLE_CLIPSEG, False) 200 | yes_realesrgan = args.get('yes_realesrgan') or \ 201 | os.environ.get(ENV_ENABLE_REALESRGAN, False) 202 | yes_stable_diffusion = args.get('yes_stable_diffusion') or \ 203 | os.environ.get(ENV_ENABLE_STABLE_DIFFUSION, False) 204 | 205 | gpus_dalle_mega = os.environ.get(ENV_GPUS_DALLE_MEGA, False) or \ 206 | args.get('gpus_dalle_mega') 207 | gpus_glid3xl = os.environ.get(ENV_GPUS_GLID3XL, False) or \ 208 | args.get('gpus_glid3xl') 209 | gpus_realesrgan = os.environ.get(ENV_GPUS_REALESRGAN, False) or \ 210 | args.get('gpus_realesrgan') 211 | gpus_stable_diffusion = os.environ.get(ENV_GPUS_STABLE_DIFFUSION, False) or \ 212 | args.get('gpus_stable_diffusion') 213 | gpus_swinir = os.environ.get(ENV_GPUS_SWINIR, False) or \ 214 | args.get('gpus_swinir') 215 | 216 | replicas_dalle_mega = os.environ.get(ENV_REPLICAS_DALLE_MEGA, False) or \ 217 | args.get('replicas_dalle_mega') 218 | replicas_glid3xl = 
os.environ.get(ENV_REPLICAS_GLID3XL, False) or \ 219 | args.get('replicas_glid3xl') 220 | replicas_realesrgan = os.environ.get(ENV_REPLICAS_REALESRGAN, False) or \ 221 | args.get('replicas_realesrgan') 222 | replicas_stable_diffusion = os.environ.get(ENV_REPLICAS_STABLE_DIFFUSION, False) or \ 223 | args.get('replicas_stable_diffusion') 224 | replicas_swinir = os.environ.get(ENV_REPLICAS_SWINIR, False) or \ 225 | args.get('replicas_swinir') 226 | 227 | cas_token = os.environ.get(ENV_CAS_TOKEN, '') or args.get('cas_token') 228 | 229 | if no_clip and not no_glid3xl: 230 | raise ValueError('GLID3XL requires a CLIP encoder executor to work') 231 | 232 | 233 | CLIPSEG_DICT = OrderedDict({ 234 | 'env': { 235 | 'CUDA_VISIBLE_DEVICES': 0, 236 | 'XLA_PYTHON_CLIENT_ALLOCATOR': 'platform', 237 | }, 238 | 'name': CLIPSEG_FLOW_NAME, 239 | 'replicas': 1, 240 | 'timeout_ready': -1, 241 | 'uses': f'executors/{CLIPSEG_FLOW_NAME}/config.yml', 242 | }) 243 | REALESRGAN_DICT = OrderedDict({ 244 | 'env': { 245 | 'CUDA_VISIBLE_DEVICES': gpus_realesrgan, 246 | 'XLA_PYTHON_CLIENT_ALLOCATOR': 'platform', 247 | }, 248 | 'name': REALESRGAN_FLOW_NAME, 249 | 'replicas': int(replicas_realesrgan), 250 | 'timeout_ready': -1, 251 | 'uses': f'executors/{REALESRGAN_FLOW_NAME}/config.yml', 252 | }) 253 | STABLE_YAML_DICT = OrderedDict({ 254 | 'env': { 255 | 'MEMORY_EFFICIENT_CROSS_ATTENTION': 1, 256 | 'CUDA_VISIBLE_DEVICES': gpus_stable_diffusion, 257 | 'XLA_PYTHON_CLIENT_ALLOCATOR': 'platform', 258 | }, 259 | 'name': STABLE_DIFFUSION_FLOW_NAME, 260 | 'replicas': int(replicas_stable_diffusion), 261 | 'timeout_ready': -1, 262 | 'uses': f'executors/{STABLE_DIFFUSION_FLOW_NAME}/config.yml', 263 | }) 264 | 265 | 266 | def _filter_out(flow_exec_list, name): 267 | return list(filter(lambda exc: exc['name'] != name, flow_exec_list)) 268 | 269 | with open(flow_to_use, 'r') as f_in: 270 | flow_as_dict = None 271 | try: 272 | flow_as_dict = OrderedDict(yaml.safe_load(f_in)) 273 | except yaml.YAMLError as exc: 274 | print(exc) 275 | sys.exit(1) 276 | 277 | # If the cas_token is not empty, we will use the clip-as-a-service as external executor 278 | if cas_token: 279 | for ext in flow_as_dict['executors']: 280 | if ext['name'] in [CAS_FLOW_NAME, RERANK_FLOW_NAME]: 281 | ext['host'] = CLIP_AS_SERVICE_HOST 282 | ext['port'] = int(CLIP_AS_SERVICE_PORT) 283 | ext['external'] = True 284 | ext['tls'] = True 285 | ext['grpc_metadata'] = {'authorization': cas_token} 286 | 287 | 288 | # For backwards compatibility, we inject the stable diffusion configuration 289 | # into the flow yml and then remove it if needed. 290 | # 291 | # Find the index of latent diffusion and inject stable diffusion and 292 | # clipseg after it. 293 | glid3xl_idx = next(i for i, exc in enumerate(flow_as_dict['executors']) 294 | if exc['name'] == GLID3XL_FLOW_NAME) 295 | flow_as_dict['executors'].insert(glid3xl_idx + 1, CLIPSEG_DICT) 296 | flow_as_dict['executors'].insert(glid3xl_idx + 1, REALESRGAN_DICT) 297 | flow_as_dict['executors'].insert(glid3xl_idx + 1, STABLE_YAML_DICT) 298 | 299 | # Find the rerank executor, jam stable into its needs. 
300 | rerank_idx = next(i for i, exc in enumerate(flow_as_dict['executors']) 301 | if exc['name'] == RERANK_FLOW_NAME) 302 | flow_as_dict['executors'][rerank_idx]['needs'].append( 303 | STABLE_DIFFUSION_FLOW_NAME) 304 | 305 | if flow_as_dict is None: 306 | print('Input yaml was empty') 307 | sys.exit(1) 308 | 309 | if flow_as_dict.get('executors', None) is None: 310 | print('No executors found in yaml file') 311 | sys.exit(1) 312 | 313 | if no_dalle_mega: 314 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 315 | DALLE_MEGA_FLOW_NAME) 316 | else: 317 | dalle_mega_idx = next(i for i, exc in enumerate(flow_as_dict['executors']) 318 | if exc['name'] == DALLE_MEGA_FLOW_NAME) 319 | flow_as_dict['executors'][dalle_mega_idx][FLOW_KEY_ENV][FLOW_KEY_ENV_CUDA_DEV] = gpus_dalle_mega 320 | flow_as_dict['executors'][dalle_mega_idx][FLOW_KEY_REPLICAS] = int(replicas_dalle_mega) 321 | 322 | if no_glid3xl: 323 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 324 | GLID3XL_FLOW_NAME) 325 | else: 326 | glid3xl_idx = next(i for i, exc in enumerate(flow_as_dict['executors']) 327 | if exc['name'] == GLID3XL_FLOW_NAME) 328 | flow_as_dict['executors'][glid3xl_idx][FLOW_KEY_ENV][FLOW_KEY_ENV_CUDA_DEV] = gpus_glid3xl 329 | flow_as_dict['executors'][glid3xl_idx][FLOW_KEY_REPLICAS] = int(replicas_glid3xl) 330 | 331 | if no_swinir: 332 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 333 | SWINIR_FLOW_NAME) 334 | else: 335 | swinir_idx = next(i for i, exc in enumerate(flow_as_dict['executors']) 336 | if exc['name'] == SWINIR_FLOW_NAME) 337 | flow_as_dict['executors'][swinir_idx][FLOW_KEY_ENV][FLOW_KEY_ENV_CUDA_DEV] = gpus_swinir 338 | flow_as_dict['executors'][swinir_idx][FLOW_KEY_REPLICAS] = int(replicas_swinir) 339 | 340 | if not yes_clipseg: 341 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 342 | CLIPSEG_FLOW_NAME) 343 | if not yes_realesrgan: 344 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 345 | REALESRGAN_FLOW_NAME) 346 | if not yes_stable_diffusion: 347 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 348 | STABLE_DIFFUSION_FLOW_NAME) 349 | 350 | for exc in flow_as_dict['executors']: 351 | if type(exc.get('needs', None)) == list: 352 | if no_dalle_mega: 353 | exc['needs'] = list(filter( 354 | lambda _n: _n != DALLE_MEGA_FLOW_NAME, 355 | exc['needs'])) 356 | if no_glid3xl: 357 | exc['needs'] = list(filter( 358 | lambda _n: _n != GLID3XL_FLOW_NAME, 359 | exc['needs'])) 360 | if no_swinir: 361 | exc['needs'] = list(filter( 362 | lambda _n: _n != SWINIR_FLOW_NAME, 363 | exc['needs'])) 364 | if not yes_clipseg: 365 | exc['needs'] = list(filter( 366 | lambda _n: _n != CLIPSEG_FLOW_NAME, 367 | exc['needs'])) 368 | if not yes_realesrgan: 369 | exc['needs'] = list(filter( 370 | lambda _n: _n != REALESRGAN_FLOW_NAME, 371 | exc['needs'])) 372 | if not yes_stable_diffusion: 373 | exc['needs'] = list(filter( 374 | lambda _n: _n != STABLE_DIFFUSION_FLOW_NAME, 375 | exc['needs'])) 376 | 377 | if no_clip: 378 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 379 | CAS_FLOW_NAME) 380 | flow_as_dict['executors'] = _filter_out(flow_as_dict['executors'], 381 | RERANK_FLOW_NAME) 382 | 383 | with open(output_flow, 'w') as f_out: 384 | f_out.write(yaml.dump(flow_as_dict)) 385 | -------------------------------------------------------------------------------- /k8s_flow/dalle/dalle.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | 
data: 3 | JINA_LOG_LEVEL: INFO 4 | pythonunbuffered: '1' 5 | worker_class: uvicorn.workers.UvicornH11Worker 6 | kind: ConfigMap 7 | metadata: 8 | name: dalle-configmap 9 | namespace: dalle-flow 10 | --- 11 | apiVersion: v1 12 | kind: Service 13 | metadata: 14 | labels: 15 | app: dalle 16 | name: dalle 17 | namespace: dalle-flow 18 | spec: 19 | ports: 20 | - name: port 21 | port: 8080 22 | protocol: TCP 23 | targetPort: 8080 24 | - name: monitoring 25 | port: 9090 26 | protocol: TCP 27 | targetPort: 9090 28 | selector: 29 | app: dalle 30 | type: ClusterIP 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: dalle 36 | namespace: dalle-flow 37 | spec: 38 | replicas: 2 39 | selector: 40 | matchLabels: 41 | app: dalle 42 | strategy: 43 | rollingUpdate: 44 | maxSurge: 0 45 | maxUnavailable: 1 46 | type: RollingUpdate 47 | template: 48 | metadata: 49 | annotations: 50 | linkerd.io/inject: enabled 51 | labels: 52 | app: dalle 53 | jina_deployment_name: dalle 54 | ns: dalle-flow 55 | pod_type: WORKER 56 | shard_id: '0' 57 | spec: 58 | affinity: 59 | podAntiAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | - labelSelector: 62 | matchExpressions: 63 | - key: app 64 | operator: In 65 | values: 66 | - dalle 67 | topologyKey: "kubernetes.io/hostname" 68 | containers: 69 | - args: 70 | - executor 71 | - --name 72 | - dalle 73 | - --extra-search-paths 74 | - . 75 | - --k8s-namespace 76 | - dalle-flow 77 | - --uses 78 | - config.yml 79 | - --port 80 | - '8080' 81 | - --timeout-ready 82 | - '-1' 83 | - --replicas 84 | - '2' 85 | - --monitoring 86 | - --port-monitoring 87 | - '9090' 88 | - --uses-metas 89 | - '{}' 90 | - --native 91 | command: 92 | - jina 93 | env: 94 | - name: POD_UID 95 | valueFrom: 96 | fieldRef: 97 | fieldPath: metadata.uid 98 | - name: JINA_DEPLOYMENT_NAME 99 | value: dalle 100 | envFrom: 101 | - configMapRef: 102 | name: dalle-configmap 103 | image: jinahub/2583xebn:6304ceb26d4e7964d68756da 104 | imagePullPolicy: IfNotPresent 105 | name: executor 106 | ports: 107 | - containerPort: 8080 108 | readinessProbe: 109 | initialDelaySeconds: 5 110 | periodSeconds: 10 111 | tcpSocket: 112 | port: 8080 113 | --- 114 | apiVersion: monitoring.coreos.com/v1 115 | kind: ServiceMonitor 116 | metadata: 117 | labels: 118 | app: dalle 119 | name: dalle 120 | namespace: dalle-flow 121 | spec: 122 | endpoints: 123 | - port: monitoring 124 | namespaceSelector: 125 | matchNames: 126 | - dalle-flow 127 | selector: 128 | matchLabels: 129 | app: dalle 130 | -------------------------------------------------------------------------------- /k8s_flow/diffusion/diffusion.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | JINA_LOG_LEVEL: INFO 4 | pythonunbuffered: '1' 5 | worker_class: uvicorn.workers.UvicornH11Worker 6 | kind: ConfigMap 7 | metadata: 8 | name: diffusion-configmap 9 | namespace: dalle-flow 10 | --- 11 | apiVersion: v1 12 | kind: Service 13 | metadata: 14 | labels: 15 | app: diffusion 16 | name: diffusion 17 | namespace: dalle-flow 18 | spec: 19 | ports: 20 | - name: port 21 | port: 8080 22 | protocol: TCP 23 | targetPort: 8080 24 | - name: monitoring 25 | port: 9090 26 | protocol: TCP 27 | targetPort: 9090 28 | selector: 29 | app: diffusion 30 | type: ClusterIP 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: diffusion 36 | namespace: dalle-flow 37 | spec: 38 | replicas: 2 39 | selector: 40 | matchLabels: 41 | app: diffusion 42 | strategy: 43 | 
rollingUpdate: 44 | maxSurge: 0 45 | maxUnavailable: 1 46 | type: RollingUpdate 47 | template: 48 | metadata: 49 | annotations: 50 | linkerd.io/inject: enabled 51 | labels: 52 | app: diffusion 53 | jina_deployment_name: diffusion 54 | ns: dalle-flow 55 | pod_type: WORKER 56 | shard_id: '0' 57 | spec: 58 | affinity: 59 | podAntiAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | - labelSelector: 62 | matchExpressions: 63 | - key: app 64 | operator: In 65 | values: 66 | - diffusion 67 | topologyKey: "kubernetes.io/hostname" 68 | containers: 69 | - args: 70 | - executor 71 | - --name 72 | - diffusion 73 | - --extra-search-paths 74 | - . 75 | - --k8s-namespace 76 | - dalle-flow 77 | - --uses 78 | - config.yml 79 | - --port 80 | - '8080' 81 | - --timeout-ready 82 | - '-1' 83 | - --replicas 84 | - '2' 85 | - --monitoring 86 | - --port-monitoring 87 | - '9090' 88 | - --uses-metas 89 | - '{}' 90 | - --native 91 | command: 92 | - jina 93 | env: 94 | - name: POD_UID 95 | valueFrom: 96 | fieldRef: 97 | fieldPath: metadata.uid 98 | - name: JINA_DEPLOYMENT_NAME 99 | value: diffusion 100 | envFrom: 101 | - configMapRef: 102 | name: diffusion-configmap 103 | image: jinahub/zcougdp9:6304d164fe6c2b57227b7367 104 | imagePullPolicy: IfNotPresent 105 | name: executor 106 | ports: 107 | - containerPort: 8080 108 | readinessProbe: 109 | initialDelaySeconds: 5 110 | periodSeconds: 10 111 | tcpSocket: 112 | port: 8080 113 | --- 114 | apiVersion: monitoring.coreos.com/v1 115 | kind: ServiceMonitor 116 | metadata: 117 | labels: 118 | app: diffusion 119 | name: diffusion 120 | namespace: dalle-flow 121 | spec: 122 | endpoints: 123 | - port: monitoring 124 | namespaceSelector: 125 | matchNames: 126 | - dalle-flow 127 | selector: 128 | matchLabels: 129 | app: diffusion 130 | -------------------------------------------------------------------------------- /k8s_flow/gateway/gateway.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | JINA_LOG_LEVEL: debug 4 | pythonunbuffered: '1' 5 | worker_class: uvicorn.workers.UvicornH11Worker 6 | kind: ConfigMap 7 | metadata: 8 | name: gateway-configmap 9 | namespace: dalle-flow 10 | --- 11 | apiVersion: v1 12 | kind: Service 13 | metadata: 14 | labels: 15 | app: gateway 16 | name: gateway 17 | namespace: dalle-flow 18 | spec: 19 | ports: 20 | - name: port 21 | port: 51005 22 | protocol: TCP 23 | targetPort: 51005 24 | - name: monitoring 25 | port: 51006 26 | protocol: TCP 27 | targetPort: 51006 28 | selector: 29 | app: gateway 30 | type: NodePort 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: gateway 36 | namespace: dalle-flow 37 | spec: 38 | replicas: 2 39 | selector: 40 | matchLabels: 41 | app: gateway 42 | strategy: 43 | rollingUpdate: 44 | maxSurge: 0 45 | maxUnavailable: 1 46 | type: RollingUpdate 47 | template: 48 | metadata: 49 | annotations: 50 | linkerd.io/inject: enabled 51 | labels: 52 | app: gateway 53 | jina_deployment_name: gateway 54 | ns: dalle-flow 55 | pod_type: GATEWAY 56 | shard_id: '' 57 | spec: 58 | affinity: 59 | podAntiAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | - labelSelector: 62 | matchExpressions: 63 | - key: app 64 | operator: In 65 | values: 66 | - gateway 67 | topologyKey: "kubernetes.io/hostname" 68 | containers: 69 | - args: 70 | - gateway 71 | - --extra-search-paths 72 | - . 
73 | - --k8s-namespace 74 | - dalle-flow 75 | - --polling 76 | - ANY 77 | - --port 78 | - '51005' 79 | - --expose-endpoints 80 | - '{}' 81 | - --graph-description 82 | - '{"dalle": ["rerank"], "start-gateway": ["dalle", "clip_encoder"], "clip_encoder": 83 | ["diffusion"], "diffusion": ["rerank"], "rerank": ["upscaler"], "upscaler": 84 | ["store"], "store": ["end-gateway"]}' 85 | - --deployments-addresses 86 | - '{"dalle": ["grpc://dalle.dalle-flow.svc:8080"], "clip_encoder": ["grpcs://demo-cas.jina.ai:2096"], 87 | "diffusion": ["grpc://diffusion.dalle-flow.svc:8080"], "rerank": ["grpcs://demo-cas.jina.ai:2096"], 88 | "upscaler": ["grpc://upscaler.dalle-flow.svc:8080"], "store": ["grpc://store.dalle-flow.svc:8080"]}' 89 | - --pod-role 90 | - GATEWAY 91 | - --monitoring 92 | - --port-monitoring 93 | - '51006' 94 | command: 95 | - jina 96 | env: 97 | - name: POD_UID 98 | valueFrom: 99 | fieldRef: 100 | fieldPath: metadata.uid 101 | - name: JINA_DEPLOYMENT_NAME 102 | value: gateway 103 | envFrom: 104 | - configMapRef: 105 | name: gateway-configmap 106 | image: jinaai/jina:3.7.14-py38-standard 107 | imagePullPolicy: IfNotPresent 108 | name: executor 109 | ports: 110 | - containerPort: 51005 111 | readinessProbe: 112 | initialDelaySeconds: 5 113 | periodSeconds: 10 114 | tcpSocket: 115 | port: 51005 116 | --- 117 | apiVersion: monitoring.coreos.com/v1 118 | kind: ServiceMonitor 119 | metadata: 120 | labels: 121 | app: gateway 122 | name: gateway 123 | namespace: dalle-flow 124 | spec: 125 | endpoints: 126 | - port: monitoring 127 | namespaceSelector: 128 | matchNames: 129 | - dalle-flow 130 | selector: 131 | matchLabels: 132 | app: gateway 133 | -------------------------------------------------------------------------------- /k8s_flow/ingress.yml: -------------------------------------------------------------------------------- 1 | apiVersion: networking.k8s.io/v1 2 | kind: Ingress 3 | metadata: 4 | annotations: 5 | kubernetes.io/ingress.class: 'alb' 6 | alb.ingress.kubernetes.io/scheme: 'internet-facing' 7 | alb.ingress.kubernetes.io/listen-ports: '[{"HTTPS": 443}]' 8 | alb.ingress.kubernetes.io/backend-protocol: 'HTTP' 9 | alb.ingress.kubernetes.io/backend-protocol-version: GRPC 10 | labels: 11 | app: gateway 12 | name: gateway-exposed 13 | namespace: dalle-flow 14 | spec: 15 | rules: 16 | - host: dalle-flow.dev.jina.ai 17 | http: 18 | paths: 19 | - backend: 20 | service: 21 | name: gateway 22 | port: 23 | number: 51005 24 | path: / 25 | pathType: Prefix 26 | --- 27 | apiVersion: networking.k8s.io/v1 28 | kind: Ingress 29 | metadata: 30 | annotations: 31 | kubernetes.io/ingress.class: 'alb' 32 | alb.ingress.kubernetes.io/scheme: 'internet-facing' 33 | labels: 34 | app: gateway 35 | name: monitoring-exposed 36 | namespace: dalle-flow 37 | spec: 38 | rules: 39 | - host: dalle-flow-monitoring.dev.jina.ai 40 | http: 41 | paths: 42 | - backend: 43 | service: 44 | name: gateway 45 | port: 46 | number: 51006 47 | path: /gateway 48 | pathType: Exact 49 | - backend: 50 | service: 51 | name: prometheus-grafana 52 | port: 53 | number: 80 54 | path: / 55 | pathType: Prefix 56 | -------------------------------------------------------------------------------- /k8s_flow/store/store.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | JINA_LOG_LEVEL: INFO 4 | pythonunbuffered: '1' 5 | worker_class: uvicorn.workers.UvicornH11Worker 6 | kind: ConfigMap 7 | metadata: 8 | name: store-configmap 9 | namespace: dalle-flow 10 | --- 11 | apiVersion: v1 
12 | kind: Service 13 | metadata: 14 | labels: 15 | app: store 16 | name: store 17 | namespace: dalle-flow 18 | spec: 19 | ports: 20 | - name: port 21 | port: 8080 22 | protocol: TCP 23 | targetPort: 8080 24 | - name: monitoring 25 | port: 9090 26 | protocol: TCP 27 | targetPort: 9090 28 | selector: 29 | app: store 30 | type: ClusterIP 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: store 36 | namespace: dalle-flow 37 | spec: 38 | replicas: 2 39 | selector: 40 | matchLabels: 41 | app: store 42 | strategy: 43 | rollingUpdate: 44 | maxSurge: 0 45 | maxUnavailable: 1 46 | type: RollingUpdate 47 | template: 48 | metadata: 49 | annotations: 50 | linkerd.io/inject: enabled 51 | labels: 52 | app: store 53 | jina_deployment_name: store 54 | ns: dalle-flow 55 | pod_type: WORKER 56 | shard_id: '0' 57 | spec: 58 | affinity: 59 | podAntiAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | - labelSelector: 62 | matchExpressions: 63 | - key: app 64 | operator: In 65 | values: 66 | - store 67 | topologyKey: "kubernetes.io/hostname" 68 | containers: 69 | - args: 70 | - executor 71 | - --name 72 | - store 73 | - --extra-search-paths 74 | - . 75 | - --k8s-namespace 76 | - dalle-flow 77 | - --uses 78 | - config.yml 79 | - --port 80 | - '8080' 81 | - --replicas 82 | - '2' 83 | - --monitoring 84 | - --port-monitoring 85 | - '9090' 86 | - --uses-metas 87 | - '{}' 88 | - --native 89 | command: 90 | - jina 91 | env: 92 | - name: POD_UID 93 | valueFrom: 94 | fieldRef: 95 | fieldPath: metadata.uid 96 | - name: JINA_DEPLOYMENT_NAME 97 | value: store 98 | envFrom: 99 | - configMapRef: 100 | name: store-configmap 101 | image: jinahub/jffp33to:6304a2d059ebf96951457210 102 | imagePullPolicy: IfNotPresent 103 | name: executor 104 | ports: 105 | - containerPort: 8080 106 | readinessProbe: 107 | initialDelaySeconds: 5 108 | periodSeconds: 10 109 | tcpSocket: 110 | port: 8080 111 | --- 112 | apiVersion: monitoring.coreos.com/v1 113 | kind: ServiceMonitor 114 | metadata: 115 | labels: 116 | app: store 117 | name: store 118 | namespace: dalle-flow 119 | spec: 120 | endpoints: 121 | - port: monitoring 122 | namespaceSelector: 123 | matchNames: 124 | - dalle-flow 125 | selector: 126 | matchLabels: 127 | app: store 128 | -------------------------------------------------------------------------------- /k8s_flow/upscaler/upscaler.yml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | JINA_LOG_LEVEL: INFO 4 | pythonunbuffered: '1' 5 | worker_class: uvicorn.workers.UvicornH11Worker 6 | kind: ConfigMap 7 | metadata: 8 | name: upscaler-configmap 9 | namespace: dalle-flow 10 | --- 11 | apiVersion: v1 12 | kind: Service 13 | metadata: 14 | labels: 15 | app: upscaler 16 | name: upscaler 17 | namespace: dalle-flow 18 | spec: 19 | ports: 20 | - name: port 21 | port: 8080 22 | protocol: TCP 23 | targetPort: 8080 24 | - name: monitoring 25 | port: 9090 26 | protocol: TCP 27 | targetPort: 9090 28 | selector: 29 | app: upscaler 30 | type: ClusterIP 31 | --- 32 | apiVersion: apps/v1 33 | kind: Deployment 34 | metadata: 35 | name: upscaler 36 | namespace: dalle-flow 37 | spec: 38 | replicas: 2 39 | selector: 40 | matchLabels: 41 | app: upscaler 42 | strategy: 43 | rollingUpdate: 44 | maxSurge: 0 45 | maxUnavailable: 1 46 | type: RollingUpdate 47 | template: 48 | metadata: 49 | annotations: 50 | linkerd.io/inject: enabled 51 | labels: 52 | app: upscaler 53 | jina_deployment_name: upscaler 54 | ns: dalle-flow 55 | pod_type: WORKER 56 | 
shard_id: '0' 57 | spec: 58 | affinity: 59 | podAntiAffinity: 60 | requiredDuringSchedulingIgnoredDuringExecution: 61 | - labelSelector: 62 | matchExpressions: 63 | - key: app 64 | operator: In 65 | values: 66 | - upscaler 67 | topologyKey: "kubernetes.io/hostname" 68 | containers: 69 | - args: 70 | - executor 71 | - --name 72 | - upscaler 73 | - --extra-search-paths 74 | - . 75 | - --k8s-namespace 76 | - dalle-flow 77 | - --uses 78 | - config.yml 79 | - --port 80 | - '8080' 81 | - --replicas 82 | - '2' 83 | - --monitoring 84 | - --port-monitoring 85 | - '9090' 86 | - --uses-metas 87 | - '{}' 88 | - --native 89 | command: 90 | - jina 91 | env: 92 | - name: POD_UID 93 | valueFrom: 94 | fieldRef: 95 | fieldPath: metadata.uid 96 | - name: JINA_DEPLOYMENT_NAME 97 | value: upscaler 98 | envFrom: 99 | - configMapRef: 100 | name: upscaler-configmap 101 | image: jinahub/296f27y7:6304d180e17ee095f858c14e 102 | imagePullPolicy: IfNotPresent 103 | name: executor 104 | ports: 105 | - containerPort: 8080 106 | readinessProbe: 107 | initialDelaySeconds: 5 108 | periodSeconds: 10 109 | tcpSocket: 110 | port: 8080 111 | --- 112 | apiVersion: monitoring.coreos.com/v1 113 | kind: ServiceMonitor 114 | metadata: 115 | labels: 116 | app: upscaler 117 | name: upscaler 118 | namespace: dalle-flow 119 | spec: 120 | endpoints: 121 | - port: monitoring 122 | namespaceSelector: 123 | matchNames: 124 | - dalle-flow 125 | selector: 126 | matchLabels: 127 | app: upscaler 128 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # jina-related 2 | jina>=3.8.3 3 | docarray>=0.16.2,<0.30.0 4 | # dalle-mini 5 | flax 6 | git+https://github.com/openai/CLIP.git 7 | git+https://github.com/huggingface/transformers.git 8 | git+https://github.com/patil-suraj/vqgan-jax.git 9 | git+https://github.com/borisdayma/dalle-mini.git 10 | # glid3 11 | dalle_pytorch 12 | # SwinIR 13 | opencv-python 14 | timm 15 | Cython 16 | basicsr>=1.4.2 17 | facexlib>=0.2.5 18 | gfpgan>=1.3.5 19 | -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | if test ${DISABLE_GLID3XL}; then 3 | echo "Latent diffusion checkpoints will not be downloaded because DISABLE_GLID3XL flag is on" 4 | else 5 | if test -e /home/dalle/.cache/bert.pt -a -e /home/dalle/.cache/kl-f8.pt -a -e /home/dalle/.cache/finetune.pt; then 6 | echo "Latent diffusion checkpoints for glid3xl exist, continuing" 7 | else 8 | echo "Latent diffusion checkpoints for glid3xl not exist, downloading" 9 | sudo apt update 10 | sudo apt install -y wget 11 | wget https://dall-3.com/models/glid-3-xl/bert.pt -O /home/dalle/.cache/bert.pt 12 | wget https://dall-3.com/models/glid-3-xl/kl-f8.pt -O /home/dalle/.cache/kl-f8.pt 13 | wget https://dall-3.com/models/glid-3-xl/finetune.pt -O /home/dalle/.cache/finetune.pt 14 | fi 15 | 16 | ln -s /home/dalle/.cache/bert.pt /dalle/glid-3-xl/bert.pt 17 | ln -s /home/dalle/.cache/kl-f8.pt /dalle/glid-3-xl/kl-f8.pt 18 | ln -s /home/dalle/.cache/finetune.pt /dalle/glid-3-xl/finetune.pt 19 | fi 20 | 21 | . env/bin/activate 22 | python flow_parser.py 23 | jina flow --uses flow.tmp.yml 24 | --------------------------------------------------------------------------------
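Usage sketch: start.sh activates the virtualenv, runs flow_parser.py (configured through the environment variables it reads), and then serves the generated flow.tmp.yml. The same two steps can be run by hand when a non-default executor set is wanted; a minimal sketch, assuming the repository root as the working directory, the packages from requirements.txt installed, and the jina CLI on the PATH:

    # Build a trimmed flow: drop GLID-3 XL and SwinIR, enable Stable Diffusion
    # on GPU 0 with a single replica, and write the result to flow.tmp.yml.
    python flow_parser.py \
        --disable-glid3xl \
        --disable-swinir \
        --enable-stable-diffusion \
        --gpus-stable-diffusion 0 \
        --replicas-stable-diffusion 1 \
        --output flow.tmp.yml

    # Serve the generated flow (gRPC gateway on port 51005, as set in flow.yml).
    jina flow --uses flow.tmp.yml

The command-line switches have environment-variable equivalents (DISABLE_*, ENABLE_*, GPUS_*, REPLICAS_*, CAS_TOKEN), which is how the parser is configured when start.sh invokes it with no arguments.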