├── .gitignore ├── LICENSE ├── LICENSE.zlib ├── Makefile ├── README.md ├── gencode_clip_merges.py ├── generate.bat ├── generate.sh ├── include └── mlimgsynth.h ├── python ├── guessing_game.py ├── minimal_png.py └── mlimgsynth.py └── src ├── ccommon ├── alloc.c ├── alloc.h ├── alloc_arena.c ├── alloc_arena.h ├── alloc_gen.c ├── alloc_gen.h ├── any.c ├── any.h ├── base.mk ├── bisect.h ├── byteswap.h ├── ccommon.h ├── fsutil.c ├── fsutil.h ├── image.c ├── image.h ├── image_io.c ├── image_io.h ├── image_io_jpeg.c ├── image_io_jpeg.h ├── image_io_png.c ├── image_io_png.h ├── image_io_pnm.c ├── image_io_pnm.h ├── logging.c ├── logging.h ├── rng_philox.c ├── rng_philox.h ├── str_util.c ├── str_util.h ├── stream.c ├── stream.h ├── stringstore.c ├── stringstore.h ├── strslice.h ├── structio.c ├── structio.h ├── structio_json.c ├── structio_json.h ├── timing.c ├── timing.h ├── unicode.c ├── unicode.h ├── unicode_data.c ├── unicode_data.h └── vector.h ├── ccompute ├── tensorstore.c ├── tensorstore.h ├── tensorstore_gguf.c ├── tensorstore_gguf.h ├── tensorstore_safet.c └── tensorstore_safet.h ├── clip.c ├── clip.h ├── clip_merges.c.h ├── demo_mlimgsynth.c ├── ggml_extend.c ├── ggml_extend.h ├── localtensor.c ├── localtensor.h ├── lora.c ├── lora.h ├── main_mlimgsynth.c ├── main_tstore_util.c ├── mlblock.c ├── mlblock.h ├── mlblock_nn.c ├── mlblock_nn.h ├── mlimgsynth.c ├── mlimgsynth_options_get.c.h ├── mlimgsynth_options_set.c.h ├── prompt_preproc.h ├── sampling.c ├── sampling.h ├── solvers.c ├── solvers.h ├── str_match_util.h ├── tae.c ├── tae.h ├── tensor_name_conv.c ├── tensor_name_conv.h ├── test_common.h ├── test_prompt_preproc.c ├── test_rng.c ├── test_text_tokenize_clip.c ├── unet.c ├── unet.h ├── vae.c └── vae.h /.gitignore: -------------------------------------------------------------------------------- 1 | obj 2 | obj_dbg 3 | .d 4 | .d_dbg 5 | __pycache__ 6 | ggml 7 | test_rng 8 | tstore-util 9 | mlimgsynth 10 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Alejandro A. García 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 6 | 7 | The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software. 8 | 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 10 | -------------------------------------------------------------------------------- /LICENSE.zlib: -------------------------------------------------------------------------------- 1 | Copyright (C) 2024 Alejandro A. García 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any damages 5 | arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any purpose, 8 | including commercial applications, and to alter it and redistribute it 9 | freely, subject to the following restrictions: 10 | 11 | 1. 
The origin of this software must not be misrepresented; you must not 12 | claim that you wrote the original software. If you use this software 13 | in a product, an acknowledgment in the product documentation would be 14 | appreciated but is not required. 15 | 2. Altered source versions must be plainly marked as such, and must not be 16 | misrepresented as being the original software. 17 | 3. This notice may not be removed or altered from any source distribution. 18 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Makefile 2 | targets = test_rng tstore-util demo_mlimgsynth mlimgsynth \ 3 | test_text_tokenize_clip test_prompt_preproc 4 | targets_dlib = libmlimgsynth 5 | 6 | # Put your custom definitions in Makefile.local instead of changing this file 7 | -include Makefile.local 8 | 9 | include src/ccommon/base.mk 10 | VPATH = src:src/ccommon:src/ccompute 11 | cppflags += -Isrc -Iinclude 12 | ldflags += -L. 
13 | 14 | ### Dependencies 15 | # math 16 | ldlibs += -lm 17 | 18 | # ggml 19 | ifndef GGML_INCLUDE_PATH 20 | GGML_INCLUDE_PATH := ggml/include 21 | endif 22 | ifndef GGML_LIB_PATH 23 | GGML_LIB_PATH := ggml/Release/src 24 | endif 25 | cppflags += -I$(GGML_INCLUDE_PATH) 26 | ldflags += -L$(GGML_LIB_PATH) 27 | # ggml headers give several warnings with C99 28 | cflags += -Wno-pedantic 29 | 30 | tstore-util: ldlibs += -lggml -lggml-base 31 | libmlimgsynth: ldlibs += -lggml -lggml-base 32 | ifndef MLIS_NO_RUNPATH 33 | tstore-util: ldflags += -Wl,-rpath,$(GGML_LIB_PATH) 34 | libmlimgsynth: ldflags += -Wl,-rpath,$(GGML_LIB_PATH) 35 | endif 36 | 37 | # ggml scheduler is need for incomplete backends (no longer needed for vulkan) 38 | ifdef MLIS_GGML_SCHED 39 | libmlimgsynth: cppflags += -DUSE_GGML_SCHED=1 40 | endif 41 | 42 | # Flash Attention (not working yet, crashes) 43 | ifdef MLIS_FLASH_ATTENTION 44 | libmlimgsynth: cppflags += -DUSE_FLASH_ATTENTION 45 | endif 46 | 47 | # png 48 | ifndef MLIS_NO_PNG 49 | mlimgsynth: ldlibs += -lpng 50 | mlimgsynth: cppflags += -DUSE_LIB_PNG 51 | mlimgsynth: image_io_png.o 52 | endif 53 | 54 | # jpeg 55 | ifndef MLIS_NO_JPEG 56 | mlimgsynth: ldlibs += -ljpeg 57 | mlimgsynth: cppflags += -DUSE_LIB_JPEG 58 | mlimgsynth: image_io_jpeg.o 59 | endif 60 | 61 | # libmlimgsynth 62 | demo_mlimgsynth: ldlibs += -lmlimgsynth 63 | mlimgsynth: ldlibs += -lmlimgsynth 64 | test_text_tokenize_clip: ldlibs += -lmlimgsynth 65 | ifndef MLIS_NO_RUNPATH 66 | demo_mlimgsynth: ldflags += -Wl,-rpath,. 67 | mlimgsynth: ldflags += -Wl,-rpath,. 68 | test_text_tokenize_clip: ldflags += -Wl,-rpath,. 
69 | endif 70 | 71 | # GCC 13.3.1 20240614 warns about dstr_appendz and dstr_insertz 72 | # I think the code is ok, but I will check later 73 | FLAGS=-Wno-array-bounds -Wno-stringop-overflow 74 | 75 | ### Module dependencies 76 | tensorstore.o: cppflags += -DTENSORSTORE_USE_GGML -DTENSORSTORE_FMT_GGUF \ 77 | -DTENSORSTORE_FMT_SAFET 78 | 79 | objs_base = timing.o alloc.o alloc_gen.o stream.o logging.o 80 | objs_tstore = alloc_arena.o stringstore.o fsutil.o \ 81 | any.o structio.o structio_json.o \ 82 | tensorstore.o tensorstore_safet.o tensorstore_gguf.o 83 | 84 | ### Binary targets 85 | test_rng: $(objs_base) rng_philox.o test_rng.o 86 | 87 | tstore-util: $(objs_base) $(objs_tstore) main_tstore_util.o 88 | 89 | libmlimgsynth: $(objs_base) $(objs_tstore) rng_philox.o localtensor.o \ 90 | unicode.o unicode_data.o \ 91 | ggml_extend.o mlblock.o mlblock_nn.o tae.o vae.o clip.o unet.o lora.o \ 92 | solvers.o sampling.o tensor_name_conv.o mlimgsynth.o 93 | 94 | demo_mlimgsynth: demo_mlimgsynth.o 95 | 96 | mlimgsynth: $(objs_base) image.o image_io.o image_io_pnm.o \ 97 | localtensor.o main_mlimgsynth.o 98 | 99 | test_text_tokenize_clip: test_text_tokenize_clip.o 100 | 101 | test_prompt_preproc: $(objs_base) test_prompt_preproc.o 102 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MLImgSynth 2 | 3 | Generate images using Stable Diffusion (SD) models. This program is completely written in C and uses the [GGML](https://github.com/ggerganov/ggml/) library as inference backend. It is largely based in [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp), but with a focus in more concise and clear code. Also, I put some care in the memory usage: at each step only the required weights will be loaded in the backend memory (e.g. VRAM). 
Moreover, with the options `--unet-split` and `--vae-tile` it is possible to run SDXL models using only 4 GiB without quantization. 4 | 5 | ## Supported models 6 | 7 | - SD v1.x: [info](https://github.com/CompVis/stable-diffusion) [weights](https://huggingface.co/runwayml/stable-diffusion-v1-5) (`emaonly` is ok) 8 | - SD v2.x: [info](https://github.com/Stability-AI/stablediffusion) [weights](https://huggingface.co/stabilityai/stable-diffusion-2-1) 9 | - SDXL: [info](https://stability.ai/news/stable-diffusion-sdxl-1-announcement) [weights](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0) 10 | 11 | Besides the original weights, you may use any of the fine-tuned checkpoints that can be found on the internet. Distilled models (turbo, hyper, lightning) should work normally. 12 | 13 | ## Usage on Windows 14 | 15 | Download and unzip the latest [Release](https://github.com/aagdev/mlimgsynth/releases). Edit the file `generate.bat` as needed and execute it. 16 | 17 | ## Build 18 | 19 | First you must build ggml as a library with the desired backends and then build this program linking to it. You may symlink the ggml directory to the root of this project or define the `GGML_INCLUDE_PATH` and `GGML_LIB_PATH` variables. Finally, just call `make`. For example: 20 | 21 | ```shell 22 | export GGML_INCLUDE_PATH=../ggml/include 23 | export GGML_LIB_PATH=../ggml/Release/src 24 | make 25 | ``` 26 | 27 | By default, the program is linked with `libpng` and `libjpeg` to support those formats. You may suppress these dependencies by defining `MLIS_NO_PNG` and `MLIS_NO_JPEG`. The PNM image format is always available. 28 | 29 | ## Usage 30 | 31 | First, download the weights of the model you wish to use (safetensors and gguf formats supported). 
To generate an image (txt2img) use: 32 | 33 | ```shell 34 | ./mlimgsynth generate -m MODEL_PATH --cfg-scale 7 --steps 20 --seed 42 -o output.png -p "a box on a table" 35 | ``` 36 | 37 | The option `-b` lets you select from the available GGML backends. By default the "best" is used, usually GPU. Run `./mlimgsynth list-backends` to see the list of backends and devices. 38 | 39 | See the script `generate.sh` for a more complete example. 40 | 41 | Execute without any arguments to see a list of all the supported options. 42 | 43 | ### img2img and inpainting 44 | 45 | To start from an initial image (img2img) add the options `-i IMAGE.png` and `--f-t-ini 0.7`. The second option controls the strength by changing the initial time in the denoising process; you may try any value between 0 (no changes) and 1. 46 | 47 | If the image has an alpha channel (transparency), it is used as a mask for inpainting. You can modify the alpha channel of an image using an editor like GIMP (remember to tick the option "Save color values from transparent pixels" when saving). 48 | 49 | ### Lora's 50 | 51 | Lora's can be loaded individually with the option `--lora PATH,MULT` or with the option `--lora-dir PATH` and adding to the prompt `<lora:NAME:MULT>`. In the latter case, it will look for the file `PATH/NAME.safetensors`. 52 | 53 | ### Prompt emphasis (token weighting) 54 | 55 | You can increase or decrease the emphasis of certain parts of the prompt to make the model pay more or less attention to them. This uses the same syntax as [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). Examples: 56 | 57 | * `a (dog) jumping` increases the weight of "dog" by 1.1 . 58 | * `a ((dog)) jumping` increases twice, that is, by 1.21 . 59 | * `a [dog] jumping` decreases by 1.1 (weight ~ 0.91). 60 | * `a (dog:1.5) jumping` increases by 1.5 . 61 | 62 | You can disable all prompt processing (including loras) using the option `--no-prompt-parse y` *before* the prompt. 
 63 | 64 | ### TAE 65 | 66 | To accelerate and reduce the memory usage during the image decoding, you may use the [TAE](https://github.com/madebyollin/taesd) (tiny autoencoder) in place of the VAE (variational autoencoder) of SD. Download the weights compatible with SD or SDXL, and pass the path to them with the option `--tae TAE.safetensors` to enable it. Be warned that this reduces the final image quality. If you are low on memory, it is preferable to use the `--vae-tile 512` option. 67 | 68 | ## Library 69 | 70 | All the important functionality is in a library (libmlimgsynth) that you can use from your own programs. There are examples for C (`src/demo_mlimgsynth.c`) and for python (`python/mlimgsynth.py` and `python/guessing_game.py`). 71 | 72 | ## Future plans 73 | 74 | - API server and minimal web UI. 75 | - ControlNet. 76 | - Maybe SDE sampling. The biggest hurdle is understanding what the `torchsde.BrownianTree` used in `k-diffusion` is doing. 77 | - Other models? 78 | 79 | ## License 80 | Most of this program is licensed under the MIT license (see the file `LICENSE`), with the exception of the files in the directory `src/ccommon` which use the ZLib license (see the file `LICENSE.zlib`). To prevent any confusion, each file indicates its license at the beginning using the SPDX identifier. 81 | 82 | ## Contributing 83 | Contributions in the form of bug reports, suggestions, patches or pull requests are welcome. 84 | -------------------------------------------------------------------------------- /gencode_clip_merges.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2025, Alejandro A. García 3 | SPDX-License-Identifier: MIT 4 | 5 | Converts CLIP vocabulary merges in a list of token number pairs. 
import gzip

bpe_path = "bpe_simple_vocab_16e6.txt.gz"

# End-of-word marker appended to a token that terminates a word in the
# CLIP BPE vocabulary (see clip/simple_tokenizer.py in the CLIP repo).
WORD_END = "</w>"

# Code copied almost verbatim from CLIP repo
def bytes_to_unicode():
    """Return the byte -> printable unicode character table used by CLIP.

    Returns a pair (bs, cs): `bs` is the list of the 256 byte values and
    `cs` the list of single-character strings assigned to them, in the
    same order. Printable bytes map to themselves; the remaining bytes
    are mapped to code points starting at 256.
    """
    bs = list(range(ord("!"), ord("~")+1)) \
       + list(range(ord("¡"), ord("¬")+1)) \
       + list(range(ord("®"), ord("ÿ")+1))
    cs = bs[:]
    n = 0
    for b in range(2**8):
        if b not in bs:
            bs.append(b)
            cs.append(2**8+n)
            n += 1
    cs = [chr(n) for n in cs]
    return bs, cs


def build_vocab(merges):
    """Build the token list in CLIP order.

    Order: the 256 byte tokens, the same 256 tokens with the end-of-word
    marker, one token per merge (concatenation of the pair), and finally
    the two special tokens. The index in the returned list is the token id.

    merges: sequence of (left, right) string pairs.
    """
    base = list(bytes_to_unicode()[1])
    # The end-of-word variants must differ from the plain byte tokens,
    # otherwise the ids of the first 256 tokens are overwritten when the
    # encoder dictionary is built and merges containing "</w>" are not found.
    vocab = base + [v + WORD_END for v in base]
    for merge in merges:
        vocab.append(''.join(merge))
    vocab.extend(['<|startoftext|>', '<|endoftext|>'])
    return vocab


def main():
    """Read the BPE merges file and print each merge as a C pair of token ids."""
    # Context manager so the gzip handle is closed deterministically
    with gzip.open(bpe_path) as f:
        merges = f.read().decode("utf-8").split('\n')
    # Skip the header line and keep only the merges used by CLIP:
    # 49152 total tokens - 256 byte tokens - 2 special tokens (+1 for the slice end)
    merges = merges[1:49152-256-2+1]
    merges = [tuple(merge.split()) for merge in merges]

    vocab = build_vocab(merges)
    encoder = dict(zip(vocab, range(len(vocab))))

    for left, right in merges:
        l = encoder[left]
        r = encoder[right]
        print("{%d, %d}," % (l, r))


if __name__ == "__main__":
    main()
#!/bin/sh
# Example batch-generation script for mlimgsynth.
# Edit the variables below and run it from the directory that contains
# the mlimgsynth binary.

### Generation options
PROMPT="a photograph of an astronaut riding a horse"
NPROMPT=
SEED=
# Sizes: 512,512 512,768 768,512 1024,1024 1216,832 832,1216
SIZE=

OUTNAME="output"
OUTEXT="png"
NBATCH=1

CFG_SCALE=7
STEPS=20

# Sampling method: euler, euler_a, taylor3, dpm++2m, dpm++2s_a
METHOD=euler_a
# Scheduler: uniform, karras
SCHED=uniform
SAMPOPT=

# Leave empty to use the best (GPU usually).
#BACKEND=Vulkan0
#BACKEND=CPU

# Change to the path of the model weights
# Supported models: SD 1, 2 or XL
# Supported formats: safetensors
MODEL="../models/sd_v1.5-pruned-emaonly-fp16.safetensors"
#MODEL="../models/DreamShaper_8.safetensors"
#MODEL="../models/dreamshaperXL_v21TurboDPMSDE.safetensors"

# LoRA's
LORADIR="../models/loras_sd15"
# NOTE(review): the tag that followed $PROMPT here was lost in extraction;
# the <lora:NAME:MULT> form is assumed - confirm against the original script.
#PROMPT="$PROMPT <lora:NAME:MULT>"

EXTRA=
# You may enable any of the following options removing the # in front.
# $EXTRA is expanded unquoted below so that it splits into separate
# arguments; quotes placed inside the value would be passed literally to
# the program, so do not quote the paths (paths with spaces are not
# supported through EXTRA).

# Reduce memory usage
#EXTRA="$EXTRA --unet-split 1 --vae-tile 512"

# Use TAE instead of VAE to decode faster and with less memory
#EXTRA="$EXTRA --tae ../models/tae_sd.safetensors"

# Perform img2img
# Inpaints if the image has an alpha channel
#EXTRA="$EXTRA -i input_image.png --f-t-ini 0.7"

# Debug output
#EXTRA="$EXTRA --debug"

# Batch generation
idx=1
while [ "$idx" -le "$NBATCH" ]; do
	echo "Generating $idx / $NBATCH"
	# SAMPOPT and EXTRA are intentionally unquoted (word splitting wanted).
	# Stop at the first failure, like generate.bat does.
	./mlimgsynth generate -b "$BACKEND" -m "$MODEL" --lora-dir "$LORADIR" -p "$PROMPT" -n "$NPROMPT" -o "$OUTNAME-$idx.$OUTEXT" -d "$SIZE" --cfg-scale "$CFG_SCALE" --steps "$STEPS" --seed "$SEED" --method "$METHOD" --scheduler "$SCHED" $SAMPOPT $EXTRA || {
		status=$?
		echo "ERROR $status" >&2
		exit "$status"
	}
	# Use consecutive seeds when an explicit seed was given
	[ "$SEED" = "" ] || SEED=$(($SEED+1))
	idx=$(($idx+1))
done
ADJECTIVES = [
    "red", "blue", "green", "yellow",
]

NOUNS = [
    "lion", "rabbit", "cow", "chicken",
    "cup", "table", "lamp", "book", "car",
]

PLACES = [
    "in the mountains", "on a lake", "in a river", "on a beach", "in a forest",
    "in a city street", "in a cavern"
]

class GuessingGame:
    """State of one guessing game: a random prompt, its image and the last guess.

    `mlis` is the image generator; this class only calls its
    `option_set`, `generate` and `image_get` methods.
    """

    def __init__(self, mlis):
        self.mlis = mlis
        self.prompt = None
        self.img = None
        self.feat = None
        self.last_score = 0.0
        self.last_guess = ""

        # One word list per prompt slot; a prompt takes one entry from each
        self.elements = [ADJECTIVES, NOUNS, PLACES]
        # Optional text prepended to the prompt / negative prompt, set by the caller
        self.prompt_prefix = None
        self.nprompt = None
    #end

    def generate(self):
        """Pick a new random prompt and generate its image."""
        # Forget the previous round
        self.img = None
        self.last_score, self.last_guess = 0.0, ""

        chosen = []
        for options in self.elements:
            chosen.append(random.choice(options))
        self.prompt_elems = chosen
        self.prompt = " ".join(chosen)
        logging.debug("Prompt: " + self.prompt)

        # The prefix is sent to the generator but is not part of the answer
        if self.prompt_prefix:
            full_prompt = self.prompt_prefix + " " + self.prompt
        else:
            full_prompt = self.prompt
        self.mlis.option_set("prompt", full_prompt)
        if self.nprompt:
            self.mlis.option_set("nprompt", self.nprompt)

        logging.info("Generating image...")
        self.mlis.generate()
        self.img = self.mlis.image_get()
    #end

    def guess_check(self, guess):
        """Score a guess as the fraction of prompt slots matched exactly.

        The guess is split on whitespace into at most three parts (the
        third part keeps the rest of the text) and compared slot by slot,
        ignoring case. The guess and its score are remembered.
        """
        words = [part.strip().lower() for part in guess.split(maxsplit=2)]
        hits = 0
        for given, wanted in zip(words, self.prompt_elems):
            if given == wanted:
                hits += 1
        score = hits / len(self.prompt_elems)
        self.last_guess = guess
        self.last_score = score
        return score
    #end

    def image_png_write(self, f):
        """Write the current image as PNG to the binary file object `f`."""
        img = self.img
        png_write(f, img.data, img.w, img.h, img.c)
    #end
#end

Guessing Game

99 |
100 | Try to guess the image prompt:
101 | 102 | 103 |
104 | Score: {{last_score}} 105 |
106 | 107 | 108 |
109 |
class GuessingGameWebHandler(BaseHTTPRequestHandler):
    """HTTP handler of the game: main page at "/", current image at "/image.png".

    The game state is read from `self.server.game`.
    """

    def page_main(self):
        """Send the main page with the last score and guess filled in."""
        # Local import: keeps this block self-contained
        import html
        game = self.server.game
        last_score = format(game.last_score, ".2f").encode("ascii")
        # The guess is user input that is echoed back into the page: escape
        # it, and encode as UTF-8 so non-ASCII guesses do not raise.
        last_guess = html.escape(game.last_guess, quote=True).encode("utf-8")
        # The score is substituted first so that a guess containing the
        # score placeholder is not expanded.
        page = PAGE.replace(b"{{last_score}}", last_score) \
                   .replace(b"{{last_guess}}", last_guess)
        # Build the page before sending the headers so a failure above
        # does not leave a half-written response.
        self.send_response(200)
        self.send_header('Content-type', 'text/html; charset=utf-8')
        self.end_headers()
        self.wfile.write(page)

    def page_image(self):
        """Send the current image as PNG, or 404 if there is no image."""
        if self.server.game.img is None:
            # e.g. the last generation failed
            self.page_not_found()
            return
        self.send_response(200)
        self.send_header('Content-type', 'image/png')
        self.end_headers()
        self.server.game.image_png_write(self.wfile)

    def page_not_found(self):
        """Send a plain text 404 response."""
        self.send_response(404)
        self.send_header('Content-type', 'text/plain')
        self.end_headers()
        self.wfile.write(b"404 Not Found\n")

    def do_GET(self):
        """Dispatch GET requests by path.

        On "/", only the first query parameter is considered:
        "new" generates a new image, "guess" scores its value.
        """
        url = urlparse(self.path)
        if url.path == "/":
            kv = parse_qsl(url.query)
            if kv:
                if kv[0][0] == "new":
                    self.server.game.generate()
                elif kv[0][0] == "guess":
                    self.server.game.guess_check(kv[0][1])
            self.page_main()
        elif url.path == "/image.png":
            self.page_image()
        else:
            self.page_not_found()
#end

def main():
    """Parse the command line, generate the first image and serve the game."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-H", "--host", default="127.0.0.1")
    parser.add_argument("-P", "--port", type=int, default=8000)
    parser.add_argument("-m", "--model", required=True,
        help="Image generation model file path.")
    parser.add_argument("-p", "--prompt-prefix")
    parser.add_argument("-n", "--negative-prompt")
    parser.add_argument("-g", "--genopt",
        help="List of image generation options like: steps=12:method=euler:...")
    parser.add_argument("--no-browser", action="store_true",
        help="Do not open the page in a browser.")
    parser.add_argument("-D", "--debug", action="store_true")
    args = parser.parse_args()

    logging.basicConfig(
        level=logging.DEBUG if args.debug else logging.INFO,
        format="[GAME] %(levelname)s %(message)s" )

    mlis = MLImgSynth()
    mlis.option_set("log-level", "debug" if args.debug else "info")
    mlis.option_set("model", args.model)

    # Generation options come as "key=value" items separated by ':'
    if args.genopt:
        for kv in args.genopt.split(":"):
            k,_,v = kv.partition("=")
            mlis.option_set(k, v)

    game = GuessingGame(mlis)
    game.prompt_prefix = args.prompt_prefix
    game.nprompt = args.negative_prompt
    # Generate the first image before accepting requests
    game.generate()

    httpd = HTTPServer((args.host, args.port), GuessingGameWebHandler)
    # The handler reaches the game through self.server.game
    httpd.game = game
    logging.info("Listening on %s:%s", args.host, args.port)
    # The browser is only opened automatically for the default local address
    if not args.no_browser and args.host == "127.0.0.1":
        httpd.server_activate()
        webbrowser.open("http://127.0.0.1:%d" % args.port)
    httpd.serve_forever()
#end

if __name__ == '__main__':
    main()
import zlib
import struct

def chunk_write(f, type_str, data):
    """Write one PNG chunk: length, 4-character type, data and CRC.

    f: binary file object. type_str: chunk type such as "IHDR".
    data: bytes of the chunk body. The CRC covers the type and the data.
    """
    ct = type_str.encode("ASCII")
    f.write( struct.pack(">I", len(data)) )
    f.write( ct )
    f.write( data )
    f.write( struct.pack(">I", zlib.crc32(data, zlib.crc32(ct))) )

def ihdr_make(w, h, ch):
    """Return the body of the IHDR chunk for an 8-bit image.

    ch is the number of channels: 1 gray, 2 gray+alpha, 3 RGB, 4 RGBA.
    Raises ValueError for any other channel count.
    """
    if ch not in (1, 2, 3, 4):
        raise ValueError("unsupported number of channels: %r" % (ch,))
    color_type = 6 if ch == 4 else 2 if ch == 3 else 4 if ch == 2 else 0
    # bit depth 8, then compression, filter and interlace methods all 0
    out = struct.pack(">IIBBBBB", w, h, 8, color_type, 0, 0, 0)
    return out

def data_filter(data, s, h, row_bytes=None):
    """Prefix each image row with the "no filter" byte.

    data: bytes-like pixel data. s: distance in bytes between the start
    of consecutive rows. h: number of rows. row_bytes: number of bytes
    of each row that belong to the image (defaults to s); bytes past it
    are row padding and are not copied.
    """
    if row_bytes is None:
        row_bytes = s
    parts = []
    for y in range(h):
        start = s*y
        parts.append(b"\0")  # No filter, raw data
        parts.append(data[start:start+row_bytes])
    # Single join instead of repeated concatenation (linear time)
    return b"".join(parts)

def png_write(f, data, w, h, ch=3, clvl=-1, stride=None, texts=()):
    """Write an 8-bit PNG image to the binary file object f.

    data: bytes-like pixel data, rows from top to bottom.
    w, h: size in pixels. ch: channels per pixel (1 to 4).
    clvl: zlib compression level (-1 is the zlib default).
    stride: bytes between row starts in data (defaults to w*ch).
    texts: iterable of (name, text) pairs stored as tEXt chunks.
    """
    # Signature
    f.write(b"\x89PNG\r\n\x1a\n")
    # Header
    ihdr = ihdr_make(w, h, ch)
    chunk_write(f, "IHDR", ihdr)
    # Text chunks
    for name, text in texts:
        text_data = name.encode("utf8") + b"\0" + text.encode("utf8")
        chunk_write(f, "tEXt", text_data)
    # Image data
    # Each scanline must hold exactly w*ch bytes, whatever the stride is
    fdata = data_filter(data, stride or w*ch, h, w*ch)
    cdata = zlib.compress(fdata, clvl)
    chunk_write(f, "IDAT", cdata)
    # End
    chunk_write(f, "IEND", bytes())
#end

# Minimal test
if __name__ == "__main__":
    w = 40
    h = 20
    ch = 3
    data = bytes([x*6*(c==0)+y*12*(c==2)
        for y in range(h) for x in range(w) for c in range(ch)])
    with open("minimal_png_test.png", "wb") as f:
        png_write(f, data, w, h, ch, texts=[("source", "minimal_png.py")])
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "alloc.h" 5 | 6 | #define size_align(S,A) \ 7 | (((S) + ((A) - 1)) & ~((A) - 1)) 8 | 9 | // libc wrapper 10 | #if __STDC_HOSTED__ 11 | #include 12 | void* alloc_stdlib_alloc(Allocator* a, void* p, size_t sz, int flags) 13 | { 14 | if (a->flags & ALLOC_F_HAS_SIZE4) { 15 | if (p) p = (uint8_t*)p - ALLOC_SIZE_ALIGNMENT; 16 | if (sz) sz = size_align(sz + ALLOC_SIZE_ALIGNMENT, ALLOC_SIZE_ALIGNMENT); 17 | } 18 | //if (a->flags & ALLOC_F_ROUND2 && sz) { 19 | // sz = size_round2(sz); 20 | //} 21 | if (p) { 22 | if (sz == 0) { 23 | free(p); 24 | p = NULL; 25 | } else { 26 | sz += sz >> ALLOC_RESIZE_MARGIN; 27 | p = realloc(p, sz); 28 | if (!p) alloc_fatal(a); 29 | } 30 | } else { 31 | if (flags & ALLOC_AF_ZERO) { 32 | p = calloc(1, sz); 33 | } else { 34 | p = malloc(sz); 35 | } 36 | if (!p) alloc_fatal(a); 37 | } 38 | if (a->flags & ALLOC_F_HAS_SIZE4 && p) { 39 | p = (uint8_t*)p + ALLOC_SIZE_ALIGNMENT; 40 | ((uint32_t*)p)[-1] = sz - ALLOC_SIZE_ALIGNMENT; 41 | } 42 | return p; 43 | } 44 | #endif 45 | 46 | // Global allocators 47 | #ifdef CC_ALLOC_GLOBAL_USE_STDLIB 48 | Allocator global_allocator = 49 | { alloc_stdlib_alloc, NULL, NULL, NULL, 0 }, 50 | *g_allocator = &global_allocator; 51 | 52 | Allocator global_allocator_dopt = 53 | { alloc_stdlib_alloc, NULL, NULL, NULL, ALLOC_F_DOPTIMAL }, 54 | *g_allocator_dopt = &global_allocator_dopt; 55 | #endif 56 | -------------------------------------------------------------------------------- /src/ccommon/alloc.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Common allocator interface. 5 | * Handles failure calling an special function instead of returning NULL, 6 | * so, there is no need to check for errors. 
7 | */ 8 | #pragma once 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | //TODO: modify vector.h to take advantage of size info ? 16 | //TODO: compile option to use stdlib instead of the custom allocator 17 | 18 | #ifndef ALLOC_SIZE_ALIGNMENT 19 | #define ALLOC_SIZE_ALIGNMENT 16 20 | #endif 21 | 22 | #ifndef ALLOC_RESIZE_MARGIN 23 | #define ALLOC_RESIZE_MARGIN 4 //+6% 24 | #endif 25 | 26 | /* Allocator interface */ 27 | 28 | typedef struct Allocator Allocator; 29 | struct Allocator { 30 | // Allocation, reallocation and freeing of memory. 31 | void * (*alloc)(Allocator* a, void* ptr, size_t sz, int flags); 32 | // Free all associated memory (if possible). 33 | void (*ctx_free)(Allocator* a); 34 | // Handles fatal errors (out of memory). Can be NULL or user supplied. 35 | void (*fatal)(const Allocator* a); 36 | // Allocator context 37 | void * ctx; 38 | // Options 39 | int flags; 40 | }; 41 | 42 | // Allocator flags 43 | enum { 44 | // Store size of each allocation in the previous 4 bytes 45 | ALLOC_F_HAS_SIZE4 = 1, 46 | ALLOC_F_HAS_SIZE = ALLOC_F_HAS_SIZE4, 47 | // Set of flags for efficient dynamic arrays 48 | ALLOC_F_DOPTIMAL = ALLOC_F_HAS_SIZE4, 49 | }; 50 | 51 | // alloc() flags 52 | enum { 53 | ALLOC_AF_ZERO = 1, //Zero memory (new allocation only) 54 | }; 55 | 56 | // Checks if an allocator is ready to use 57 | static inline bool allocator_good(const Allocator* a) { 58 | return !!a->alloc; 59 | } 60 | 61 | // Free all the memory associated with the allocator (if it corresponds). 62 | // May be a no-op. 
63 | static inline void allocator_free(Allocator* a) { 64 | if (a->ctx_free) a->ctx_free(a); 65 | } 66 | 67 | // This called to handle fatal errors (out of memory) 68 | static inline void alloc_fatal(const Allocator* a) { 69 | if (a->fatal) a->fatal(a); 70 | abort(); 71 | } 72 | 73 | #define ALLOC_SIZE_MASK 0x0ffffffc 74 | 75 | // Allocates a new block 76 | #ifdef __GNUC__ 77 | __attribute((malloc, alloc_size(2))) 78 | #endif 79 | static inline 80 | void * alloc_alloc(Allocator* a, size_t sz) { 81 | void * p = a->alloc(a, NULL, sz, ALLOC_AF_ZERO); 82 | if (!p && sz) alloc_fatal(a); 83 | return p; 84 | } 85 | 86 | // Allocates a new blocks of C elements of type T 87 | #define alloc_new(A, T, C) \ 88 | ((T*)alloc_alloc((A), sizeof(T)*(C))) 89 | 90 | /* Get the size of a block. 91 | * May be larger than the requested size. The additional space can be used normally. 92 | * Returns zero if not supported. 93 | */ 94 | static inline 95 | size_t alloc_size(const Allocator* a, const void* p) { 96 | if (!(a && a->flags & ALLOC_F_HAS_SIZE4)) return 0; 97 | return p ? ((uint32_t*)p)[-1] & ALLOC_SIZE_MASK : 0; 98 | } 99 | 100 | /* Get the size of a block. 101 | * Returns if not known. 
102 | */ 103 | static inline 104 | size_t alloc_size_opt(const Allocator* a, const void* p, size_t def) { 105 | if (!(a && a->flags & ALLOC_F_HAS_SIZE4)) return def; 106 | return alloc_size(a, p); 107 | } 108 | 109 | // Changes the size of a block 110 | #ifdef __GNUC__ 111 | __attribute((malloc, alloc_size(3))) 112 | #endif 113 | static inline 114 | void * alloc_realloc(Allocator* a, void* p, size_t sz) { 115 | if (a->flags & ALLOC_F_HAS_SIZE4 && sz <= alloc_size(a, p)) return p; 116 | p = a->alloc(a, p, sz, 0); 117 | if (!p && sz) alloc_fatal(a); 118 | return p; 119 | } 120 | 121 | #define alloc_resize(A, P, T, C) \ 122 | ((T*)alloc_realloc((A), (P), sizeof(T)*(C))) 123 | 124 | // Frees a block 125 | static inline 126 | void alloc_free(Allocator* a, void* p) { 127 | if (!p) return; 128 | a->alloc(a, p, 0, 0); 129 | } 130 | 131 | /* Global allocators for modules that can not take it as a parameter. 132 | * May be modified by the user. 133 | */ 134 | extern Allocator *g_allocator, *g_allocator_dopt; 135 | 136 | /* Standard library wrapper 137 | */ 138 | #if __STDC_HOSTED__ 139 | void* alloc_stdlib_alloc(Allocator* a, void* ptr, size_t sz, int flags); 140 | 141 | /* Returns a wrapper allocator for stdlib. 142 | */ 143 | static inline 144 | Allocator allocator_stdlib() { 145 | return (Allocator){ alloc_stdlib_alloc, NULL, NULL, NULL, 0 }; 146 | } 147 | 148 | /* Returns a wrapper allocator for stdlib. 149 | * Optimized for efficient dynamic arrays (frequent reallocations). 
/* Round up to the nearest power of two.
 *
 * Works for the full width of size_t (the previous version stopped at a
 * 16-bit shift and returned wrong results for values above 2^32).
 * A value that is already a power of two is returned unchanged.
 * Returns 0 for v == 0 and when the result does not fit in size_t.
 */
static inline
size_t size_round2(size_t v)
{
	// Smear the highest set bit of (v-1) into every lower position...
	v--;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;
#if SIZE_MAX > 0xffffffffu
	// ...including the upper half when size_t is wider than 32 bits.
	v |= v >> 16 >> 16;
#endif
	// ...then step to the next power of two.
	v++;
	return v;
}
sz) 48 | { 49 | if (sz > S->rem && alloc_arena_reserve(S, sz) < 0) return NULL; 50 | void * p = S->page->data + S->page->size - S->rem; 51 | S->rem -= sz; 52 | return p; 53 | } 54 | 55 | void alloc_arena_free_last(AllocatorArena* S, void* p_) 56 | { 57 | if (!S->page) return; 58 | uint8_t *ini = S->page->data, 59 | *end = S->page->data + S->page->size, 60 | *p = p_; 61 | if (ini <= p && p < end) { 62 | S->rem = end - p; 63 | } 64 | } 65 | 66 | void alloc_arena_free(AllocatorArena* S) 67 | { 68 | if (S->al) { //dynamic storage 69 | // Iterate over the pages and free them 70 | struct AllocArenaPage *cur, *prev=S->page; 71 | while ((cur = prev)) { 72 | prev = cur->prev; 73 | alloc_free(S->al, cur); 74 | } 75 | S->page = NULL; 76 | S->rem = 0; 77 | } 78 | else if (S->page) { //static storage 79 | S->rem = S->page->size; 80 | } 81 | } 82 | 83 | void * allocator_arena_alloc(Allocator* a, void* ptr, size_t sz, int flags) 84 | { 85 | AllocatorArena * S = a->ctx; 86 | //TODO: implement size storage? 87 | //TODO: alignment? 88 | if (a->flags & ALLOC_F_HAS_SIZE4) { alloc_fatal(a); return NULL; } 89 | if (ptr) { 90 | if (sz == 0) { 91 | // Free: no op 92 | return NULL; 93 | } else { 94 | // The old size is not known 95 | alloc_fatal(a); 96 | return NULL; 97 | } 98 | } 99 | void * p = alloc_arena_alloc(S, sz); 100 | if (flags & ALLOC_AF_ZERO) 101 | memset(p, 0, sz); 102 | //if (ptr && p && oldsz) 103 | // memcpy(p, ptr, oldsz); 104 | return p; 105 | } 106 | 107 | void allocator_arena_ctx_free(Allocator* a) 108 | { 109 | AllocatorArena * S = a->ctx; 110 | alloc_arena_free(S); 111 | } 112 | -------------------------------------------------------------------------------- /src/ccommon/alloc_arena.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Simple and fast arena allocator that frees everything at once. 
5 | * Optimized for small allocations that do not resize. 6 | * Can extend itself by allocating new arenas. 7 | * 8 | * Example: 9 | * AllocatorArena al={0}; 10 | * char * d = alloc_arena_alloc(&al, 16); 11 | * alloc_arena_free(&al); 12 | */ 13 | #pragma once 14 | #include "alloc.h" 15 | 16 | typedef struct AllocatorArena { 17 | size_t rem; 18 | Allocator * al; //Set this if a non-default allocator is desired 19 | struct AllocArenaPage { 20 | struct AllocArenaPage *prev; 21 | size_t size; 22 | uint8_t data[]; 23 | } *page; 24 | } AllocatorArena; 25 | 26 | // Return an allocator using only the space provided. 27 | int alloc_arena_frombuffer(AllocatorArena*, size_t sz, void* buf); 28 | 29 | #define alloc_arena_fromarray(S, A) \ 30 | alloc_arena_frombuffer((S), sizeof(A), (A)) 31 | 32 | // Reserve space at least bytes 33 | int alloc_arena_reserve(AllocatorArena* S, size_t sz); 34 | 35 | // Allocate memory from it 36 | void* alloc_arena_alloc(AllocatorArena* S, size_t sz); 37 | 38 | // Allocates a new blocks of C elements of type T 39 | #define alloc_arena_new(A, T, C) \ 40 | ((T*)alloc_arena_alloc((A), sizeof(T)*(C))) 41 | 42 | // Free all memory used by the arena, including internal 43 | void alloc_arena_free(AllocatorArena* S); 44 | 45 | // Return the last allocation 46 | // If p is not at the end of the arena, it does nothing. 47 | void alloc_arena_free_last(AllocatorArena* S, void* p); 48 | 49 | //TODO: change prefix to arena_ only ? 
50 | //TODO: free last 51 | //TODO: free up to ~ rollback 52 | //TODO: rollback: get mark and free up to it only 53 | 54 | void * allocator_arena_alloc(Allocator* a, void* ptr, size_t sz, int flags); 55 | 56 | void allocator_arena_ctx_free(Allocator* a); 57 | 58 | // Returns a generic allocator interface 59 | static inline 60 | Allocator allocator_arena(AllocatorArena* S) { 61 | return (Allocator){ 62 | .alloc = allocator_arena_alloc, 63 | .ctx_free = allocator_arena_ctx_free, 64 | .ctx = S }; 65 | } 66 | -------------------------------------------------------------------------------- /src/ccommon/alloc_gen.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * General purpose allocator. 5 | * Loosely-based in dlmalloc. 6 | */ 7 | #pragma once 8 | #include "alloc.h" 9 | 10 | void* alloc_gen_alloc(Allocator* a, void* ptr, size_t sz, int flags); 11 | 12 | void alloc_gen_ctx_free(Allocator* a); 13 | 14 | /* Returns a new allocator. 15 | */ 16 | static inline 17 | Allocator allocator_gen() { 18 | return (Allocator){ &alloc_gen_alloc, &alloc_gen_ctx_free, 19 | NULL, NULL, ALLOC_F_HAS_SIZE4 }; 20 | } 21 | 22 | // Reduces the memory used to a minimum 23 | void allocator_gen_trim(Allocator* a); 24 | 25 | // Free all the allocated memory, but not the allocator itself 26 | //void allocator_gen_free_all(Allocator* a); 27 | 28 | // Return nonzero if the allocator has no allocations besides the space 29 | // used internally. Useful to detect memory leaks. 30 | int allocator_gen_empty_is(const Allocator* a); 31 | 32 | // Return various summary statistics 33 | // The values are calculated on the spot, so it could be slow. 
34 | typedef struct AllocGenInfo { 35 | size_t mtot, // Total memory allocated from the system 36 | mfree, // Free memory 37 | mfchunk; 38 | unsigned nseg, // Number of segments 39 | nchunk, // Number of chunks 40 | nchunkf, // Number of free chunks 41 | nfchunk; 42 | } AllocGenInfo; 43 | AllocGenInfo allocator_gen_info(const Allocator* a); 44 | -------------------------------------------------------------------------------- /src/ccommon/base.mk: -------------------------------------------------------------------------------- 1 | # Makefile 2 | # Copyright 2024, Alejandro A. García 3 | # SPDX-License-Identifier: Zlib 4 | 5 | # Common make definitions and rules for use in multiple projects. 6 | # 7 | # Use VPATH to configure the source directories 8 | # e.g. VPATH = src:src/ccommon 9 | # 10 | # Append to cppflags to add include directories 11 | # e.g. cppflags += -Isrc/ccommon 12 | 13 | objdir = obj 14 | depdir = .d 15 | 16 | flags = $(FLAGS) 17 | cppflags = $(CPPFLAGS) 18 | cflags = -std=c99 -Wall -pedantic $(CFLAGS) 19 | cxxflags = $(CXXFLAGS) 20 | ldlibs = $(LDLIBS) 21 | ldflags = $(LDFLAGS) 22 | 23 | depflags = -MT $@ -MMD -MP -MF $(depdir)/$*.d 24 | 25 | # Options for dynamic libraries 26 | flags += -fPIC -fvisibility=hidden 27 | 28 | ### Compilation options 29 | ifndef nonative 30 | cppflags += -march=native 31 | endif 32 | 33 | ifdef debug 34 | objdir = obj_dbg 35 | depdir = .d_dbg 36 | #cppflags += -g -DDEBUG 37 | cppflags += -ggdb -g3 -DDEBUG 38 | else ifdef debugo 39 | objdir = obj_dbg 40 | depdir = .d_dbg 41 | cppflags += -ggdb -g3 -DDEBUG 42 | flags += -Og 43 | else ifdef small 44 | cppflags += -DNDEBUG 45 | ldflags += -Wl,--strip-all 46 | flags += -Os 47 | else ifdef fast 48 | cppflags += -DNDEBUG 49 | ldflags += -Wl,--strip-all 50 | flags += -O3 51 | flags += -flto -fwhole-program -fuse-linker-plugin 52 | else 53 | cppflags += -DNDEBUG 54 | ldflags += -Wl,--strip-all 55 | flags += -O2 56 | endif 57 | 58 | ifdef profile 59 | flags += -pg 60 | $(info 
gprof CMD gmon.out | less) 61 | endif 62 | 63 | ### 64 | .PHONY: all clean 65 | 66 | # Disable implicit rules 67 | .SUFFIXES: 68 | #.SUFFIXES: .c .o 69 | 70 | # Do not remove intermediate files 71 | .SECONDARY: 72 | 73 | ### OS specifics 74 | ifeq ($(OS),Windows_NT) 75 | EXEC_EXT=.exe 76 | DLIB_EXT=.dll 77 | RUN_PRE= 78 | targets_bin = $(addsuffix $(EXEC_EXT),$(targets)) $(addsuffix $(DLIB_EXT),$(targets_dlib)) 79 | targets_bin2 = $(targets) 80 | else 81 | EXEC_EXT= 82 | DLIB_EXT=.so 83 | RUN_PRE=./ 84 | targets_bin = $(targets) $(addsuffix $(DLIB_EXT),$(targets_dlib)) 85 | targets_bin2 = 86 | endif 87 | 88 | ### Commands 89 | COMPILE_C = $(CC) $(depflags) $(flags) $(cppflags) $(cflags) -c -o $@ $< 90 | COMPILE_CXX = $(CXX) $(depflags) $(flags) $(cppflags) $(cxxflags) -c -o $@ $< 91 | LINK_EXEC = $(CC) $(flags) $(ldflags) -o $@$(EXEC_EXT) \ 92 | $(addprefix $(objdir)/,$(filter %.o,$^)) $(ldlibs) 93 | LINK_DLIB = $(CC) $(flags) $(ldflags) -shared -o $@$(DLIB_EXT) \ 94 | $(addprefix $(objdir)/,$(filter %.o,$^)) $(ldlibs) 95 | 96 | ### Some commonly used dependencies 97 | #$(info OS=$(OS)) 98 | ifeq ($(OS),Windows_NT) 99 | socket_libs = -lws2_32 100 | sdl_libs = -lmingw32 -lSDL2main -lSDL2 101 | else 102 | #socket_libs = 103 | sdl_libs = -lSDL2main -lSDL2 104 | endif 105 | sdl_objs += image_sdl.o 106 | 107 | ### Rules 108 | all: $(targets_dlib) $(targets) 109 | 110 | $(targets): | $(objdir) $(depdir) 111 | ifdef verbose 112 | $(LINK_EXEC) 113 | else 114 | @echo "LINK $@" 115 | @$(LINK_EXEC) 116 | endif 117 | ifdef run 118 | $(RUN_PRE)$@ 119 | endif 120 | ifdef gdb 121 | gdb $@ 122 | endif 123 | 124 | $(targets_dlib): | $(objdir) $(depdir) 125 | ifdef verbose 126 | $(LINK_DLIB) 127 | else 128 | @echo "LINK $@" 129 | @$(LINK_DLIB) 130 | endif 131 | 132 | $(objdir): 133 | mkdir -p $(objdir) 134 | 135 | %.o: $(objdir)/%.o ; 136 | 137 | $(objdir)/%.o: %.c 138 | ifdef verbose 139 | $(COMPILE_C) 140 | else 141 | @echo "CC $@" 142 | @$(COMPILE_C) 143 | endif 144 | 145 | 
$(objdir)/%.o: %.cpp 146 | ifdef verbose 147 | $(COMPILE_CXX) 148 | else 149 | @echo "CXX $@" 150 | @$(COMPILE_CXX) 151 | endif 152 | 153 | ### 154 | $(depdir): 155 | mkdir -p $(depdir) 156 | 157 | $(depdir)/%.d: ; 158 | 159 | .PRECIOUS: $(depdir)/%.d 160 | 161 | include $(wildcard $(depdir)/*.d) 162 | 163 | ### Clean-up rules 164 | cleanbin: 165 | rm -f $(targets_bin) $(targets_bin2) 166 | 167 | clean: cleanbin 168 | rm -f $(objdir)/* $(depdir)/* 169 | -rm -f gmon.out *.gcov 170 | 171 | distclean: cleanbin 172 | rm -fr obj obj_dbg .d .d_dbg 173 | -rm -f gmon.out *.gcov 174 | 175 | ### Some shorthands 176 | run_%: % 177 | @echo "" 178 | $(RUN_PRE)$< 179 | 180 | test: $(addprefix run_,$(tests)) 181 | -------------------------------------------------------------------------------- /src/ccommon/bisect.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include 6 | #include 7 | 8 | /* Bisection for binary search and sorting 9 | * Example with integer: 10 | bool found; 11 | size_t idx; 12 | BISECT_RIGHT(found, idx, 0, vec_count(index), index[i_] - key) ); 13 | * Example with string key: 14 | BISECT_RIGHT(found, idx, 0, vec_count(index), strcmp(index[i_], key) ); 15 | */ 16 | #define BISECT_RIGHT(FOUND, IDX, INI, LEN, CMPE) \ 17 | BISECT_RIGHT_S(FOUND, IDX, INI, LEN, r_ = (CMPE); ) 18 | 19 | #define BISECT_RIGHT_DECL(FOUND, IDX, INI, LEN, CMPE) \ 20 | bool FOUND=0; FOUND=FOUND; \ 21 | size_t IDX=0; IDX=IDX; \ 22 | BISECT_RIGHT_S(FOUND, IDX, INI, LEN, r_ = (CMPE); ); 23 | 24 | /* Alternative version where CMPM can be function-like macro. 25 | */ 26 | #define BISECT_RIGHT_M(FOUND, IDX, INI, LEN, CMPM) \ 27 | BISECT_RIGHT_S(FOUND, IDX, INI, LEN, CMPM(r_,i_); ) 28 | 29 | /* Alternative version where CMPS is an statement setting i_. 
/* Core bisection loop. CMPS is a statement that sets r_ by comparing the
 * element at index i_ against the key.
 *
 * Searches the half-open index range [INI, LEN); note that LEN is used as
 * the end index, not as a count relative to INI.
 * CMPS must set r_ < 0 if element i_ sorts before the key, r_ > 0 if it
 * sorts after it, and r_ == 0 on a match.
 * On exit FOUND is true on a match, and IDX is the matching index or the
 * position where the key would have to be inserted.
 */
#define BISECT_RIGHT_S(FOUND, IDX, INI, LEN, CMPS) do { \
	size_t i_, b_=(INI), e_=(LEN); \
	int r_=-1; \
	while (b_ < e_) { \
		i_ = b_ + (e_ - b_)/2; /* can not overflow, unlike (b_+e_)/2 */ \
		CMPS \
		if (r_ < 0) b_ = i_+1; \
		else if (r_ > 0) e_ = i_; \
		else { b_=i_; break; } \
	} \
	(FOUND) = (r_ == 0); \
	(IDX) = b_; \
} while(0)
/* Reverse in place the order of the n bytes starting at p.
 *
 * Works for any n. Sizes other than 2, 4 and 8 used to be silently left
 * unswapped. n of 0 or 1 is a no-op and p is not accessed in that case.
 */
static inline
void byteswap(unsigned n, void* p) {
	if (n < 2) return;
	unsigned char *lo = p, *hi = lo + n - 1;
	// Exchange bytes from both ends, moving towards the middle
	while (lo < hi) {
		unsigned char tmp = *lo;
		*lo++ = *hi;
		*hi-- = tmp;
	}
}
/* Copy n bytes from src to dst in reversed byte order.
 *
 * Works for any n. Sizes other than 1, 2, 4 and 8 used to hit
 * assert(false), which compiles to nothing under NDEBUG and left dst
 * unwritten. n == 0 is a no-op.
 * dst and src must not overlap (both are restrict-qualified).
 */
static inline
void byteswap_swap(unsigned n, void*restrict dst, const void*restrict src) {
	if (!n) return;
	unsigned char *d = dst;
	const unsigned char *s = (const unsigned char*)src + n;
	// Walk src backwards while filling dst forwards
	while (n--) *d++ = *--s;
}
byteswap_copyle(unsigned n, void*restrict dst, const void*restrict src) { 183 | if (!little_endian_is()) byteswap_swap(n, dst, src); 184 | else byteswap_copy(n, dst, src); 185 | } 186 | static inline 187 | void byteswap_copybe(unsigned n, void*restrict dst, const void*restrict src) { 188 | if (!big_endian_is()) byteswap_swap(n, dst, src); 189 | else byteswap_copy(n, dst, src); 190 | } 191 | -------------------------------------------------------------------------------- /src/ccommon/ccommon.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024-2025, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Common C code header 5 | */ 6 | #pragma once 7 | 8 | /* General use macros 9 | */ 10 | #define COUNTOF(X) (sizeof(X)/sizeof(*(X))) 11 | 12 | // Stringify 13 | #define STR(X) STR_(X) 14 | #define STR_(X) #X 15 | 16 | // Concatenate and evaluate 17 | #define CAT2(_1,_2) _1 ## _2 18 | #define CAT3(_1,_2,_3) _1 ## _2 ## _3 19 | #define CAT4(_1,_2,_3,_4) _1 ## _2 ## _3 ## _4 20 | // Evaluate and concatenate 21 | #define PASTE2(_1,_2) CAT2(_1,_2) 22 | #define PASTE3(_1,_2,_3) CAT3(_1,_2,_3) 23 | #define PASTE4(_1,_2,_3,_4) CAT4(_1,_2,_3,_4) 24 | 25 | #define MINSET(VAR,VAL) ((VAR) > (VAL) ? ((VAR) = (VAL)) : (VAR)) 26 | #define MAXSET(VAR,VAL) ((VAR) < (VAL) ? ((VAR) = (VAL)) : (VAR)) 27 | 28 | #define IFFALSE(VAR,DEF) ((VAR) ? (VAR) : (DEF)) 29 | #define IFNPOS(VAR,DEF) ((VAR) > 0 ? (VAR) : (DEF)) 30 | 31 | #define IFFALSESET(VAR,DEF) ((VAR) ? (VAR) : ((VAR) = (DEF))) 32 | #define IFNPOSSET(VAR,DEF) ((VAR) > 0 ? (VAR) : ((VAR) = (DEF))) 33 | 34 | #define ccSIGN(X) ((X) < 0 ? -1 : (X) > 0 ? 1 : 0) 35 | #define ccABS(X) ((X)<0 ? -(X) : (X)) 36 | #define ccMIN(X,Y) ((X)<(Y) ? (X) : (Y)) 37 | #define ccMAX(X,Y) ((X)>(Y) ? (X) : (Y)) 38 | #define ccMIN3(A,B,C) ccMIN(ccMIN(A,B),C) 39 | #define ccMAX3(A,B,C) ccMAX(ccMAX(A,B),C) 40 | #define ccCLAMPED(V,L,H) ((V)<(L) ? (L) : (V)>(H) ? 
/* Swap the contents of two lvalues of the same size (needs string.h).
 *
 * The temporaries use long, suffixed names so that arguments called "a",
 * "b" or "tmp_" are not captured by the macro's own locals (previously
 * ccSWAP(a, b) initialized the local pointers with their own addresses).
 * sizeof(A) is used for every copy; A and B must have the same size.
 */
#define ccSWAP(A,B) do { \
	char ccswap_tmp_[sizeof(A)]; \
	void *ccswap_pa_=&(A), *ccswap_pb_=&(B); \
	memcpy(ccswap_tmp_, ccswap_pa_, sizeof(A)); \
	memcpy(ccswap_pa_, ccswap_pb_, sizeof(A)); \
	memcpy(ccswap_pb_, ccswap_tmp_, sizeof(A)); \
} while(0)
do { \ 98 | log_error(__VA_ARGS__); \ 99 | exit(1); \ 100 | } while (0) 101 | 102 | /* Propagate errors */ 103 | #define TRY(EXPR) do { \ 104 | result_t _R_ = (EXPR); \ 105 | if (_R_ < 0) RETURN(_R_); \ 106 | } while (0) 107 | 108 | #define TRYR(EXPR) do { \ 109 | result_t _R_ = (EXPR); \ 110 | if (_R_ < 0) return _R_; \ 111 | } while (0) 112 | 113 | #define TRYB(CODE, EXPR) do { \ 114 | if (!(EXPR)) RETURN(CODE); \ 115 | } while (0) 116 | 117 | #define TRYRB(CODE, EXPR) do { \ 118 | if (!(EXPR)) return (CODE); \ 119 | } while (0) 120 | 121 | #define TRY_LOG(EXPR, ...) do { \ 122 | result_t _R_ = (EXPR); \ 123 | if (_R_ < 0) ERROR_LOG(_R_, __VA_ARGS__); \ 124 | } while (0) 125 | 126 | #define TRY_ASSERT(EXPR) do { \ 127 | result_t _R_ = (EXPR); \ 128 | if (_R_ < 0) ERROR_LOG(_R_, "Error 0x%x in %s:%d:\n%s", \ 129 | -_R_, __FILE__, __LINE__, #EXPR); \ 130 | } while (0) 131 | 132 | #define TRYB_ASSERT(CODE, EXPR) do { \ 133 | if (!(EXPR)) ERROR_LOG((CODE), "Assertion Error %s:%d:\n%s", \ 134 | __FILE__, __LINE__, #EXPR); \ 135 | } while (0) 136 | -------------------------------------------------------------------------------- /src/ccommon/fsutil.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
/* Bounded string copy that always null-terminates.
 *
 * maxlen: total capacity of dst in bytes, terminator included.
 * Copies at most maxlen-1 characters from src and appends the terminator.
 * Returns the number of characters written, not counting the terminator.
 * Returns 0 without touching dst if maxlen is 0 or any pointer is NULL.
 */
static inline
size_t str_copy(size_t maxlen, char* dst, const char* src) {
	if (!maxlen || !dst || !src) return 0;
	char * dst0 = dst;
	// Reserve one byte for the terminator. It was previously reserved twice,
	// which truncated the copy one character early.
	size_t room = maxlen - 1;
	for (; room && *src; room--) *dst++ = *src++;
	*dst = 0;
	return dst - dst0;
}
/* Generic fallback, used when neither the unix nor the Windows branch is
 * selected. Only the C standard library is assumed to be available.
 */

// Returns 1 if path can be opened for reading, 0 otherwise.
int file_exists(const char* path)
{
	FILE * f = fopen(path, "r");
	if (!f) return 0;
	fclose(f);
	return 1;
}

// Standard C has no way to create a directory, so this always fails (<0).
// It is defined anyway because fsutil.h declares it: without a definition,
// any program calling it would not link on platforms using this fallback.
int directory_make(const char* path)
{
	(void)path;
	return -1;
}
// Returns the last part of a path.
// The result points inside the string passed in path.
// Example: "dir/name.ext" -> "name.ext"
static inline
char* path_tail(const char* path);

// Returns the file name extension without the dot.
// The result points inside the string passed in path.
// Examples: "dir/name.ext" -> "ext", "name" -> "" (pointer to end)
static inline
char* path_ext(const char* path);

// Returns the file name extension with the dot.
// The result points inside the string passed in path.
// Examples: "dir/name.ext" -> ".ext", "name" -> "" (pointer to end)
static inline
char* path_extdot(const char* path);

// Returns true if the path is absolute: it starts with '/' or, when built
// with __WIN32__, with '\\' or with a character followed by ':'.
static inline
bool path_abs_is(const char* path);

// Returns true if c is a path separator: '/' or, when built with
// __WIN32__, also '\\'.
static inline
bool path_sep_is(int c);

// Returns 1 if it exists, 0 otherwise
int file_exists(const char* path);

// Returns 1 on creation, 0 if already exists and <0 on error
int directory_make(const char* path);

// Directory kinds that can be requested from fs_dir_get.
// The environment variables named below are the first ones consulted by
// the unix / Windows implementations.
enum FsDirType {
	FS_DIR_TEMP			= 1,	// TMPDIR (or "/tmp") / TEMP
	FS_DIR_USER_CONFIG,			// XDG_CONFIG_HOME / APPDATA
	FS_DIR_USER_CACHE,			// XDG_CACHE_HOME / LOCALAPPDATA
	FS_DIR_USER_DATA,			// XDG_DATA_HOME / LOCALAPPDATA
	//TODO: system dirs
};
// Writes to out the path to the chosen system directory.
/* Inline implementations */

// Returns true if c is a path separator: '/' and, on Windows, also '\\'.
static inline
bool path_sep_is(int c)
{
	if (c == '/') return true;
#ifdef __WIN32__
	if (c == '\\') return true;
#endif
	return false;
}

// Returns true if path is absolute: it starts with a '/' or, on Windows,
// with a '\\' or a drive prefix such as "C:".
static inline
bool path_abs_is(const char* path)
{
	if (path[0] == '/') return true;
#ifdef __WIN32__
	if (path[0] == '\\') return true;
	if (path[0] && path[1] == ':') return true;
#endif
	return false;
}

// Returns a pointer to the character after the last separator of path,
// or path itself if it has no separator.
static inline
char* path_tail(const char* path)
{
	size_t i = strlen(path);  //size_t: no truncation on very long strings
	while (i > 0 && !path_sep_is(path[i-1])) --i;
	return (char*)(path+i);
}

// Returns a pointer to the extension of the last path component, without
// the dot, or a pointer to the end of path (empty string) if there is none.
static inline
char* path_ext(const char* path)
{
	size_t n = strlen(path);
	for (size_t i=n; i>0; --i) {
		if (path[i-1] == '.') return (char*)(path+i);
		// Stop at the start of the last component: a dot in a directory
		// name ("dir.d/name") is not a file extension.
		if (path_sep_is(path[i-1])) break;
	}
	return (char*)path+n;	//empty
}

// Same as path_ext, but the returned extension includes the dot.
static inline
char* path_extdot(const char* path)
{
	size_t n = strlen(path);
	for (size_t i=n; i>0; --i) {
		if (path[i-1] == '.') return (char*)(path+i-1);
		if (path_sep_is(path[i-1])) break;	//see path_ext
	}
	return (char*)path+n;	//empty
}
/* Converts a fixed point HSV color to an 8 bit per channel RGB color.
 * All the input components carry HSV_ABITS additional fractional bits, which
 * are removed from the output. Everything is done with integer arithmetic.
 */
ImgColor img_color_hsv2rgb(const ImgColorHSV hsv)
{
	// Half of one output unit, added before the final shift to round
	const int round_sum = 1 << (HSV_ABITS - 1);
	int a = hsv.a >> HSV_ABITS;

	int v = hsv.v >> HSV_ABITS;
	// No saturation: gray, all the channels are equal to the value
	if (hsv.s == 0)
		return (ImgColor){v, v, v, a};

	// Hue sector, it selects the channel order in the switch below
	const int region = 6 * hsv.h / HSV_HSCALE;	// h/60

	// m: channel scaled down by the full saturation
	int m = hsv.v * (HSV_SSCALE - hsv.s) / HSV_SSCALE;
	// x: remaining channel, it depends on the hue position relative to the
	// pair of sectors (region >> 1) it falls in
	int x = (hsv.v * hsv.s/HSV_HSCALE)
		* (HSV_HSCALE
		- abs(6 * hsv.h - 2 * (region >> 1) * HSV_HSCALE - HSV_HSCALE));

	// Add the base level, normalize, round and drop the additional bits
	x = ((x + hsv.v * (HSV_SSCALE - hsv.s)) / HSV_SSCALE + round_sum) >> HSV_ABITS;
	m = m >> HSV_ABITS;

	// Any region outside 0..4 (including h == HSV_HSCALE) uses the last case
	switch (region) {
	case 0:		return (ImgColor){v, x, m, a};
	case 1:		return (ImgColor){x, v, m, a};
	case 2:		return (ImgColor){m, v, x, a};
	case 3:		return (ImgColor){m, x, v, a};
	case 4:		return (ImgColor){x, m, v, a};
	default:	return (ImgColor){v, m, x, a};
	}
}
return (ImgColorHSV){0, 0, v, a}; 71 | 72 | int h; 73 | if (rgb_max == rgb.r) { 74 | h = HSV_HSCALE * (6*chroma + rgb.g - rgb.b) / (6*chroma); 75 | if (h > HSV_HSCALE) h -= HSV_HSCALE; 76 | } else if (rgb_max == rgb.g) 77 | h = HSV_HSCALE * (2*chroma + rgb.b - rgb.r) / (6*chroma); 78 | else 79 | h = HSV_HSCALE * (4*chroma + rgb.r - rgb.g) / (6*chroma); 80 | 81 | return (ImgColorHSV){h, s, v, a}; 82 | } 83 | 84 | /* 85 | Image 86 | */ 87 | void img_free(Image* img) 88 | { 89 | if (img->data && img->flags & IMG_F_OWN_MEM) 90 | alloc_free(IMAGE_ALLOCATOR, img->data); 91 | 92 | *img = (Image){0}; 93 | } 94 | 95 | int img_resize(Image* img, unsigned w, unsigned h, ImgFormat fmt, 96 | unsigned pitch) 97 | { 98 | if (img->w == w && img->h == h && img->format == fmt && 99 | (!pitch || img->pitch == pitch) && img->data) 100 | return 0; 101 | 102 | if (img->data && !(img->flags & IMG_F_OWN_MEM)) 103 | return -1; 104 | 105 | unsigned bypp=0; 106 | switch (fmt) { 107 | case IMG_FORMAT_NULL: bypp = 0; break; 108 | case IMG_FORMAT_GRAY: bypp = 1; break; 109 | case IMG_FORMAT_RGB: bypp = 3; break; 110 | case IMG_FORMAT_RGBA: bypp = 4; break; 111 | default: 112 | return -1;//IMG_ERROR_UNSUPPORTED_PARAM; 113 | } 114 | 115 | if (!pitch) { 116 | const unsigned a = IMAGE_DEFAULT_ALIGNMENT; 117 | pitch = (w * bypp + a-1) / a * a; 118 | } 119 | else if (pitch < w * bypp) 120 | return -1;//IMG_ERROR_PARAMS; 121 | 122 | size_t sz = h * pitch; 123 | void* p = img->data; 124 | if (sz > 0) { 125 | p = alloc_realloc(IMAGE_ALLOCATOR, p, sz); 126 | if (!p) return -1;//IMG_ERROR_OUT_OF_MEMORY; 127 | } 128 | 129 | img->data = p; 130 | img->w = w; 131 | img->h = h; 132 | img->pitch = pitch; 133 | img->bypp = bypp; 134 | img->format = fmt; 135 | img->flags |= IMG_F_OWN_MEM; 136 | 137 | return 0; 138 | } 139 | 140 | int img_copy(Image* dst, const Image* src) 141 | { 142 | int r = img_resize(dst, src->w, src->h, src->format, src->pitch); 143 | if (r < 0) return r; 144 | memcpy(dst->data, src->data, 
dst->h * dst->pitch); 145 | return 0; 146 | } 147 | 148 | void img_view_make(Image* dst, const Image* src, ImgRect rect) 149 | { 150 | img_free(dst); 151 | 152 | if (rect.x < 0) { rect.w += rect.x; rect.x = 0; } 153 | if (rect.y < 0) { rect.h += rect.y; rect.y = 0; } 154 | 155 | rect.w = ccMAX(ccMIN(rect.x + rect.w, (int)src->w) - rect.x, 0); 156 | rect.h = ccMAX(ccMIN(rect.y + rect.h, (int)src->h) - rect.y, 0); 157 | 158 | if (rect.w < 0) rect.w = 0; 159 | if (rect.h < 0) rect.h = 0; 160 | 161 | dst->data = src->data + src->pitch * rect.y + src->bypp * rect.x; 162 | dst->w = rect.w; 163 | dst->h = rect.h; 164 | dst->pitch = src->pitch; 165 | dst->bypp = src->bypp; 166 | dst->format = src->format; 167 | } 168 | 169 | //TODO: macro the switch(img->format) and color set code? 170 | void img_fill(Image* img, const ImgColor color) 171 | { 172 | unsigned w=img->w, h=img->h, x, y; 173 | ImgColorInt c = img_color_map(color, img->format); 174 | 175 | switch (img->format) { 176 | case IMG_FORMAT_GRAY: { 177 | for (y=0; yh; ++y) { 185 | uint8_t *p = &IMG_INDEX(*img, 0, y); 186 | for (x=0; xw; ++x, p+=3) 187 | memcpy(p, &c, 3); 188 | } 189 | break; 190 | case IMG_FORMAT_RGBA: 191 | for (y=0; yh; ++y) { 192 | uint8_t *p = &IMG_INDEX(*img, 0, y); 193 | for (x=0; xw; ++x, p+=4) 194 | memcpy(p, &c, 4); 195 | } 196 | break; 197 | default: 198 | break; 199 | } 200 | } 201 | -------------------------------------------------------------------------------- /src/ccommon/image.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Inteface to store and manipulate images in memory. 
/*
	Point
*/
typedef struct ImgPoint {
	int x, y;
} ImgPoint;

#define IMG_POINT_UNPACK(V)  (V).x, (V).y

/*
	Rect
*/
// Rectangle given by its origin and size
typedef struct ImgRectS {
	int x, y, w, h;
} ImgRectS;

// Rectangle given by two points
typedef struct ImgRectP {
	int x1, y1, x2, y2;
} ImgRectP;

#define IMG_RECTS_UNPACK(V)  (V).x, (V).y, (V).w, (V).h
#define IMG_RECTP_UNPACK(V)  (V).x1, (V).y1, (V).x2, (V).y2

typedef ImgRectS ImgRect;
#define IMG_RECT_UNPACK  IMG_RECTS_UNPACK
#define IMG_RECT_FMT  "%d,%d:%dx%d"

// First and second pair of integers of a rectangle seen as points
#define IMG_RECT_P1(R)  (*((ImgPoint*)&(R)))
#define IMG_RECT_P2(R)  (*(((ImgPoint*)&(R))+1))

// Tells if the point p is inside the rectangle r.
// The origin edges belong to the rectangle, the opposite edges do not.
static inline
bool img_rect_inside_is(const ImgRect* r, const ImgPoint* p) {
	if (p->x < r->x || p->x >= r->x + r->w)
		return false;
	return p->y >= r->y && p->y < r->y + r->h;
}
/* Fixed point scales used by ImgColorHSV.
 * Each component is stored with IMG_HSV_ABITS additional fractional bits.
 * Value, saturation and alpha use 255 as full scale, while the hue uses 256
 * for a whole turn.
 */
enum {
	IMG_HSV_ABITS	= 4,	//additional precision bits
	IMG_HSV_VSCALE	= (255 << IMG_HSV_ABITS),
	IMG_HSV_SSCALE	= (255 << IMG_HSV_ABITS),
	IMG_HSV_HSCALE	= (256 << IMG_HSV_ABITS),	//hue scale
	IMG_HSV_ASCALE	= (255 << IMG_HSV_ABITS),
};

// Color as hue, saturation, value and alpha, in the fixed point scales above
typedef struct ImgColorHSV {
	uint16_t h, s, v, a;
} ImgColorHSV;

// Converts a fixed point HSV color to 8 bit per channel RGB
ImgColor img_color_hsv2rgb(const ImgColorHSV hsv);

// Converts an 8 bit per channel RGB color to fixed point HSV
ImgColorHSV img_color_rgb2hsv(const ImgColor rgb);
IMG_FORMAT_GRAY: 162 | p[0] = c.r; 163 | if (p[0] < c.g) p[0] = c.g; 164 | if (p[0] < c.b) p[0] = c.b; 165 | break; 166 | case IMG_FORMAT_RGB: 167 | //TODO: endianness 168 | p[0]=c.r; p[1]=c.g; p[2]=c.b; 169 | break; 170 | case IMG_FORMAT_RGBA: 171 | //TODO: endianness 172 | p[0]=c.r; p[1]=c.g; p[2]=c.b; p[3]=c.a; 173 | break; 174 | default: 175 | break; 176 | } 177 | return n; 178 | } 179 | 180 | static inline 181 | ImgColor img_color_unmap(const uint8_t* p, ImgFormat fmt) 182 | { 183 | switch (fmt) { 184 | case IMG_FORMAT_GRAY: return (ImgColor){ *p, *p, *p, 255 }; 185 | case IMG_FORMAT_RGB: return (ImgColor){ p[0], p[1], p[2], 255 }; 186 | case IMG_FORMAT_RGBA: return (ImgColor){ p[0], p[1], p[2], p[3] }; 187 | default: return (ImgColor){0}; 188 | } 189 | } 190 | 191 | static inline 192 | ImgColor img_color_transform(const ImgColor col, unsigned tranf) 193 | { 194 | switch (tranf) { 195 | case IMG_COLOR_TRANF_BGR: 196 | return (ImgColor){ col.b, col.g, col.r, col.a }; 197 | case IMG_COLOR_TRANF_GRB: 198 | return (ImgColor){ col.g, col.b, col.r, col.a }; 199 | case IMG_COLOR_TRANF_GRAY_MIN: { 200 | unsigned char m = col.r < col.g ? col.r : col.g; 201 | if (col.b < m) m = col.b; 202 | return (ImgColor){ m, m, m, col.a }; 203 | } 204 | case IMG_COLOR_TRANF_GRAY_MAX: { 205 | unsigned char m = col.r > col.g ? 
col.r : col.g; 206 | if (col.b > m) m = col.b; 207 | return (ImgColor){ m, m, m, col.a }; 208 | } 209 | case IMG_COLOR_TRANF_INVERSE: 210 | return (ImgColor){ 255-col.b, 255-col.g, 255-col.r, col.a }; 211 | default: 212 | return col; 213 | } 214 | } 215 | 216 | static inline 217 | void img_zero(Image* img) 218 | { 219 | if (img->data) 220 | memset(img->data, 0, img->pitch * img->h); 221 | } 222 | 223 | static inline 224 | ImgColor img_pixel_get(const Image* img, unsigned x, unsigned y) 225 | { 226 | const uint8_t *p = &IMG_INDEX(*img, x, y); 227 | return img_color_unmap(p, img->format); 228 | } 229 | 230 | static inline 231 | void img_pixel_set(const Image* img, unsigned x, unsigned y, 232 | const ImgColor col) 233 | { 234 | ImgColorInt c = img_color_map(col, img->format); 235 | uint8_t *p = &IMG_INDEX(*img, x, y); 236 | memcpy(p, &c, img->bypp); 237 | } 238 | 239 | -------------------------------------------------------------------------------- /src/ccommon/image_io.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "image_io.h" 5 | #include "logging.h" 6 | #include "str_util.h" 7 | #include "alloc.h" 8 | #include 9 | 10 | #ifndef IMAGE_IO_ALLOCATOR 11 | #define IMAGE_IO_ALLOCATOR g_allocator 12 | #endif 13 | 14 | /* 15 | Codecs 16 | */ 17 | 18 | #define MAX_CODECS 31 19 | const ImageCodec * imgio_codecs[MAX_CODECS+1] = { 20 | NULL 21 | }; 22 | 23 | int img_codec_register(const ImageCodec* codec) 24 | { 25 | int i; 26 | for (i=0; imgio_codecs[i]; ++i) 27 | if (imgio_codecs[i] == codec) 28 | return 0; 29 | 30 | if (i >= MAX_CODECS) 31 | return -1; 32 | 33 | imgio_codecs[i] = codec; 34 | return 1; 35 | } 36 | 37 | const ImageCodec* img_codec_detect_stream(Stream* s) 38 | { 39 | if (stream_read_prep(s,0) < 8) 40 | return 0; 41 | 42 | for (int i=0; imgio_codecs[i]; ++i) 43 | if (imgio_codecs[i]->detect && 44 | imgio_codecs[i]->load.op && 45 | imgio_codecs[i]->detect(s, 0)) 46 | return imgio_codecs[i]; 47 | 48 | return 0; 49 | } 50 | 51 | const ImageCodec* img_codec_detect_ext(const char* ext, int oflags) 52 | { 53 | char buffer[8]; 54 | str_tolower(buffer, sizeof(buffer), ext); 55 | 56 | const bool save = oflags & IMG_OF_SAVE; 57 | for (int i=0; imgio_codecs[i]; ++i) 58 | if (imgio_codecs[i]->detect && 59 | (( save && imgio_codecs[i]->save.op) || 60 | (!save && imgio_codecs[i]->load.op)) && 61 | imgio_codecs[i]->detect(0, buffer) ) 62 | return imgio_codecs[i]; 63 | 64 | return 0; 65 | } 66 | 67 | const ImageCodec* img_codec_detect_filename(const char* filename, int oflags) 68 | { 69 | const char* ext = strrchr(filename, '.'); 70 | if (!ext) return 0; 71 | ext++; 72 | 73 | return img_codec_detect_ext(ext, oflags); 74 | } 75 | 76 | const ImageCodec* img_codec_by_name(const char* name) 77 | { 78 | for (int i=0; imgio_codecs[i]; ++i) { 79 | if (!imgio_codecs[i]->name) continue; 80 | if (!str_cmp_i(imgio_codecs[i]->name, name)) 81 | return imgio_codecs[i]; 82 | } 83 | return 0; 84 | } 85 | 86 | /* 87 | Image I/O 88 | */ 89 
| 90 | int imgio_stream_alloc(ImageIO* obj) 91 | { 92 | if (obj->s) 93 | return IMG_ERROR_PARAMS; 94 | if (!obj->filename) 95 | return IMG_ERROR_UNSUPPORTED_INPUT_TYPE; 96 | 97 | Stream * p = alloc_new(IMAGE_IO_ALLOCATOR, Stream, 1); 98 | 99 | if (stream_open_file(p, obj->filename, 100 | (obj->oflags & IMG_OF_SAVE) ? SOF_CREATE : SOF_READ) < 0) 101 | { 102 | alloc_free(IMAGE_IO_ALLOCATOR, p); 103 | return IMG_ERROR_FILE_OPEN; 104 | } 105 | 106 | obj->s = p; 107 | obj->flags |= IMGIO_F_OWN_STREAM; 108 | 109 | return 0; 110 | } 111 | 112 | int imgio_codec_detect(ImageIO* obj) 113 | { 114 | if (obj->oflags & IMG_OF_SAVE) { 115 | if (obj->filename) 116 | obj->codec = img_codec_detect_filename(obj->filename, obj->oflags); 117 | } 118 | else { 119 | if (!obj->s) { 120 | imgio_stream_alloc(obj); 121 | // An error here can be ok, 122 | // for example some LibAV URL are not files 123 | } 124 | if (obj->s) 125 | obj->codec = img_codec_detect_stream(obj->s); 126 | } 127 | 128 | if (!obj->codec) 129 | return IMG_ERROR_UNKNOWN_CODEC; 130 | 131 | return 0; 132 | } 133 | 134 | int imgio_open_inner(ImageIO* obj) 135 | { 136 | assert(obj->codec); 137 | 138 | const ImageCodecSub* cs = 139 | (obj->oflags & IMG_OF_SAVE) ? 
/* Opens an image i/o object whose s/filename, codec and oflags fields were
 * already filled in (see imgio_open_stream and imgio_open_filename).
 *
 * If no codec was given, it is detected. If the detection finds nothing, the
 * registered codecs that have no detect function and have the
 * IMG_CODEC_F_TRY_DETECT flag are tried one by one until one opens.
 *
 * Returns 0 on success. On error, the object is released with imgio_free and
 * the error code is returned.
 */
int imgio_open(ImageIO* obj)
{
	int r=0;

	if (!obj->codec) {
		r = imgio_codec_detect(obj);
		// An unknown codec is not final: the codecs are tried below
		if (r && r != IMG_ERROR_UNKNOWN_CODEC)
			return r;
	}

	if (obj->codec) {
		r = imgio_open_inner(obj);
	}
	else {
		// Test all codecs without detection
		// r still holds IMG_ERROR_UNKNOWN_CODEC here and it is the result
		// if no codec is eligible
		for (int i=0; imgio_codecs[i]; ++i) {
			// Skip the codecs with a detect function and the ones that do
			// not implement the requested direction (save or load) or did
			// not opt in with IMG_CODEC_F_TRY_DETECT
			if (imgio_codecs[i]->detect ||
				(obj->oflags & IMG_OF_SAVE &&
					(!imgio_codecs[i]->save.op ||
					 ~imgio_codecs[i]->save.flags & IMG_CODEC_F_TRY_DETECT) ) ||
				(~obj->oflags & IMG_OF_SAVE &&
					(!imgio_codecs[i]->load.op ||
					 ~imgio_codecs[i]->load.flags & IMG_CODEC_F_TRY_DETECT) )
				)
				continue;
			obj->codec = imgio_codecs[i];
			r = imgio_open_inner(obj);
			if (!r)
				break;
		}
		if (r)
			obj->codec = 0;
	}

	obj->filename = 0;	// This pointer may not be safe

	if (r) {
		// The codec is cleared first, so imgio_free does not call the free
		// function of a codec that failed to open
		obj->codec = 0;
		imgio_free(obj);
	}

	return r;
}
codec; 220 | obj->oflags = flags; 221 | return imgio_open(obj); 222 | } 223 | 224 | int imgio_open_filename(ImageIO* obj, const char* fname, int flags, 225 | const ImageCodec* codec) 226 | { 227 | imgio_open_BEGIN; 228 | obj->s = 0; 229 | obj->filename = fname; 230 | obj->codec = codec; 231 | obj->oflags = flags; 232 | return imgio_open(obj); 233 | } 234 | 235 | void imgio_free(ImageIO* obj) 236 | { 237 | if (obj->codec) { 238 | const ImageCodecSub* cs = 239 | (obj->oflags & IMG_OF_SAVE) ? &obj->codec->save : &obj->codec->load; 240 | if (cs->free) 241 | cs->free(obj->internal, obj); 242 | obj->codec = 0; 243 | } 244 | if (obj->flags & IMGIO_F_OWN_INTERNAL && obj->internal) { 245 | alloc_free(IMAGE_IO_ALLOCATOR, obj->internal); 246 | obj->internal = 0; 247 | } 248 | if (obj->flags & IMGIO_F_OWN_STREAM && obj->s) { 249 | stream_close(obj->s, 0); 250 | alloc_free(IMAGE_IO_ALLOCATOR, obj->s); 251 | obj->s = 0; 252 | } 253 | obj->flags = 0; 254 | } 255 | 256 | int img_load_file(Image* img, const char* filename) 257 | { 258 | int r=0; 259 | 260 | Stream s={0}; 261 | if (stream_open_file(&s, filename, SOF_READ) < 0) 262 | return IMG_ERROR_FILE_OPEN; 263 | 264 | ImageIO imgio={0}; 265 | r = imgio_open_stream(&imgio, &s, 0, 0); 266 | if (r) goto end; 267 | 268 | r = imgio_load(&imgio, img); 269 | 270 | end: 271 | imgio_free(&imgio); 272 | stream_close(&s, 0); 273 | return r; 274 | } 275 | 276 | int img_save_file(const Image* img, const char* filename) 277 | { 278 | int r=0; 279 | 280 | const ImageCodec* codec = img_codec_detect_filename(filename, IMG_OF_SAVE); 281 | if (!codec) return IMG_ERROR_UNKNOWN_CODEC; 282 | 283 | Stream s={0}; 284 | if (stream_open_file(&s, filename, SOF_CREATE) < 0) 285 | return IMG_ERROR_FILE_OPEN; 286 | 287 | ImageIO imgio={0}; 288 | r = imgio_open_stream(&imgio, &s, IMG_OF_SAVE, codec); 289 | if (r) goto end; 290 | 291 | r = imgio_save(&imgio, img); 292 | 293 | end: 294 | imgio_free(&imgio); 295 | stream_close(&s, 0); 296 | return r; 297 | } 298 
// Flags of ImageCodecSub
enum ImageCodecFlag {
	// The codec can work with an already open stream
	IMG_CODEC_F_ACCEPT_STREAM	= 1,
	// The codec can work with just a file name
	IMG_CODEC_F_ACCEPT_FILENAME	= 2,
	// A codec without a detect function that can be tried when opening
	// without a known codec
	IMG_CODEC_F_TRY_DETECT		= 4,
};

/* One direction (load or save) of a codec.
 * The first argument passed to every function is the codec data
 * (ImageIO.internal) and the second the image i/o object.
 */
typedef struct {
	// Loads or saves one image. If NULL the direction is not supported.
	int (*op)(void*, struct ImageIO*, Image*);
	// Combination of ImageCodecFlag
	int flags;
	// Size in bytes of the codec data allocated on open, 0 for none
	unsigned obj_size;
	// Optional. Called on open; a non zero result makes the open fail.
	int (*init)(void*, struct ImageIO*);
	// Optional. Called by imgio_free.
	void (*free)(void*, struct ImageIO*);
	// Optional. See imgio_seek.
	int (*seek)(void*, struct ImageIO*, long, int);
	// Optional. See imgio_value_get.
	int (*value_get)(void*, struct ImageIO*, int, void*, unsigned);
	// Optional. See imgio_value_set.
	int (*value_set)(void*, struct ImageIO*, int, const void*, unsigned);
} ImageCodecSub;
// Internal state flags, stored in ImageIO.flags
enum ImageIOFlag {
	// The stream was allocated by the object and it is closed and freed
	// by imgio_free
	IMGIO_F_OWN_STREAM		= 1,
	// The codec data was allocated by the object and it is freed
	// by imgio_free
	IMGIO_F_OWN_INTERNAL	= 2,
	IMGIO_F_END_FOUND		= 4,
};

// Flags passed when opening, stored in ImageIO.oflags
enum ImageIOOpenFlag {
	//IMG_OF_NO_INIT	= 1,
	// Open for writing images. Without it, the object is opened for reading.
	IMG_OF_SAVE		= 2,
	IMG_OF_FAST		= 4,
	IMG_OF_GRAY		= 8,
	IMG_OF_NO_ALPHA	= 16,
	IMG_OF_ASYNC	= 32,	//asynchronous operation
};

/* Image input/output object.
 * Initialize it to zero, open it with imgio_open_stream or
 * imgio_open_filename and release it with imgio_free.
 */
typedef struct ImageIO {
	const ImageCodec *	codec;		//NULL if not open
	Stream *			s;			//may be NULL for codecs using a file name
	const char *		filename;	//only valid while opening
	void *				internal;	//codec data
	int oflags;		//ImageIOOpenFlag
	int flags;		//ImageIOFlag
} ImageIO;
/* Reads an image. The object must have been opened for reading.
 * Returns the result of the codec or a negative ImageError:
 * IMG_ERROR_UNSUPPORTED_FUNCTION if the object was opened for saving or the
 * codec cannot load, IMG_ERROR_UNKNOWN_CODEC if the object is not open.
 */
static inline
int imgio_load(ImageIO* obj, Image* img) {
	if (obj->oflags & IMG_OF_SAVE) return IMG_ERROR_UNSUPPORTED_FUNCTION;
	IMGIO_CODEC_CALL(op, img)
}

/* Writes an image. The object must have been opened with IMG_OF_SAVE.
 * Returns the result of the codec or a negative ImageError, as in imgio_load.
 */
static inline
int imgio_save(ImageIO* obj, const Image* img) {
	if (~obj->oflags & IMG_OF_SAVE) return IMG_ERROR_UNSUPPORTED_FUNCTION;
	// The codec operation is shared with load and takes a non const image
	IMGIO_CODEC_CALL(op, (Image*)img)
}

/* Forwards a seek request to the codec; mode is one of ImageSeekMode.
 * Returns IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no seek function.
 * NOTE(review): the unit of offset is not defined here, presumably frames -
 * confirm with the codecs implementing it.
 */
static inline
int imgio_seek(ImageIO* obj, long offset, int mode) {
	IMGIO_CODEC_CALL(seek, offset, mode)
}
/* Reads a codec value. id is one of the IMG_VALUE_* constants, which also
 * document the type expected in buf; bufsz is the size of buf.
 * Returns the result of the codec, IMG_ERROR_UNKNOWN_CODEC if the object is
 * not open or IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no value_get.
 */
static inline
int imgio_value_get(ImageIO* obj, int id, void* buf, unsigned bufsz) {
	IMGIO_CODEC_CALL(value_get, id, buf, bufsz)
}

/* Changes a codec value. id is one of the IMG_VALUE_* constants, which also
 * document the type expected in buf; bufsz is the size of buf.
 * Returns the result of the codec, IMG_ERROR_UNKNOWN_CODEC if the object is
 * not open or IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no value_set.
 */
static inline
int imgio_value_set(ImageIO* obj, int id, const void* buf, unsigned bufsz) {
	IMGIO_CODEC_CALL(value_set, id, buf, bufsz)
}
// Error manager extended with a jump buffer.
// NOTE(review): escape is presumably the longjmp target used to leave the
// library on a fatal error - confirm in image_io_jpeg.c.
struct img_codec_jpeg_error_mgr {
	struct jpeg_error_mgr errmgr;
	jmp_buf escape;
};

// State of the JPEG decoder
struct CodecJpegLoad {
	struct jpeg_decompress_struct cinfo;
	struct img_codec_jpeg_error_mgr jerr;
};

// State of the JPEG encoder
struct CodecJpegSave {
	struct jpeg_compress_struct cinfo;
	struct img_codec_jpeg_error_mgr jerr;

	// Text entries (key and value) to store with the image
	struct CodecJpegText { DynStr key, value; } *metadata;  //vector
	// NOTE(review): presumably the value set with IMG_VALUE_QUALITY - confirm
	int quality;
};
Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include "image_io.h" 6 | 7 | #ifdef IMGIO_PNG_IMPL 8 | #include "vector.h" 9 | struct CodecPng { 10 | struct CodecPngText { DynStr key, value; } *metadata; //vector 11 | int comp_lvl; 12 | }; 13 | #endif 14 | 15 | typedef struct CodecPng CodecPng; 16 | 17 | bool imgio_png_detect(Stream* s, const char* fileext); 18 | 19 | int imgio_png_load(void* self, ImageIO* imgio, Image* img); 20 | 21 | int imgio_png_save_init(CodecPng* S, ImageIO* imgio); 22 | void imgio_png_save_free(CodecPng* S, ImageIO* imgio); 23 | int imgio_png_save_op(CodecPng* S, ImageIO* imgio, Image* img); 24 | int imgio_png_value_set(CodecPng* S, ImageIO* imgio, 25 | int id, const void* buf, unsigned bufsz); 26 | 27 | extern const ImageCodec img_codec_png; 28 | -------------------------------------------------------------------------------- /src/ccommon/image_io_pnm.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include 5 | #include "image_io.h" 6 | 7 | static inline int whitespace_is(char c) { 8 | return (c == ' ' || c == '\t' || c == '\r' || c == '\n'); 9 | } 10 | 11 | /* 12 | Type detect 13 | */ 14 | bool imgio_pnm_detect(Stream* s, const char* fileext) 15 | { 16 | if (s) { 17 | const unsigned char *c = s->cursor; 18 | if (c[0] == 'P' && ('1' <= c[1] && c[1] <= '6') && 19 | whitespace_is(c[2])) 20 | return true; 21 | } 22 | else if (fileext) { 23 | if (fileext[0] == 'p' && 24 | (fileext[1] == 'n' || fileext[1] == 'p' || fileext[1] == 'g' || 25 | fileext[1] == 'b') && 26 | fileext[2] == 'm') 27 | return true; 28 | } 29 | return false; 30 | } 31 | 32 | /* 33 | Read 34 | */ 35 | 36 | static inline 37 | char* field_next(char* cur, char* end) 38 | { 39 | while (curs, 0) < 8) 49 | return IMG_ERROR_LOAD; 50 | 51 | // Read file header 52 | char *end, *cur=stream_buffer_get(imgio->s, &end); 53 | 54 | if (*cur++ != 'P') { 55 | r = IMG_ERROR_LOAD; 56 | goto error; 57 | } 58 | 59 | int bypp=0; 60 | ImgFormat format = IMG_FORMAT_NULL; 61 | switch (*cur++) { 62 | case '5': 63 | format = IMG_FORMAT_GRAY; 64 | bypp = 1; 65 | break; 66 | case '6': 67 | format = IMG_FORMAT_RGB; 68 | bypp = 3; 69 | break; 70 | default: 71 | r = IMG_ERROR_UNSUPPORTED_FORMAT; 72 | goto error; 73 | } 74 | 75 | int width = atoi( (cur = field_next(cur, end)) ); 76 | int height = atoi( (cur = field_next(cur, end)) ); 77 | int depth = atoi( (cur = field_next(cur, end)) ); 78 | 79 | if (width < 1 || height < 1 || depth < 1) { 80 | r = IMG_ERROR_LOAD; 81 | goto error; 82 | } 83 | if (depth != 255) { //TODO 84 | r = IMG_ERROR_UNSUPPORTED_FORMAT; 85 | goto error; 86 | } 87 | 88 | cur = field_next(cur, end); 89 | stream_commit(imgio->s, cur); 90 | 91 | // Allocate image 92 | r = img_resize(img, width, height, format, 0); 93 | if (r) 94 | goto error; 95 | 96 | // Load binary data 97 | size_t line_size = img->w * bypp; 98 | unsigned char* imgcur = img->data; 99 | 
for (unsigned y=0; yh; ++y) { 100 | if (stream_read(imgio->s, line_size, imgcur) != line_size) { 101 | r = IMG_ERROR_LOAD; 102 | goto error; 103 | } 104 | imgcur += img->pitch; 105 | } 106 | 107 | return 0; 108 | 109 | error: 110 | return r; 111 | } 112 | 113 | /* 114 | Save 115 | */ 116 | 117 | int imgio_pnm_save(void* unused, ImageIO* imgio, Image* img) 118 | { 119 | if (stream_write_prep(imgio->s, 0) < 8) 120 | return IMG_ERROR_SAVE; 121 | 122 | size_t line_size=0; 123 | switch (img->format) { 124 | case IMG_FORMAT_GRAY: 125 | line_size = img->w; 126 | stream_printf(imgio->s, "P5 %d %d 255\n", img->w, img->h); 127 | break; 128 | case IMG_FORMAT_RGB: 129 | line_size = img->w * 3; 130 | stream_printf(imgio->s, "P6 %d %d 255\n", img->w, img->h); 131 | break; 132 | case IMG_FORMAT_RGBA: 133 | line_size = img->w * 4; 134 | //http://netpbm.sourceforge.net/doc/pam.html 135 | stream_printf(imgio->s, 136 | "P7\nWIDTH %d\nHEIGHT %d\nDEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n", 137 | img->w, img->h); 138 | break; 139 | default: 140 | return IMG_ERROR_UNSUPPORTED_FORMAT; 141 | } 142 | 143 | unsigned char* imgcur = img->data; 144 | for (unsigned y=0; yh; ++y) { 145 | if (stream_write(imgio->s, line_size, imgcur) != line_size) 146 | return IMG_ERROR_SAVE; 147 | imgcur += img->pitch; 148 | } 149 | 150 | return IMG_RESULT_OK; 151 | } 152 | 153 | /* 154 | Codec 155 | */ 156 | const ImageCodec img_codec_pnm = { 157 | imgio_pnm_detect, 158 | { 159 | imgio_pnm_load, 160 | IMG_CODEC_F_ACCEPT_STREAM, 161 | }, 162 | { 163 | imgio_pnm_save, 164 | IMG_CODEC_F_ACCEPT_STREAM, 165 | }, 166 | "PNM", "pnm" 167 | }; 168 | -------------------------------------------------------------------------------- /src/ccommon/image_io_pnm.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include "image_io.h" 6 | 7 | bool imgio_pnm_detect(Stream* s, const char* fileext); 8 | 9 | int imgio_pnm_load(void* self, ImageIO* imgio, Image* img); 10 | 11 | int imgio_pnm_save(void* self, ImageIO* imgio, Image* img); 12 | 13 | extern const ImageCodec img_codec_pnm; 14 | 15 | -------------------------------------------------------------------------------- /src/ccommon/logging.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "logging.h" 5 | //#include 6 | 7 | struct Logger g_logger = { 8 | #ifdef DEBUG 9 | .level = LOG_LVL_DEBUG, 10 | #else 11 | .level = LOG_LVL_INFO, 12 | #endif 13 | }; 14 | 15 | Stream g_logger_stream; 16 | 17 | void log_line_begin_raw(int level) 18 | { 19 | if (!g_logger.stm) { 20 | stream_open_std(&g_logger_stream, STREAM_STD_ERR, 0); 21 | g_logger.stm = &g_logger_stream; 22 | } 23 | 24 | const char * lvl_prefix = 0; 25 | if (level >= LOG_LVL_DEBUG) lvl_prefix = "DEBUG "; 26 | else if (level >= LOG_LVL_INFO) ; 27 | else if (level >= LOG_LVL_WARNING) lvl_prefix = "WARN "; 28 | else lvl_prefix = "ERROR "; 29 | 30 | //TODO: time (optional) 31 | 32 | if (g_logger.prefix) 33 | stream_str_put(g_logger.stm, g_logger.prefix); 34 | 35 | if (lvl_prefix) 36 | stream_str_put(g_logger.stm, lvl_prefix); 37 | } 38 | 39 | void log_line_str(const char* str) 40 | { 41 | stream_str_put(g_logger.stm, str); 42 | } 43 | 44 | #if __STDC_HOSTED__ 45 | void log_line_strv(const char format[], va_list ap) 46 | { 47 | stream_vprintf(g_logger.stm, format, ap); 48 | } 49 | 50 | void log_line_strf(const char format[], ...) 
51 | { 52 | va_list ap; 53 | va_start(ap, format); 54 | log_line_strv(format, ap); 55 | va_end(ap); 56 | } 57 | #endif 58 | 59 | void log_line_end() 60 | { 61 | stream_char_put(g_logger.stm, '\n'); 62 | stream_flush(g_logger.stm); 63 | } 64 | 65 | void log_logs(int level, const char* text) 66 | { 67 | if (!text) return; 68 | if (!log_level_check(level)) return; 69 | log_line_begin(level); 70 | log_line_str(text); 71 | log_line_end(); 72 | } 73 | 74 | #if __STDC_HOSTED__ 75 | void log_logv(int level, const char format[], va_list ap) 76 | { 77 | if (!format) return; 78 | if (!log_level_check(level)) return; 79 | log_line_begin(level); 80 | log_line_strv(format, ap); 81 | log_line_end(); 82 | } 83 | 84 | void log_logf(int level, const char format[], ...) 85 | { 86 | if (!format) return; 87 | if (!log_level_check(level)) return; 88 | log_line_begin(level); 89 | 90 | va_list ap; 91 | va_start(ap, format); 92 | log_line_strv(format, ap); 93 | va_end(ap); 94 | 95 | log_line_end(); 96 | } 97 | #endif 98 | -------------------------------------------------------------------------------- /src/ccommon/logging.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Logging interface 5 | */ 6 | #pragma once 7 | #include "stream.h" 8 | #include 9 | #include 10 | 11 | //TODO: interface to use other loggers 12 | 13 | // Levels 14 | #define LOG_LVL_STEP 10 15 | 16 | enum LoggingLevel { 17 | LOG_LVL_NONE = 0, 18 | LOG_LVL_ERROR = LOG_LVL_STEP, 19 | LOG_LVL_WARNING = LOG_LVL_STEP*2, 20 | LOG_LVL_INFO = LOG_LVL_STEP*3, //normal 21 | LOG_LVL_INFO2 = LOG_LVL_STEP*4, //verbose 22 | LOG_LVL_DEBUG = LOG_LVL_STEP*5, 23 | LOG_LVL_DEBUG2 = LOG_LVL_STEP*6, 24 | LOG_LVL_DEBUG3 = LOG_LVL_STEP*7, 25 | LOG_LVL_DEBUG4 = LOG_LVL_STEP*8, 26 | LOG_LVL_MAX = 255 27 | }; 28 | 29 | // Utility macros, use mostly these 30 | #define log_error(...) 
log_log(LOG_LVL_ERROR, __VA_ARGS__) 31 | #define log_warning(...) log_log(LOG_LVL_WARNING, __VA_ARGS__) 32 | #define log_info(...) log_log(LOG_LVL_INFO, __VA_ARGS__) 33 | #define log_info2(...) log_log(LOG_LVL_INFO2, __VA_ARGS__) 34 | #define log_debug(...) log_log(LOG_LVL_DEBUG, __VA_ARGS__) 35 | #define log_debug2(...) log_log(LOG_LVL_DEBUG2, __VA_ARGS__) 36 | #define log_debug3(...) log_log(LOG_LVL_DEBUG3, __VA_ARGS__) 37 | #define log_debug4(...) log_log(LOG_LVL_DEBUG4, __VA_ARGS__) 38 | 39 | #define log_log(LVL, ...) do {\ 40 | if (log_level_check((LVL))) \ 41 | log_logf((LVL), __VA_ARGS__); \ 42 | } while (0) 43 | 44 | #define log_log_str(LVL, STR) do {\ 45 | if (log_level_check((LVL))) \ 46 | log_logs((LVL), (STR)); \ 47 | } while (0) 48 | 49 | // Interface 50 | struct Logger { 51 | int level; 52 | Stream *stm; 53 | const char *prefix; // Prefix for all messages 54 | }; 55 | 56 | extern struct Logger g_logger; 57 | 58 | static inline 59 | bool log_level_check(int level) 60 | { 61 | return level <= g_logger.level; 62 | } 63 | 64 | static inline 65 | int log_level_set(int level) 66 | { 67 | int oldval = g_logger.level; 68 | g_logger.level = level; 69 | return oldval; 70 | } 71 | 72 | static inline 73 | int log_level_inc(int change) 74 | { 75 | int oldval = g_logger.level; 76 | g_logger.level += change; 77 | return oldval; 78 | } 79 | 80 | void log_logs(int level, const char* text); 81 | 82 | #ifdef __GNUC__ 83 | __attribute__((format(printf, 2, 0))) 84 | #endif 85 | void log_logv(int level, const char format[], va_list ap); 86 | 87 | #ifdef __GNUC__ 88 | __attribute__((format(printf, 2, 3))) 89 | #endif 90 | void log_logf(int level, const char format[], ...); 91 | 92 | 93 | // Low level interface 94 | void log_line_begin_raw(int level); //no checking 95 | 96 | static inline 97 | bool log_line_begin(int level) { 98 | if (!log_level_check(level)) return false; 99 | log_line_begin_raw(level); 100 | return true; 101 | } 102 | 103 | void log_line_str(const char* 
str); 104 | 105 | #ifdef __GNUC__ 106 | __attribute__((format(printf, 1, 0))) 107 | #endif 108 | void log_line_strv(const char format[], va_list ap); 109 | 110 | #ifdef __GNUC__ 111 | __attribute__((format(printf, 1, 2))) 112 | #endif 113 | void log_line_strf(const char format[], ...); 114 | 115 | static inline 116 | Stream* log_line_stream() { 117 | return g_logger.stm; 118 | } 119 | 120 | void log_line_end(); 121 | -------------------------------------------------------------------------------- /src/ccommon/rng_philox.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "rng_philox.h" 5 | #include 6 | 7 | RngPhilox g_rng; 8 | 9 | const uint32_t philox_m[2] = {0xD2511F53, 0xCD9E8D57}; 10 | const uint32_t philox_w[2] = {0x9E3779B9, 0xBB67AE85}; 11 | 12 | const double two_pow32_inv = 2.3283064365386963e-10; // 1/2^32 13 | const double two_pow32_inv_2pi = 1.4629180792671596e-09; // 2pi/2^32 14 | 15 | static inline 16 | double box_muller(double x, double y) 17 | { 18 | double u = (x + 0.5) * two_pow32_inv; 19 | double v = (y + 0.5) * two_pow32_inv_2pi; 20 | return sqrt(-2.0 * log(u)) * sin(v); 21 | } 22 | 23 | void rng_philox_randn(RngPhilox* S, unsigned n, float* out) 24 | { 25 | uint32_t cnt[4], key[2]; 26 | for (unsigned i=0; ioffset; 28 | cnt[1] = 0; 29 | cnt[2] = i; 30 | cnt[3] = 0; 31 | 32 | key[0] = S->seed; 33 | key[1] = S->seed>>32; 34 | 35 | for (unsigned r=0; r<10; ++r) { 36 | // Round 37 | uint64_t v1 = (uint64_t)cnt[0] * philox_m[0]; 38 | uint64_t v2 = (uint64_t)cnt[2] * philox_m[1]; 39 | cnt[0] = (uint32_t)(v2>>32) ^ cnt[1] ^ key[0]; 40 | cnt[1] = v2; 41 | cnt[2] = (uint32_t)(v1>>32) ^ cnt[3] ^ key[1]; 42 | cnt[3] = v1; 43 | 44 | key[0] += philox_w[0]; 45 | key[1] += philox_w[1]; 46 | } 47 | 48 | out[i] = box_muller(cnt[0], cnt[1]); 49 | } 50 | S->offset++; 51 | } 52 | 
-------------------------------------------------------------------------------- /src/ccommon/rng_philox.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Pseudo-random number generator imitating torch cuda randn. 5 | * Based on: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/modules/rng_philox.py 6 | */ 7 | #pragma once 8 | #include 9 | 10 | typedef struct { 11 | uint64_t seed; 12 | uint32_t offset; 13 | } RngPhilox; 14 | 15 | void rng_philox_randn(RngPhilox* S, unsigned n, float* out); 16 | 17 | extern RngPhilox g_rng; 18 | 19 | static inline 20 | void rng_randn(unsigned n, float* out) { 21 | rng_philox_randn(&g_rng, n, out); 22 | } 23 | -------------------------------------------------------------------------------- /src/ccommon/str_util.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "str_util.h" 5 | #include 6 | 7 | int sprintf_alloc(char** buffer, const char* fmt, ...) 
8 | { 9 | va_list ap; 10 | 11 | va_start(ap, fmt); 12 | int sz = vsnprintf(0, 0, fmt, ap); 13 | va_end(ap); 14 | 15 | if (sz < 0) return sz; 16 | sz += 1; 17 | char* p = realloc(*buffer, sz); 18 | if (!p) return -1; 19 | *buffer = p; 20 | 21 | va_start(ap, fmt); 22 | sz = vsnprintf(p, sz, fmt, ap); 23 | va_end(ap); 24 | 25 | return sz; 26 | } 27 | 28 | size_t string_escape_encode(char* out, size_t out_size, 29 | const char* in, size_t in_size, size_t* in_done) 30 | { 31 | if (!in || !out) return 0; 32 | if (out_size < 5) return 0; // 4=\xNN + 1=zero-end 33 | char *o=out, *oend=out+out_size-5; 34 | const char *i=in, *iend=in+in_size; 35 | for (; i= iend) { 60 | *o++ = '\\'; 61 | break; 62 | } 63 | switch (*i) { 64 | case '"': *o++ = '"'; break; 65 | case 'n': *o++ = '\n'; break; 66 | case 'r': *o++ = '\r'; break; 67 | case 't': *o++ = '\t'; break; 68 | case 'x': 69 | if (i+2 < iend) { 70 | *o++ = digit_decode(*(i+1), 16) * 16 71 | + digit_decode(*(i+2), 16); 72 | i += 2; 73 | } 74 | else { 75 | *o++ = '\\'; 76 | --i; 77 | } 78 | break; 79 | //TODO: more... 80 | default: 81 | *o++ = '\\'; 82 | --i; 83 | break; 84 | } 85 | } 86 | else *o++ = *i; 87 | } 88 | *o = 0; 89 | if (in_done) *in_done = (i - in); 90 | return (o - out); 91 | } 92 | 93 | -------------------------------------------------------------------------------- /src/ccommon/str_util.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | //! Checks if a character is an string 12 | static inline 13 | bool char_in_str(int ch, const char* str) { 14 | for (; *str; ++str) if (ch == *str) return true; 15 | return false; 16 | } 17 | 18 | //! 
Finds the first occurrence of a character in a list 19 | static inline 20 | char* str_chars_find(const char* str, const char* chars) { 21 | while (*str && !char_in_str(*str, chars)) str++; 22 | return (char*)str; 23 | } 24 | 25 | //! Finds the first occurrence of a character in a list 26 | static inline 27 | char* stre_chars_find(char* str, char* end, const char* chars) { 28 | while (str end-str) dsize = end-str; 68 | memcpy(dst, str, dsize); 69 | dst[dsize] = 0; 70 | } 71 | return dst; 72 | } 73 | 74 | //! Compare two string in case insensitive way 75 | static inline 76 | int str_cmp_i(const char* a, const char* b) { 77 | for (;; ++a, ++b) { 78 | int d = tolower((unsigned char)*a) - tolower((unsigned char)*b); 79 | if (d != 0 || !*a) 80 | return d; 81 | } 82 | } 83 | 84 | static inline 85 | const char* str_startswith(const char* str, const char* sub) 86 | { 87 | unsigned ls = strlen(str), 88 | l2 = strlen(sub); 89 | if (ls >= l2 && !memcmp(str, sub, l2)) return str+l2; 90 | return NULL; 91 | } 92 | 93 | static inline 94 | const char* str_endswith(const char* str, const char* sub) 95 | { 96 | unsigned ls = strlen(str), 97 | l2 = strlen(sub); 98 | if (ls >= l2 && !memcmp(str+ls-l2, sub, l2)) return str+ls-l2; 99 | return NULL; 100 | } 101 | 102 | //! 
Convert an string to lower case 103 | static inline 104 | size_t str_tolower(char* dst, size_t max, const char* src) { 105 | char *cur=dst, *end = dst+max; 106 | for(; *src && cur 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "stringstore.h" 5 | #include "bisect.h" 6 | 7 | /* */ 8 | void strsto_free(StringStore* S) 9 | { 10 | alloc_arena_free(&S->al); 11 | vec_free(S->idx); 12 | vec_free(S->s); 13 | } 14 | 15 | bool strsto_iidx_find(const StringStore* S, const StrSlice key, size_t* idx) 16 | { 17 | bool found; 18 | BISECT_RIGHT(found, *idx, 0, vec_count(S->idx), 19 | strsl_cmp(S->s[S->idx[i_]], key) ); 20 | return found; 21 | } 22 | 23 | StringInt strsto_find(const StringStore* S, const StrSlice ss) 24 | { 25 | size_t iidx; 26 | return strsto_iidx_find(S, ss, &iidx) ? S->idx[iidx] : -1; 27 | } 28 | 29 | StringInt strsto_find_prefix(const StringStore* S, const StrSlice key) 30 | { 31 | if (!vec_count(S->idx)) return -1; //empty store 32 | 33 | size_t iidx; 34 | bool found = strsto_iidx_find(S, key, &iidx); 35 | if (found) return S->idx[iidx]; //exact match 36 | 37 | bool last=false; //last attempt 38 | while (1) { 39 | StringInt si = S->idx[iidx]; 40 | const StrSlice str = S->s[si]; 41 | 42 | // Count the matching characters 43 | size_t i=0; 44 | while (i= 0 && idx != S->idx[iidx]) return -1; 65 | return S->idx[iidx]; 66 | } 67 | else 68 | { 69 | unsigned n = vec_count(S->s); 70 | if (idx < 0) idx = n; 71 | 72 | if (idx < n) { 73 | // Index already used 74 | if (S->s[idx].b) return -1; 75 | } else { 76 | vec_append_zero(S->s, idx-n+1); 77 | } 78 | 79 | if (static_) 80 | S->s[idx] = ss; 81 | else { 82 | // Copy string 83 | char * p = alloc_arena_alloc(&S->al, ss.s+1); 84 | memcpy(p, ss.b, ss.s); 85 | p[ss.s] = 0; 86 | S->s[idx] = (StrSlice){ .b=p, .s=ss.s }; 87 | } 88 | 89 | vec_insert(S->idx, iidx, 1, &idx); 90 | return idx; 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /src/ccommon/stringstore.h: 
-------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Storage of unique string slices. 5 | * 6 | * Example: 7 | * StringStore ss={0}; 8 | * StringInt si = strsto_add(&ss, strsl_static("apple")); 9 | * assert( !strsl_cmp(strsto_get(&ss, si), strsl_static("apple")) ); 10 | * strsto_free(&ss); 11 | */ 12 | #pragma once 13 | #include 14 | #include 15 | #include 16 | #include "strslice.h" 17 | #include "alloc_arena.h" 18 | #include "vector.h" 19 | 20 | typedef int32_t StringInt; 21 | 22 | typedef struct StringStore { 23 | StrSlice * s; //vector 24 | unsigned * idx; //index, vector 25 | AllocatorArena al; 26 | } StringStore; 27 | 28 | void strsto_free(StringStore* S); 29 | 30 | static inline 31 | unsigned strsto_count(const StringStore* S) 32 | { return vec_count(S->idx); } 33 | 34 | static inline 35 | unsigned strsto_next_idx(const StringStore* S) 36 | { return vec_count(S->s); } 37 | 38 | static inline 39 | StrSlice strsto_get(const StringStore* S, StringInt idx) { 40 | assert(0 <= idx && idx < vec_count(S->s)); 41 | if (!(0 <= idx && idx < vec_count(S->s))) return (StrSlice){0}; 42 | return S->s[idx]; 43 | } 44 | 45 | // Return -1 if not found 46 | StringInt strsto_find(const StringStore* S, const StrSlice ss); 47 | 48 | StringInt strsto_add2(StringStore* S, const StrSlice ss, StringInt idx, 49 | bool static_); 50 | 51 | // Add an string. 52 | static inline 53 | StringInt strsto_add(StringStore* S, const StrSlice ss) { 54 | return strsto_add2(S, ss, -1, false); 55 | } 56 | 57 | // Find longest string in the store that matches the beginning of key. 58 | StringInt strsto_find_prefix(const StringStore* S, const StrSlice key); 59 | 60 | // Find the position in the index for . 61 | // Returns true if is present in the store. 62 | // Then, S->idx[*idx] is the StringInt. 
63 | bool strsto_iidx_find(const StringStore* S, const StrSlice key, size_t* idx); 64 | 65 | /* Utility */ 66 | static inline 67 | char* strsl_getd(DynStr* buf, const StrSlice ss) { 68 | dstr_copy(*buf, strsl_len(ss), ss.b); 69 | return *buf; 70 | } 71 | 72 | //static inline 73 | //StrSlice strsl_fromd(const DynStr buf) { 74 | // return (StrSlice){ .b=buf, .s=dstr_count(buf) }; 75 | //} 76 | -------------------------------------------------------------------------------- /src/ccommon/strslice.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | typedef struct StrSlice { 11 | const char *b; 12 | size_t s; 13 | } StrSlice; 14 | 15 | // Initialization 16 | 17 | #define strsl_make(B,S) \ 18 | ((StrSlice){ .b=(B), .s=(S) }) 19 | 20 | #define strsl_static(S) \ 21 | ((StrSlice){ .b=(S), .s=sizeof(S)-1 }) 22 | 23 | #define strsl_fromd(D) \ 24 | ((StrSlice){ .b=(D), .s=dstr_count(D) }) 25 | 26 | #define strsl_froma(A) \ 27 | ((StrSlice){ .b=(A).p.cp, .s=(A).len }) 28 | 29 | #define strsl_fromr(B,E) \ 30 | ((StrSlice){ .b=(B), .s=(E)-(B) }) 31 | 32 | static inline 33 | StrSlice strsl_fromz(const char* strz) 34 | { return (StrSlice){ .b=strz, .s=strlen(strz) }; } 35 | 36 | // Access 37 | 38 | static inline 39 | intptr_t strsl_len(const StrSlice ss) 40 | { return ss.s; } 41 | 42 | static inline 43 | const char * strsl_begin(const StrSlice ss) 44 | { return ss.b; } 45 | 46 | static inline 47 | const char * strsl_end(const StrSlice ss) 48 | { return ss.b + ss.s; } 49 | 50 | #define strsl_for(S, VC, VE, I) \ 51 | for (const char *VC=strsl_begin(S)+(I), *VE=strsl_end(S); VC bufsz) len = bufsz; 102 | memcpy(buf, ss.b, len); 103 | buf[len] = 0; 104 | return len; 105 | } 106 | 107 | static inline 108 | char* strsl_getz(size_t bufsz, char* buf, const StrSlice ss) { 109 | strsl_copyz(bufsz, 
buf, ss); 110 | return buf; 111 | } 112 | 113 | // Utility 114 | 115 | static inline 116 | int strsl_startswith(const StrSlice ss, const StrSlice prefix) { 117 | if (!(ss.s >= prefix.s)) return 0; 118 | return !memcmp(ss.b, prefix.b, prefix.s); 119 | } 120 | 121 | static inline 122 | int strsl_endswith(const StrSlice ss, const StrSlice suffix) { 123 | if (!(ss.s >= suffix.s)) return 0; 124 | return !memcmp(ss.b+ss.s-suffix.s, suffix.b, suffix.s); 125 | } 126 | 127 | static inline 128 | int strsl_prefix_trim(StrSlice* pss, const StrSlice prefix) 129 | { 130 | if (!strsl_startswith(*pss, prefix)) return 0; 131 | pss->b += prefix.s; 132 | pss->s -= prefix.s; 133 | return 1; 134 | } 135 | 136 | static inline 137 | int strsl_prefixz_trim(StrSlice* pss, const char* prefix) { 138 | return strsl_prefix_trim(pss, strsl_fromz(prefix)); 139 | } 140 | 141 | static inline 142 | int strsl_suffix_trim(StrSlice* pss, const StrSlice suffix) 143 | { 144 | if (!strsl_endswith(*pss, suffix)) return 0; 145 | pss->s -= suffix.s; 146 | return 1; 147 | } 148 | 149 | static inline 150 | int strsl_suffixz_trim(StrSlice* pss, const char* suffix) { 151 | return strsl_suffix_trim(pss, strsl_fromz(suffix)); 152 | } 153 | -------------------------------------------------------------------------------- /src/ccommon/structio_json.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include "structio.h" 6 | 7 | enum StioStreamJsonFlag { 8 | STIO_SF_JSON_PRETTY = STIO_IF_CUSTOM, //WIP 9 | }; 10 | 11 | extern const StioClass stio_class_json; 12 | 13 | int stio_json_write(StioStream* sio, StioCtx* ctx, StioItem* itm); 14 | 15 | int stio_json_read(StioStream* sio, StioCtx* ctx, StioItem* itm); 16 | 17 | -------------------------------------------------------------------------------- /src/ccommon/timing.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "timing.h" 5 | 6 | // ----------------------------------------------------------------------------- 7 | #if defined(__unix__) 8 | #ifndef _POSIX_C_SOURCE 9 | #define _POSIX_C_SOURCE 200809L 10 | #endif 11 | #include 12 | #include 13 | 14 | double timing_time() { 15 | struct timespec tp; 16 | clock_gettime(CLOCK_MONOTONIC, &tp); 17 | return (double)tp.tv_sec + (double)tp.tv_nsec * 1e-9; 18 | } 19 | 20 | void timing_sleep(double dt) { 21 | struct timespec tp; 22 | tp.tv_sec = (int)dt; 23 | tp.tv_nsec = (dt - tp.tv_sec) * 1e9; 24 | while (clock_nanosleep(CLOCK_MONOTONIC, 0, &tp, &tp) == EINTR) ; 25 | } 26 | 27 | double timing_timeofday() { 28 | //struct timeval tv={0}; 29 | //gettimeofday(&tv, NULL); 30 | //return tv.tv_sec + tv.tv_usec * 1e-6; 31 | struct timespec tp; 32 | clock_gettime(CLOCK_REALTIME, &tp); 33 | return (double)tp.tv_sec + (double)tp.tv_nsec * 1e-9; 34 | } 35 | 36 | // ----------------------------------------------------------------------------- 37 | #elif defined(__WIN32__) 38 | #define WIN32_LEAN_AND_MEAN 39 | #define WIN32_EXTRA_LEAN 40 | #include 41 | #include 42 | 43 | static struct { 44 | double d_freq; 45 | LARGE_INTEGER li_freq; 46 | } timing_win_data; 47 | 48 | void timing_win_init() { 49 | QueryPerformanceFrequency(&timing_win_data.li_freq); 50 | timing_win_data.d_freq = 
timing_win_data.li_freq.QuadPart; 51 | } 52 | 53 | double timing_time() { 54 | if (!timing_win_data.d_freq) timing_win_init(); 55 | LARGE_INTEGER value; 56 | QueryPerformanceCounter(&value); 57 | return (double)value.QuadPart / timing_win_data.d_freq; 58 | } 59 | 60 | void timing_sleep(double dt) { 61 | Sleep(dt*1000); 62 | } 63 | 64 | double timing_timeofday() { 65 | int64_t t; 66 | GetSystemTimeAsFileTime((FILETIME*)&t); 67 | return (t - 116444736000000000LL) * 1e-7; 68 | } 69 | 70 | // ----------------------------------------------------------------------------- 71 | #elif defined(SDL_VERSION) 72 | #include 73 | 74 | static struct { 75 | double d_freq; 76 | Uint64 u64_freq; 77 | } timing_sdl_data; 78 | 79 | void timing_sdl_init() { 80 | timing_sdl_data.u64_freq = SDL_GetPerformanceFrequency(); 81 | timing_sdl_data.d_freq = timing_sdl_data.u64_freq; 82 | } 83 | 84 | double timing_time() { 85 | return (double)SDL_GetPerformanceCounter() / timing_sdl_data.d_freq; 86 | } 87 | 88 | void timing_sleep(double dt) { 89 | SDL_Delay(dt*1000); 90 | } 91 | 92 | #include 93 | double timing_timeofday() { 94 | return time(NULL); //TODO: not portable 95 | } 96 | 97 | // ----------------------------------------------------------------------------- 98 | #else 99 | #include 100 | 101 | double timing_time() { 102 | return (double)time(0); //TODO: use clock? 103 | } 104 | 105 | void timing_sleep(double dt) { 106 | //TODO: implement with polling? 107 | } 108 | 109 | double timing_timeofday() { 110 | return (double)time(NULL); //TODO: not portable 111 | } 112 | 113 | #endif 114 | -------------------------------------------------------------------------------- /src/ccommon/timing.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | 6 | // Get a monotonic time measured in seconds 7 | double timing_time(); 8 | 9 | void timing_sleep(double dt); 10 | 11 | static inline 12 | double timing_tic(double* t_last) { 13 | double t=timing_time(), dt=t-*t_last; 14 | *t_last = t; 15 | return dt; 16 | } 17 | 18 | // Get the current number of seconds since 1970-01-01 00:00:00 (UTC). 19 | double timing_timeofday(); 20 | -------------------------------------------------------------------------------- /src/ccommon/unicode.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024-2025, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #include "unicode.h" 5 | 6 | uint32_t utf8_decode_next(const char** pstr, const char* end) 7 | { 8 | const uint8_t *c = (const uint8_t*)*pstr, 9 | *e = (const uint8_t*)end; 10 | if (!(c < e)) return 0; 11 | 12 | uint32_t cp = *c++; 13 | 14 | if ((cp & 0x80) == 0x80) { //multibyte 15 | const uint8_t *b=c; 16 | while (c < e && (*c & 0xC0) == 0x80) ++c; //count continuation bytes 17 | 18 | if ((cp & 0xE0) == 0xC0) { //2 bytes: 110xxxxx 10xxxxxx 19 | if (c != b+1) goto error_end; 20 | uint32_t b2 = b[0]; 21 | cp = ((cp & 0x1F) << 6) | (b2 & 0x3F); 22 | } 23 | else if ((cp & 0xF0) == 0xE0) { //3 bytes: 1110xxxx ... 24 | if (c != b+2) goto error_end; 25 | uint32_t b2 = b[0], b3 = b[1]; 26 | cp = ((cp & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F); 27 | } 28 | else if ((cp & 0xF8) == 0xF0) { //4 bytes: 11110xxx ... 29 | if (c != b+3) goto error_end; 30 | uint32_t b2 = b[0], b3 = b[1], b4 = b[2]; 31 | cp = ((cp & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6) 32 | | (b4 & 0x3F); 33 | } 34 | else goto error_end; 35 | } 36 | 37 | if ((const char*)c > end) { c=end; cp=0; } //TODO: check before! 
38 | *pstr = (const char*)c; 39 | return cp; 40 | 41 | error_end: 42 | *pstr = (const char*)c; 43 | return 0xFFFD; 44 | } 45 | 46 | const char* utf8_decode_skip(const char* cur, const char* end) 47 | { 48 | if (cur < end) cur++; //first byte 49 | while (cur < end && (*cur & 0xC0) == 0x80) cur++; 50 | return cur; 51 | } 52 | 53 | char* utf8_encode_next(char* dst, uint32_t cp) 54 | { 55 | if (cp <= 0x7F) { 56 | *dst++ = cp; 57 | } 58 | else if (cp <= 0x7FF) { 59 | *dst++ = 0xC0 | (cp >> 6); 60 | *dst++ = 0x80 | (cp & 0x3F); 61 | } 62 | else if (cp <= 0xFFFF) { 63 | *dst++ = 0xE0 | (cp >> 12); 64 | *dst++ = 0x80 | ((cp >> 6) & 0x3F); 65 | *dst++ = 0x80 | (cp & 0x3F); 66 | } 67 | else if (cp <= 0x10FFFF) { 68 | *dst++ = 0xF0 | (cp >> 18); 69 | *dst++ = 0x80 | ((cp >> 12) & 0x3F); 70 | *dst++ = 0x80 | ((cp >> 6) & 0x3F); 71 | *dst++ = 0x80 | (cp & 0x3F); 72 | } 73 | //else error, do nothing 74 | 75 | return dst; 76 | } 77 | -------------------------------------------------------------------------------- /src/ccommon/unicode.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024-2025, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Unicode, UTF-8, encoding and decoding. 5 | */ 6 | #pragma once 7 | #include 8 | 9 | /* Return the next code point and advance the string pointer. 10 | * Return zero for an empty string. 11 | * In case of error, returns 0xFFFD and skips the bytes. 12 | */ 13 | uint32_t utf8_decode_next(const char** pstr, const char* end); 14 | 15 | /* Skip one codepoint without fully decoding it. 16 | * Returns a pointer to the next codepoint. 17 | * Returns if cur == end. 18 | */ 19 | const char* utf8_decode_skip(const char* cur, const char* end); 20 | 21 | /* Encode one code point into cursor. 22 | * Writes up to 4 bytes. 23 | * Return the new cursor position. 
24 | */ 25 | char* utf8_encode_next(char* dst, uint32_t cp); 26 | -------------------------------------------------------------------------------- /src/ccommon/unicode_data.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | * 4 | * Unicode database. 5 | * Query the properties of codepoints. 6 | * This is a separate module from "unicode" because the data occupies several 7 | * kilobytes. 8 | */ 9 | #pragma once 10 | #include 11 | 12 | /* Get the major general category of a unicode codepoint. 13 | * Returns one of the following characters or zero the codepoint is out unicode 14 | * range. 15 | * L: Letter, M: Mark, N: Number, P: Punctuation, S: Symbol, Z: Separator, C: Other 16 | */ 17 | int unicode_category_major(uint32_t cp); 18 | 19 | /* Returns the upper case variant of codepoint. 20 | * If there is none, it returns the same codepoint. 21 | */ 22 | uint32_t unicode_upper(uint32_t cp); 23 | 24 | /* Returns the lower case variant of codepoint. 25 | * If there is none, it returns the same codepoint. 26 | */ 27 | uint32_t unicode_lower(uint32_t cp); 28 | -------------------------------------------------------------------------------- /src/ccompute/tensorstore.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Parse and store information from common tensor storage formats used for 5 | * machine learning. 
6 | * 7 | * Example: 8 | StringStore ss={0}; 9 | TensorStore ts={ .ss=&ss }; 10 | Stream stm={0}; 11 | TRY( stream_open_file(&stm, "model.gguf") ); 12 | TRY( tstore_read(&tsp, &stm, NULL) ); 13 | TRY( tstore_info_dump_path(&sp, "model-info.txt") ); 14 | end: 15 | tstore_free(&tsp); 16 | stream_close(&stm, 0); 17 | */ 18 | #pragma once 19 | #include "ccommon/stream.h" 20 | #include "ccommon/vector.h" 21 | #include "ccommon/any.h" 22 | #include "ccommon/stringstore.h" 23 | 24 | typedef struct TensorStore TensorStore; 25 | 26 | /* Error codes */ 27 | 28 | typedef enum { 29 | TS_E_UNKNOWN = -0x3001, 30 | TS_E_OVERFLOW = -0x3002, 31 | TS_E_FORMAT = -0x3003, 32 | TS_E_READ = -0x3004, 33 | TS_E_METADATA = -0x3005, 34 | TS_E_DTYPE = -0x3006, 35 | TS_E_WRITE = -0x3007, 36 | } TSError; 37 | 38 | /* Data types */ 39 | 40 | typedef enum { 41 | TS_DTYPE_NONE, 42 | TS_DTYPE_F64, 43 | TS_DTYPE_F32, 44 | TS_DTYPE_F16, 45 | TS_DTYPE_BF16, 46 | TS_DTYPE_I64, 47 | TS_DTYPE_I32, 48 | TS_DTYPE_I16, 49 | TS_DTYPE_I8, 50 | // GGML quantization 51 | TS_DTYPE_Q8_0, 52 | TS_DTYPE_Q4_1, 53 | TS_DTYPE_Q6_K, 54 | TS_DTYPE_Q5_K, 55 | TS_DTYPE_Q4_K, 56 | TS_DTYPE__END, 57 | } TSDType; 58 | 59 | typedef struct { 60 | const char *name; 61 | uint64_t sz_m, sz_d; // tensor size = count * sz_m / sz_d 62 | int ggml, 63 | mda; 64 | unsigned valid:1; 65 | } TSDTypeAttr; 66 | 67 | const TSDTypeAttr* tstore_dtype_attr(int dt); 68 | 69 | int tstore_dtype_fromz(const char* s); 70 | 71 | const char * tstore_dtype_str(int dt); 72 | 73 | // Returns -1 if not found 74 | int tstore_dtype_from_ggml(int ggml_type); 75 | int tstore_dtype_to_ggml(int dt); 76 | 77 | // Returns -1 if not found 78 | int tstore_dtype_from_mda(int mda_dtype); 79 | int tstore_dtype_to_mda(int dt); 80 | 81 | /* Tensor data */ 82 | 83 | typedef struct { 84 | TSDType dtype; //data type 85 | void *data; 86 | size_t size; 87 | unsigned ownmem:1, 88 | perm:1; // remains valid for the lifetime of the tensor store 89 | } TSTensorData; 90 | 91 | 
void tstore_tdata_free(TSTensorData*); 92 | 93 | /* Meta data entry */ 94 | 95 | typedef struct { 96 | int key; //str_id 97 | Any value; 98 | } TSMetaEntry; 99 | 100 | /* Tensor entry */ 101 | 102 | typedef struct { 103 | int key; //str_id 104 | TSDType dtype; 105 | unsigned shape_n, shape[4]; 106 | uint64_t offset, size; 107 | Stream *stm; 108 | TSTensorData *cache; //converted tensor cache, vector, sorted 109 | } TSTensorEntry; 110 | 111 | uint64_t tstore_tensor_count(const TSTensorEntry* S); 112 | uint64_t tstore_tensor_size(const TSTensorEntry* S); 113 | 114 | #define TSTENSOR_SHAPE4_FMT "%ux%ux%ux%u" 115 | #define TSTENSOR_SHAPE4_UNPACK(T) \ 116 | (T).shape[0], (T).shape[1], (T).shape[2], (T).shape[3] 117 | 118 | /* Return a TSTensorData object with the tensor data with type dtype. 119 | * If flags & TSTDG_F_PERM, the data pointer is permanent, otherwise, 120 | * The TSTensorData object must be free'd after use. 121 | */ 122 | int tstore_tensor_data_get(TSTensorEntry* S, TSDType dtype, int flags, 123 | TSTensorData* out); 124 | 125 | enum tstore_tensor_data_get_flags_t { 126 | TSTDG_F_PERM = 1, // out->data is in permanent storage 127 | TSTDG_F_WRITE = 2, // Returns memory that can be written 128 | }; 129 | 130 | /* IO CallBack */ 131 | 132 | typedef struct { 133 | int (*func)(void* user, TensorStore* ts, TSTensorEntry* te, DynStr* pname); 134 | void *user; 135 | } TSCallback; 136 | 137 | static inline 138 | int tstore_cb_call(TSCallback* cb, TensorStore* ts, TSTensorEntry* te, 139 | DynStr* pname) 140 | { 141 | if (!cb || !cb->func) return 1; 142 | return cb->func(cb->user, ts, te, pname); 143 | } 144 | 145 | /* Parser */ 146 | 147 | typedef struct { 148 | const char *name, *ext; 149 | int (*detect)(Stream*); 150 | int (*read)(TensorStore*, Stream*, TSCallback*); 151 | int (*write)(TensorStore*, Stream*, TSCallback*); 152 | } TensorStoreFormat; 153 | 154 | int tstore_format_register(const TensorStoreFormat*); 155 | 156 | const TensorStoreFormat* 
tstore_format_get(int idx); 157 | 158 | /* Store */ 159 | 160 | struct TensorStore { 161 | TSTensorEntry * tensors; //vector, source order 162 | TSMetaEntry * meta; //vector, source order 163 | unsigned * tensors_idx; //vector, key sorted 164 | unsigned * meta_idx; //vector, key sorted 165 | StringStore *ss; //external store for tensor names strings, fill before use 166 | }; 167 | 168 | void tstore_free(TensorStore*); 169 | 170 | /* Read tensors information from a stream. 171 | * Does not read the tensors data. 172 | * fmt: data format. If NULL, tries to guess from the data. 173 | * cb: Optional. Function called before adding each tensor. If it returns non 174 | * positive, the tensor is not added. May change the name. 175 | */ 176 | int tstore_read(TensorStore* S, Stream* stm, const TensorStoreFormat* fmt, 177 | TSCallback* cb); 178 | 179 | /* Write tensors information to a stream. 180 | * Does not writes the tensors data. 181 | * fmt: data format. 182 | * cb: Optional. Function called before writing each tensor. If it returns non 183 | * positive, the tensor is not written. May store a new name in *pname. 184 | */ 185 | int tstore_write(TensorStore* S, Stream* stm, const TensorStoreFormat* fmt, 186 | TSCallback* cb); 187 | 188 | /* Tries to detect the data format of a stream. 189 | */ 190 | const TensorStoreFormat* tstore_format_detect(Stream* stm); 191 | 192 | /* Make copy of the store src in dst. 193 | * Useful for conversion and for other manipulations. 194 | */ 195 | void tstore_copy_from(TensorStore* dst, const TensorStore* src); 196 | 197 | /* Write human readable information about the store. 198 | */ 199 | int tstore_info_dump(const TensorStore*, Stream* out); 200 | 201 | /* Write human readable information about the store. 202 | */ 203 | int tstore_info_dump_path(const TensorStore*, const char* path); 204 | 205 | /* Add a new key-value metadata entry. 206 | * Takes ownership of value. 
207 | */ 208 | int tstore_meta_addk(TensorStore* S, StringInt key, Any* value); 209 | 210 | /* Add a new key-value metadata entry. 211 | * Takes ownership of value. 212 | */ 213 | static inline 214 | int tstore_meta_add(TensorStore* S, const char* name, Any* value) 215 | { 216 | int key = strsto_add(S->ss, strsl_fromz(name)); 217 | return tstore_meta_addk(S, key, value); 218 | } 219 | 220 | /* Add a new key-value metadata entry. 221 | * String value. 222 | */ 223 | int tstore_meta_adds(TensorStore* S, const char* name, const char* value); 224 | 225 | /* Find and return a metadata entry. 226 | * Returns empty (t=0) if not found. 227 | */ 228 | const Any tstore_meta_getk(const TensorStore* S, StringInt key); 229 | 230 | /* Find and return a metadata entry. 231 | * Return empty (t=0) if not found. 232 | */ 233 | static inline 234 | const Any tstore_meta_get(const TensorStore* S, const char* name) { 235 | int key = strsto_add(S->ss, strsl_fromz(name)); 236 | return tstore_meta_getk(S, key); 237 | } 238 | 239 | /* Add a new tensor entry. 240 | * entry->key is ignored. 241 | */ 242 | int tstore_tensor_addk(TensorStore* S, StringInt key, 243 | const TSTensorEntry* entry); 244 | 245 | /* Add a new tensor entry. 246 | * entry->key is ignored. 247 | */ 248 | static inline 249 | int tstore_tensor_add(TensorStore* S, const char* name, 250 | const TSTensorEntry* entry) 251 | { 252 | int key = strsto_add(S->ss, strsl_fromz(name)); 253 | return tstore_tensor_addk(S, key, entry); 254 | } 255 | 256 | /* Find and return a tensor entry. 257 | * Return NULL if not found. 258 | */ 259 | TSTensorEntry* tstore_tensor_getk(const TensorStore*, StringInt key); 260 | 261 | /* Find and return a tensor entry. 262 | * Return NULL if not found. 263 | */ 264 | static inline 265 | TSTensorEntry* tstore_tensor_get(const TensorStore* S, const char* name) { 266 | int key = strsto_add(S->ss, strsl_fromz(name)); 267 | return tstore_tensor_getk(S, key); 268 | } 269 | 270 | /* Remake the tensors index. 
#define GGUF_MAGIC "GGUF"
//#define GGUF_VERSION 3
#define GGUF_ALIGNMENT 32

/* Round an offset up to the next multiple of GGUF_ALIGNMENT.
 * An offset that is already a multiple is returned unchanged.
 */
static
uint64_t gguf_align(uint64_t offset)
{
	const uint64_t rem = offset % GGUF_ALIGNMENT;
	if (rem == 0) return offset;
	return offset + (GGUF_ALIGNMENT - rem);
}
/* Read one string from the stream and store it in *out.
 * A length is read first into a uint64_t and compared against limit; then
 * that many bytes are read into a buffer of len+1 bytes obtained from al,
 * which is zero terminated. *out is set with any_string(len, p).
 * Returns 1 on success.
 * NOTE(review): TRYR/TRYRB are defined elsewhere; presumably they return
 * early with the error (TS_E_OVERFLOW for the limit check) - confirm.
 */
static
int gguf_read_string(Stream* stm, Allocator* al, Any* out, uint64_t limit)
{
	uint64_t len;
	TRYR( stream_read_var(stm, len) );
	TRYRB(TS_E_OVERFLOW, len <= limit); //sanity check
	char *p = alloc_alloc(al, len+1);  // one extra byte for the terminator
	TRYR( stream_read_chk(stm, len, p) );
	p[len] = 0;
	*out = any_string(len, p);
	return 1;
}
/* Read the description of one tensor (not its data) and fill *entry.
 * Read in order: the number of dimensions (at most 4), that many 64-bit
 * dimensions, the ggml type id and the data offset.
 * name is only used for the error log message.
 * Returns R, which is 1 unless an error code was stored in it.
 * NOTE(review): TRY/TRYB/ERROR_LOG are defined elsewhere; presumably they
 * store the error in R and jump to end - confirm.
 */
static
int gguf_read_tensor(Stream* stm, TSTensorEntry* entry, const char* name)
{
	int R=1;

	uint32_t n_dim;
	TRY( stream_read_var(stm, n_dim) );
	TRYB( TS_E_OVERFLOW, n_dim <= 4 ); //sanity check

	// Dimensions beyond n_dim are not read and keep the initial value 1
	uint64_t dims[4]={1,1,1,1};
	TRY( stream_read_chk(stm, sizeof(*dims)*n_dim, dims) );
	// Each dimension is limited before being stored in the unsigned shape
	TRYB( TS_E_OVERFLOW, dims[0] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[1] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[2] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[3] <= 0xffffff );

	uint32_t ggml_type;
	TRY( stream_read_var(stm, ggml_type) );
	int dtype = tstore_dtype_from_ggml(ggml_type);
	if (!(dtype > 0)) ERROR_LOG(TS_E_DTYPE, "unknown tensor type %u", ggml_type);

	uint64_t offset;
	TRY( stream_read_var(stm, offset) );

	// Store
	entry->dtype = dtype;
	entry->shape_n = n_dim;
	entry->shape[0] = dims[0];
	entry->shape[1] = dims[1];
	entry->shape[2] = dims[2];
	entry->shape[3] = dims[3];
	// Stored as read; tstore_read_gguf later adds the aligned position where
	// the headers end to make it absolute
	entry->offset = offset; // needs to be updated
	entry->stm = stm;
	entry->size = tstore_tensor_size(entry);

	//log_debug("gguf tensor '%s' %s " TSTENSOR_SHAPE4_FMT,
	//	name, tstore_dtype_str(dtype), TSTENSOR_SHAPE4_UNPACK(*entry));

end:
	if (R<0) log_error("gguf load tensor '%s': %x", name, -R);
	return R;
}
(version != 2 && version != 3) 187 | ERROR_LOG(TS_E_FORMAT, "unsupported version: %u", version); 188 | 189 | uint64_t n_tensor, n_meta; 190 | TRY( stream_read_var(stm, n_tensor) ); 191 | TRY( stream_read_var(stm, n_meta) ); 192 | TRYB(TS_E_OVERFLOW, n_tensor <= 65535); //sanity check 193 | TRYB(TS_E_OVERFLOW, n_meta <= 65535); //sanity check 194 | 195 | log_debug("gguf n_meta:%u n_tensor:%u", 196 | (unsigned)n_meta, (unsigned)n_tensor); 197 | 198 | // Reserve memory 199 | vec_realloc(S->meta, vec_count(S->meta) + n_meta); 200 | vec_realloc(S->meta_idx, vec_count(S->meta_idx) + n_meta); 201 | vec_realloc(S->tensors, vec_count(S->tensors) + n_tensor); 202 | vec_realloc(S->tensors_idx, vec_count(S->tensors_idx) + n_tensor); 203 | 204 | // Metadata 205 | for (uint64_t i=0; iss, &name) ); 207 | Any value={0}; 208 | TRY( gguf_read_meta(stm, &al, &value, name) ); 209 | TRY( tstore_meta_add(S, name, &value) ); 210 | } 211 | 212 | // Tensors 213 | for (uint64_t i=0; iss, &name) ); 215 | TSTensorEntry e={0}; 216 | TRY( gguf_read_tensor(stm, &e, name) ); 217 | TRY( r = tstore_cb_call(cb, S, &e, &name) ); 218 | if (r > 0) { 219 | TRY( tstore_tensor_add(S, name, &e) ); 220 | } 221 | } 222 | 223 | uint64_t offset = stream_pos_get(stm); 224 | offset = gguf_align(offset); 225 | 226 | // Make tensors offsets absolute 227 | vec_for(S->tensors, i, 0) { 228 | if (S->tensors[i].stm != stm) continue; 229 | S->tensors[i].offset += offset; 230 | } 231 | 232 | end: 233 | if (R<0) log_error("gguf read: %x", -R); 234 | return R; 235 | } 236 | 237 | //TODO: write 238 | 239 | int tstore_detect_gguf(Stream* stm) 240 | { 241 | uint8_t *end, *cur = stream_read_buffer(stm, &end); 242 | if (!(end-cur >= 4)) return 0; 243 | return !memcmp(cur, GGUF_MAGIC, 4); 244 | } 245 | 246 | const TensorStoreFormat ts_cls_gguf = { 247 | "gguf", "gguf", 248 | tstore_detect_gguf, 249 | tstore_read_gguf, 250 | }; 251 | -------------------------------------------------------------------------------- 
/src/ccompute/tensorstore_gguf.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Load tensors from a GGUF file. 5 | */ 6 | #pragma once 7 | #include "tensorstore.h" 8 | 9 | extern const TensorStoreFormat ts_cls_gguf; 10 | 11 | int tstore_detect_gguf(Stream* stm); 12 | 13 | int tstore_read_gguf(TensorStore* ts, Stream* stm, TSCallback* cb); 14 | -------------------------------------------------------------------------------- /src/ccompute/tensorstore_safet.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Load/save tensor from a file with the SafeTensor format. 5 | */ 6 | #pragma once 7 | #include "tensorstore.h" 8 | 9 | extern const TensorStoreFormat ts_cls_safet; 10 | 11 | int tstore_detect_safet(Stream* stm); 12 | 13 | int tstore_read_safet(TensorStore* ts, Stream* stm, TSCallback*); 14 | 15 | int tstore_write_safet(TensorStore* ts, Stream* stm, TSCallback* cb); 16 | -------------------------------------------------------------------------------- /src/clip.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024-2025, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * CLIP text to embeddings encoder for conditioning in SD. 
5 | */ 6 | #pragma once 7 | #include "mlblock.h" 8 | #include "localtensor.h" 9 | #include "ccommon/strslice.h" 10 | 11 | typedef struct { 12 | int n_vocab; 13 | int n_token; // max_position_embeddings 14 | int d_embed; 15 | int n_interm; 16 | int n_head; 17 | int n_layer; // num_hidden_layers 18 | uint32_t tok_start, tok_end, tok_pad; 19 | } ClipParams; 20 | 21 | extern const ClipParams g_clip_vit_l_14; //SD 1.x and SDXL 22 | extern const ClipParams g_clip_vit_h_14; //SD 2.x 23 | extern const ClipParams g_clip_vit_bigg_14; //SDXL 24 | 25 | /* Encode a text into a list of tokens. 26 | * Return the number of tokens put into ptokvec. 27 | * ptokvec is a pointer to a vector of tokens where new tokens will be appended. 28 | */ 29 | int clip_tokenize(const ClipParams* P, StrSlice text, int32_t** ptokvec); 30 | 31 | /* Decode a token into a string (zero terminated). 32 | * Returns the number of bytes written, or negative in case of error. 33 | */ 34 | int clip_token_decode(const ClipParams* P, int32_t token, 35 | size_t bufsz, char* buf); 36 | 37 | /* Get the string corresponding to a token. 38 | * For debugging purposes, uses an internal buffer. 39 | * Returns "<|INVALID|>" if not found. 
40 | */ 41 | const char* clip_token_str(const ClipParams* P, int32_t token); 42 | 43 | // In : vector of token ids [n_token] 44 | // Out: embeddings [d_embed, n_token] 45 | MLTensor* mlb_clip_text(MLCtx* C, MLTensor* tokens, MLTensor* cust_emb, 46 | const ClipParams* P, int clip_skip, bool norm); 47 | 48 | // In : embeddings [d_embed, n_token] 49 | // Out: features vector [d_embed] 50 | MLTensor* mlb_clip_text_proj(MLCtx* C, MLTensor* embed, int i_tok_end); 51 | 52 | int clip_text_encode(MLCtx* C, const ClipParams* P, unsigned n_tok, 53 | const int32_t *toks, LocalTensor* embed, LocalTensor* feat, 54 | int clip_skip, bool norm); 55 | -------------------------------------------------------------------------------- /src/demo_mlimgsynth.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Demonstration of the capabilities of the MLImgSynth library. 5 | */ 6 | #include "mlimgsynth.h" 7 | #include 8 | #include 9 | #include 10 | 11 | #define error(...) do { \ 12 | printf("ERROR "); \ 13 | printf(__VA_ARGS__); \ 14 | printf("\n"); \ 15 | exit(1); \ 16 | } while (0) 17 | 18 | #define log(...) 
do { \ 19 | printf(__VA_ARGS__); \ 20 | printf("\n"); \ 21 | } while (0) 22 | 23 | void img_save(MLIS_Ctx* ctx, const char* name) 24 | { 25 | char buffer[128]; 26 | 27 | const MLIS_Image *img = mlis_image_get(ctx, 0); 28 | const char *info = mlis_infotext_get(ctx, 0); 29 | 30 | log("Saving..."); 31 | 32 | sprintf(buffer, "%s.ppm", name); 33 | FILE *f = fopen(buffer, "w"); 34 | fprintf(f, "P6 %u %u 255\n", img->w, img->h); 35 | fwrite(img->d, 1, img->sz, f); 36 | fclose(f); 37 | 38 | sprintf(buffer, "%s.txt", name); 39 | f = fopen(buffer, "w"); 40 | fwrite(info, 1, strlen(info), f); 41 | fclose(f); 42 | } 43 | 44 | void demo_txt2img(MLIS_Ctx* ctx) 45 | { 46 | log("txt2img"); 47 | mlis_option_set(ctx, MLIS_OPT_PROMPT, 48 | "a photograph of an astronaut riding a horse in a grassland"); 49 | 50 | mlis_generate(ctx); 51 | 52 | img_save(ctx, "demo_txt2img"); 53 | } 54 | 55 | void demo_img2img(MLIS_Ctx* ctx) 56 | { 57 | log("img2img"); 58 | mlis_option_set(ctx, MLIS_OPT_PROMPT, 59 | "a photograph of an astronaut riding a horse in a forest"); 60 | mlis_option_set(ctx, MLIS_OPT_F_T_INI, 0.70); // Strength 61 | 62 | // For this example we just use the previously generated image 63 | const MLIS_Image *img = mlis_image_get(ctx, 0); 64 | mlis_option_set(ctx, MLIS_OPT_IMAGE, img); 65 | 66 | mlis_generate(ctx); 67 | 68 | img_save(ctx, "demo_img2img"); 69 | } 70 | 71 | void demo_inpaint(MLIS_Ctx* ctx) 72 | { 73 | log("inpaint"); 74 | mlis_option_set(ctx, MLIS_OPT_PROMPT, "a pile of gold coins"); 75 | mlis_option_set(ctx, MLIS_OPT_NO_DECODE, 1); 76 | 77 | mlis_generate(ctx); 78 | 79 | mlis_option_set(ctx, MLIS_OPT_NO_DECODE, 0); 80 | 81 | // Creates a circular mask for latent space 82 | MLIS_Tensor *latent = mlis_tensor_get(ctx, MLIS_TENSOR_LATENT); 83 | MLIS_Tensor *lmask = mlis_tensor_get(ctx, MLIS_TENSOR_LMASK); 84 | mlis_tensor_resize_like(lmask, latent); 85 | int r0 = lmask->n[0] / 2; // Radius 86 | int r1 = lmask->n[1] / 2; 87 | mlis_tensor_for(*lmask, i) { 88 | lmask->d[ip] 
= ((i0-r0)*(i0-r0) + (i1-r1)*(i1-r1)) > r1*r1; 89 | } 90 | 91 | mlis_option_set(ctx, MLIS_OPT_PROMPT, "a red dragon on a pile of gold coins"); 92 | mlis_option_set(ctx, MLIS_OPT_F_T_INI, 0.70); 93 | mlis_option_set(ctx, MLIS_OPT_TENSOR_USE_FLAGS, 94 | MLIS_TUF_LATENT | MLIS_TUF_LMASK); 95 | 96 | mlis_generate(ctx); 97 | 98 | img_save(ctx, "demo_inpaint"); 99 | } 100 | 101 | void error_handler(void*, MLIS_Ctx* ctx, const MLIS_ErrorInfo* ei) 102 | { 103 | error("mlis error 0x%x: %s", -ei->code, ei->desc); 104 | } 105 | 106 | int progress_callback(void*, MLIS_Ctx* ctx, const MLIS_Progress* prg) 107 | { 108 | double etc = -1; 109 | if (1 < prg->step) etc = (prg->step_end - prg->step) * prg->step_time; 110 | log("%s %d/%d nfe=%d {%.3fs} ETC %.0fs", 111 | mlis_stage_str(prg->stage), prg->step, prg->step_end, prg->nfe, 112 | prg->step_time, etc); 113 | return 0; //continue 114 | } 115 | 116 | int main(int argc, char* argv[]) 117 | { 118 | if (argc != 2) 119 | error("Usage: %s [MODEL FILE PATH]", argv[0]); 120 | 121 | log("Initializing..."); 122 | MLIS_Ctx *ctx = mlis_ctx_create(); 123 | mlis_option_set(ctx, MLIS_OPT_ERROR_HANDLER, error_handler, NULL); 124 | mlis_option_set(ctx, MLIS_OPT_CALLBACK, progress_callback, NULL); 125 | mlis_option_set(ctx, MLIS_OPT_MODEL, argv[1]); 126 | 127 | // If you do not set the following options, default values will be used. 128 | mlis_option_set(ctx, MLIS_OPT_IMAGE_DIM, 768, 512); 129 | mlis_option_set(ctx, MLIS_OPT_SEED, 42); 130 | mlis_option_set(ctx, MLIS_OPT_METHOD, MLIS_METHOD_EULER); 131 | mlis_option_set(ctx, MLIS_OPT_SCHEDULER, MLIS_SCHED_UNIFORM); 132 | mlis_option_set(ctx, MLIS_OPT_STEPS, 20); 133 | // Be sure to use floating point numbers with options that require it. 134 | mlis_option_set(ctx, MLIS_OPT_CFG_SCALE, 7.0); 135 | mlis_option_set(ctx, MLIS_OPT_S_ANCESTRAL, 1.0); 136 | // You can also set options using strings. 
137 | mlis_option_set_str(ctx, "image_dim", "768,512"); 138 | //mlis_option_set(ctx, MLIS_OPT_LORA, lora_path, 1.0); 139 | 140 | // Initialized the backend and load the model header 141 | // This is not required, but it can be useful to catch errors early. 142 | mlis_setup(ctx); 143 | 144 | demo_txt2img(ctx); 145 | demo_img2img(ctx); 146 | demo_inpaint(ctx); 147 | 148 | log("End"); 149 | mlis_ctx_destroy(&ctx); 150 | return 0; 151 | } 152 | -------------------------------------------------------------------------------- /src/ggml_extend.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #include "ggml_extend.h" 5 | #include "ccommon/ccommon.h" 6 | #include "ccommon/logging.h" 7 | #include "ccommon/vector.h" 8 | #include 9 | #include 10 | #include 11 | 12 | #include "ggml-backend.h" 13 | 14 | struct ggml_tensor* ggml_name_prefix(struct ggml_tensor* x, const char* pre) 15 | { 16 | if (x->name[0]) { 17 | unsigned lp=strlen(pre), ln=strlen(x->name); 18 | if (lp+1+ln+1 > sizeof(x->name)) 19 | FATAL_LOG("ggml tensor name too long"); 20 | memmove(x->name+lp+1, x->name, ln+1); 21 | memcpy(x->name, pre, lp); 22 | x->name[lp] = '.'; 23 | } else { 24 | strncpy(x->name, pre, sizeof(x->name)-1); 25 | x->name[sizeof(x->name)-1] = 0; 26 | } 27 | return x; 28 | } 29 | 30 | const char* ggml_tensor_typeshape_desc(const struct ggml_tensor* x) 31 | { 32 | static DynStr out=NULL; 33 | dstr_printf(out, "%s ", ggml_type_name(x->type)); 34 | 35 | for (unsigned i=0; ine[i]; ++i) { 36 | if (i) dstr_push(out, 'x'); 37 | dstr_printfa(out, "%"PRId64, x->ne[i]); 38 | } 39 | 40 | return out; 41 | } 42 | 43 | size_t ggml_ctx_tensors_total_size(const struct ggml_context* ctx) 44 | { 45 | size_t s=0; 46 | struct ggml_tensor *t = ggml_get_first_tensor(ctx); 47 | for (; t; t=ggml_get_next_tensor(ctx, t)) s += ggml_nbytes(t); 48 | return s; 49 | } 50 | 51 | void 
ggml_ctx_tensors_dump(const struct ggml_context* ctx, Stream* out) 52 | { 53 | struct ggml_tensor *t=ggml_get_first_tensor(ctx); 54 | for (; t; t=ggml_get_next_tensor(ctx, t)) { 55 | stream_printf(out, GGML_TENSOR_FMT "\n", GGML_TENSOR_ARGS(t)); 56 | } 57 | } 58 | 59 | static const char g_base64_chars[] = 60 | "ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/"; 61 | 62 | #define ggml_tensor_stat_CODE(TYPE,CONV) do { \ 63 | const int64_t GGML_TENSOR_VARS_N(T,t), GGML_TENSOR_VARS_S(T,t); \ 64 | const TYPE *tp = T->data; \ 65 | stat.first = *tp; \ 66 | int64_t hsep = (t3n *t2n *t1n *t0n) / 8; \ 67 | for (int64_t i3=0, i=0; i3data) return stat; 81 | 82 | double hsum[8]={0}; 83 | if (T->type == GGML_TYPE_F32) 84 | ggml_tensor_stat_CODE(float,); 85 | else if (T->type == GGML_TYPE_F16) 86 | ggml_tensor_stat_CODE(ggml_fp16_t,ggml_fp16_to_fp32); 87 | else return stat; 88 | 89 | // hsum: partial sums of 8 segments 90 | double hmn=hsum[0], hmx=hmn; 91 | for (unsigned i=1; i<8; ++i) { 92 | MINSET(hmn, hsum[i]); 93 | MAXSET(hmx, hsum[i]); 94 | } 95 | // Convert each sum to a character to fast checking by a human 96 | double f = (hmx > hmn) ? 
(64 / (hmx - hmn)) : 0; 97 | f = nextafter(f, 0); 98 | for (unsigned i=0; i<8; ++i) { 99 | int idx = (hsum[i] - hmn) * f; 100 | assert( 0 <= idx && idx < 64 ); 101 | stat.hash[i] = g_base64_chars[idx]; 102 | } 103 | stat.hash[8] = 0; 104 | 105 | return stat; 106 | } 107 | 108 | #define ggml_tensor_export_CODE(TYPE,CONV) do { \ 109 | const TYPE *tp = T->data; \ 110 | for (int64_t i3=0; i3data) return; 120 | 121 | FILE *f = fopen(path, "w"); 122 | if (!f) return; 123 | 124 | const int64_t GGML_TENSOR_VARS_N(T,t), GGML_TENSOR_VARS_S(T,t); 125 | fprintf(f, "TENSOR ASCII %zd %zd %zd %zd\n", t0n,t1n,t2n,t3n); 126 | 127 | if (T->type == GGML_TYPE_F32) 128 | ggml_tensor_export_CODE(float,); 129 | else if (T->type == GGML_TYPE_F16) 130 | ggml_tensor_export_CODE(ggml_fp16_t,ggml_fp16_to_fp32); 131 | 132 | fclose(f); 133 | } 134 | 135 | void ggml_chunk_(struct ggml_context* ctx, 136 | struct ggml_tensor* x, int n_chunk, int n_dim, struct ggml_tensor*** out) 137 | { 138 | GGML_ASSERT( GGML_MAX_DIMS == 4 ); 139 | GGML_ASSERT( 0 <= n_dim && n_dim < GGML_MAX_DIMS ); 140 | GGML_ASSERT( n_dim == 0 ); //TODO 141 | int64_t ne[GGML_MAX_DIMS]; 142 | memcpy(ne, x->ne, sizeof(ne)); 143 | size_t nb[GGML_MAX_DIMS]; 144 | memcpy(nb, x->nb, sizeof(nb)); 145 | ne[n_dim] /= n_chunk; 146 | GGML_ASSERT( n_chunk * ne[n_dim] == x->ne[n_dim] ); 147 | 148 | size_t offset = ggml_type_size(x->type) * ne[n_dim]; 149 | 150 | for (int i=0; inb[1], x->nb[2], x->nb[3], offset*i); 153 | } 154 | } 155 | 156 | void ggml_tensor_debug_print(struct ggml_tensor* dst, 157 | const struct ggml_tensor* src, int ith, int nth, void* userdata) 158 | { 159 | if (ith != 0) return; 160 | GGML_ASSERT( dst->data == src->data ); 161 | 162 | const char *desc = userdata; 163 | ggml_tensor_stat_st stat = ggml_tensor_stat(src); 164 | char buffer[32]; 165 | sprintf(buffer, GGML_SHAPE_FMT, GGML_SHAPE_UNPACK(src) ); 166 | log_debug("%-12s: %s %-16s %.2e %s %+.2e", 167 | desc ? 
desc : src->name, ggml_type_name(src->type), 168 | buffer, stat.asum, stat.hash, stat.first); 169 | } 170 | 171 | struct ggml_tensor* 172 | ggml_debug_print(struct ggml_context* ctx, struct ggml_tensor* t, const char* desc, 173 | int loglvl) 174 | { 175 | if (!log_level_check(loglvl)) return t; 176 | if (!ggml_backend_buffer_is_host(t->buffer)) return t; 177 | return ggml_map_custom1_inplace(ctx, t, ggml_tensor_debug_print, 1, 178 | (void*)desc); 179 | } 180 | 181 | void ggml_tensor_debug_export(struct ggml_tensor* dst, 182 | const struct ggml_tensor* src, int ith, int nth, void* userdata) 183 | { 184 | if (ith != 0) return; 185 | GGML_ASSERT( dst->data == src->data ); 186 | 187 | const char *path = userdata; 188 | ggml_tensor_export(src, path); 189 | } 190 | 191 | struct ggml_tensor* 192 | ggml_debug_export(struct ggml_context* ctx, struct ggml_tensor* t, 193 | const char* fname) 194 | { 195 | if (!ggml_backend_buffer_is_host(t->buffer)) return t; 196 | return ggml_map_custom1_inplace(ctx, t, ggml_tensor_debug_export, 1, 197 | (void*)fname); 198 | } 199 | 200 | struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx, 201 | struct ggml_tensor* q, struct ggml_tensor* k, struct ggml_tensor* v, 202 | bool mask) 203 | { 204 | //#ifdef USE_FLASH_ATTENTION 205 | // assert(q->ne[0] == v->ne[0]); 206 | // return ggml_flash_attn_ext(ctx, q, k, v, NULL, 1.0f, 0.0f); 207 | // // [N * n_head, n_token, d_head] 208 | //#else 209 | float d_head = (float)q->ne[0]; 210 | struct ggml_tensor *kq; 211 | 212 | kq = ggml_mul_mat(ctx, k, q); // [N * n_head, n_token, n_k] 213 | kq = ggml_scale_inplace(ctx, kq, 1.0f / sqrt(d_head)); 214 | if (mask) 215 | kq = ggml_diag_mask_inf_inplace(ctx, kq, 0); 216 | 217 | kq = ggml_soft_max_inplace(ctx, kq); 218 | 219 | return ggml_mul_mat(ctx, v, kq); 220 | // [N * n_head, n_token, d_head] 221 | //#endif 222 | } 223 | -------------------------------------------------------------------------------- /src/ggml_extend.h: 
-------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #pragma once 5 | #include "ccommon/stream.h" 6 | #include "ggml.h" 7 | #include 8 | 9 | #define GGML_SHAPE_FMT "%"PRId64"x%"PRId64"x%"PRId64"x%"PRId64 10 | #define GGML_SHAPE_UNPACK(T) \ 11 | (T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3] 12 | 13 | #define GGML_TYPESHAPE_FMT "%s " GGML_SHAPE_FMT 14 | #define GGML_TYPESHAPE_ARGS(T) \ 15 | ggml_type_name((T)->type), (T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3] 16 | 17 | #define GGML_TENSOR_FMT "%s: %s %s " GGML_SHAPE_FMT 18 | #define GGML_TENSOR_ARGS(T) \ 19 | ggml_get_name(T), ggml_op_desc(T), ggml_type_name((T)->type), \ 20 | (T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3] 21 | 22 | #define GGML_TENSOR_VARS_N(X,L) \ 23 | L##0n=(X)->ne[0], L##1n=(X)->ne[1], L##2n=(X)->ne[2], L##3n=(X)->ne[3] 24 | 25 | #define GGML_TENSOR_VARS_B(X,L) \ 26 | L##0b=(X)->nb[0], L##1b=(X)->nb[1], L##2b=(X)->nb[2], L##3b=(X)->nb[3] 27 | 28 | #define GGML_TENSOR_VARS_S(X,L) \ 29 | L##eb=ggml_element_size(X),\ 30 | L##0s=(X)->nb[0]/L##eb, \ 31 | L##1s=(X)->nb[1]/L##eb, \ 32 | L##2s=(X)->nb[2]/L##eb, \ 33 | L##3s=(X)->nb[3]/L##eb 34 | 35 | struct ggml_tensor* ggml_name_prefix(struct ggml_tensor* x, const char* pre); 36 | 37 | const char* ggml_tensor_typeshape_desc(const struct ggml_tensor* x); 38 | 39 | size_t ggml_ctx_tensors_total_size(const struct ggml_context* ctx); 40 | 41 | void ggml_ctx_tensors_dump(const struct ggml_context* ctx, Stream* out); 42 | 43 | void ggml_tensor_graph_dump(const struct ggml_tensor* result, Stream* out); 44 | 45 | void ggml_tensor_export(const struct ggml_tensor* T, const char* path); 46 | 47 | typedef struct { 48 | double asum, first; 49 | char hash[9]; 50 | char valid; 51 | } ggml_tensor_stat_st; 52 | 53 | ggml_tensor_stat_st ggml_tensor_stat(const struct ggml_tensor* T); 54 | 55 | // Operations 56 | 57 | void ggml_chunk_(struct 
ggml_context* ctx, 58 | struct ggml_tensor* x, int n_chunk, int n_dim, struct ggml_tensor*** out); 59 | #define ggml_chunk(C, X, N, D, ...) \ 60 | ggml_chunk_((C), (X), (N), (D), (struct ggml_tensor**[]){__VA_ARGS__}); 61 | 62 | // Debug operations 63 | // Only works on CPU 64 | 65 | struct ggml_tensor* 66 | ggml_debug_print(struct ggml_context* ctx, struct ggml_tensor* t, 67 | const char* desc, int loglvl); 68 | 69 | #define ggml_debug4_print(...) \ 70 | ggml_debug_print(__VA_ARGS__, LOG_LVL_DEBUG4) 71 | 72 | struct ggml_tensor* 73 | ggml_debug_export(struct ggml_context* ctx, struct ggml_tensor* t, 74 | const char* fname); 75 | 76 | // Neural networks operations 77 | 78 | struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx, 79 | struct ggml_tensor* q, struct ggml_tensor* k, struct ggml_tensor* v, 80 | bool mask); 81 | -------------------------------------------------------------------------------- /src/localtensor.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Simple storage for tensors. 
/* Simple tensor storage: float data with a shape of four dimensions. */
typedef struct LocalTensor {
	float *d;  //data
	int n[4];  //shape
	int flags;
} LocalTensor;

enum {
	// Memory owned by the tensor
	LT_F_OWNMEM = 1,
	// User-specified ready state
	LT_F_READY = 2,
};

#define LT_SHAPE_FMT  "%dx%dx%dx%d"
#define LT_SHAPE_UNPACK(X)  (X).n[0], (X).n[1], (X).n[2], (X).n[3]

/* Returns 1 if S is not NULL and has a data pointer, 0 otherwise. */
static inline
size_t ltensor_good(const LocalTensor* S) {
	if (!S) return 0;
	return S->d != NULL;
}

/* Number of elements: the product of the four sizes of the shape. */
static inline
size_t ltensor_nelements(const LocalTensor* S) {
	size_t count = (size_t)S->n[0];
	for (int i=1; i<4; ++i)
		count *= S->n[i];
	return count;
}

/* Size in bytes of the data for the current shape. */
static inline
size_t ltensor_nbytes(const LocalTensor* S) {
	return ltensor_nelements(S) * sizeof(*S->d);
}
72 | * n#: slice size en elements (#: 0-3 dimension) 73 | * Li#: slice start (L: d=dst or s=src) 74 | * Ls#: slice step (L: d=dst or s=src) 75 | */ 76 | void ltensor_copy_slice(LocalTensor* dst, const LocalTensor* src, 77 | int n0 , int n1 , int n2 , int n3 , 78 | int di0, int di1, int di2, int di3, 79 | int si0, int si1, int si2, int si3, 80 | int ds0, int ds1, int ds2, int ds3, 81 | int ss0, int ss1, int ss2, int ss3 ); 82 | 83 | static inline 84 | void ltensor_copy_slice2(LocalTensor* dst, const LocalTensor* src, 85 | int n0 , int n1 , 86 | int di0, int di1, 87 | int si0, int si1, 88 | int ds0, int ds1, 89 | int ss0, int ss1 ) 90 | { 91 | ltensor_copy_slice(dst, src, n0,n1,src->n[2],src->n[3], 92 | di0,di1,0,0, si0,si1,0,0, ds0,ds1,1,1, ss0,ss1,1,1); 93 | } 94 | 95 | static inline 96 | void ltensor_to_backend(const LocalTensor* S, struct ggml_tensor* out) { 97 | assert(ltensor_nbytes(S) == ggml_nbytes(out)); 98 | ggml_backend_tensor_set(out, S->d, 0, ltensor_nbytes(S)); 99 | } 100 | 101 | static inline 102 | void ltensor_from_backend(LocalTensor* S, struct ggml_tensor* out) { 103 | ltensor_resize(S, out->ne[0], out->ne[1], out->ne[2], out->ne[3]); 104 | assert(ltensor_nbytes(S) == ggml_nbytes(out)); 105 | ggml_backend_tensor_get(out, S->d, 0, ltensor_nbytes(S)); 106 | } 107 | 108 | static inline 109 | bool ltensor_shape_equal(const LocalTensor* A, const LocalTensor* B) { 110 | return (A->n[0] == B->n[0] && A->n[1] == B->n[1] && A->n[2] == B->n[2] && 111 | A->n[3] == B->n[3]); 112 | } 113 | 114 | static inline 115 | int ltensor_shape_check(const LocalTensor* S, int n0, int n1, int n2, int n3) { 116 | if (n0>0 && n0 != S->n[0]) return -1; 117 | if (n1>0 && n1 != S->n[1]) return -1; 118 | if (n2>0 && n2 != S->n[2]) return -1; 119 | if (n3>0 && n3 != S->n[3]) return -1; 120 | return 1; 121 | } 122 | 123 | static inline 124 | int ltensor_shape_check_log(const LocalTensor* S, const char* desc, 125 | int n0, int n1, int n2, int n3) 126 | { 127 | int r = 
ltensor_shape_check(S, n0, n1, n2, n3); 128 | if (r < 0) log_error("%s wrong shape: " LT_SHAPE_FMT, 129 | desc, LT_SHAPE_UNPACK(*S)); 130 | return r; 131 | } 132 | 133 | int ltensor_finite_check(const LocalTensor* S); 134 | 135 | float ltensor_minmax(const LocalTensor* S, float* min); 136 | float ltensor_sum(const LocalTensor* S); 137 | float ltensor_mean(const LocalTensor* S); 138 | 139 | typedef struct { 140 | float asum, first, min, max; 141 | char hash[9]; 142 | char valid; 143 | } LocalTensorStats; 144 | 145 | LocalTensorStats ltensor_stat(const LocalTensor* S); 146 | 147 | void log_ltensor_stats(int loglvl, const LocalTensor* S, const char* desc); 148 | 149 | #define log_debug2_ltensor(T, D) \ 150 | log_ltensor_stats(LOG_LVL_DEBUG2, (T), (D)) 151 | 152 | #define log_debug3_ltensor(T, D) \ 153 | log_ltensor_stats(LOG_LVL_DEBUG3, (T), (D)) 154 | 155 | // Reduces the sizes by the factors. 156 | // Can be done inplace (dst = src). 157 | void ltensor_downsize(LocalTensor* dst, const LocalTensor* src, 158 | int f0, int f1, int f2, int f3); 159 | 160 | int ltensor_save_stream(const LocalTensor* S, Stream *stm); 161 | int ltensor_save_path(const LocalTensor* S, const char* path); 162 | int ltensor_load_stream(LocalTensor* S, Stream *stm); 163 | int ltensor_load_path(LocalTensor* S, const char* path); 164 | 165 | #ifdef LOCALTENSOR_USE_IMAGE 166 | void ltensor_from_image(LocalTensor* S, const Image* img); 167 | void ltensor_to_image(const LocalTensor* S, Image* img); 168 | 169 | // Load separately the last channel (usually the transparancy) 170 | void ltensor_from_image_alpha(LocalTensor* S, LocalTensor* alpha, const Image* img); 171 | 172 | int ltensor_img_redblue(const LocalTensor* S, Image* img); 173 | int ltensor_img_redblue_path(const LocalTensor* S, const char* path); 174 | #endif 175 | 176 | #define ltensor_for(T,V,I) \ 177 | for (unsigned V=(I), V##e_=ltensor_nelements(&(T)); V 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #include "lora.h" 5 | #include 
"ccommon/logging.h" 6 | #include "ggml.h" 7 | #include 8 | 9 | int lora_apply_inner(TSTensorEntry* dst, TSTensorEntry* ld, TSTensorEntry* lu, 10 | TSTensorEntry *ls, TSTensorEntry *la, float mult, MLCtx* C) 11 | { 12 | int R=1; 13 | TSTensorData td_ld={0}, td_lu={0}, td_dst={0}; 14 | 15 | unsigned n_inner = ld->shape[ld->shape_n-1], 16 | n0 = tstore_tensor_count(ld) / n_inner, 17 | n1 = tstore_tensor_count(lu) / n_inner; 18 | 19 | if (!(dst->shape_n >= 2 && 20 | ld->shape_n == dst->shape_n && 21 | lu->shape_n == dst->shape_n && 22 | tstore_tensor_count(dst) == n0 * n1)) 23 | { 24 | ERROR_LOG(-1, "lora up/down invalid shapes"); 25 | } 26 | 27 | // Must init ggml before any tensor conversion 28 | mlctx_begin(C, "lora"); 29 | C->c.flags_e |= MLB_F_QUIET; 30 | 31 | // Scale get 32 | float scale=1; 33 | if (ls) { 34 | TRY( tstore_tensor_data_get(ls, TS_DTYPE_F32, 0, &td_ld) ); 35 | scale = *(float*)td_ld.data; 36 | } 37 | else if (la) { 38 | TRY( tstore_tensor_data_get(la, TS_DTYPE_F32, 0, &td_ld) ); 39 | scale = *(float*)td_ld.data / n_inner; 40 | } 41 | scale *= mult; 42 | assert( scale > 0 ); 43 | 44 | // Get data 45 | int wtype = C->c.wtype; 46 | int tsdt = tstore_dtype_from_ggml(wtype); 47 | assert( tsdt > 0 ); 48 | 49 | TRY( tstore_tensor_data_get(ld , tsdt, 0, &td_ld ) ); 50 | TRY( tstore_tensor_data_get(lu , tsdt, 0, &td_lu ) ); 51 | TRY( tstore_tensor_data_get(dst, tsdt, TSTDG_F_PERM | TSTDG_F_WRITE, &td_dst) ); 52 | 53 | // Make graph 54 | MLTensor *t_ld, *t_lu, *t_dst, *t_out; 55 | t_ld = mlctx_input_new(C, "ld" , wtype, n0, n_inner, 1, 1); 56 | t_lu = mlctx_input_new(C, "lu" , wtype, n_inner, n1, 1, 1); 57 | t_dst = mlctx_input_new(C, "dst", wtype, n0, n1, 1, 1); 58 | 59 | t_out = ggml_cont(C->cc, ggml_transpose(C->cc, t_ld)); 60 | t_out = ggml_mul_mat(C->cc, t_lu, t_out); 61 | t_out = ggml_cont(C->cc, ggml_transpose(C->cc, t_out)); 62 | t_out = ggml_scale_inplace(C->cc, t_out, scale); 63 | t_out = ggml_add_inplace(C->cc, t_dst, t_out); 64 | 65 | 
mlctx_tensor_add(C, "output", t_out); 66 | TRY( mlctx_prep(C) ); 67 | 68 | // Set inputs 69 | ggml_backend_tensor_set(t_ld , td_ld .data, 0, td_ld .size); 70 | ggml_backend_tensor_set(t_lu , td_lu .data, 0, td_lu .size); 71 | ggml_backend_tensor_set(t_dst, td_dst.data, 0, td_dst.size); 72 | 73 | // Compute 74 | TRY( mlctx_compute(C) ); 75 | 76 | // Store output 77 | assert( ggml_nbytes(t_out) == td_dst.size ); 78 | ggml_backend_tensor_get(t_out, td_dst.data, 0, td_dst.size); 79 | 80 | // Check 81 | float v=0; 82 | if (wtype == GGML_TYPE_F16) 83 | v = ggml_fp16_to_fp32(*(ggml_fp16_t*)td_dst.data); 84 | else if (wtype == GGML_TYPE_F32) 85 | v = *(float*)td_dst.data; 86 | if (!isfinite(v)) 87 | ERROR_LOG(-1, "NaN in LoRA result"); 88 | 89 | end: 90 | mlctx_end(C); 91 | tstore_tdata_free(&td_dst); 92 | tstore_tdata_free(&td_lu); 93 | tstore_tdata_free(&td_ld); 94 | return R; 95 | } 96 | 97 | int lora_apply(TensorStore* ts_dst, TensorStore* ts_lora, float mult, 98 | MLCtx* ctx) 99 | { 100 | int R=1; 101 | StrSlice name={0}; 102 | TSTensorData td={0}; 103 | DynStr tmps=NULL; 104 | 105 | vec_forp(TSTensorEntry, ts_lora->tensors, ld, 0) { 106 | name = strsto_get(ts_lora->ss, ld->key); 107 | if (!( strsl_suffix_trim(&name, strsl_static(".lora_down.weight")) )) 108 | continue; 109 | 110 | dstr_copy(tmps, name.s, name.b); 111 | dstr_appendz(tmps, ".weight"); 112 | TSTensorEntry *dst = tstore_tensor_get(ts_dst, tmps); 113 | if (!dst) ERROR_LOG(-1, "lora tensor not found in model: %s", tmps); 114 | 115 | dstr_copy(tmps, name.s, name.b); 116 | dstr_appendz(tmps, ".lora_up.weight"); 117 | TSTensorEntry *lu = tstore_tensor_get(ts_lora, tmps); 118 | if (!lu) ERROR_LOG(-1, "lora up tensor not found: %s", tmps); 119 | 120 | dstr_copy(tmps, name.s, name.b); 121 | dstr_appendz(tmps, ".scale"); 122 | TSTensorEntry *ls = tstore_tensor_get(ts_lora, tmps); 123 | 124 | dstr_copy(tmps, name.s, name.b); 125 | dstr_appendz(tmps, ".alpha"); 126 | TSTensorEntry *la = tstore_tensor_get(ts_lora, 
tmps); 127 | 128 | // Apply 129 | log_debug("lora apply %.*s", (int)name.s, name.b); 130 | TRY( lora_apply_inner(dst, ld, lu, ls, la, mult, ctx) ); 131 | } 132 | 133 | end: 134 | if (R<0) log_error("lora tensor '%.*s': %x", (int)name.s, name.b, -R); 135 | tstore_tdata_free(&td); 136 | dstr_free(tmps); 137 | return R; 138 | } 139 | -------------------------------------------------------------------------------- /src/lora.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * LoRA: low rank adaptation. 5 | * Ref.: Hu et al. (2021) "LoRA..." 6 | */ 7 | #pragma once 8 | #include "ccompute/tensorstore.h" 9 | #include "mlblock.h" 10 | 11 | int lora_apply(TensorStore* ts_dst, TensorStore* ts_lora, float mult, 12 | MLCtx* ctx); 13 | -------------------------------------------------------------------------------- /src/mlblock.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024-2025, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Machine learning blocks of operations. 
5 | */ 6 | #pragma once 7 | #include "ccommon/vector.h" 8 | #include "ccommon/stream.h" 9 | #include "ccommon/logging.h" 10 | #include "ccommon/stringstore.h" 11 | #include "ccompute/tensorstore.h" 12 | #include "localtensor.h" 13 | 14 | #include "ggml.h" 15 | #include "ggml-alloc.h" 16 | #include "ggml-backend.h" 17 | #include "ggml_extend.h" 18 | 19 | //TODO: load: if CPU backend, do not copy tensor data 20 | //TODO: option: free compute, keep params in memory 21 | 22 | typedef struct ggml_tensor MLTensor; 23 | 24 | enum { 25 | MLB_NAME_BLOCK_BEGIN = -0x1000, 26 | MLB_NAME_SPLIT = -0x1001, 27 | }; 28 | 29 | enum MLCtxFlags { 30 | // Prepare the computation to allow multiple calls to mlctx_compute 31 | MLB_F_MULTI_COMPUTE = 1, 32 | // Do produce any information output 33 | MLB_F_QUIET = 2, 34 | //(debug) Dump the computation graph to a file 35 | MLB_F_DUMP = 4, 36 | }; 37 | 38 | typedef struct { 39 | MLTensor *tensor; 40 | StringInt name, 41 | key; //Full name to load from the tensor store 42 | } MLCtxTensor; 43 | 44 | typedef struct { 45 | ggml_backend_t backend; //Fill 46 | TensorStore *tstore; //Fill 47 | StringStore *ss; //Tensor names are stored here 48 | 49 | struct ggml_context *cp, *cc; //params, compute 50 | struct ggml_cgraph *graph; 51 | ggml_gallocr_t allocr; 52 | 53 | #if USE_GGML_SCHED 54 | ggml_backend_t backend2; //Fill 55 | ggml_backend_sched_t sched; 56 | ggml_backend_buffer_t bkbuf; 57 | #endif 58 | 59 | MLCtxTensor * tensors; //vector 60 | MLTensor ** inputs; //vector 61 | MLTensor * result; 62 | 63 | // Configuration 64 | struct { 65 | enum ggml_type wtype; //weights type (default F16) 66 | unsigned n_tensor_max; 67 | char tpath_sep; //default: '.' 
68 | const char *tprefix; //Tensor names prefix 69 | const char *name; //Computation name, set by mlctx_begin 70 | int flags; //MLB_F_* 71 | int flags_e; //Flags valid until the next mlctx_begin 72 | } c; 73 | 74 | // Information/statistics 75 | struct MLCtxInfo { 76 | size_t mem_params, mem_compute, mem_total; 77 | double t_load, t_compute; 78 | unsigned n_compute, n_conv; 79 | } info; 80 | } MLCtx; 81 | 82 | void mlctx_free(MLCtx* C); 83 | 84 | void mlctx_begin(MLCtx* C, const char* name); 85 | 86 | void mlctx_end(MLCtx* C); 87 | 88 | // All in one 89 | int mlctx_run_(MLCtx* C, LocalTensor* out, const LocalTensor** inputs); 90 | #define mlctx_run(C,O,...) \ 91 | mlctx_run_((C), (O), (const LocalTensor*[]){ __VA_ARGS__, NULL }) 92 | 93 | // Build, alloc and load 94 | // Pending: set input, compute, get output, free 95 | int mlctx_prep(MLCtx* C); 96 | 97 | /* Step by step interface */ 98 | 99 | // No need to call build 100 | void mlctx_block_graph_dump(const MLCtx* C, Stream* out); 101 | int mlctx_block_graph_dump_path(const MLCtx* C, const char* path); 102 | 103 | int mlctx_build_alloc(MLCtx* C, MLTensor* result); 104 | 105 | int mlctx_tstore_load(MLCtx* C, TensorStore* ts); 106 | 107 | int mlctx_compute(MLCtx* C); 108 | 109 | /* aux */ 110 | 111 | int tstore_tensor_read(TSTensorEntry*, struct ggml_tensor*); 112 | 113 | /* Functions to define blocks */ 114 | 115 | static inline 116 | void mlctx_block_begin(MLCtx* C) 117 | { 118 | vec_push(C->tensors, ((MLCtxTensor){ NULL, MLB_NAME_BLOCK_BEGIN })); 119 | log_debug2("ML block begin"); 120 | } 121 | 122 | static inline 123 | MLTensor* mlctx_tensor_add(MLCtx* C, const char* name, MLTensor* tensor) 124 | { 125 | ggml_name_prefix(tensor, name); 126 | bool param = (tensor->op == GGML_OP_NONE); 127 | int id = strsto_add(C->ss, strsl_fromz(name)); 128 | vec_push(C->tensors, ((MLCtxTensor){ tensor, id })); 129 | log_debug2("ML %s: %s " GGML_TYPESHAPE_FMT, param ? 
"param" : "op", 130 | name, GGML_TYPESHAPE_ARGS(tensor)); 131 | return tensor; 132 | } 133 | 134 | static inline 135 | MLTensor* mlctx_split_add(MLCtx* C, MLTensor* tensor) 136 | { 137 | vec_push(C->tensors, ((MLCtxTensor){ tensor, MLB_NAME_SPLIT })); 138 | log_debug2("ML graph split"); 139 | return tensor; 140 | } 141 | 142 | static inline 143 | MLTensor* mlctx_input_new(MLCtx* C, const char* name, enum ggml_type dtype, 144 | int n0, int n1, int n2, int n3) 145 | { 146 | MLTensor *T = ggml_new_tensor_4d(C->cp, dtype, n0,n1,n2,n3); 147 | ggml_set_name(T, name); 148 | ggml_set_input(T); 149 | vec_push(C->inputs, T); 150 | return T; 151 | } 152 | 153 | static inline 154 | MLTensor* mlctx_param_new(MLCtx* C, const char* name, enum ggml_type dtype, 155 | int n0, int n1, int n2, int n3) 156 | { 157 | MLTensor *T = ggml_new_tensor_4d(C->cp, dtype, n0,n1,n2,n3); 158 | ggml_set_input(T); 159 | return mlctx_tensor_add(C, name, T); 160 | } 161 | -------------------------------------------------------------------------------- /src/mlblock_nn.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Blocks commonly used in neural networks. 
5 | */ 6 | #pragma once 7 | #include "mlblock.h" 8 | 9 | MLTensor* mlb_nn_linear(MLCtx* C, MLTensor* x, int n_out, bool bias); 10 | 11 | MLTensor* mlb_nn_conv2d(MLCtx* C, MLTensor* x, 12 | int ch_out, 13 | int k0, int k1, int s0, int s1, int p0, int p1, int d0, int d1, 14 | bool bias); 15 | 16 | MLTensor* mlb_nn_layer_norm(MLCtx* C, MLTensor* x, 17 | bool affine, bool bias, float eps); 18 | 19 | MLTensor* mlb_nn_groupnorm(MLCtx* C, MLTensor* x, 20 | int n_grp, bool affine, float eps); 21 | 22 | static inline 23 | MLTensor* mlb_nn_groupnorm32(MLCtx* C, MLTensor* x) { 24 | return mlb_nn_groupnorm(C, x, 32, true, 1e-6); 25 | } 26 | 27 | MLTensor* mlb_downsample(MLCtx* C, MLTensor* x, int ch_out, bool vae); 28 | 29 | MLTensor* mlb_upsample(MLCtx* C, MLTensor* x, int ch_out); 30 | 31 | MLTensor* mlb_resnet(MLCtx* C, MLTensor* x, MLTensor* emb, int ch_out); 32 | 33 | MLTensor* mlb_GEGLU(MLCtx* C, MLTensor* x, int d_out); 34 | 35 | MLTensor* mlb_feed_forward(MLCtx* C, MLTensor* x, int d_out, int mult); 36 | 37 | MLTensor* mlb_attn_mhead(MLCtx* C, MLTensor* q, MLTensor* k, MLTensor* v, 38 | int d_out, int d_embed, int n_head, bool mask, bool bias, bool bias_out); 39 | 40 | MLTensor* mlb_basic_transf(MLCtx* C, MLTensor* x, MLTensor* c, 41 | int d_out, int d_embed, int n_head); 42 | -------------------------------------------------------------------------------- /src/mlimgsynth_options_get.c.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * mlimgsynth library option_get implementation. 
5 | */ 6 | OPTION( MODEL ) { 7 | ARG_STR( S->c.path_model ); 8 | } 9 | OPTION( MODEL_TYPE ) { 10 | ARG_ENUM( S->c.model_type, mlis_model_type_froms ); 11 | } 12 | OPTION( PROMPT ) { 13 | ARG_STR( S->c.prompt_raw ); 14 | } 15 | OPTION( NPROMPT ) { 16 | ARG_STR( S->c.nprompt_raw ); 17 | } 18 | //TODO: complete 19 | -------------------------------------------------------------------------------- /src/mlimgsynth_options_set.c.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * mlimgsynth library option_set implementation. 5 | */ 6 | OPTION( BACKEND ) { 7 | ARG_STR(name , 0, 65535) 8 | ARG_STR(params, 0, 65535) 9 | dstr_copy(S->c.backend, name.s, name.b); 10 | dstr_copy(S->c.be_params, params.s, params.b); 11 | S->rflags &= ~MLIS_READY_BACKEND; 12 | } 13 | OPTION( MODEL ) { 14 | ARG_STR_NO_PARSE(path, 1, 65535) 15 | dstr_copy(S->c.path_model, path.s, path.b); 16 | S->rflags &= ~MLIS_READY_MODEL; 17 | } 18 | OPTION( TAE ) { 19 | ARG_STR_NO_PARSE(path, 0, 65535) 20 | dstr_copy(S->c.path_tae, path.s, path.b); 21 | bool en = !dstr_empty(S->c.path_tae); 22 | ccFLAG_SET(S->c.flags, MLIS_CF_USE_TAE, en); 23 | } 24 | OPTION( MODEL_TYPE ) { 25 | ARG_ENUM(id, mlis_model_type_froms) 26 | TRY( mlis_model_type_set(S, id) ); 27 | } 28 | OPTION( AUX_DIR ) { 29 | ARG_STR_NO_PARSE(path, 0, 65535) 30 | dstr_copy(S->c.path_aux, path.s, path.b); 31 | } 32 | OPTION( LORA_DIR ) { 33 | ARG_STR_NO_PARSE(path, 0, 65535) 34 | dstr_copy(S->c.path_lora_dir, path.s, path.b); 35 | } 36 | OPTION( LORA ) { 37 | ARG_STR(path, 1, 65535) 38 | ARG_FLOAT(mult, 0, 1, 1); 39 | TRY( mlis_cfg_lora_add(S, path, mult, 0) ); 40 | } 41 | OPTION( LORA_CLEAR ) { 42 | mlis_cfg_loras_free(S); 43 | } 44 | OPTION( PROMPT ) { 45 | ARG_STR_NO_PARSE(text, 0, 65535) 46 | dstr_copy(S->c.prompt_raw, text.s, text.b); 47 | if (S->c.flags & MLIS_CF_NO_PROMPT_PARSE) 48 | prompt_text_set_raw(&S->c.prompt, 
text); 49 | else { 50 | TRY( prompt_text_set_parse(&S->c.prompt, text) ); 51 | vec_forp(struct PromptTextLora, S->c.prompt.loras, p, 0) 52 | TRY( mlis_cfg_lora_add(S, p->name, p->w, MLIS_LF_PROMPT) ); 53 | } 54 | } 55 | OPTION( NPROMPT ) { 56 | ARG_STR_NO_PARSE(text, 0, 65535) 57 | dstr_copy(S->c.nprompt_raw, text.s, text.b); 58 | if (S->c.flags & MLIS_CF_NO_PROMPT_PARSE) 59 | prompt_text_set_raw(&S->c.nprompt, text); 60 | else { 61 | TRY( prompt_text_set_parse(&S->c.nprompt, text) ); 62 | vec_forp(struct PromptTextLora, S->c.nprompt.loras, p, 0) 63 | TRY( mlis_cfg_lora_add(S, p->name, p->w, MLIS_LF_PROMPT) ); 64 | } 65 | } 66 | OPTION( NO_PROMPT_PARSE ) { 67 | ARG_BOOL(en) 68 | ccFLAG_SET(S->c.flags, MLIS_CF_NO_PROMPT_PARSE, en); 69 | } 70 | OPTION( IMAGE_DIM ) { 71 | ARG_INT(w, 0, 65535, 0) 72 | ARG_INT(h, 0, 65535, 0) 73 | S->c.width = w; 74 | S->c.height = h; 75 | } 76 | OPTION( BATCH_SIZE ) { 77 | ARG_INT(i, 0, 1024, 0) 78 | S->c.n_batch = i; 79 | } 80 | OPTION( CLIP_SKIP ) { 81 | ARG_INT(i, 0, 255, 0) 82 | S->c.clip_skip = i; 83 | } 84 | OPTION( CFG_SCALE ) { 85 | ARG_FLOAT(f, 0, 255, NAN) 86 | S->c.cfg_scale = f; 87 | } 88 | OPTION( METHOD ) { 89 | #ifdef ARG_IS_STR 90 | StrSlice ss = strsl_fromz(value); 91 | if (strsl_suffixz_trim(&ss, "_a")) { // Shortcut for ancestral methods 92 | int id = mlis_method_froms(ss); 93 | if (id < 0) 94 | ERROR_LOG(MLIS_E_OPT_VALUE, "invalid method name '%s'", value); 95 | S->sampler.c.method = id; 96 | S->sampler.c.s_ancestral = 1; 97 | goto done; 98 | } 99 | #endif 100 | ARG_ENUM(id, mlis_method_froms) 101 | S->sampler.c.method = id; 102 | } 103 | OPTION( SCHEDULER ) { 104 | ARG_ENUM(id, mlis_sched_froms) 105 | S->sampler.c.sched = id; 106 | } 107 | OPTION( STEPS ) { 108 | ARG_INT(i, 0, 1000, 0) 109 | S->sampler.c.n_step = i; 110 | } 111 | OPTION( F_T_INI ) { 112 | ARG_FLOAT(f, 0, 1, NAN) 113 | S->sampler.c.f_t_ini = f; 114 | } 115 | OPTION( F_T_END ) { 116 | ARG_FLOAT(f, 0, 1, NAN) 117 | S->sampler.c.f_t_end = f; 118 | } 
119 | OPTION( S_NOISE ) { 120 | ARG_FLOAT(f, 0, 255, NAN) 121 | S->sampler.c.s_noise = f; 122 | } 123 | OPTION( S_ANCESTRAL ) { 124 | ARG_FLOAT(f, 0, 255, NAN) 125 | S->sampler.c.s_ancestral = f; 126 | } 127 | OPTION( IMAGE ) { 128 | ARG_C(img, const MLIS_Image*) 129 | if (img->c != 3 && img->c != 4) 130 | ERROR_LOG(MLIS_E_IMAGE, 131 | "invalid number of channels in image: %d", img->c); 132 | if (mlis_tensor_from_image(&S->image, img) < 0) 133 | ERROR_LOG(MLIS_E_IMAGE, "invalid image"); 134 | S->c.tuflags |= MLIS_TUF_IMAGE; 135 | 136 | if (S->image.n[2] == 4) { // Take mask from last channel (alpha) 137 | unsigned w = S->image.n[0]; 138 | unsigned h = S->image.n[1]; 139 | mlis_tensor_resize(&S->mask, w, h, 1, 1); 140 | memcpy(S->mask.d, S->image.d+(w*h*3*4), w*h*4); 141 | S->image.n[2] = 3; 142 | S->c.tuflags |= MLIS_TUF_MASK; 143 | } 144 | } 145 | OPTION( IMAGE_MASK ) { 146 | ARG_C(img, const MLIS_Image*) 147 | if (img->c != 1) 148 | ERROR_LOG(MLIS_E_IMAGE, 149 | "invalid number of channels in image mask: %d", img->c); 150 | if (mlis_tensor_from_image(&S->mask, img) < 0) 151 | ERROR_LOG(MLIS_E_IMAGE, "invalid image mask"); 152 | S->c.tuflags |= MLIS_TUF_MASK; 153 | } 154 | OPTION( NO_DECODE ) { 155 | ARG_BOOL(en) 156 | ccFLAG_SET(S->c.flags, MLIS_CF_NO_DECODE, en); 157 | } 158 | OPTION( TENSOR_USE_FLAGS ) { 159 | ARG_FLAGS(fl) 160 | S->c.tuflags = fl; 161 | } 162 | OPTION( SEED ) { 163 | #ifdef ARG_IS_STR 164 | if (!vcur[0]) goto done; // Empty string -> keep random seed 165 | #endif 166 | ARG_UINT64(i) 167 | g_rng.seed = i; //TODO: local rng 168 | } 169 | OPTION( VAE_TILE ) { 170 | ARG_INT(i, 0, 65535, 0) 171 | S->c.vae_tile = i; 172 | } 173 | OPTION( UNET_SPLIT ) { 174 | ARG_BOOL(en) 175 | ccFLAG_SET(S->c.flags, MLIS_CF_UNET_SPLIT, en); 176 | } 177 | OPTION( WEIGHT_TYPE ) { 178 | #ifdef ARG_IS_STR 179 | int id = tstore_dtype_fromz(vcur); 180 | id = tstore_dtype_to_ggml(id); 181 | if (id >= 0) { 182 | S->ctx.c.wtype = id; 183 | S->c.flags |= 
MLIS_CF_WEIGHT_TYPE_SET; 184 | goto done; 185 | } 186 | #endif 187 | ARG_INT(i, -1, GGML_TYPE_COUNT-1, 0) 188 | if (i == -1) { //unset 189 | S->ctx.c.wtype = GGML_TYPE_F16; 190 | S->c.flags &= ~MLIS_CF_WEIGHT_TYPE_SET; 191 | } else { 192 | S->ctx.c.wtype = i; 193 | S->c.flags |= MLIS_CF_WEIGHT_TYPE_SET; 194 | } 195 | } 196 | OPTION( THREADS ) { 197 | ARG_INT(i, 0, 65535, 0) 198 | S->c.n_thread = i; 199 | S->rflags &= ~MLIS_READY_BACKEND; //this is overkill... 200 | } 201 | OPTION( DUMP_FLAGS ) { 202 | ARG_FLAGS(fl) 203 | S->c.dump_flags = fl; 204 | } 205 | OPTION( CALLBACK ) { 206 | ARG_C(func, MLIS_Callback) 207 | ARG_C(user, void*) 208 | S->callback = func; 209 | S->callback_ud = user; 210 | } 211 | OPTION( ERROR_HANDLER ) { 212 | ARG_C(func, MLIS_ErrorHandler) 213 | ARG_C(user, void*) 214 | S->errh = func; 215 | S->errh_ud = user; 216 | } 217 | OPTION( LOG_LEVEL ) { 218 | // Warning: this sets a global configuration, not associated with the context. 219 | 220 | #ifdef ARG_IS_STR 221 | int lvls = mlis_loglvl_fromz(vcur); 222 | if (lvls >= 0) { 223 | log_level_set(lvls); 224 | goto done; 225 | } 226 | #endif 227 | 228 | ARG_INT(lvl, 0, 0x2ff, -1) 229 | if ((lvl & 0xf00) == 0x100) { 230 | // Increase verbosity, starting directly from INFO. 231 | if (!log_level_check(LOG_LVL_INFO)) 232 | log_level_set(LOG_LVL_INFO); 233 | else 234 | log_level_inc(lvl & 0xff); 235 | } else if ((lvl & 0xf00) == 0x200) 236 | log_level_inc(-(lvl & 0xff)); 237 | else 238 | log_level_set(lvl); 239 | } 240 | -------------------------------------------------------------------------------- /src/prompt_preproc.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include "ccommon/ccommon.h" 6 | #include "ccommon/strslice.h" 7 | #include "ccommon/vector.h" 8 | #include "ccommon/logging.h" 9 | #include 10 | 11 | #ifndef MLIMGSYNTH_H 12 | enum { MLIS_E_PROMPT_PARSE = -5 }; 13 | #endif 14 | 15 | /* Text prompt structure containing the preprocesed text, weights, and loras. 16 | */ 17 | typedef struct PromptText { 18 | DynStr text; 19 | DynStr data; 20 | struct PromptTextChunk { 21 | StrSlice text; 22 | float w; // Weight / attention multiplier 23 | } *chunks; //vector 24 | struct PromptTextLora { 25 | StrSlice name; 26 | float w; 27 | } *loras; //vector 28 | } PromptText; 29 | 30 | static 31 | void prompt_text_free(PromptText* S) 32 | { 33 | dstr_free(S->text); 34 | dstr_free(S->data); 35 | vec_free(S->chunks); 36 | vec_free(S->loras); 37 | } 38 | 39 | static 40 | void prompt_text_clear(PromptText* S) 41 | { 42 | dstr_resize(S->text, 0); 43 | dstr_resize(S->data, 0); 44 | vec_resize(S->chunks, 0); 45 | vec_resize(S->loras, 0); 46 | } 47 | 48 | static 49 | void prompt_text_set_raw(PromptText* S, const StrSlice ss) 50 | { 51 | prompt_text_clear(S); 52 | dstr_copy(S->text, strsl_len(ss), strsl_begin(ss)); 53 | vec_resize(S->chunks, 1); 54 | S->chunks[0] = (struct PromptTextChunk){ strsl_fromd(S->text), 1.0 }; 55 | } 56 | 57 | static 58 | int prompt_text_option_parse(PromptText* S, StrSlice ss) 59 | { 60 | int R=1; 61 | 62 | if (strsl_prefix_trim(&ss, strsl_static("lora:"))) 63 | { 64 | const char *beg=strsl_begin(ss), *sep=beg, *end=strsl_end(ss); 65 | while (sep < end && *sep != ':') sep++; // Find multiplier option 66 | 67 | float mult=1; 68 | if (*sep == ':') { // Optional multiplier 69 | char *tail=NULL; 70 | mult = strtof(sep+1, &tail); 71 | if (tail != end) 72 | ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: invalid lora multiplier"); 73 | } 74 | 75 | //TRY( mlis_cfg_lora_add(S, strsl_fromr(ss.b, sep), mult, MLIS_LF_PROMPT) ); 76 | 77 | // Store lora name 78 | 
unsigned len = sep - beg; 79 | dstr_append(S->data, len, beg); 80 | 81 | // Add lora to list 82 | unsigned nl = vec_count(S->loras); 83 | vec_append_zero(S->loras, 1); 84 | S->loras[nl].name = strsl_make(dstr_end(S->data) - len, len); 85 | S->loras[nl].w = mult; 86 | } 87 | else { 88 | ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: unknown option '%.*s'", 89 | (int)ss.s, ss.b); 90 | } 91 | 92 | end: 93 | return R; 94 | } 95 | 96 | /* Parse prompt like in stable-diffusion-webui. 97 | * "normal text" -> 1 chunk 98 | * "normal (weighted by 1.1) normal" -> 3 chunks 99 | * "normal ((weighted by 1.1*1.1)) normal" -> 3 chunks 100 | * "normal [weighted by 1/1.1) normal" -> 3 chunks 101 | * "normal (weighted by 1.5:1.5) normal" -> 3 chunks 102 | * "normal BREAK normal" -> "normal normal" (ignores BREAK for now) 103 | */ 104 | static 105 | int prompt_text_set_parse(PromptText* S, const StrSlice ss) 106 | { 107 | int R=1; 108 | 109 | prompt_text_clear(S); 110 | 111 | // Reserve memory so that pointers are not invalidated. 
112 | dstr_realloc(S->text, strsl_len(ss)*2); 113 | dstr_realloc(S->data, strsl_len(ss)*2); 114 | 115 | vec_resize_zero(S->chunks, 1); 116 | S->chunks[0].text = strsl_make(S->text, 0); 117 | S->chunks[0].w = 1; 118 | 119 | int n_paren=0, n_braket=0; 120 | 121 | strsl_for(ss, cur, end, 0) 122 | { 123 | if (*cur == '\\') { // Escape 124 | if (cur+1 < end) { 125 | cur++; 126 | char c = *cur; 127 | switch (c) { 128 | case 'n': c = '\n'; break; 129 | } 130 | dstr_push(S->text, c); 131 | } 132 | } 133 | else if (*cur == '(' || *cur == ')' || *cur == '[' || *cur == ']') { 134 | switch (*cur) { 135 | case '(': n_paren++; break; 136 | case ')': n_paren--; break; 137 | case '[': n_braket++; break; 138 | case ']': n_braket--; break; 139 | } 140 | if (n_paren < 0 || n_braket < 0) 141 | ERROR_LOG(MLIS_E_PROMPT_PARSE, 142 | "prompt: unmatched ')' or ']'"); 143 | //if (n_paren > 0 && n_braket > 0) 144 | // ERROR_LOG(MLIS_E_PROMPT_PARSE, 145 | // "prompt: mix of emphasis with '(' and '['"); 146 | 147 | const char *e = dstr_end(S->text); 148 | //unsigned lvl = n_paren - n_braket; 149 | float w = pow(1.1, n_paren - n_braket); //TODO: cfg? 
150 | 151 | unsigned ic = vec_count(S->chunks) -1; 152 | if (S->chunks[ic].text.b == e) { 153 | S->chunks[ic].w = w; 154 | } else { 155 | // Finish previous chunk 156 | S->chunks[ic].text.s = e - S->chunks[ic].text.b; 157 | // New chunk 158 | vec_append_zero(S->chunks, 1); 159 | ic++; 160 | S->chunks[ic].text.b = e; 161 | S->chunks[ic].w = w; 162 | } 163 | } 164 | else if (*cur == ':' && (n_paren > 0 || n_braket > 0)) { 165 | if (!(n_paren == 1 && n_braket == 0)) 166 | ERROR_LOG(MLIS_E_PROMPT_PARSE, 167 | "prompt: custom emphasis multiplier outside of '()'"); 168 | 169 | char *tail=NULL; 170 | float w=0; 171 | if (cur+1 < end) { 172 | cur++; 173 | w = strtof(cur, &tail); //TODO: restrict to an slice 174 | } 175 | if (!(tail && tail < end && *tail == ')')) 176 | ERROR_LOG(MLIS_E_PROMPT_PARSE, 177 | "prompt: invalid emphasis with ':'"); 178 | 179 | cur = tail-1; 180 | vec_last(S->chunks, 0).w = w; 181 | } 182 | else if (*cur == '<') { 183 | const char *e=cur+1; 184 | while (e < end && *e != '>') ++e; 185 | if (*e != '>') 186 | ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: '<' not matched with '>'"); 187 | TRY( prompt_text_option_parse(S, strsl_fromr(cur+1, e)) ); 188 | cur = e; 189 | } 190 | else if (*cur == 'B' && cur+5 < end && !memcmp(cur, "BREAK", 5)) { 191 | cur += 4; 192 | } 193 | else dstr_push(S->text, *cur); 194 | } 195 | 196 | // Finish last chunk 197 | unsigned ic = vec_count(S->chunks) - 1; 198 | S->chunks[ic].text = strsl_fromr(S->chunks[ic].text.b, dstr_end(S->text)); 199 | 200 | #ifndef NDEBUG 201 | vec_for(S->chunks, i, 0) { 202 | assert( strsl_begin(S->chunks[i].text) >= S->text ); 203 | assert( strsl_end(S->chunks[i].text) <= dstr_end(S->text) ); 204 | } 205 | #endif 206 | 207 | end: 208 | return R; 209 | } 210 | -------------------------------------------------------------------------------- /src/sampling.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #include "sampling.h" 5 | #include "ccommon/ccommon.h" 6 | #include "ccommon/rng_philox.h" 7 | #include "ccommon/logging.h" 8 | #include 9 | 10 | #define log_vec(LVL,DESC,VEC,VAR,I0,...) \ 11 | if (log_level_check(LVL)) { \ 12 | log_line_begin(LVL); \ 13 | log_line_str(DESC ":"); \ 14 | vec_for(VEC,VAR,I0) log_line_strf(" " __VA_ARGS__); \ 15 | log_line_end(); \ 16 | } 17 | 18 | #define log_debug_vec(...) log_vec(LOG_LVL_DEBUG, __VA_ARGS__) 19 | 20 | void dnsamp_free(DenoiseSampler* S) 21 | { 22 | ltensor_free(&S->noise); 23 | ltensor_free(&S->x0); 24 | solver_free(&S->solver); 25 | vec_free(S->sigmas); 26 | } 27 | 28 | int dnsamp_init(DenoiseSampler* S) 29 | { 30 | int R=1; 31 | 32 | // Solver 33 | if (S->c.method <= 0) S->c.method = SOLVER_METHOD_EULER; 34 | 35 | S->solver.i_step = 0; 36 | S->solver.C = solver_class_get(S->c.method); 37 | if (!S->solver.C) 38 | ERROR_LOG(-1, "invalid sampling method %d", S->c.method); 39 | 40 | // Scheduling 41 | // Compute times and sigmas for inference 42 | S->n_step = S->c.n_step; 43 | if (S->n_step < 1) S->n_step = 20; 44 | 45 | S->nfe_per_step = S->solver.C->n_fe; 46 | 47 | // Reduce number of steps to keep the number of neural function evaluations 48 | if (S->nfe_per_step > 1) 49 | S->n_step = (S->n_step + S->nfe_per_step-1) / S->nfe_per_step; 50 | 51 | S->nfe_per_step *= S->nfe_per_dxdt; 52 | 53 | // Reduces the number of steps to keep the step size the same (img2img) 54 | IFNPOSSET(S->c.f_t_ini, 1); 55 | S->n_step = S->n_step * (S->c.f_t_ini - S->c.f_t_end) +0.5; 56 | if (S->n_step < 1) S->n_step = 1; 57 | 58 | // Calculate noise levels / times 59 | vec_resize(S->sigmas, S->n_step+1); 60 | S->sigmas[S->n_step] = 0; 61 | 62 | float t_ini = (S->unet_p->n_step_train - 1) * S->c.f_t_ini; 63 | float t_end = (S->unet_p->n_step_train - 1) * S->c.f_t_end; 64 | 65 | IFFALSESET(S->c.sched, DNSAMP_SCHED_UNIFORM); 66 | switch (S->c.sched) { 67 | case DNSAMP_SCHED_UNIFORM: { 68 | 
float b = t_ini, 69 | f = S->n_step>1 ? (t_end-t_ini)/(S->n_step-1) : 0; 70 | for (unsigned i=0; in_step; ++i) 71 | S->sigmas[i] = unet_t_to_sigma(S->unet_p, b+i*f); 72 | } break; 73 | case DNSAMP_SCHED_KARRAS: { 74 | // Uses the model's min and max sigma instead of 0.1 and 10. 75 | float smin = unet_t_to_sigma(S->unet_p, t_end), 76 | smax = unet_t_to_sigma(S->unet_p, t_ini), 77 | p=7, 78 | sminp = pow(smin, 1/p), 79 | smaxp = pow(smax, 1/p), 80 | b = smaxp, 81 | f = S->n_step>1 ? (sminp - smaxp) / (S->n_step-1) : 0; 82 | for (unsigned i=0; in_step; ++i) 83 | S->sigmas[i] = pow(b+i*f, p); 84 | } break; 85 | default: 86 | ERROR_LOG(-1, "invalid sampling scheduler %d", S->c.sched); 87 | } 88 | 89 | log_debug_vec("Sigmas", S->sigmas, i, 0, "%.6g", S->sigmas[i]); 90 | 91 | S->solver.t = S->sigmas[0]; //initial t 92 | S->i_step = 0; 93 | 94 | end: 95 | return R; 96 | } 97 | 98 | void dnsamp_mask_apply(DenoiseSampler* S, LocalTensor* x) 99 | { 100 | int n0 = x->n[0], n1 = x->n[1], n2 = x->n[2], 101 | s1 = n0, s2 = n0*n1; 102 | assert( ltensor_shape_check(S->c.lmask, n0, n1, 1, 1) ); 103 | for (int i2=0; i2c.lmask->d[i0 +i1*s1]; 107 | int i = i0 +i1*s1 +i2*s2; 108 | x->d[i] = S->x0.d[i] * m + x->d[i] * (1-m); 109 | } 110 | } 111 | 112 | void dnsamp_noise_add(DenoiseSampler* S, LocalTensor* x, float sigma) 113 | { 114 | ltensor_resize_like(&S->noise, x); 115 | rng_randn(ltensor_nelements(&S->noise), S->noise.d); 116 | ltensor_for(*x,i,0) x->d[i] += S->noise.d[i] * sigma; 117 | } 118 | 119 | int dnsamp_step(DenoiseSampler* S, LocalTensor* x) 120 | { 121 | int R=1; 122 | 123 | int s = S->i_step; 124 | if (!(s < S->n_step)) return 0; 125 | 126 | float s_up = 0, 127 | s_down = S->sigmas[s+1]; 128 | 129 | if (s == 0) { // Initial tasks 130 | if (S->c.lmask) ltensor_copy(&S->x0, x); 131 | 132 | // Add noise to initial latent 133 | dnsamp_noise_add(S, x, S->sigmas[0]); 134 | if (S->c.lmask) dnsamp_mask_apply(S, x); 135 | log_debug3_ltensor(x, "x0+noise"); 136 | } 137 | 138 | if 
(S->c.s_noise > 0 && s > 0) { 139 | // Stochastic sampling: may help to add detail lost during sampling 140 | // Ref.: Karras2022, see Algo2 with S_churn 141 | // Produces softer images 142 | // Similar to the ancestral sampling below 143 | float s_curr = S->sigmas[s], 144 | s_hat = s_curr * sqrt(2) * S->c.s_noise, 145 | s_noise = sqrt(s_hat*s_hat - s_curr*s_curr); 146 | log_debug("s_noise:%g s_hat:%g", s_noise, s_hat); 147 | 148 | dnsamp_noise_add(S, x, s_noise); 149 | if (S->c.lmask) dnsamp_mask_apply(S, x); 150 | S->solver.t = s_hat; 151 | } 152 | 153 | if (S->c.s_ancestral > 0) { 154 | // Ancestral sampling 155 | // Ref.: k_diffusion/sampling.py get_ancestral_step 156 | // Produces softer images 157 | float s1 = S->sigmas[s], //sigma_from 158 | s2 = S->sigmas[s+1]; //sigma_to 159 | 160 | s_up = sqrt((s2*s2) * (s1*s1 - s2*s2) / (s1*s1)); 161 | s_up *= S->c.s_ancestral; //eta * s_noise 162 | MINSET(s_up, s2); 163 | s_down = sqrt(s2*s2 - s_up*s_up); 164 | 165 | log_debug("ancestral s_down:%g s_up:%g", s_down, s_up); 166 | } 167 | 168 | TRY( solver_step(&S->solver, s_down, x) ); 169 | 170 | if (s_up > 0 && s+1 != S->n_step) { 171 | // Ancestral sampling 172 | dnsamp_noise_add(S, x, s_up); 173 | S->solver.t = S->sigmas[s+1]; 174 | } 175 | 176 | // In-painting mask apply 177 | if (S->c.lmask) 178 | dnsamp_mask_apply(S, x); 179 | 180 | log_debug3_ltensor(x, "x"); 181 | 182 | S->i_step++; 183 | end: 184 | return R; 185 | } 186 | -------------------------------------------------------------------------------- /src/sampling.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #pragma once 5 | #include "unet.h" 6 | #include "solvers.h" 7 | #include "localtensor.h" 8 | 9 | // Schedulers. Matches MLIS_Sched. 10 | //TODO: classes? 
11 | enum { 12 | DNSAMP_SCHED_UNIFORM = 1, 13 | DNSAMP_SCHED_KARRAS = 2, 14 | }; 15 | 16 | typedef struct { 17 | Solver solver; 18 | float *sigmas; //vector 19 | int i_step, n_step, nfe_per_step; 20 | 21 | const UnetParams *unet_p; //fill before use 22 | int nfe_per_dxdt; //fill before use 23 | 24 | LocalTensor noise, x0; 25 | 26 | struct { 27 | int n_step, method, sched; 28 | float f_t_ini, f_t_end, s_noise, s_ancestral; 29 | LocalTensor *lmask; 30 | } c; 31 | } DenoiseSampler; 32 | 33 | void dnsamp_free(DenoiseSampler* S); 34 | 35 | int dnsamp_init(DenoiseSampler* S); 36 | 37 | int dnsamp_step(DenoiseSampler* S, LocalTensor* x); 38 | 39 | static inline 40 | int dnsamp_sample(DenoiseSampler* S, LocalTensor* x) 41 | { 42 | int r; 43 | while ((r = dnsamp_step(S, x)) > 0) ; 44 | return r; 45 | } 46 | -------------------------------------------------------------------------------- /src/solvers.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #include "solvers.h" 5 | #include "ccommon/ccommon.h" 6 | #include 7 | 8 | // List of all available solvers. Null-terminated. Matches MLIS_Method. 
9 | const SolverClass *g_solvers[] = { 10 | NULL, 11 | &g_solver_euler, 12 | &g_solver_heun, 13 | &g_solver_taylor3, 14 | &g_solver_dpmpp2m, 15 | &g_solver_dpmpp2s, 16 | NULL 17 | }; 18 | 19 | const SolverClass* solver_class_get(int idx) 20 | { 21 | if (!(0 <= idx && idx < COUNTOF(g_solvers))) 22 | return NULL; 23 | 24 | return g_solvers[idx]; 25 | } 26 | 27 | const SolverClass* solver_class_find(const char* name) 28 | { 29 | for (unsigned i=0; g_solvers[i]; ++i) 30 | if (!strcmp(name, g_solvers[i]->name)) 31 | return g_solvers[i]; 32 | 33 | return NULL; 34 | } 35 | 36 | void solver_free(Solver* S) 37 | { 38 | for (unsigned i=0; itmp); ++i) 39 | ltensor_free(&S->tmp[i]); 40 | ltensor_free(&S->dx); 41 | } 42 | 43 | int solver_step(Solver* S, float t, LocalTensor* x) 44 | { 45 | S->i_tmp = 0; 46 | ltensor_resize_like(&S->dx, x); 47 | int r = S->C->step(S, t, x); 48 | if (r < 0) return r; 49 | S->t = t; 50 | S->i_step++; 51 | return r; 52 | } 53 | 54 | static inline 55 | LocalTensor* solver_tmp_get(Solver* S) 56 | { 57 | assert( S->i_tmp < COUNTOF(S->tmp) ); 58 | S->i_tmp++; 59 | return &S->tmp[S->i_tmp-1]; 60 | } 61 | 62 | static inline 63 | LocalTensor* solver_tmp_get_resize(Solver* S, int n0, int n1, int n2, int n3) 64 | { 65 | LocalTensor* lt = solver_tmp_get(S); 66 | ltensor_resize(lt, n0, n1, n2, n3); 67 | return lt; 68 | } 69 | 70 | static inline 71 | LocalTensor* solver_tmp_get_resize_like(Solver* S, const LocalTensor* x) 72 | { 73 | LocalTensor* lt = solver_tmp_get(S); 74 | ltensor_resize_like(lt, x); 75 | return lt; 76 | } 77 | 78 | /* Euler 79 | * Ref.: any textbook 80 | * Baseline. 
81 | */ 82 | int solver_euler_step(Solver* S, float t, LocalTensor* x) 83 | { 84 | float dt = t - S->t; 85 | TRYR( solver_dxdt(S, S->t, x, &S->dx) ); 86 | ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt; 87 | return 1; 88 | } 89 | 90 | const SolverClass g_solver_euler = { 91 | .step = solver_euler_step, 92 | .n_fe = 1, 93 | .name = "euler", 94 | }; 95 | 96 | /* Heun (improved Euler) 97 | * Ref.: Karras et al. 2022 "Elucidating..." Algo1 98 | * Tends to distort the images with low step counts. 99 | */ 100 | int solver_heun_step(Solver* S, float t, LocalTensor* x) 101 | { 102 | float dt = t - S->t; 103 | LocalTensor *x1 = solver_tmp_get_resize_like(S, x); 104 | LocalTensor *d1 = solver_tmp_get_resize_like(S, x); 105 | 106 | TRYR( solver_dxdt(S, S->t, x, &S->dx) ); 107 | ltensor_for(*x,i,0) x1->d[i] = x->d[i] + S->dx.d[i] * dt; 108 | 109 | if (!(t > 0)) { //last step: just euler 110 | ltensor_for(*x,i,0) x->d[i] = x1->d[i]; 111 | } 112 | else { //2nd order correction 113 | TRYR( solver_dxdt(S, t, x1, d1) ); 114 | ltensor_for(*x,i,0) 115 | x->d[i] += (S->dx.d[i] + d1->d[i]) * 0.5 * dt; 116 | } 117 | 118 | return 1; 119 | } 120 | 121 | const SolverClass g_solver_heun = { 122 | .step = solver_heun_step, 123 | .n_fe = 2, 124 | .name = "heun", 125 | }; 126 | 127 | /* Third-order-Taylor extension of Euler 128 | * Ref.: own 129 | * Similar to Euler with less steps. 
130 | 131 | x_{i+1} = x_i + dx_i dt_i + (1/2) dx2_i dt_i^2 + (1/6) dx3_i dt_i^3 132 | 133 | dx2_i = (dx_i - dx_{i-1}) / dt_{i-1} 134 | dx3_i = (dx2_i - dx2_{i-1}) / dt_{i-1} 135 | = (dx_i - dx_{i-1}) / dt_{i-1}^2 - (dx_{i-1} - dx_{i-2}) / (dt_{i-1} dt_{i-2}) 136 | */ 137 | int solver_taylor3_step(Solver* S, float t, LocalTensor* x) 138 | { 139 | float dt = t - S->t; 140 | LocalTensor *lt_dt_prev = solver_tmp_get_resize(S, 1,1,1,1); 141 | LocalTensor *lt_dp1 = solver_tmp_get_resize_like(S, x); 142 | LocalTensor *lt_dp2 = solver_tmp_get_resize_like(S, x); 143 | 144 | float *dt_prev = lt_dt_prev->d, 145 | *dp1 = lt_dp1->d, 146 | *dp2 = lt_dp2->d; 147 | 148 | TRYR( solver_dxdt(S, S->t, x, &S->dx) ); 149 | ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt; 150 | 151 | // 2nd and 3nd order corrections 152 | float idtp = S->i_step >= 1 ? 1 / dt_prev[0] : 0, 153 | f2 = S->i_step >= 1 ? dt*dt/2 : 0, 154 | f3 = S->i_step >= 2 ? dt*dt*dt/6 : 0; 155 | ltensor_for(*x,i,0) { 156 | float d2 = (S->dx.d[i] - dp1[i]) * idtp, 157 | d3 = (d2 - dp2[i]) * idtp; 158 | x->d[i] += d2 * f2 + d3 * f3; 159 | dp1[i] = S->dx.d[i]; 160 | dp2[i] = d2; 161 | } 162 | 163 | dt_prev[0] = dt; 164 | return 1; 165 | } 166 | 167 | const SolverClass g_solver_taylor3 = { 168 | .step = solver_taylor3_step, 169 | .n_fe = 1, 170 | .name = "taylor3", 171 | }; 172 | 173 | /* DPM++(2M) 174 | * Ref.: Lu et al. 2023 "DPM-Solver++ ..." Algo2 175 | * Ref.: k-diffusion/sampling.py sample_dpmpp_2m 176 | * Produces sharper images. 177 | * Use with Karras scheduler to prevent overly sharp images. 
178 | 179 | alpha_i = 1 180 | sigma_{i+1} = t 181 | sigma_i = S->t 182 | 183 | lambda_i = log(alpha_i / sigma_i) 184 | = -log(sigma_i) 185 | 186 | a_i = sigma_{i+1} / sigma_i 187 | 188 | h_i = lambda_{i+1} - lambda_i 189 | = -log(sigma_{i+1} / sigma_i) 190 | = -log(a_i) 191 | 192 | b_i = exp(-h_i) - 1 = a_i - 1 193 | 194 | c_i = 1/(2r) 195 | = h_{i} / (2 h_{i-1}) 196 | 197 | d_i = x_i - sigma_i dx_i 198 | 199 | D_i = (1 + c_i) d_i - c_i d_{i-1} 200 | 201 | x_{i+1} = a_i x_i - b_i D_i 202 | = a_i x_i + (1 - a_i) D_i 203 | 204 | if c_i == 0: 205 | x_{i+1} = x_i + (sigma_{i+1} - sigma_i) dx_i (Euler) 206 | */ 207 | int solver_dpmpp2m_step(Solver* S, float t, LocalTensor* x) 208 | { 209 | LocalTensor *vars = solver_tmp_get_resize(S, 1,1,1,1); 210 | LocalTensor *dprev = solver_tmp_get_resize_like(S, x); 211 | 212 | float a = t / S->t, 213 | h = -log(a), 214 | h_last = vars->d[0], 215 | c = h / (2*h_last); 216 | 217 | if (S->i_step == 0 || !(t > 0)) //first or last step 218 | c = 0; 219 | 220 | TRYR( solver_dxdt(S, S->t, x, &S->dx) ); 221 | ltensor_for(*x,i,0) { 222 | float d0 = x->d[i] - S->t * S->dx.d[i], 223 | d1 = dprev->d[i], 224 | d = (1+c) * d0 - c * d1; 225 | x->d[i] = a * x->d[i] + (1-a) * d; 226 | dprev->d[i] = d0; 227 | } 228 | 229 | vars->d[0] = h; 230 | return 1; 231 | } 232 | 233 | const SolverClass g_solver_dpmpp2m = { 234 | .step = solver_dpmpp2m_step, 235 | .n_fe = 1, 236 | .name = "dpmpp2m", 237 | }; 238 | 239 | /* DPM++(2S) 240 | * Ref.: Lu et al. 2023 "DPM-Solver++ ..." Algo1 241 | * Ref.: k-diffusion/sampling.py sample_dpmpp_2s_ancestral 242 | * Should be used with ancestral sampling. 243 | 244 | Check DPM++(2M) first. 
245 | 246 | lambda_i = -log(sigma_i) 247 | 248 | s_i = sqrt(sigma_{i+1} sigma_i) From k-diffusion r=1/2 249 | 250 | a'_i = s_i / sigma_i 251 | h'_i = -log(a'_i) 252 | d_i = x_i - sigma_i dx_i 253 | 254 | x'_i = a'_i x_i + (1 - a'_i) d_i 255 | = x_i + (s_i - sigma_i) dx_i 256 | 257 | d'_i = x'_i - s_i dx'_i 258 | 259 | a_i = sigma_{i+1} / sigma_i 260 | h_i = -log(a_i) 261 | 262 | x_{i+1} = a_i x_i + (1 - a_i) d'_i 263 | */ 264 | int solver_dpmpp2s_step(Solver* S, float t, LocalTensor* x) 265 | { 266 | LocalTensor *x1 = solver_tmp_get_resize_like(S, x); 267 | LocalTensor *dx1 = solver_tmp_get_resize_like(S, x); 268 | 269 | TRYR( solver_dxdt(S, S->t, x, &S->dx) ); 270 | 271 | if (!(t > 0)) { //last step: just euler 272 | float dt = t - S->t; 273 | ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt; 274 | } 275 | else { 276 | float t1 = sqrt(t * S->t), 277 | dt1 = t1 - S->t, 278 | a = t / S->t; 279 | 280 | ltensor_for(*x,i,0) x1->d[i] = x->d[i] + S->dx.d[i] * dt1; 281 | 282 | TRYR( solver_dxdt(S, t1, x1, dx1) ); 283 | ltensor_for(*x,i,0) { 284 | float d = x1->d[i] - t1 * dx1->d[i]; 285 | x->d[i] = a * x->d[i] + (1-a) * d; 286 | } 287 | } 288 | 289 | return 1; 290 | } 291 | 292 | const SolverClass g_solver_dpmpp2s = { 293 | .step = solver_dpmpp2s_step, 294 | .n_fe = 2, 295 | .name = "dpmpp2s", 296 | }; 297 | -------------------------------------------------------------------------------- /src/solvers.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Initial value problem (IVP) solvers. 5 | * Used as sampling methods for generative diffusion models. 
6 | * 7 | * Example: 8 | 9 | int dxdt(Solver* S, float t, const LocalTensor* x, LocalTensor* dx) { 10 | unsigned n = x->n[0]; 11 | for (unsigned i=1; i+1d[i] = (x->d[i-1] -2*x->d[i] + x->d[i+1]) / 4; 13 | dx->d[0] = dx->d[n-1] = 0; 14 | return 1; 15 | } 16 | 17 | void solve() { 18 | // Set solver 19 | Solver sol={ .C=&solver_euler }; 20 | // Set initial time 21 | sol.t = 0; 22 | // Set initial state 23 | ltensor_resize_zero(&sol.x, 100,1,1,1); 24 | sol.x.d[50] = 1; 25 | // Set differential equation 26 | sol.dxdt = dxdt; 27 | // Solve until t_end=10 28 | for (float dt=0.1, t_end=10, t=dt; t<=t_end; t+=dt) 29 | TRY( solver_step(&sol, t) ); 30 | // Do something here with the result in sol.x . 31 | // You may reuse the solver by setting i_step to zero. 32 | // Free memory 33 | solver_free(&sol); 34 | } 35 | */ 36 | #pragma once 37 | #include "localtensor.h" 38 | 39 | struct Solver; 40 | 41 | typedef struct { 42 | int (*step)(struct Solver*, float dt, LocalTensor* x); 43 | int n_fe; //number of calls to dxdt per step 44 | const char *name; 45 | } SolverClass; 46 | 47 | // Default methods 48 | extern const SolverClass g_solver_euler; 49 | extern const SolverClass g_solver_heun; 50 | extern const SolverClass g_solver_taylor3; 51 | extern const SolverClass g_solver_dpmpp2m; 52 | extern const SolverClass g_solver_dpmpp2s; 53 | 54 | enum { 55 | SOLVER_METHOD_EULER = 1, 56 | SOLVER_METHOD_HEUN = 2, 57 | SOLVER_METHOD_TAYLOR3 = 3, 58 | SOLVER_METHOD_DPMPP2M = 4, 59 | SOLVER_METHOD_DPMPP2S = 5, 60 | }; 61 | 62 | const SolverClass* solver_class_get(int idx); //idx >= 1 63 | const SolverClass* solver_class_find(const char* name); 64 | 65 | typedef struct Solver { 66 | const SolverClass *C; // Fill before using 67 | 68 | // State 69 | LocalTensor dx, 70 | tmp[8]; //vector, temporal tensors 71 | float t; 72 | unsigned i_step, i_tmp; 73 | 74 | // Config (fill before use) 75 | int (*dxdt)(struct Solver*, float t, const LocalTensor* x, LocalTensor* dx); 76 | void *user; 77 | } Solver; 
78 | 79 | void solver_free(Solver* S); 80 | 81 | int solver_step(Solver* S, float t, LocalTensor* x); 82 | 83 | static inline 84 | int solver_dxdt(Solver* S, float t, const LocalTensor* x, LocalTensor* dx) 85 | { 86 | assert( ltensor_shape_equal(x, dx) ); 87 | int r = S->dxdt(S, t, x, dx); 88 | if (r < 0) return r; 89 | return r; 90 | } 91 | -------------------------------------------------------------------------------- /src/str_match_util.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: Zlib 3 | */ 4 | #pragma once 5 | #include 6 | #include "ccommon/unicode.h" 7 | #include "ccommon/unicode_data.h" 8 | 9 | static inline 10 | int chr_ascii_space_is(int c) { 11 | return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v' 12 | || c == '\f'; 13 | } 14 | 15 | static inline 16 | int chr_ascii_lower(int c) { 17 | return ('A' <= c && c <= 'Z') ? (c+'a'-'A') : c; 18 | } 19 | 20 | static inline 21 | void str_ascii_lower(char* cur, const char* end) { 22 | for (; cur 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #include "tae.h" 5 | #include "mlblock_nn.h" 6 | 7 | #define T 1 //true 8 | #define F 0 //false 9 | #define MLN(NAME,X) mlctx_tensor_add(C, (NAME), (X)) 10 | 11 | // The GGML scheduler have problems with inplace operations (2024-07-13) 12 | #if USE_GGML_SCHED 13 | #define ggml_relu_inplace ggml_relu 14 | #define ggml_tanh_inplace ggml_tanh 15 | #endif 16 | 17 | const SdTaeParams g_sdtae_sd1 = { 18 | .ch_x = 3, 19 | .ch_inner = 64, 20 | .ch_z = 4, 21 | .n_blk = 3, 22 | }; 23 | 24 | MLTensor* mlb_sdtae_block(MLCtx* C, MLTensor* x, int ch_out) 25 | { 26 | MLTensor *x0=x; 27 | mlctx_block_begin(C); 28 | int ch_in = x->ne[2]; 29 | x = MLN("conv.0", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T)); 30 | x = ggml_relu_inplace(C->cc, x); 31 | x = MLN("conv.2", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T)); 32 | x = ggml_relu_inplace(C->cc, x); 33 | x 
= MLN("conv.4", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T)); 34 | if (ch_in != ch_out) 35 | x0 = MLN("skip", mlb_nn_conv2d(C, x0, ch_out, 1,1, 1,1, 1,1, 1,1, T)); 36 | x = ggml_add(C->cc, x, x0); 37 | x = ggml_relu_inplace(C->cc, x); 38 | return x; 39 | } 40 | 41 | #define IDX2NAME(I) (sprintf(name, "%d", (I)), name) 42 | 43 | MLTensor* mlb_sdtae_encoder(MLCtx* C, MLTensor* x, const SdTaeParams* P) 44 | { 45 | int iblk=0; 46 | char name[32]; 47 | mlctx_block_begin(C); 48 | 49 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 50 | P->ch_inner, 3,3, 1,1, 1,1, 1,1, true)); 51 | x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner)); 52 | 53 | for (int j=0; j<3; ++j) { 54 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 55 | P->ch_inner, 3,3, 2,2, 1,1, 1,1, false)); 56 | for (int i=0; in_blk; ++i) 57 | x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner)); 58 | } 59 | 60 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 61 | P->ch_z, 3,3, 1,1, 1,1, 1,1, true)); 62 | return x; 63 | } 64 | 65 | MLTensor* mlb_sdtae_decoder(MLCtx* C, MLTensor* x, const SdTaeParams* P) 66 | { 67 | int iblk=0; 68 | char name[32]; 69 | mlctx_block_begin(C); 70 | 71 | x = ggml_scale(C->cc, x, 1.0f / 3.0f); 72 | x = ggml_tanh_inplace(C->cc, x); 73 | x = ggml_scale(C->cc, x, 3.0f); 74 | 75 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 76 | P->ch_inner, 3,3, 1,1, 1,1, 1,1, true)); 77 | x = ggml_relu_inplace(C->cc, x); iblk++; 78 | 79 | for (int j=0; j<3; ++j) { 80 | for (int i=0; in_blk; ++i) 81 | x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner)); 82 | x = ggml_upscale(C->cc, x, 2, GGML_SCALE_MODE_NEAREST); iblk++; 83 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 84 | P->ch_inner, 3,3, 1,1, 1,1, 1,1, false)); 85 | } 86 | 87 | x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner)); 88 | x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x, 89 | P->ch_x, 3,3, 1,1, 1,1, 1,1, true)); 90 | 91 | return x; 92 | } 93 | 94 | int sdtae_encode(MLCtx* C, const SdTaeParams* P, 
95 | const LocalTensor* img, LocalTensor* latent) 96 | { 97 | int R=1; 98 | 99 | const int f = 8; //latent to image scale (8 for SD) 100 | if (!(img->n[0]%f==0 && img->n[1]%f==0 && img->n[2]==3 && img->n[3]==1)) 101 | ERROR_LOG(-1, "invalid input image shape: " LT_SHAPE_FMT, 102 | LT_SHAPE_UNPACK(*img)); 103 | 104 | mlctx_begin(C, "TAE encode"); 105 | 106 | MLTensor *input = mlctx_input_new(C, "img", GGML_TYPE_F32, 107 | LT_SHAPE_UNPACK(*img) ); 108 | MLTensor *output = mlb_sdtae_encoder(C, input, P); 109 | mlctx_tensor_add(C, "encoder.layers", output); 110 | 111 | TRY( mlctx_run(C, latent, img) ); 112 | 113 | end: 114 | return R; 115 | } 116 | 117 | int sdtae_decode(MLCtx* C, const SdTaeParams* P, 118 | const LocalTensor* latent, LocalTensor* img) 119 | { 120 | int R=1; 121 | 122 | TRY( ltensor_shape_check_log(latent, "latent", 0,0,4,1) ); 123 | 124 | mlctx_begin(C, "TAE decode"); 125 | 126 | MLTensor *input = mlctx_input_new(C, "latent", GGML_TYPE_F32, 127 | LT_SHAPE_UNPACK(*latent)); 128 | MLTensor *output = mlb_sdtae_decoder(C, input, P); 129 | mlctx_tensor_add(C, "decoder.layers", output); 130 | 131 | TRY( mlctx_run(C, img, latent) ); 132 | 133 | end: 134 | mlctx_end(C); 135 | return R; 136 | } 137 | -------------------------------------------------------------------------------- /src/tae.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. 
García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Tiny auto-encoder 5 | * 6 | * References: 7 | * https://github.com/madebyollin/taesd/blob/main/taesd.py 8 | */ 9 | #pragma once 10 | #include "mlblock.h" 11 | #include "localtensor.h" 12 | 13 | typedef struct { 14 | int ch_x, ch_inner, ch_z, n_blk; 15 | } SdTaeParams; 16 | 17 | extern const SdTaeParams g_sdtae_sd1; 18 | 19 | MLTensor* mlb_sdtae_encoder(MLCtx* C, MLTensor* x, const SdTaeParams* P); 20 | 21 | MLTensor* mlb_sdtae_decoder(MLCtx* C, MLTensor* x, const SdTaeParams* P); 22 | 23 | /*static inline 24 | void sdtae_encoder_post(LocalTensor* out, const LocalTensor* latent) 25 | { // [0,1] -> [-1,1] 26 | ltensor_resize_like(out, latent); 27 | ltensor_for(*out,i,0) out->d[i] = latent->d[i]*2 -1; 28 | } 29 | 30 | static inline 31 | void sdtae_decoder_pre(LocalTensor* out, const LocalTensor* latent) 32 | { // [-1,1] -> [0,1] 33 | ltensor_resize_like(out, latent); 34 | ltensor_for(*out,i,0) out->d[i] = (latent->d[i]+1)/2; 35 | }*/ 36 | 37 | int sdtae_encode(MLCtx* C, const SdTaeParams* P, 38 | const LocalTensor* img, LocalTensor* latent); 39 | 40 | int sdtae_decode(MLCtx* C, const SdTaeParams* P, 41 | const LocalTensor* latent, LocalTensor* img); 42 | -------------------------------------------------------------------------------- /src/tensor_name_conv.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Conversions from the multiple model tensor naming schemes to internal names. 
5 | */ 6 | #pragma once 7 | #include "ccommon/vector.h" 8 | #include "ccommon/strslice.h" 9 | 10 | int tnconv_sd(StrSlice name, DynStr *out); 11 | 12 | enum tensor_name_convert_result_t { 13 | TNCONV_R_UNUSED = 0, 14 | TNCONV_R_GOOD = 1, 15 | TNCONV_R_QKV_PROJ = 2, 16 | }; 17 | -------------------------------------------------------------------------------- /src/test_common.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2025, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | */ 4 | #pragma once 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #define log(...) do { \ 11 | printf(__VA_ARGS__); \ 12 | printf("\n"); \ 13 | } while (0) 14 | 15 | #define error(...) do { \ 16 | printf("ERROR "); \ 17 | printf(__VA_ARGS__); \ 18 | printf("\n"); \ 19 | printf("TEST FAIL " __FILE__ "\n"); \ 20 | exit(1); \ 21 | } while (0) 22 | 23 | #ifdef NDEBUG 24 | #define debug(...) 25 | #else 26 | #define debug(...) do { \ 27 | printf("DEBUG "); \ 28 | printf(__VA_ARGS__); \ 29 | printf("\n"); \ 30 | } while (0) 31 | #endif 32 | 33 | #define assert_int(A, B, ...) do { \ 34 | int a = (A), b = (B); \ 35 | if (a != b) error(__VA_ARGS__); \ 36 | } while(0) 37 | -------------------------------------------------------------------------------- /src/test_prompt_preproc.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2025, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Test of the prompt preprocessing. 5 | */ 6 | #include "prompt_preproc.h" 7 | #include "test_common.h" 8 | 9 | typedef struct { 10 | const char *text; 11 | float w; 12 | } TestChunk; 13 | 14 | typedef struct { 15 | unsigned n_chunk, n_lora; 16 | TestChunk *chunks; 17 | TestChunk *loras; 18 | } TestPrompt; 19 | 20 | #define assert_chunk(A, B, ...) 
do { \ 21 | const struct PromptTextChunk a = (A); \ 22 | const TestChunk b = (B); \ 23 | if (strsl_cmpz(a.text, b.text) || a.w != b.w) { \ 24 | DynStr errstr=NULL; \ 25 | dstr_printf(errstr, "'%.*s' %g != '%s' %g", (int)strsl_len(a.text), \ 26 | strsl_begin(a.text), a.w, b.text, b.w); \ 27 | error(__VA_ARGS__); \ 28 | } \ 29 | } while(0) 30 | 31 | #define assert_lora(A, B, ...) do { \ 32 | const struct PromptTextLora a = (A); \ 33 | const TestChunk b = (B); \ 34 | if (strsl_cmpz(a.name, b.text) || a.w != b.w) { \ 35 | DynStr errstr=NULL; \ 36 | dstr_printf(errstr, "'%.*s' %g != '%s' %g", (int)strsl_len(a.name), \ 37 | strsl_begin(a.name), b.w, b.text, b.w); \ 38 | error(__VA_ARGS__); \ 39 | } \ 40 | } while(0) 41 | 42 | #define CHUNKS(...) \ 43 | .n_chunk=sizeof((TestChunk[]){__VA_ARGS__, {0}})/sizeof(TestChunk)-1, \ 44 | .chunks=(TestChunk[]){__VA_ARGS__, {0}} 45 | 46 | #define LORAS(...) \ 47 | .n_lora=sizeof((TestChunk[]){__VA_ARGS__, {0}})/sizeof(TestChunk)-1, \ 48 | .loras=(TestChunk[]){__VA_ARGS__, {0}} 49 | 50 | #define TEST(TEXT, ...) 
\ 51 | test((TEXT), (TestPrompt){__VA_ARGS__}) 52 | 53 | static 54 | void assert_prompt(const PromptText pt, const TestPrompt exp, const char *text) 55 | { 56 | assert_int( vec_count(pt.chunks), exp.n_chunk, 57 | "in '%s':\nchunks returned: %d, expected: %d", text, a, b); 58 | 59 | assert_int( vec_count(pt.loras), exp.n_lora, 60 | "in '%s':\nloras returned: %d, expected: %d", text, a, b); 61 | 62 | for (unsigned i=0; iping", 117 | CHUNKS({"a dog jumping", 1}), 118 | LORAS({"LORA NAME", 1})); 119 | TEST("a dog jumping", 120 | CHUNKS({"a dog jumping", 1}), 121 | LORAS({"LORA NAME", 0.8})); 122 | // Escapes 123 | TEST("a \\(dog\\) jumping", 124 | CHUNKS({"a (dog) jumping", 1})); 125 | TEST("a dog jum\\ping", 126 | CHUNKS({"a dog jumping", 1})); 127 | 128 | log("TEST OK "__FILE__); 129 | return 0; 130 | } 131 | -------------------------------------------------------------------------------- /src/test_rng.c: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Utility to test the Philox RNG. 
5 | */ 6 | #include "ccommon/timing.h" 7 | #include "ccommon/rng_philox.h" 8 | #include 9 | #include 10 | 11 | /* Seed: 0, Offset: 0, n: 12 12 | -0.92466259 13 | -0.42534414 14 | -2.64384580 15 | 0.14518388 16 | -0.12086648 17 | -0.57972562 18 | -0.62285119 19 | -0.32838708 20 | -1.07454228 21 | -0.36314407 22 | -1.67105067 23 | 2.26550508 24 | */ 25 | 26 | int main(int argc, char* argv[]) 27 | { 28 | RngPhilox rng={0}; 29 | unsigned n=12; 30 | 31 | if (argc > 1) rng.seed = strtoull(argv[1], NULL, 10); 32 | if (argc > 2) rng.offset = strtoul(argv[2], NULL, 10); 33 | if (argc > 3) n = strtoul(argv[3], NULL, 10); 34 | 35 | float *out = malloc(sizeof(float)*n); 36 | if (!out) { printf("out of memory\n"); return 1; } 37 | 38 | double t = timing_time(); 39 | rng_philox_randn(&rng, n, out); 40 | t = timing_time() - t; 41 | fprintf(stderr, "%d numbers in %.3fms (%.3fns/num)\n", n, t*1e3, t*1e9/n); 42 | for (unsigned i=0; i 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Test of the CLIP tokenizer. 5 | */ 6 | #include "mlimgsynth.h" 7 | #include "test_common.h" 8 | 9 | #define TEST(TEXT, ...) \ 10 | test(ctx, (TEXT), (const int32_t[]){__VA_ARGS__, -1}) 11 | 12 | #define TEST_EMPTY(TEXT) \ 13 | test(ctx, (TEXT), (const int32_t[]){-1}) 14 | 15 | static 16 | void test(MLIS_Ctx* ctx, const char* text, const int32_t* expected) 17 | { 18 | debug("%s", text); 19 | 20 | int32_t *tokens=NULL; 21 | int r = mlis_text_tokenize(ctx, text, &tokens, MLIS_SUBMODEL_CLIP); 22 | if (r < 0) 23 | error("mlis_tokenize('%s'): 0x%x", text, -r); 24 | 25 | int i; 26 | for (i=0; i 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * UNet implementation for denoising in SD. 
5 | */ 6 | #pragma once 7 | #include "mlblock.h" 8 | #include "localtensor.h" 9 | 10 | typedef struct { 11 | int n_ch_in; 12 | int n_ch_out; 13 | int n_res_blk; 14 | int attn_res[4]; 15 | int ch_mult[5]; 16 | int transf_depth[5]; 17 | int n_te; //time embedding dimensions 18 | int n_head; 19 | int d_head; 20 | int n_ctx; 21 | int n_ch; 22 | int ch_adm_in; 23 | 24 | unsigned clip_norm:1, 25 | cond_label:1, 26 | uncond_empty_zero:1, 27 | vparam:1; 28 | 29 | int n_step_train; 30 | float sigma_min; 31 | float sigma_max; 32 | float *log_sigmas; //[n_step_train] 33 | } UnetParams; 34 | 35 | extern const UnetParams g_unet_sd1; //SD 1.x 36 | extern const UnetParams g_unet_sd2; //SD 2.x 37 | extern const UnetParams g_unet_sdxl; //SDXL 38 | //extern const UnetParams g_unet_svd; //SVD (stable video diffusion) 39 | 40 | MLTensor* mlb_unet_denoise(MLCtx* C, MLTensor* x, MLTensor* time, MLTensor* c, 41 | MLTensor* label, const UnetParams* P); 42 | 43 | void unet_params_init(); //fill global log_sigmas 44 | 45 | float unet_sigma_to_t(const UnetParams* P, float sigma); 46 | 47 | float unet_t_to_sigma(const UnetParams* P, float t); 48 | 49 | typedef struct { 50 | MLCtx *ctx; 51 | const UnetParams *par; 52 | unsigned nfe, split:1; 53 | } UnetState; 54 | 55 | int unet_denoise_init(UnetState* S, MLCtx* C, const UnetParams* P, 56 | unsigned lw, unsigned lh, bool split); 57 | 58 | int unet_denoise_run(UnetState* S, 59 | const LocalTensor* x, const LocalTensor* cond, const LocalTensor* label, 60 | float sigma, LocalTensor* dx); 61 | -------------------------------------------------------------------------------- /src/vae.h: -------------------------------------------------------------------------------- 1 | /* Copyright 2024, Alejandro A. García 2 | * SPDX-License-Identifier: MIT 3 | * 4 | * Variational auto-encoder. 
5 | */ 6 | #pragma once 7 | #include "mlblock.h" 8 | #include "localtensor.h" 9 | 10 | typedef struct { 11 | int ch_x, 12 | ch_z, 13 | ch, 14 | n_res, 15 | n_res_blk, 16 | ch_mult[5], 17 | d_embed, 18 | f_down; //downsampling total factor 19 | float scale_factor; 20 | } VaeParams; 21 | 22 | extern const VaeParams g_vae_sd1; //SD 1.x & 2.x 23 | extern const VaeParams g_vae_sdxl; //SDXL 24 | 25 | MLTensor* mlb_sdvae_encoder(MLCtx* C, MLTensor* x, const VaeParams* P); 26 | 27 | MLTensor* mlb_sdvae_decoder(MLCtx* C, MLTensor* x, const VaeParams* P); 28 | 29 | void sdvae_latent_mean(LocalTensor* latent, const LocalTensor* moments, 30 | const VaeParams* P); 31 | 32 | void sdvae_latent_sample(LocalTensor* latent, const LocalTensor* moments, 33 | const VaeParams* P); 34 | 35 | static inline 36 | void sdvae_encoder_pre(LocalTensor* out, const LocalTensor* img) 37 | { // [0,1] -> [-1,1] 38 | ltensor_resize_like(out, img); 39 | ltensor_for(*out,i,0) out->d[i] = img->d[i]*2 -1; 40 | } 41 | 42 | static inline 43 | void sdvae_decoder_post(LocalTensor* out, const LocalTensor* img) 44 | { // [-1,1] -> [0,1] 45 | ltensor_resize_like(out, img); 46 | ltensor_for(*out,i,0) out->d[i] = (img->d[i]+1)/2; 47 | } 48 | 49 | int sdvae_encode(MLCtx* C, const VaeParams* P, 50 | const LocalTensor* img, LocalTensor* latent, int tile_px); 51 | 52 | int sdvae_decode(MLCtx* C, const VaeParams* P, 53 | const LocalTensor* latent, LocalTensor* img, int tile_px); 54 | --------------------------------------------------------------------------------