├── .gitignore
├── LICENSE
├── LICENSE.zlib
├── Makefile
├── README.md
├── gencode_clip_merges.py
├── generate.bat
├── generate.sh
├── include
    └── mlimgsynth.h
├── python
    ├── guessing_game.py
    ├── minimal_png.py
    └── mlimgsynth.py
└── src
    ├── ccommon
        ├── alloc.c
        ├── alloc.h
        ├── alloc_arena.c
        ├── alloc_arena.h
        ├── alloc_gen.c
        ├── alloc_gen.h
        ├── any.c
        ├── any.h
        ├── base.mk
        ├── bisect.h
        ├── byteswap.h
        ├── ccommon.h
        ├── fsutil.c
        ├── fsutil.h
        ├── image.c
        ├── image.h
        ├── image_io.c
        ├── image_io.h
        ├── image_io_jpeg.c
        ├── image_io_jpeg.h
        ├── image_io_png.c
        ├── image_io_png.h
        ├── image_io_pnm.c
        ├── image_io_pnm.h
        ├── logging.c
        ├── logging.h
        ├── rng_philox.c
        ├── rng_philox.h
        ├── str_util.c
        ├── str_util.h
        ├── stream.c
        ├── stream.h
        ├── stringstore.c
        ├── stringstore.h
        ├── strslice.h
        ├── structio.c
        ├── structio.h
        ├── structio_json.c
        ├── structio_json.h
        ├── timing.c
        ├── timing.h
        ├── unicode.c
        ├── unicode.h
        ├── unicode_data.c
        ├── unicode_data.h
        └── vector.h
    ├── ccompute
        ├── tensorstore.c
        ├── tensorstore.h
        ├── tensorstore_gguf.c
        ├── tensorstore_gguf.h
        ├── tensorstore_safet.c
        └── tensorstore_safet.h
    ├── clip.c
    ├── clip.h
    ├── clip_merges.c.h
    ├── demo_mlimgsynth.c
    ├── ggml_extend.c
    ├── ggml_extend.h
    ├── localtensor.c
    ├── localtensor.h
    ├── lora.c
    ├── lora.h
    ├── main_mlimgsynth.c
    ├── main_tstore_util.c
    ├── mlblock.c
    ├── mlblock.h
    ├── mlblock_nn.c
    ├── mlblock_nn.h
    ├── mlimgsynth.c
    ├── mlimgsynth_options_get.c.h
    ├── mlimgsynth_options_set.c.h
    ├── prompt_preproc.h
    ├── sampling.c
    ├── sampling.h
    ├── solvers.c
    ├── solvers.h
    ├── str_match_util.h
    ├── tae.c
    ├── tae.h
    ├── tensor_name_conv.c
    ├── tensor_name_conv.h
    ├── test_common.h
    ├── test_prompt_preproc.c
    ├── test_rng.c
    ├── test_text_tokenize_clip.c
    ├── unet.c
    ├── unet.h
    ├── vae.c
    └── vae.h


/.gitignore:
--------------------------------------------------------------------------------
 1 | obj
 2 | obj_dbg
 3 | .d
 4 | .d_dbg
 5 | __pycache__
 6 | ggml
 7 | test_rng
 8 | tstore-util
 9 | mlimgsynth
10 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2024 Alejandro A. García <aag@zorzal.net>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 6 | 
 7 | The above copyright notice and this permission notice (including the next paragraph) shall be included in all copies or substantial portions of the Software.
 8 | 
 9 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
10 | 


--------------------------------------------------------------------------------
/LICENSE.zlib:
--------------------------------------------------------------------------------
 1 | Copyright (C) 2024 Alejandro A. García <aag@zorzal.net>
 2 |   
 3 | This software is provided 'as-is', without any express or implied
 4 | warranty.  In no event will the authors be held liable for any damages
 5 | arising from the use of this software.
 6 | 
 7 | Permission is granted to anyone to use this software for any purpose,
 8 | including commercial applications, and to alter it and redistribute it
 9 | freely, subject to the following restrictions:
10 |   
11 | 1. The origin of this software must not be misrepresented; you must not
12 |    claim that you wrote the original software. If you use this software
13 |    in a product, an acknowledgment in the product documentation would be
14 |    appreciated but is not required. 
15 | 2. Altered source versions must be plainly marked as such, and must not be
16 |    misrepresented as being the original software.
17 | 3. This notice may not be removed or altered from any source distribution.
18 | 


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
  1 | # Makefile
  2 | targets = test_rng tstore-util demo_mlimgsynth mlimgsynth \
  3 | 	test_text_tokenize_clip test_prompt_preproc
  4 | targets_dlib = libmlimgsynth
  5 | 
  6 | # Put your custom definitions in Makefile.local instead of changing this file
  7 | -include Makefile.local
  8 | 
  9 | include src/ccommon/base.mk
 10 | VPATH = src:src/ccommon:src/ccompute
 11 | cppflags += -Isrc -Iinclude
 12 | ldflags += -L.
 13 | 
 14 | ### Dependencies
 15 | # math
 16 | ldlibs += -lm
 17 | 
 18 | # ggml
 19 | ifndef GGML_INCLUDE_PATH
 20 | GGML_INCLUDE_PATH := ggml/include
 21 | endif
 22 | ifndef GGML_LIB_PATH
 23 | GGML_LIB_PATH := ggml/Release/src
 24 | endif
 25 | cppflags += -I$(GGML_INCLUDE_PATH)
 26 | ldflags += -L$(GGML_LIB_PATH)
 27 | # ggml headers give several warnings with C99
 28 | cflags += -Wno-pedantic
 29 | 
 30 | tstore-util: ldlibs += -lggml -lggml-base
 31 | libmlimgsynth: ldlibs += -lggml -lggml-base
 32 | ifndef MLIS_NO_RUNPATH
 33 | tstore-util: ldflags += -Wl,-rpath,$(GGML_LIB_PATH)
 34 | libmlimgsynth: ldflags += -Wl,-rpath,$(GGML_LIB_PATH)
 35 | endif
 36 | 
 37 | # ggml scheduler is needed for incomplete backends (no longer needed for vulkan)
 38 | ifdef MLIS_GGML_SCHED
 39 | libmlimgsynth: cppflags += -DUSE_GGML_SCHED=1
 40 | endif
 41 | 
 42 | # Flash Attention (not working yet, crashes)
 43 | ifdef MLIS_FLASH_ATTENTION
 44 | libmlimgsynth: cppflags += -DUSE_FLASH_ATTENTION
 45 | endif
 46 | 
 47 | # png
 48 | ifndef MLIS_NO_PNG
 49 | mlimgsynth: ldlibs += -lpng
 50 | mlimgsynth: cppflags += -DUSE_LIB_PNG
 51 | mlimgsynth: image_io_png.o
 52 | endif
 53 | 
 54 | # jpeg
 55 | ifndef MLIS_NO_JPEG
 56 | mlimgsynth: ldlibs += -ljpeg
 57 | mlimgsynth: cppflags += -DUSE_LIB_JPEG
 58 | mlimgsynth: image_io_jpeg.o
 59 | endif
 60 | 
 61 | # libmlimgsynth
 62 | demo_mlimgsynth: ldlibs += -lmlimgsynth
 63 | mlimgsynth: ldlibs += -lmlimgsynth
 64 | test_text_tokenize_clip: ldlibs += -lmlimgsynth
 65 | ifndef MLIS_NO_RUNPATH
 66 | demo_mlimgsynth: ldflags += -Wl,-rpath,.
 67 | mlimgsynth: ldflags += -Wl,-rpath,.
 68 | test_text_tokenize_clip: ldflags += -Wl,-rpath,.
 69 | endif
 70 | 
 71 | # GCC 13.3.1 20240614 warns about dstr_appendz and dstr_insertz
 72 | # I think the code is ok, but I will check later
 73 | FLAGS=-Wno-array-bounds -Wno-stringop-overflow
 74 | 
 75 | ### Module dependencies
 76 | tensorstore.o: cppflags += -DTENSORSTORE_USE_GGML -DTENSORSTORE_FMT_GGUF \
 77 | 	-DTENSORSTORE_FMT_SAFET
 78 | 
 79 | objs_base = timing.o alloc.o alloc_gen.o stream.o logging.o
 80 | objs_tstore = alloc_arena.o stringstore.o fsutil.o \
 81 | 	any.o structio.o structio_json.o \
 82 | 	tensorstore.o tensorstore_safet.o tensorstore_gguf.o
 83 | 
 84 | ### Binary targets
 85 | test_rng: $(objs_base) rng_philox.o test_rng.o
 86 | 
 87 | tstore-util: $(objs_base) $(objs_tstore) main_tstore_util.o
 88 | 
 89 | libmlimgsynth: $(objs_base) $(objs_tstore) rng_philox.o localtensor.o \
 90 | 	unicode.o unicode_data.o \
 91 | 	ggml_extend.o mlblock.o mlblock_nn.o tae.o vae.o clip.o unet.o lora.o \
 92 | 	solvers.o sampling.o tensor_name_conv.o mlimgsynth.o
 93 | 
 94 | demo_mlimgsynth: demo_mlimgsynth.o
 95 | 
 96 | mlimgsynth: $(objs_base) image.o image_io.o image_io_pnm.o \
 97 | 	localtensor.o main_mlimgsynth.o
 98 | 
 99 | test_text_tokenize_clip: test_text_tokenize_clip.o
100 | 
101 | test_prompt_preproc: $(objs_base) test_prompt_preproc.o
102 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # MLImgSynth
 2 | 
 3 | Generate images using Stable Diffusion (SD) models. This program is written entirely in C and uses the [GGML](https://github.com/ggerganov/ggml/) library as inference backend. It is largely based on [stable-diffusion.cpp](https://github.com/leejet/stable-diffusion.cpp), but with a focus on more concise and clear code. I also took some care with memory usage: at each step only the required weights are loaded in the backend memory (e.g. VRAM). Moreover, with the options `--unet-split` and `--vae-tile` it is possible to run SDXL models using only 4 GiB without quantization.
 4 | 
 5 | ## Supported models
 6 | 
 7 | - SD v1.x: [info](https://github.com/CompVis/stable-diffusion) [weights](https://huggingface.co/runwayml/stable-diffusion-v1-5) (`emaonly` is ok)
 8 | - SD v2.x: [info](https://github.com/Stability-AI/stablediffusion) [weights](https://huggingface.co/stabilityai/stable-diffusion-2-1)
 9 | - SDXL: [info](https://stability.ai/news/stable-diffusion-sdxl-1-announcement) [weights](https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0)
10 | 
11 | Besides the original weights, you may use any of the fine-tuned checkpoints that can be found on the internet. Distilled models (turbo, hyper, lightning) should work normally.
12 | 
13 | ## Usage on Windows
14 | 
15 | Download and unzip the latest [Release](https://github.com/aagdev/mlimgsynth/releases). Edit the file `generate.bat` as needed and execute it.
16 | 
17 | ## Build
18 | 
19 | First you must build ggml as a library with the desired backends and then build this program linking to it. You may symlink the ggml directory to the root of this project or define the `GGML_INCLUDE_PATH` and `GGML_LIB_PATH` variables. Finally, just call `make`. For example:
20 | 
21 | ```shell
22 | export GGML_INCLUDE_PATH=../ggml/include
23 | export GGML_LIB_PATH=../ggml/Release/src
24 | make
25 | ```
26 | 
27 | By default, the program is linked with `libpng` and `libjpeg` to support those formats. You may suppress these dependencies defining `MLIS_NO_PNG` and `MLIS_NO_JPEG`. The PNM image format is always available.
28 | 
29 | ## Usage
30 | 
31 | First, download the weights of the model you wish to use (safetensors and gguf formats supported). To generate an image (txt2img) use:
32 | 
33 | ```shell
34 | ./mlimgsynth generate -m MODEL_PATH --cfg-scale 7 --steps 20 --seed 42 -o output.png -p "a box on a table"
35 | ```
36 | 
37 | The option `-b` lets you select from the available GGML backends. By default the "best" is used, usually GPU. Run `./mlimgsynth list-backends` to see the list of backends and devices.
38 | 
39 | See the script `generate.sh` for a more complete example.
40 | 
41 | Execute without any arguments to see a list of all the supported options.
42 | 
43 | ### img2img and inpainting
44 | 
45 | To start from an initial image (img2img) add the options `-i IMAGE.png` and `--f-t-ini 0.7`. The second option controls the strength by changing the initial time in the denoising process, you may try any value between 0 (no changes) and 1. 
46 | 
47 | If the image has an alpha channel (transparency), it is used as a mask for inpainting. You can modify the alpha channel of an image using an editor like GIMP (remember to tick the option "Save color values from transparent pixels" when saving).
48 | 
49 | ### Lora's
50 | 
51 | Lora's can be loaded individually with the option `--lora PATH,MULT` or with the option `--lora-dir PATH` and adding to the prompt `<lora:NAME:MULT>`. In the latter case, it will look for the file `PATH/NAME.safetensors`.
52 | 
53 | ### Prompt emphasis (token weighting)
54 | 
55 | You can increase or decrease the emphasis of certain parts of the prompt to make the model pay more or less attention to it. This uses the same syntax as [stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui). Examples:
56 | 
57 | * `a (dog) jumping` increases the weight of "dog" by 1.1 .
58 | * `a ((dog)) jumping` increases twice, that is, by 1.21 .
59 | * `a [dog] jumping` decreases by 1.1 (weight ~ 0.91).
60 | * `a (dog:1.5) jumping` increases by 1.5 .
61 | 
62 | You can disable all prompt processing (including loras) using the option `--no-prompt-parse y` *before* the prompt.
63 | 
64 | ### TAE
65 | 
66 | To accelerate and reduce the memory usage during the image decoding, you may use the [TAE](https://github.com/madebyollin/taesd) (tiny autoencoder) in place of the VAE (variational autoencoder) of SD. Download the weights compatible with SD or SDXL, and pass the path to them with the option `--tae TAE.safetensors` to enable it. Be warned that this reduces the quality of the final images. If you are low on memory, it is preferable to use the `--vae-tile 512` option.
67 | 
68 | ## Library
69 | 
70 | All the important functionality is in a library (libmlimgsynth) that you can use from your own programs. There are examples for C (`src/demo_mlimgsynth.c`) and for python (`python/mlimgsynth.py` and `python/guessing_game.py`).
71 | 
72 | ## Future plans
73 | 
74 | - API server and minimal web UI.
75 | - ControlNet.
76 | - Maybe SDE sampling. The biggest hurdle is understanding what the `torchsde.BrownianTree` used in `k-diffusion` is doing.
77 | - Other models?
78 | 
79 | ## License
80 | Most of this program is licensed under the MIT license (see the file `LICENSE`), with the exception of the files in the directory `src/ccommon`, which use the ZLib license (see the file `LICENSE.zlib`). To prevent any confusion, each file indicates its license at the beginning using the SPDX identifier.
81 | 
82 | ## Contributing
83 | Contributions in the form of bug reports, suggestions, patches or pull requests are welcome.
84 | 


--------------------------------------------------------------------------------
/gencode_clip_merges.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright 2025, Alejandro A. García <aag@zorzal.net>
 3 | SPDX-License-Identifier: MIT
 4 | 
 5 | Converts CLIP vocabulary merges in a list of token number pairs.
 6 | ref: https://github.com/openai/CLIP : clip/simple_tokenizer.py
 7 | """
 8 | import gzip
 9 | 
10 | bpe_path = "bpe_simple_vocab_16e6.txt.gz"
11 | 
12 | # Code copied almost verbatim from CLIP repo
def bytes_to_unicode():
	"""Build the byte -> character table used by the BPE vocabulary.

	Returns a pair of parallel lists (bs, cs). bs contains every byte value
	0..255 exactly once and cs[i] is the single character standing for bs[i].
	The bytes in the ranges "!".."~", "¡".."¬" and "®".."ÿ" come first and map
	to themselves; all remaining bytes follow in increasing order and map to
	the code points 256, 257, ...
	"""
	direct = [
		*range(ord("!"), ord("~") + 1),
		*range(ord("¡"), ord("¬") + 1),
		*range(ord("®"), ord("ÿ") + 1),
	]
	direct_set = set(direct)
	shifted = [b for b in range(2**8) if b not in direct_set]

	bs = direct + shifted
	cs = [chr(b) for b in direct]
	cs += [chr(2**8 + i) for i in range(len(shifted))]
	return bs, cs
26 | 
# Read the merge list; the context manager makes sure the file is closed.
with gzip.open(bpe_path) as f:
	merges = f.read().decode("utf-8").split('\n')
# Skip the first line and keep the next 49152-256-2 entries
# (slice bounds as in the referenced simple_tokenizer.py).
merges = merges[1:49152-256-2+1]
merges = [tuple(merge.split()) for merge in merges]

# Token numbering: single characters, the same characters with the
# end-of-word marker, one token per merge and finally the two special tokens.
vocab = list(bytes_to_unicode()[1])
vocab = vocab + [v+'</w>' for v in vocab]
for merge in merges:
	vocab.append(''.join(merge))
vocab.extend(['<|startoftext|>', '<|endoftext|>'])

# Token text -> token number
encoder = dict(zip(vocab, range(len(vocab))))

# Emit each merge as a pair of token numbers, one C initializer per line.
for left, right in merges:
	left_id = encoder[left]
	right_id = encoder[right]
	print("{%d, %d}," % (left_id, right_id))
45 | 


--------------------------------------------------------------------------------
/generate.bat:
--------------------------------------------------------------------------------
 1 | @echo off
 2 | title mlimgsynth
 3 | 
 4 | REM Generation options
 5 | set PROMPT=a photograph of an astronaut riding a horse
 6 | set NPROMPT=
 7 | set SEED=
 8 | REM Sizes: 512,512 512,768 768,512 1024,1024 1216,832 832,1216
 9 | set SIZE=
10 | 
11 | set OUTNAME=output
12 | set OUTEXT=png
13 | set NBATCH=2
14 | 
15 | set CFG_SCALE=7
16 | set STEPS=20
17 | 
18 | REM Sampling method: euler, euler_a, taylor3, dpm++2m, dpm++2s_a
19 | set METHOD=euler_a
20 | REM Scheduler: uniform, karras
21 | set SCHED=uniform
22 | set SAMPOPT=
23 | 
24 | REM Leave empty to use the best (GPU usually).
25 | REM set BACKEND=Vulkan0
26 | REM set BACKEND=CPU
27 | 
28 | REM Change to the path of the model weights
29 | REM Supported models: SD 1, 2 or XL
30 | REM Supported formats: safetensors
31 | set MODEL=../models/sd_v1.5-pruned-emaonly-fp16.safetensors
32 | REM set MODEL=../models/DreamShaper_8.safetensors
33 | REM set MODEL=../models/dreamshaperXL_v21TurboDPMSDE.safetensors
34 | 
35 | REM LoRA's
36 | set LORADIR=../models/loras_sd15
37 | REM set "PROMPT=%PROMPT%<lora:add_detail:0.75>"
38 | 
39 | set EXTRA=
40 | REM You may enable any of the following options removing the REM in front
41 | 
42 | REM Reduce memory usage
43 | REM set EXTRA=%EXTRA% --unet-split 1 --vae-tile 512
44 | 
45 | REM Use TAE instead of VAE to decode faster and with less memory
46 | REM set EXTRA=%EXTRA% --tae "../models/tae_sd.safetensors"
47 | 
48 | REM Perform img2img
49 | REM Inpaints if the image has an alpha channel
50 | REM set EXTRA=%EXTRA% -i "input_image.png" --f-t-ini 0.7
51 | 
52 | REM Debug output
53 | REM set EXTRA=%EXTRA% --debug
54 | 
55 | REM Batch generation
56 | set IDX=0
57 | :loop
58 | set /a IDX=IDX+1
59 | echo Generating %IDX% / %NBATCH%
60 | mlimgsynth generate -b "%BACKEND%" -m "%MODEL%" --lora-dir "%LORADIR%" -p "%PROMPT%" -n "%NPROMPT%" -o "%OUTNAME%-%IDX%.%OUTEXT%" -d "%SIZE%" --cfg-scale "%CFG_SCALE%" --steps "%STEPS%" --seed "%SEED%" --method "%METHOD%" --scheduler "%SCHED%" %SAMPOPT% %EXTRA%
61 | if errorlevel 1 goto error
62 | if not "%SEED%"=="" set /a SEED=SEED+1
63 | if not "%IDX%"=="%NBATCH%" goto loop
64 | goto done
65 | 
66 | :error
67 | echo ERROR %ERRORLEVEL%
68 | :done
69 | pause
70 | 


--------------------------------------------------------------------------------
/generate.sh:
--------------------------------------------------------------------------------
 1 | #/bin/sh
 2 | 
 3 | ### Generation options
 4 | PROMPT="a photograph of an astronaut riding a horse"
 5 | NPROMPT=
 6 | SEED=
 7 | # Sizes: 512,512 512,768 768,512 1024,1024 1216,832 832,1216
 8 | SIZE=
 9 | 
10 | OUTNAME="output"
11 | OUTEXT="png"
12 | NBATCH=1
13 | 
14 | CFG_SCALE=7
15 | STEPS=20
16 | 
17 | # Sampling method: euler, euler_a, taylor3, dpm++2m, dpm++2s_a
18 | METHOD=euler_a
19 | # Scheduler: uniform, karras
20 | SCHED=uniform
21 | SAMPOPT=
22 | 
23 | # Leave empty to use the best (GPU usually).
24 | #BACKEND=Vulkan0
25 | #BACKEND=CPU
26 | 
27 | # Change to the path of the model weights
28 | # Supported models: SD 1, 2 or XL
29 | # Supported formats: safetensors
30 | MODEL="../models/sd_v1.5-pruned-emaonly-fp16.safetensors"
31 | #MODEL="../models/DreamShaper_8.safetensors"
32 | #MODEL="../models/dreamshaperXL_v21TurboDPMSDE.safetensors"
33 | 
34 | # LoRA's
35 | LORADIR="../models/loras_sd15"
36 | #PROMPT="$PROMPT<lora:add_detail:0.75>"
37 | 
38 | EXTRA=
39 | # You may enable any of the following options removing the # in front
40 | 
41 | # Reduce memory usage
42 | #EXTRA="$EXTRA --unet-split 1 --vae-tile 512"
43 | 
44 | # Use TAE instead of VAE to decode faster and with less memory
45 | #EXTRA="$EXTRA --tae '../models/tae_sd.safetensors'"
46 | 
47 | # Perform img2img
48 | # Inpaints if the image has an alpha channel
49 | #EXTRA="$EXTRA -i 'input_image.png' --f-t-ini 0.7"
50 | 
51 | # Debug output
52 | #EXTRA="$EXTRA --debug"
53 | 
54 | # Batch generation
55 | idx=1
56 | while [ $idx -le $NBATCH ]; do
57 | 	echo "Generating $idx / $NBATCH"
58 | 	./mlimgsynth generate -b "$BACKEND" -m "$MODEL" --lora-dir "$LORADIR" -p "$PROMPT" -n "$NPROMPT" -o "$OUTNAME-$idx.$OUTEXT" -d "$SIZE" --cfg-scale "$CFG_SCALE" --steps "$STEPS" --seed "$SEED" --method "$METHOD" --scheduler "$SCHED" $SAMPOPT $EXTRA
59 | 	[ "$SEED" = "" ] || SEED=$(($SEED+1))
60 |     idx=$(($idx+1))
61 | done
62 | 


--------------------------------------------------------------------------------
/python/guessing_game.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright 2024, Alejandro A. García <aag@zorzal.net>
  3 | SPDX-License-Identifier: MIT
  4 | 
  5 | Example program using the MLImgSynth library.
  6 | Web-based game where you see an AI generated image and have to guess the prompt.
  7 | No external modules needed.
  8 | """
  9 | import random
 10 | import logging
 11 | import argparse
 12 | import webbrowser
 13 | from http.server import HTTPServer, BaseHTTPRequestHandler
 14 | from urllib.parse import urlparse, parse_qsl
 15 | 
 16 | from mlimgsynth import MLImgSynth
 17 | from minimal_png import png_write
 18 | 
 19 | ADJECTIVES = [
 20 | 	"red", "blue", "green", "yellow",
 21 | ]
 22 | 
 23 | NOUNS = [
 24 | 	"lion", "rabbit", "cow", "chicken",
 25 | 	"cup", "table", "lamp", "book", "car",
 26 | ]
 27 | 
 28 | PLACES = [
 29 | 	"in the mountains", "on a lake", "in a river", "on a beach", "in a forest",
 30 | 	"in a city street", "in a cavern"
 31 | ]
 32 | 
 33 | class GuessingGame:
 34 | 	def __init__(self, mlis):
 35 | 		self.mlis = mlis
 36 | 		self.prompt = None
 37 | 		self.img = None
 38 | 		self.feat = None
 39 | 		self.last_score = 0.0
 40 | 		self.last_guess = ""
 41 | 		
 42 | 		self.elements = [ADJECTIVES, NOUNS, PLACES]
 43 | 		self.prompt_prefix = None
 44 | 		self.nprompt = None
 45 | 	#end
 46 | 
 47 | 	def generate(self):
 48 | 		self.img = None
 49 | 		self.last_score = 0.0
 50 | 		self.last_guess = ""
 51 | 
 52 | 		self.prompt_elems = [random.choice(elist) for elist in self.elements]
 53 | 		self.prompt = " ".join(self.prompt_elems)
 54 | 		logging.debug("Prompt: " + self.prompt)
 55 | 		#embd, self.feat = self.mlis.clip_text_encode(self.prompt, features=True)
 56 | 
 57 | 		p = self.prompt
 58 | 		if self.prompt_prefix:
 59 | 			p = self.prompt_prefix + " " + p
 60 | 		self.mlis.option_set("prompt", p)
 61 | 		if self.nprompt:
 62 | 			self.mlis.option_set("nprompt", self.nprompt)
 63 | 
 64 | 		logging.info("Generating image...")
 65 | 		self.mlis.generate()
 66 | 		self.img = self.mlis.image_get()
 67 | 	#end
 68 | 
 69 | 	def guess_check(self, guess):
 70 | 		#embd, feat = self.mlis.clip_text_encode(guess, features=True)
 71 | 		#s = self.feat.similarity(feat)
 72 | 		elems = guess.split(maxsplit=2)
 73 | 		elems = [x.strip().lower() for x in elems]
 74 | 		score = sum(int(x == y) for x, y in zip(elems, self.prompt_elems))
 75 | 		score /= len(self.prompt_elems)
 76 | 		self.last_guess = guess
 77 | 		self.last_score = score
 78 | 		return score
 79 | 	#end
 80 | 
 81 | 	def image_png_write(self, f):
 82 | 		png_write(f, self.img.data, self.img.w, self.img.h, self.img.c)
 83 | 	#end
 84 | #end
 85 | 
 86 | PAGE = b"""
 87 | <html>
 88 | <head>
 89 | 	<title>Guessing Game</title>
 90 | 	<style>
 91 | html, body {
 92 | 	max-width: max-content;
 93 | 	margin: 0 auto;
 94 | }
 95 | 	</style>
 96 | </head>
 97 | <body>
 98 | 	<h1>Guessing Game</h1>
 99 | 	<form style="display: inline;">
100 | 		Try to guess the image prompt:<br/>
101 | 		<input type="text" name="guess" size=40 placeholder="red car on a beach" value="{{last_guess}}"/>
102 | 		<input type="submit" value="Guess">
103 | 	</form>
104 | 	Score: {{last_score}}
105 | 	<form style="display: inline;">
106 | 		<input type="hidden" name="new" value="1"/>
107 | 		<input type="submit" value="New Image">
108 | 	</form>
109 | 	<br/>
110 | 	<img src="/image.png" alt="Image to guess"/>
111 | </body>
112 | </html>
113 | """
114 | 
115 | class GuessingGameWebHandler(BaseHTTPRequestHandler):
116 | 	def page_main(self):
117 | 		self.send_response(200)
118 | 		self.send_header('Content-type', 'text/html')
119 | 		self.end_headers()
120 | 		last_score = format(self.server.game.last_score, ".2f").encode("ascii")
121 | 		last_guess = self.server.game.last_guess.encode("ascii")
122 | 		page = PAGE.replace(b"{{last_score}}", last_score) \
123 | 		           .replace(b"{{last_guess}}", last_guess)
124 | 		self.wfile.write(page)
125 | 
126 | 	def page_image(self):
127 | 		self.send_response(200)
128 | 		self.send_header('Content-type', 'image/png')
129 | 		self.end_headers()
130 | 		self.server.game.image_png_write(self.wfile)
131 | 
132 | 	def page_not_found(self):
133 | 		self.send_response(404)
134 | 		self.send_header('Content-type', 'text/plain')
135 | 		self.end_headers()
136 | 		self.wfile.write(b"404 Not Found\n")
137 | 
138 | 	def do_GET(self):
139 | 		url = urlparse(self.path)
140 | 		if url.path == "/":
141 | 			kv = parse_qsl(url.query)
142 | 			if kv:
143 | 				if kv[0][0] == "new":
144 | 					self.server.game.generate()
145 | 				elif kv[0][0] == "guess":
146 | 					self.server.game.guess_check(kv[0][1])
147 | 			self.page_main()
148 | 		elif url.path == "/image.png":
149 | 			self.page_image()
150 | 		else:
151 | 			self.page_not_found()
152 | #end
153 | 
def main():
	"""Parse the command line, generate the first image and serve the game.

	Runs until interrupted (Ctrl-C); the listening socket is closed on exit.
	"""
	parser = argparse.ArgumentParser()
	parser.add_argument("-H", "--host", default="127.0.0.1")
	parser.add_argument("-P", "--port", type=int, default=8000)
	parser.add_argument("-m", "--model", required=True,
		help="Image generation model file path.")
	parser.add_argument("-p", "--prompt-prefix")
	parser.add_argument("-n", "--negative-prompt")
	parser.add_argument("-g", "--genopt",
		help="List of image generation options like: steps=12:method=euler:...")
	parser.add_argument("--no-browser", action="store_true",
		help="Do not open the page in a browser.")
	parser.add_argument("-D", "--debug", action="store_true")
	args = parser.parse_args()

	logging.basicConfig(
		level=logging.DEBUG if args.debug else logging.INFO,
		format="[GAME] %(levelname)s %(message)s" )

	mlis = MLImgSynth()
	mlis.option_set("log-level", "debug" if args.debug else "info")
	mlis.option_set("model", args.model)

	# Extra generator options given as "key=value" pairs separated by ":"
	if args.genopt:
		for kv in args.genopt.split(":"):
			k,_,v = kv.partition("=")
			mlis.option_set(k, v)

	game = GuessingGame(mlis)
	game.prompt_prefix = args.prompt_prefix
	game.nprompt = args.negative_prompt
	# Generate the first image before accepting any request
	game.generate()

	# The constructor binds the socket and starts listening, so the server
	# is already reachable when the browser is opened below.
	httpd = HTTPServer((args.host, args.port), GuessingGameWebHandler)
	httpd.game = game
	logging.info("Listening on %s:%s", args.host, args.port)
	if not args.no_browser and args.host == "127.0.0.1":
		webbrowser.open("http://127.0.0.1:%d" % args.port)
	try:
		httpd.serve_forever()
	except KeyboardInterrupt:
		logging.info("Interrupted, shutting down.")
	finally:
		httpd.server_close()
#end

if __name__ == '__main__':
	main()
199 | 


--------------------------------------------------------------------------------
/python/minimal_png.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright 2024, Alejandro A. García <aag@zorzal.net>
 3 | SPDX-License-Identifier: MIT
 4 | 
 5 | Minimal module to write PNG images without any external dependencies.
 6 | """
 7 | import zlib
 8 | import struct
 9 | 
def chunk_write(f, type_str, data):
	"""Write one PNG chunk to the binary file object f.

	type_str is the 4-character chunk type and data its payload (bytes).
	The chunk is written as: big-endian 32-bit payload length, type,
	payload, big-endian 32-bit CRC computed over type and payload.
	"""
	tag = type_str.encode("ASCII")
	for part in (struct.pack(">I", len(data)), tag, data):
		f.write(part)
	crc = zlib.crc32(data, zlib.crc32(tag))
	f.write(struct.pack(">I", crc))
16 | 
def ihdr_make(w, h, ch):
	"""Return the payload of the IHDR chunk for a w x h image with ch channels.

	The bit depth is always 8. The color type is chosen from the number of
	channels: 4 -> 6, 3 -> 2, 2 -> 4, anything else -> 0. The compression,
	filter and interlace fields are set to 0.
	"""
	color_types = {4: 6, 3: 2, 2: 4}
	color_type = color_types.get(ch, 0)
	bit_depth = 8
	return struct.pack(">IIBBBBB", w, h, bit_depth, color_type, 0, 0, 0)
21 | 
def data_filter(data, s, h):
	"""Prepare raw image data for the IDAT chunk.

	data holds h rows of s bytes each. Returns a bytes object where every
	row is preceded by the filter type byte 0 (no filter, raw data).
	"""
	# A bytearray grows in place, keeping this linear in the image size.
	# Appending to an immutable bytes object would copy everything
	# accumulated so far for each row.
	out = bytearray()
	for y in range(h):
		out.append(0)  # No filter, raw data
		out += data[s*y:s*(y+1)]
	return bytes(out)
29 | 
def png_write(f, data, w, h, ch=3, clvl=-1, stride=None, texts=()):
	"""Write an 8 bits per channel image to the binary file object f as PNG.

	data:   raw pixel data, h rows of `stride` bytes.
	w, h:   image width and height in pixels.
	ch:     number of channels (see ihdr_make for the color type used).
	clvl:   zlib compression level (-1 for the zlib default).
	stride: bytes per row in data; defaults to w*ch.
	texts:  iterable of (keyword, text) pairs stored as text chunks.

	Text that fits in Latin-1 is stored in a tEXt chunk, as the PNG
	specification requires for that chunk type; any other text is stored
	as UTF-8 in an uncompressed iTXt chunk. Keywords must be Latin-1
	(UnicodeEncodeError is raised otherwise).
	"""
	# Signature
	f.write(b"\x89PNG\r\n\x1a\n")
	# Header
	ihdr = ihdr_make(w, h, ch)
	chunk_write(f, "IHDR", ihdr)
	# Text chunks
	for name, text in texts:
		keyword = name.encode("latin-1")
		try:
			chunk_type = "tEXt"
			text_data = keyword + b"\0" + text.encode("latin-1")
		except UnicodeEncodeError:
			# iTXt layout: keyword, NUL, compression flag (0 = none),
			# compression method, empty language tag, NUL,
			# empty translated keyword, NUL, UTF-8 text.
			chunk_type = "iTXt"
			text_data = keyword + b"\0" + b"\0\0" + b"\0" + b"\0" \
				+ text.encode("utf-8")
		chunk_write(f, chunk_type, text_data)
	# Image data
	fdata = data_filter(data, stride or w*ch, h)
	cdata = zlib.compress(fdata, clvl)
	chunk_write(f, "IDAT", cdata)
	# End
	chunk_write(f, "IEND", bytes())
#end
47 | 
# Minimal test: writes a small gradient image to minimal_png_test.png
if __name__ == "__main__":
	width, height, channels = 40, 20, 3
	# Channel 0 ramps with x, channel 2 ramps with y, channel 1 stays at 0.
	pixels = bytearray()
	for row in range(height):
		for col in range(width):
			pixels += bytes((col * 6, 0, row * 12))
	with open("minimal_png_test.png", "wb") as out:
		png_write(out, bytes(pixels), width, height, channels,
			texts=[("source", "minimal_png.py")])
57 | 


--------------------------------------------------------------------------------
/src/ccommon/alloc.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "alloc.h"
 5 | 
// Rounds S up to the next multiple of A.
// The bit mask only gives the right result when A is a power of two.
// A is evaluated twice: do not pass an expression with side effects.
#define size_align(S,A) \
	(((S) + ((A) - 1)) & ~((A) - 1))
 8 | 
 9 | // libc wrapper
10 | #if __STDC_HOSTED__
11 | #include <stdlib.h>
12 | void* alloc_stdlib_alloc(Allocator* a, void* p, size_t sz, int flags)
13 | {
14 | 	if (a->flags & ALLOC_F_HAS_SIZE4) {
15 | 		if (p) p = (uint8_t*)p - ALLOC_SIZE_ALIGNMENT;
16 | 		if (sz) sz = size_align(sz + ALLOC_SIZE_ALIGNMENT, ALLOC_SIZE_ALIGNMENT);
17 | 	}
18 | 	//if (a->flags & ALLOC_F_ROUND2 && sz) {
19 | 	//	sz = size_round2(sz);
20 | 	//}
21 | 	if (p) {
22 | 		if (sz == 0) {
23 | 			free(p);
24 | 			p = NULL;
25 | 		} else {
26 | 			sz += sz >> ALLOC_RESIZE_MARGIN;
27 | 			p = realloc(p, sz);
28 | 			if (!p) alloc_fatal(a);
29 | 		}
30 | 	} else {
31 | 		if (flags & ALLOC_AF_ZERO) {
32 | 			p = calloc(1, sz);
33 | 		} else {
34 | 			p = malloc(sz);
35 | 		}
36 | 		if (!p) alloc_fatal(a);
37 | 	}
38 | 	if (a->flags & ALLOC_F_HAS_SIZE4 && p) {
39 | 		p = (uint8_t*)p + ALLOC_SIZE_ALIGNMENT;
40 | 		((uint32_t*)p)[-1] = sz - ALLOC_SIZE_ALIGNMENT;
41 | 	}
42 | 	return p;
43 | }
44 | #endif
45 | 
46 | // Global allocators
47 | #ifdef CC_ALLOC_GLOBAL_USE_STDLIB
48 | Allocator global_allocator =
49 | 			{ alloc_stdlib_alloc, NULL, NULL, NULL, 0 },
50 |           *g_allocator = &global_allocator;
51 | 
52 | Allocator global_allocator_dopt =
53 | 			{ alloc_stdlib_alloc, NULL, NULL, NULL, ALLOC_F_DOPTIMAL },
54 |           *g_allocator_dopt = &global_allocator_dopt;
55 | #endif
56 | 


--------------------------------------------------------------------------------
/src/ccommon/alloc.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Common allocator interface.
  5 |  * Failures are handled by calling a special function instead of returning
  6 |  * NULL, so there is no need to check for errors.
  7 |  */
  8 | #pragma once
  9 | #include <stddef.h>
 10 | #include <stdint.h>
 11 | #include <stdbool.h>
 12 | #include <assert.h>
 13 | #include <stdlib.h>
 14 | 
 15 | //TODO: modify vector.h to take advantage of size info ?
 16 | //TODO: compile option to use stdlib instead of the custom allocator
 17 | 
// Size in bytes of the header that the C library backed allocator puts in
// front of each block when ALLOC_F_HAS_SIZE4 is set. Block sizes are rounded
// up to a multiple of this value, which therefore must be a power of two.
// May be overridden at build time.
#ifndef ALLOC_SIZE_ALIGNMENT
#define ALLOC_SIZE_ALIGNMENT 16
#endif

// Extra space reserved by the C library backed allocator when a block is
// resized: the requested size is increased by (size >> ALLOC_RESIZE_MARGIN).
// May be overridden at build time.
#ifndef ALLOC_RESIZE_MARGIN
#define ALLOC_RESIZE_MARGIN 4  //+6%
#endif
 25 | 
 26 | /* Allocator interface */
 27 | 
// Generic allocator handle: a set of callbacks plus a context and options.
// Instances are also built with positional initializers, so the member
// order is significant.
typedef struct Allocator Allocator;
struct Allocator {
	// Allocation, reallocation and freeing of memory.
	// The helpers below call it with ptr==NULL for a new block, with sz==0
	// to free <ptr>, and with both set to resize.
	void * (*alloc)(Allocator* a, void* ptr, size_t sz, int flags);
	// Free all associated memory (if possible). Can be NULL.
	void (*ctx_free)(Allocator* a);
	// Handles fatal errors (out of memory). Can be NULL or user supplied.
	// alloc_fatal() aborts after calling it.
	void (*fatal)(const Allocator* a);
	// Allocator context
	void * ctx;
	// Options (ALLOC_F_* flags)
	int flags;
};
 41 | 
// Allocator flags (values for Allocator.flags)
enum {
	// Store size of each allocation in the previous 4 bytes
	ALLOC_F_HAS_SIZE4	= 1,
	// Alias of ALLOC_F_HAS_SIZE4
	ALLOC_F_HAS_SIZE	= ALLOC_F_HAS_SIZE4,
	// Set of flags for efficient dynamic arrays
	// (currently the same as ALLOC_F_HAS_SIZE4)
	ALLOC_F_DOPTIMAL	= ALLOC_F_HAS_SIZE4,
};
 50 | 
// alloc() flags (values for the <flags> argument of Allocator.alloc)
enum {
	ALLOC_AF_ZERO = 1,  //Zero memory (new allocation only)
};
 55 | 
 56 | // Checks if an allocator is ready to use
 57 | static inline bool allocator_good(const Allocator* a) {
 58 | 	return !!a->alloc;
 59 | }
 60 | 
 61 | // Free all the memory associated with the allocator (if it corresponds).
 62 | // May be a no-op.
 63 | static inline void allocator_free(Allocator* a) {
 64 | 	if (a->ctx_free) a->ctx_free(a);
 65 | }
 66 | 
 67 | // This called to handle fatal errors (out of memory)
 68 | static inline void alloc_fatal(const Allocator* a) {
 69 | 	if (a->fatal) a->fatal(a);
 70 | 	abort();
 71 | }
 72 | 
// Mask applied by alloc_size() to the 32-bit word stored before a block.
// NOTE(review): the bits outside the mask are presumably reserved for
// allocator bookkeeping -- confirm against the allocator implementations.
#define ALLOC_SIZE_MASK  0x0ffffffc
 74 | 
 75 | // Allocates a new block
 76 | #ifdef __GNUC__
 77 | __attribute((malloc, alloc_size(2)))
 78 | #endif
 79 | static inline
 80 | void * alloc_alloc(Allocator* a, size_t sz) {
 81 | 	void * p = a->alloc(a, NULL, sz, ALLOC_AF_ZERO);
 82 | 	if (!p && sz) alloc_fatal(a);
 83 | 	return p;
 84 | }
 85 | 
// Allocates a new block of C elements of type T from allocator A.
// Expands to a call to alloc_alloc() cast to T*.
#define alloc_new(A, T, C) \
	((T*)alloc_alloc((A), sizeof(T)*(C)))
 89 | 
 90 | /* Get the size of a block.
 91 |  * May be larger than the requested size. The additional space can be used normally.
 92 |  * Returns zero if not supported.
 93 |  */
 94 | static inline
 95 | size_t alloc_size(const Allocator* a, const void* p) {
 96 | 	if (!(a && a->flags & ALLOC_F_HAS_SIZE4)) return 0; 
 97 | 	return p ? ((uint32_t*)p)[-1] & ALLOC_SIZE_MASK : 0;
 98 | }
 99 | 
100 | /* Get the size of a block.
101 |  * Returns <def> if not known.
102 |  */
103 | static inline
104 | size_t alloc_size_opt(const Allocator* a, const void* p, size_t def) {
105 | 	if (!(a && a->flags & ALLOC_F_HAS_SIZE4)) return def;
106 | 	return alloc_size(a, p);
107 | }
108 | 
109 | // Changes the size of a block
110 | #ifdef __GNUC__
111 | __attribute((malloc, alloc_size(3)))
112 | #endif
113 | static inline
114 | void * alloc_realloc(Allocator* a, void* p, size_t sz) {
115 | 	if (a->flags & ALLOC_F_HAS_SIZE4 && sz <= alloc_size(a, p)) return p;
116 | 	p = a->alloc(a, p, sz, 0);
117 | 	if (!p && sz) alloc_fatal(a);
118 | 	return p;
119 | }
120 | 
// Resizes the block P to hold C elements of type T using allocator A.
// Expands to a call to alloc_realloc() cast to T*.
#define alloc_resize(A, P, T, C) \
	((T*)alloc_realloc((A), (P), sizeof(T)*(C)))
123 | 
124 | // Frees a block
125 | static inline
126 | void alloc_free(Allocator* a, void* p) {
127 | 	if (!p) return;
128 | 	a->alloc(a, p, 0, 0);
129 | }
130 | 
131 | /* Global allocators for modules that can not take it as a parameter.
132 |  * May be modified by the user.
133 |  */
134 | extern Allocator *g_allocator, *g_allocator_dopt;
135 | 
136 | /* Standard library wrapper
137 |  */
138 | #if __STDC_HOSTED__
139 | void* alloc_stdlib_alloc(Allocator* a, void* ptr, size_t sz, int flags);
140 | 
141 | /* Returns a wrapper allocator for stdlib.
142 |  */
143 | static inline
144 | Allocator allocator_stdlib() {
145 | 	return (Allocator){ alloc_stdlib_alloc, NULL, NULL, NULL, 0 };
146 | }
147 | 
148 | /* Returns a wrapper allocator for stdlib.
149 |  * Optimized for efficient dynamic arrays (frequent reallocations).
150 |  */
151 | static inline
152 | Allocator allocator_stdlib_dopt() {
153 | 	return (Allocator){ alloc_stdlib_alloc, NULL, NULL, NULL, ALLOC_F_DOPTIMAL };
154 | }
155 | #endif
156 | 
157 | /* Utility */
158 | 
// Round-up to the nearest power of two, for the full width of size_t.
// Values that already are a power of two are returned unchanged.
// Returns 0 for v == 0 and when the result does not fit in size_t.
static inline
size_t size_round2(size_t v)
{
	v--;
	// Smear the highest set bit into every lower position. The shift doubles
	// until it covers the whole type (8-bit bytes assumed), so 64-bit values
	// are handled and no shift ever reaches the type width.
	for (unsigned s = 1; s < sizeof(v) * 8; s <<= 1)
		v |= v >> s;
	v++;
	return v;
}
172 | 


--------------------------------------------------------------------------------
/src/ccommon/alloc_arena.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include "alloc_arena.h"
  5 | #include "ccommon.h"
  6 | #include <string.h>
  7 | 
  8 | #ifndef ALLOC_arena_PAGE_SIZE
  9 | #define ALLOC_arena_PAGE_SIZE  (4096-16)
 10 | #endif
 11 | 
 12 | #ifndef ALLOC_arena_ALLOCATOR
 13 | #define ALLOC_arena_ALLOCATOR  g_allocator
 14 | #endif
 15 | 
 16 | int alloc_arena_frombuffer(AllocatorArena * S, size_t sz, void* buf)
 17 | {
 18 | 	if (S->al) alloc_arena_free(S);
 19 | 	if (sz < sizeof(*S->page)) return -1;
 20 | 	S->al = NULL;
 21 | 	S->page = buf;
 22 | 	S->page->prev = NULL;
 23 | 	S->rem = S->page->size = sz - sizeof(*S->page);
 24 | 	return 1;
 25 | }
 26 | 
 27 | int alloc_arena_reserve(AllocatorArena* S, size_t size)
 28 | {
 29 | 	if (S->rem >= size) return 0;
 30 | 	if (!S->al) {
 31 | 		if (!S->page) S->al = ALLOC_arena_ALLOCATOR;
 32 | 		else return -1;
 33 | 	}
 34 | 
 35 | 	size += sizeof(struct AllocArenaPage);  //header size
 36 | 	MAXSET(size, ALLOC_arena_PAGE_SIZE);  //minimum page size
 37 | 	
 38 | 	// Allocate a new page, previous page remaining space is lost
 39 | 	struct AllocArenaPage * p = alloc_alloc(S->al, size);
 40 | 	size = alloc_size_opt(S->al, p, size);
 41 | 	p->prev = S->page;
 42 | 	S->page = p;
 43 | 	S->rem = p->size = size - sizeof(*p);
 44 | 	return 1;
 45 | }
 46 | 
 47 | void * alloc_arena_alloc(AllocatorArena* S, size_t sz)
 48 | {
 49 | 	if (sz > S->rem && alloc_arena_reserve(S, sz) < 0) return NULL;
 50 | 	void * p = S->page->data + S->page->size - S->rem;
 51 | 	S->rem -= sz;
 52 | 	return p;
 53 | }
 54 | 
 55 | void alloc_arena_free_last(AllocatorArena* S, void* p_)
 56 | {
 57 | 	if (!S->page) return;
 58 | 	uint8_t *ini = S->page->data,
 59 | 	        *end = S->page->data + S->page->size,
 60 | 			*p = p_;
 61 | 	if (ini <= p && p < end) {
 62 | 		S->rem = end - p;
 63 | 	}
 64 | }
 65 | 
 66 | void alloc_arena_free(AllocatorArena* S)
 67 | {
 68 | 	if (S->al) {  //dynamic storage
 69 | 		// Iterate over the pages and free them
 70 | 		struct AllocArenaPage *cur, *prev=S->page;
 71 | 		while ((cur = prev)) {
 72 | 			prev = cur->prev;
 73 | 			alloc_free(S->al, cur);
 74 | 		}
 75 | 		S->page = NULL;
 76 | 		S->rem = 0;
 77 | 	}
 78 | 	else if (S->page) {  //static storage
 79 | 		S->rem = S->page->size;
 80 | 	}
 81 | }
 82 | 
 83 | void * allocator_arena_alloc(Allocator* a, void* ptr, size_t sz, int flags)
 84 | {
 85 | 	AllocatorArena * S = a->ctx;
 86 | 	//TODO: implement size storage?
 87 | 	//TODO: alignment?
 88 | 	if (a->flags & ALLOC_F_HAS_SIZE4) { alloc_fatal(a); return NULL; }
 89 | 	if (ptr) {
 90 | 		if (sz == 0) {
 91 | 			// Free: no op
 92 | 			return NULL;
 93 | 		} else {
 94 | 			// The old size is not known
 95 | 			alloc_fatal(a);
 96 | 			return NULL;
 97 | 		}
 98 | 	}
 99 | 	void * p = alloc_arena_alloc(S, sz);
100 | 	if (flags & ALLOC_AF_ZERO)
101 | 		memset(p, 0, sz);
102 | 	//if (ptr && p && oldsz)
103 | 	//	memcpy(p, ptr, oldsz);
104 | 	return p;
105 | }
106 | 
107 | void allocator_arena_ctx_free(Allocator* a)
108 | {
109 | 	AllocatorArena * S = a->ctx;
110 | 	alloc_arena_free(S);
111 | }
112 | 


--------------------------------------------------------------------------------
/src/ccommon/alloc_arena.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * Simple and fast arena allocator that frees everything at once.
 5 |  * Optimized for small allocations that do not resize.
 6 |  * Can extend itself by allocating new arenas.
 7 |  *
 8 |  * Example:
 9 |  *   AllocatorArena al={0};
10 |  *   char * d = alloc_arena_alloc(&al, 16);
11 |  *   alloc_arena_free(&al);
12 |  */
13 | #pragma once
14 | #include "alloc.h"
15 | 
// Arena state. A zero-initialized instance is ready to use: the first
// reservation then picks the default allocator.
typedef struct AllocatorArena {	
	// Bytes still available at the end of the current page
	size_t rem;
	Allocator * al;  //Set this if a non-default allocator is desired
	// Pages form a singly linked list, the current (newest) one first
	struct AllocArenaPage {
		struct AllocArenaPage *prev;  //Previously used page, or NULL
		size_t size;                  //Capacity of data[] in bytes
		uint8_t data[];               //Storage handed out to the user
	} *page;
} AllocatorArena;
25 | 
26 | // Initialize the arena to use only the space provided (it cannot grow). Returns 1, or -1 if the buffer is too small.
27 | int alloc_arena_frombuffer(AllocatorArena*, size_t sz, void* buf);
28 | 
// Same as alloc_arena_frombuffer() taking the size from the array A.
// A must be a real array (not a pointer) since sizeof is applied to it.
#define alloc_arena_fromarray(S, A) \
	alloc_arena_frombuffer((S), sizeof(A), (A))
31 | 
32 | // Reserve space at least <sz> bytes
33 | int alloc_arena_reserve(AllocatorArena* S, size_t sz);
34 | 
35 | // Allocate memory from it
36 | void* alloc_arena_alloc(AllocatorArena* S, size_t sz);
37 | 
// Allocates a new block of C elements of type T from the arena A.
// Expands to a call to alloc_arena_alloc() cast to T*.
#define alloc_arena_new(A, T, C) \
	((T*)alloc_arena_alloc((A), sizeof(T)*(C)))
41 | 
42 | // Free all memory used by the arena, including internal
43 | void alloc_arena_free(AllocatorArena* S);
44 | 
45 | // Give the last allocation (and anything allocated after p) back to the arena.
46 | // If p does not point inside the current page, it does nothing.
47 | void alloc_arena_free_last(AllocatorArena* S, void* p);
48 | 
49 | //TODO: change prefix to arena_ only ?
50 | //TODO: free last
51 | //TODO: free up to ~ rollback
52 | //TODO: rollback: get mark and free up to it only
53 | 
54 | void * allocator_arena_alloc(Allocator* a, void* ptr, size_t sz, int flags);
55 | 
56 | void allocator_arena_ctx_free(Allocator* a);
57 | 
58 | // Returns a generic allocator interface
59 | static inline
60 | Allocator allocator_arena(AllocatorArena* S) {
61 | 	return (Allocator){
62 | 		.alloc = allocator_arena_alloc,
63 | 		.ctx_free = allocator_arena_ctx_free,
64 | 		.ctx = S };
65 | }
66 | 


--------------------------------------------------------------------------------
/src/ccommon/alloc_gen.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * General purpose allocator.
 5 |  * Loosely-based in dlmalloc.
 6 |  */
 7 | #pragma once
 8 | #include "alloc.h"
 9 | 
10 | void* alloc_gen_alloc(Allocator* a, void* ptr, size_t sz, int flags);
11 | 
12 | void alloc_gen_ctx_free(Allocator* a);
13 | 
14 | /* Returns a new allocator.
15 |  */
16 | static inline
17 | Allocator allocator_gen() {
18 | 	return (Allocator){ &alloc_gen_alloc, &alloc_gen_ctx_free,
19 | 		NULL, NULL, ALLOC_F_HAS_SIZE4 };
20 | }
21 | 
22 | // Reduces the memory used to a minimum
23 | void allocator_gen_trim(Allocator* a);
24 | 
25 | // Free all the allocated memory, but not the allocator itself
26 | //void allocator_gen_free_all(Allocator* a);
27 | 
28 | // Return nonzero if the allocator has no allocations besides the space
29 | // used internally. Useful to detect memory leaks.
30 | int allocator_gen_empty_is(const Allocator* a);
31 | 
// Return various summary statistics
// The values are calculated on the spot, so it could be slow.
// NOTE(review): mfchunk and nfchunk are not documented here; confirm their
// meaning against the implementation in alloc_gen.c.
typedef struct AllocGenInfo {
	size_t		mtot,		// Total memory allocated from the system
				mfree,		// Free memory
				mfchunk;
	unsigned	nseg,		// Number of segments
				nchunk,		// Number of chunks
				nchunkf,	// Number of free chunks
				nfchunk;
} AllocGenInfo;
43 | AllocGenInfo allocator_gen_info(const Allocator* a);
44 | 


--------------------------------------------------------------------------------
/src/ccommon/base.mk:
--------------------------------------------------------------------------------
  1 | # Makefile
  2 | # Copyright 2024, Alejandro A. García <aag@zorzal.net>
  3 | # SPDX-License-Identifier: Zlib
  4 | 
  5 | # Common make definitions and rules for use in multiple projects.
  6 | # 
  7 | # Use VPATH to configure the source directories
  8 | # e.g. VPATH = src:src/ccommon
  9 | #
 10 | # Append to cppflags to add include directories
 11 | # e.g. cppflags += -Isrc/ccommon
 12 | 
 13 | objdir = obj
 14 | depdir = .d
 15 | 
 16 | flags    = $(FLAGS)
 17 | cppflags = $(CPPFLAGS)
 18 | cflags   = -std=c99 -Wall -pedantic $(CFLAGS)
 19 | cxxflags = $(CXXFLAGS)
 20 | ldlibs   = $(LDLIBS)
 21 | ldflags  = $(LDFLAGS)
 22 | 
 23 | depflags = -MT $@ -MMD -MP -MF $(depdir)/$*.d
 24 | 
 25 | # Options for dynamic libraries
 26 | flags += -fPIC -fvisibility=hidden
 27 | 
 28 | ### Compilation options
 29 | ifndef nonative
 30 | cppflags += -march=native
 31 | endif
 32 | 
 33 | ifdef debug
 34 | objdir = obj_dbg
 35 | depdir = .d_dbg
 36 | #cppflags += -g -DDEBUG
 37 | cppflags += -ggdb -g3 -DDEBUG
 38 | else ifdef debugo
 39 | objdir = obj_dbg
 40 | depdir = .d_dbg
 41 | cppflags += -ggdb -g3 -DDEBUG
 42 | flags += -Og
 43 | else ifdef small
 44 | cppflags += -DNDEBUG
 45 | ldflags += -Wl,--strip-all
 46 | flags += -Os
 47 | else ifdef fast
 48 | cppflags += -DNDEBUG
 49 | ldflags += -Wl,--strip-all
 50 | flags += -O3
 51 | flags += -flto -fwhole-program -fuse-linker-plugin
 52 | else
 53 | cppflags += -DNDEBUG
 54 | ldflags += -Wl,--strip-all
 55 | flags += -O2
 56 | endif
 57 | 
 58 | ifdef profile
 59 | flags += -pg
 60 | $(info gprof CMD gmon.out | less)
 61 | endif
 62 | 
 63 | ###
 64 | .PHONY: all clean
 65 | 
 66 | # Disable implicit rules
 67 | .SUFFIXES:
 68 | #.SUFFIXES: .c .o
 69 | 
 70 | # Do not remove intermediate files
 71 | .SECONDARY:
 72 | 
 73 | ### OS specifics
 74 | ifeq ($(OS),Windows_NT)
 75 | EXEC_EXT=.exe
 76 | DLIB_EXT=.dll
 77 | RUN_PRE=
 78 | targets_bin = $(addsuffix $(EXEC_EXT),$(targets)) $(addsuffix $(DLIB_EXT),$(targets_dlib))
 79 | targets_bin2 = $(targets)
 80 | else
 81 | EXEC_EXT=
 82 | DLIB_EXT=.so
 83 | RUN_PRE=./
 84 | targets_bin = $(targets) $(addsuffix $(DLIB_EXT),$(targets_dlib))
 85 | targets_bin2 = 
 86 | endif
 87 | 
 88 | ### Commands
 89 | COMPILE_C   = $(CC)  $(depflags) $(flags) $(cppflags) $(cflags)   -c -o $@ $<
 90 | COMPILE_CXX = $(CXX) $(depflags) $(flags) $(cppflags) $(cxxflags) -c -o $@ $<
 91 | LINK_EXEC = $(CC) $(flags) $(ldflags) -o $@$(EXEC_EXT) \
 92 | 	$(addprefix $(objdir)/,$(filter %.o,$^)) $(ldlibs)
 93 | LINK_DLIB = $(CC) $(flags) $(ldflags) -shared -o $@$(DLIB_EXT) \
 94 | 	$(addprefix $(objdir)/,$(filter %.o,$^)) $(ldlibs)
 95 | 
 96 | ### Some commonly used dependencies
 97 | #$(info OS=$(OS))
 98 | ifeq ($(OS),Windows_NT)
 99 | socket_libs = -lws2_32
100 | sdl_libs = -lmingw32 -lSDL2main -lSDL2
101 | else
102 | #socket_libs =
103 | sdl_libs = -lSDL2main -lSDL2
104 | endif
105 | sdl_objs += image_sdl.o
106 | 
107 | ### Rules
108 | all: $(targets_dlib) $(targets)
109 | 
110 | $(targets): | $(objdir) $(depdir)
111 | ifdef verbose
112 | 	$(LINK_EXEC)
113 | else
114 | 	@echo "LINK $@"
115 | 	@$(LINK_EXEC)
116 | endif
117 | ifdef run
118 | 	$(RUN_PRE)$@
119 | endif
120 | ifdef gdb
121 | 	gdb $@
122 | endif
123 | 
124 | $(targets_dlib): | $(objdir) $(depdir)
125 | ifdef verbose
126 | 	$(LINK_DLIB)
127 | else
128 | 	@echo "LINK $@"
129 | 	@$(LINK_DLIB)
130 | endif
131 | 
132 | $(objdir):
133 | 	mkdir -p $(objdir)
134 | 
135 | %.o: $(objdir)/%.o ;
136 | 
137 | $(objdir)/%.o: %.c
138 | ifdef verbose
139 | 	$(COMPILE_C)
140 | else
141 | 	@echo "CC $@"
142 | 	@$(COMPILE_C)
143 | endif
144 | 
145 | $(objdir)/%.o: %.cpp
146 | ifdef verbose
147 | 	$(COMPILE_CXX)
148 | else
149 | 	@echo "CXX $@"
150 | 	@$(COMPILE_CXX)
151 | endif
152 | 
153 | ###
154 | $(depdir):
155 | 	mkdir -p $(depdir)
156 | 
157 | $(depdir)/%.d: ;
158 | 
159 | .PRECIOUS: $(depdir)/%.d
160 | 
161 | include $(wildcard $(depdir)/*.d)
162 | 
163 | ### Clean-up rules
164 | cleanbin:
165 | 	rm -f $(targets_bin) $(targets_bin2)
166 | 
167 | clean: cleanbin
168 | 	rm -f $(objdir)/* $(depdir)/*
169 | 	-rm -f gmon.out *.gcov
170 | 
171 | distclean: cleanbin
172 | 	rm -fr obj obj_dbg .d .d_dbg
173 | 	-rm -f gmon.out *.gcov
174 | 
175 | ### Some shorthands
176 | run_%: %
177 | 	@echo ""
178 | 	$(RUN_PRE)$<
179 | 
180 | test: $(addprefix run_,$(tests))
181 | 


--------------------------------------------------------------------------------
/src/ccommon/bisect.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include <stddef.h>
 6 | #include <stdbool.h>
 7 | 
 8 | /* Bisection for binary search and sorting
 9 |  * Example with integer:
10 | 	bool found;
11 | 	size_t idx;
12 | 	BISECT_RIGHT(found, idx, 0, vec_count(index), index[i_] - key);
13 |  * Example with string key:
14 | 	BISECT_RIGHT(found, idx, 0, vec_count(index), strcmp(index[i_], key));
15 |  */
/* Binary search over the index range [INI, LEN).
 * CMPE is an expression that uses the probe index i_ and gives a negative,
 * zero or positive int: negative continues to the right of i_, positive to
 * the left, and zero stops with a match.
 * FOUND and IDX must be assignable; see BISECT_RIGHT_S for their values.
 */
#define BISECT_RIGHT(FOUND, IDX, INI, LEN, CMPE) \
	BISECT_RIGHT_S(FOUND, IDX, INI, LEN, r_ = (CMPE); )
18 | 
/* Same as BISECT_RIGHT, but also declares FOUND (bool) and IDX (size_t)
 * in the current scope. The self-assignments only mark the variables as
 * used. The expansion already ends with a semicolon.
 */
#define BISECT_RIGHT_DECL(FOUND, IDX, INI, LEN, CMPE) \
	bool FOUND=0; FOUND=FOUND; \
	size_t IDX=0; IDX=IDX; \
	BISECT_RIGHT_S(FOUND, IDX, INI, LEN, r_ = (CMPE); );
23 | 
/* Alternative version where CMPM can be function-like macro.
 * It is invoked as CMPM(r_,i_) and must store the comparison result for
 * the probe index i_ into r_.
 */
#define BISECT_RIGHT_M(FOUND, IDX, INI, LEN, CMPM) \
	BISECT_RIGHT_S(FOUND, IDX, INI, LEN, CMPM(r_,i_); )
28 | 
/* Alternative version where CMPS is a statement setting r_ from the probe
 * index i_. This is the core implementation used by the other variants.
 * On exit FOUND tells whether the last comparison gave zero (false for an
 * empty range) and IDX is the matching index or, without a match, the index
 * where the search range collapsed.
 */
#define BISECT_RIGHT_S(FOUND, IDX, INI, LEN, CMPS) do { \
	size_t i_, b_=(INI), e_=(LEN); \
	int r_=-1; \
	while (b_ < e_) { \
		i_ = (b_+e_)/2; \
		CMPS \
		if (r_ < 0)			b_ = i_+1; \
		else if (r_ > 0)	e_ = i_; \
		else { b_=i_; break; } \
	} \
	(FOUND) = (r_ == 0); \
	(IDX) = b_; \
} while(0)
44 | 


--------------------------------------------------------------------------------
/src/ccommon/byteswap.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Byte order (endianness) conversion.
  5 |  */
  6 | #pragma once
#include <assert.h>
#include <stdbool.h>
  8 | 
  9 | /*
 10 | */
 11 | static inline bool little_endian_is() { int n=1; return *(char*)&n; }
 12 | 
 13 | static inline bool big_endian_is() { int n=1; return !*(char*)&n; }
 14 | 
 15 | /*
 16 | */
 17 | static inline
 18 | void byteswap16(void* p) {
 19 | 	unsigned char tmp, *b=p;
 20 | 	tmp = b[0]; b[0] = b[1]; b[1] = tmp;
 21 | }
 22 | static inline
 23 | void byteswap16le(void* p) { if (!little_endian_is()) byteswap16(p); }
 24 | static inline
 25 | void byteswap16be(void* p) { if (!big_endian_is()) byteswap16(p); }
 26 | 
 27 | static inline
 28 | void byteswap32(void* p) {
 29 | 	unsigned char tmp, *b=p;
 30 | 	tmp = b[0]; b[0] = b[3]; b[3] = tmp;
 31 | 	tmp = b[1]; b[1] = b[2]; b[2] = tmp;
 32 | }
 33 | static inline
 34 | void byteswap32le(void* p) { if (!little_endian_is()) byteswap32(p); }
 35 | static inline
 36 | void byteswap32be(void* p) { if (!big_endian_is()) byteswap32(p); }
 37 | 
 38 | static inline
 39 | void byteswap64(void* p) {
 40 | 	unsigned char tmp, *b=p;
 41 | 	tmp = b[0]; b[0] = b[7]; b[7] = tmp;
 42 | 	tmp = b[1]; b[1] = b[6]; b[6] = tmp;
 43 | 	tmp = b[2]; b[2] = b[5]; b[5] = tmp;
 44 | 	tmp = b[3]; b[3] = b[4]; b[4] = tmp;
 45 | }
 46 | static inline
 47 | void byteswap64le(void* p) { if (!little_endian_is()) byteswap64(p); }
 48 | static inline
 49 | void byteswap64be(void* p) { if (!big_endian_is()) byteswap64(p); }
 50 | 
 51 | static inline
 52 | void byteswap(unsigned n, void* p) {
 53 | 	switch (n) {
 54 | 	case 2: byteswap16(p); break;
 55 | 	case 4: byteswap32(p); break;
 56 | 	case 8: byteswap64(p); break;
 57 | 	}
 58 | }
 59 | static inline
 60 | void byteswaple(unsigned n, void* p) {
 61 | 	if (!little_endian_is()) byteswap(n, p);
 62 | }
 63 | static inline
 64 | void byteswapbe(unsigned n, void* p) {
 65 | 	if (!big_endian_is()) byteswap(n, p);
 66 | }
 67 | 
 68 | /*
 69 | */
 70 | static inline
 71 | void byteswap_copy16(void*restrict dst, const void*restrict src) {
 72 | 	unsigned char *d=dst;
 73 | 	unsigned char const *s=src;
 74 | 	d[0] = s[0];
 75 | 	d[1] = s[1];
 76 | }
 77 | static inline
 78 | void byteswap_swap16(void*restrict dst, const void*restrict src) {
 79 | 	unsigned char *d=dst;
 80 | 	unsigned char const *s=src;
 81 | 	d[0] = s[1];
 82 | 	d[1] = s[0];
 83 | }
 84 | static inline
 85 | void byteswap_copy16le(void*restrict dst, const void*restrict src) {
 86 | 	if (!little_endian_is()) byteswap_swap16(dst, src);
 87 | 	else byteswap_copy16(dst, src);
 88 | }
 89 | static inline
 90 | void byteswap_copy16be(void*restrict dst, const void*restrict src) {
 91 | 	if (!big_endian_is()) byteswap_swap16(dst, src);
 92 | 	else byteswap_copy16(dst, src);
 93 | }
 94 | 
 95 | static inline
 96 | void byteswap_copy32(void*restrict dst, const void*restrict src) {
 97 | 	unsigned char *d=dst;
 98 | 	unsigned char const *s=src;
 99 | 	d[0] = s[0];
100 | 	d[1] = s[1];
101 | 	d[2] = s[2];
102 | 	d[3] = s[3];
103 | }
104 | static inline
105 | void byteswap_swap32(void*restrict dst, const void*restrict src) {
106 | 	unsigned char *d=dst;
107 | 	unsigned char const *s=src;
108 | 	d[0] = s[3];
109 | 	d[1] = s[2];
110 | 	d[2] = s[1];
111 | 	d[3] = s[0];
112 | }
// Copies 4 bytes, reversing them only on hosts that are not little endian
static inline
void byteswap_copy32le(void*restrict dst, const void*restrict src) {
	if (little_endian_is()) byteswap_copy32(dst, src);
	else byteswap_swap32(dst, src);
}
// Copies 4 bytes, reversing them only on hosts that are not big endian
static inline
void byteswap_copy32be(void*restrict dst, const void*restrict src) {
	if (big_endian_is()) byteswap_copy32(dst, src);
	else byteswap_swap32(dst, src);
}
123 | 
// Copies 8 bytes from <src> to <dst> keeping their order
static inline
void byteswap_copy64(void*restrict dst, const void*restrict src) {
	unsigned char *out = dst;
	const unsigned char *in = src;
	for (int i = 0; i < 8; i++) out[i] = in[i];
}
// Copies 8 bytes from <src> to <dst> reversing their order
static inline
void byteswap_swap64(void*restrict dst, const void*restrict src) {
	unsigned char *out = dst;
	const unsigned char *in = src;
	for (int i = 0; i < 8; i++) out[i] = in[7 - i];
}
// Copies 8 bytes, reversing them only on hosts that are not little endian
static inline
void byteswap_copy64le(void*restrict dst, const void*restrict src) {
	if (little_endian_is()) byteswap_copy64(dst, src);
	else byteswap_swap64(dst, src);
}
// Copies 8 bytes, reversing them only on hosts that are not big endian
static inline
void byteswap_copy64be(void*restrict dst, const void*restrict src) {
	if (big_endian_is()) byteswap_copy64(dst, src);
	else byteswap_swap64(dst, src);
}
160 | 
// Copies <n> bytes from <src> to <dst> keeping their order.
// Only n in {1, 2, 4, 8} is supported; anything else trips an assertion.
static inline
void byteswap_copy(unsigned n, void*restrict dst, const void*restrict src) {
	if (n == 1) *(unsigned char*)dst = *(const unsigned char*)src;
	else if (n == 2) byteswap_copy16(dst, src);
	else if (n == 4) byteswap_copy32(dst, src);
	else if (n == 8) byteswap_copy64(dst, src);
	else assert(false);
}
// Copies <n> bytes from <src> to <dst> reversing their order.
// Only n in {1, 2, 4, 8} is supported; anything else trips an assertion.
static inline
void byteswap_swap(unsigned n, void*restrict dst, const void*restrict src) {
	if (n == 1) *(unsigned char*)dst = *(const unsigned char*)src;
	else if (n == 2) byteswap_swap16(dst, src);
	else if (n == 4) byteswap_swap32(dst, src);
	else if (n == 8) byteswap_swap64(dst, src);
	else assert(false);
}
// Copies <n> bytes, reversing them only on hosts that are not little endian
static inline
void byteswap_copyle(unsigned n, void*restrict dst, const void*restrict src) {
	if (little_endian_is()) byteswap_copy(n, dst, src);
	else byteswap_swap(n, dst, src);
}
// Copies <n> bytes, reversing them only on hosts that are not big endian
static inline
void byteswap_copybe(unsigned n, void*restrict dst, const void*restrict src) {
	if (big_endian_is()) byteswap_copy(n, dst, src);
	else byteswap_swap(n, dst, src);
}
191 | 


--------------------------------------------------------------------------------
/src/ccommon/ccommon.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Common C code header
  5 |  */
  6 | #pragma once
  7 | 
/* General use macros
 */
// Number of elements of an array. X must be a real array, not a pointer.
#define COUNTOF(X)  (sizeof(X)/sizeof(*(X)))

// Stringify
// STR expands X first; STR_ turns its argument into a string as written.
#define STR(X) STR_(X)
#define STR_(X) #X

// Concatenate and evaluate
// The arguments are pasted as written, before any macro expansion.
#define CAT2(_1,_2)			_1 ## _2
#define CAT3(_1,_2,_3)		_1 ## _2 ## _3
#define CAT4(_1,_2,_3,_4)	_1 ## _2 ## _3 ## _4
// Evaluate and concatenate
// The arguments are macro-expanded first and then pasted.
#define PASTE2(_1,_2)		CAT2(_1,_2)
#define PASTE3(_1,_2,_3)	CAT3(_1,_2,_3)
#define PASTE4(_1,_2,_3,_4)	CAT4(_1,_2,_3,_4)
 24 | 
// Clamp-and-assign: lower VAR to VAL (MINSET) or raise it to VAL (MAXSET).
// The arguments are evaluated more than once: avoid side effects.
#define MINSET(VAR,VAL)  ((VAR) > (VAL) ? ((VAR) = (VAL)) : (VAR))
#define MAXSET(VAR,VAL)  ((VAR) < (VAL) ? ((VAR) = (VAL)) : (VAR))

// Value of VAR, or DEF if VAR is false (IFFALSE) or not positive (IFNPOS)
#define IFFALSE(VAR,DEF)	((VAR) ? (VAR) : (DEF))
#define IFNPOS(VAR,DEF)		((VAR) > 0 ? (VAR) : (DEF))

// Same as above, but DEF is also stored into VAR when it is used
#define IFFALSESET(VAR,DEF)		((VAR) ? (VAR) : ((VAR) = (DEF)))
#define IFNPOSSET(VAR,DEF)		((VAR) > 0 ? (VAR) : ((VAR) = (DEF)))

// Small arithmetic helpers. Arguments may be evaluated more than once.
#define ccSIGN(X)			((X) < 0 ? -1 : (X) > 0 ? 1 : 0)
#define ccABS(X)			((X)<0 ? -(X) : (X))
#define ccMIN(X,Y)			((X)<(Y) ? (X) : (Y))
#define ccMAX(X,Y)			((X)>(Y) ? (X) : (Y))
#define ccMIN3(A,B,C)		ccMIN(ccMIN(A,B),C)
#define ccMAX3(A,B,C)		ccMAX(ccMAX(A,B),C)
// ccCLAMPED gives the clamped value; ccCLAMP also stores it into V
#define ccCLAMPED(V,L,H)	((V)<(L) ? (L) : (V)>(H) ? (H) : (V))
#define ccCLAMP(V,L,H)		((V)<(L) ? ((V)=(L)) : (V)>(H) ? ((V)=(H)) : (V))

// Swap A and B using the caller supplied temporary V (ccSWAPV) or a
// temporary of type T declared by the macro (ccSWAPT)
#define ccSWAPV(V,A,B)		((V)=(A), (A)=(B), (B)=(V))
#define ccSWAPT(T,A,B)		do { T tmp_=(A); (A)=(B); (B)=tmp_; } while(0)
 45 | 
 46 | #define ccSWAP(A,B)  do { \
 47 | 	char tmp_[sizeof(A)]; \
 48 | 	void *a=&(A), *b=&(B); \
 49 | 	memcpy(tmp_, a, sizeof(A)); \
 50 | 	memcpy(a, b, sizeof(A)); \
 51 | 	memcpy(b, tmp_, sizeof(B)); \
 52 | } while(0)
 53 | 
// Sets (CTRL true) or clears (CTRL false) the bits FLAG in VAR
#define ccFLAG_SET(VAR, FLAG, CTRL) \
	((VAR) = (CTRL) ? (VAR) | (FLAG) : (VAR) & ~(FLAG))
	
// Whole-object helpers: D and S are objects, not pointers (needs string.h)
#define MEM_ZERO(D)				memset(&(D), 0, sizeof(D))
#define MEM_CMP(D,S)			memcmp(&(D), &(S), sizeof(D))
#define MEM_COPY(D, S)			memcpy(&(D), &(S), sizeof(D))
// Array helpers: D and S are pointers to the first of C elements
#define ARRAY_ZERO(D, C)		memset((D), 0, sizeof(*(D))*(C))
#define ARRAY_CMP(D, S, C)		memcmp((D), (S), sizeof(*(D))*(C))
#define ARRAY_COPY(D, S, C)		memcpy((D), (S), sizeof(*(D))*(C))
#define ARRAY_MOVE(D, S, C)		memmove((D), (S), sizeof(*(D))*(C))

// Marks a variable or parameter as intentionally unused
#define ccUNUSED(x) (void)(x)
 66 | 	
 67 | #ifndef M_PI
 68 | #define M_PI  3.14159265358979323846
 69 | #endif
 70 | 
 71 | /* Error handling
 72 |  *
 73 |  * Example:
 74 |  *   result_t f(...) {
 75 |  *     result_t R=1;
 76 |  *     if (...) RETURN(code);
 77 |  *     TRY( f2(...) );
 78 |  *   end:
 79 |  *     //clean-up
 80 |  *     return R;
 81 |  *   }
 82 |  */
 83 | typedef int result_t;
 84 | 
/* Return going through the end label */
// Requires a variable R and a label "end" in the enclosing function.
#define RETURN(CODE) do { \
	R = (CODE); \
	goto end; \
} while (0)

// Logs with log_error() and then leaves through RETURN(CODE)
#define ERROR_LOG(CODE, ...) do { \
	log_error(__VA_ARGS__); \
	RETURN(CODE); \
} while (0)

// needs stdlib.h
// Logs with log_error() and terminates the program with exit(1)
#define FATAL_LOG(...) do { \
	log_error(__VA_ARGS__); \
	exit(1); \
} while (0)
101 | 
/* Propagate errors */
// A negative result of EXPR is an error. TRY leaves through RETURN() (end
// label), TRYR returns directly from the function.
#define TRY(EXPR) do { \
	result_t _R_ = (EXPR); \
	if (_R_ < 0) RETURN(_R_); \
} while (0)

#define TRYR(EXPR) do { \
	result_t _R_ = (EXPR); \
	if (_R_ < 0) return _R_; \
} while (0)

// Boolean variants: a false EXPR is the error and CODE is the result.
#define TRYB(CODE, EXPR) do { \
	if (!(EXPR)) RETURN(CODE); \
} while (0)

#define TRYRB(CODE, EXPR) do { \
	if (!(EXPR)) return (CODE); \
} while (0)
120 | 
// Like TRY, but logs the given message before leaving on error
#define TRY_LOG(EXPR, ...) do { \
	result_t _R_ = (EXPR); \
	if (_R_ < 0) ERROR_LOG(_R_, __VA_ARGS__); \
} while (0)

// Like TRY, but logs the error code (negated, in hex), the source location
// and the text of the failing expression
#define TRY_ASSERT(EXPR) do { \
	result_t _R_ = (EXPR); \
	if (_R_ < 0) ERROR_LOG(_R_, "Error 0x%x in %s:%d:\n%s", \
		-_R_, __FILE__, __LINE__, #EXPR); \
} while (0)

// Like TRYB, but logs the source location and the text of the expression
#define TRYB_ASSERT(CODE, EXPR) do { \
	if (!(EXPR)) ERROR_LOG((CODE), "Assertion Error %s:%d:\n%s", \
		__FILE__, __LINE__, #EXPR); \
} while (0)
136 | 


--------------------------------------------------------------------------------
/src/ccommon/fsutil.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include "fsutil.h"
  5 | #include <stdlib.h>
  6 | 
  7 | static inline
  8 | size_t str_copy(size_t maxlen, char* dst, const char* src) {
  9 | 	if (!maxlen || !dst || !src) return 0;
 10 | 	char * dst0 = dst;
 11 | 	maxlen--; //null terminator
 12 | 	while (maxlen-- > 1 && *src) *dst++ = *src++;
 13 | 	*dst = 0;
 14 | 	return dst - dst0;
 15 | }
 16 | 
 17 | // -----------------------------------------------------------------------------
 18 | #if defined(__unix__)
 19 | #include <sys/stat.h>
 20 | #include <unistd.h>
 21 | #include <errno.h>
 22 | 
 23 | int file_exists(const char* path)
 24 | {
 25 |     return (access(path, F_OK) != -1);
 26 | }
 27 | 
 28 | int directory_make(const char* path)
 29 | {
 30 | 	if (mkdir(path, 0777) < 0) {
 31 | 		if (errno == EEXIST) return 0;
 32 | 		return -1;
 33 | 	}
 34 | 	return 1;
 35 | }
 36 | 
 37 | static
 38 | int user_dir_get(size_t maxlen, char* out, const char* var, const char* hdir)
 39 | {
 40 | 	const char * dir = getenv(var);
 41 | 	if (dir) {
 42 | 		return str_copy(maxlen, out, dir);
 43 | 	}
 44 | 	else if ((dir = getenv("HOME"))) {
 45 | 		size_t i = str_copy(maxlen, out, dir), ip=i;
 46 | 		i += str_copy(maxlen-i, out+i, hdir);
 47 | 		if (!file_exists(out)) { out[ip] = 0; i=ip; }
 48 | 		return i;
 49 | 	}
 50 | 	return -1;
 51 | }
 52 | 
 53 | int fs_dir_get(size_t maxlen, char* out, enum FsDirType type)
 54 | {
 55 | 	switch (type) {
 56 | 	case FS_DIR_TEMP: {
 57 | 		const char * dir = getenv("TMPDIR");
 58 | 		if (!dir) dir = "/tmp";
 59 | 		return str_copy(maxlen, out, dir);
 60 | 	}
 61 | 	case FS_DIR_USER_CONFIG:
 62 | 		return user_dir_get(maxlen, out, "XDG_CONFIG_HOME", "/.config");
 63 | 	case FS_DIR_USER_CACHE:
 64 | 		return user_dir_get(maxlen, out, "XDG_CACHE_HOME", "/.cache");
 65 | 	case FS_DIR_USER_DATA:
 66 | 		return user_dir_get(maxlen, out, "XDG_DATA_HOME", "/.local/.cache");
 67 | 	}
 68 | 	return -1;
 69 | }
 70 | 
 71 | // -----------------------------------------------------------------------------
 72 | #elif defined(__WIN32__)
 73 | #define WIN32_LEAN_AND_MEAN
 74 | #define WIN32_EXTRA_LEAN
 75 | #include <windows.h>
 76 | 
 77 | int file_exists(const char* path)
 78 | {
 79 |     DWORD dwAttrib = GetFileAttributesA(path);
 80 |     return (dwAttrib != INVALID_FILE_ATTRIBUTES);
 81 |     //return (dwAttrib != INVALID_FILE_ATTRIBUTES &&
 82 |     //        !(dwAttrib & FILE_ATTRIBUTE_DIRECTORY));
 83 | }
 84 | 
 85 | int directory_make(const char* path)
 86 | {
 87 | 	if (CreateDirectoryA(path, NULL) == 0) {
 88 | 		if (GetLastError() == ERROR_ALREADY_EXISTS) return 0;
 89 | 		return -1;
 90 | 	}
 91 | 	return 1;
 92 | }
 93 | 
 94 | static
 95 | int user_dir_get(size_t maxlen, char* out, const char* var)
 96 | {
 97 | 	const char * dir;
 98 | 	if (var && (dir = getenv(var))) ;
 99 | 	else if ((dir = getenv("APPDATA"))) ;
100 | 	else if ((dir = getenv("USERPROFILE"))) ;
101 | 	else return -1;
102 | 	return str_copy(maxlen, out, dir);
103 | }
104 | 
105 | int fs_dir_get(size_t maxlen, char* out, enum FsDirType type)
106 | {
107 | 	switch (type) {
108 | 	case FS_DIR_TEMP: {
109 | 		const char * dir = getenv("TEMP");
110 | 		if (!dir) return -1;
111 | 		return str_copy(maxlen, out, dir);
112 | 	}
113 | 	case FS_DIR_USER_CONFIG:
114 | 		return user_dir_get(maxlen, out, NULL);
115 | 	case FS_DIR_USER_CACHE:
116 | 		return user_dir_get(maxlen, out, "LOCALAPPDATA");
117 | 	case FS_DIR_USER_DATA:
118 | 		return user_dir_get(maxlen, out, "LOCALAPPDATA");
119 | 	}
120 | 	return -1;
121 | }
122 | 
123 | // -----------------------------------------------------------------------------
124 | #else
125 | #include <stdio.h>
126 | 
// Portable fallback: a path is considered to exist if it can be opened for
// reading. Returns 1 in that case, 0 otherwise.
int file_exists(const char* path)
{
	FILE *fp = fopen(path, "r");
	const int found = (fp != NULL);
	if (found)
		fclose(fp);
	return found;
}
134 | 
135 | int fs_dir_get(size_t maxlen, char* out, enum FsDirType type)
136 | {
137 | 	return -1;
138 | }
139 | 
140 | #endif
141 | 


--------------------------------------------------------------------------------
/src/ccommon/fsutil.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * File system utility functions
 5 |  */
 6 | #pragma once
 7 | #include <string.h>
 8 | #include <stdbool.h>
 9 | 
10 | // Returns the last part of a path.
11 | // Example: "dir/name.ext" -> "name.ext"
12 | static inline
13 | char* path_tail(const char* path);
14 | 
15 | // Returns the file name extension without the dot.
16 | // Examples: "dir/name.ext" -> "ext", "name" -> "" (pointer to end)
17 | static inline
18 | char* path_ext(const char* path);
19 | 
20 | // Returns the file name extension with the dot.
21 | // Examples: "dir/name.ext" -> ".ext", "name" -> "" (pointer to end)
22 | static inline
23 | char* path_extdot(const char* path);
24 | 
25 | static inline
26 | bool path_abs_is(const char* path);
27 | 
28 | static inline
29 | bool path_sep_is(int c);
30 | 
31 | // Returns 1 if it exists, 0 otherwise
32 | int file_exists(const char* path);
33 | 
34 | // Returns 1 on creation, 0 if already exists and <0 on error
35 | int directory_make(const char* path);
36 | 
// Kinds of well-known directories that fs_dir_get() can look up.
enum FsDirType {
	FS_DIR_TEMP = 1,		// directory for temporary files
	FS_DIR_USER_CONFIG,		// per-user configuration directory
	FS_DIR_USER_CACHE,		// per-user cache directory
	FS_DIR_USER_DATA,		// per-user data directory
	//TODO: system dirs
};
 44 | // Writes to out the path to the chosen system directory.
45 | // Returns the number of bytes written, <0 on error
46 | int fs_dir_get(size_t maxlen, char* out, enum FsDirType type);
47 | 
48 | /* Inline implementations */
// Walks backwards from the end of path and returns a pointer to the first
// character after the last separator, or path itself if there is none.
static inline
char* path_tail(const char* path)
{
	const char *cur = path + strlen(path);
	while (cur > path) {
		if (path_sep_is(cur[-1]))
			break;
		--cur;
	}
	return (char*)cur;
}
56 | 
// Returns the file name extension without the dot, or a pointer to the
// terminating NUL if the last path component has no dot.
// The backwards scan stops at the first path separator, so a dot inside a
// directory name ("dir.d/name") is not mistaken for an extension.
static inline
char* path_ext(const char* path)
{
	const size_t n = strlen(path);
	for (size_t i = n; i > 0; --i) {
		const char c = path[i-1];
		if (c == '.') return (char*)(path+i);
		// Same separator set as path_sep_is()
		if (c == '/') break;
#ifdef __WIN32__
		if (c == '\\') break;
#endif
	}
	return (char*)path+n;  //empty
}
64 | 
// Returns the file name extension including the dot, or a pointer to the
// terminating NUL if the last path component has no dot.
// The backwards scan stops at the first path separator, so a dot inside a
// directory name ("dir.d/name") is not mistaken for an extension.
static inline
char* path_extdot(const char* path)
{
	const size_t n = strlen(path);
	for (size_t i = n; i > 0; --i) {
		const char c = path[i-1];
		if (c == '.') return (char*)(path+i-1);
		// Same separator set as path_sep_is()
		if (c == '/') break;
#ifdef __WIN32__
		if (c == '\\') break;
#endif
	}
	return (char*)path+n;  //empty
}
72 | 
// Tells whether path is absolute: it starts with '/', or, on Windows only,
// with a backslash or with any character followed by ':' (drive prefix).
static inline
bool path_abs_is(const char* path)
{
	const char first = path[0];
#ifdef __WIN32__
	if (first == '\\' || (first && path[1] == ':'))
		return true;
#endif
	return first == '/';
}
83 | 
// Tells whether c is a path separator: '/' everywhere, plus '\\' on Windows.
static inline
bool path_sep_is(int c)
{
#ifdef __WIN32__
	return c == '/' || c == '\\';
#else
	return c == '/';
#endif
}
93 | 


--------------------------------------------------------------------------------
/src/ccommon/image.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include "image.h"
  5 | #include "ccommon.h"
  6 | #include "alloc.h"
  7 | #include <stdlib.h>
  8 | #include <string.h>
  9 | 
 10 | #ifndef IMAGE_DEFAULT_ALIGNMENT
 11 | #define IMAGE_DEFAULT_ALIGNMENT 64
 12 | #endif
 13 | 
 14 | #ifndef IMAGE_ALLOCATOR
 15 | #define IMAGE_ALLOCATOR  g_allocator
 16 | #endif
 17 | 
 18 | /*
 19 | Color
 20 | HSV <-> RGB
 21 | ref.: http://code.google.com/p/streumix-frei0r-goodies/wiki/Integer_based_RGB_HSV_conversion
 22 | HSV2RGB(RGB2HSV( )) = identity (0 errors verified)
 23 | */
 24 | #define HSV_ABITS  IMG_HSV_ABITS
 25 | #define HSV_SSCALE (255 << HSV_ABITS)
 26 | #define HSV_HSCALE (256 << HSV_ABITS)	//hue scale
 27 | 
/* Converts an HSV color to RGB using integer arithmetic only.
 * The HSV channels carry HSV_ABITS extra low-order bits, which are shifted
 * away to produce the 8-bit output channels.
 */
ImgColor img_color_hsv2rgb(const ImgColorHSV hsv)
{
	// Half of one output unit, added to x before its final shift
	const int round_sum = 1 << (HSV_ABITS - 1);
	int a = hsv.a >> HSV_ABITS;

	int v = hsv.v >> HSV_ABITS;
	// No saturation: gray, every channel equals the value
	if (hsv.s == 0)
		return (ImgColor){v, v, v, a};

	// Sector of the hue circle; selects the channel order in the switch below
	const int region = 6 * hsv.h / HSV_HSCALE;	// h/60

	// m: channel scaled down by the saturation; x: intermediate channel
	int m = hsv.v * (HSV_SSCALE - hsv.s) / HSV_SSCALE;
	int x = (hsv.v * hsv.s/HSV_HSCALE)
		* (HSV_HSCALE
			- abs(6 * hsv.h - 2 * (region >> 1) * HSV_HSCALE - HSV_HSCALE));

	// Bring both back to the 8-bit output scale
	x = ((x + hsv.v * (HSV_SSCALE - hsv.s)) / HSV_SSCALE + round_sum) >> HSV_ABITS;
	m = m >> HSV_ABITS;

	switch (region) {
		case 0:		return (ImgColor){v, x, m, a};
		case 1:		return (ImgColor){x, v, m, a};
		case 2:		return (ImgColor){m, v, x, a};
		case 3:		return (ImgColor){m, x, v, a};
		case 4:		return (ImgColor){x, m, v, a};
		default:	return (ImgColor){v, m, x, a};
	}
}
 56 | 
/* Converts an RGB color to HSV using integer arithmetic only.
 * Value and alpha are the 8-bit inputs shifted left by HSV_ABITS; saturation
 * is scaled to HSV_SSCALE and hue to HSV_HSCALE.
 * Black and gray inputs return hue and saturation 0.
 */
ImgColorHSV img_color_rgb2hsv(const ImgColor rgb)
{
	const int rgb_min = ccMIN3(rgb.r, rgb.g, rgb.b);
	const int rgb_max = ccMAX3(rgb.r, rgb.g, rgb.b);
	const int chroma  = rgb_max - rgb_min;

	int a = rgb.a << HSV_ABITS;
	int v = rgb_max << HSV_ABITS;
	// Black: also avoids the division by rgb_max below
	if (v == 0)
		return (ImgColorHSV){0, 0, v, a};

	int s = HSV_SSCALE * chroma / rgb_max;
	// Gray: also avoids the division by chroma below
	if (s == 0)
		return (ImgColorHSV){0, 0, v, a};

	// The hue formula depends on which channel is the maximum
	int h;
	if (rgb_max == rgb.r) {
		h = HSV_HSCALE * (6*chroma + rgb.g - rgb.b) / (6*chroma);
		// Wrap values that went past a full turn
		if (h > HSV_HSCALE) h -= HSV_HSCALE;
	} else if (rgb_max == rgb.g)
		h = HSV_HSCALE * (2*chroma + rgb.b - rgb.r) / (6*chroma);
	else
		h = HSV_HSCALE * (4*chroma + rgb.r - rgb.g) / (6*chroma);

	return (ImgColorHSV){h, s, v, a};
}
 83 | 
 84 | /*
 85 | 	Image
 86 | */
 87 | void img_free(Image* img)
 88 | {
 89 | 	if (img->data && img->flags & IMG_F_OWN_MEM)
 90 | 		alloc_free(IMAGE_ALLOCATOR, img->data);
 91 | 	
 92 | 	*img = (Image){0};
 93 | }
 94 | 
 95 | int img_resize(Image* img, unsigned w, unsigned h, ImgFormat fmt,
 96 | 	unsigned pitch)
 97 | {
 98 | 	if (img->w == w && img->h == h && img->format == fmt &&
 99 | 			(!pitch || img->pitch == pitch) && img->data)
100 | 		return 0;
101 | 
102 | 	if (img->data && !(img->flags & IMG_F_OWN_MEM))
103 | 		return -1;
104 | 
105 | 	unsigned bypp=0;
106 | 	switch (fmt) {
107 | 	case IMG_FORMAT_NULL:	bypp = 0;	break;
108 | 	case IMG_FORMAT_GRAY:	bypp = 1;	break;
109 | 	case IMG_FORMAT_RGB:	bypp = 3;	break;
110 | 	case IMG_FORMAT_RGBA:	bypp = 4;	break;
111 | 	default:
112 | 		return -1;//IMG_ERROR_UNSUPPORTED_PARAM;
113 | 	}
114 | 
115 | 	if (!pitch) {
116 | 		const unsigned a = IMAGE_DEFAULT_ALIGNMENT;
117 | 		pitch = (w * bypp + a-1) / a * a;
118 | 	}
119 | 	else if (pitch < w * bypp)
120 | 		return -1;//IMG_ERROR_PARAMS;
121 | 
122 | 	size_t sz = h * pitch;
123 | 	void* p = img->data;
124 | 	if (sz > 0) {
125 | 		p = alloc_realloc(IMAGE_ALLOCATOR, p, sz);
126 | 		if (!p) return -1;//IMG_ERROR_OUT_OF_MEMORY;
127 | 	}
128 | 
129 | 	img->data = p;
130 | 	img->w = w;
131 | 	img->h = h;
132 | 	img->pitch = pitch;
133 | 	img->bypp = bypp;
134 | 	img->format = fmt;
135 | 	img->flags |= IMG_F_OWN_MEM;
136 | 
137 | 	return 0;
138 | }
139 | 
140 | int img_copy(Image* dst, const Image* src)
141 | {
142 | 	int r = img_resize(dst, src->w, src->h, src->format, src->pitch);
143 | 	if (r < 0) return r;
144 | 	memcpy(dst->data, src->data, dst->h * dst->pitch);
145 | 	return 0;
146 | }
147 | 
148 | void img_view_make(Image* dst, const Image* src, ImgRect rect)
149 | {
150 | 	img_free(dst);
151 | 
152 | 	if (rect.x < 0) { rect.w += rect.x; rect.x = 0; }
153 | 	if (rect.y < 0) { rect.h += rect.y; rect.y = 0; }
154 | 
155 | 	rect.w = ccMAX(ccMIN(rect.x + rect.w, (int)src->w) - rect.x, 0);
156 | 	rect.h = ccMAX(ccMIN(rect.y + rect.h, (int)src->h) - rect.y, 0);
157 | 
158 | 	if (rect.w < 0) rect.w = 0;
159 | 	if (rect.h < 0) rect.h = 0;
160 | 
161 | 	dst->data = src->data + src->pitch * rect.y + src->bypp * rect.x;
162 | 	dst->w = rect.w;
163 | 	dst->h = rect.h;
164 | 	dst->pitch = src->pitch;
165 | 	dst->bypp = src->bypp;
166 | 	dst->format = src->format;
167 | }
168 | 
169 | //TODO: macro the switch(img->format) and color set code?
170 | void img_fill(Image* img, const ImgColor color)
171 | {
172 | 	unsigned w=img->w, h=img->h, x, y;
173 | 	ImgColorInt c = img_color_map(color, img->format);
174 | 	
175 | 	switch (img->format) {
176 | 	case IMG_FORMAT_GRAY: {
177 | 		for (y=0; y<h; ++y) {
178 | 			uint8_t *p = &IMG_INDEX(*img, 0, y);
179 | 			for (x=0; x<w; ++x, ++p)
180 | 				*p = c & 0xff;
181 | 		}
182 | 		} break;
183 | 	case IMG_FORMAT_RGB:
184 | 		for (y=0; y<img->h; ++y) {
185 | 			uint8_t *p = &IMG_INDEX(*img, 0, y);
186 | 			for (x=0; x<img->w; ++x, p+=3)
187 | 				memcpy(p, &c, 3);
188 | 		}
189 | 		break;
190 | 	case IMG_FORMAT_RGBA:
191 | 		for (y=0; y<img->h; ++y) {
192 | 			uint8_t *p = &IMG_INDEX(*img, 0, y);
193 | 			for (x=0; x<img->w; ++x, p+=4)
194 | 				memcpy(p, &c, 4);
195 | 		}
196 | 		break;
197 | 	default:
198 | 		break;
199 | 	}
200 | }
201 | 


--------------------------------------------------------------------------------
/src/ccommon/image.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Interface to store and manipulate images in memory.
  5 |  *
  6 |  * Example:
  7 |  *   Image img={0};
  8 |  *   TRY( img_resize(&img, 512, 256, IMG_FORMAT_RGB, 0) );
  9 |  *   img_fill(&img, (ImgColor){255,0,0});
 10 |  *   img_free(&img);
 11 |  */
 12 | #pragma once
 13 | #include <string.h>
 14 | #include <stdint.h>
 15 | #include <stdbool.h>
 16 | 
 17 | //TODO: define error codes
 18 | 
 19 | /*
 20 | 	Point
 21 | */
 22 | typedef struct ImgPoint {
 23 | 	int x, y;
 24 | } ImgPoint;
 25 | 
 26 | #define IMG_POINT_UNPACK(V)  (V).x, (V).y
 27 | 
 28 | /*
 29 | 	Rect
 30 | */
 31 | typedef struct ImgRectS {
 32 | 	int x, y, w, h;
 33 | } ImgRectS;
 34 | 
 35 | typedef struct ImgRectP {
 36 | 	int x1, y1, x2, y2;
 37 | } ImgRectP;
 38 | 
 39 | #define IMG_RECTS_UNPACK(V)  (V).x, (V).y, (V).w, (V).h
 40 | #define IMG_RECTP_UNPACK(V)  (V).x1, (V).y1, (V).x2, (V).y2
 41 | 
 42 | typedef ImgRectS ImgRect;
 43 | #define IMG_RECT_UNPACK IMG_RECTS_UNPACK
 44 | #define IMG_RECT_FMT "%d,%d:%dx%d"
 45 | 
 46 | #define IMG_RECT_P1(R)  (*((ImgPoint*)&(R)))
 47 | #define IMG_RECT_P2(R)  (*(((ImgPoint*)&(R))+1))
 48 | 
 49 | static inline
 50 | bool img_rect_inside_is(const ImgRect* r, const ImgPoint* p) {
 51 | 	return	r->x <= p->x && p->x < r->x+r->w &&
 52 | 			r->y <= p->y && p->y < r->y+r->h;
 53 | }
 54 | 
 55 | /*
 56 | 	Color
 57 | */
 58 | typedef struct ImgColor {
 59 | 	uint8_t r, g, b, a;
 60 | } ImgColor;
 61 | 
 62 | #define IMG_COLOR_UNPACK_RGB(V)  (V).r, (V).g, (V).b
 63 | #define IMG_COLOR_UNPACK(V)  (V).r, (V).g, (V).b, (V).a
 64 | 
 65 | typedef enum ImgFormat {
 66 | #define IMG_FORMAT_F_COLOR  0x100
 67 | #define IMG_FORMAT_F_ALPHA  0x200
 68 | 	IMG_FORMAT_NULL		= 0,
 69 | 	IMG_FORMAT_GRAY		= 1,
 70 | 	IMG_FORMAT_RGB		= 2 | IMG_FORMAT_F_COLOR,
 71 | 	IMG_FORMAT_RGBA		= 3 | IMG_FORMAT_F_COLOR | IMG_FORMAT_F_ALPHA,
 72 | } ImgFormat;
 73 | 
 74 | typedef uint32_t ImgColorInt;
 75 | 
 76 | static inline
 77 | ImgColorInt img_color_map(const ImgColor c, ImgFormat fmt);
 78 | 
 79 | enum ImgColorTransform {
 80 | 	IMG_COLOR_TRANF_NULL		= 0,
 81 | 	IMG_COLOR_TRANF_BGR			= 1,
 82 | 	IMG_COLOR_TRANF_GRB			= 2,
 83 | 	IMG_COLOR_TRANF_GRAY_MIN	= 3,
 84 | 	IMG_COLOR_TRANF_GRAY_MAX	= 4,
 85 | 	IMG_COLOR_TRANF_INVERSE		= 5,
 86 | };
 87 | 
 88 | static inline
 89 | ImgColor img_color_transform(const ImgColor col, unsigned tranf);
 90 | 
// Fixed-point scales of the ImgColorHSV channels.
enum {
	IMG_HSV_ABITS = 4,  //additional precision bits
	IMG_HSV_VSCALE = (255 << IMG_HSV_ABITS),
	IMG_HSV_SSCALE = (255 << IMG_HSV_ABITS),
	IMG_HSV_HSCALE = (256 << IMG_HSV_ABITS),	//hue scale
	IMG_HSV_ASCALE = (255 << IMG_HSV_ABITS),
};
 98 | 
 99 | typedef struct ImgColorHSV {
100 | 	uint16_t h, s, v, a;
101 | } ImgColorHSV;
102 | 
103 | ImgColor img_color_hsv2rgb(const ImgColorHSV hsv);
104 | 
105 | ImgColorHSV img_color_rgb2hsv(const ImgColor rgb);
106 | 
107 | /*
108 | 	Image
109 | */
110 | typedef enum ImgFlags {
111 | 	IMG_F_OWN_MEM		= 1,
112 | } ImgFlags;
113 | 
114 | typedef struct Image {
115 | 	uint8_t 	*data;
116 | 	unsigned	w, h;
117 | 	unsigned	pitch;	//bytes per line
118 | 	unsigned	bypp;	//bytes per pixel
119 | 	ImgFormat	format;
120 | 	int			flags;
121 | } Image;
122 | 
123 | void img_free(Image* img);
124 | 
125 | static inline
126 | bool img_empty(const Image* img) {
127 | 	return !img || !img->w || !img->h || !img->data;
128 | }
129 | 
130 | int img_resize(Image* img, unsigned w, unsigned h, ImgFormat fmt,
131 | 	unsigned pitch);
132 | 
133 | int img_copy(Image* dst, const Image* src);
134 | 
135 | void img_view_make(Image* dst, const Image* src, const ImgRect rect);
136 | 
137 | void img_fill(Image* img, const ImgColor color);
138 | 
139 | static inline
140 | void img_zero(Image* img);
141 | 
142 | static inline
143 | ImgColor img_pixel_get(const Image* img, unsigned x, unsigned y);
144 | 
145 | #define IMG_INDEX(I,X,Y) \
146 | 	((I).data[ (I).pitch * (Y) + (I).bypp * (X) ])
147 | 
148 | #define IMG_INDEX3(I,X,Y,C) \
149 | 	((I).data[ (I).pitch * (Y) + (I).bypp * (X) + (C)])
150 | 
151 | /*
152 | 	Inline implementations
153 | */
154 | 
155 | static inline
156 | ImgColorInt img_color_map(const ImgColor c, ImgFormat fmt)
157 | {
158 | 	ImgColorInt n=0;
159 | 	uint8_t* p = (uint8_t*) &n;
160 | 	switch (fmt) {
161 | 	case IMG_FORMAT_GRAY:
162 | 		p[0] = c.r;
163 | 		if (p[0] < c.g) p[0] = c.g;
164 | 		if (p[0] < c.b) p[0] = c.b;
165 | 		break;
166 | 	case IMG_FORMAT_RGB:
167 | 		//TODO: endianness
168 | 		p[0]=c.r; p[1]=c.g; p[2]=c.b;
169 | 		break;
170 | 	case IMG_FORMAT_RGBA:
171 | 		//TODO: endianness
172 | 		p[0]=c.r; p[1]=c.g; p[2]=c.b; p[3]=c.a;
173 | 		break;
174 | 	default:
175 | 		break;
176 | 	}
177 | 	return n;
178 | }
179 | 
180 | static inline
181 | ImgColor img_color_unmap(const uint8_t* p, ImgFormat fmt)
182 | {
183 | 	switch (fmt) {
184 | 	case IMG_FORMAT_GRAY:	return (ImgColor){ *p, *p, *p, 255 };
185 | 	case IMG_FORMAT_RGB:	return (ImgColor){ p[0], p[1], p[2], 255 };
186 | 	case IMG_FORMAT_RGBA:	return (ImgColor){ p[0], p[1], p[2], p[3] };
187 | 	default:				return (ImgColor){0};
188 | 	}
189 | }
190 | 
191 | static inline
192 | ImgColor img_color_transform(const ImgColor col, unsigned tranf)
193 | {
194 | 	switch (tranf) {
195 | 	case IMG_COLOR_TRANF_BGR:
196 | 		return (ImgColor){ col.b, col.g, col.r, col.a };
197 | 	case IMG_COLOR_TRANF_GRB:
198 | 		return (ImgColor){ col.g, col.b, col.r, col.a };
199 | 	case IMG_COLOR_TRANF_GRAY_MIN: {
200 | 		unsigned char m = col.r < col.g ? col.r : col.g;
201 | 		if (col.b < m) m = col.b;
202 | 		return (ImgColor){ m, m, m, col.a };
203 | 	}
204 | 	case IMG_COLOR_TRANF_GRAY_MAX: {
205 | 		unsigned char m = col.r > col.g ? col.r : col.g;
206 | 		if (col.b > m) m = col.b;
207 | 		return (ImgColor){ m, m, m, col.a };
208 | 	}
209 | 	case IMG_COLOR_TRANF_INVERSE:
210 | 		return (ImgColor){ 255-col.b, 255-col.g, 255-col.r, col.a };
211 | 	default:
212 | 		return col;
213 | 	}
214 | }
215 | 
216 | static inline
217 | void img_zero(Image* img)
218 | {
219 | 	if (img->data)
220 | 		memset(img->data, 0, img->pitch * img->h);
221 | }
222 | 
223 | static inline
224 | ImgColor img_pixel_get(const Image* img, unsigned x, unsigned y)
225 | {
226 | 	const uint8_t *p = &IMG_INDEX(*img, x, y);
227 | 	return img_color_unmap(p, img->format);
228 | }
229 | 
230 | static inline
231 | void img_pixel_set(const Image* img, unsigned x, unsigned y,
232 | 	const ImgColor col)
233 | {
234 | 	ImgColorInt c = img_color_map(col, img->format);
235 | 	uint8_t *p = &IMG_INDEX(*img, x, y);
236 | 	memcpy(p, &c, img->bypp);
237 | }
238 | 
239 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include "image_io.h"
  5 | #include "logging.h"
  6 | #include "str_util.h"
  7 | #include "alloc.h"
  8 | #include <assert.h>
  9 | 
 10 | #ifndef IMAGE_IO_ALLOCATOR
 11 | #define IMAGE_IO_ALLOCATOR  g_allocator
 12 | #endif
 13 | 
 14 | /*
 15 | 	Codecs
 16 | */
 17 | 
 18 | #define MAX_CODECS 31
 19 | const ImageCodec * imgio_codecs[MAX_CODECS+1] = {
 20 | 	NULL
 21 | };
 22 | 
 23 | int img_codec_register(const ImageCodec* codec)
 24 | {
 25 | 	int i;
 26 | 	for (i=0; imgio_codecs[i]; ++i)
 27 | 		if (imgio_codecs[i] == codec)
 28 | 			return 0;
 29 | 
 30 | 	if (i >= MAX_CODECS)
 31 | 		return -1;
 32 | 
 33 | 	imgio_codecs[i] = codec;
 34 | 	return 1;
 35 | }
 36 | 
 37 | const ImageCodec* img_codec_detect_stream(Stream* s)
 38 | {
 39 | 	if (stream_read_prep(s,0) < 8)
 40 | 		return 0;
 41 | 
 42 | 	for (int i=0; imgio_codecs[i]; ++i)
 43 | 		if (imgio_codecs[i]->detect &&
 44 | 			imgio_codecs[i]->load.op &&
 45 | 			imgio_codecs[i]->detect(s, 0))
 46 | 			return imgio_codecs[i];
 47 | 
 48 | 	return 0;
 49 | }
 50 | 
 51 | const ImageCodec* img_codec_detect_ext(const char* ext, int oflags)
 52 | {
 53 | 	char buffer[8];
 54 | 	str_tolower(buffer, sizeof(buffer), ext);
 55 | 
 56 | 	const bool save = oflags & IMG_OF_SAVE;
 57 | 	for (int i=0; imgio_codecs[i]; ++i)
 58 | 		if (imgio_codecs[i]->detect &&
 59 | 			(( save && imgio_codecs[i]->save.op) ||
 60 | 			 (!save && imgio_codecs[i]->load.op)) &&
 61 | 			imgio_codecs[i]->detect(0, buffer) )
 62 | 			return imgio_codecs[i];
 63 | 
 64 | 	return 0;
 65 | }
 66 | 
 67 | const ImageCodec* img_codec_detect_filename(const char* filename, int oflags)
 68 | {
 69 | 	const char* ext = strrchr(filename, '.');
 70 | 	if (!ext) return 0;
 71 | 	ext++;
 72 | 
 73 | 	return img_codec_detect_ext(ext, oflags);
 74 | }
 75 | 
 76 | const ImageCodec* img_codec_by_name(const char* name)
 77 | {
 78 | 	for (int i=0; imgio_codecs[i]; ++i) {
 79 | 		if (!imgio_codecs[i]->name) continue;
 80 | 		if (!str_cmp_i(imgio_codecs[i]->name, name))
 81 | 			return imgio_codecs[i];
 82 | 	}
 83 | 	return 0;
 84 | }
 85 | 
 86 | /*
 87 | 	Image I/O
 88 | */
 89 | 
 90 | int imgio_stream_alloc(ImageIO* obj)
 91 | {
 92 | 	if (obj->s)
 93 | 		return IMG_ERROR_PARAMS;
 94 | 	if (!obj->filename)
 95 | 		return IMG_ERROR_UNSUPPORTED_INPUT_TYPE;
 96 | 
 97 | 	Stream * p = alloc_new(IMAGE_IO_ALLOCATOR, Stream, 1);
 98 | 
 99 | 	if (stream_open_file(p, obj->filename,
100 | 		(obj->oflags & IMG_OF_SAVE) ? SOF_CREATE : SOF_READ) < 0)
101 | 	{
102 | 		alloc_free(IMAGE_IO_ALLOCATOR, p);
103 | 		return IMG_ERROR_FILE_OPEN;
104 | 	}
105 | 
106 | 	obj->s = p;
107 | 	obj->flags |= IMGIO_F_OWN_STREAM;
108 | 
109 | 	return 0;
110 | }
111 | 
112 | int imgio_codec_detect(ImageIO* obj)
113 | {
114 | 	if (obj->oflags & IMG_OF_SAVE) {
115 | 		if (obj->filename)
116 | 			obj->codec = img_codec_detect_filename(obj->filename, obj->oflags);
117 | 	}
118 | 	else {
119 | 		if (!obj->s) {
120 | 			imgio_stream_alloc(obj);
121 | 			// An error here can be ok,
122 | 			// for example some LibAV URL are not files
123 | 		}
124 | 		if (obj->s)
125 | 			obj->codec = img_codec_detect_stream(obj->s);
126 | 	}
127 | 
128 | 	if (!obj->codec)
129 | 		return IMG_ERROR_UNKNOWN_CODEC;
130 | 
131 | 	return 0;
132 | }
133 | 
134 | int imgio_open_inner(ImageIO* obj)
135 | {
136 | 	assert(obj->codec);
137 | 
138 | 	const ImageCodecSub* cs =
139 | 		(obj->oflags & IMG_OF_SAVE) ? &obj->codec->save : &obj->codec->load;
140 | 
141 | 	if (obj->s && cs->flags & IMG_CODEC_F_ACCEPT_STREAM) {
142 | 	}
143 | 	else if (obj->filename && cs->flags & IMG_CODEC_F_ACCEPT_FILENAME) {
144 | 	}
145 | 	else if (obj->filename && cs->flags & IMG_CODEC_F_ACCEPT_STREAM) {
146 | 		int r = imgio_stream_alloc(obj);
147 | 		if (r) return r;
148 | 	}
149 | 	else
150 | 		return IMG_ERROR_UNSUPPORTED_INPUT_TYPE;
151 | 
152 | 	// Codec alloc
153 | 	if (cs->obj_size) {
154 | 		obj->internal = alloc_realloc(IMAGE_IO_ALLOCATOR, obj->internal, cs->obj_size);
155 | 		obj->flags |= IMGIO_F_OWN_INTERNAL;
156 | 	}
157 | 
158 | 	if (cs->init) {
159 | 		int r = cs->init(obj->internal, obj);
160 | 		if (r) return r;
161 | 	}
162 | 
163 | 	return 0;
164 | }
165 | 
/* Opens obj using the stream/filename/codec/oflags already stored in it.
 *
 * If no codec was given, one is detected; if detection finds nothing, every
 * registered codec without a detect function that is flagged
 * IMG_CODEC_F_TRY_DETECT for the requested direction is tried in turn.
 * obj->filename is always cleared before returning.
 * Returns 0 on success; on error obj is freed and the error code returned.
 */
int imgio_open(ImageIO* obj)
{
	int r=0;

	if (!obj->codec) {
		r = imgio_codec_detect(obj);
		// An unknown codec is not fatal yet: the fallback loop below may work
		if (r && r != IMG_ERROR_UNKNOWN_CODEC)
			return r;
	}

	if (obj->codec) {
		r = imgio_open_inner(obj);
	}
	else {
		// Test all codecs without detection
		for (int i=0; imgio_codecs[i]; ++i) {
			// Skip codecs that have a detect function, lack the needed
			// operation, or are not flagged for this kind of trial
			if (imgio_codecs[i]->detect ||
				(obj->oflags & IMG_OF_SAVE &&
				 (!imgio_codecs[i]->save.op ||
				  ~imgio_codecs[i]->save.flags & IMG_CODEC_F_TRY_DETECT) ) ||
				(~obj->oflags & IMG_OF_SAVE &&
				 (!imgio_codecs[i]->load.op ||
				  ~imgio_codecs[i]->load.flags & IMG_CODEC_F_TRY_DETECT) )
				)
				continue;
			obj->codec = imgio_codecs[i];
			r = imgio_open_inner(obj);
			if (!r)
				break;
		}
		if (r)
			obj->codec = 0;
	}

	obj->filename = 0;	// This pointer may not be safe

	if (r) {
		// Clear the codec first so imgio_free() does not call its free function
		obj->codec = 0;
		imgio_free(obj);
	}

	return r;
}
209 | 
210 | #define imgio_open_BEGIN \
211 | 	imgio_free(obj);
212 | 
213 | int imgio_open_stream(ImageIO* obj, Stream* s, int flags,
214 | 	const ImageCodec* codec)
215 | {
216 | 	imgio_open_BEGIN;
217 | 	obj->s = s;
218 | 	obj->filename = 0;
219 | 	obj->codec = codec;
220 | 	obj->oflags = flags;
221 | 	return imgio_open(obj);
222 | }
223 | 
224 | int imgio_open_filename(ImageIO* obj, const char* fname, int flags,
225 | 	const ImageCodec* codec)
226 | {
227 | 	imgio_open_BEGIN;
228 | 	obj->s = 0;
229 | 	obj->filename = fname;
230 | 	obj->codec = codec;
231 | 	obj->oflags = flags;
232 | 	return imgio_open(obj);
233 | }
234 | 
235 | void imgio_free(ImageIO* obj)
236 | {
237 | 	if (obj->codec) {
238 | 		const ImageCodecSub* cs =
239 | 			(obj->oflags & IMG_OF_SAVE) ? &obj->codec->save : &obj->codec->load;
240 | 		if (cs->free)
241 | 			cs->free(obj->internal, obj);
242 | 		obj->codec = 0;
243 | 	}
244 | 	if (obj->flags & IMGIO_F_OWN_INTERNAL && obj->internal) {
245 | 		alloc_free(IMAGE_IO_ALLOCATOR, obj->internal);
246 | 		obj->internal = 0;
247 | 	}
248 | 	if (obj->flags & IMGIO_F_OWN_STREAM && obj->s) {
249 | 		stream_close(obj->s, 0);
250 | 		alloc_free(IMAGE_IO_ALLOCATOR, obj->s);
251 | 		obj->s = 0;
252 | 	}
253 | 	obj->flags = 0;
254 | }
255 | 
256 | int img_load_file(Image* img, const char* filename)
257 | {
258 | 	int r=0;
259 | 
260 | 	Stream s={0};
261 | 	if (stream_open_file(&s, filename, SOF_READ) < 0)
262 | 		return IMG_ERROR_FILE_OPEN;
263 | 
264 | 	ImageIO imgio={0};
265 | 	r = imgio_open_stream(&imgio, &s, 0, 0);
266 | 	if (r) goto end;
267 | 
268 | 	r = imgio_load(&imgio, img);
269 | 
270 | end:
271 | 	imgio_free(&imgio);
272 | 	stream_close(&s, 0);
273 | 	return r;
274 | }
275 | 
276 | int img_save_file(const Image* img, const char* filename)
277 | {
278 | 	int r=0;
279 | 
280 | 	const ImageCodec* codec = img_codec_detect_filename(filename, IMG_OF_SAVE);
281 | 	if (!codec) return IMG_ERROR_UNKNOWN_CODEC;
282 | 
283 | 	Stream s={0};
284 | 	if (stream_open_file(&s, filename, SOF_CREATE) < 0)
285 | 		return IMG_ERROR_FILE_OPEN;
286 | 
287 | 	ImageIO imgio={0};
288 | 	r = imgio_open_stream(&imgio, &s, IMG_OF_SAVE, codec);
289 | 	if (r) goto end;
290 | 
291 | 	r = imgio_save(&imgio, img);
292 | 
293 | end:
294 | 	imgio_free(&imgio);
295 | 	stream_close(&s, 0);
296 | 	return r;
297 | }
298 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Interface to read and write image and video data in multiple formats.
  5 |  */
  6 | #pragma once
  7 | #include "image.h"
  8 | #include "stream.h"
  9 | 
 10 | enum ImageError {
 11 | 	IMG_RESULT_OK						= 0,
 12 | 	IMG_ERROR_UNKNOWN					= -0x301,
 13 | 	IMG_ERROR_PARAMS					= -0x302,
 14 | 	IMG_ERROR_OUT_OF_MEMORY				= -0x303,
 15 | 	IMG_ERROR_FILE_OPEN					= -0x304,
 16 | 	IMG_ERROR_READ						= -0x305,
 17 | 	IMG_ERROR_UNKNOWN_CODEC				= -0x306,
 18 | 	IMG_ERROR_UNSUPPORTED_FUNCTION		= -0x307,
 19 | 	IMG_ERROR_UNSUPPORTED_FORMAT		= -0x308,
 20 | 	IMG_ERROR_UNSUPPORTED_PARAM			= -0x309,
 21 | 	IMG_ERROR_UNSUPPORTED_INPUT_TYPE	= -0x30a,
 22 | 	IMG_ERROR_LOAD						= -0x30b,
 23 | 	IMG_ERROR_SAVE						= -0x30c,
 24 | 	IMG_ERROR_INVALID_IMAGE				= -0x30d,
 25 | 	IMG_ERROR_SEEK						= -0x30e,
 26 | 	IMG_ERROR_EOF						= -0x30f,
 27 | 	IMG_ERROR_AGAIN						= -0x310,	//try again later
 28 | 	IMG_ERROR_UNSUPPORTED_VALUE			= -0x311,
 29 | };
 30 | 
 31 | /*
 32 | 	Codec
 33 | */
 34 | struct ImageIO;
 35 | 
 36 | enum ImageSeekMode {
 37 | 	IMG_SEEK_SET = 0,
 38 | 	IMG_SEEK_CUR = 1,
 39 | 	IMG_SEEK_END = 2,
 40 | };
 41 | 
 42 | enum ImageCodecFlag {
 43 | 	IMG_CODEC_F_ACCEPT_STREAM	= 1,
 44 | 	IMG_CODEC_F_ACCEPT_FILENAME	= 2,
 45 | 	IMG_CODEC_F_TRY_DETECT		= 4,
 46 | };
 47 | 
 48 | typedef struct {
 49 | 	int (*op)(void*, struct ImageIO*, Image*);
 50 | 	int flags;
 51 | 	unsigned obj_size;
 52 | 	int (*init)(void*, struct ImageIO*);
 53 | 	void (*free)(void*, struct ImageIO*);
 54 | 	int (*seek)(void*, struct ImageIO*, long, int);
 55 | 	int (*value_get)(void*, struct ImageIO*, int, void*, unsigned);
 56 | 	int (*value_set)(void*, struct ImageIO*, int, const void*, unsigned);
 57 | } ImageCodecSub;
 58 | 
 59 | typedef struct {
 60 | 	bool (*detect)(Stream*, const char*);
 61 | 	ImageCodecSub load;
 62 | 	ImageCodecSub save;
 63 | 	const char* name;
 64 | 	const char* ext;
 65 | } ImageCodec;
 66 | 
 67 | int img_codec_register(const ImageCodec* codec);
 68 | 
 69 | const ImageCodec* img_codec_detect_stream(Stream* s);
 70 | const ImageCodec* img_codec_detect_ext(const char* ext, int oflags);
 71 | const ImageCodec* img_codec_detect_filename(const char* filename, int oflags);
 72 | const ImageCodec* img_codec_by_name(const char* name);
 73 | 
 74 | /*
 75 | 	Image I/O
 76 | */
 77 | 
 78 | enum ImageIOFlag {
 79 | 	IMGIO_F_OWN_STREAM		= 1,
 80 | 	IMGIO_F_OWN_INTERNAL	= 2,
 81 | 	IMGIO_F_END_FOUND		= 4,
 82 | };
 83 | 
 84 | enum ImageIOOpenFlag {
 85 | 	//IMG_OF_NO_INIT		= 1,
 86 | 	IMG_OF_SAVE			= 2,
 87 | 	IMG_OF_FAST			= 4,
 88 | 	IMG_OF_GRAY			= 8,
 89 | 	IMG_OF_NO_ALPHA		= 16,
 90 | 	IMG_OF_ASYNC		= 32,	//asynchronous operation
 91 | };
 92 | 
 93 | typedef struct ImageIO {
 94 | 	const ImageCodec *	codec;
 95 | 	Stream *			s;
 96 | 	const char *		filename;
 97 | 	void *				internal;	//codec data
 98 | 	int					oflags;
 99 | 	int					flags;
100 | } ImageIO;
101 | 
102 | void imgio_free(ImageIO* obj);
103 | 
104 | /**
105 | Check if the image i/o object is ready to be used.
106 | */
107 | static inline
108 | bool imgio_good(ImageIO* obj) { return obj->codec; }
109 | 
110 | int imgio_open_stream(ImageIO* obj, Stream* s, int flags,
111 | 	const ImageCodec* codec);
112 | 
113 | int imgio_open_filename(ImageIO* obj, const char* fname, int flags,
114 | 	const ImageCodec* codec);
115 | 
116 | #define IMGIO_CODEC_CALL(NAME, ...) \
117 | 	if (!obj->codec) return IMG_ERROR_UNKNOWN_CODEC; \
118 | 	const ImageCodecSub* cs = \
119 | 		(obj->oflags & IMG_OF_SAVE) ? &obj->codec->save : &obj->codec->load; \
120 | 	if (!cs->NAME) return IMG_ERROR_UNSUPPORTED_FUNCTION; \
121 | 	return cs->NAME(obj->internal, obj, __VA_ARGS__);
122 | 
// Loads an image from obj into img through the load operation of its codec.
// Returns IMG_ERROR_UNSUPPORTED_FUNCTION if obj was opened for saving or the
// codec has no load operation, IMG_ERROR_UNKNOWN_CODEC if obj has no codec,
// and the result of the codec otherwise.
static inline
int imgio_load(ImageIO* obj, Image* img) {
	if (obj->oflags & IMG_OF_SAVE) return IMG_ERROR_UNSUPPORTED_FUNCTION;
	IMGIO_CODEC_CALL(op, img)
}
128 | 
// Saves img to obj through the save operation of its codec.
// Returns IMG_ERROR_UNSUPPORTED_FUNCTION if obj was not opened for saving or
// the codec has no save operation, IMG_ERROR_UNKNOWN_CODEC if obj has no
// codec, and the result of the codec otherwise.
static inline
int imgio_save(ImageIO* obj, const Image* img) {
	if (~obj->oflags & IMG_OF_SAVE) return IMG_ERROR_UNSUPPORTED_FUNCTION;
	// The shared operation signature takes a non-const Image*
	IMGIO_CODEC_CALL(op, (Image*)img)
}
134 | 
// Forwards a seek request (offset, mode) to the codec of obj.
// Returns IMG_ERROR_UNKNOWN_CODEC if obj has no codec,
// IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no seek function, and the
// result of the codec otherwise.
static inline
int imgio_seek(ImageIO* obj, long offset, int mode) {
	IMGIO_CODEC_CALL(seek, offset, mode)
}
139 | 
140 | enum {
141 | 	//unsigned: 0 to 100: jpeg or similar quality (85=default)
142 | 	IMG_VALUE_QUALITY			= 1,
143 | 	//unsigned: 0 to 9: png/deflate or similar compression level (0=disable, 6=default)
144 | 	IMG_VALUE_COMPRESSION		= 2,
145 | 
146 | 	//unsigned: frame number counting from 0
147 | 	IMG_VALUE_FRAME_IDX			= 3,
148 | 	//unsigned: total number of frames,
149 | 	// may be estimated until you reach the last frame
150 | 	IMG_VALUE_FRAME_COUNT		= 4,
151 | 	//double: default or estimated frame duration in seconds
152 | 	IMG_VALUE_FRAME_DURATION	= 5,
153 | 	//unsigned: accumulated number of non fatal errors that occurred
154 | 	// the meaning varies with the codec, normally is amount of frames that
155 | 	// could not be read and were skipped
156 | 	IMG_VALUE_ERROR_COUNT		= 6,
157 | 	//text:
158 | 	// for read: buf="tag\0" and set bufsz, returns value length
159 | 	// for writing: buf="tag\0value\0"
160 | 	// Use the tag "comment" for a generic comment.
161 | 	IMG_VALUE_METADATA			= 7,
162 | 
163 | 	//none: prompts the codec to reload some external configuration
164 | 	IMG_VALUE_RELOAD			= 8,
165 | 
166 | 	//double: camera exposure time in seconds
167 | 	IMG_VALUE_EXPOSURE			= 101,
168 | 
169 | 	//double: camera gain (1.0 normal)
170 | 	IMG_VALUE_GAIN				= 102,
171 | 
172 | 	//ImgRectS: camera AOI (crop rectangle)
173 | 	IMG_VALUE_AOI				= 103,
174 | 
175 | 	IMG_VALUE_CUSTOM			= 0x8000,
176 | };
177 | 
// Reads the value identified by id (one of the IMG_VALUE_* constants) from
// the codec of obj into buf, which is bufsz bytes long.
// Returns IMG_ERROR_UNKNOWN_CODEC if obj has no codec,
// IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no value_get function, and
// the result of the codec otherwise.
static inline
int imgio_value_get(ImageIO* obj, int id, void* buf, unsigned bufsz) {
	IMGIO_CODEC_CALL(value_get, id, buf, bufsz)
}
182 | 
// Passes the value identified by id (one of the IMG_VALUE_* constants) to
// the codec of obj; buf holds the value and bufsz is its size in bytes.
// Returns IMG_ERROR_UNKNOWN_CODEC if obj has no codec,
// IMG_ERROR_UNSUPPORTED_FUNCTION if the codec has no value_set function, and
// the result of the codec otherwise.
static inline
int imgio_value_set(ImageIO* obj, int id, const void* buf, unsigned bufsz) {
	IMGIO_CODEC_CALL(value_set, id, buf, bufsz)
}
187 | 
188 | /*
189 | 	Simplified image file I/O
190 | */
191 | int img_load_file(Image* img, const char* filename);
192 | int img_save_file(const Image* img, const char* filename);
193 | 
194 | /*
195 | 	Simplified codec registration
196 | */
197 | #define IMGIO_CODEC_REGISTER_NODEP() do { \
198 | 	extern const ImageCodec img_codec_pnm;\
199 | 	img_codec_register(&img_codec_pnm); \
200 | 	extern const ImageCodec img_codec_imgseq; \
201 | 	img_codec_register(&img_codec_imgseq); \
202 | } while (0)
203 | 
// Registers everything IMGIO_CODEC_REGISTER_NODEP() does, plus the
// img_codec_jpeg and img_codec_png codecs.
#define IMGIO_CODEC_REGISTER_BASIC() do { \
	IMGIO_CODEC_REGISTER_NODEP(); \
	extern const ImageCodec img_codec_jpeg; \
	img_codec_register(&img_codec_jpeg); \
	extern const ImageCodec img_codec_png; \
	img_codec_register(&img_codec_png); \
} while (0)
211 | 
// Registers everything IMGIO_CODEC_REGISTER_BASIC() does, plus the tiff,
// bigtiff, libtiff, avimjpg, libav and test codecs.
// Each referenced img_codec_* object must be linked into the program.
#define IMGIO_CODEC_REGISTER_ALL() do { \
	IMGIO_CODEC_REGISTER_BASIC(); \
	extern const ImageCodec img_codec_tiff; \
	img_codec_register(&img_codec_tiff); \
	extern const ImageCodec img_codec_bigtiff; \
	img_codec_register(&img_codec_bigtiff); \
	extern const ImageCodec img_codec_libtiff; \
	img_codec_register(&img_codec_libtiff); \
	extern const ImageCodec img_codec_avimjpg; \
	img_codec_register(&img_codec_avimjpg); \
	extern const ImageCodec img_codec_libav; \
	img_codec_register(&img_codec_libav); \
	extern const ImageCodec img_codec_test; \
	img_codec_register(&img_codec_test); \
} while (0)	
227 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io_jpeg.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include "image_io.h"
 6 | 
 7 | #ifdef IMGIO_JPEG_IMPL
 8 | #include "vector.h"
 9 | #include <jpeglib.h>
10 | #include <setjmp.h>
11 | 
// libjpeg error manager extended with a jump buffer.
// NOTE(review): presumably the error handler longjmp()s to <escape> instead
// of letting libjpeg terminate the process - confirm in image_io_jpeg.c.
struct img_codec_jpeg_error_mgr {
	struct jpeg_error_mgr	errmgr;
	jmp_buf					escape;
};
16 | 
// State of the JPEG loader: the libjpeg decompressor and its error manager.
struct CodecJpegLoad {
	struct jpeg_decompress_struct cinfo;
	struct img_codec_jpeg_error_mgr jerr;
};
21 | 
// State of the JPEG saver: the libjpeg compressor, its error manager and
// the user-set options.
struct CodecJpegSave {
	struct jpeg_compress_struct cinfo;
	struct img_codec_jpeg_error_mgr jerr;

	// Key/value text entries to store with the image
	struct CodecJpegText { DynStr key, value; } *metadata;  //vector
	// NOTE(review): presumably the libjpeg quality setting - confirm range
	int quality;
};
29 | #endif
30 | 
// The struct bodies are only visible when IMGIO_JPEG_IMPL is defined;
// other users see opaque types.
typedef struct CodecJpegLoad CodecJpegLoad;
typedef struct CodecJpegSave CodecJpegSave;

// Codec detection from stream content and/or file extension
bool imgio_jpeg_detect(Stream* s, const char* fileext);

// Loader: init / free / operation
int  imgio_jpeg_load_init(CodecJpegLoad* codec, ImageIO* imgio);
void imgio_jpeg_load_free(CodecJpegLoad* codec, ImageIO* imgio);
int  imgio_jpeg_load_op(CodecJpegLoad* codec, ImageIO* imgio, Image* img);

// Saver: init / free / operation
int  imgio_jpeg_save_init(CodecJpegSave* codec, ImageIO* imgio);
void imgio_jpeg_save_free(CodecJpegSave* codec, ImageIO* imgio);
int  imgio_jpeg_save_op(CodecJpegSave* codec, ImageIO* imgio, Image* img);

// Codec descriptor to pass to img_codec_register()
extern const ImageCodec img_codec_jpeg;
45 | 
46 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io_png.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include "image_io.h"
 6 | 
 7 | #ifdef IMGIO_PNG_IMPL
 8 | #include "vector.h"
// State of the PNG codec (used by the imgio_png_save_* functions).
struct CodecPng {
	// Key/value text entries to store with the image
	struct CodecPngText { DynStr key, value; } *metadata;  //vector
	// NOTE(review): presumably the compression level - confirm range
	int comp_lvl;
};
13 | #endif
14 | 
// The struct body is only visible when IMGIO_PNG_IMPL is defined.
typedef struct CodecPng CodecPng;

// Codec detection from stream content and/or file extension
bool imgio_png_detect(Stream* s, const char* fileext);

// Loader (single operation, no separate init/free)
int imgio_png_load(void* self, ImageIO* imgio, Image* img);

// Saver: init / free / operation / option setting (IMG_VALUE_* ids)
int  imgio_png_save_init(CodecPng* S, ImageIO* imgio);
void imgio_png_save_free(CodecPng* S, ImageIO* imgio);
int  imgio_png_save_op(CodecPng* S, ImageIO* imgio, Image* img);
int  imgio_png_value_set(CodecPng* S, ImageIO* imgio,
		int id, const void* buf, unsigned bufsz);

// Codec descriptor to pass to img_codec_register()
extern const ImageCodec img_codec_png;
28 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io_pnm.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include <stdlib.h>
  5 | #include "image_io.h"
  6 | 
  7 | static inline int whitespace_is(char c) {
  8 | 	return (c == ' ' || c == '\t' || c == '\r' || c == '\n');
  9 | }
 10 | 
 11 | /*
 12 | 	Type detect
 13 | */
 14 | bool imgio_pnm_detect(Stream* s, const char* fileext)
 15 | {
 16 | 	if (s) {
 17 | 		const unsigned char *c = s->cursor;
 18 | 		if (c[0] == 'P' && ('1' <= c[1] && c[1] <= '6') &&
 19 | 			whitespace_is(c[2]))
 20 | 			return true;
 21 | 	}
 22 | 	else if (fileext) {
 23 | 		if (fileext[0] == 'p' &&
 24 | 			(fileext[1] == 'n' || fileext[1] == 'p' || fileext[1] == 'g' ||
 25 | 				fileext[1] == 'b') &&
 26 | 			fileext[2] == 'm')
 27 | 			return true;
 28 | 	}
 29 | 	return false;
 30 | }
 31 | 
 32 | /*
 33 | 	Read
 34 | */
 35 | 
 36 | static inline
 37 | char* field_next(char* cur, char* end)
 38 | {
 39 | 	while (cur<end && !whitespace_is(*cur)) cur++;
 40 | 	while (cur<end && whitespace_is(*cur)) cur++;
 41 | 	return cur;
 42 | }
 43 | 
 44 | int imgio_pnm_load(void* self, ImageIO* imgio, Image* img)
 45 | {
 46 | 	int r=0;
 47 | 
 48 | 	if (stream_read_prep(imgio->s, 0) < 8)
 49 | 		return IMG_ERROR_LOAD;
 50 | 
 51 | 	// Read file header
 52 | 	char *end, *cur=stream_buffer_get(imgio->s, &end);
 53 | 
 54 | 	if (*cur++ != 'P') {
 55 | 		r = IMG_ERROR_LOAD;
 56 | 		goto error;
 57 | 	}
 58 | 
 59 | 	int bypp=0;
 60 | 	ImgFormat format = IMG_FORMAT_NULL;
 61 | 	switch (*cur++) {
 62 | 	case '5':
 63 | 		format = IMG_FORMAT_GRAY;
 64 | 		bypp = 1;
 65 | 		break;
 66 | 	case '6':
 67 | 		format = IMG_FORMAT_RGB;
 68 | 		bypp = 3;
 69 | 		break;
 70 | 	default:
 71 | 		r = IMG_ERROR_UNSUPPORTED_FORMAT;
 72 | 		goto error;
 73 | 	}
 74 | 
 75 | 	int width = atoi( (cur = field_next(cur, end)) );
 76 | 	int height = atoi( (cur = field_next(cur, end)) );
 77 | 	int depth = atoi( (cur = field_next(cur, end)) );
 78 | 
 79 | 	if (width < 1 || height < 1 || depth < 1) {
 80 | 		r = IMG_ERROR_LOAD;
 81 | 		goto error;
 82 | 	}
 83 | 	if (depth != 255) {	//TODO
 84 | 		r = IMG_ERROR_UNSUPPORTED_FORMAT;
 85 | 		goto error;
 86 | 	}
 87 | 
 88 | 	cur = field_next(cur, end);
 89 | 	stream_commit(imgio->s, cur);
 90 | 
 91 | 	// Allocate image
 92 | 	r = img_resize(img, width, height, format, 0);
 93 | 	if (r)
 94 | 		goto error;
 95 | 
 96 | 	// Load binary data
 97 | 	size_t line_size = img->w * bypp;
 98 | 	unsigned char* imgcur = img->data;
 99 | 	for (unsigned y=0; y<img->h; ++y) {
100 | 		if (stream_read(imgio->s, line_size, imgcur) != line_size) {
101 | 			r = IMG_ERROR_LOAD;
102 | 			goto error;
103 | 		}
104 | 		imgcur += img->pitch;
105 | 	}
106 | 
107 | 	return 0;
108 | 
109 | error:
110 | 	return r;
111 | }
112 | 
113 | /*
114 | 	Save
115 | */
116 | 
117 | int imgio_pnm_save(void* unused, ImageIO* imgio, Image* img)
118 | {
119 | 	if (stream_write_prep(imgio->s, 0) < 8)
120 | 		return IMG_ERROR_SAVE;
121 | 
122 | 	size_t line_size=0;
123 | 	switch (img->format) {
124 | 	case IMG_FORMAT_GRAY:
125 | 		line_size = img->w;
126 | 		stream_printf(imgio->s, "P5 %d %d 255\n", img->w, img->h);
127 | 		break;
128 | 	case IMG_FORMAT_RGB:
129 | 		line_size = img->w * 3;
130 | 		stream_printf(imgio->s, "P6 %d %d 255\n", img->w, img->h);
131 | 		break;
132 | 	case IMG_FORMAT_RGBA:
133 | 		line_size = img->w * 4;
134 | 		//http://netpbm.sourceforge.net/doc/pam.html
135 | 		stream_printf(imgio->s,
136 | 			"P7\nWIDTH %d\nHEIGHT %d\nDEPTH 4\nMAXVAL 255\nTUPLTYPE RGB_ALPHA\nENDHDR\n",
137 | 			img->w, img->h);
138 | 		break;
139 | 	default:
140 | 		return IMG_ERROR_UNSUPPORTED_FORMAT;
141 | 	}
142 | 
143 | 	unsigned char* imgcur = img->data;
144 | 	for (unsigned y=0; y<img->h; ++y) {
145 | 		if (stream_write(imgio->s, line_size, imgcur) != line_size)
146 | 			return IMG_ERROR_SAVE;
147 | 		imgcur += img->pitch;
148 | 	}
149 | 
150 | 	return IMG_RESULT_OK;
151 | }
152 | 
153 | /*
154 | 	Codec
155 | */
// PNM codec descriptor: detection function, loader entry, saver entry,
// then the codec name and extension strings. Both entries carry the
// IMG_CODEC_F_ACCEPT_STREAM flag.
// NOTE(review): field meaning is inferred from initializer order; confirm
// against the ImageCodec definition.
const ImageCodec img_codec_pnm = {
	imgio_pnm_detect,
	{
		imgio_pnm_load,
		IMG_CODEC_F_ACCEPT_STREAM,
	},
	{
		imgio_pnm_save,
		IMG_CODEC_F_ACCEPT_STREAM,
	},
	"PNM", "pnm"
};
168 | 


--------------------------------------------------------------------------------
/src/ccommon/image_io_pnm.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include "image_io.h"
 6 | 
// Returns true if the stream content (when <s> is given) or otherwise the
// file extension looks like PNM.
bool imgio_pnm_detect(Stream* s, const char* fileext);

// Loads a binary P5/P6 image from imgio->s into <img>. <self> is unused.
int imgio_pnm_load(void* self, ImageIO* imgio, Image* img);

// Saves <img> (gray, RGB or RGBA) to imgio->s. <self> is unused.
int imgio_pnm_save(void* self, ImageIO* imgio, Image* img);

// Codec descriptor to pass to img_codec_register()
extern const ImageCodec img_codec_pnm;
14 | 
15 | 


--------------------------------------------------------------------------------
/src/ccommon/logging.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "logging.h"
 5 | //#include <stdio.h>
 6 | 
// Global logger state. The default level is DEBUG in DEBUG builds and INFO
// otherwise; the stream and prefix start out null.
struct Logger g_logger = {
#ifdef DEBUG
	.level	= LOG_LVL_DEBUG,
#else
	.level	= LOG_LVL_INFO,
#endif
};

// Stream used by log_line_begin_raw when g_logger.stm has not been set.
Stream g_logger_stream;
16 | 
17 | void log_line_begin_raw(int level)
18 | {
19 | 	if (!g_logger.stm) {
20 | 		stream_open_std(&g_logger_stream, STREAM_STD_ERR, 0);		
21 | 		g_logger.stm = &g_logger_stream;
22 | 	}
23 | 
24 | 	const char * lvl_prefix = 0;
25 | 	if      (level >= LOG_LVL_DEBUG)   lvl_prefix = "DEBUG ";
26 | 	else if (level >= LOG_LVL_INFO)    ;
27 | 	else if (level >= LOG_LVL_WARNING) lvl_prefix = "WARN  ";
28 | 	else                               lvl_prefix = "ERROR ";
29 | 
30 | 	//TODO: time (optional)
31 | 	
32 | 	if (g_logger.prefix)
33 | 		stream_str_put(g_logger.stm, g_logger.prefix);
34 | 
35 | 	if (lvl_prefix)
36 | 		stream_str_put(g_logger.stm, lvl_prefix);
37 | }
38 | 
// Appends <str> to the current log line.
// g_logger.stm is used as is, so a line must have been started first.
void log_line_str(const char* str)
{
	stream_str_put(g_logger.stm, str);
}
43 | 
44 | #if __STDC_HOSTED__
// Appends printf-style formatted text to the current log line (va_list form).
void log_line_strv(const char format[], va_list ap)
{
	stream_vprintf(g_logger.stm, format, ap);
}
49 | 
// Appends printf-style formatted text to the current log line.
void log_line_strf(const char format[], ...)
{
	va_list ap;
	va_start(ap, format);
	log_line_strv(format, ap);
	va_end(ap);
}
57 | #endif
58 | 
// Ends the current log line: writes a newline and flushes the stream.
void log_line_end()
{
	stream_char_put(g_logger.stm, '\n');
	stream_flush(g_logger.stm);
}
64 | 
// Logs <text> as one complete line at <level>.
// Nothing is written if <text> is null or <level> is filtered out.
void log_logs(int level, const char* text)
{
	if (text && log_level_check(level)) {
		log_line_begin(level);
		log_line_str(text);
		log_line_end();
	}
}
73 | 
74 | #if __STDC_HOSTED__
// Logs one complete printf-style formatted line at <level> (va_list form).
// Nothing is written if <format> is null or <level> is filtered out.
void log_logv(int level, const char format[], va_list ap)
{
	if (!format) return;
	if (!log_level_check(level)) return;
	log_line_begin(level);
	log_line_strv(format, ap);
	log_line_end();
}
83 | 
// Logs one complete printf-style formatted line at <level>.
// Nothing is written if <format> is null or <level> is filtered out.
void log_logf(int level, const char format[], ...)
{
	if (!(format && log_level_check(level)))
		return;

	va_list args;
	log_line_begin(level);
	va_start(args, format);
	log_line_strv(format, args);
	va_end(args);
	log_line_end();
}
97 | #endif
98 | 


--------------------------------------------------------------------------------
/src/ccommon/logging.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  *
  4 |  * Logging interface
  5 |  */
  6 | #pragma once
  7 | #include "stream.h"
  8 | #include <stdarg.h>
  9 | #include <stdbool.h>
 10 | 
 11 | //TODO: interface to use other loggers
 12 | 
// Levels
// Named levels are LOG_LVL_STEP apart, leaving room for values in between.
#define LOG_LVL_STEP 10

// A message is emitted when its level is <= g_logger.level, so higher
// values mean more verbose output.
enum LoggingLevel {
	LOG_LVL_NONE	= 0,
	LOG_LVL_ERROR	= LOG_LVL_STEP,
	LOG_LVL_WARNING	= LOG_LVL_STEP*2,
	LOG_LVL_INFO	= LOG_LVL_STEP*3, //normal
	LOG_LVL_INFO2	= LOG_LVL_STEP*4, //verbose
	LOG_LVL_DEBUG	= LOG_LVL_STEP*5,
	LOG_LVL_DEBUG2	= LOG_LVL_STEP*6,
	LOG_LVL_DEBUG3	= LOG_LVL_STEP*7,
	LOG_LVL_DEBUG4	= LOG_LVL_STEP*8,
	LOG_LVL_MAX		= 255
};
 28 | 
// Utility macros, use mostly these
// Each one takes a printf-style format plus arguments and forwards them to
// log_log() with a fixed level.
#define log_error(...)		log_log(LOG_LVL_ERROR, __VA_ARGS__)
#define log_warning(...)	log_log(LOG_LVL_WARNING, __VA_ARGS__)
#define log_info(...)		log_log(LOG_LVL_INFO, __VA_ARGS__)
#define log_info2(...)		log_log(LOG_LVL_INFO2, __VA_ARGS__)
#define log_debug(...)		log_log(LOG_LVL_DEBUG, __VA_ARGS__)
#define log_debug2(...)		log_log(LOG_LVL_DEBUG2, __VA_ARGS__)
#define log_debug3(...)		log_log(LOG_LVL_DEBUG3, __VA_ARGS__)
#define log_debug4(...)		log_log(LOG_LVL_DEBUG4, __VA_ARGS__)
 38 | 
// Logs a printf-style formatted line at level LVL.
// The level is checked here first, so the arguments are not evaluated when
// the message is filtered out. LVL is evaluated more than once.
#define log_log(LVL, ...) do {\
	if (log_level_check((LVL))) \
		log_logf((LVL), __VA_ARGS__); \
} while (0)
 43 | 	
// Logs the plain string STR (no formatting) as one line at level LVL.
// STR is not evaluated when the message is filtered out. LVL is evaluated
// more than once.
#define log_log_str(LVL, STR) do {\
	if (log_level_check((LVL))) \
		log_logs((LVL), (STR)); \
} while (0)
 48 | 
// Interface
// Logger state.
struct Logger {
	int			level;    // Messages with a level above this are dropped
	Stream		*stm;     // Output stream; set on first use if left null
	const char  *prefix;  // Prefix for all messages
};

// The single global logger, defined in logging.c
extern struct Logger g_logger;
 57 | 
// Returns true if messages of <level> are currently emitted.
static inline
bool log_level_check(int level)
{
	return level <= g_logger.level;
}
 63 | 
// Sets the logger level to <level>. Returns the previous level.
static inline
int log_level_set(int level)
{
	int oldval = g_logger.level;
	g_logger.level = level;
	return oldval;
}
 71 | 
// Adds <change> (which may be negative) to the logger level.
// Returns the previous level.
static inline
int log_level_inc(int change)
{
	int oldval = g_logger.level;
	g_logger.level += change;
	return oldval;
}
 79 | 
// Logs the plain string <text> as one line. Does nothing if <text> is null
// or <level> is filtered out.
void log_logs(int level, const char* text);

// Logs a printf-style formatted line (va_list form).
#ifdef __GNUC__
__attribute__((format(printf, 2, 0)))
#endif
void log_logv(int level, const char format[], va_list ap);

// Logs a printf-style formatted line.
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
void log_logf(int level, const char format[], ...);
 91 | 
 92 | 
// Low level interface
// A line is built with log_line_begin, any number of log_line_str* calls
// (or direct writes to log_line_stream()) and finally log_line_end.
void log_line_begin_raw(int level);  //no checking

// Starts a log line if <level> is enabled.
// Returns false (and writes nothing) if it is filtered out.
static inline
bool log_line_begin(int level) {
	if (!log_level_check(level)) return false;
	log_line_begin_raw(level);
	return true;
}
102 | 
// Appends a plain string to the current log line.
void log_line_str(const char* str);

// Appends printf-style formatted text to the current log line (va_list form).
#ifdef __GNUC__
__attribute__((format(printf, 1, 0)))
#endif
void log_line_strv(const char format[], va_list ap);

// Appends printf-style formatted text to the current log line.
#ifdef __GNUC__
__attribute__((format(printf, 1, 2)))
#endif
void log_line_strf(const char format[], ...);
114 | 
// Returns the logger's output stream.
// It is null until a line has been started or the stream has been set.
static inline
Stream* log_line_stream() {
	return g_logger.stm;
}
119 | 
// Ends the current log line (newline + flush).
void log_line_end();
121 | 


--------------------------------------------------------------------------------
/src/ccommon/rng_philox.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "rng_philox.h"
 5 | #include <math.h>
 6 | 
// Global generator used by rng_randn(). Zero-initialized (seed 0, offset 0).
RngPhilox g_rng;

// Per-round multipliers applied to counter words 0 and 2
const uint32_t philox_m[2] = {0xD2511F53, 0xCD9E8D57};
// Per-round increments added to the two key words
const uint32_t philox_w[2] = {0x9E3779B9, 0xBB67AE85};

// Scale factors mapping a 32 bit integer to (0,1) and to (0,2pi)
const double two_pow32_inv     = 2.3283064365386963e-10; //   1/2^32
const double two_pow32_inv_2pi = 1.4629180792671596e-09; // 2pi/2^32
15 | static inline
16 | double box_muller(double x, double y)
17 | {
18 | 	double u = (x + 0.5) * two_pow32_inv;  
19 | 	double v = (y + 0.5) * two_pow32_inv_2pi;
20 | 	return sqrt(-2.0 * log(u)) * sin(v);
21 | }
22 | 
23 | void rng_philox_randn(RngPhilox* S, unsigned n, float* out)
24 | {
25 | 	uint32_t cnt[4], key[2];
26 | 	for (unsigned i=0; i<n; ++i) {
27 | 		cnt[0] = S->offset;
28 | 		cnt[1] = 0;
29 | 		cnt[2] = i;
30 | 		cnt[3] = 0;
31 | 
32 | 		key[0] = S->seed;
33 | 		key[1] = S->seed>>32;
34 | 
35 | 		for (unsigned r=0; r<10; ++r) {
36 | 			// Round
37 | 			uint64_t v1 = (uint64_t)cnt[0] * philox_m[0];
38 | 			uint64_t v2 = (uint64_t)cnt[2] * philox_m[1];
39 | 			cnt[0] = (uint32_t)(v2>>32) ^ cnt[1] ^ key[0];
40 | 			cnt[1] = v2;
41 | 			cnt[2] = (uint32_t)(v1>>32) ^ cnt[3] ^ key[1];
42 | 			cnt[3] = v1;
43 | 
44 | 			key[0] += philox_w[0];
45 | 			key[1] += philox_w[1];
46 | 		}
47 | 
48 | 		out[i] = box_muller(cnt[0], cnt[1]);
49 | 	}
50 | 	S->offset++;
51 | }
52 | 


--------------------------------------------------------------------------------
/src/ccommon/rng_philox.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * Pseudo-random number generator imitating torch cuda randn.
 5 |  * Based on: https://github.com/AUTOMATIC1111/stable-diffusion-webui/blob/master/modules/rng_philox.py
 6 |  */
 7 | #pragma once
 8 | #include <stdint.h>
 9 | 
// Generator state.
typedef struct {
    uint64_t seed;      // Used as the two 32 bit key words
    uint32_t offset;	// Counter word, incremented by each rng_philox_randn call
} RngPhilox;
14 | 
// Fills out[0..n-1] with normally distributed values and advances S->offset
// by one.
void rng_philox_randn(RngPhilox* S, unsigned n, float* out);

// Global generator used by rng_randn()
extern RngPhilox g_rng;
18 | 
// Same as rng_philox_randn, using the global generator g_rng.
static inline
void rng_randn(unsigned n, float* out) {
	rng_philox_randn(&g_rng, n, out);
}
23 | 


--------------------------------------------------------------------------------
/src/ccommon/str_util.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "str_util.h"
 5 | #include <stdarg.h>
 6 | 
 7 | int sprintf_alloc(char** buffer, const char* fmt, ...)
 8 | {
 9 | 	va_list ap;
10 | 
11 | 	va_start(ap, fmt);
12 | 	int sz = vsnprintf(0, 0, fmt, ap);
13 | 	va_end(ap);
14 | 
15 | 	if (sz < 0) return sz;
16 | 	sz += 1;
17 | 	char* p = realloc(*buffer, sz);
18 | 	if (!p) return -1;
19 | 	*buffer = p;
20 | 
21 | 	va_start(ap, fmt);
22 | 	sz = vsnprintf(p, sz, fmt, ap);
23 | 	va_end(ap);
24 | 
25 | 	return sz;
26 | }
27 | 
// Writes an escaped, zero-terminated copy of in[0..in_size) to <out>.
// Double quote, \n, \r and \t get two-character escapes, other bytes outside
// the printable ASCII range become \xNN.
// Encoding stops early when fewer than 6 bytes of output space remain.
// *in_done (if not null) receives the number of input bytes consumed.
// Returns the number of characters written (without the terminator), or 0
// with nothing written if a pointer is null or out_size < 5.
// NOTE(review): the backslash itself is not escaped, so the output of inputs
// containing backslashes does not decode back unambiguously.
size_t string_escape_encode(char* out, size_t out_size,
	const char* in, size_t in_size, size_t* in_done)
{
	if (!in || !out) return 0;
	if (out_size < 5) return 0;	// 4=\xNN + 1=zero-end

	char* dst = out;
	char* const dst_limit = out + out_size - 5;
	size_t used = 0;
	while (used < in_size && dst < dst_limit) {
		const char c = in[used++];
		switch (c) {
		case '"':  *dst++ = '\\'; *dst++ = '"'; break;
		case '\n': *dst++ = '\\'; *dst++ = 'n'; break;
		case '\r': *dst++ = '\\'; *dst++ = 'r'; break;
		case '\t': *dst++ = '\\'; *dst++ = 't'; break;
		default:
			if (32 <= c && c < 127)
				*dst++ = c;
			else
				dst += sprintf(dst, "\\x%02x", (unsigned)(unsigned char)c);
			break;
		}
	}
	*dst = 0;

	if (in_done) *in_done = used;
	return (dst - out);
}
48 | 
// Writes an unescaped, zero-terminated copy of in[0..in_size) to <out>.
// Recognized escapes: \" \n \r \t and \xNN (two hexadecimal digits).
// Anything else after a backslash is copied literally, backslash included;
// this also applies to \x not followed by two valid hexadecimal digits.
// Decoding stops when the output buffer is full.
// *in_done (if not null) receives the number of input bytes consumed.
// Returns the number of characters written (without the terminator), or 0
// with nothing written if a pointer is null or out_size < 1.
size_t string_escape_decode(char* out, size_t out_size,
	const char* in, size_t in_size, size_t* in_done)
{
	if (!in || !out) return 0;
	if (out_size < 1) return 0;
	char *o=out, *oend=out+out_size-1;
	const char *i=in, *iend=in+in_size;
	for (; i<iend && o<oend; ++i) {
		if (*i == '\\') {
			++i;
			if (i >= iend) {
				// Lone backslash at the end of the input
				*o++ = '\\';
				break;
			}
			switch (*i) {
			case '"': *o++ = '"'; break;
			case 'n': *o++ = '\n'; break;
			case 'r': *o++ = '\r'; break;
			case 't': *o++ = '\t'; break;
			case 'x': {
				// Both digits must exist and be valid, otherwise the
				// sequence is kept as plain text instead of producing a
				// garbage byte.
				int hi = -1, lo = -1;
				if (iend - i > 2) {
					hi = digit_decode(*(i+1), 16);
					lo = digit_decode(*(i+2), 16);
				}
				if (hi >= 0 && lo >= 0) {
					*o++ = hi * 16 + lo;
					i += 2;
				}
				else {
					*o++ = '\\';
					--i;  //the 'x' is copied by the next iteration
				}
				break;
			}
			//TODO: more...
			default:
				*o++ = '\\';
				--i;  //the character is copied by the next iteration
				break;
			}
		}
		else *o++ = *i;
	}
	*o = 0;
	if (in_done) *in_done = (i - in);
	return (o - out);
}
92 | 
93 | 


--------------------------------------------------------------------------------
/src/ccommon/str_util.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #pragma once
  5 | #include <ctype.h>
  6 | #include <stdio.h>
  7 | #include <string.h>
  8 | #include <stdlib.h>
  9 | #include <stdbool.h>
 10 | 
 11 | //! Checks if a character is an string
 12 | static inline
 13 | bool char_in_str(int ch, const char* str) {
 14 | 	for (; *str; ++str) if (ch == *str) return true;
 15 | 	return false;
 16 | }
 17 | 
 18 | //! Finds the first occurrence of a character in a list
 19 | static inline
 20 | char* str_chars_find(const char* str, const char* chars) {
 21 | 	while (*str && !char_in_str(*str, chars)) str++;
 22 | 	return (char*)str;
 23 | }
 24 | 
 25 | //! Finds the first occurrence of a character in a list
 26 | static inline
 27 | char* stre_chars_find(char* str, char* end, const char* chars) {
 28 | 	while (str<end && !char_in_str(*str, chars)) str++;
 29 | 	return str;
 30 | }
 31 | 
 32 | //! Skips all the occurrences of a list of characters at the beginning of a string
 33 | static inline
 34 | char* str_ltrim(char* str, const char* spaces) {
 35 | 	while (*str && char_in_str(*str, spaces)) str++;
 36 | 	return str;
 37 | }
 38 | 
 39 | //! Skips all the occurrences of a list of characters at the beginning of a string
 40 | static inline
 41 | char* stre_ltrim(char* str, char* end, const char* spaces) {
 42 | 	while (str<end && char_in_str(*str, spaces)) str++;
 43 | 	return str;
 44 | }
 45 | 
 46 | //! Skips all the occurrences of a list of characters at the end of a string
 47 | static inline
 48 | char* stre_rtrim(char* str, char* end, const char* spaces) {
 49 | 	end--;
 50 | 	while (str<end && char_in_str(*end, spaces)) end--;
 51 | 	return end+1;
 52 | }
 53 | 
//! Skips all the occurrences of a list of characters at the beginning and the end of a string.
//! The range [*str, *end) is updated in place: first the beginning, then the end.
static inline
void stre_trim(char** str, char** end, char const* spaces) {
	*str = stre_ltrim(*str, *end, spaces);
	*end = stre_rtrim(*str, *end, spaces);
}
 60 | 
 61 | //! Copy an string
 62 | static inline
 63 | char* stre_copy(unsigned dsize, char* dst, const char* str, const char* end)
 64 | {
 65 | 	if (dsize) {
 66 | 		dsize--;
 67 | 		if (dsize > end-str) dsize = end-str;
 68 | 		memcpy(dst, str, dsize);
 69 | 		dst[dsize] = 0;
 70 | 	}
 71 | 	return dst;
 72 | }
 73 | 
 74 | //! Compare two string in case insensitive way
 75 | static inline
 76 | int str_cmp_i(const char* a, const char* b) {
 77 | 	for (;; ++a, ++b) {
 78 | 		int d = tolower((unsigned char)*a) - tolower((unsigned char)*b);
 79 | 		if (d != 0 || !*a)
 80 | 			return d;
 81 | 	}
 82 | }
 83 | 
 84 | static inline
 85 | const char* str_startswith(const char* str, const char* sub)
 86 | {
 87 | 	unsigned ls = strlen(str),
 88 | 			 l2 = strlen(sub);
 89 | 	if (ls >= l2 && !memcmp(str, sub, l2)) return str+l2;
 90 | 	return NULL;
 91 | }
 92 | 
 93 | static inline
 94 | const char* str_endswith(const char* str, const char* sub)
 95 | {
 96 | 	unsigned ls = strlen(str),
 97 | 			 l2 = strlen(sub);
 98 | 	if (ls >= l2 && !memcmp(str+ls-l2, sub, l2)) return str+ls-l2;
 99 | 	return NULL;
100 | }
101 | 
//! Converts a string to lower case.
//! Writes at most max-1 characters of <src> plus a terminator to <dst>.
//! Returns the number of characters written, without the terminator.
//! With max == 0 nothing is written and 0 is returned.
static inline
size_t str_tolower(char* dst, size_t max, const char* src) {
	if (!max) return 0;  //no room even for the terminator
	char *cur=dst, *end = dst+max-1;  //keep the last byte for the terminator
	for(; *src && cur<end; ++src, ++cur)
		*cur = tolower((unsigned char)*src);
	*cur = 0;
	return (cur - dst);
}
112 | 
//! Parses a string as a boolean value.
//! True for "1" and, ignoring case, for "y", "yes" and "true".
//! Everything else is false.
static inline
bool str_to_bool(const char* text) {
	return !strcmp(text, "1")
		|| !str_cmp_i(text, "y")
		|| !str_cmp_i(text, "yes")
		|| !str_cmp_i(text, "true");
}
122 | 
//! Parses a character as a single digit in the given base.
//! Letters (either case) stand for the values 10 to 35.
//! A base outside 2..36 is treated as 10.
//! Returns the digit value, or -1 if <c> is not a valid digit in that base.
static inline
int digit_decode(int c, int base) {
	if (!( 2 <= base && base <= 36)) base = 10;
	int value;
	if      ('0' <= c && c <= '9') value = c - '0';
	else if ('A' <= c && c <= 'Z') value = c - 'A' + 10;
	else if ('a' <= c && c <= 'z') value = c - 'a' + 10;
	else return -1;
	// One check for all kinds of digits, so that e.g. '9' is rejected in base 8
	return value < base ? value : -1;
}
132 | 
//! Checks if <str> is equal to one of the <count> strings in <list>.
static inline
bool strlist_in(unsigned count, char** list, const char* str)
{
	for (unsigned i=0; i<count; ++i) {
		if (strcmp(list[i], str) == 0)
			return true;
	}
	return false;
}
142 | 
//! Copies a string, (re)allocating the destination to the size needed.
//! *dst must be null or a pointer that realloc() accepts.
//! Returns the new *dst, or null on allocation failure, in which case *dst
//! is left unchanged.
static inline
char* strcpy_alloc(char** dst, const char* src) {
	const size_t bytes = strlen(src)+1;  //with terminator
	char* grown = realloc(*dst, bytes);
	if (grown) {
		memcpy(grown, src, bytes);
		*dst = grown;
	}
	return grown;
}
153 | 
//! printf into *buffer, which is (re)allocated with realloc() to the size needed.
//! Returns the length of the text, or a negative value on error.
#ifdef __GNUC__
__attribute__((format(printf, 2, 3)))
#endif
int sprintf_alloc(char** buffer, const char* fmt, ...);

//! Escapes double quotes, \n, \r, \t and non printable bytes (as \xNN).
//! Returns the number of characters written to <out>, without the terminator.
//! *in_done (if not null) receives the number of input bytes consumed.
size_t string_escape_encode(char* out, size_t out_size,
	const char* in, size_t in_size, size_t* in_done);

//! Reverts the escapes \" \n \r \t and \xNN.
//! Returns the number of characters written to <out>, without the terminator.
//! *in_done (if not null) receives the number of input bytes consumed.
size_t string_escape_decode(char* out, size_t out_size,
	const char* in, size_t in_size, size_t* in_done);
164 | 


--------------------------------------------------------------------------------
/src/ccommon/stringstore.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "stringstore.h"
 5 | #include "bisect.h"
 6 | 
 7 | /* */
// Releases everything owned by the store: the arena holding the copied
// strings, the sorted index and the slice vector.
void strsto_free(StringStore* S)
{
	alloc_arena_free(&S->al);
	vec_free(S->idx);
	vec_free(S->s);
}
14 | 
// Binary search of <key> in the sorted index S->idx, comparing with
// strsl_cmp. Returns true if found and stores the index position in *idx.
// NOTE(review): <i_> is the probe position supplied by the BISECT_RIGHT
// macro (bisect.h); when <key> is absent *idx is presumably the insertion
// position - confirm against the macro.
bool strsto_iidx_find(const StringStore* S, const StrSlice key, size_t* idx)
{
	bool found;
	BISECT_RIGHT(found, *idx, 0, vec_count(S->idx),
		strsl_cmp(S->s[S->idx[i_]], key) );
	return found;
}
22 | 
23 | StringInt strsto_find(const StringStore* S, const StrSlice ss)
24 | {
25 | 	size_t iidx;
26 | 	return strsto_iidx_find(S, ss, &iidx) ? S->idx[iidx] : -1;
27 | }
28 | 
29 | StringInt strsto_find_prefix(const StringStore* S, const StrSlice key)
30 | {
31 | 	if (!vec_count(S->idx)) return -1;  //empty store
32 | 
33 | 	size_t iidx;
34 | 	bool found = strsto_iidx_find(S, key, &iidx);
35 | 	if (found) return S->idx[iidx];  //exact match
36 | 
37 | 	bool last=false;  //last attempt
38 | 	while (1) {
39 | 		StringInt si = S->idx[iidx];
40 | 		const StrSlice str = S->s[si];
41 | 		
42 | 		// Count the matching characters
43 | 		size_t i=0;
44 | 		while (i<str.s && i<key.s && str.b[i] == key.b[i]) i++;
45 | 		
46 | 		if (i != str.s) {  //key does not starts with str, try a shorter str
47 | 			if (!iidx) return -1;
48 | 			if (!i && last) return -1;  //no matching prefix found
49 | 			last = !i;  //length-1 cases
50 | 			iidx--;
51 | 		} else {
52 | 			assert(i != key.s);  //would be exact
53 | 			return si;
54 | 		}
55 | 	}
56 | }
57 | 
// Adds the string <ss> to the store.
//   idx:     StringInt to assign, or negative to use the next free one.
//   static_: if true the slice is stored as is (the caller keeps the bytes
//            alive), otherwise the bytes are copied into the store's arena.
// Returns the StringInt of the string. If the string is already present its
// existing StringInt is returned, unless a different <idx> was requested.
// Returns -1 on that conflict or if <idx> is already used by another string.
StringInt strsto_add2(StringStore* S, const StrSlice ss, StringInt idx,
	bool static_)
{
	size_t iidx;
	if (strsto_iidx_find(S, ss, &iidx))
	{
		// Already stored
		if (idx >= 0 && idx != S->idx[iidx]) return -1;
		return S->idx[iidx];
	}
	else
	{
		unsigned n = vec_count(S->s);
		if (idx < 0) idx = n;
		
		if (idx < n) {
			// Index already used
			if (S->s[idx].b) return -1;
		} else {
			// Grow the slice vector up to <idx>; skipped slots stay zeroed
			vec_append_zero(S->s, idx-n+1);
		}

		if (static_)
			S->s[idx] = ss;
		else {
			// Copy string
			// NOTE(review): the allocation result is not checked here
			char * p = alloc_arena_alloc(&S->al, ss.s+1);
			memcpy(p, ss.b, ss.s);
			p[ss.s] = 0;
			S->s[idx] = (StrSlice){ .b=p, .s=ss.s };
		}
			
		// Keep the index sorted: insert at the position found above
		vec_insert(S->idx, iidx, 1, &idx);
		return idx;
	}
}
93 | 


--------------------------------------------------------------------------------
/src/ccommon/stringstore.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * Storage of unique string slices.
 5 |  *
 6 |  * Example:
 7 |  *   StringStore ss={0};
 8 |  *   StringInt si = strsto_add(&ss, strsl_static("apple"));
 9 |  *   assert( !strsl_cmp(strsto_get(&ss, si), strsl_static("apple")) );
10 |  *   strsto_free(&ss);
11 |  */
12 | #pragma once
13 | #include <stdint.h>
14 | #include <stddef.h>
15 | #include <stdbool.h>
16 | #include "strslice.h"
17 | #include "alloc_arena.h"
18 | #include "vector.h"
19 | 
// Integer identifying a string in a store; -1 is used for "not found".
typedef int32_t StringInt;

typedef struct StringStore {
	StrSlice * s;  //vector of strings, indexed by StringInt
	unsigned * idx;  //index, vector: StringInts sorted by string content
	AllocatorArena al;  //storage for the strings copied into the store
} StringStore;
27 | 
// Releases all the memory owned by the store.
void strsto_free(StringStore* S);
29 | 
// Number of strings in the store (entries of the sorted index).
static inline
unsigned strsto_count(const StringStore* S)
	{ return vec_count(S->idx); }
33 | 
// StringInt that the next string added without an explicit index receives
// (the size of the slice vector).
static inline
unsigned strsto_next_idx(const StringStore* S)
	{ return vec_count(S->s); }
37 | 
// Returns the string with the StringInt <idx>.
// An out of range <idx> asserts in debug builds and returns an empty slice
// (null pointer, size 0) otherwise.
static inline
StrSlice strsto_get(const StringStore* S, StringInt idx) {
	assert(0 <= idx && idx < vec_count(S->s));
	if (!(0 <= idx && idx < vec_count(S->s))) return (StrSlice){0};
	return S->s[idx];
}
44 | 
// Find a string in the store. Return -1 if not found.
StringInt strsto_find(const StringStore* S, const StrSlice ss);

// Add a string, optionally with an explicit index (negative <idx> for the
// next free one) and optionally without copying its bytes (<static_>).
// Return its index, or -1 on an index conflict.
StringInt strsto_add2(StringStore* S, const StrSlice ss, StringInt idx,
	bool static_);
50 | 
// Add a string, copying its bytes into the store, with the next free index.
// Returns its index (the existing one if the string was already present).
static inline
StringInt strsto_add(StringStore* S, const StrSlice ss) {
	return strsto_add2(S, ss, -1, false);
}
56 | 
// Find longest string in the store that matches the beginning of key.
// Returns -1 if there is none.
StringInt strsto_find_prefix(const StringStore* S, const StrSlice key);

// Find the position in the index <idx> for <key>.
// Returns true if <key> is present in the store.
// Then, S->idx[*idx] is the StringInt.
bool strsto_iidx_find(const StringStore* S, const StrSlice key, size_t* idx);
64 | 
/* Utility */
// Copies the slice <ss> into the dynamic string *buf and returns *buf.
// NOTE(review): dstr_copy is a macro defined elsewhere; presumably it
// (re)allocates *buf as needed and zero-terminates it - confirm.
static inline
char* strsl_getd(DynStr* buf, const StrSlice ss) {
	dstr_copy(*buf, strsl_len(ss), ss.b);
	return *buf;
}
71 | 
72 | //static inline
73 | //StrSlice strsl_fromd(const DynStr buf) {
74 | //	return (StrSlice){ .b=buf, .s=dstr_count(buf) };
75 | //}
76 | 


--------------------------------------------------------------------------------
/src/ccommon/strslice.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #pragma once
  5 | #include <assert.h>
  6 | #include <stdint.h>
  7 | #include <stddef.h>
  8 | #include <string.h>
  9 | 
// Non-owning view of a run of characters; not necessarily zero-terminated.
typedef struct StrSlice {
	const char	*b;  // First character
	size_t		s;   // Number of characters
} StrSlice;
 14 | 
 15 | // Initialization
 16 | 
// From a pointer B and a size S
#define strsl_make(B,S) \
	((StrSlice){ .b=(B), .s=(S) })

// From a string literal or char array (uses sizeof, so not for pointers)
#define strsl_static(S) \
	((StrSlice){ .b=(S), .s=sizeof(S)-1 })

// From a dynamic string (size taken with dstr_count)
#define strsl_fromd(D) \
	((StrSlice){ .b=(D), .s=dstr_count(D) })

// From an object with the members .p.cp (pointer) and .len (size)
#define strsl_froma(A) \
	((StrSlice){ .b=(A).p.cp, .s=(A).len })

// From a range: B points to the first character and E past the last one
#define strsl_fromr(B,E) \
	((StrSlice){ .b=(B), .s=(E)-(B) })
 31 | 
// From a zero-terminated string (the terminator is not part of the slice).
// <strz> must not be null.
static inline
StrSlice strsl_fromz(const char* strz)
	{ return (StrSlice){ .b=strz, .s=strlen(strz) }; }
 35 | 
 36 | // Access
 37 | 
// Number of characters in the slice
static inline
intptr_t strsl_len(const StrSlice ss)
	{ return ss.s; }
 41 | 
// Pointer to the first character of the slice
static inline
const char * strsl_begin(const StrSlice ss)
	{ return ss.b; }
 45 | 
// Pointer one past the last character of the slice
static inline
const char * strsl_end(const StrSlice ss)
	{ return ss.b + ss.s; }
 49 | 
// Loop header iterating over the characters of the slice S starting at the
// offset I. VC names the cursor variable and VE the end pointer variable.
#define strsl_for(S, VC, VE, I) \
	for (const char *VC=strsl_begin(S)+(I), *VE=strsl_end(S); VC<VE; ++VC)
 52 | 
// Unsafe slice: no bounds checking.
// The result starts <b> characters into <ss> and has the size ss.s-e-b.
// NOTE(review): so <e> reads as a count of characters dropped from the end,
// not as an end offset - confirm against the callers.
static inline
StrSlice strsl_slice_u(const StrSlice ss, size_t b, size_t e)
	{ return (StrSlice){ ss.b+b, ss.s-e-b }; }
 57 | 
 58 | // Operations
 59 | 
 60 | static inline
 61 | int strsl_cmp(const StrSlice s1, const StrSlice s2)
 62 | {
 63 | 	const char *c1=s1.b, *e1=c1+s1.s,
 64 | 			   *c2=s2.b, *e2=c2+s2.s;
 65 | 	do {
 66 | 		int v1 = (uint8_t)*c1;
 67 | 		int v2 = (uint8_t)*c2;
 68 | 		if (!(c1 < e1)) {
 69 | 			if (!(c2 < e2))
 70 | 				return 0;
 71 | 			else
 72 | 				return -v2;
 73 | 		} else if (!(c2 < e2))
 74 | 			return v1;
 75 | 
 76 | 		int d = v1 - v2;
 77 | 		if (d) return d;
 78 | 		
 79 | 		c1++;
 80 | 		c2++;
 81 | 	} while (1);
 82 | }
 83 | 
 84 | static inline
 85 | int strsl_cmpz(const StrSlice ss, const char* strz)
 86 | {
 87 | 	for (const char *c=ss.b, *e=c+ss.s; c<e; ++c, ++strz) {
 88 | 		if (!*strz) return *c ? *c : 1;
 89 | 		int d = *c - *strz;
 90 | 		if (d) return d;
 91 | 	}
 92 | 	return *strz;
 93 | }
 94 | 
 95 | static inline
 96 | size_t strsl_copyz(size_t bufsz, char* buf, const StrSlice ss)
 97 | {
 98 | 	if (bufsz < 1) return 0;
 99 | 	bufsz--;
100 | 	size_t len = strsl_len(ss);
101 | 	if (len > bufsz) len = bufsz;
102 | 	memcpy(buf, ss.b, len);
103 | 	buf[len] = 0;
104 | 	return len;
105 | }
106 | 
107 | static inline
108 | char* strsl_getz(size_t bufsz, char* buf, const StrSlice ss) {
109 | 	strsl_copyz(bufsz, buf, ss);
110 | 	return buf;
111 | }
112 | 
113 | // Utility
114 | 
115 | static inline
116 | int strsl_startswith(const StrSlice ss, const StrSlice prefix) {
117 | 	if (!(ss.s >= prefix.s)) return 0;
118 | 	return !memcmp(ss.b, prefix.b, prefix.s);
119 | }
120 | 
121 | static inline
122 | int strsl_endswith(const StrSlice ss, const StrSlice suffix) {
123 | 	if (!(ss.s >= suffix.s)) return 0;
124 | 	return !memcmp(ss.b+ss.s-suffix.s, suffix.b, suffix.s);
125 | }
126 | 
127 | static inline
128 | int strsl_prefix_trim(StrSlice* pss, const StrSlice prefix)
129 | {
130 | 	if (!strsl_startswith(*pss, prefix)) return 0;
131 | 	pss->b += prefix.s;
132 | 	pss->s -= prefix.s;
133 | 	return 1;
134 | }
135 | 
136 | static inline
137 | int strsl_prefixz_trim(StrSlice* pss, const char* prefix) {
138 | 	return strsl_prefix_trim(pss, strsl_fromz(prefix));
139 | }
140 | 
141 | static inline
142 | int strsl_suffix_trim(StrSlice* pss, const StrSlice suffix)
143 | {
144 | 	if (!strsl_endswith(*pss, suffix)) return 0;
145 | 	pss->s -= suffix.s;
146 | 	return 1;
147 | }
148 | 
149 | static inline
150 | int strsl_suffixz_trim(StrSlice* pss, const char* suffix) {
151 | 	return strsl_suffix_trim(pss, strsl_fromz(suffix));
152 | }
153 | 


--------------------------------------------------------------------------------
/src/ccommon/structio_json.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include "structio.h"
 6 | 
// Stream flags specific to the JSON class.
enum StioStreamJsonFlag {
	// Takes the value of STIO_IF_CUSTOM.
	// NOTE(review): presumably requests pretty-printed output; marked as
	// work in progress.
	STIO_SF_JSON_PRETTY = STIO_IF_CUSTOM, //WIP
};
10 | 
11 | extern const StioClass stio_class_json;
12 | 
13 | int stio_json_write(StioStream* sio, StioCtx* ctx, StioItem* itm);
14 | 
15 | int stio_json_read(StioStream* sio, StioCtx* ctx, StioItem* itm);
16 | 
17 | 


--------------------------------------------------------------------------------
/src/ccommon/timing.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #include "timing.h"
  5 | 
  6 | // -----------------------------------------------------------------------------
  7 | #if defined(__unix__)
  8 | #ifndef _POSIX_C_SOURCE
  9 | #define _POSIX_C_SOURCE 200809L
 10 | #endif
 11 | #include <time.h>
 12 | #include <errno.h>
 13 | 
 14 | double timing_time() {
 15 | 	struct timespec tp;
 16 | 	clock_gettime(CLOCK_MONOTONIC, &tp);
 17 | 	return (double)tp.tv_sec + (double)tp.tv_nsec * 1e-9;
 18 | }
 19 | 
 20 | void timing_sleep(double dt) {
 21 | 	struct timespec tp;
 22 | 	tp.tv_sec = (int)dt;
 23 | 	tp.tv_nsec = (dt - tp.tv_sec) * 1e9;
 24 | 	while (clock_nanosleep(CLOCK_MONOTONIC, 0, &tp, &tp) == EINTR) ;
 25 | }
 26 | 
 27 | double timing_timeofday() {
 28 | 	//struct timeval tv={0};
 29 | 	//gettimeofday(&tv, NULL);
 30 | 	//return tv.tv_sec + tv.tv_usec * 1e-6;
 31 | 	struct timespec tp;
 32 | 	clock_gettime(CLOCK_REALTIME, &tp);
 33 | 	return (double)tp.tv_sec + (double)tp.tv_nsec * 1e-9;
 34 | }
 35 | 
 36 | // -----------------------------------------------------------------------------
 37 | #elif defined(__WIN32__)
 38 | #define WIN32_LEAN_AND_MEAN
 39 | #define WIN32_EXTRA_LEAN
 40 | #include <windows.h>
 41 | #include <stdint.h>
 42 | 
// Frequency of the performance counter, cached by timing_win_init().
// d_freq is zero until it has been initialized.
static struct {
	double d_freq;          // li_freq as a double, used as divisor
	LARGE_INTEGER li_freq;  // value stored by QueryPerformanceFrequency
} timing_win_data;

// Query the performance counter frequency and cache it.
// Called on demand by timing_time().
void timing_win_init() {
	QueryPerformanceFrequency(&timing_win_data.li_freq);
	timing_win_data.d_freq = timing_win_data.li_freq.QuadPart;
}
 52 | 
 53 | double timing_time() {
 54 | 	if (!timing_win_data.d_freq) timing_win_init();
 55 | 	LARGE_INTEGER value;
 56 | 	QueryPerformanceCounter(&value);
 57 | 	return (double)value.QuadPart / timing_win_data.d_freq;
 58 | }
 59 | 
 60 | void timing_sleep(double dt) {
 61 | 	Sleep(dt*1000);
 62 | }
 63 | 
 64 | double timing_timeofday() {
 65 | 	int64_t t;
 66 | 	GetSystemTimeAsFileTime((FILETIME*)&t);
 67 | 	return (t - 116444736000000000LL) * 1e-7;
 68 | }
 69 | 
 70 | // -----------------------------------------------------------------------------
 71 | #elif defined(SDL_VERSION)
 72 | #include <SDL2/SDL.h>
 73 | 
// Frequency of the SDL performance counter, cached by timing_sdl_init().
static struct {
	double d_freq;    // u64_freq as a double, used as divisor
	Uint64 u64_freq;  // value returned by SDL_GetPerformanceFrequency
} timing_sdl_data;

// Query the SDL performance counter frequency and cache it.
void timing_sdl_init() {
	timing_sdl_data.u64_freq = SDL_GetPerformanceFrequency();
	timing_sdl_data.d_freq = timing_sdl_data.u64_freq;
}
 83 | 
 84 | double timing_time() {
 85 | 	return (double)SDL_GetPerformanceCounter() / timing_sdl_data.d_freq;
 86 | }
 87 | 
 88 | void timing_sleep(double dt) {
 89 | 	SDL_Delay(dt*1000);
 90 | }
 91 | 
 92 | #include <time.h>
 93 | double timing_timeofday() {
 94 | 	return time(NULL);  //TODO: not portable
 95 | }
 96 | 
 97 | // -----------------------------------------------------------------------------
 98 | #else
 99 | #include <time.h>
100 | 
// Fallback: calendar time from time(), with a resolution of one second.
double timing_time() {
	const time_t now = time(0);  //TODO: use clock?
	return (double)now;
}
104 | 
// Fallback: not implemented, returns immediately.
void timing_sleep(double dt) {
	(void)dt;
	//TODO: implement with polling?
}
108 | 
// Fallback: calendar time from time(), with a resolution of one second.
double timing_timeofday() {
	const time_t now = time(NULL);  //TODO: not portable
	return (double)now;
}
112 | 
113 | #endif
114 | 


--------------------------------------------------------------------------------
/src/ccommon/timing.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | 
 6 | // Get a monotonic time measured in seconds
 7 | double timing_time();
 8 | 
 9 | void timing_sleep(double dt);
10 | 
// Return the time elapsed since *t_last (in seconds) and store the current
// time in *t_last.
static inline
double timing_tic(double* t_last) {
	const double now = timing_time();
	const double elapsed = now - *t_last;
	*t_last = now;
	return elapsed;
}
17 | 
18 | // Get the current number of seconds since 1970-01-01 00:00:00 (UTC).
19 | double timing_timeofday();
20 | 


--------------------------------------------------------------------------------
/src/ccommon/unicode.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #include "unicode.h"
 5 | 
 6 | uint32_t utf8_decode_next(const char** pstr, const char* end)
 7 | {
 8 | 	const uint8_t *c = (const uint8_t*)*pstr,
 9 | 	              *e = (const uint8_t*)end;
10 | 	if (!(c < e)) return 0;
11 | 	
12 | 	uint32_t cp = *c++;
13 | 
14 | 	if ((cp & 0x80) == 0x80) {  //multibyte
15 | 		const uint8_t *b=c;
16 | 		while (c < e && (*c & 0xC0) == 0x80) ++c;  //count continuation bytes
17 | 
18 | 		if ((cp & 0xE0) == 0xC0) {  //2 bytes: 110xxxxx 10xxxxxx
19 | 			if (c != b+1) goto error_end;
20 | 			uint32_t b2 = b[0];
21 | 			cp = ((cp & 0x1F) << 6) | (b2 & 0x3F);
22 | 		}
23 | 		else if ((cp & 0xF0) == 0xE0) {  //3 bytes: 1110xxxx ...
24 | 			if (c != b+2) goto error_end;
25 | 			uint32_t b2 = b[0], b3 = b[1];
26 | 			cp = ((cp & 0x0F) << 12) | ((b2 & 0x3F) << 6) | (b3 & 0x3F);
27 | 		}
28 | 		else if ((cp & 0xF8) == 0xF0) {  //4 bytes: 11110xxx ...  
29 | 			if (c != b+3) goto error_end;
30 | 			uint32_t b2 = b[0], b3 = b[1], b4 = b[2];
31 | 			cp = ((cp & 0x07) << 18) | ((b2 & 0x3F) << 12) | ((b3 & 0x3F) << 6)
32 | 				| (b4 & 0x3F);
33 | 		}
34 | 		else goto error_end;
35 | 	}
36 | 
37 | 	if ((const char*)c > end) { c=end; cp=0; }  //TODO: check before!
38 | 	*pstr = (const char*)c;
39 | 	return cp;
40 | 	
41 | error_end:
42 | 	*pstr = (const char*)c;
43 | 	return 0xFFFD;
44 | }
45 | 
/* Skip one code point without decoding it: one byte plus every continuation
 * byte (10xxxxxx) that follows. Never goes past <end>.
 * Returns <cur> unchanged if it is not before <end>.
 */
const char* utf8_decode_skip(const char* cur, const char* end)
{
	if (!(cur < end)) return cur;

	++cur;  //first byte
	for (; cur < end; ++cur) {
		if ((*cur & 0xC0) != 0x80) break;  //not a continuation byte
	}
	return cur;
}
52 | 
/* Encode the code point <cp> as UTF-8 at <dst>, writing from 1 to 4 bytes.
 * Returns the position after the last byte written.
 * Nothing is written (and <dst> is returned) if cp is above 0x10FFFF.
 */
char* utf8_encode_next(char* dst, uint32_t cp)
{
	if (cp < 0x80) {  //1 byte: 0xxxxxxx
		dst[0] = cp;
		return dst + 1;
	}
	if (cp < 0x800) {  //2 bytes
		dst[0] = 0xC0 | (cp >> 6);
		dst[1] = 0x80 | (cp & 0x3F);
		return dst + 2;
	}
	if (cp < 0x10000) {  //3 bytes
		dst[0] = 0xE0 | (cp >> 12);
		dst[1] = 0x80 | ((cp >> 6) & 0x3F);
		dst[2] = 0x80 | (cp & 0x3F);
		return dst + 3;
	}
	if (cp < 0x110000) {  //4 bytes
		dst[0] = 0xF0 | (cp >> 18);
		dst[1] = 0x80 | ((cp >> 12) & 0x3F);
		dst[2] = 0x80 | ((cp >> 6) & 0x3F);
		dst[3] = 0x80 | (cp & 0x3F);
		return dst + 4;
	}
	return dst;  //error, nothing written
}
77 | 


--------------------------------------------------------------------------------
/src/ccommon/unicode.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * Unicode, UTF-8, encoding and decoding.
 5 |  */
 6 | #pragma once
 7 | #include <stdint.h>
 8 | 
 9 | /* Return the next code point and advance the string pointer.
10 |  * Return zero for an empty string.
11 |  * In case of error, returns 0xFFFD and skips the bytes.
12 |  */
13 | uint32_t utf8_decode_next(const char** pstr, const char* end);
14 | 
15 | /* Skip one codepoint without fully decoding it.
16 |  * Returns a pointer to the next codepoint.
17 |  * Returns <cur> if cur == end.
18 |  */
19 | const char* utf8_decode_skip(const char* cur, const char* end);
20 | 
21 | /* Encode one code point into cursor.
22 |  * Writes up to 4 bytes. 
23 |  * Return the new cursor position.
24 |  */
25 | char* utf8_encode_next(char* dst, uint32_t cp);
26 | 


--------------------------------------------------------------------------------
/src/ccommon/unicode_data.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  *
 4 |  * Unicode database.
 5 |  * Query the properties of codepoints.
 6 |  * This is a separate module from "unicode" because the data occupies several
 7 |  * kilobytes.
 8 |  */
 9 | #pragma once
10 | #include <stdint.h>
11 | 
12 | /* Get the major general category of a unicode codepoint.
 * Returns one of the following characters, or zero if the codepoint is outside
 * the unicode range.
15 |  * L: Letter, M: Mark, N: Number, P: Punctuation, S: Symbol, Z: Separator, C: Other
16 |  */
17 | int unicode_category_major(uint32_t cp);
18 | 
19 | /* Returns the upper case variant of codepoint.
20 |  * If there is none, it returns the same codepoint.
21 |  */
22 | uint32_t unicode_upper(uint32_t cp);
23 | 
24 | /* Returns the lower case variant of codepoint.
25 |  * If there is none, it returns the same codepoint.
26 |  */
27 | uint32_t unicode_lower(uint32_t cp);
28 | 


--------------------------------------------------------------------------------
/src/ccompute/tensorstore.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * Parse and store information from common tensor storage formats used for
  5 |  * machine learning.
  6 |  *
  7 |  * Example:
  8 | 	StringStore ss={0};
  9 | 	TensorStore ts={ .ss=&ss };
 10 | 	Stream stm={0};
 11 | 	TRY( stream_open_file(&stm, "model.gguf") );
	TRY( tstore_read(&ts, &stm, NULL, NULL) );
	TRY( tstore_info_dump_path(&ts, "model-info.txt") );
end:
	tstore_free(&ts);
 16 | 	stream_close(&stm, 0);
 17 |  */
 18 | #pragma once
 19 | #include "ccommon/stream.h"
 20 | #include "ccommon/vector.h"
 21 | #include "ccommon/any.h"
 22 | #include "ccommon/stringstore.h"
 23 | 
 24 | typedef struct TensorStore TensorStore;
 25 | 
 26 | /* Error codes */
 27 | 
// Error codes of the tensor store functions. All the values are negative.
typedef enum {
	TS_E_UNKNOWN		= -0x3001,
	TS_E_OVERFLOW		= -0x3002,  // a value exceeds a sanity limit
	TS_E_FORMAT			= -0x3003,  // data does not match the format
	TS_E_READ			= -0x3004,  // could not read from the stream
	TS_E_METADATA		= -0x3005,  // unknown or unsupported metadata
	TS_E_DTYPE			= -0x3006,  // unknown tensor data type
	TS_E_WRITE			= -0x3007,
} TSError;
 37 | 
 38 | /* Data types */
 39 | 
// Tensor element data types. TS_DTYPE_NONE is zero.
typedef enum {
	TS_DTYPE_NONE,
	TS_DTYPE_F64,
	TS_DTYPE_F32,
	TS_DTYPE_F16,
	TS_DTYPE_BF16,
	TS_DTYPE_I64,
	TS_DTYPE_I32,
	TS_DTYPE_I16,
	TS_DTYPE_I8,
	// GGML quantization
	TS_DTYPE_Q8_0,
	TS_DTYPE_Q4_1,
	TS_DTYPE_Q6_K,
	TS_DTYPE_Q5_K,
	TS_DTYPE_Q4_K,
	TS_DTYPE__END,  // one past the last type, not a data type itself
} TSDType;
 58 | 
// Attributes of a data type, see tstore_dtype_attr().
typedef struct {
	const char	*name;
	uint64_t	sz_m, sz_d;  // tensor size = count * sz_m / sz_d
	// NOTE(review): presumably the matching type ids used by
	// tstore_dtype_*_ggml and tstore_dtype_*_mda - confirm in tensorstore.c
	int			ggml,
				mda;
	unsigned	valid:1;
} TSDTypeAttr;
 66 | 
 67 | const TSDTypeAttr* tstore_dtype_attr(int dt);
 68 | 
 69 | int tstore_dtype_fromz(const char* s);
 70 | 
 71 | const char * tstore_dtype_str(int dt);
 72 | 
 73 | // Returns -1 if not found
 74 | int tstore_dtype_from_ggml(int ggml_type);
 75 | int tstore_dtype_to_ggml(int dt);
 76 | 
 77 | // Returns -1 if not found
 78 | int tstore_dtype_from_mda(int mda_dtype);
 79 | int tstore_dtype_to_mda(int dt);
 80 | 
 81 | /* Tensor data */
 82 | 
// Tensor data with a given data type, as filled by tstore_tensor_data_get().
typedef struct {
	TSDType dtype;  	//data type
	void *data;
	size_t size;
	// NOTE(review): ownmem presumably means that <data> is released by
	// tstore_tdata_free - confirm in tensorstore.c
	unsigned ownmem:1,
	         perm:1;	//<data> remains valid for the lifetime of the tensor store
} TSTensorData;
 90 | 
 91 | void tstore_tdata_free(TSTensorData*);
 92 | 
 93 | /* Meta data entry */
 94 | 
// Key-value metadata entry. The key is the id of the name in the string store.
typedef struct {
	int key;  //str_id
	Any value;
} TSMetaEntry;
 99 | 
100 | /* Tensor entry */
101 | 
// Information about one tensor: name, type, shape and location of the data.
typedef struct {
	int key;  //str_id
	TSDType dtype;
	unsigned shape_n, shape[4];  //number of dimensions and their sizes
	uint64_t offset, size;  //location of the data in <stm>
	Stream *stm;  //stream that holds the tensor data
	TSTensorData *cache;  //converted tensor cache, vector, sorted
} TSTensorEntry;
110 | 
111 | uint64_t tstore_tensor_count(const TSTensorEntry* S);
112 | uint64_t tstore_tensor_size(const TSTensorEntry* S);
113 | 
// printf format and matching arguments to print the four shape values of a
// tensor entry T as "AxBxCxD". T is a TSTensorEntry object (not a pointer).
#define TSTENSOR_SHAPE4_FMT  "%ux%ux%ux%u"
#define TSTENSOR_SHAPE4_UNPACK(T) \
	(T).shape[0], (T).shape[1], (T).shape[2], (T).shape[3]
117 | 
118 | /* Return a TSTensorData object with the tensor data with type dtype.
119 |  * If flags & TSTDG_F_PERM, the data pointer is permanent, otherwise,
120 |  * The TSTensorData object must be free'd after use.
121 |  */
122 | int tstore_tensor_data_get(TSTensorEntry* S, TSDType dtype, int flags,
123 | 	TSTensorData* out);
124 | 
// Flags for tstore_tensor_data_get(). Bit values that can be combined.
enum tstore_tensor_data_get_flags_t {
	TSTDG_F_PERM  = 1,  // out->data is in permanent storage
	TSTDG_F_WRITE = 2,  // Returns memory that can be written
};
129 | 
130 | /* IO CallBack */
131 | 
// Callback invoked for each tensor during read or write, see tstore_cb_call().
typedef struct {
	// Receives <user>, the store, the tensor entry and its name.
	int (*func)(void* user, TensorStore* ts, TSTensorEntry* te, DynStr* pname);
	void *user;  // passed unchanged to func as first argument
} TSCallback;
136 | 
137 | static inline
138 | int tstore_cb_call(TSCallback* cb, TensorStore* ts, TSTensorEntry* te,
139 | 	DynStr* pname)
140 | {
141 | 	if (!cb || !cb->func) return 1;
142 | 	return cb->func(cb->user, ts, te, pname);
143 | }
144 | 
145 | /* Parser */
146 | 
// Description of a storage format: names and the functions that implement it.
typedef struct {
	const char *name, *ext;
	int (*detect)(Stream*);  // check if the stream data has this format
	int (*read)(TensorStore*, Stream*, TSCallback*);
	int (*write)(TensorStore*, Stream*, TSCallback*);  // may be left unset
} TensorStoreFormat;
153 | 
154 | int tstore_format_register(const TensorStoreFormat*);
155 | 
156 | const TensorStoreFormat* tstore_format_get(int idx);
157 | 
158 | /* Store */
159 | 
// Collection of tensor entries and metadata entries.
// Names are stored as ids of strings in the external string store <ss>.
struct TensorStore {
	TSTensorEntry * tensors;  //vector, source order
	TSMetaEntry * meta;  //vector, source order
	unsigned * tensors_idx;  //vector, key sorted
	unsigned * meta_idx;  //vector, key sorted
	StringStore *ss;  //external store for tensor names strings, fill before use
};
167 | 
168 | void tstore_free(TensorStore*);
169 | 
170 | /* Read tensors information from a stream.
171 |  * Does not read the tensors data.
172 |  * fmt: data format. If NULL, tries to guess from the data.
173 |  * cb: Optional. Function called before adding each tensor. If it returns non
174 |  *     positive, the tensor is not added. May change the name.
175 |  */
176 | int tstore_read(TensorStore* S, Stream* stm, const TensorStoreFormat* fmt,
177 | 	TSCallback* cb);
178 | 
179 | /* Write tensors information to a stream.
 * Does not write the tensors data.
181 |  * fmt: data format.
182 |  * cb: Optional. Function called before writing each tensor. If it returns non
183 |  *     positive, the tensor is not written. May store a new name in *pname.
184 |  */
185 | int tstore_write(TensorStore* S, Stream* stm, const TensorStoreFormat* fmt,
186 | 	TSCallback* cb);
187 | 
188 | /* Tries to detect the data format of a stream.
189 |  */
190 | const TensorStoreFormat* tstore_format_detect(Stream* stm);
191 | 
192 | /* Make copy of the store src in dst.
193 |  * Useful for conversion and for other manipulations.
194 |  */
195 | void tstore_copy_from(TensorStore* dst, const TensorStore* src);
196 | 
197 | /* Write human readable information about the store.
198 |  */
199 | int tstore_info_dump(const TensorStore*, Stream* out);
200 | 
201 | /* Write human readable information about the store.
202 |  */
203 | int tstore_info_dump_path(const TensorStore*, const char* path);
204 | 
205 | /* Add a new key-value metadata entry.
206 |  * Takes ownership of value.
207 |  */
208 | int tstore_meta_addk(TensorStore* S, StringInt key, Any* value);
209 | 
210 | /* Add a new key-value metadata entry.
211 |  * Takes ownership of value.
212 |  */
213 | static inline
214 | int tstore_meta_add(TensorStore* S, const char* name, Any* value)
215 | {
216 | 	int key = strsto_add(S->ss, strsl_fromz(name));
217 | 	return tstore_meta_addk(S, key, value);
218 | }
219 | 
220 | /* Add a new key-value metadata entry.
221 |  * String value.
222 |  */
223 | int tstore_meta_adds(TensorStore* S, const char* name, const char* value);
224 | 
225 | /* Find and return a metadata entry.
226 |  * Returns empty (t=0) if not found.
227 |  */
228 | const Any tstore_meta_getk(const TensorStore* S, StringInt key);
229 | 
230 | /* Find and return a metadata entry.
231 |  * Return empty (t=0) if not found.
232 |  */
233 | static inline
234 | const Any tstore_meta_get(const TensorStore* S, const char* name) {
235 | 	int key = strsto_add(S->ss, strsl_fromz(name));
236 | 	return tstore_meta_getk(S, key);
237 | }
238 | 
239 | /* Add a new tensor entry.
240 |  * entry->key is ignored.
241 |  */
242 | int tstore_tensor_addk(TensorStore* S, StringInt key,
243 | 	const TSTensorEntry* entry);
244 | 
245 | /* Add a new tensor entry.
246 |  * entry->key is ignored.
247 |  */
248 | static inline
249 | int tstore_tensor_add(TensorStore* S, const char* name,
250 | 	const TSTensorEntry* entry)
251 | {
252 | 	int key = strsto_add(S->ss, strsl_fromz(name));
253 | 	return tstore_tensor_addk(S, key, entry);
254 | }
255 | 
256 | /* Find and return a tensor entry.
257 |  * Return NULL if not found.
258 |  */
259 | TSTensorEntry* tstore_tensor_getk(const TensorStore*, StringInt key);
260 | 
261 | /* Find and return a tensor entry.
262 |  * Return NULL if not found.
263 |  */
264 | static inline
265 | TSTensorEntry* tstore_tensor_get(const TensorStore* S, const char* name) {
266 | 	int key = strsto_add(S->ss, strsl_fromz(name));
267 | 	return tstore_tensor_getk(S, key);
268 | }
269 | 
270 | /* Remake the tensors index.
271 |  * Call after changing the tensor manually.
272 |  */
273 | int tstore_tensor_index_remake(TensorStore* S);
274 | 
275 | /* Free all stored tensor data.
276 |  */
277 | int tstore_cache_clear(TensorStore* S);
278 | 


--------------------------------------------------------------------------------
/src/ccompute/tensorstore_gguf.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "tensorstore_gguf.h"
  5 | #include "ccommon/logging.h"
  6 | 
  7 | #ifndef TENSORSTORE_ALLOCATOR
  8 | #define TENSORSTORE_ALLOCATOR  g_allocator
  9 | #endif
 10 | 
 11 | #define GGUF_MAGIC "GGUF"
 12 | //#define GGUF_VERSION 3
 13 | #define GGUF_ALIGNMENT 32
 14 | 
 15 | static
 16 | uint64_t gguf_align(uint64_t offset)
 17 | {
 18 |     return offset + (GGUF_ALIGNMENT - (offset % GGUF_ALIGNMENT)) % GGUF_ALIGNMENT;
 19 | }
 20 | 
// Conversion table from the GGUF metadata value type (used as index) to the
// Any type. See gguf_meta_type_to_any().
static const int g_gguf_to_any_type[] = {
	ANY_T_UINT8 , ANY_T_INT8 ,
	ANY_T_UINT16, ANY_T_INT16,
	ANY_T_UINT32, ANY_T_INT32,
	ANY_T_FLOAT32,
	ANY_T_BOOL,
	ANY_T_STRING,
	ANY_T_ARRAY,
	ANY_T_UINT64, ANY_T_INT64,
	ANY_T_FLOAT64,
};
 32 | 
 33 | static
 34 | int gguf_meta_type_to_any(uint32_t gguf_type)
 35 | {
 36 | 	return gguf_type < COUNTOF(g_gguf_to_any_type) ?
 37 | 		g_gguf_to_any_type[gguf_type] : 0;
 38 | }
 39 | 
/* Read a string: a 64 bit length followed by that many bytes.
 * The bytes are stored in memory taken from <al> with a zero terminator added,
 * and *out is set to a string Any of that length.
 * limit: maximum length accepted, TS_E_OVERFLOW is returned if exceeded.
 * Returns 1 on success.
 * NOTE(review): TRYR/TRYRB presumably return from the function on failure -
 * confirm in ccommon.h.
 */
static
int gguf_read_string(Stream* stm, Allocator* al, Any* out, uint64_t limit)
{
	uint64_t len;
	TRYR( stream_read_var(stm, len) );
	TRYRB(TS_E_OVERFLOW, len <= limit);  //sanity check
	char *p = alloc_alloc(al, len+1);  //+1 for the zero terminator
	TRYR( stream_read_chk(stm, len, p) );
	p[len] = 0;
	*out = any_string(len, p);
	return 1;
}
 52 | 
/* Read the name of a metadata or tensor entry into the dynamic string *pname.
 * The name is limited to 256 bytes and must not be empty (TS_E_FORMAT).
 * The parameter <ss> is not used.
 * Returns 1 on success.
 */
static
int gguf_read_key(Stream* stm, Allocator* al, StringStore* ss, DynStr* pname)
{
	Any key={0};
	TRYR( gguf_read_string(stm, al, &key, 256) );
	TRYRB( TS_E_FORMAT, key.len > 0 );
	dstr_copy(*pname, key.len, key.p.cp);
	return 1;
}
 62 | 
/* Read an array metadata value: element type (32 bit), element count (64 bit)
 * and the elements.
 * Arrays of scalars are stored as a vector Any and arrays of strings as an
 * array of Any. Any other element type returns TS_E_METADATA.
 * The memory for the elements is taken from <al>.
 * NOTE(review): TRY/TRYB/ERROR_LOG presumably store the error in R and jump
 * to the label end - confirm in ccommon.h.
 */
static
int gguf_read_array(Stream* stm, Allocator* al, Any* out)
{
	int R=1;

	uint32_t type;
	TRY( stream_read_var(stm, type) );
	int atype = gguf_meta_type_to_any(type);
	if (!(atype > 0)) ERROR_LOG(TS_E_METADATA, "unknown metadata type %u", type);

	uint64_t len;
	TRY( stream_read_var(stm, len) );
	TRYB(TS_E_OVERFLOW, len <= 0xffffff);  //sanity check

	if (anyb_scalar_is(atype)) {
		// Elements are read with a single call
		size_t sz = anyb_size(atype) * len;
		void *p = alloc_alloc(al, sz);
		TRY( stream_read_chk(stm, sz, p) );
		*out = any_vector(atype, len, p);
	}
	else if (atype == ANY_T_STRING) {
		// One Any per string, each one limited to 0xffff bytes
		size_t sz = sizeof(Any) * len;
		Any *p = alloc_alloc(al, sz);
		for (uint64_t i=0; i<len; ++i) {
			TRY( gguf_read_string(stm, al, &p[i], 0xffff) );
		}
		*out = any_array(len, p);
	}
	else
		return TS_E_METADATA;

end:
	return R;
}
 97 | 
/* Read one metadata value: a 32 bit type followed by the value.
 * Scalars are read directly into value->p. Strings (up to 0xffffff bytes)
 * and arrays use memory taken from <al>.
 * name: name of the entry, only used for the error message.
 * Note that the unsupported type case returns directly, without the message
 * logged at the label end.
 */
static
int gguf_read_meta(Stream* stm, Allocator* al, Any* value, const char* name)
{
	int R=1;

	uint32_t type;
	TRY( stream_read_var(stm, type) );
	int atype = gguf_meta_type_to_any(type);
	if (!(atype > 0)) ERROR_LOG(TS_E_METADATA, "unknown metadata type %u", type);

	if (anyb_scalar_is(atype)) {
		*value = (Any){ atype };
		TRY( stream_read_chk(stm, anyb_size(atype), &value->p) );
	}
	else if (atype == ANY_T_STRING) {
		TRY( gguf_read_string(stm, al, value, 0xffffff) );
	}
	else if (atype == ANY_T_ARRAY) {
		TRY( gguf_read_array(stm, al, value) );
	}
	else
		return TS_E_METADATA;
	
	//log_debug("gguf meta '%s' %s", name, anyb_name(atype));

end:
	if (R<0) log_error("gguf load metadata '%s': %x", name, -R);
	return R;
}
127 | 
/* Read the information of one tensor into *entry: number of dimensions
 * (32 bit, up to 4), dimensions (64 bit each), ggml type (32 bit) and data
 * offset (64 bit).
 * The offset is stored as read; tstore_read_gguf later adds the position of
 * the data section to it.
 * name: name of the tensor, only used for the error message.
 */
static
int gguf_read_tensor(Stream* stm, TSTensorEntry* entry, const char* name)
{
	int R=1;

	uint32_t n_dim;
	TRY( stream_read_var(stm, n_dim) );
	TRYB( TS_E_OVERFLOW, n_dim <= 4 );  //sanity check

	// Dimensions not present in the file remain as 1
	uint64_t dims[4]={1,1,1,1};
	TRY( stream_read_chk(stm, sizeof(*dims)*n_dim, dims) );
	TRYB( TS_E_OVERFLOW, dims[0] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[1] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[2] <= 0xffffff );
	TRYB( TS_E_OVERFLOW, dims[3] <= 0xffffff );

	uint32_t ggml_type;
	TRY( stream_read_var(stm, ggml_type) );
	int dtype = tstore_dtype_from_ggml(ggml_type);
	if (!(dtype > 0)) ERROR_LOG(TS_E_DTYPE, "unknown tensor type %u", ggml_type);

	uint64_t offset;
	TRY( stream_read_var(stm, offset) );

	// Store
	entry->dtype = dtype;
	entry->shape_n = n_dim;
	entry->shape[0] = dims[0];
	entry->shape[1] = dims[1];
	entry->shape[2] = dims[2];
	entry->shape[3] = dims[3];
	entry->offset = offset;  // needs to be updated
	entry->stm = stm;
	entry->size = tstore_tensor_size(entry);
	
	//log_debug("gguf tensor '%s' %s " TSTENSOR_SHAPE4_FMT,
	//	name, tstore_dtype_str(dtype), TSTENSOR_SHAPE4_UNPACK(*entry));

end:
	if (R<0) log_error("gguf load tensor '%s': %x", name, -R);
	return R;
}
170 | 
/* Read the metadata and the tensors information from a GGUF stream and add
 * them to the store S. The tensors data is not read.
 * Accepts the versions 2 and 3 of the format.
 * cb: optional callback called for each tensor before adding it. The tensor
 *     is only added if it returns a positive value.
 * Returns a negative error code on failure.
 */
int tstore_read_gguf(TensorStore* S, Stream* stm, TSCallback* cb)
{
	int R=1, r;
	DynStr name=NULL;
	// Strings and arrays of the metadata use the arena of the string store
	Allocator al = allocator_arena(&S->ss->al);
	
	// Header
	uint32_t magic;
	if (stream_read_var(stm, magic) < 0)
		ERROR_LOG(TS_E_READ, "could not read" );
	if (memcmp(&magic, GGUF_MAGIC, 4))
		ERROR_LOG(TS_E_FORMAT, "bad magic: %08xh", magic);
	
	uint32_t version;
	TRY( stream_read_var(stm, version) );
	if (version != 2 && version != 3)
		ERROR_LOG(TS_E_FORMAT, "unsupported version: %u", version);

	uint64_t n_tensor, n_meta;
	TRY( stream_read_var(stm, n_tensor) );
	TRY( stream_read_var(stm, n_meta) );
	TRYB(TS_E_OVERFLOW, n_tensor <= 65535);  //sanity check
	TRYB(TS_E_OVERFLOW, n_meta   <= 65535);  //sanity check

	log_debug("gguf n_meta:%u n_tensor:%u",
		(unsigned)n_meta, (unsigned)n_tensor);

	// Reserve memory
	vec_realloc(S->meta, vec_count(S->meta) + n_meta);
	vec_realloc(S->meta_idx, vec_count(S->meta_idx) + n_meta);
	vec_realloc(S->tensors, vec_count(S->tensors) + n_tensor);
	vec_realloc(S->tensors_idx, vec_count(S->tensors_idx) + n_tensor);

	// Metadata
	for (uint64_t i=0; i<n_meta; ++i) {
		TRY( gguf_read_key(stm, &al, S->ss, &name) );
		Any value={0};
		TRY( gguf_read_meta(stm, &al, &value, name) );
		TRY( tstore_meta_add(S, name, &value) );
	}
	
	// Tensors
	for (uint64_t i=0; i<n_tensor; ++i) {
		TRY( gguf_read_key(stm, &al, S->ss, &name) );
		TSTensorEntry e={0};
		TRY( gguf_read_tensor(stm, &e, name) );
		TRY( r = tstore_cb_call(cb, S, &e, &name) );
		if (r > 0) {
			TRY( tstore_tensor_add(S, name, &e) );
		}
	}

	// The data section starts at the next aligned position after the header
	uint64_t offset = stream_pos_get(stm);
	offset = gguf_align(offset);

	// Make tensors offsets absolute
	// Applies to every tensor of the store that refers to this stream
	vec_for(S->tensors, i, 0) {
		if (S->tensors[i].stm != stm) continue;
		S->tensors[i].offset += offset;
	}

end:
	// NOTE(review): <name> is not released in this function. If the DynStr
	// owns heap memory after dstr_copy, it is leaked here - confirm.
	if (R<0) log_error("gguf read: %x", -R);
	return R;
}
236 | 
237 | //TODO: write
238 | 
239 | int tstore_detect_gguf(Stream* stm)
240 | {
241 | 	uint8_t *end, *cur = stream_read_buffer(stm, &end);
242 | 	if (!(end-cur >= 4)) return 0;
243 | 	return !memcmp(cur, GGUF_MAGIC, 4);
244 | }
245 | 
246 | const TensorStoreFormat ts_cls_gguf = {
247 | 	"gguf", "gguf",
248 | 	tstore_detect_gguf,
249 | 	tstore_read_gguf,
250 | };
251 | 


--------------------------------------------------------------------------------
/src/ccompute/tensorstore_gguf.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Load tensors from a GGUF file.
 5 |  */
 6 | #pragma once
 7 | #include "tensorstore.h"
 8 | 
 9 | extern const TensorStoreFormat ts_cls_gguf;
10 | 
11 | int tstore_detect_gguf(Stream* stm);
12 | 
13 | int tstore_read_gguf(TensorStore* ts, Stream* stm, TSCallback* cb);
14 | 


--------------------------------------------------------------------------------
/src/ccompute/tensorstore_safet.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Load/save tensor from a file with the SafeTensor format.
 5 |  */
 6 | #pragma once
 7 | #include "tensorstore.h"
 8 | 
 9 | extern const TensorStoreFormat ts_cls_safet;
10 | 
11 | int tstore_detect_safet(Stream* stm);
12 | 
13 | int tstore_read_safet(TensorStore* ts, Stream* stm, TSCallback*);
14 | 
15 | int tstore_write_safet(TensorStore* ts, Stream* stm, TSCallback* cb);
16 | 


--------------------------------------------------------------------------------
/src/clip.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * CLIP text to embeddings encoder for conditioning in SD.
 5 |  */
 6 | #pragma once
 7 | #include "mlblock.h"
 8 | #include "localtensor.h"
 9 | #include "ccommon/strslice.h"
10 | 
// Parameters of a CLIP text encoder model.
// NOTE(review): the meaning of the fields without a comment in the original
// is inferred from their names - confirm in clip.c.
typedef struct {
	int n_vocab;    // presumably the number of entries of the vocabulary
	int n_token;  // max_position_embeddings
	int d_embed;    // size of the embeddings, see mlb_clip_text
	int n_interm;
	int n_head;
	int n_layer;    // num_hidden_layers
	uint32_t tok_start, tok_end, tok_pad;  // presumably special token ids
} ClipParams;
20 | 
21 | extern const ClipParams g_clip_vit_l_14;		//SD 1.x and SDXL
22 | extern const ClipParams g_clip_vit_h_14;		//SD 2.x
23 | extern const ClipParams g_clip_vit_bigg_14;		//SDXL
24 | 
/* Encode a text into a list of tokens.
 * Returns the number of tokens appended to the vector.
 * <ptokvec> is a pointer to a vector of tokens where new tokens will be appended.
 */
29 | int clip_tokenize(const ClipParams* P, StrSlice text, int32_t** ptokvec);
30 | 
/* Decode a token into a string (zero terminated).
32 |  * Returns the number of bytes written, or negative in case of error.
33 |  */
34 | int clip_token_decode(const ClipParams* P, int32_t token,
35 | 	size_t bufsz, char* buf);
36 | 
37 | /* Get the string corresponding to a token.
 * For debugging purposes, uses an internal buffer.
39 |  * Returns "<|INVALID|>" if not found.
40 |  */
41 | const char* clip_token_str(const ClipParams* P, int32_t token);
42 | 
43 | // In : vector of token ids [n_token]
44 | // Out: embeddings [d_embed, n_token]
45 | MLTensor* mlb_clip_text(MLCtx* C, MLTensor* tokens, MLTensor* cust_emb,
46 | 	const ClipParams* P, int clip_skip, bool norm);
47 | 
48 | // In : embeddings [d_embed, n_token]; i_tok_end: presumably the index of the end token (TODO confirm)
49 | // Out: features vector [d_embed]
50 | MLTensor* mlb_clip_text_proj(MLCtx* C, MLTensor* embed, int i_tok_end);
51 | 
52 | int clip_text_encode(MLCtx* C, const ClipParams* P, unsigned n_tok,
53 | 	const int32_t *toks, LocalTensor* embed, LocalTensor* feat,
54 | 	int clip_skip, bool norm);  // NOTE(review): presumably encodes n_tok tokens into <embed> and <feat> - confirm in clip.c
55 | 


--------------------------------------------------------------------------------
/src/demo_mlimgsynth.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * Demonstration of the capabilities of the MLImgSynth library.
  5 |  */
  6 | #include "mlimgsynth.h"
  7 | #include <stdio.h>
  8 | #include <stdlib.h>
  9 | #include <string.h>
 10 | 
 11 | #define error(...) do { /* print "ERROR <message>" and a newline to stdout, then exit(1) */ \
 12 | 	printf("ERROR "); \
 13 | 	printf(__VA_ARGS__); \
 14 | 	printf("\n"); \
 15 | 	exit(1); \
 16 | } while (0)
 17 | 
 18 | #define log(...) do { /* printf-style message followed by a newline, on stdout */ \
 19 | 	printf(__VA_ARGS__); \
 20 | 	printf("\n"); \
 21 | } while (0)
 22 | 	
 23 | // Save image 0 as "<name>.ppm" (binary PPM) and its info text as "<name>.txt".
 24 | void img_save(MLIS_Ctx* ctx, const char* name)
 25 | {
 26 | 	char buffer[128];
 27 | 	const MLIS_Image *img = mlis_image_get(ctx, 0);
 28 | 	const char *info = mlis_infotext_get(ctx, 0);
 29 | 	log("Saving...");
 30 | 	// snprintf bounds the path; "wb" avoids newline translation of pixel bytes
 31 | 	snprintf(buffer, sizeof(buffer), "%s.ppm", name);
 32 | 	FILE *f = fopen(buffer, "wb");
 33 | 	if (!f) error("could not open '%s' for writing", buffer);
 34 | 	fprintf(f, "P6 %u %u 255\n", img->w, img->h);
 35 | 	fwrite(img->d, 1, img->sz, f);
 36 | 	fclose(f);
 37 | 	snprintf(buffer, sizeof(buffer), "%s.txt", name);
 38 | 	f = fopen(buffer, "w");
 39 | 	if (!f) error("could not open '%s' for writing", buffer);
 40 | 	fwrite(info, 1, strlen(info), f);
 41 | 	fclose(f);
 42 | }
 43 | 
 44 | // Text-to-image demo: set a prompt, generate, and save as "demo_txt2img".
 45 | void demo_txt2img(MLIS_Ctx* ctx)
 46 | {
 47 | 	const char *prompt =
 48 | 		"a photograph of an astronaut riding a horse in a grassland";
 49 | 	log("txt2img");
 50 | 	mlis_option_set(ctx, MLIS_OPT_PROMPT, prompt);
 51 | 	mlis_generate(ctx);
 52 | 	img_save(ctx, "demo_txt2img");
 53 | }
 54 | 
 55 | // Image-to-image demo: generates again from the last image with a new prompt.
 56 | void demo_img2img(MLIS_Ctx* ctx)
 57 | {
 58 | 	const char *prompt =
 59 | 		"a photograph of an astronaut riding a horse in a forest";
 60 | 	double strength = 0.70;  // value passed as MLIS_OPT_F_T_INI
 61 | 	log("img2img");
 62 | 	mlis_option_set(ctx, MLIS_OPT_PROMPT, prompt);
 63 | 	mlis_option_set(ctx, MLIS_OPT_F_T_INI, strength);
 64 | 	// The image produced by the previous generation is used as the input
 65 | 	const MLIS_Image *prev = mlis_image_get(ctx, 0);
 66 | 	mlis_option_set(ctx, MLIS_OPT_IMAGE, prev);
 67 | 	mlis_generate(ctx);
 68 | 	img_save(ctx, "demo_img2img");
 69 | }
 70 | 
 71 | void demo_inpaint(MLIS_Ctx* ctx)
 72 | {
 73 | 	log("inpaint");
 74 | 	mlis_option_set(ctx, MLIS_OPT_PROMPT, "a pile of gold coins");
 75 | 	mlis_option_set(ctx, MLIS_OPT_NO_DECODE, 1);  // first generation runs with NO_DECODE set
 76 | 
 77 | 	mlis_generate(ctx);
 78 | 	
 79 | 	mlis_option_set(ctx, MLIS_OPT_NO_DECODE, 0);  // cleared again for the second generation
 80 | 
 81 | 	// Creates a circular mask for latent space, with the shape of the latent
 82 | 	MLIS_Tensor *latent = mlis_tensor_get(ctx, MLIS_TENSOR_LATENT);
 83 | 	MLIS_Tensor *lmask = mlis_tensor_get(ctx, MLIS_TENSOR_LMASK);
 84 | 	mlis_tensor_resize_like(lmask, latent);
 85 | 	int r0 = lmask->n[0] / 2;  // Circle center along dimension 0 (half the size)
 86 | 	int r1 = lmask->n[1] / 2;  // Center along dimension 1; also used as the radius
 87 | 	mlis_tensor_for(*lmask, i) {  // i0, i1, ip: presumably declared by the macro (confirm)
 88 | 		lmask->d[ip] = ((i0-r0)*(i0-r0) + (i1-r1)*(i1-r1)) > r1*r1;  // 1 outside the circle, 0 inside
 89 | 	}
 90 | 	
 91 | 	mlis_option_set(ctx, MLIS_OPT_PROMPT, "a red dragon on a pile of gold coins");
 92 | 	mlis_option_set(ctx, MLIS_OPT_F_T_INI, 0.70);
 93 | 	mlis_option_set(ctx, MLIS_OPT_TENSOR_USE_FLAGS,
 94 | 		MLIS_TUF_LATENT | MLIS_TUF_LMASK);
 95 | 	
 96 | 	mlis_generate(ctx);
 97 | 
 98 | 	img_save(ctx, "demo_inpaint");
 99 | }
100 | 
101 | void error_handler(void* user, MLIS_Ctx* ctx, const MLIS_ErrorInfo* ei)
102 | {  // Error callback (parameters are named: required before C23): print and exit.
103 | 	(void)user; (void)ctx; error("mlis error 0x%x: %s", -ei->code, ei->desc);
104 | }
105 | 
106 | int progress_callback(void* user, MLIS_Ctx* ctx, const MLIS_Progress* prg)
107 | {  // Progress callback: logs one line per call and returns 0 to continue.
108 | 	(void)user; (void)ctx;  // unused, but named: required before C23
109 | 	double etc = -1;  // estimated time to completion; stays -1 until step > 1
110 | 	if (1 < prg->step) etc = (prg->step_end - prg->step) * prg->step_time;
111 | 	log("%s %d/%d nfe=%d {%.3fs} ETC %.0fs", mlis_stage_str(prg->stage),
112 | 		prg->step, prg->step_end, prg->nfe, prg->step_time, etc);
113 | 	return 0;  //continue
114 | }
115 | 
116 | int main(int argc, char* argv[])
117 | {
118 | 	if (argc != 2)  // exactly one argument is expected: the model file path
119 | 		error("Usage: %s [MODEL FILE PATH]", argv[0]);
120 | 	
121 | 	log("Initializing...");
122 | 	MLIS_Ctx *ctx = mlis_ctx_create();
123 | 	mlis_option_set(ctx, MLIS_OPT_ERROR_HANDLER, error_handler, NULL);  // error_handler exits the program
124 | 	mlis_option_set(ctx, MLIS_OPT_CALLBACK, progress_callback, NULL);
125 | 	mlis_option_set(ctx, MLIS_OPT_MODEL, argv[1]);
126 | 	
127 | 	// If you do not set the following options, default values will be used.
128 | 	mlis_option_set(ctx, MLIS_OPT_IMAGE_DIM, 768, 512);
129 | 	mlis_option_set(ctx, MLIS_OPT_SEED, 42);
130 | 	mlis_option_set(ctx, MLIS_OPT_METHOD, MLIS_METHOD_EULER);
131 | 	mlis_option_set(ctx, MLIS_OPT_SCHEDULER, MLIS_SCHED_UNIFORM);
132 | 	mlis_option_set(ctx, MLIS_OPT_STEPS, 20);
133 | 	// Be sure to use floating point numbers with options that require it.
134 | 	mlis_option_set(ctx, MLIS_OPT_CFG_SCALE, 7.0);
135 | 	mlis_option_set(ctx, MLIS_OPT_S_ANCESTRAL, 1.0);
136 | 	// You can also set options using strings (same value as IMAGE_DIM above).
137 | 	mlis_option_set_str(ctx, "image_dim", "768,512");
138 | 	//mlis_option_set(ctx, MLIS_OPT_LORA, lora_path, 1.0);
139 | 
140 | 	// Initializes the backend and loads the model header.
141 | 	// This is not required, but it can be useful to catch errors early.
142 | 	mlis_setup(ctx);
143 | 
144 | 	demo_txt2img(ctx);
145 | 	demo_img2img(ctx);  // uses the image left in ctx by demo_txt2img
146 | 	demo_inpaint(ctx);
147 | 
148 | 	log("End");
149 | 	mlis_ctx_destroy(&ctx);
150 | 	return 0;
151 | }
152 | 


--------------------------------------------------------------------------------
/src/ggml_extend.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "ggml_extend.h"
  5 | #include "ccommon/ccommon.h"
  6 | #include "ccommon/logging.h"
  7 | #include "ccommon/vector.h"
  8 | #include <inttypes.h>
  9 | #include <string.h>
 10 | #include <math.h>
 11 | 
 12 | #include "ggml-backend.h"
 13 | 
 14 | // Prepend "<pre>." to the tensor name, or set the name to <pre> if it is empty.
 15 | struct ggml_tensor* ggml_name_prefix(struct ggml_tensor* x, const char* pre)
 16 | {
 17 | 	if (!x->name[0]) {
 18 | 		strncpy(x->name, pre, sizeof(x->name)-1);  // truncating copy
 19 | 		x->name[sizeof(x->name)-1] = 0;
 20 | 		return x;
 21 | 	}
 22 | 	unsigned n_pre=strlen(pre), n_old=strlen(x->name);
 23 | 	if (n_pre+1+n_old+1 > sizeof(x->name)) FATAL_LOG("ggml tensor name too long");
 24 | 	memmove(x->name+n_pre+1, x->name, n_old+1);  // shift the old name with its NUL
 25 | 	memcpy(x->name, pre, n_pre);
 26 | 	x->name[n_pre] = '.';
 27 | 	return x;
 28 | }
 29 | 
 30 | // Returns "<type> AxBx..." built in a static DynStr shared by all calls.
 31 | const char* ggml_tensor_typeshape_desc(const struct ggml_tensor* x)
 32 | {
 33 | 	static DynStr desc=NULL;
 34 | 	dstr_printf(desc, "%s ", ggml_type_name(x->type));
 35 | 	// Extents are listed until the first zero one, separated by 'x'
 36 | 	for (unsigned dim=0; dim<GGML_MAX_DIMS && x->ne[dim]; ++dim) {
 37 | 		if (dim > 0) dstr_push(desc, 'x');
 38 | 		dstr_printfa(desc, "%"PRId64, x->ne[dim]);
 39 | 	}
 40 | 	return desc;
 41 | }
 42 | 
 43 | size_t ggml_ctx_tensors_total_size(const struct ggml_context* ctx)
 44 | {
 45 | 	size_t total=0;  // sum of ggml_nbytes() over every tensor of the context
 46 | 	struct ggml_tensor *cur = ggml_get_first_tensor(ctx);
 47 | 	while (cur) { total += ggml_nbytes(cur);  cur = ggml_get_next_tensor(ctx, cur); }
 48 | 	return total;
 49 | }
 50 | 
 51 | void ggml_ctx_tensors_dump(const struct ggml_context* ctx, Stream* out)
 52 | {
 53 | 	for (struct ggml_tensor *cur=ggml_get_first_tensor(ctx); cur;
 54 | 			cur=ggml_get_next_tensor(ctx, cur)) {  // one output line per tensor
 55 | 		stream_printf(out, GGML_TENSOR_FMT "\n", GGML_TENSOR_ARGS(cur));
 56 | 	}
 57 | }
 58 | 
 59 | static const char g_base64_chars[] =  // 64 symbols, indexed by ggml_tensor_stat() to build the hash
 60 | 	"ABCDEFGHIJKLMNOPQRSTUVWXYZ" "abcdefghijklmnopqrstuvwxyz" "0123456789" "+/";
 61 | 
 62 | #define ggml_tensor_stat_CODE(TYPE,CONV) do { /* fills stat.first, stat.asum and hsum[8] from T */ \
 63 | 	const int64_t GGML_TENSOR_VARS_N(T,t), GGML_TENSOR_VARS_S(T,t); \
 64 | 	const TYPE *tp = T->data; \
 65 | 	stat.first = (double)CONV(tp[0]);  /* converted: raw F16 bits are not a value */ \
 66 | 	int64_t hsep = (t3n *t2n *t1n *t0n + 7) / 8;  /* ceil(n/8): i/hsep stays in 0..7, never 0 in the loop */ \
 67 | 	for (int64_t i3=0, i=0; i3<t3n; ++i3) \
 68 | 	for (int64_t i2=0; i2<t2n; ++i2) \
 69 | 	for (int64_t i1=0; i1<t1n; ++i1) \
 70 | 	for (int64_t i0=0; i0<t0n; ++i0, ++i) { \
 71 | 		double v = (double)CONV(tp[i3*t3s +i2*t2s +i1*t1s +i0*t0s]); \
 72 | 		stat.asum += fabs(v); \
 73 | 		hsum[i/hsep] += v;  /* partial sum of the segment that contains element i */ \
 74 | 	} \
 75 | } while(0)
 76 | 
 77 | // Debug statistics of an F32/F16 tensor with data: absolute sum, first value
 78 | // and an 8 character hash. Other inputs return a zeroed struct.
 79 | ggml_tensor_stat_st ggml_tensor_stat(const struct ggml_tensor* T)
 80 | {
 81 | 	ggml_tensor_stat_st stat={0};
 82 | 	double hsum[8]={0};  // partial sums of 8 segments
 83 | 	if (!T->data) return stat;
 84 | 
 85 | 	switch (T->type) {
 86 | 	case GGML_TYPE_F32: ggml_tensor_stat_CODE(float,); break;
 87 | 	case GGML_TYPE_F16: ggml_tensor_stat_CODE(ggml_fp16_t,ggml_fp16_to_fp32); break;
 88 | 	default: return stat;
 89 | 	}
 90 | 	// Range covered by the partial sums
 91 | 	double lo=hsum[0], hi=hsum[0];
 92 | 	for (unsigned k=1; k<8; ++k) {
 93 | 		MINSET(lo, hsum[k]);
 94 | 		MAXSET(hi, hsum[k]);
 95 | 	}
 96 | 	// Map each sum to one of 64 characters, for fast checking by a human
 97 | 	double scale = (hi > lo) ? (64 / (hi - lo)) : 0;
 98 | 	scale = nextafter(scale, 0);
 99 | 	for (unsigned k=0; k<8; ++k) {
100 | 		int idx = (hsum[k] - lo) * scale;
101 | 		assert( 0 <= idx && idx < 64 );
102 | 		stat.hash[k] = g_base64_chars[idx];
103 | 	}
104 | 	stat.hash[8] = 0;
105 | 	return stat;
106 | }
107 | 
108 | #define ggml_tensor_export_CODE(TYPE,CONV) do { /* one "%g" line per element; uses f, t#n and t#s of the caller */ \
109 | 	const TYPE *tp = T->data; \
110 | 	for (int64_t i3=0; i3<t3n; ++i3) \
111 | 	for (int64_t i2=0; i2<t2n; ++i2) \
112 | 	for (int64_t i1=0; i1<t1n; ++i1) \
113 | 	for (int64_t i0=0; i0<t0n; ++i0) \
114 | 		fprintf(f, "%g\n", (double)CONV(tp[i3*t3s +i2*t2s +i1*t1s +i0*t0s])); \
115 | } while(0)
116 | 
117 | // Write an F32/F16 tensor with data to a text file: a header line with the
118 | // four extents, then one value per line. Does nothing if the file cannot be opened.
119 | void ggml_tensor_export(const struct ggml_tensor* T, const char* path)
120 | {
121 | 	if (!T->data) return;
122 | 	FILE *f = fopen(path, "w");
123 | 	if (!f) return;
124 | 	const int64_t GGML_TENSOR_VARS_N(T,t), GGML_TENSOR_VARS_S(T,t);
125 | 	// int64_t must be printed with PRId64; "%zd" only matches where ssize_t is 64 bits
126 | 	fprintf(f, "TENSOR ASCII %"PRId64" %"PRId64" %"PRId64" %"PRId64"\n",
127 | 		t0n,t1n,t2n,t3n);
128 | 	if      (T->type == GGML_TYPE_F32)
129 | 		ggml_tensor_export_CODE(float,);
130 | 	else if (T->type == GGML_TYPE_F16)
131 | 		ggml_tensor_export_CODE(ggml_fp16_t,ggml_fp16_to_fp32);
132 | 	fclose(f);
133 | }
134 | 
135 | // Split x into n_chunk equal views along dimension n_dim (only 0 is
136 | // implemented so far); view i is stored through the pointer out[i].
137 | void ggml_chunk_(struct ggml_context* ctx,
138 | 	struct ggml_tensor* x, int n_chunk, int n_dim, struct ggml_tensor*** out)
139 | {
140 | 	GGML_ASSERT( GGML_MAX_DIMS == 4 );
141 | 	GGML_ASSERT( 0 <= n_dim && n_dim < GGML_MAX_DIMS );
142 | 	GGML_ASSERT( n_dim == 0 );  //TODO
143 | 	GGML_ASSERT( n_chunk > 0 );  // guards the division below
144 | 	int64_t ne[GGML_MAX_DIMS];
145 | 	memcpy(ne, x->ne, sizeof(ne));
146 | 	ne[n_dim] /= n_chunk;
147 | 	GGML_ASSERT( n_chunk * ne[n_dim] == x->ne[n_dim] );  // must divide evenly
148 | 	// Byte distance between the starts of consecutive chunks
149 | 	size_t offset = ggml_type_size(x->type) * ne[n_dim];
150 | 	for (int i=0; i<n_chunk; ++i) {
151 | 		*out[i] = ggml_view_4d(ctx, x, ne[0], ne[1], ne[2], ne[3],
152 | 					x->nb[1], x->nb[2], x->nb[3], offset*i);
153 | 	}
154 | }
155 | 
156 | // Custom-op callback: logs type, shape and statistics of src at debug level.
157 | void ggml_tensor_debug_print(struct ggml_tensor* dst,
158 | 	const struct ggml_tensor* src, int ith, int nth, void* userdata)
159 | {
160 | 	if (ith != 0) return;  // the message is produced once, by the ith==0 call
161 | 	GGML_ASSERT( dst->data == src->data );
162 | 	const char *desc = userdata;  // optional label; src->name is used if NULL
163 | 	ggml_tensor_stat_st stat = ggml_tensor_stat(src);
164 | 	char buffer[96];  // four int64 extents plus separators need up to 84 bytes
165 | 	snprintf(buffer, sizeof(buffer), GGML_SHAPE_FMT, GGML_SHAPE_UNPACK(src) );
166 | 	log_debug("%-12s: %s %-16s %.2e %s %+.2e",
167 | 		desc ? desc : src->name, ggml_type_name(src->type),
168 | 		buffer, stat.asum, stat.hash, stat.first);
169 | }
170 | 
171 | // Adds a debug print node for t, or returns t untouched when inactive.
172 | struct ggml_tensor* ggml_debug_print(struct ggml_context* ctx,
173 | 	struct ggml_tensor* t, const char* desc, int loglvl)
174 | {
175 | 	if (!(log_level_check(loglvl) && ggml_backend_buffer_is_host(t->buffer)))
176 | 		return t;
177 | 	return ggml_map_custom1_inplace(ctx, t, ggml_tensor_debug_print, 1,
178 | 		(void*)desc);
179 | }
180 | 
181 | // Custom-op callback: exports src to the file path given as userdata.
182 | void ggml_tensor_debug_export(struct ggml_tensor* dst,
183 | 	const struct ggml_tensor* src, int ith, int nth, void* userdata)
184 | {
185 | 	if (ith == 0) {
186 | 		GGML_ASSERT( dst->data == src->data );
187 | 		ggml_tensor_export(src, (const char*)userdata);
188 | 	}
189 | }
190 | 
191 | // Adds an export-to-file node for t; tensors not in a host buffer are returned as is.
192 | struct ggml_tensor* ggml_debug_export(struct ggml_context* ctx,
193 | 	struct ggml_tensor* t, const char* fname)
194 | {
195 | 	return ggml_backend_buffer_is_host(t->buffer)
196 | 		? ggml_map_custom1_inplace(ctx, t, ggml_tensor_debug_export, 1, (void*)fname)
197 | 		: t;
198 | }
199 | 
200 | // Attention: softmax of the scaled (optionally masked) k.q product, applied to v.
201 | struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx,
202 | 	struct ggml_tensor* q, struct ggml_tensor* k, struct ggml_tensor* v,
203 | 	bool mask)
204 | {
205 | //#ifdef USE_FLASH_ATTENTION
206 | //	assert(q->ne[0] == v->ne[0]);
207 | //	return ggml_flash_attn_ext(ctx, q, k, v, NULL, 1.0f, 0.0f);
208 | //	// [N * n_head, n_token, d_head]
209 | //#else
210 | 	const float d_head = (float)q->ne[0];
211 | 	const float scale = 1.0f / sqrt(d_head);
212 | 
213 | 	// Scores: [N * n_head, n_token, n_k]
214 | 	struct ggml_tensor *scores = ggml_mul_mat(ctx, k, q);
215 | 	scores = ggml_scale_inplace(ctx, scores, scale);
216 | 	if (mask)
217 | 		scores = ggml_diag_mask_inf_inplace(ctx, scores, 0);
218 | 	scores = ggml_soft_max_inplace(ctx, scores);
219 | 
220 | 	return ggml_mul_mat(ctx, v, scores);  // [N * n_head, n_token, d_head]
221 | //#endif
222 | }
223 | 


--------------------------------------------------------------------------------
/src/ggml_extend.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  */
 4 | #pragma once
 5 | #include "ccommon/stream.h"
 6 | #include "ggml.h"
 7 | #include <inttypes.h>
 8 | 
 9 | #define GGML_SHAPE_FMT  "%"PRId64"x%"PRId64"x%"PRId64"x%"PRId64  /* prints as "AxBxCxD" */
10 | #define GGML_SHAPE_UNPACK(T) /* the four ne[] values, as arguments for GGML_SHAPE_FMT */ \
11 | 	(T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3]
12 | 
13 | #define GGML_TYPESHAPE_FMT  "%s " GGML_SHAPE_FMT  /* type name followed by the shape */
14 | #define GGML_TYPESHAPE_ARGS(T) /* arguments for GGML_TYPESHAPE_FMT; T is evaluated several times */ \
15 | 	ggml_type_name((T)->type), (T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3]
16 | 
17 | #define GGML_TENSOR_FMT  "%s: %s %s " GGML_SHAPE_FMT  /* name: op type shape */
18 | #define GGML_TENSOR_ARGS(T) /* arguments for GGML_TENSOR_FMT; T is evaluated several times */ \
19 | 	ggml_get_name(T), ggml_op_desc(T), ggml_type_name((T)->type), \
20 | 	(T)->ne[0], (T)->ne[1], (T)->ne[2], (T)->ne[3]
21 | 
22 | #define GGML_TENSOR_VARS_N(X,L) /* declarator list: <L>0n..<L>3n = element counts ne[0..3] of X */ \
23 | 	L##0n=(X)->ne[0], L##1n=(X)->ne[1], L##2n=(X)->ne[2], L##3n=(X)->ne[3]
24 | 
25 | #define GGML_TENSOR_VARS_B(X,L) /* declarator list: <L>0b..<L>3b = nb[0..3] of X */ \
26 | 	L##0b=(X)->nb[0], L##1b=(X)->nb[1], L##2b=(X)->nb[2], L##3b=(X)->nb[3]
27 | 
28 | #define GGML_TENSOR_VARS_S(X,L) /* declarator list: <L>eb = element size, <L>0s..<L>3s = nb[] divided by it */ \
29 | 	L##eb=ggml_element_size(X),\
30 | 	L##0s=(X)->nb[0]/L##eb, \
31 | 	L##1s=(X)->nb[1]/L##eb, \
32 | 	L##2s=(X)->nb[2]/L##eb, \
33 | 	L##3s=(X)->nb[3]/L##eb
34 | 
35 | struct ggml_tensor* ggml_name_prefix(struct ggml_tensor* x, const char* pre);  // name becomes "<pre>.<name>", or <pre> if it was empty; returns x
36 | 
37 | const char* ggml_tensor_typeshape_desc(const struct ggml_tensor* x);  // "<type> AxB..." in a static buffer
38 | 
39 | size_t ggml_ctx_tensors_total_size(const struct ggml_context* ctx);  // sum of ggml_nbytes() over the tensors of ctx
40 | 
41 | void ggml_ctx_tensors_dump(const struct ggml_context* ctx, Stream* out);  // one GGML_TENSOR_FMT line per tensor
42 | 
43 | void ggml_tensor_graph_dump(const struct ggml_tensor* result, Stream* out);  // NOTE(review): no definition in ggml_extend.c as seen here - confirm it exists
44 | 
45 | void ggml_tensor_export(const struct ggml_tensor* T, const char* path);  // text dump of an F32/F16 tensor to <path>
46 | 
47 | typedef struct {
48 | 	double asum, first;  // sum of absolute values; first element
49 | 	char hash[9];  // 8 characters plus the terminating zero
50 | 	char valid;  // NOTE(review): not written by ggml_tensor_stat() besides zero init - confirm intended use
51 | } ggml_tensor_stat_st;
52 | 
53 | ggml_tensor_stat_st ggml_tensor_stat(const struct ggml_tensor* T);  // zeroed result if T has no data or is not F32/F16
54 | 
55 | // Operations
56 | 
57 | void ggml_chunk_(struct ggml_context* ctx,
58 | 	struct ggml_tensor* x, int n_chunk, int n_dim, struct ggml_tensor*** out);
59 | #define ggml_chunk(C, X, N, D, ...) /* ...: one struct ggml_tensor** per chunk; no ';' here so it is a single statement */ \
60 | 	ggml_chunk_((C), (X), (N), (D), (struct ggml_tensor**[]){__VA_ARGS__})
61 | 
62 | // Debug operations
63 | // Only works on CPU
64 | 
65 | struct ggml_tensor*
66 | ggml_debug_print(struct ggml_context* ctx, struct ggml_tensor* t,
67 | 	const char* desc, int loglvl);  // returns t itself when the level is disabled or t is not in a host buffer
68 | 
69 | #define ggml_debug4_print(...) /* ggml_debug_print with loglvl fixed to LOG_LVL_DEBUG4 */ \
70 | 	ggml_debug_print(__VA_ARGS__, LOG_LVL_DEBUG4)
71 | 
72 | struct ggml_tensor*
73 | ggml_debug_export(struct ggml_context* ctx, struct ggml_tensor* t,
74 | 	const char* fname);  // returns t itself when t is not in a host buffer
75 | 
76 | // Neural networks operations
77 | 
78 | struct ggml_tensor* ggml_nn_attention(struct ggml_context* ctx,
79 | 	struct ggml_tensor* q, struct ggml_tensor* k, struct ggml_tensor* v, 
80 | 	bool mask);  // mask: apply ggml_diag_mask_inf to the scores before the softmax
81 | 


--------------------------------------------------------------------------------
/src/localtensor.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * Simple storage for tensors.
  5 |  */
  6 | #pragma once
  7 | #include "ccommon/alloc.h"
  8 | #include "ccommon/logging.h"
  9 | #include "ggml.h"
 10 | #include "ggml-backend.h"
 11 | 
 12 | #ifdef LOCALTENSOR_USE_IMAGE
 13 | #include "ccommon/image.h"
 14 | #endif
 15 | 
 16 | typedef struct LocalTensor {
 17 | 	float	*d;  //data
 18 | 	int		n[4];  //shape
 19 | 	int		flags;  //bitwise OR of LT_F_*
 20 | } LocalTensor;
 21 | 
 22 | enum {
 23 | 	// Memory owned by the tensor (released by ltensor_free, reused by ltensor_resize)
 24 | 	LT_F_OWNMEM = 1,
 25 | 	// User-specified ready state
 26 | 	LT_F_READY  = 2,
 27 | };
 28 | 
 29 | #define LT_SHAPE_FMT		"%dx%dx%dx%d"  /* printf format for the four extents */
 30 | #define LT_SHAPE_UNPACK(X)	(X).n[0], (X).n[1], (X).n[2], (X).n[3]  /* X is a LocalTensor value, not a pointer */
 31 | 
 32 | static inline  // 1 when S is non-NULL and has a data pointer, else 0
 33 | size_t ltensor_good(const LocalTensor* S) { return (S != NULL) && (S->d != NULL); }
 34 | 
 35 | static inline  // product of the four extents, computed in size_t
 36 | size_t ltensor_nelements(const LocalTensor* S) {
 37 | 	const int *n = S->n;  return (size_t)n[0] * n[1] * n[2] * n[3];
 38 | }
 39 | 
 40 | static inline  // storage size in bytes: element count times the element size
 41 | size_t ltensor_nbytes(const LocalTensor* S) {
 42 | 	return sizeof(S->d[0]) * ltensor_nelements(S);
 43 | }
 44 | 
 45 | static inline  // release the data if it is owned, then reset *S to all zeros
 46 | void ltensor_free(LocalTensor* S) {
 47 | 	const LocalTensor empty = {0};
 48 | 	if ((S->flags & LT_F_OWNMEM) != 0) alloc_free(g_allocator, S->d);
 49 | 	*S = empty;
 50 | }
 51 | 
 52 | static inline  // set the shape and (re)allocate storage that S owns afterwards
 53 | void ltensor_resize(LocalTensor* S, int n0, int n1, int n2, int n3) {
 54 | 	if ((S->flags & LT_F_OWNMEM) == 0) S->d = NULL;  // never realloc borrowed memory
 55 | 	const int shape[4] = { n0, n1, n2, n3 };
 56 | 	for (int i=0; i<4; ++i) S->n[i] = shape[i];
 57 | 	S->d = alloc_realloc(g_allocator, S->d, ltensor_nbytes(S));  S->flags |= LT_F_OWNMEM;
 58 | }
 59 | 
 60 | static inline  // give S the same shape as T
 61 | void ltensor_resize_like(LocalTensor* S, const LocalTensor* T) {
 62 | 	ltensor_resize(S, T->n[0], T->n[1], T->n[2], T->n[3]);
 63 | }
 64 | 
 65 | static inline  // deep copy (dst is resized); copying a tensor onto itself is a no-op
 66 | void ltensor_copy(LocalTensor* dst, const LocalTensor* src) {
 67 | 	if (dst == src) return;  // the resize would realloc or drop the data being read
 68 | 	ltensor_resize_like(dst, src);  memcpy(dst->d, src->d, ltensor_nbytes(dst));
 69 | }
 70 | 
 71 | /* Copy a slice of src into a slice of dst.
 72 |  * n#: slice size in elements (#: 0-3 dimension)
 73 |  * Li#: slice start (L: d=dst or s=src)
 74 |  * Ls#: slice step (L: d=dst or s=src)
 75 |  */
 76 | void ltensor_copy_slice(LocalTensor* dst, const LocalTensor* src,
 77 | 	int n0 , int n1 , int n2 , int n3 ,
 78 | 	int di0, int di1, int di2, int di3,
 79 | 	int si0, int si1, int si2, int si3,
 80 | 	int ds0, int ds1, int ds2, int ds3,
 81 | 	int ss0, int ss1, int ss2, int ss3 );
 82 | 
 83 | static inline  // 2-D variant: dimensions 2 and 3 of src are copied whole (start 0, step 1)
 84 | void ltensor_copy_slice2(LocalTensor* dst, const LocalTensor* src,
 85 | 	int n0, int n1, int di0, int di1, int si0, int si1,
 86 | 	int ds0, int ds1, int ss0, int ss1)
 87 | {
 88 | 	const int n2 = src->n[2], n3 = src->n[3];
 89 | 	ltensor_copy_slice(dst, src,
 90 | 		n0 , n1 , n2, n3,
 91 | 		di0, di1, 0 , 0 ,  si0, si1, 0 , 0 ,
 92 | 		ds0, ds1, 1 , 1 ,  ss0, ss1, 1 , 1 );
 93 | }
 94 | 
 95 | static inline  // copy S->d into a backend tensor of exactly the same byte size
 96 | void ltensor_to_backend(const LocalTensor* S, struct ggml_tensor* out) {
 97 | 	const size_t nbytes = ltensor_nbytes(S);  assert(nbytes == ggml_nbytes(out));
 98 | 	ggml_backend_tensor_set(out, S->d, 0, nbytes);
 99 | }
100 | 
101 | static inline  // resize S to the shape of <out>, then copy the backend data into it
102 | void ltensor_from_backend(LocalTensor* S, struct ggml_tensor* out) {
103 | 	ltensor_resize(S, out->ne[0], out->ne[1], out->ne[2], out->ne[3]);
104 | 	const size_t nbytes = ltensor_nbytes(S);  assert(nbytes == ggml_nbytes(out));
105 | 	ggml_backend_tensor_get(out, S->d, 0, nbytes);
106 | }
107 | 
108 | static inline  // true when all four extents match
109 | bool ltensor_shape_equal(const LocalTensor* A, const LocalTensor* B) {
110 | 	for (int i=0; i<4; ++i) if (A->n[i] != B->n[i]) return false;
111 | 	return true;
112 | }
113 | 
114 | static inline  // 1 if every positive n# equals the extent of S (n# <= 0 matches anything), else -1
115 | int ltensor_shape_check(const LocalTensor* S, int n0, int n1, int n2, int n3) {
116 | 	const int want[4] = { n0, n1, n2, n3 };
117 | 	for (int i=0; i<4; ++i) {
118 | 		if (want[i] > 0 && want[i] != S->n[i]) return -1;
119 | 	}
120 | 	return 1;
121 | }
122 | 
123 | static inline
124 | int ltensor_shape_check_log(const LocalTensor* S, const char* desc,
125 | 	int n0, int n1, int n2, int n3)
126 | {
127 | 	int r = ltensor_shape_check(S, n0, n1, n2, n3);
128 | 	if (r < 0) log_error("%s wrong shape: " LT_SHAPE_FMT,
129 | 				desc, LT_SHAPE_UNPACK(*S));
130 | 	return r;
131 | }
132 | 
133 | int ltensor_finite_check(const LocalTensor* S);  // NOTE(review): presumably checks the data for NaN/Inf - confirm in localtensor.c
134 | 
135 | float ltensor_minmax(const LocalTensor* S, float* min);  // NOTE(review): presumably returns the maximum and stores the minimum in *min - confirm
136 | float ltensor_sum(const LocalTensor* S);
137 | float ltensor_mean(const LocalTensor* S);
138 | 
139 | typedef struct {
140 | 	float asum, first, min, max;  // NOTE(review): asum/first likely as in ggml_tensor_stat_st - confirm
141 | 	char hash[9];
142 | 	char valid;
143 | } LocalTensorStats;
144 | 
145 | LocalTensorStats ltensor_stat(const LocalTensor* S);
146 | 
147 | void log_ltensor_stats(int loglvl, const LocalTensor* S, const char* desc);
148 | 
149 | #define log_debug2_ltensor(T, D) /* log_ltensor_stats at level LOG_LVL_DEBUG2 */ \
150 | 	log_ltensor_stats(LOG_LVL_DEBUG2, (T), (D))
151 | 
152 | #define log_debug3_ltensor(T, D) /* log_ltensor_stats at level LOG_LVL_DEBUG3 */ \
153 | 	log_ltensor_stats(LOG_LVL_DEBUG3, (T), (D))
154 | 
155 | // Reduces the sizes by the factors f0..f3 (one per dimension).
156 | // Can be done inplace (dst = src).
157 | void ltensor_downsize(LocalTensor* dst, const LocalTensor* src,
158 | 	int f0, int f1, int f2, int f3);
159 | 
160 | int ltensor_save_stream(const LocalTensor* S, Stream *stm);  // NOTE(review): file format and return convention are defined in localtensor.c
161 | int ltensor_save_path(const LocalTensor* S, const char* path);
162 | int ltensor_load_stream(LocalTensor* S, Stream *stm);
163 | int ltensor_load_path(LocalTensor* S, const char* path);
164 | 
165 | #ifdef LOCALTENSOR_USE_IMAGE
166 | void ltensor_from_image(LocalTensor* S, const Image* img);
167 | void ltensor_to_image(const LocalTensor* S, Image* img);
168 | 
169 | // Load separately the last channel (usually the transparency)
170 | void ltensor_from_image_alpha(LocalTensor* S, LocalTensor* alpha, const Image* img);
171 | 
172 | int ltensor_img_redblue(const LocalTensor* S, Image* img);
173 | int ltensor_img_redblue_path(const LocalTensor* S, const char* path);
174 | #endif
175 | 
176 | #define ltensor_for(T,V,I) /* loop header: unsigned V runs from I up to the element count of T */ \
177 | 	for (unsigned V=(I), V##e_=ltensor_nelements(&(T)); V<V##e_; ++V)
178 | 


--------------------------------------------------------------------------------
/src/lora.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "lora.h"
  5 | #include "ccommon/logging.h"
  6 | #include "ggml.h"
  7 | #include <math.h>
  8 | 
  9 | int lora_apply_inner(TSTensorEntry* dst, TSTensorEntry* ld, TSTensorEntry* lu,
 10 | 	TSTensorEntry *ls, TSTensorEntry *la, float mult, MLCtx* C)
 11 | {  // Adds the scaled product of the lora up (lu) and down (ld) tensors to dst, using a ggml graph.
 12 | 	int R=1;  // result: 1 unless an error path changes it (presumably via ERROR_LOG/TRY - confirm)
 13 | 	TSTensorData td_ld={0}, td_lu={0}, td_dst={0};
 14 | 	
 15 | 	unsigned n_inner = ld->shape[ld->shape_n-1],  // last dimension of the down tensor
 16 | 	         n0 = tstore_tensor_count(ld) / n_inner,
 17 | 			 n1 = tstore_tensor_count(lu) / n_inner;
 18 | 
 19 | 	if (!(dst->shape_n >= 2 &&
 20 | 		ld->shape_n == dst->shape_n &&
 21 | 		lu->shape_n == dst->shape_n &&
 22 | 		tstore_tensor_count(dst) == n0 * n1))
 23 | 	{
 24 | 		ERROR_LOG(-1, "lora up/down invalid shapes");
 25 | 	}
 26 | 
 27 | 	// Must init ggml before any tensor conversion
 28 | 	mlctx_begin(C, "lora");
 29 | 	C->c.flags_e |= MLB_F_QUIET;
 30 | 
 31 | 	// Scale: the ".scale" value if present, else alpha / n_inner, else 1; then times mult
 32 | 	float scale=1;
 33 | 	if (ls) {
 34 | 		TRY( tstore_tensor_data_get(ls, TS_DTYPE_F32, 0, &td_ld) );  // NOTE(review): td_ld is reused below for ld - confirm the previous data is released
 35 | 		scale = *(float*)td_ld.data;
 36 | 	}
 37 | 	else if (la) {
 38 | 		TRY( tstore_tensor_data_get(la, TS_DTYPE_F32, 0, &td_ld) );
 39 | 		scale = *(float*)td_ld.data / n_inner;
 40 | 	}
 41 | 	scale *= mult;
 42 | 	assert( scale > 0 );  // NOTE(review): rejects zero or negative multipliers
 43 | 	
 44 | 	// Get data, converted to the weights type of the context
 45 | 	int wtype = C->c.wtype;
 46 | 	int tsdt = tstore_dtype_from_ggml(wtype);
 47 | 	assert( tsdt > 0 );
 48 | 	
 49 | 	TRY( tstore_tensor_data_get(ld , tsdt, 0, &td_ld ) );
 50 | 	TRY( tstore_tensor_data_get(lu , tsdt, 0, &td_lu ) );
 51 | 	TRY( tstore_tensor_data_get(dst, tsdt, TSTDG_F_PERM | TSTDG_F_WRITE, &td_dst) );
 52 | 
 53 | 	// Make graph: dst + scale * transpose(mul_mat(lu, transpose(ld)))
 54 | 	MLTensor *t_ld, *t_lu, *t_dst, *t_out;
 55 | 	t_ld  = mlctx_input_new(C, "ld" , wtype, n0, n_inner, 1, 1);
 56 | 	t_lu  = mlctx_input_new(C, "lu" , wtype, n_inner, n1, 1, 1);
 57 | 	t_dst = mlctx_input_new(C, "dst", wtype, n0, n1, 1, 1);
 58 | 	
 59 | 	t_out = ggml_cont(C->cc, ggml_transpose(C->cc, t_ld));
 60 | 	t_out = ggml_mul_mat(C->cc, t_lu, t_out);
 61 | 	t_out = ggml_cont(C->cc, ggml_transpose(C->cc, t_out));
 62 | 	t_out = ggml_scale_inplace(C->cc, t_out, scale);
 63 | 	t_out = ggml_add_inplace(C->cc, t_dst, t_out);
 64 | 	
 65 | 	mlctx_tensor_add(C, "output", t_out);
 66 | 	TRY( mlctx_prep(C) );
 67 | 
 68 | 	// Set inputs
 69 | 	ggml_backend_tensor_set(t_ld , td_ld .data, 0, td_ld .size);
 70 | 	ggml_backend_tensor_set(t_lu , td_lu .data, 0, td_lu .size);
 71 | 	ggml_backend_tensor_set(t_dst, td_dst.data, 0, td_dst.size);
 72 | 
 73 | 	// Compute
 74 | 	TRY( mlctx_compute(C) );
 75 | 
 76 | 	// Store output back into the destination data
 77 | 	assert( ggml_nbytes(t_out) == td_dst.size );
 78 | 	ggml_backend_tensor_get(t_out, td_dst.data, 0, td_dst.size);
 79 | 	
 80 | 	// Check: only the first output element is tested for NaN/Inf
 81 | 	float v=0;
 82 | 	if (wtype == GGML_TYPE_F16)
 83 | 		v = ggml_fp16_to_fp32(*(ggml_fp16_t*)td_dst.data);
 84 | 	else if (wtype == GGML_TYPE_F32)
 85 | 		v = *(float*)td_dst.data;
 86 | 	if (!isfinite(v))
 87 | 		ERROR_LOG(-1, "NaN in LoRA result");
 88 | 
 89 | end:  // NOTE(review): also reached from the shape error above, before mlctx_begin - confirm mlctx_end is safe then
 90 | 	mlctx_end(C);
 91 | 	tstore_tdata_free(&td_dst);
 92 | 	tstore_tdata_free(&td_lu);
 93 | 	tstore_tdata_free(&td_ld);
 94 | 	return R;
 95 | }
 96 | 
 97 | int lora_apply(TensorStore* ts_dst, TensorStore* ts_lora, float mult,
 98 | 	MLCtx* ctx)
 99 | {  // Applies every "<name>.lora_down.weight" tensor of ts_lora to "<name>.weight" of ts_dst.
100 | 	int R=1;
101 | 	StrSlice name={0};
102 | 	TSTensorData td={0};  // NOTE(review): never filled in this function, only freed at the end
103 | 	DynStr tmps=NULL;  // scratch buffer for the derived tensor names
104 | 
105 | 	vec_forp(TSTensorEntry, ts_lora->tensors, ld, 0) {
106 | 		name = strsto_get(ts_lora->ss, ld->key);
107 | 		if (!( strsl_suffix_trim(&name, strsl_static(".lora_down.weight")) ))
108 | 			continue;  // not a lora down tensor
109 | 
110 | 		dstr_copy(tmps, name.s, name.b);
111 | 		dstr_appendz(tmps, ".weight");
112 | 		TSTensorEntry *dst = tstore_tensor_get(ts_dst, tmps);
113 | 		if (!dst) ERROR_LOG(-1, "lora tensor not found in model: %s", tmps);
114 | 
115 | 		dstr_copy(tmps, name.s, name.b);
116 | 		dstr_appendz(tmps, ".lora_up.weight");
117 | 		TSTensorEntry *lu = tstore_tensor_get(ts_lora, tmps);
118 | 		if (!lu) ERROR_LOG(-1, "lora up tensor not found: %s", tmps);
119 | 
120 | 		dstr_copy(tmps, name.s, name.b);
121 | 		dstr_appendz(tmps, ".scale");
122 | 		TSTensorEntry *ls = tstore_tensor_get(ts_lora, tmps);  // optional: may be NULL
123 | 
124 | 		dstr_copy(tmps, name.s, name.b);
125 | 		dstr_appendz(tmps, ".alpha");
126 | 		TSTensorEntry *la = tstore_tensor_get(ts_lora, tmps);  // optional: may be NULL
127 | 
128 | 		// Apply
129 | 		log_debug("lora apply %.*s", (int)name.s, name.b);
130 | 		TRY( lora_apply_inner(dst, ld, lu, ls, la, mult, ctx) );
131 | 	}
132 | 
133 | end:
134 | 	if (R<0) log_error("lora tensor '%.*s': %x", (int)name.s, name.b, -R);  // name is the tensor being processed when the error happened
135 | 	tstore_tdata_free(&td);
136 | 	dstr_free(tmps);
137 | 	return R;
138 | }
139 | 


--------------------------------------------------------------------------------
/src/lora.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * LoRA: low rank adaptation.
 5 |  * Ref.: Hu et al. (2021) "LoRA..."
 6 |  */
 7 | #pragma once
 8 | #include "ccompute/tensorstore.h"
 9 | #include "mlblock.h"
10 | 
11 | int lora_apply(TensorStore* ts_dst, TensorStore* ts_lora, float mult,
12 | 	MLCtx* ctx);  // merges the LoRA tensors of ts_lora into ts_dst scaled by mult; presumably negative on error (confirm)
13 | 


--------------------------------------------------------------------------------
/src/mlblock.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * Machine learning blocks of operations.
  5 |  */
  6 | #pragma once
  7 | #include "ccommon/vector.h"
  8 | #include "ccommon/stream.h"
  9 | #include "ccommon/logging.h"
 10 | #include "ccommon/stringstore.h"
 11 | #include "ccompute/tensorstore.h"
 12 | #include "localtensor.h"
 13 | 
 14 | #include "ggml.h"
 15 | #include "ggml-alloc.h"
 16 | #include "ggml-backend.h"
 17 | #include "ggml_extend.h"
 18 | 
 19 | //TODO: load: if CPU backend, do not copy tensor data
 20 | //TODO: option: free compute, keep params in memory
 21 | 
 22 | typedef struct ggml_tensor MLTensor;
 23 | 
 24 | enum {  // special values stored in MLCtxTensor.name by the marker helpers
 25 | 	MLB_NAME_BLOCK_BEGIN	= -0x1000,
 26 | 	MLB_NAME_SPLIT			= -0x1001,
 27 | };
 28 | 
 29 | enum MLCtxFlags {
 30 | 	// Prepare the computation to allow multiple calls to mlctx_compute
 31 | 	MLB_F_MULTI_COMPUTE = 1,
 32 | 	// Do not produce any information output
 33 | 	MLB_F_QUIET			= 2,
 34 | 	//(debug) Dump the computation graph to a file
 35 | 	MLB_F_DUMP			= 4,
 36 | };
 37 | 
 38 | typedef struct {
 39 | 	MLTensor *tensor;  //NULL for block-begin markers
 40 | 	StringInt name,  //Id in the context string store, or a MLB_NAME_* marker
 41 | 	          key;  //Full name to load from the tensor store
 42 | } MLCtxTensor;
 43 | 
 44 | typedef struct {
 45 | 	ggml_backend_t backend;  //Fill
 46 | 	TensorStore *tstore;  //Fill
 47 | 	StringStore *ss;  //Tensor names are stored here
 48 | 	
 49 | 	struct ggml_context *cp, *cc; //params, compute
 50 | 	struct ggml_cgraph *graph;
 51 |     ggml_gallocr_t allocr;
 52 | 
 53 | #if USE_GGML_SCHED
 54 | 	ggml_backend_t backend2;  //Fill
 55 | 	ggml_backend_sched_t sched;
 56 | 	ggml_backend_buffer_t bkbuf;
 57 | #endif
 58 | 	
 59 | 	MLCtxTensor * tensors;  //vector; entries pushed by mlctx_tensor_add and the marker helpers
 60 | 	MLTensor ** inputs;  //vector; filled by mlctx_input_new
 61 | 	MLTensor * result;
 62 | 
 63 | 	// Configuration
 64 | 	struct {
 65 | 		enum ggml_type wtype;  //weights type (default F16)
 66 | 		unsigned n_tensor_max;
 67 | 		char tpath_sep;  //default: '.'
 68 | 		const char *tprefix;  //Tensor names prefix
 69 | 		const char *name;  //Computation name, set by mlctx_begin
 70 | 		int flags;  //MLB_F_*
 71 | 		int flags_e;  //Flags valid until the next mlctx_begin
 72 | 	} c;
 73 | 
 74 | 	// Information/statistics
 75 | 	struct MLCtxInfo {
 76 | 		size_t mem_params, mem_compute, mem_total;
 77 | 		double t_load, t_compute;
 78 | 		unsigned n_compute, n_conv;
 79 | 	} info;
 80 | } MLCtx;
 81 | 
 82 | void mlctx_free(MLCtx* C);
 83 | 
 84 | void mlctx_begin(MLCtx* C, const char* name);  // starts a computation called <name> (stored in c.name)
 85 | 
 86 | void mlctx_end(MLCtx* C);
 87 | 
 88 | // All in one
 89 | int mlctx_run_(MLCtx* C, LocalTensor* out, const LocalTensor** inputs);
 90 | #define mlctx_run(C,O,...) /* builds the NULL-terminated input array for mlctx_run_ */ \
 91 | 	mlctx_run_((C), (O), (const LocalTensor*[]){ __VA_ARGS__, NULL })
 92 | 
 93 | // Build, alloc and load
 94 | // Pending: set input, compute, get output, free
 95 | int mlctx_prep(MLCtx* C);
 96 | 
 97 | /* Step by step interface */
 98 | 
 99 | // No need to call build
100 | void mlctx_block_graph_dump(const MLCtx* C, Stream* out);
101 | int mlctx_block_graph_dump_path(const MLCtx* C, const char* path);
102 | 
103 | int mlctx_build_alloc(MLCtx* C, MLTensor* result);
104 | 
105 | int mlctx_tstore_load(MLCtx* C, TensorStore* ts);
106 | 
107 | int mlctx_compute(MLCtx* C);
108 | 
109 | /* aux */
110 | 
111 | int tstore_tensor_read(TSTensorEntry*, struct ggml_tensor*);  // NOTE(review): presumably reads the entry data into the ggml tensor - confirm
112 | 
113 | /* Functions to define blocks */
114 | 
115 | static inline  // push a block-begin marker entry (no tensor) onto C->tensors
116 | void mlctx_block_begin(MLCtx* C)
117 | {
118 | 	vec_push(C->tensors, ((MLCtxTensor){ .tensor=NULL, .name=MLB_NAME_BLOCK_BEGIN }));
119 | 	log_debug2("ML block begin");
120 | }
121 | 
122 | static inline
123 | MLTensor* mlctx_tensor_add(MLCtx* C, const char* name, MLTensor* tensor)
124 | {
125 | 	ggml_name_prefix(tensor, name);
126 | 	bool param = (tensor->op == GGML_OP_NONE);
127 | 	int id = strsto_add(C->ss, strsl_fromz(name));
128 | 	vec_push(C->tensors, ((MLCtxTensor){ tensor, id }));
129 | 	log_debug2("ML %s: %s " GGML_TYPESHAPE_FMT, param ? "param" : "op",
130 | 		name, GGML_TYPESHAPE_ARGS(tensor));
131 | 	return tensor;
132 | }
133 | 
134 | static inline
135 | MLTensor* mlctx_split_add(MLCtx* C, MLTensor* tensor)
136 | {
137 | 	vec_push(C->tensors, ((MLCtxTensor){ tensor, MLB_NAME_SPLIT }));
138 | 	log_debug2("ML graph split");
139 | 	return tensor;
140 | }
141 | 
142 | static inline
143 | MLTensor* mlctx_input_new(MLCtx* C, const char* name, enum ggml_type dtype,
144 | 	int n0, int n1, int n2, int n3)
145 | {
146 | 	MLTensor *T = ggml_new_tensor_4d(C->cp, dtype, n0,n1,n2,n3);
147 | 	ggml_set_name(T, name);
148 | 	ggml_set_input(T);
149 | 	vec_push(C->inputs, T);
150 | 	return T;
151 | }
152 | 
153 | static inline
154 | MLTensor* mlctx_param_new(MLCtx* C, const char* name, enum ggml_type dtype,
155 | 	int n0, int n1, int n2, int n3)
156 | {
157 | 	MLTensor *T = ggml_new_tensor_4d(C->cp, dtype, n0,n1,n2,n3);
158 | 	ggml_set_input(T);
159 | 	return mlctx_tensor_add(C, name, T);
160 | }
161 | 


--------------------------------------------------------------------------------
/src/mlblock_nn.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Blocks commonly used in neural networks.
 5 |  */
 6 | #pragma once
 7 | #include "mlblock.h"
 8 | 
 9 | MLTensor* mlb_nn_linear(MLCtx* C, MLTensor* x, int n_out, bool bias);
10 | 
11 | MLTensor* mlb_nn_conv2d(MLCtx* C, MLTensor* x,
12 | 	int ch_out,
13 | 	int k0, int k1, int s0, int s1, int p0, int p1, int d0, int d1,
14 | 	bool bias);
15 | 
16 | MLTensor* mlb_nn_layer_norm(MLCtx* C, MLTensor* x,
17 | 	bool affine, bool bias, float eps);
18 | 
19 | MLTensor* mlb_nn_groupnorm(MLCtx* C, MLTensor* x,
20 | 	int n_grp, bool affine, float eps);
21 | 
22 | static inline
23 | MLTensor* mlb_nn_groupnorm32(MLCtx* C, MLTensor* x) {
24 | 	return mlb_nn_groupnorm(C, x, 32, true, 1e-6);
25 | }
26 | 
27 | MLTensor* mlb_downsample(MLCtx* C, MLTensor* x, int ch_out, bool vae);
28 | 
29 | MLTensor* mlb_upsample(MLCtx* C, MLTensor* x, int ch_out);
30 | 
31 | MLTensor* mlb_resnet(MLCtx* C, MLTensor* x, MLTensor* emb, int ch_out);
32 | 
33 | MLTensor* mlb_GEGLU(MLCtx* C, MLTensor* x, int d_out);
34 | 
35 | MLTensor* mlb_feed_forward(MLCtx* C, MLTensor* x, int d_out, int mult);
36 | 
37 | MLTensor* mlb_attn_mhead(MLCtx* C, MLTensor* q, MLTensor* k, MLTensor* v,
38 | 	int d_out, int d_embed, int n_head, bool mask, bool bias, bool bias_out);
39 | 
40 | MLTensor* mlb_basic_transf(MLCtx* C, MLTensor* x, MLTensor* c,
41 | 	int d_out, int d_embed, int n_head);
42 | 


--------------------------------------------------------------------------------
/src/mlimgsynth_options_get.c.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * mlimgsynth library option_get implementation.
 5 |  */
 6 | OPTION( MODEL ) {
 7 | 	ARG_STR( S->c.path_model );
 8 | }
 9 | OPTION( MODEL_TYPE ) {
10 | 	ARG_ENUM( S->c.model_type, mlis_model_type_froms );
11 | }
12 | OPTION( PROMPT ) {
13 | 	ARG_STR( S->c.prompt_raw );
14 | }
15 | OPTION( NPROMPT ) {
16 | 	ARG_STR( S->c.nprompt_raw );
17 | }
18 | //TODO: complete
19 | 


--------------------------------------------------------------------------------
/src/mlimgsynth_options_set.c.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * mlimgsynth library option_set implementation.
  5 |  */
  6 | OPTION( BACKEND ) {
  7 | 	ARG_STR(name  , 0, 65535)
  8 | 	ARG_STR(params, 0, 65535)
  9 | 	dstr_copy(S->c.backend, name.s, name.b);
 10 | 	dstr_copy(S->c.be_params, params.s, params.b);
 11 | 	S->rflags &= ~MLIS_READY_BACKEND;
 12 | }
 13 | OPTION( MODEL ) {
 14 | 	ARG_STR_NO_PARSE(path, 1, 65535)
 15 | 	dstr_copy(S->c.path_model, path.s, path.b);
 16 | 	S->rflags &= ~MLIS_READY_MODEL;
 17 | }
 18 | OPTION( TAE ) {
 19 | 	ARG_STR_NO_PARSE(path, 0, 65535)
 20 | 	dstr_copy(S->c.path_tae, path.s, path.b);
 21 | 	bool en = !dstr_empty(S->c.path_tae);
 22 | 	ccFLAG_SET(S->c.flags, MLIS_CF_USE_TAE, en);
 23 | }
 24 | OPTION( MODEL_TYPE ) {
 25 | 	ARG_ENUM(id, mlis_model_type_froms)
 26 | 	TRY( mlis_model_type_set(S, id) );
 27 | }
 28 | OPTION( AUX_DIR ) {
 29 | 	ARG_STR_NO_PARSE(path, 0, 65535)
 30 | 	dstr_copy(S->c.path_aux, path.s, path.b);
 31 | }
 32 | OPTION( LORA_DIR ) {
 33 | 	ARG_STR_NO_PARSE(path, 0, 65535)
 34 | 	dstr_copy(S->c.path_lora_dir, path.s, path.b);
 35 | }
 36 | OPTION( LORA ) {
 37 | 	ARG_STR(path, 1, 65535)
 38 | 	ARG_FLOAT(mult, 0, 1, 1);
 39 | 	TRY( mlis_cfg_lora_add(S, path, mult, 0) );
 40 | }
 41 | OPTION( LORA_CLEAR ) {
 42 | 	mlis_cfg_loras_free(S);	
 43 | }
OPTION( PROMPT ) {
	// Stores the raw prompt text and rebuilds the processed prompt from it.
	ARG_STR_NO_PARSE(text, 0, 65535)
	dstr_copy(S->c.prompt_raw, text.s, text.b);
	if (S->c.flags & MLIS_CF_NO_PROMPT_PARSE)
		prompt_text_set_raw(&S->c.prompt, text);  // Verbatim: no parsing
	else {
		TRY( prompt_text_set_parse(&S->c.prompt, text) );
		// Register the loras found by the prompt parser, flagged as coming
		// from the prompt.
		vec_forp(struct PromptTextLora, S->c.prompt.loras, p, 0)
			TRY( mlis_cfg_lora_add(S, p->name, p->w, MLIS_LF_PROMPT) );
	}
}
OPTION( NPROMPT ) {
	// Same as PROMPT, but for the negative prompt.
	ARG_STR_NO_PARSE(text, 0, 65535)
	dstr_copy(S->c.nprompt_raw, text.s, text.b);
	if (S->c.flags & MLIS_CF_NO_PROMPT_PARSE)
		prompt_text_set_raw(&S->c.nprompt, text);  // Verbatim: no parsing
	else {
		TRY( prompt_text_set_parse(&S->c.nprompt, text) );
		// Register the loras found by the prompt parser, flagged as coming
		// from the prompt.
		vec_forp(struct PromptTextLora, S->c.nprompt.loras, p, 0)
			TRY( mlis_cfg_lora_add(S, p->name, p->w, MLIS_LF_PROMPT) );
	}
}
 66 | OPTION( NO_PROMPT_PARSE ) {
 67 | 	ARG_BOOL(en)
 68 | 	ccFLAG_SET(S->c.flags, MLIS_CF_NO_PROMPT_PARSE, en);
 69 | }
 70 | OPTION( IMAGE_DIM ) {
 71 | 	ARG_INT(w, 0, 65535, 0)
 72 | 	ARG_INT(h, 0, 65535, 0)
 73 | 	S->c.width  = w;
 74 | 	S->c.height = h;
 75 | }
 76 | OPTION( BATCH_SIZE ) {
 77 | 	ARG_INT(i, 0, 1024, 0)
 78 | 	S->c.n_batch = i;
 79 | }
 80 | OPTION( CLIP_SKIP ) {
 81 | 	ARG_INT(i, 0, 255, 0)
 82 | 	S->c.clip_skip = i;
 83 | }
 84 | OPTION( CFG_SCALE ) {
 85 | 	ARG_FLOAT(f, 0, 255, NAN)
 86 | 	S->c.cfg_scale = f;
 87 | }
OPTION( METHOD ) {
	// Selects the sampling method (solver).
#ifdef ARG_IS_STR
	// String form only: a name ending in "_a" selects the method named by the
	// rest of the string and sets the ancestral sampling strength to 1.
	StrSlice ss = strsl_fromz(value);
	if (strsl_suffixz_trim(&ss, "_a")) {  // Shortcut for ancestral methods
		int id = mlis_method_froms(ss);
		if (id < 0)
			ERROR_LOG(MLIS_E_OPT_VALUE, "invalid method name '%s'", value);
		S->sampler.c.method = id;
		S->sampler.c.s_ancestral = 1;
		goto done;
	}
#endif
	// Plain method name / id. Note that s_ancestral is left untouched here.
	ARG_ENUM(id, mlis_method_froms)
	S->sampler.c.method = id;
}
103 | OPTION( SCHEDULER ) {
104 | 	ARG_ENUM(id, mlis_sched_froms)
105 | 	S->sampler.c.sched = id;
106 | }
107 | OPTION( STEPS ) {
108 | 	ARG_INT(i, 0, 1000, 0)
109 | 	S->sampler.c.n_step = i;
110 | }
111 | OPTION( F_T_INI ) {
112 | 	ARG_FLOAT(f, 0, 1, NAN)
113 | 	S->sampler.c.f_t_ini = f;
114 | }
115 | OPTION( F_T_END ) {
116 | 	ARG_FLOAT(f, 0, 1, NAN)
117 | 	S->sampler.c.f_t_end = f;
118 | }
119 | OPTION( S_NOISE ) {
120 | 	ARG_FLOAT(f, 0, 255, NAN)
121 | 	S->sampler.c.s_noise = f;
122 | }
123 | OPTION( S_ANCESTRAL ) {
124 | 	ARG_FLOAT(f, 0, 255, NAN)
125 | 	S->sampler.c.s_ancestral = f;
126 | }
OPTION( IMAGE ) {
	// Sets the input image from an MLIS_Image with 3 or 4 channels.
	// With 4 channels, the last one is moved into the mask tensor.
	ARG_C(img, const MLIS_Image*)
	if (img->c != 3 && img->c != 4)
		ERROR_LOG(MLIS_E_IMAGE,
			"invalid number of channels in image: %d", img->c);
	if (mlis_tensor_from_image(&S->image, img) < 0)
		ERROR_LOG(MLIS_E_IMAGE, "invalid image");
	S->c.tuflags |= MLIS_TUF_IMAGE;
	
	if (S->image.n[2] == 4) {  // Take mask from last channel (alpha)
		unsigned w = S->image.n[0];
		unsigned h = S->image.n[1];
		mlis_tensor_resize(&S->mask, w, h, 1, 1);
		// NOTE(review): offset and size look like byte counts (4 presumably
		// being sizeof(float)). If S->image.d is a float pointer, the offset
		// w*h*3*4 is applied in elements and points past the 4th channel; it
		// should then be w*h*3. Confirm against the tensor declaration.
		memcpy(S->mask.d, S->image.d+(w*h*3*4), w*h*4);
		S->image.n[2] = 3;  // Drop the alpha channel from the image
		S->c.tuflags |= MLIS_TUF_MASK;
	}
}
145 | OPTION( IMAGE_MASK ) {
146 | 	ARG_C(img, const MLIS_Image*)
147 | 	if (img->c != 1)
148 | 		ERROR_LOG(MLIS_E_IMAGE,
149 | 			"invalid number of channels in image mask: %d", img->c);
150 | 	if (mlis_tensor_from_image(&S->mask, img) < 0)
151 | 		ERROR_LOG(MLIS_E_IMAGE, "invalid image mask");
152 | 	S->c.tuflags |= MLIS_TUF_MASK;
153 | }
154 | OPTION( NO_DECODE ) {
155 | 	ARG_BOOL(en)
156 | 	ccFLAG_SET(S->c.flags, MLIS_CF_NO_DECODE, en);
157 | }
158 | OPTION( TENSOR_USE_FLAGS ) {
159 | 	ARG_FLAGS(fl)
160 | 	S->c.tuflags = fl;
161 | }
162 | OPTION( SEED ) {
163 | #ifdef ARG_IS_STR
164 | 	if (!vcur[0]) goto done;  // Empty string -> keep random seed
165 | #endif
166 | 	ARG_UINT64(i)
167 | 	g_rng.seed = i;  //TODO: local rng
168 | }
169 | OPTION( VAE_TILE ) {
170 | 	ARG_INT(i, 0, 65535, 0)
171 | 	S->c.vae_tile = i;
172 | }
173 | OPTION( UNET_SPLIT ) {
174 | 	ARG_BOOL(en)
175 | 	ccFLAG_SET(S->c.flags, MLIS_CF_UNET_SPLIT, en);
176 | }
OPTION( WEIGHT_TYPE ) {
	// Sets the data type used for the weights (S->ctx.c.wtype).
#ifdef ARG_IS_STR
	// String form: first try to interpret the value as a data type name.
	int id = tstore_dtype_fromz(vcur);
	id = tstore_dtype_to_ggml(id);
	if (id >= 0) {
		S->ctx.c.wtype = id;
		S->c.flags |= MLIS_CF_WEIGHT_TYPE_SET;
		goto done;
	}
#endif
	// Numeric form: a ggml type id, or -1 to go back to the default.
	ARG_INT(i, -1, GGML_TYPE_COUNT-1, 0)
	if (i == -1) {  //unset
		S->ctx.c.wtype = GGML_TYPE_F16;
		S->c.flags &= ~MLIS_CF_WEIGHT_TYPE_SET;
	} else {
		S->ctx.c.wtype = i;
		S->c.flags |= MLIS_CF_WEIGHT_TYPE_SET;
	}
}
196 | OPTION( THREADS ) {
197 | 	ARG_INT(i, 0, 65535, 0)
198 | 	S->c.n_thread = i;
199 | 	S->rflags &= ~MLIS_READY_BACKEND;  //this is overkill...
200 | }
201 | OPTION( DUMP_FLAGS ) {
202 | 	ARG_FLAGS(fl)
203 | 	S->c.dump_flags = fl;
204 | }
205 | OPTION( CALLBACK ) {
206 | 	ARG_C(func, MLIS_Callback)
207 | 	ARG_C(user, void*)
208 | 	S->callback = func;
209 | 	S->callback_ud = user;
210 | }
211 | OPTION( ERROR_HANDLER ) {
212 | 	ARG_C(func, MLIS_ErrorHandler)
213 | 	ARG_C(user, void*)
214 | 	S->errh = func;
215 | 	S->errh_ud = user;
216 | }
OPTION( LOG_LEVEL ) {
	// Warning: this sets a global configuration, not associated with the context.

#ifdef ARG_IS_STR
	// String form: first try to interpret the value as a level name.
	int lvls = mlis_loglvl_fromz(vcur);
	if (lvls >= 0) {
		log_level_set(lvls);
		goto done;
	}
#endif

	// Numeric form. Bits 8-11 select the operation and the low byte is the
	// operand: 0x1nn increases the level, 0x2nn decreases it by nn, anything
	// else sets the level to the value itself.
	ARG_INT(lvl, 0, 0x2ff, -1)
	if ((lvl & 0xf00) == 0x100) {
		// Increase verbosity, starting directly from INFO.
		if (!log_level_check(LOG_LVL_INFO))
			log_level_set(LOG_LVL_INFO);
		else
			log_level_inc(lvl & 0xff);
	} else if ((lvl & 0xf00) == 0x200)
		log_level_inc(-(lvl & 0xff));
	else
		log_level_set(lvl);
}
240 | 


--------------------------------------------------------------------------------
/src/prompt_preproc.h:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: Zlib
  3 |  */
  4 | #pragma once
  5 | #include "ccommon/ccommon.h"
  6 | #include "ccommon/strslice.h"
  7 | #include "ccommon/vector.h"
  8 | #include "ccommon/logging.h"
  9 | #include <math.h>
 10 | 
 11 | #ifndef MLIMGSYNTH_H
 12 | enum { MLIS_E_PROMPT_PARSE = -5 };
 13 | #endif
 14 | 
/* Text prompt structure containing the preprocessed text, weights, and loras.
 * The slices stored in chunks and loras point into the text and data buffers,
 * so they are only valid until those buffers are modified.
 */
typedef struct PromptText {
	DynStr text;  // Processed prompt text; chunks[].text point into it
	DynStr data;  // Auxiliary storage; loras[].name point into it
	struct PromptTextChunk {
		StrSlice text;
		float w;  // Weight / attention multiplier
	} *chunks;  //vector
	struct PromptTextLora {
		StrSlice name;
		float w;  // Lora multiplier
	} *loras;  //vector
} PromptText;
 29 | 
 30 | static
 31 | void prompt_text_free(PromptText* S)
 32 | {
 33 | 	dstr_free(S->text);
 34 | 	dstr_free(S->data);
 35 | 	vec_free(S->chunks);
 36 | 	vec_free(S->loras);
 37 | }
 38 | 
 39 | static
 40 | void prompt_text_clear(PromptText* S)
 41 | {
 42 | 	dstr_resize(S->text, 0);
 43 | 	dstr_resize(S->data, 0);
 44 | 	vec_resize(S->chunks, 0);
 45 | 	vec_resize(S->loras, 0);
 46 | }
 47 | 
 48 | static
 49 | void prompt_text_set_raw(PromptText* S, const StrSlice ss)
 50 | {
 51 | 	prompt_text_clear(S);
 52 | 	dstr_copy(S->text, strsl_len(ss), strsl_begin(ss));
 53 | 	vec_resize(S->chunks, 1);
 54 | 	S->chunks[0] = (struct PromptTextChunk){ strsl_fromd(S->text), 1.0 };
 55 | }
 56 | 
 57 | static
 58 | int prompt_text_option_parse(PromptText* S, StrSlice ss)
 59 | {
 60 | 	int R=1;
 61 | 
 62 | 	if (strsl_prefix_trim(&ss, strsl_static("lora:")))
 63 | 	{
 64 | 		const char *beg=strsl_begin(ss), *sep=beg, *end=strsl_end(ss);
 65 | 		while (sep < end && *sep != ':') sep++;  // Find multiplier option
 66 | 		
 67 | 		float mult=1;
 68 | 		if (*sep == ':') {  // Optional multiplier
 69 | 			char *tail=NULL;
 70 | 			mult = strtof(sep+1, &tail);
 71 | 			if (tail != end)
 72 | 				ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: invalid lora multiplier");
 73 | 		}
 74 | 		
 75 | 		//TRY( mlis_cfg_lora_add(S, strsl_fromr(ss.b, sep), mult, MLIS_LF_PROMPT) );
 76 | 
 77 | 		// Store lora name
 78 | 		unsigned len = sep - beg;
 79 | 		dstr_append(S->data, len, beg);
 80 | 
 81 | 		// Add lora to list
 82 | 		unsigned nl = vec_count(S->loras);
 83 | 		vec_append_zero(S->loras, 1);
 84 | 		S->loras[nl].name = strsl_make(dstr_end(S->data) - len, len);
 85 | 		S->loras[nl].w = mult;
 86 | 	}
 87 | 	else {
 88 | 		ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: unknown option '%.*s'",
 89 | 			(int)ss.s, ss.b);
 90 | 	}
 91 | 
 92 | end:
 93 | 	return R;
 94 | }
 95 | 
 96 | /* Parse prompt like in stable-diffusion-webui.
 97 |  * "normal text" -> 1 chunk
 98 |  * "normal (weighted by 1.1) normal" -> 3 chunks
 99 |  * "normal ((weighted by 1.1*1.1)) normal" -> 3 chunks
100 |  * "normal [weighted by 1/1.1) normal" -> 3 chunks
101 |  * "normal (weighted by 1.5:1.5) normal" -> 3 chunks
102 |  * "normal BREAK normal" -> "normal  normal"  (ignores BREAK for now)
103 |  */
104 | static
105 | int prompt_text_set_parse(PromptText* S, const StrSlice ss)
106 | {
107 | 	int R=1;
108 | 
109 | 	prompt_text_clear(S);
110 | 
111 | 	// Reserve memory so that pointers are not invalidated.
112 | 	dstr_realloc(S->text, strsl_len(ss)*2);
113 | 	dstr_realloc(S->data, strsl_len(ss)*2);
114 | 
115 | 	vec_resize_zero(S->chunks, 1);
116 | 	S->chunks[0].text = strsl_make(S->text, 0);
117 | 	S->chunks[0].w = 1;
118 | 	
119 | 	int n_paren=0, n_braket=0;
120 | 
121 | 	strsl_for(ss, cur, end, 0)
122 | 	{
123 | 		if (*cur == '\\') {  // Escape
124 | 			if (cur+1 < end) {
125 | 				cur++;
126 | 				char c = *cur;
127 | 				switch (c) {
128 | 				case 'n':  c = '\n';  break;
129 | 				}
130 | 				dstr_push(S->text, c);
131 | 			}
132 | 		}
133 | 		else if (*cur == '(' || *cur == ')' || *cur == '[' || *cur == ']') {
134 | 			switch (*cur) {
135 | 			case '(':  n_paren++;  break;
136 | 			case ')':  n_paren--;  break;
137 | 			case '[':  n_braket++; break;
138 | 			case ']':  n_braket--; break;
139 | 			}
140 | 			if (n_paren < 0 || n_braket < 0)
141 | 				ERROR_LOG(MLIS_E_PROMPT_PARSE,
142 | 					"prompt: unmatched ')' or ']'");
143 | 			//if (n_paren > 0 && n_braket > 0)
144 | 			//	ERROR_LOG(MLIS_E_PROMPT_PARSE,
145 | 			//		"prompt: mix of emphasis with '(' and '['");
146 | 
147 | 			const char *e = dstr_end(S->text);
148 | 			//unsigned lvl = n_paren - n_braket;
149 | 			float w = pow(1.1, n_paren - n_braket);  //TODO: cfg?
150 | 			
151 | 			unsigned ic = vec_count(S->chunks) -1;
152 | 			if (S->chunks[ic].text.b == e) {
153 | 				S->chunks[ic].w = w;
154 | 			} else {
155 | 				// Finish previous chunk
156 | 				S->chunks[ic].text.s = e - S->chunks[ic].text.b;
157 | 				// New chunk
158 | 				vec_append_zero(S->chunks, 1);
159 | 				ic++;
160 | 				S->chunks[ic].text.b = e;
161 | 				S->chunks[ic].w = w;
162 | 			}
163 | 		}
164 | 		else if (*cur == ':' && (n_paren > 0 || n_braket > 0)) {
165 | 			if (!(n_paren == 1 && n_braket == 0))
166 | 				ERROR_LOG(MLIS_E_PROMPT_PARSE,
167 | 					"prompt: custom emphasis multiplier outside of '()'");
168 | 
169 | 			char *tail=NULL;
170 | 			float w=0;
171 | 			if (cur+1 < end) {
172 | 				cur++;
173 | 				w = strtof(cur, &tail);  //TODO: restrict to an slice
174 | 			}
175 | 			if (!(tail && tail < end && *tail == ')'))
176 | 				ERROR_LOG(MLIS_E_PROMPT_PARSE,
177 | 					"prompt: invalid emphasis with ':'");
178 | 
179 | 			cur = tail-1;
180 | 			vec_last(S->chunks, 0).w = w;
181 | 		}
182 | 		else if (*cur == '<') {
183 | 			const char *e=cur+1;
184 | 			while (e < end && *e != '>') ++e;
185 | 			if (*e != '>')
186 | 				ERROR_LOG(MLIS_E_PROMPT_PARSE, "prompt: '<' not matched with '>'");
187 | 			TRY( prompt_text_option_parse(S, strsl_fromr(cur+1, e)) );
188 | 			cur = e;
189 | 		}
190 | 		else if (*cur == 'B' && cur+5 < end && !memcmp(cur, "BREAK", 5)) {
191 | 			cur += 4;
192 | 		}
193 | 		else dstr_push(S->text, *cur);
194 | 	}
195 | 
196 | 	// Finish last chunk
197 | 	unsigned ic = vec_count(S->chunks) - 1;
198 | 	S->chunks[ic].text = strsl_fromr(S->chunks[ic].text.b, dstr_end(S->text));
199 | 
200 | #ifndef NDEBUG
201 | 	vec_for(S->chunks, i, 0) {
202 | 		assert( strsl_begin(S->chunks[i].text) >= S->text );
203 | 		assert( strsl_end(S->chunks[i].text) <= dstr_end(S->text) );
204 | 	}
205 | #endif
206 | 
207 | end:
208 | 	return R;
209 | }
210 | 


--------------------------------------------------------------------------------
/src/sampling.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "sampling.h"
  5 | #include "ccommon/ccommon.h"
  6 | #include "ccommon/rng_philox.h"
  7 | #include "ccommon/logging.h"
  8 | #include <math.h>
  9 | 
 10 | #define log_vec(LVL,DESC,VEC,VAR,I0,...) \
 11 | if (log_level_check(LVL)) { \
 12 | 	log_line_begin(LVL); \
 13 | 	log_line_str(DESC ":"); \
 14 | 	vec_for(VEC,VAR,I0) log_line_strf(" " __VA_ARGS__); \
 15 | 	log_line_end(); \
 16 | }
 17 | 
 18 | #define log_debug_vec(...)  log_vec(LOG_LVL_DEBUG, __VA_ARGS__)
 19 | 
 20 | void dnsamp_free(DenoiseSampler* S)
 21 | {
 22 | 	ltensor_free(&S->noise);
 23 | 	ltensor_free(&S->x0);
 24 | 	solver_free(&S->solver);
 25 | 	vec_free(S->sigmas);
 26 | }
 27 | 
/* Prepare the sampler for a new sampling run using the configuration in S->c.
 * Selects the solver, computes the number of steps and fills S->sigmas with
 * n_step+1 noise levels, the last one being 0.
 * S->unet_p and S->nfe_per_dxdt must be filled by the caller before.
 * Unset configuration values (method, sched, f_t_ini) are replaced in S->c by
 * their defaults.
 * Returns R: 1 on success, otherwise the value left by ERROR_LOG (called
 * with -1 for an invalid method or scheduler).
 */
int dnsamp_init(DenoiseSampler* S)
{
	int R=1;

	// Solver
	if (S->c.method <= 0) S->c.method = SOLVER_METHOD_EULER;

	S->solver.i_step = 0;
	S->solver.C = solver_class_get(S->c.method);
	if (!S->solver.C)
		ERROR_LOG(-1, "invalid sampling method %d", S->c.method);
	
	// Scheduling
	// Compute times and sigmas for inference
	S->n_step = S->c.n_step;
	if (S->n_step < 1) S->n_step = 20;  // Default number of steps

	S->nfe_per_step = S->solver.C->n_fe;

	// Reduce number of steps to keep the number of neural function evaluations
	// (integer division rounding up)
	if (S->nfe_per_step > 1)
		S->n_step = (S->n_step + S->nfe_per_step-1) / S->nfe_per_step;
	
	S->nfe_per_step *= S->nfe_per_dxdt;
	
	// Reduces the number of steps to keep the step size the same (img2img)
	// IFNPOSSET presumably sets f_t_ini to 1 if it is not positive.
	IFNPOSSET(S->c.f_t_ini, 1);
	S->n_step = S->n_step * (S->c.f_t_ini - S->c.f_t_end) +0.5;  // Rounded
	if (S->n_step < 1) S->n_step = 1;
	
	// Calculate noise levels / times
	vec_resize(S->sigmas, S->n_step+1);
	S->sigmas[S->n_step] = 0;

	// Initial and final times, as fractions of the last training step
	float t_ini = (S->unet_p->n_step_train - 1) * S->c.f_t_ini;
	float t_end = (S->unet_p->n_step_train - 1) * S->c.f_t_end;

	IFFALSESET(S->c.sched, DNSAMP_SCHED_UNIFORM);
	switch (S->c.sched) {
	case DNSAMP_SCHED_UNIFORM: {
		// Times uniformly spaced from t_ini to t_end, converted to sigmas
		float b = t_ini,
		      f = S->n_step>1 ? (t_end-t_ini)/(S->n_step-1) : 0;
		for (unsigned i=0; i<S->n_step; ++i)
			S->sigmas[i] = unet_t_to_sigma(S->unet_p, b+i*f);
	} break;
	case DNSAMP_SCHED_KARRAS: {
		// Uses the model's min and max sigma instead of 0.1 and 10.
		// Uniform spacing in sigma^(1/p), from the maximum to the minimum.
		float smin = unet_t_to_sigma(S->unet_p, t_end),
		      smax = unet_t_to_sigma(S->unet_p, t_ini),
			  p=7,
		      sminp = pow(smin, 1/p),
		      smaxp = pow(smax, 1/p),
			  b = smaxp,
			  f = S->n_step>1 ? (sminp - smaxp) / (S->n_step-1) : 0;
		for (unsigned i=0; i<S->n_step; ++i)
			S->sigmas[i] = pow(b+i*f, p);
	} break;
	default:
		ERROR_LOG(-1, "invalid sampling scheduler %d", S->c.sched);
	}

	log_debug_vec("Sigmas", S->sigmas, i, 0, "%.6g", S->sigmas[i]);
	
	S->solver.t = S->sigmas[0];  //initial t
	S->i_step = 0;

end:
	return R;
}
 97 | 
 98 | void dnsamp_mask_apply(DenoiseSampler* S, LocalTensor* x)
 99 | {
100 | 	int n0 = x->n[0], n1 = x->n[1], n2 = x->n[2],
101 | 		s1 = n0, s2 = n0*n1;
102 | 	assert( ltensor_shape_check(S->c.lmask, n0, n1, 1, 1) );
103 | 	for (int i2=0; i2<n2; ++i2)
104 | 	for (int i1=0; i1<n1; ++i1)
105 | 	for (int i0=0; i0<n0; ++i0) {
106 | 		float m = S->c.lmask->d[i0 +i1*s1];
107 | 		int i = i0 +i1*s1 +i2*s2;
108 | 		x->d[i] = S->x0.d[i] * m + x->d[i] * (1-m);
109 | 	}
110 | }
111 | 
112 | void dnsamp_noise_add(DenoiseSampler* S, LocalTensor* x, float sigma)
113 | {
114 | 	ltensor_resize_like(&S->noise, x);
115 | 	rng_randn(ltensor_nelements(&S->noise), S->noise.d);
116 | 	ltensor_for(*x,i,0) x->d[i] += S->noise.d[i] * sigma;
117 | }
118 | 
119 | int dnsamp_step(DenoiseSampler* S, LocalTensor* x)
120 | {
121 | 	int R=1;
122 | 
123 | 	int s = S->i_step;
124 | 	if (!(s < S->n_step)) return 0;
125 | 	
126 | 	float s_up = 0,
127 | 	      s_down = S->sigmas[s+1];
128 | 
129 | 	if (s == 0) {  // Initial tasks
130 | 		if (S->c.lmask) ltensor_copy(&S->x0, x);
131 | 
132 | 		// Add noise to initial latent
133 | 		dnsamp_noise_add(S, x, S->sigmas[0]);
134 | 		if (S->c.lmask) dnsamp_mask_apply(S, x);
135 | 		log_debug3_ltensor(x, "x0+noise");
136 | 	}
137 | 
138 | 	if (S->c.s_noise > 0 && s > 0) {
139 | 		// Stochastic sampling: may help to add detail lost during sampling
140 | 		// Ref.: Karras2022, see Algo2 with S_churn
141 | 		// Produces softer images
142 | 		// Similar to the ancestral sampling below
143 | 		float s_curr  = S->sigmas[s],
144 | 		      s_hat   = s_curr * sqrt(2) * S->c.s_noise,
145 | 			  s_noise = sqrt(s_hat*s_hat - s_curr*s_curr);
146 | 		log_debug("s_noise:%g s_hat:%g", s_noise, s_hat);
147 | 		
148 | 		dnsamp_noise_add(S, x, s_noise);
149 | 		if (S->c.lmask) dnsamp_mask_apply(S, x);
150 | 		S->solver.t = s_hat;
151 | 	}
152 | 		
153 | 	if (S->c.s_ancestral > 0) {
154 | 		// Ancestral sampling
155 |  		// Ref.: k_diffusion/sampling.py  get_ancestral_step
156 | 		// Produces softer images
157 | 		float s1 = S->sigmas[s],  //sigma_from
158 | 			  s2 = S->sigmas[s+1];  //sigma_to
159 | 		
160 | 		s_up = sqrt((s2*s2) * (s1*s1 - s2*s2) / (s1*s1));
161 | 		s_up *= S->c.s_ancestral;  //eta * s_noise
162 | 		MINSET(s_up, s2);
163 | 		s_down = sqrt(s2*s2 - s_up*s_up);
164 | 
165 | 		log_debug("ancestral s_down:%g s_up:%g", s_down, s_up);
166 | 	}
167 | 
168 | 	TRY( solver_step(&S->solver, s_down, x) );
169 | 	
170 | 	if (s_up > 0 && s+1 != S->n_step) {
171 | 		// Ancestral sampling
172 | 		dnsamp_noise_add(S, x, s_up);
173 | 		S->solver.t = S->sigmas[s+1];
174 | 	}
175 | 
176 | 	// In-painting mask apply
177 | 	if (S->c.lmask)
178 | 		dnsamp_mask_apply(S, x);
179 | 	
180 | 	log_debug3_ltensor(x, "x");
181 | 
182 | 	S->i_step++;
183 | end:
184 | 	return R;
185 | }
186 | 


--------------------------------------------------------------------------------
/src/sampling.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  */
 4 | #pragma once
 5 | #include "unet.h"
 6 | #include "solvers.h"
 7 | #include "localtensor.h"
 8 | 
 9 | // Schedulers. Matches MLIS_Sched.
10 | //TODO: classes?
11 | enum {
12 | 	DNSAMP_SCHED_UNIFORM	= 1,
13 | 	DNSAMP_SCHED_KARRAS		= 2,
14 | };
15 | 
/* Denoising sampler: noise schedule plus solver stepping state.
 * Fill c, unet_p and nfe_per_dxdt, then call dnsamp_init before stepping.
 */
typedef struct {
	Solver solver;
	float *sigmas;  //vector, n_step+1 noise levels, the last one is zero
	// Current step, total steps, and neural function evaluations per step
	int i_step, n_step, nfe_per_step;
	
	const UnetParams *unet_p;  //fill before use
	int nfe_per_dxdt;  //fill before use

	// Last generated noise and initial latent (kept only if lmask is set)
	LocalTensor noise, x0;

	// Configuration
	struct {
		// Non-positive/zero values are replaced by defaults in dnsamp_init
		int n_step, method, sched;
		float f_t_ini, f_t_end, s_noise, s_ancestral;
		LocalTensor *lmask;  // Optional in-painting mask for the latent
	} c;
} DenoiseSampler;
32 | 
33 | void dnsamp_free(DenoiseSampler* S);
34 | 
35 | int dnsamp_init(DenoiseSampler* S);
36 | 
37 | int dnsamp_step(DenoiseSampler* S, LocalTensor* x);
38 | 
39 | static inline
40 | int dnsamp_sample(DenoiseSampler* S, LocalTensor* x)
41 | {
42 | 	int r;
43 | 	while ((r = dnsamp_step(S, x)) > 0) ;
44 | 	return r;
45 | }
46 | 


--------------------------------------------------------------------------------
/src/solvers.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "solvers.h"
  5 | #include "ccommon/ccommon.h"
  6 | #include <math.h>
  7 | 
// List of all available solvers, indexed by method id. Matches MLIS_Method.
// Index 0 is an empty (NULL) slot, so valid solvers start at index 1.
// The list also ends with NULL: code iterating until NULL must start at 1.
const SolverClass *g_solvers[] = {
	NULL,
	&g_solver_euler,
	&g_solver_heun,
	&g_solver_taylor3,
	&g_solver_dpmpp2m,
	&g_solver_dpmpp2s,
	NULL
};
 18 | 
 19 | const SolverClass* solver_class_get(int idx)
 20 | {
 21 | 	if (!(0 <= idx && idx < COUNTOF(g_solvers)))
 22 | 		return NULL;
 23 | 
 24 | 	return g_solvers[idx];
 25 | }
 26 | 
 27 | const SolverClass* solver_class_find(const char* name)
 28 | {
 29 | 	for (unsigned i=0; g_solvers[i]; ++i)
 30 | 		if (!strcmp(name, g_solvers[i]->name))
 31 | 			return g_solvers[i];
 32 | 
 33 | 	return NULL;
 34 | }
 35 | 
 36 | void solver_free(Solver* S)
 37 | {
 38 | 	for (unsigned i=0; i<COUNTOF(S->tmp); ++i)
 39 | 		ltensor_free(&S->tmp[i]);
 40 | 	ltensor_free(&S->dx);
 41 | }
 42 | 
 43 | int solver_step(Solver* S, float t, LocalTensor* x)
 44 | {
 45 | 	S->i_tmp = 0;
 46 | 	ltensor_resize_like(&S->dx, x);
 47 | 	int r = S->C->step(S, t, x);
 48 | 	if (r < 0) return r;
 49 | 	S->t = t;
 50 | 	S->i_step++;
 51 | 	return r;
 52 | }
 53 | 
 54 | static inline
 55 | LocalTensor* solver_tmp_get(Solver* S)
 56 | {
 57 | 	assert( S->i_tmp < COUNTOF(S->tmp) );
 58 | 	S->i_tmp++;
 59 | 	return &S->tmp[S->i_tmp-1];
 60 | }
 61 | 
 62 | static inline
 63 | LocalTensor* solver_tmp_get_resize(Solver* S, int n0, int n1, int n2, int n3)
 64 | {
 65 | 	LocalTensor* lt = solver_tmp_get(S);
 66 | 	ltensor_resize(lt, n0, n1, n2, n3);
 67 | 	return lt;
 68 | }
 69 | 
 70 | static inline
 71 | LocalTensor* solver_tmp_get_resize_like(Solver* S, const LocalTensor* x)
 72 | {
 73 | 	LocalTensor* lt = solver_tmp_get(S);
 74 | 	ltensor_resize_like(lt, x);
 75 | 	return lt;
 76 | }
 77 | 
 78 | /* Euler
 79 |  * Ref.: any textbook
 80 |  * Baseline.
 81 |  */
 82 | int solver_euler_step(Solver* S, float t, LocalTensor* x)
 83 | {
 84 | 	float dt = t - S->t;
 85 | 	TRYR( solver_dxdt(S, S->t, x, &S->dx) );
 86 | 	ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt;
 87 | 	return 1;
 88 | }
 89 | 
 90 | const SolverClass g_solver_euler = {
 91 | 	.step = solver_euler_step,
 92 | 	.n_fe = 1,
 93 | 	.name = "euler",
 94 | };
 95 | 
 96 | /* Heun (improved Euler)
 97 |  * Ref.: Karras et al. 2022 "Elucidating..." Algo1
 98 |  * Tends to distort the images with low step counts.
 99 |  */
// Heun step from S->t to t: Euler predictor followed by a correction using
// the average of the derivatives at both ends of the step.
// Returns 1, or the error propagated by TRYR from solver_dxdt.
int solver_heun_step(Solver* S, float t, LocalTensor* x)
{
	float dt = t - S->t;
	LocalTensor *x1 = solver_tmp_get_resize_like(S, x);  // Euler prediction
	LocalTensor *d1 = solver_tmp_get_resize_like(S, x);  // Derivative at x1

	TRYR( solver_dxdt(S, S->t, x, &S->dx) );
	ltensor_for(*x,i,0) x1->d[i] = x->d[i] + S->dx.d[i] * dt;

	if (!(t > 0)) {  //last step: just euler
		ltensor_for(*x,i,0) x->d[i] = x1->d[i];
	}
	else {  //2nd order correction
		TRYR( solver_dxdt(S, t, x1, d1) );
		ltensor_for(*x,i,0)
			x->d[i] += (S->dx.d[i] + d1->d[i]) * 0.5 * dt;
	}
	
	return 1;
}
120 | 
121 | const SolverClass g_solver_heun = {
122 | 	.step = solver_heun_step,
123 | 	.n_fe = 2,
124 | 	.name = "heun",
125 | };
126 | 
127 | /* Third-order-Taylor extension of Euler
128 |  * Ref.: own
129 |  * Similar to Euler with less steps.
130 | 
131 | x_{i+1} = x_i + dx_i dt_i + (1/2) dx2_i dt_i^2 + (1/6) dx3_i dt_i^3
132 | 
133 | dx2_i = (dx_i - dx_{i-1}) / dt_{i-1}
134 | dx3_i = (dx2_i - dx2_{i-1}) / dt_{i-1}
135 |       = (dx_i - dx_{i-1}) / dt_{i-1}^2 - (dx_{i-1} - dx_{i-2}) / (dt_{i-1} dt_{i-2})
136 |  */
137 | int solver_taylor3_step(Solver* S, float t, LocalTensor* x)
138 | {
139 | 	float dt = t - S->t;
140 | 	LocalTensor *lt_dt_prev = solver_tmp_get_resize(S, 1,1,1,1);
141 | 	LocalTensor *lt_dp1 = solver_tmp_get_resize_like(S, x);
142 | 	LocalTensor *lt_dp2 = solver_tmp_get_resize_like(S, x);
143 | 
144 | 	float *dt_prev = lt_dt_prev->d,
145 | 	      *dp1 = lt_dp1->d,
146 | 		  *dp2 = lt_dp2->d;
147 | 
148 | 	TRYR( solver_dxdt(S, S->t, x, &S->dx) );
149 | 	ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt;
150 | 	
151 | 	// 2nd and 3nd order corrections
152 | 	float idtp = S->i_step >= 1 ? 1 / dt_prev[0] : 0,
153 | 	      f2 = S->i_step >= 1 ? dt*dt/2 : 0,
154 | 		  f3 = S->i_step >= 2 ? dt*dt*dt/6 : 0;
155 | 	ltensor_for(*x,i,0) {
156 | 		float d2 = (S->dx.d[i] - dp1[i]) * idtp,
157 | 			  d3 = (d2 - dp2[i]) * idtp;
158 | 		x->d[i] += d2 * f2 + d3 * f3;
159 | 		dp1[i] = S->dx.d[i];
160 | 		dp2[i] = d2;
161 | 	}
162 | 	
163 | 	dt_prev[0] = dt;
164 | 	return 1;
165 | }
166 | 
167 | const SolverClass g_solver_taylor3 = {
168 | 	.step = solver_taylor3_step,
169 | 	.n_fe = 1,
170 | 	.name = "taylor3",
171 | };
172 | 
173 | /* DPM++(2M)
174 |  * Ref.: Lu et al. 2023 "DPM-Solver++ ..." Algo2
175 |  * Ref.: k-diffusion/sampling.py  sample_dpmpp_2m
176 |  * Produces sharper images.
177 |  * Use with Karras scheduler to prevent overly sharp images.
178 | 
179 | alpha_i     = 1
180 | sigma_{i+1} = t
181 | sigma_i     = S->t
182 | 
183 | lambda_i = log(alpha_i / sigma_i)
184 |          = -log(sigma_i)
185 | 
186 | a_i = sigma_{i+1} / sigma_i
187 | 
188 | h_i = lambda_{i+1} - lambda_i
189 |     = -log(sigma_{i+1} / sigma_i)
190 | 	= -log(a_i)
191 | 
192 | b_i = exp(-h_i) - 1 = a_i - 1
193 | 
194 | c_i = 1/(2r)
195 |     = h_{i} / (2 h_{i-1})
196 | 
197 | d_i = x_i - sigma_i dx_i
198 | 
199 | D_i = (1 + c_i) d_i - c_i d_{i-1}
200 | 
201 | x_{i+1} = a_i x_i - b_i D_i
202 |         = a_i x_i + (1 - a_i) D_i
203 | 
204 | if c_i == 0:
205 | 	x_{i+1} = x_i + (sigma_{i+1} - sigma_i) dx_i   (Euler)
206 |  */
207 | int solver_dpmpp2m_step(Solver* S, float t, LocalTensor* x)
208 | {
209 | 	LocalTensor *vars = solver_tmp_get_resize(S, 1,1,1,1);
210 | 	LocalTensor *dprev = solver_tmp_get_resize_like(S, x);
211 | 
212 | 	float a = t / S->t,
213 | 		  h = -log(a),
214 | 		  h_last = vars->d[0],
215 | 		  c = h / (2*h_last);
216 | 
217 | 	if (S->i_step == 0 || !(t > 0))  //first or last step
218 | 		c = 0;
219 | 
220 | 	TRYR( solver_dxdt(S, S->t, x, &S->dx) );
221 | 	ltensor_for(*x,i,0) {
222 | 		float d0 = x->d[i] - S->t * S->dx.d[i],
223 | 		      d1 = dprev->d[i],
224 | 		      d  = (1+c) * d0 - c * d1;
225 | 		x->d[i] = a * x->d[i] + (1-a) * d;
226 | 		dprev->d[i] = d0;
227 | 	}
228 | 
229 | 	vars->d[0] = h;
230 | 	return 1;
231 | }
232 | 
// Class descriptor of the "dpmpp2m" method: one dxdt evaluation per step.
const SolverClass g_solver_dpmpp2m = {
	.step = solver_dpmpp2m_step,
	.n_fe = 1,
	.name = "dpmpp2m",
};
238 | 
239 | /* DPM++(2S)
240 |  * Ref.: Lu et al. 2023 "DPM-Solver++ ..." Algo1
241 |  * Ref.: k-diffusion/sampling.py  sample_dpmpp_2s_ancestral
242 |  * Should be used with ancestral sampling.
243 | 
244 | Check DPM++(2M) first.
245 | 
246 | lambda_i = -log(sigma_i)
247 | 
248 | s_i = sqrt(sigma_{i+1} sigma_i)   From k-diffusion r=1/2
249 | 
250 | a'_i = s_i / sigma_i
251 | h'_i = -log(a'_i)
252 | d_i  = x_i - sigma_i dx_i
253 | 
254 | x'_i = a'_i x_i + (1 - a'_i) d_i
255 | 	 = x_i + (s_i - sigma_i) dx_i
256 | 
257 | d'_i = x'_i - s_i dx'_i
258 | 
259 | a_i = sigma_{i+1} / sigma_i
260 | h_i = -log(a_i)
261 | 
262 | x_{i+1} = a_i x_i + (1 - a_i) d'_i
263 |  */
264 | int solver_dpmpp2s_step(Solver* S, float t, LocalTensor* x)
265 | {
266 | 	LocalTensor *x1  = solver_tmp_get_resize_like(S, x);
267 | 	LocalTensor *dx1 = solver_tmp_get_resize_like(S, x);
268 | 	
269 | 	TRYR( solver_dxdt(S, S->t, x, &S->dx) );
270 | 
271 | 	if (!(t > 0)) {  //last step: just euler
272 | 		float dt = t - S->t;
273 | 		ltensor_for(*x,i,0) x->d[i] += S->dx.d[i] * dt;
274 | 	}
275 | 	else {
276 | 		float t1 = sqrt(t * S->t),
277 | 			  dt1 = t1 - S->t,
278 | 			  a = t / S->t;
279 | 
280 | 		ltensor_for(*x,i,0) x1->d[i] = x->d[i] + S->dx.d[i] * dt1;
281 | 	
282 | 		TRYR( solver_dxdt(S, t1, x1, dx1) );
283 | 		ltensor_for(*x,i,0) {
284 | 			float d = x1->d[i] - t1 * dx1->d[i];
285 | 			x->d[i] = a * x->d[i] + (1-a) * d;
286 | 		}
287 | 	}
288 | 
289 | 	return 1;
290 | }
291 | 
// Class descriptor of the "dpmpp2s" method: two dxdt evaluations per step.
const SolverClass g_solver_dpmpp2s = {
	.step = solver_dpmpp2s_step,
	.n_fe = 2,
	.name = "dpmpp2s",
};
297 | 


--------------------------------------------------------------------------------
/src/solvers.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Initial value problem (IVP) solvers.
 5 |  * Used as sampling methods for generative diffusion models.
 6 |  *
 7 |  * Example:
 8 | 
 9 | int dxdt(Solver* S, float t, const LocalTensor* x, LocalTensor* dx) {
10 | 	unsigned n = x->n[0];
11 | 	for (unsigned i=1; i+1<n; ++i)
12 | 		dx->d[i] = (x->d[i-1] -2*x->d[i] + x->d[i+1]) / 4;
13 | 	dx->d[0] = dx->d[n-1] = 0;
14 | 	return 1;
15 | }
16 | 
void solve() {
	// Set solver
	Solver sol={ .C=&g_solver_euler };
	// Set initial time
	sol.t = 0; 
	// Set initial state (the state tensor is owned by the caller)
	LocalTensor x={0};
	ltensor_resize_zero(&x, 100,1,1,1);
	x.d[50] = 1;
	// Set differential equation
	sol.dxdt = dxdt;
	// Solve until t_end=10
	for (float dt=0.1, t_end=10, t=dt; t<=t_end; t+=dt)
		TRY( solver_step(&sol, t, &x) );
	// Do something here with the result in x .
	// You may reuse the solver by setting i_step to zero.
	// Free memory (the state tensor x must be released separately)
	solver_free(&sol);
}
35 |  */
36 | #pragma once
37 | #include "localtensor.h"
38 | 
39 | struct Solver;
40 | 
41 | typedef struct {
42 | 	int (*step)(struct Solver*, float dt, LocalTensor* x);
43 | 	int n_fe;  //number of calls to dxdt per step
44 | 	const char *name;
45 | } SolverClass;
46 | 
// Default methods
// Class descriptors of the built-in solvers.
extern const SolverClass g_solver_euler;
extern const SolverClass g_solver_heun;
extern const SolverClass g_solver_taylor3;
extern const SolverClass g_solver_dpmpp2m;
extern const SolverClass g_solver_dpmpp2s;
53 | 
// Numeric identifiers of the built-in methods (starting at 1).
// NOTE(review): presumably the idx accepted by solver_class_get - confirm.
enum {
	SOLVER_METHOD_EULER		= 1,
	SOLVER_METHOD_HEUN		= 2,
	SOLVER_METHOD_TAYLOR3	= 3,
	SOLVER_METHOD_DPMPP2M	= 4,
	SOLVER_METHOD_DPMPP2S	= 5,
};
61 | 
// Look up a solver class by index or by name.
// NOTE(review): the result for an unknown idx/name is not visible here
// (presumably NULL) - confirm in the implementation.
const SolverClass* solver_class_get(int idx);  //idx >= 1
const SolverClass* solver_class_find(const char* name);
64 | 
/* Solver instance: the selected method, its working state and the user
 * supplied differential equation.
 */
typedef struct Solver {
	const SolverClass *C;  // Fill before using

	// State
	LocalTensor dx,  // derivative buffer; step functions pass it to solver_dxdt
	            tmp[8];  //vector, temporal tensors
	float t;  // current time; step functions read it as the start of the step
	// i_step: step counter, 0 on the first step.
	// i_tmp: NOTE(review): presumably the next free slot in tmp - confirm.
	unsigned i_step, i_tmp;

	// Config (fill before use)
	// Callback that evaluates dx/dt at time t for state x and stores it in dx.
	int (*dxdt)(struct Solver*, float t, const LocalTensor* x, LocalTensor* dx);
	void *user;  // not used by the code visible here; available to the callback
} Solver;
78 | 
// Frees the memory held by the solver.
void solver_free(Solver* S);

// Advances the state x to the time t.
// NOTE(review): presumably dispatches to S->C->step and updates S->t and
// S->i_step - confirm in the implementation.
int solver_step(Solver* S, float t, LocalTensor* x);
82 | 
83 | static inline
84 | int solver_dxdt(Solver* S, float t, const LocalTensor* x, LocalTensor* dx)
85 | {
86 | 	assert( ltensor_shape_equal(x, dx) );
87 | 	int r = S->dxdt(S, t, x, dx);
88 | 	if (r < 0) return r;
89 | 	return r;
90 | }
91 | 


--------------------------------------------------------------------------------
/src/str_match_util.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: Zlib
 3 |  */
 4 | #pragma once
 5 | #include <stdint.h>
 6 | #include "ccommon/unicode.h"
 7 | #include "ccommon/unicode_data.h"
 8 | 
 9 | static inline
10 | int chr_ascii_space_is(int c) {
11 | 	return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\v'
12 | 		|| c == '\f';
13 | }
14 | 
// Maps 'A'..'Z' to 'a'..'z'; every other value is returned unchanged.
static inline
int chr_ascii_lower(int c) {
	if (c < 'A' || c > 'Z')
		return c;
	return c - 'A' + 'a';
}
19 | 
// Lowercases the ASCII letters of the range [cur, end) in place.
static inline
void str_ascii_lower(char* cur, const char* end) {
	while (cur < end) {
		const char ch = *cur;
		if (ch >= 'A' && ch <= 'Z')
			*cur = (char)(ch + ('a' - 'A'));
		++cur;
	}
}
25 | 
/* Tries each string of the NULL-terminated list str_list, in order, as a
 * prefix of the range [*pcur, end). On the first match, *pcur is moved past
 * the matched text and the index of the string is returned. Returns -1 and
 * leaves *pcur untouched if none matches.
 * If b_lower is non-zero, the input characters are ASCII-lowercased before
 * comparing (the list strings are compared as given).
 */
static inline
int str_match_advance_multiple(const char** pcur, const char* end, int b_lower,
	const char** str_list)
{
	const char *start = *pcur;
	for (int k=0; str_list[k]; ++k) {
		const char *pat = str_list[k];
		const char *pos = start;
		while (pos < end && *pat) {
			int ch = *pos;
			if (b_lower)
				ch = chr_ascii_lower(ch);
			if (ch != *pat)
				break;
			++pos;
			++pat;
		}
		if (!*pat) {  // Whole pattern consumed: match
			*pcur = pos;
			return k;
		}
	}
	return -1;
}
// Variadic front-end for the function of the same name: the candidate strings
// are given directly as arguments and the terminating NULL is appended here.
#define str_match_advance_multiple(PCUR, END, LOWER, ...) \
	str_match_advance_multiple((PCUR), (END), (LOWER), \
		(const char*[]){__VA_ARGS__, NULL})
47 | 
/* Skips leading whitespace in [cur, end) and returns the position of the
 * first character that is neither an ASCII space nor a code point whose
 * major Unicode category is 'Z'. Returns end if everything was skipped.
 */
static inline
const char* str_unicode_space_skip(const char* cur, const char* end)
{
	for (;;) {
		if (!(cur < end))
			return cur;

		// Fast path for ASCII whitespace
		if (chr_ascii_space_is(*cur)) {
			++cur;
			continue;
		}

		// Decode one code point on a copy, commit only if it is a separator
		const char *next = cur;
		uint32_t cp = utf8_decode_next(&next, end);
		int cat = unicode_category_major(cp);
		if (cat != 'Z')
			return cur;
		cur = next;
	}
}
66 | 


--------------------------------------------------------------------------------
/src/tae.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2024-2025, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  */
  4 | #include "tae.h"
  5 | #include "mlblock_nn.h"
  6 | 
#define T  1  //true
#define F  0  //false
// Shorthand for mlctx_tensor_add using the context variable C of the caller.
#define MLN(NAME,X)  mlctx_tensor_add(C, (NAME), (X))

// The GGML scheduler has problems with in-place operations (2024-07-13),
// so the in-place variants are replaced by the regular ones when it is used.
#if USE_GGML_SCHED
	#define ggml_relu_inplace  ggml_relu
	#define ggml_tanh_inplace  ggml_tanh
#endif
 16 | 
// Tiny auto-encoder parameter set "sd1".
const SdTaeParams g_sdtae_sd1 = {
	.ch_x     = 3,   // channels of the last decoder convolution (image side)
	.ch_inner = 64,  // channels of the inner convolutions and blocks
	.ch_z     = 4,   // channels of the last encoder convolution (latent side)
	.n_blk    = 3,   // blocks per stage in the encoder/decoder loops
};
 23 | 
 24 | MLTensor* mlb_sdtae_block(MLCtx* C, MLTensor* x, int ch_out)
 25 | {
 26 | 	MLTensor *x0=x;
 27 | 	mlctx_block_begin(C);
 28 | 	int ch_in = x->ne[2];
 29 | 	x = MLN("conv.0", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T));
 30 | 	x = ggml_relu_inplace(C->cc, x);
 31 | 	x = MLN("conv.2", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T));
 32 | 	x = ggml_relu_inplace(C->cc, x);
 33 | 	x = MLN("conv.4", mlb_nn_conv2d(C, x, ch_out, 3,3, 1,1, 1,1, 1,1, T));
 34 | 	if (ch_in != ch_out)
 35 | 		x0 = MLN("skip", mlb_nn_conv2d(C, x0, ch_out, 1,1, 1,1, 1,1, 1,1, T));
 36 | 	x = ggml_add(C->cc, x, x0);
 37 | 	x = ggml_relu_inplace(C->cc, x);
 38 | 	return x;
 39 | }
 40 | 
// Writes the decimal form of I into the caller's char buffer `name` (which
// must be in scope and large enough) and evaluates to that buffer.
#define IDX2NAME(I)  (sprintf(name, "%d", (I)), name)
 42 | 
 43 | MLTensor* mlb_sdtae_encoder(MLCtx* C, MLTensor* x, const SdTaeParams* P)
 44 | {
 45 | 	int iblk=0;
 46 | 	char name[32];
 47 | 	mlctx_block_begin(C);
 48 | 
 49 | 	x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 50 | 		P->ch_inner, 3,3, 1,1, 1,1, 1,1, true));
 51 | 	x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner));
 52 | 	
 53 | 	for (int j=0; j<3; ++j) {
 54 | 		x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 55 | 			P->ch_inner, 3,3, 2,2, 1,1, 1,1, false));
 56 | 		for (int i=0; i<P->n_blk; ++i)
 57 | 			x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner));
 58 | 	}
 59 | 	
 60 | 	x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 61 | 		P->ch_z, 3,3, 1,1, 1,1, 1,1, true));
 62 | 	return x;
 63 | }
 64 | 
 65 | MLTensor* mlb_sdtae_decoder(MLCtx* C, MLTensor* x, const SdTaeParams* P)
 66 | {
 67 | 	int iblk=0;
 68 | 	char name[32];
 69 | 	mlctx_block_begin(C);
 70 | 
 71 | 	x = ggml_scale(C->cc, x, 1.0f / 3.0f);
 72 |     x = ggml_tanh_inplace(C->cc, x);
 73 |     x = ggml_scale(C->cc, x, 3.0f);
 74 | 	
 75 | 	x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 76 | 		P->ch_inner, 3,3, 1,1, 1,1, 1,1, true));
 77 | 	x = ggml_relu_inplace(C->cc, x);  iblk++;
 78 | 	
 79 | 	for (int j=0; j<3; ++j) {
 80 | 		for (int i=0; i<P->n_blk; ++i)
 81 | 			x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner));
 82 | 		x = ggml_upscale(C->cc, x, 2, GGML_SCALE_MODE_NEAREST);  iblk++;
 83 | 		x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 84 | 			P->ch_inner, 3,3, 1,1, 1,1, 1,1, false));
 85 | 	}
 86 | 	
 87 | 	x = MLN(IDX2NAME(iblk++), mlb_sdtae_block(C, x, P->ch_inner));
 88 | 	x = MLN(IDX2NAME(iblk++), mlb_nn_conv2d(C, x,
 89 | 		P->ch_x, 3,3, 1,1, 1,1, 1,1, true));
 90 | 
 91 | 	return x;
 92 | }
 93 | 
 94 | int sdtae_encode(MLCtx* C, const SdTaeParams* P,
 95 | 	const LocalTensor* img, LocalTensor* latent)
 96 | {
 97 | 	int R=1;
 98 | 	
 99 | 	const int f = 8;  //latent to image scale (8 for SD)
100 | 	if (!(img->n[0]%f==0 && img->n[1]%f==0 && img->n[2]==3 && img->n[3]==1))
101 | 		ERROR_LOG(-1, "invalid input image shape: " LT_SHAPE_FMT,
102 | 			LT_SHAPE_UNPACK(*img));
103 | 	
104 | 	mlctx_begin(C, "TAE encode");
105 | 	
106 | 	MLTensor *input = mlctx_input_new(C, "img", GGML_TYPE_F32,
107 | 		LT_SHAPE_UNPACK(*img) );
108 | 	MLTensor *output = mlb_sdtae_encoder(C, input, P);
109 | 	mlctx_tensor_add(C, "encoder.layers", output);
110 | 
111 | 	TRY( mlctx_run(C, latent, img) );
112 | 
113 | end:
114 | 	return R;
115 | }
116 | 
117 | int sdtae_decode(MLCtx* C, const SdTaeParams* P,
118 | 	const LocalTensor* latent, LocalTensor* img)
119 | {
120 | 	int R=1;
121 | 
122 | 	TRY( ltensor_shape_check_log(latent, "latent", 0,0,4,1) );
123 | 	
124 | 	mlctx_begin(C, "TAE decode");
125 | 	
126 | 	MLTensor *input = mlctx_input_new(C, "latent", GGML_TYPE_F32,
127 | 		LT_SHAPE_UNPACK(*latent));
128 | 	MLTensor *output = mlb_sdtae_decoder(C, input, P);
129 | 	mlctx_tensor_add(C, "decoder.layers", output);
130 | 
131 | 	TRY( mlctx_run(C, img, latent) );
132 | 
133 | end:
134 | 	mlctx_end(C);
135 | 	return R;
136 | }
137 | 


--------------------------------------------------------------------------------
/src/tae.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Tiny auto-encoder
 5 |  *
 6 |  * References:
 7 |  *   https://github.com/madebyollin/taesd/blob/main/taesd.py 
 8 |  */
 9 | #pragma once
10 | #include "mlblock.h"
11 | #include "localtensor.h"
12 | 
// Tiny auto-encoder configuration.
// ch_x: image channels, ch_inner: channels of the inner layers,
// ch_z: latent channels, n_blk: number of blocks per stage.
typedef struct {
	int ch_x, ch_inner, ch_z, n_blk;
} SdTaeParams;
16 | 
// Predefined parameter set "sd1".
extern const SdTaeParams g_sdtae_sd1;

// Builds the encoder graph on top of x and returns its output tensor.
MLTensor* mlb_sdtae_encoder(MLCtx* C, MLTensor* x, const SdTaeParams* P);

// Builds the decoder graph on top of x and returns its output tensor.
MLTensor* mlb_sdtae_decoder(MLCtx* C, MLTensor* x, const SdTaeParams* P);
22 | 
23 | /*static inline
24 | void sdtae_encoder_post(LocalTensor* out, const LocalTensor* latent)
25 | {	// [0,1] -> [-1,1]
26 | 	ltensor_resize_like(out, latent);
27 | 	ltensor_for(*out,i,0) out->d[i] = latent->d[i]*2 -1;
28 | }
29 | 
30 | static inline
31 | void sdtae_decoder_pre(LocalTensor* out, const LocalTensor* latent)
32 | {	// [-1,1] -> [0,1]
33 | 	ltensor_resize_like(out, latent);
34 | 	ltensor_for(*out,i,0) out->d[i] = (latent->d[i]+1)/2;
35 | }*/
36 | 
// Runs the encoder: img -> latent.
// NOTE(review): return convention presumably 1 on success, negative on error
// - confirm in the implementation.
int sdtae_encode(MLCtx* C, const SdTaeParams* P,
	const LocalTensor* img, LocalTensor* latent);

// Runs the decoder: latent -> img. Same return convention as sdtae_encode.
int sdtae_decode(MLCtx* C, const SdTaeParams* P,
	const LocalTensor* latent, LocalTensor* img);
42 | 


--------------------------------------------------------------------------------
/src/tensor_name_conv.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Conversions from the multiple model tensor naming schemes to internal names.
 5 |  */
 6 | #pragma once
 7 | #include "ccommon/vector.h"
 8 | #include "ccommon/strslice.h"
 9 | 
// Converts the tensor name `name` into the internal naming and stores it in out.
// NOTE(review): presumably returns one of the TNCONV_R_* codes declared in
// this header, or a negative value on error - confirm in the implementation.
int tnconv_sd(StrSlice name, DynStr *out);
11 | 
// Result codes of a tensor name conversion.
// NOTE(review): meanings inferred from the identifiers only - confirm.
enum tensor_name_convert_result_t {
	TNCONV_R_UNUSED = 0,    // presumably: tensor not used by the model
	TNCONV_R_GOOD = 1,      // presumably: name converted
	TNCONV_R_QKV_PROJ = 2,  // presumably: fused q/k/v projection tensor
};
17 | 


--------------------------------------------------------------------------------
/src/test_common.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2025, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  */
 4 | #pragma once
 5 | #include <stdio.h>
 6 | #include <stdint.h>
 7 | #include <stdlib.h>
 8 | #include <string.h>
 9 | 
// Prints a printf-style message followed by a newline to stdout.
// Note: the name shadows log() from <math.h> in files that include both.
#define log(...) do { \
	printf(__VA_ARGS__); \
	printf("\n"); \
} while (0)
14 | 
// Prints "ERROR " plus a printf-style message and a "TEST FAIL <file>" line
// to stdout, then terminates the process with exit status 1.
#define error(...) do { \
	printf("ERROR "); \
	printf(__VA_ARGS__); \
	printf("\n"); \
	printf("TEST FAIL " __FILE__ "\n"); \
	exit(1); \
} while (0)
22 | 
// Prints "DEBUG " plus a printf-style message and a newline to stdout.
// Expands to nothing when NDEBUG is defined.
#ifdef NDEBUG
#define debug(...)
#else
#define debug(...) do { \
	printf("DEBUG "); \
	printf(__VA_ARGS__); \
	printf("\n"); \
} while (0)
#endif
32 | 
// Fails the test with error(...) when the ints A and B differ.
// A and B are evaluated once into the locals `a` and `b`, which the message
// arguments may reference.
#define assert_int(A, B, ...) do { \
	int a = (A), b = (B); \
	if (a != b) error(__VA_ARGS__); \
} while(0)
37 | 


--------------------------------------------------------------------------------
/src/test_prompt_preproc.c:
--------------------------------------------------------------------------------
  1 | /* Copyright 2025, Alejandro A. García <aag@zorzal.net>
  2 |  * SPDX-License-Identifier: MIT
  3 |  *
  4 |  * Test of the prompt preprocessing.
  5 |  */
  6 | #include "prompt_preproc.h"
  7 | #include "test_common.h"
  8 | 
// Expected text (or lora name) together with its expected weight.
typedef struct {
	const char *text;
	float w;
} TestChunk;
 13 | 
// Expected result of parsing one prompt: text chunks and loras.
typedef struct {
	unsigned n_chunk, n_lora;  // number of entries in chunks / loras
	TestChunk *chunks;
	TestChunk *loras;
} TestPrompt;
 19 | 
// Fails the test when the parsed chunk A differs from the expected chunk B
// in text or weight. A description of the mismatch is formatted into the
// local `errstr`, which the message arguments may reference.
#define assert_chunk(A, B, ...) do { \
	const struct PromptTextChunk a = (A); \
	const TestChunk b = (B); \
	if (strsl_cmpz(a.text, b.text) || a.w != b.w) { \
		DynStr errstr=NULL; \
		dstr_printf(errstr, "'%.*s' %g != '%s' %g", (int)strsl_len(a.text), \
			strsl_begin(a.text), a.w, b.text, b.w); \
		error(__VA_ARGS__); \
	} \
} while(0)
 30 | 
 31 | #define assert_lora(A, B, ...) do { \
 32 | 	const struct PromptTextLora a = (A); \
 33 | 	const TestChunk b = (B); \
 34 | 	if (strsl_cmpz(a.name, b.text) || a.w != b.w) { \
 35 | 		DynStr errstr=NULL; \
 36 | 		dstr_printf(errstr, "'%.*s' %g != '%s' %g", (int)strsl_len(a.name), \
 37 | 			strsl_begin(a.name), b.w, b.text, b.w); \
 38 | 		error(__VA_ARGS__); \
 39 | 	} \
 40 | } while(0)
 41 | 
// Designated initializers for TestPrompt.n_chunk / .chunks from a list of
// {text, weight} entries. A zeroed entry is appended and not counted.
#define CHUNKS(...) \
	.n_chunk=sizeof((TestChunk[]){__VA_ARGS__, {0}})/sizeof(TestChunk)-1, \
	.chunks=(TestChunk[]){__VA_ARGS__, {0}}
 45 | 
// Designated initializers for TestPrompt.n_lora / .loras from a list of
// {name, weight} entries. A zeroed entry is appended and not counted.
#define LORAS(...) \
	.n_lora=sizeof((TestChunk[]){__VA_ARGS__, {0}})/sizeof(TestChunk)-1, \
	.loras=(TestChunk[]){__VA_ARGS__, {0}}
 49 | 
// Runs test() on TEXT; the remaining arguments initialize the expected
// TestPrompt (typically CHUNKS(...) and optionally LORAS(...)).
#define TEST(TEXT, ...) \
	test((TEXT), (TestPrompt){__VA_ARGS__})
 52 | 
 53 | static
 54 | void assert_prompt(const PromptText pt, const TestPrompt exp, const char *text)
 55 | {
 56 | 	assert_int( vec_count(pt.chunks), exp.n_chunk,
 57 | 		"in '%s':\nchunks returned: %d, expected: %d", text, a, b);
 58 | 	
 59 | 	assert_int( vec_count(pt.loras), exp.n_lora,
 60 | 		"in '%s':\nloras returned: %d, expected: %d", text, a, b);
 61 | 
 62 | 	for (unsigned i=0; i<exp.n_chunk; ++i) {
 63 | 		assert_chunk(pt.chunks[i], exp.chunks[i],
 64 | 			"in '%s':\nchunk %u: %s", text, i, errstr);
 65 | 	}
 66 | 
 67 | 	for (unsigned i=0; i<exp.n_lora; ++i) {
 68 | 		assert_lora(pt.loras[i], exp.loras[i],
 69 | 			"in '%s':\nlora %u: %s", text, i, errstr);
 70 | 	}
 71 | }
 72 | 
 73 | static
 74 | void test(const char* text, const TestPrompt exp)
 75 | {
 76 | 	debug("%s", text);
 77 | 
 78 | 	int r;
 79 | 	PromptText pt={0};
 80 | 	r = prompt_text_set_parse(&pt, strsl_fromz(text));
 81 | 	if (r < 0)
 82 | 		error("prompt_text_set_parse('%s'): 0x%x", text, -r);
 83 | 	
 84 | 	assert_prompt(pt, exp, text);
 85 | 	
 86 | 	prompt_text_free(&pt);
 87 | }
 88 | 
 89 | static
 90 | void test_raw(const char* text)
 91 | {
 92 | 	debug("%s", text);
 93 | 	PromptText pt={0};
 94 | 	prompt_text_set_raw(&pt, strsl_fromz(text));
 95 | 	assert_prompt(pt, (TestPrompt){ CHUNKS({text, 1}) }, text);
 96 | 	prompt_text_free(&pt);
 97 | }
 98 | 
// Test driver. Any failing check terminates the process through error();
// reaching the end prints "TEST OK" and returns 0. Arguments are unused.
int main(int argc, char* argv[])
{
	// Raw mode: markup characters must be kept verbatim
	test_raw("a (dog:1.5) jumping [in] the ((park))");

	// Simple
	TEST("a dog jumping",
		CHUNKS({"a dog jumping", 1}));
	// Emphasis
	TEST("a (dog) jumping",
		CHUNKS({"a ", 1}, {"dog", 1.1}, {" jumping", 1}));
	TEST("a [dog] jumping",
		CHUNKS({"a ", 1}, {"dog", 1/1.1}, {" jumping", 1}));
	TEST("a ((dog)) jumping",
		CHUNKS({"a ", 1}, {"dog", 1.1*1.1}, {" jumping", 1}));
	TEST("a (dog:1.5) jumping",
		CHUNKS({"a ", 1}, {"dog", 1.5}, {" jumping", 1}));
	// Loras
	TEST("a dog jum<lora:LORA NAME>ping",
		CHUNKS({"a dog jumping", 1}),
		LORAS({"LORA NAME", 1}));
	TEST("a dog jum<lora:LORA NAME:0.8>ping",
		CHUNKS({"a dog jumping", 1}),
		LORAS({"LORA NAME", 0.8}));
	// Escapes
	TEST("a \\(dog\\) jumping",
		CHUNKS({"a (dog) jumping", 1}));
	TEST("a dog jum\\<lora:LORA NAME>ping",
		CHUNKS({"a dog jum<lora:LORA NAME>ping", 1}));

	log("TEST OK "__FILE__);
	return 0;
}
131 | 


--------------------------------------------------------------------------------
/src/test_rng.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Utility to test the Philox RNG.
 5 |  */
 6 | #include "ccommon/timing.h"
 7 | #include "ccommon/rng_philox.h"
 8 | #include <stdlib.h>
 9 | #include <stdio.h>
10 | 
11 | /* Seed: 0, Offset: 0, n: 12
12 |  -0.92466259
13 |  -0.42534414
14 |  -2.64384580
15 |   0.14518388
16 |  -0.12086648
17 |  -0.57972562
18 |  -0.62285119
19 |  -0.32838708
20 |  -1.07454228
21 |  -0.36314407
22 |  -1.67105067
23 |   2.26550508
24 | */
25 | 
26 | int main(int argc, char* argv[])
27 | {
28 | 	RngPhilox rng={0};
29 | 	unsigned n=12;
30 | 	
31 | 	if (argc > 1) rng.seed = strtoull(argv[1], NULL, 10);
32 | 	if (argc > 2) rng.offset = strtoul(argv[2], NULL, 10);
33 | 	if (argc > 3) n = strtoul(argv[3], NULL, 10);
34 | 
35 | 	float *out = malloc(sizeof(float)*n);
36 | 	if (!out) { printf("out of memory\n"); return 1; }
37 | 	
38 | 	double t = timing_time();
39 | 	rng_philox_randn(&rng, n, out);
40 | 	t = timing_time() - t;
41 | 	fprintf(stderr, "%d numbers in %.3fms (%.3fns/num)\n", n, t*1e3, t*1e9/n);
42 | 	for (unsigned i=0; i<n; ++i) printf("%12.8f\n", out[i]);
43 | 	
44 | 	return 0;
45 | }
46 | 


--------------------------------------------------------------------------------
/src/test_text_tokenize_clip.c:
--------------------------------------------------------------------------------
 1 | /* Copyright 2025, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Test of the CLIP tokenizer.
 5 |  */
 6 | #include "mlimgsynth.h"
 7 | #include "test_common.h"
 8 | 
// Tokenizes TEXT and compares the result with the listed token ids.
// A -1 terminator is appended to the list. Requires `ctx` in scope.
#define TEST(TEXT, ...) \
	test(ctx, (TEXT), (const int32_t[]){__VA_ARGS__, -1})
11 | 
// Same as TEST, but expects no tokens at all. Requires `ctx` in scope.
#define TEST_EMPTY(TEXT) \
	test(ctx, (TEXT), (const int32_t[]){-1})
14 | 
15 | static
16 | void test(MLIS_Ctx* ctx, const char* text, const int32_t* expected)
17 | {
18 | 	debug("%s", text);
19 | 
20 | 	int32_t *tokens=NULL;
21 | 	int r = mlis_text_tokenize(ctx, text, &tokens, MLIS_SUBMODEL_CLIP);
22 | 	if (r < 0)
23 | 		error("mlis_tokenize('%s'): 0x%x", text, -r);
24 | 	
25 | 	int i;
26 | 	for (i=0; i<r; ++i) {
27 | 		if (tokens[i] != expected[i])
28 | 			error("in '%s':\ntoken[%d] = %d != %d",
29 | 				text, i, tokens[i], expected[i]);
30 | 	}
31 | 	if (expected[i] != -1)
32 | 		error("in '%s':\n%d tokens returned, but expected more", text, r);
33 | }
34 | 
// Test driver. Creates a context configured for the SD1 model type and runs
// the tokenizer checks. Any failing check terminates the process through
// error(); reaching the end prints "TEST OK" and returns 0.
int main(int argc, char* argv[])
{
	MLIS_Ctx *ctx = mlis_ctx_create();
	mlis_option_set(ctx, MLIS_OPT_MODEL_TYPE, MLIS_MODEL_TYPE_SD1);

	// Simple
	TEST("a dog jumping", 320, 1929, 11476);
	// Superfluous spacing
	TEST("   a   dog\t\tjumping\r\n", 320, 1929, 11476);
	// Merges are important
	TEST("an illustration", 550, 6052);
	// Quotes
	TEST("a sign saying \"Here lies Cesar\"", 320, 2292, 4455, 257, 763, 3205, 28603, 257);
	TEST("a sign saying 'Here lies Cesar'", 320, 2292, 4455, 262, 763, 3205, 28603, 262);
	// Number
	TEST("2025", 17, 15, 17, 276);
	// English contractions
	TEST("A'veA'llA's", 320, 1200, 320, 1342, 320, 568);
	// Empty
	TEST_EMPTY("");
	// Space only
	TEST_EMPTY("  \t  \n");
	// Punctuation
	TEST("a dog, a house.", 320, 1929, 267, 320, 1212, 269);
	// UTF-8
	TEST("coraz\xc3\xb3n", 851, 854, 13926);
	// Unicode dash in between ascii ones
	TEST("cat---dog-\xe2\x80\x94-rabbit", 2368, 11079, 1929, 12, 6718, 268, 10274);
	// Unicode word split. Japanese: "Maa, machinanasai."
	TEST("\xe3\x81\xbe\xe3\x81\x82\xe3\x80\x81\xe3\x81\x8a\xe5\xbe\x85\xe3\x81\xa1\xe3\x81\xaa\xe3\x81\x95\xe3\x81\x84\xe3\x80\x82", 4813, 122, 4813, 480, 45262, 4813, 232, 161, 122, 227, 4813, 94, 29104, 4813, 243, 38850, 38000);
	// Long text. Split words.
	TEST("Stable Diffusion is a deep learning, text-to-image model released in 2022 based on diffusion techniques.", 10492, 18656, 9364, 533, 320, 3383, 2378, 267, 4160, 268, 531, 268, 2867, 2863, 3410, 530, 17, 15, 17, 273, 2812, 525, 18656, 9364, 1782, 697, 7715, 269);

	log("TEST OK "__FILE__);
	mlis_ctx_destroy(&ctx);
	return 0;
}
72 | 


--------------------------------------------------------------------------------
/src/unet.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * UNet implementation for denoising in SD.
 5 |  */
 6 | #pragma once
 7 | #include "mlblock.h"
 8 | #include "localtensor.h"
 9 | 
// Configuration of the UNet denoiser.
// NOTE(review): field meanings below that are not stated by an original
// comment are inferred from the names only - confirm against unet.c.
typedef struct {
	int n_ch_in;   // presumably input channels
	int n_ch_out;  // presumably output channels
	int n_res_blk;
	int attn_res[4];
	int ch_mult[5];
	int transf_depth[5];
	int n_te;  //time embedding dimensions
	int n_head;
	int d_head;
	int n_ctx;
	int n_ch;
	int ch_adm_in;

	// Single-bit feature flags
	unsigned clip_norm:1,
	         cond_label:1,
	         uncond_empty_zero:1,
			 vparam:1;
	
	int n_step_train;
	float sigma_min;
	float sigma_max;
	float *log_sigmas;  //[n_step_train]
} UnetParams;
34 | 
// Predefined parameter sets.
extern const UnetParams g_unet_sd1;		//SD 1.x
extern const UnetParams g_unet_sd2;		//SD 2.x
extern const UnetParams g_unet_sdxl;	//SDXL
//extern const UnetParams g_unet_svd;		//SVD (stable video diffusion)
39 | 
// Builds the UNet denoising graph and returns its output tensor.
// NOTE(review): x is presumably the noisy latent, time the time embedding
// input, c the conditioning and label an optional label input - confirm.
MLTensor* mlb_unet_denoise(MLCtx* C, MLTensor* x, MLTensor* time, MLTensor* c,
	MLTensor* label, const UnetParams* P);
42 | 
void unet_params_init();  //fill global log_sigmas

// Conversions between the noise level sigma and the time step t.
// NOTE(review): presumably based on P->log_sigmas - confirm.
float unet_sigma_to_t(const UnetParams* P, float sigma);

float unet_t_to_sigma(const UnetParams* P, float t);
48 | 
// State used by unet_denoise_init / unet_denoise_run.
typedef struct {
	MLCtx *ctx;
	const UnetParams *par;
	// nfe: NOTE(review): presumably a counter of UNet evaluations - confirm.
	// split: single-bit flag matching the `split` argument of unet_denoise_init.
	unsigned nfe, split:1;
} UnetState;
54 | 
// Prepares the state S for denoising with the context C and parameters P.
// NOTE(review): lw and lh are presumably the latent width and height - confirm.
int unet_denoise_init(UnetState* S, MLCtx* C, const UnetParams* P,
	unsigned lw, unsigned lh, bool split);

// Runs the denoiser on x at noise level sigma with the given conditioning
// and stores the result in dx.
// NOTE(review): return convention not visible here - confirm.
int unet_denoise_run(UnetState* S,
	const LocalTensor* x, const LocalTensor* cond, const LocalTensor* label,
	float sigma, LocalTensor* dx);
61 | 


--------------------------------------------------------------------------------
/src/vae.h:
--------------------------------------------------------------------------------
 1 | /* Copyright 2024, Alejandro A. García <aag@zorzal.net>
 2 |  * SPDX-License-Identifier: MIT
 3 |  *
 4 |  * Variational auto-encoder.
 5 |  */
 6 | #pragma once
 7 | #include "mlblock.h"
 8 | #include "localtensor.h"
 9 | 
// Configuration of the variational auto-encoder.
// NOTE(review): field meanings not stated by an original comment are
// inferred from the names only - confirm against the implementation.
typedef struct {
	int ch_x,   // presumably image channels
	    ch_z,   // presumably latent channels
		ch,
		n_res,
		n_res_blk,
		ch_mult[5],
		d_embed,
		f_down;  //downsampling total factor
	float scale_factor;
} VaeParams;
21 | 
// Predefined parameter sets.
extern const VaeParams g_vae_sd1;	//SD 1.x & 2.x
extern const VaeParams g_vae_sdxl;	//SDXL
24 | 
// Builds the VAE encoder graph on top of x and returns its output tensor.
MLTensor* mlb_sdvae_encoder(MLCtx* C, MLTensor* x, const VaeParams* P);

// Builds the VAE decoder graph on top of x and returns its output tensor.
MLTensor* mlb_sdvae_decoder(MLCtx* C, MLTensor* x, const VaeParams* P);

// Fills latent from the encoder moments.
// NOTE(review): presumably takes the mean of the distribution - confirm.
void sdvae_latent_mean(LocalTensor* latent, const LocalTensor* moments,
	const VaeParams* P);

// Fills latent from the encoder moments.
// NOTE(review): presumably draws a random sample - confirm.
void sdvae_latent_sample(LocalTensor* latent, const LocalTensor* moments,
	const VaeParams* P);
34 | 
35 | static inline
36 | void sdvae_encoder_pre(LocalTensor* out, const LocalTensor* img)
37 | {	// [0,1] -> [-1,1]
38 | 	ltensor_resize_like(out, img);
39 | 	ltensor_for(*out,i,0) out->d[i] = img->d[i]*2 -1;
40 | }
41 | 
42 | static inline
43 | void sdvae_decoder_post(LocalTensor* out, const LocalTensor* img)
44 | {	// [-1,1] -> [0,1]
45 | 	ltensor_resize_like(out, img);
46 | 	ltensor_for(*out,i,0) out->d[i] = (img->d[i]+1)/2;
47 | }
48 | 
// Runs the VAE encoder: img -> latent.
// NOTE(review): tile_px is presumably the tile size in pixels for tiled
// processing; return convention not visible here - confirm.
int sdvae_encode(MLCtx* C, const VaeParams* P,
	const LocalTensor* img, LocalTensor* latent, int tile_px);

// Runs the VAE decoder: latent -> img. Same conventions as sdvae_encode.
int sdvae_decode(MLCtx* C, const VaeParams* P,
	const LocalTensor* latent, LocalTensor* img, int tile_px);
54 | 


--------------------------------------------------------------------------------