├── .github ├── FUNDING.yml ├── workflows │ ├── pre-commit.yml │ └── build-publish.yml ├── ISSUE_TEMPLATE │ ├── question.md │ ├── bug.md │ └── proposal.md ├── PULL_REQUEST_TEMPLATE.md └── stale.yml ├── tinyscaler-text.png ├── MANIFEST.in ├── .pre-commit-config.yaml ├── src ├── scaler.h ├── tinyscaler.pyx └── scaler.c ├── setup.py ├── LICENSE ├── pyproject.toml ├── examples ├── benchmark_area.py └── benchmark_bilinear.py └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | github: Farama-Foundation 2 | -------------------------------------------------------------------------------- /tinyscaler-text.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Farama-Foundation/TinyScaler/HEAD/tinyscaler-text.png -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md LICENSE.md 2 | include CMakeLists.txt 3 | include src/tinyscaler.pyx src/scaler.h src/scaler.c 4 | recursive-exclude examples/ 5 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | --- 2 | repos: 3 | - repo: https://github.com/psf/black 4 | rev: 23.3.0 5 | hooks: 6 | - id: black 7 | - repo: https://github.com/PyCQA/flake8 8 | rev: 6.0.0 9 | hooks: 10 | - id: flake8 11 | args: 12 | - --max-complexity=30 13 | - --max-line-length=456 14 | - --show-source 15 | - --statistics 16 | - repo: https://github.com/PyCQA/isort 17 | rev: 5.12.0 18 | hooks: 19 | - id: isort 20 | args: ["--profile", "black"] 21 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | # 
https://pre-commit.com 2 | # This GitHub Action assumes that the repo contains a valid .pre-commit-config.yaml file. 3 | name: pre-commit 4 | on: 5 | pull_request: 6 | push: 7 | branches: [main] 8 | 9 | permissions: 10 | contents: read # to fetch code (actions/checkout) 11 | 12 | jobs: 13 | pre-commit: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v3 17 | - uses: actions/setup-python@v4 18 | - run: pip install pre-commit 19 | - run: pre-commit --version 20 | - run: pre-commit install 21 | - run: pre-commit run --all-files 22 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Question 3 | about: Ask a question 4 | title: "[Question] Question title" 5 | --- 6 | 7 | 8 | ### Question 9 | 10 | If you're a beginner and have basic questions, please ask on [r/reinforcementlearning](https://www.reddit.com/r/reinforcementlearning/) or in the [RL Discord](https://discord.com/invite/xhfNqQv) (if you're new please use the beginners channel). Basic questions that are not bugs or feature requests will be closed without reply, because GitHub issues are not an appropriate venue for these. 11 | 12 | Advanced/nontrivial questions, especially in areas where documentation is lacking, are very much welcome. 13 | -------------------------------------------------------------------------------- /src/scaler.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | #if defined(__x86_64__) 8 | #include // SSE 9 | #elif defined(__arm__) 10 | #include // Neon 11 | #endif 12 | 13 | #define RGBA32F_SIZE 16 // Byte size of a pixel 14 | 15 | #ifndef max 16 | #define max(a,b) (((a) > (b)) ? (a) : (b)) 17 | #define min(a,b) (((a) < (b)) ? 
(a) : (b)) 18 | #endif 19 | 20 | typedef unsigned char u8; 21 | typedef int i32; 22 | typedef float f32; 23 | typedef double f64; 24 | 25 | // Scalers 26 | void scale_nearest_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height); 27 | void scale_bilinear_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height); 28 | void scale_area_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height); 29 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import platform 2 | 3 | from Cython.Build import cythonize 4 | from setuptools import Extension, setup 5 | 6 | ext_modules = [] 7 | 8 | if platform.system() == "Windows": # Windows 9 | ext_modules = [Extension("tinyscaler", ["src/*.pyx"])] 10 | else: # Not Windows 11 | if platform.machine() in ["x86_64", "arm64", "aarch64"]: # Detect 64-bit platforms 12 | ext_modules = [Extension("tinyscaler", ["src/*.pyx"])] 13 | else: # Arm assumed 14 | ext_modules = [ 15 | Extension( 16 | "tinyscaler", 17 | ["src/*.pyx"], 18 | extra_compile_args=["-mfpu=neon"], 19 | extra_link_args=["-mfpu=neon"], 20 | ) 21 | ] 22 | 23 | setup( 24 | name="tinyscaler", 25 | ext_modules=cythonize( 26 | ext_modules, language_level=3, compiler_directives={"annotation_typing": False} 27 | ), 28 | ) 29 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug Report 3 | about: Submit a bug report 4 | title: "[Bug Report] Bug title" 5 | 6 | --- 7 | 8 | If you are submitting a bug report, please fill in the following details and use the tag [bug]. 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 
12 | 13 | **Code example** 14 | Please try to provide a minimal example to reproduce the bug. Error messages and stack traces are also helpful. 15 | 16 | **System Info** 17 | Describe the characteristics of your environment: 18 | * Describe how TinyScaler was installed (pip, docker, source, ...) 19 | * What OS/version you're using. 20 | * Python version 21 | 22 | **Additional context** 23 | Add any other context about the problem here. 24 | 25 | ### Checklist 26 | 27 | - [ ] I have checked that there is no similar [issue](https://github.com/Farama-Foundation/TinyScaler/issues) in the repo (**required**) 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/proposal.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Proposal 3 | about: Propose changes that are not bug fixes 4 | title: "[Proposal] Proposal title" 5 | --- 6 | 7 | 8 | 9 | ### Proposal 10 | 11 | A clear and concise description of the proposal. 12 | 13 | ### Motivation 14 | 15 | Please outline the motivation for the proposal. 16 | Is your feature request related to a problem? e.g., "I'm always frustrated when [...]". 17 | If this is related to another GitHub issue, please link here too. 18 | 19 | ### Pitch 20 | 21 | A clear and concise description of what you want to happen. 22 | 23 | ### Alternatives 24 | 25 | A clear and concise description of any alternative solutions or features you've considered, if any. 26 | 27 | ### Additional context 28 | 29 | Add any other context or screenshots about the feature request here. 
30 | 31 | ### Checklist 32 | 33 | - [ ] I have checked that there is no similar [issue](https://github.com/Farama-Foundation/TinyScaler/issues) in the repo (**required**) 34 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License 2 | 3 | Copyright (c) 2021 Farama Foundation 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # Package ###################################################################### 2 | 3 | [build-system] 4 | requires = ["setuptools", "wheel", "Cython"] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "tinyscaler" 9 | description = "A tiny, simple image scaler." 
10 | readme = "README.md" 11 | requires-python = ">= 3.8" 12 | authors = [{ name = "Farama Foundation", email = "contact@farama.org" }] 13 | license = { text = "MIT License" } 14 | keywords = ["Reinforcement Learning", "Gymnasium", "PettingZoo"] 15 | classifiers = [ 16 | "Development Status :: 4 - Beta", # change to `5 - Production/Stable` when ready 17 | "License :: OSI Approved :: MIT License", 18 | "Programming Language :: Python :: 3", 19 | "Programming Language :: Python :: 3.8", 20 | "Programming Language :: Python :: 3.9", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | 'Intended Audience :: Science/Research', 25 | 'Topic :: Scientific/Engineering :: Artificial Intelligence', 26 | ] 27 | version="1.2.8" 28 | dependencies = [ 29 | "numpy >=1.21.0", 30 | ] 31 | 32 | [project.urls] 33 | Homepage = "https://farama.org" 34 | Repository = "https://github.com/Farama-Foundation/TinyScaler" 35 | Documentation = "https://github.com/Farama-Foundation/TinyScaler" 36 | "Bug Report" = "https://github.com/Farama-Foundation/TinyScaler/issues" 37 | -------------------------------------------------------------------------------- /examples/benchmark_area.py: -------------------------------------------------------------------------------- 1 | # Benchmark between tinyscaler, OpenCV, Pillow, and skImage using area filtering 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | import tinyscaler 7 | 8 | # Disable multithreading and GPU support for OpenCV for a single-threaded CPU comparison 9 | cv2.setNumThreads(1) 10 | cv2.ocl.setUseOpenCL(False) 11 | 12 | # Number of scales to perform 13 | numScales = 100 14 | 15 | # Loading this image: https://github.com/Cykooz/fast_image_resize/blob/main/data/nasa-4928x3279.png 16 | img8 = cv2.cvtColor(cv2.imread("nasa-4928x3279.png"), cv2.COLOR_BGR2RGBA) 17 | img = (img8 / 255.0).astype(np.float32) # Preferred format 18 | 19 | targetSize 
= (852, 567) 20 | 21 | dst = np.empty((targetSize[1], targetSize[0], 4), dtype=np.float32) 22 | 23 | start = time.perf_counter() 24 | 25 | for t in range(numScales): 26 | tinyscaler.scale(img, targetSize, mode="area", dst=dst) 27 | 28 | end = time.perf_counter() 29 | 30 | print("Time elapsed for tinyscaler: " + str(end - start)) 31 | 32 | # Save the result from tinyscaler for viewing 33 | cv2.imwrite( 34 | "result.png", cv2.cvtColor((dst * 255.0).astype(np.uint8), cv2.COLOR_RGBA2BGR) 35 | ) 36 | 37 | start = time.perf_counter() 38 | 39 | for t in range(numScales): 40 | cv2.resize(img, targetSize, dst=dst, interpolation=cv2.INTER_AREA) 41 | 42 | end = time.perf_counter() 43 | 44 | cv2.imwrite( 45 | "result_cv.png", cv2.cvtColor((dst * 255.0).astype(np.uint8), cv2.COLOR_RGBA2BGR) 46 | ) 47 | 48 | print("Time elapsed for OpenCV: " + str(end - start)) 49 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Description 2 | 3 | Please include a summary of the change and which issue is fixed. Please also include relevant motivation and context. List any dependencies that are required for this change. 4 | 5 | Fixes # (issue) 6 | 7 | ## Type of change 8 | 9 | Please delete options that are not relevant. 10 | 11 | - [ ] Bug fix (non-breaking change which fixes an issue) 12 | - [ ] New feature (non-breaking change which adds functionality) 13 | - [ ] Breaking change (fix or feature that would cause existing functionality to not work as expected) 14 | - [ ] This change requires a documentation update 15 | 16 | ### Screenshots 17 | Please attach before and after screenshots of the change if applicable. 
18 | 19 | 29 | 30 | # Checklist: 31 | 32 | - [ ] I have run the [`pre-commit` checks](https://pre-commit.com/) with `pre-commit run --all-files` (see `CONTRIBUTING.md` instructions to set it up) 33 | - [ ] I have commented my code, particularly in hard-to-understand areas 34 | - [ ] I have made corresponding changes to the documentation 35 | - [ ] My changes generate no new warnings 36 | - [ ] I have added tests that prove my fix is effective or that my feature works 37 | - [ ] New and existing unit tests pass locally with my changes 38 | 39 | 46 | -------------------------------------------------------------------------------- /examples/benchmark_bilinear.py: -------------------------------------------------------------------------------- 1 | # Benchmark between tinyscaler, OpenCV, Pillow, and skImage using bilinear filtering 2 | import time 3 | 4 | import cv2 5 | import numpy as np 6 | import tinyscaler 7 | from PIL import Image 8 | from skimage.transform import resize 9 | 10 | # Disable multithreading and GPU support for OpenCV for a single-threaded CPU comparison 11 | cv2.setNumThreads(1) 12 | cv2.ocl.setUseOpenCL(False) 13 | 14 | # Number of scales to perform 15 | numScales = 100 16 | 17 | # Loading this image: https://github.com/Cykooz/fast_image_resize/blob/main/data/nasa-4928x3279.png 18 | img8 = cv2.cvtColor(cv2.imread("nasa-4928x3279.png"), cv2.COLOR_BGR2RGBA) 19 | img = (img8 / 255.0).astype(np.float32) # Preferred format 20 | 21 | targetSize = (852, 567) 22 | 23 | dst = np.empty((targetSize[1], targetSize[0], 4), dtype=np.float32) 24 | 25 | start = time.perf_counter() 26 | 27 | for t in range(numScales): 28 | tinyscaler.scale(img, targetSize, mode="bilinear", dst=dst) 29 | 30 | end = time.perf_counter() 31 | 32 | print("Time elapsed for tinyscaler: " + str(end - start)) 33 | 34 | # Save the result from tinyscaler for viewing 35 | cv2.imwrite( 36 | "result.png", cv2.cvtColor((dst * 255.0).astype(np.uint8), cv2.COLOR_RGBA2BGR) 37 | ) 38 | 39 | start = 
time.perf_counter() 40 | 41 | for t in range(numScales): 42 | cv2.resize(img, targetSize, dst=dst, interpolation=cv2.INTER_LINEAR) 43 | 44 | end = time.perf_counter() 45 | 46 | cv2.imwrite( 47 | "result_cv.png", cv2.cvtColor((dst * 255.0).astype(np.uint8), cv2.COLOR_RGBA2BGR) 48 | ) 49 | 50 | print("Time elapsed for OpenCV: " + str(end - start)) 51 | 52 | pimg = Image.fromarray(img8) 53 | 54 | start = time.perf_counter() 55 | 56 | for t in range(numScales): 57 | pimg.resize(targetSize, Image.Resampling.BILINEAR) 58 | 59 | end = time.perf_counter() 60 | 61 | print("Time elapsed for Pillow: " + str(end - start)) 62 | 63 | start = time.perf_counter() 64 | 65 | for t in range(numScales): 66 | resize(img, targetSize) 67 | 68 | end = time.perf_counter() 69 | 70 | print("Time elapsed for skimage: " + str(end - start)) 71 | -------------------------------------------------------------------------------- /.github/stale.yml: -------------------------------------------------------------------------------- 1 | # Configuration for probot-stale - https://github.com/probot/stale 2 | 3 | # Number of days of inactivity before an Issue or Pull Request becomes stale 4 | daysUntilStale: 60 5 | 6 | # Number of days of inactivity before an Issue or Pull Request with the stale label is closed. 7 | # Set to false to disable. If disabled, issues still need to be closed manually, but will remain marked as stale. 8 | daysUntilClose: 14 9 | 10 | # Only issues or pull requests with all of these labels are checked if stale. Defaults to `[]` (disabled) 11 | onlyLabels: 12 | - more-information-needed 13 | 14 | # Issues or Pull Requests with these labels will never be considered stale. 
Set to `[]` to disable 15 | exemptLabels: 16 | - pinned 17 | - security 18 | - "[Status] Maybe Later" 19 | 20 | # Set to true to ignore issues in a project (defaults to false) 21 | exemptProjects: true 22 | 23 | # Set to true to ignore issues in a milestone (defaults to false) 24 | exemptMilestones: true 25 | 26 | # Set to true to ignore issues with an assignee (defaults to false) 27 | exemptAssignees: true 28 | 29 | # Label to use when marking as stale 30 | staleLabel: stale 31 | 32 | # Comment to post when marking as stale. Set to `false` to disable 33 | markComment: > 34 | This issue has been automatically marked as stale because it has not had 35 | recent activity. It will be closed if no further activity occurs. Thank you 36 | for your contributions. 37 | 38 | # Comment to post when removing the stale label. 39 | # unmarkComment: > 40 | # Your comment here. 41 | 42 | # Comment to post when closing a stale Issue or Pull Request. 43 | # closeComment: > 44 | # Your comment here. 45 | 46 | # Limit the number of actions per hour, from 1-30. Default is 30 47 | limitPerRun: 30 48 | 49 | # Limit to only `issues` or `pulls` 50 | only: issues 51 | 52 | # Optionally, specify configuration settings that are specific to just 'issues' or 'pulls': 53 | # pulls: 54 | # daysUntilStale: 30 55 | # markComment: > 56 | # This pull request has been automatically marked as stale because it has not had 57 | # recent activity. It will be closed if no further activity occurs. Thank you 58 | # for your contributions. 
59 | 60 | # issues: 61 | # exemptLabels: 62 | # - confirmed -------------------------------------------------------------------------------- /.github/workflows/build-publish.yml: -------------------------------------------------------------------------------- 1 | # This workflow will build and (if release) publish Python distributions to PyPI 2 | # For more information see: 3 | # - https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions 4 | # - https://packaging.python.org/en/latest/guides/publishing-package-distribution-releases-using-github-actions-ci-cd-workflows/ 5 | # 6 | # derived from https://github.com/Farama-Foundation/PettingZoo/blob/e230f4d80a5df3baf9bd905149f6d4e8ce22be31/.github/workflows/build-publish.yml 7 | name: build-publish 8 | 9 | on: 10 | push: 11 | branches: [main] 12 | pull_request: 13 | branches: [main] 14 | release: 15 | types: [published] 16 | 17 | jobs: 18 | matrix-build: 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | matrix: 22 | os: [macos-latest, windows-latest] 23 | python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] 24 | steps: 25 | - uses: actions/checkout@v3 26 | - name: Set up Python 27 | uses: actions/setup-python@v4 28 | with: 29 | python-version: ${{ matrix.python-version }} 30 | - name: Install dependencies 31 | run: python -m pip install --upgrade setuptools wheel cython build 32 | - name: Build distribution 33 | shell: bash 34 | run: python -m build . 
35 | - name: Store 36 | uses: actions/upload-artifact@v3 37 | with: 38 | name: artifact 39 | path: dist/* 40 | if-no-files-found: error 41 | 42 | linux-build: 43 | runs-on: ubuntu-latest 44 | steps: 45 | - uses: actions/checkout@v3 46 | - name: Set up Python 47 | uses: actions/setup-python@v4 48 | with: 49 | python-version: "3.x" 50 | - name: Install dependencies 51 | run: python -m pip install --upgrade setuptools wheel cython build 52 | - name: Build manylinux Python wheels 53 | uses: RalfG/python-wheels-manylinux-build@v0.5.0 54 | with: 55 | python-versions: 'cp38-cp38 cp39-cp39 cp310-cp310 cp311-cp311 cp312-cp312' 56 | pip-wheel-args: '-w ./dist --no-deps' 57 | - name: Store 58 | uses: actions/upload-artifact@v3 59 | with: 60 | name: artifact 61 | path: dist/*-manylinux*.whl 62 | if-no-files-found: error 63 | 64 | source-build: 65 | runs-on: ubuntu-latest 66 | steps: 67 | - uses: actions/checkout@v3 68 | - name: Set up Python 69 | uses: actions/setup-python@v4 70 | with: 71 | python-version: "3.x" 72 | - name: Install dependencies 73 | run: python -m pip install --upgrade setuptools wheel cython build 74 | - name: Build source distribution 75 | shell: bash 76 | run: python -m build . --sdist 77 | - name: Store 78 | uses: actions/upload-artifact@v3 79 | with: 80 | name: artifact 81 | path: dist/* 82 | if-no-files-found: error 83 | 84 | publish: 85 | runs-on: ubuntu-latest 86 | needs: [matrix-build, linux-build, source-build] 87 | if: github.event_name == 'release' && github.event.action == 'published' 88 | steps: 89 | - name: Download dists 90 | uses: actions/download-artifact@v3 91 | with: 92 | name: artifact 93 | path: dist 94 | - name: Publish 95 | uses: pypa/gh-action-pypi-publish@release/v1 96 | with: 97 | password: ${{ secrets.PYPI_API_TOKEN }} 98 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | **Aug 11, 2025: This project has been deprecated due to a lack of widespread community use, and is no longer planned to receive any additional updates or support.** 6 | 7 | Tinyscaler was a small CPU image scaling library with SIMD support on x86_64 and Arm (Neon). This project aimed to replace OpenCV for image resizing, resolving installation inconveniences and compatibility issues. We developed this for future use in Gymnasium and PettingZoo wrappers. 8 | 9 | ## Installation 10 | You can install from PyPI using `pip install tinyscaler`. Linux and macOS with Python >=3.8 are supported. 11 | 12 | ## Usage 13 | Tinyscaler contains a single external function, `scale`, which takes a numpy array input for the image and the new size, and returns the resized image. 14 | 15 | ```python 16 | import numpy as np 17 | import tinyscaler 18 | 19 | img = np.random.rand(64, 64, 4).astype(np.float32) 20 | 21 | resize_img = tinyscaler.scale(img, (32, 32)) 22 | print(resize_img.shape, resize_img.dtype) # (32, 32, 4) float32 23 | ``` 24 | 25 | TinyScaler supports mode='area', mode='bilinear', and mode='nearest' filtering. It also allows one to pass a destination buffer in order to avoid duplicate memory allocations. 26 | 27 | Area filtering is only really useful for downscaling; bilinear will be used even when area filtering is requested if upscaling. Area filtering is also likely not worth it when downscaling less than or equal to 2x. 28 | 29 | TinyScaler is used through a single function. The full signature is: 30 | 31 | ```python 32 | scale(src : np.ndarray, size : tuple, mode='area', dst : np.ndarray = None) 33 | ``` 34 | 35 | Note that the `size` tuple parameter is (width, height). However, the numpy arrays have dimensions ordered as (height, width, channels). This is similar to OpenCV. 36 | 37 | TinyScaler expects a contiguous numpy array. If it is not contiguous, it will throw an error. 
You can make a non-contiguous numpy array contiguous by calling `np.ascontiguousarray`. Usually a numpy array will already be contiguous. 38 | 39 | If the final array dimension is not 4 (RGBA), it will automatically be converted to 4 channels. Further, if the array is uint8, it will be converted to float32. So the preferred array has a shape `(height, width, 4)` and `dtype=np.float32`. 40 | 41 | Finally, downscaling is the focus of TinyScaler. It can also upscale, but it will not be as fast as a more complex separable algorithm in that case. 42 | 43 | ## Performance 44 | In a [simple benchmark](./examples/benchmark_bilinear.py), we resized the same image (4928x3279) down to (852x567) 100 times using bilinear filtering with several libraries. Here are the times (in seconds) spent (measured with Python's perf_counter) on an AMD 1950x: 45 | 46 | ``` 47 | Time elapsed for tinyscaler: 0.7968465110002398 48 | Time elapsed for OpenCV: 0.48667862100001 49 | Time elapsed for Pillow: 12.672875003999707 50 | Time elapsed for skimage: 164.45401711399973 51 | ``` 52 | 53 | And with area filtering (just TinyScaler and OpenCV): 54 | 55 | ``` 56 | Time elapsed for tinyscaler: 4.34793155800071 57 | Time elapsed for OpenCV: 8.118138265999733 58 | ``` 59 | 60 | All methods were forced to use a single thread. OpenCV is slightly faster than TinyScaler for bilinear filtering, but TinyScaler remains very fast regardless. 61 | 62 | Interestingly, for area filtering, TinyScaler is faster (almost 2x). 
63 | 64 | -------------------------------------------------------------------------------- /src/tinyscaler.pyx: -------------------------------------------------------------------------------- 1 | import cython 2 | 3 | 4 | cdef extern from "scaler.c": 5 | pass 6 | 7 | cdef extern from "scaler.h": 8 | ctypedef unsigned char u8; 9 | ctypedef int i32; 10 | ctypedef float f32; 11 | ctypedef double f64; 12 | 13 | void scale_nearest_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) 14 | void scale_bilinear_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) 15 | void scale_area_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) 16 | 17 | import numpy as np 18 | 19 | auto_convert = True # Global controlling whether automatic channel/type conversions take place 20 | 21 | def _scale_4f32(src: np.ndarray, size: tuple, mode: str = 'area', dst: np.ndarray = None) -> np.ndarray: 22 | assert(len(src.shape) == 3 and src.shape[2] == 4) # Must be 4 channel 23 | 24 | if not src.flags['C_CONTIGUOUS']: 25 | src = np.ascontiguousarray(src) 26 | 27 | cdef float[:, :, ::1] src_memview = src 28 | 29 | if dst is None: 30 | dst = np.empty((size[1], size[0], 4), dtype=np.float32) 31 | else: 32 | if len(dst.shape) != 3 or dst.shape[0] != size[1] or dst.shape[1] != size[0] or dst.shape[2] != 4: 33 | raise Exception('Incorrect dst size!') 34 | elif dst.dtype != np.float32: 35 | raise Exception('Incorrect dst type (must be float32)!') 36 | 37 | if not dst.flags['C_CONTIGUOUS']: 38 | dst = np.ascontiguousarray(dst) 39 | 40 | cdef float[:, :, ::1] dst_memview = dst 41 | 42 | if mode == 'bilinear': 43 | scale_bilinear_4f32(&src_memview[0][0][0], &dst_memview[0][0][0], src.shape[1], src.shape[0], size[0], size[1]) 44 | elif mode == 'area': 45 | scale_area_4f32(&src_memview[0][0][0], &dst_memview[0][0][0], src.shape[1], src.shape[0], size[0], size[1]) 46 | else: 47 | 
scale_nearest_4f32(&src_memview[0][0][0], &dst_memview[0][0][0], src.shape[1], src.shape[0], size[0], size[1]) 48 | 49 | return dst.reshape((size[1], size[0], 4)) 50 | 51 | @cython.binding(True) 52 | def scale(src: np.ndarray, size: tuple, mode: str = 'area', dst: np.ndarray = None) -> np.ndarray: 53 | ''' 54 | Scale (resize) a source image to a specified size 55 | 56 | Parameters 57 | ---------- 58 | src : numpy.ndarray 59 | the source array, must have len(src.shape) == 2 or len(src.shape) == 3. Must be contiguous (use numpy.ascontiguousarray if it is not already). 60 | Ideally (most efficient) the shape is (height, width, 4) with dtype=numpy.float32 (others will cause a conversion to occur) 61 | size : tuple 62 | target size, a tuple of two positive integers (width, height) 63 | mode : {'area', 'bilinear', 'nearest'}, optional 64 | interpolation method to use. Defaults to area (bilinear is used instead when upscaling), can also be bilinear or nearest 65 | dst : numpy.ndarray or None, optional 66 | destination buffer to put resized image in. Leaving this = None will result in an allocation. 67 | For efficient code, set dst to a buffer of shape (size[1], size[0], 4) with dtype=numpy.float32 68 | 69 | Returns 70 | ------- 71 | numpy.ndarray 72 | the scaled image 73 | 74 | Raises 75 | ------ 76 | Exception 77 | when src is not contiguous or the wrong shape, or when dst has the wrong shape or dtype 78 | ''' 79 | 80 | if not src.data.contiguous: 81 | raise Exception('Input image must be contiguous! 
Use np.ascontiguousarray(image) maybe?') 82 | 83 | src_dims = len(src.shape) 84 | src_channels = 4 85 | src_type = src.dtype 86 | 87 | if mode == 'area' and (src.shape[1] < size[0] or src.shape[0] < size[1]): 88 | mode = 'bilinear' # Switch to bilinear if upscaling with area filter, as area filter only applies to downscaling 89 | 90 | # Automatic conversion 91 | if auto_convert: 92 | if len(src.shape) != 2 and len(src.shape) != 3: 93 | raise Exception('Incorrect number of dimensions - need 2 or 3, received ' + str(len(src.shape))) 94 | 95 | if src.dtype != np.float32: 96 | if src.dtype == np.uint8: 97 | src = src.astype(np.float32) 98 | 99 | src *= float(1.0 / 255.0) 100 | else: 101 | src = src.astype(np.float32) 102 | 103 | if len(src.shape) == 2 or src.shape[2] == 1: # Gray: replicate the single channel into all four slots 104 | if len(src.shape) == 3: 105 | src = src.reshape((src.shape[0], src.shape[1])) 106 | 107 | src_channels = 1 108 | 109 | src_new = np.empty((src.shape[0], src.shape[1], 4), dtype=np.float32) 110 | src_new[:, :, 0] = src 111 | src_new[:, :, 1] = src 112 | src_new[:, :, 2] = src 113 | src_new[:, :, 3] = src 114 | 115 | src = src_new 116 | elif src.shape[2] == 3: # RGB: append a fourth channel filled with ones 117 | src_channels = 3 118 | 119 | src_new = np.empty((src.shape[0], src.shape[1], 4), dtype=np.float32) 120 | src_new[:, :, :3] = src 121 | src_new[:, :, 3] = np.ones((src.shape[0], src.shape[1])) 122 | 123 | src = src_new 124 | elif src.shape[2] != 4: 125 | raise Exception('Passed an invalid number of channels, must be 1, 3, or 4 (received ' + str(src.shape[2]) + ')!') 126 | 127 | result = None 128 | 129 | try: 130 | result = _scale_4f32(src, size, mode, dst) 131 | except Exception as e: 132 | raise e 133 | 134 | # Convert back to the original dtype and channel count 135 | if auto_convert: 136 | if src_type == np.uint8: 137 | result = (result * float(255.0)).astype(np.uint8)[:, :, :src_channels] 138 | else: 139 | result = result[:, :, :src_channels].astype(src_type) 140 | 141 | if src_dims == 2: 142 | result = result.reshape((size[1], size[0])) 143 | 144 | return 
result 145 | -------------------------------------------------------------------------------- /src/scaler.c: -------------------------------------------------------------------------------- 1 | #include "scaler.h" 2 | 3 | // Nearest does not use SIMD 4 | void scale_nearest_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 5 | f32 ratio_x = (f32)src_width / (f32)dst_width; 6 | f32 ratio_y = (f32)src_height / (f32)dst_height; 7 | i32 dst_width4 = dst_width << 2; 8 | i32 src_width4 = src_width << 2; 9 | 10 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 11 | i32 src_y = (i32)((dst_y + 0.5f) * ratio_y); 12 | 13 | i32 dst_offset4 = dst_width4 * dst_y; 14 | i32 src_offset4 = src_width4 * src_y; 15 | 16 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 17 | i32 src_x = (i32)((dst_x + 0.5f) * ratio_x); 18 | 19 | memcpy(&dst[(dst_x << 2) + dst_offset4], &src[(src_x << 2) + src_offset4], RGBA32F_SIZE); 20 | } 21 | } 22 | } 23 | 24 | #if defined(__x86_64__) // SSE implementation 25 | 26 | void scale_bilinear_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 27 | f32 ratio_x = (f32)(src_width - 1) / (f32)dst_width; 28 | f32 ratio_y = (f32)(src_height - 1) / (f32)dst_height; 29 | i32 dst_width4 = dst_width << 2; 30 | i32 src_width4 = src_width << 2; 31 | i32 src_width4_4 = src_width4 + 4; 32 | 33 | if ((((size_t)src | (size_t)dst) & 0x0f) == 0) { // Aligned memory 34 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 35 | f32 src_y_f = (dst_y + 0.5f) * ratio_y; 36 | i32 src_y = (i32)src_y_f; 37 | f32 interp_y = src_y_f - src_y; 38 | 39 | __m128 iy = _mm_set1_ps(interp_y); 40 | __m128 iy1 = _mm_set1_ps(1.0f - interp_y); 41 | 42 | i32 dst_offset4 = dst_width4 * dst_y; 43 | i32 src_offset4 = src_width4 * src_y; 44 | 45 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 46 | f32 src_x_f = (dst_x + 0.5f) * ratio_x; 47 | i32 src_x = (i32)src_x_f; 48 | f32 interp_x = src_x_f - src_x; 49 | 50 | 
i32 dst_start = (dst_x << 2) + dst_offset4; 51 | 52 | f32* src_start00 = src + (src_x << 2) + src_offset4; 53 | 54 | __m128 ix = _mm_set1_ps(interp_x); 55 | __m128 ix1 = _mm_set1_ps(1.0f - interp_x); 56 | 57 | __m128 p00 = _mm_load_ps(src_start00); 58 | __m128 p01 = _mm_load_ps(src_start00 + 4); 59 | __m128 p10 = _mm_load_ps(src_start00 + src_width4); 60 | __m128 p11 = _mm_load_ps(src_start00 + src_width4_4); 61 | 62 | p00 = _mm_add_ps(_mm_mul_ps(p00, iy1), _mm_mul_ps(p10, iy)); 63 | p01 = _mm_add_ps(_mm_mul_ps(p01, iy1), _mm_mul_ps(p11, iy)); 64 | 65 | p00 = _mm_add_ps(_mm_mul_ps(p00, ix1), _mm_mul_ps(p01, ix)); 66 | 67 | _mm_store_ps(dst + dst_start, p00); 68 | } 69 | } 70 | } 71 | else { // Unaligned memory 72 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 73 | f32 src_y_f = (dst_y + 0.5f) * ratio_y; 74 | i32 src_y = (i32)src_y_f; 75 | f32 interp_y = src_y_f - src_y; 76 | 77 | __m128 iy = _mm_set1_ps(interp_y); 78 | __m128 iy1 = _mm_set1_ps(1.0f - interp_y); 79 | 80 | i32 dst_offset4 = dst_width4 * dst_y; 81 | i32 src_offset4 = src_width4 * src_y; 82 | 83 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 84 | f32 src_x_f = (dst_x + 0.5f) * ratio_x; 85 | i32 src_x = (i32)src_x_f; 86 | f32 interp_x = src_x_f - src_x; 87 | 88 | i32 dst_start = (dst_x << 2) + dst_offset4; 89 | 90 | f32* src_start00 = src + (src_x << 2) + src_offset4; 91 | 92 | __m128 ix = _mm_set1_ps(interp_x); 93 | __m128 ix1 = _mm_set1_ps(1.0f - interp_x); 94 | 95 | __m128 p00 = _mm_loadu_ps(src_start00); 96 | __m128 p01 = _mm_loadu_ps(src_start00 + 4); 97 | __m128 p10 = _mm_loadu_ps(src_start00 + src_width4); 98 | __m128 p11 = _mm_loadu_ps(src_start00 + src_width4_4); 99 | 100 | p00 = _mm_add_ps(_mm_mul_ps(p00, iy1), _mm_mul_ps(p10, iy)); 101 | p01 = _mm_add_ps(_mm_mul_ps(p01, iy1), _mm_mul_ps(p11, iy)); 102 | 103 | p00 = _mm_add_ps(_mm_mul_ps(p00, ix1), _mm_mul_ps(p01, ix)); 104 | 105 | _mm_storeu_ps(dst + dst_start, p00); 106 | } 107 | } 108 | } 109 | } 110 | 111 | void 
scale_area_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 112 | f32 ratio_x = (f32)src_width / (f32)dst_width; 113 | f32 ratio_y = (f32)src_height / (f32)dst_height; 114 | i32 src_width4 = src_width << 2; 115 | i32 dst_width4 = dst_width << 2; 116 | 117 | if ((((size_t)src | (size_t)dst) & 0x0f) == 0) { // Aligned memory 118 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 119 | f32 src_lower_y_f = dst_y * ratio_y; 120 | f32 src_upper_y_f = (dst_y + 1.0f) * ratio_y; 121 | 122 | i32 src_lower_y = max(0, (i32)src_lower_y_f); 123 | i32 src_upper_y = min(src_height, (i32)src_upper_y_f + 1); 124 | 125 | f32 over_height = src_lower_y_f - (i32)src_lower_y_f; 126 | f32 over_height1 = 1.0f - (src_upper_y_f - (i32)src_lower_y_f); 127 | 128 | i32 dst_offset4 = dst_width4 * dst_y; 129 | 130 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 131 | f32 src_lower_x_f = dst_x * ratio_x; 132 | f32 src_upper_x_f = (dst_x + 1.0f) * ratio_x; 133 | 134 | i32 src_lower_x = max(0, (i32)src_lower_x_f); 135 | i32 src_upper_x = min(src_width, (i32)src_upper_x_f + 1); 136 | 137 | f32 over_width = src_lower_x_f - (i32)src_lower_x_f; 138 | f32 over_width1 = 1.0f - (src_upper_x_f - (i32)src_lower_x_f); 139 | 140 | i32 dst_start = (dst_x << 2) + dst_offset4; 141 | 142 | __m128 res = _mm_set1_ps(0.0f); 143 | 144 | f32 weight_total = 0.0f; 145 | 146 | for (i32 area_y = src_lower_y; area_y < src_upper_y; area_y++) { 147 | i32 src_offset4 = src_width4 * area_y; 148 | 149 | for (i32 area_x = src_lower_x; area_x < src_upper_x; area_x++) { 150 | f32 weight = (1.0f - (area_y == src_lower_y) * over_height) * (1.0f - (area_x == src_lower_x) * over_width) + 151 | (1.0f - (area_y == src_upper_y) * over_height1) * (1.0f - (area_x == src_upper_x) * over_width1); 152 | 153 | i32 src_start = (area_x << 2) + src_offset4; 154 | 155 | __m128 p = _mm_load_ps(src + src_start); 156 | 157 | res = _mm_add_ps(res, _mm_mul_ps(p, _mm_set1_ps(weight))); 158 | weight_total += 
weight; 159 | } 160 | } 161 | 162 | f32 div = 1.0f / weight_total; 163 | 164 | res = _mm_mul_ps(res, _mm_set1_ps(div)); 165 | 166 | _mm_store_ps(dst + dst_start, res); 167 | } 168 | } 169 | } 170 | else { // Unaligned memory 171 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 172 | f32 src_lower_y_f = dst_y * ratio_y; 173 | f32 src_upper_y_f = (dst_y + 1.0f) * ratio_y; 174 | 175 | i32 src_lower_y = max(0, (i32)src_lower_y_f); 176 | i32 src_upper_y = min(src_height, (i32)src_upper_y_f + 1); 177 | 178 | f32 over_height = src_lower_y_f - (i32)src_lower_y_f; 179 | f32 over_height1 = 1.0f - (src_upper_y_f - (i32)src_lower_y_f); 180 | 181 | i32 dst_offset4 = dst_width4 * dst_y; 182 | 183 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 184 | f32 src_lower_x_f = dst_x * ratio_x; 185 | f32 src_upper_x_f = (dst_x + 1.0f) * ratio_x; 186 | 187 | i32 src_lower_x = max(0, (i32)src_lower_x_f); 188 | i32 src_upper_x = min(src_width, (i32)src_upper_x_f + 1); 189 | 190 | f32 over_width = src_lower_x_f - (i32)src_lower_x_f; 191 | f32 over_width1 = 1.0f - (src_upper_x_f - (i32)src_lower_x_f); 192 | 193 | i32 dst_start = (dst_x << 2) + dst_offset4; 194 | 195 | __m128 res = _mm_set1_ps(0.0f); 196 | 197 | f32 weight_total = 0.0f; 198 | 199 | for (i32 area_y = src_lower_y; area_y < src_upper_y; area_y++) { 200 | i32 src_offset4 = src_width4 * area_y; 201 | 202 | for (i32 area_x = src_lower_x; area_x < src_upper_x; area_x++) { 203 | f32 weight = (1.0f - (area_y == src_lower_y) * over_height) * (1.0f - (area_x == src_lower_x) * over_width) + 204 | (1.0f - (area_y == src_upper_y) * over_height1) * (1.0f - (area_x == src_upper_x) * over_width1); 205 | 206 | i32 src_start = (area_x << 2) + src_offset4; 207 | 208 | __m128 p = _mm_loadu_ps(src + src_start); 209 | 210 | res = _mm_add_ps(res, _mm_mul_ps(p, _mm_set1_ps(weight))); 211 | weight_total += weight; 212 | } 213 | } 214 | 215 | f32 div = 1.0f / weight_total; 216 | 217 | res = _mm_mul_ps(res, _mm_set1_ps(div)); 218 | 219 | 
_mm_storeu_ps(dst + dst_start, res); 220 | } 221 | } 222 | } 223 | } 224 | 225 | #elif defined(__arm__) // ARM Neon implementation 226 | 227 | void scale_bilinear_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 228 | f32 ratio_x = (f32)(src_width - 1) / (f32)dst_width; 229 | f32 ratio_y = (f32)(src_height - 1) / (f32)dst_height; 230 | i32 dst_width4 = dst_width << 2; 231 | i32 src_width4 = src_width << 2; 232 | i32 src_width4_4 = src_width4 + 4; 233 | 234 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 235 | f32 src_y_f = (dst_y + 0.5f) * ratio_y; 236 | i32 src_y = (i32)src_y_f; 237 | f32 interp_y = src_y_f - src_y; 238 | 239 | float32x4_t iy = vdupq_n_f32(interp_y); 240 | float32x4_t iy1 = vdupq_n_f32(1.0f - interp_y); 241 | 242 | i32 dst_offset4 = dst_width4 * dst_y; 243 | i32 src_offset4 = src_width4 * src_y; 244 | 245 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 246 | f32 src_x_f = (dst_x + 0.5f) * ratio_x; 247 | i32 src_x = (i32)src_x_f; 248 | f32 interp_x = src_x_f - src_x; 249 | 250 | i32 dst_start = (dst_x << 2) + dst_offset4; 251 | 252 | f32* src_start00 = src + (src_x << 2) + src_offset4; 253 | 254 | float32x4_t ix = vdupq_n_f32(interp_x); // Use q versions (128 bit) 255 | float32x4_t ix1 = vdupq_n_f32(1.0f - interp_x); 256 | 257 | float32x4_t p00 = vld1q_f32(src_start00); 258 | float32x4_t p01 = vld1q_f32(src_start00 + 4); 259 | float32x4_t p10 = vld1q_f32(src_start00 + src_width4); 260 | float32x4_t p11 = vld1q_f32(src_start00 + src_width4_4); 261 | 262 | p00 = vaddq_f32(vmulq_f32(p00, iy1), vmulq_f32(p10, iy)); 263 | p01 = vaddq_f32(vmulq_f32(p01, iy1), vmulq_f32(p11, iy)); 264 | 265 | p00 = vaddq_f32(vmulq_f32(p00, ix1), vmulq_f32(p01, ix)); 266 | 267 | vst1q_f32(dst + dst_start, p00); 268 | } 269 | } 270 | } 271 | 272 | void scale_area_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 273 | f32 ratio_x = (f32)src_width / (f32)dst_width; 274 | f32 ratio_y = 
(f32)src_height / (f32)dst_height; 275 | i32 src_width4 = src_width << 2; 276 | i32 dst_width4 = dst_width << 2; 277 | 278 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 279 | f32 src_lower_y_f = dst_y * ratio_y; 280 | f32 src_upper_y_f = (dst_y + 1.0f) * ratio_y; 281 | 282 | i32 src_lower_y = max(0, (i32)src_lower_y_f); 283 | i32 src_upper_y = min(src_height, (i32)src_upper_y_f + 1); 284 | 285 | f32 over_height = src_lower_y_f - (i32)src_lower_y_f; 286 | f32 over_height1 = 1.0f - (src_upper_y_f - (i32)src_lower_y_f); 287 | 288 | i32 dst_offset4 = dst_width4 * dst_y; 289 | 290 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 291 | f32 src_lower_x_f = dst_x * ratio_x; 292 | f32 src_upper_x_f = (dst_x + 1.0f) * ratio_x; 293 | 294 | i32 src_lower_x = max(0, (i32)src_lower_x_f); 295 | i32 src_upper_x = min(src_width, (i32)src_upper_x_f + 1); 296 | 297 | f32 over_width = src_lower_x_f - (i32)src_lower_x_f; 298 | f32 over_width1 = 1.0f - (src_upper_x_f - (i32)src_lower_x_f); 299 | 300 | i32 dst_start = (dst_x << 2) + dst_offset4; 301 | 302 | float32x4_t res = vdupq_n_f32(0.0f); 303 | 304 | f32 weight_total = 0.0f; 305 | 306 | for (i32 area_y = src_lower_y; area_y < src_upper_y; area_y++) { 307 | i32 src_offset4 = src_width4 * area_y; 308 | 309 | for (i32 area_x = src_lower_x; area_x < src_upper_x; area_x++) { 310 | f32 weight = (1.0f - (area_y == src_lower_y) * over_height) * (1.0f - (area_x == src_lower_x) * over_width) + 311 | (1.0f - (area_y == src_upper_y) * over_height1) * (1.0f - (area_x == src_upper_x) * over_width1); 312 | 313 | i32 src_start = (area_x << 2) + src_offset4; 314 | 315 | float32x4_t p = vld1q_f32(src + src_start); 316 | 317 | res = vaddq_f32(res, vmulq_f32(p, vdupq_n_f32(weight))); 318 | weight_total += weight; 319 | } 320 | } 321 | 322 | f32 div = 1.0f / weight_total; 323 | 324 | res = vmulq_f32(res, vdupq_n_f32(div)); 325 | 326 | vst1q_f32(dst + dst_start, res); 327 | } 328 | } 329 | } 330 | 331 | #else // No SIMD implementation 332 | 333 
| void scale_bilinear_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 334 | f32 ratio_x = (f32)(src_width - 1) / (f32)dst_width; 335 | f32 ratio_y = (f32)(src_height - 1) / (f32)dst_height; 336 | i32 dst_width4 = dst_width << 2; 337 | 338 | i32 src_width4s[8]; 339 | src_width4s[0] = src_width << 2; 340 | 341 | for (i32 i = 1; i < 8; i++) 342 | src_width4s[i] = src_width4s[0] + i; 343 | 344 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 345 | f32 src_y_f = (dst_y + 0.5f) * ratio_y; 346 | i32 src_y = (i32)src_y_f; 347 | f32 interp_y = src_y_f - src_y; 348 | f32 interp_y1 = 1.0f - interp_y; 349 | 350 | i32 dst_offset4 = dst_width4 * dst_y; 351 | i32 src_offset4 = src_width4s[0] * src_y; 352 | 353 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 354 | f32 src_x_f = (dst_x + 0.5f) * ratio_x; 355 | i32 src_x = (i32)src_x_f; 356 | f32 interp_x = src_x_f - src_x; 357 | 358 | i32 dst_start = (dst_x << 2) + dst_offset4; 359 | 360 | i32 src_start00 = (src_x << 2) + src_offset4; 361 | 362 | f32 interp_x1 = 1.0f - interp_x; 363 | 364 | f32 pr0 = interp_y1 * src[src_start00 ] + interp_y * src[src_start00 + src_width4s[0]]; 365 | f32 pr1 = interp_y1 * src[src_start00 + 4] + interp_y * src[src_start00 + src_width4s[4]]; 366 | 367 | f32 pg0 = interp_y1 * src[src_start00 + 1] + interp_y * src[src_start00 + src_width4s[1]]; 368 | f32 pg1 = interp_y1 * src[src_start00 + 5] + interp_y * src[src_start00 + src_width4s[5]]; 369 | 370 | f32 pb0 = interp_y1 * src[src_start00 + 2] + interp_y * src[src_start00 + src_width4s[2]]; 371 | f32 pb1 = interp_y1 * src[src_start00 + 6] + interp_y * src[src_start00 + src_width4s[6]]; 372 | 373 | f32 pa0 = interp_y1 * src[src_start00 + 3] + interp_y * src[src_start00 + src_width4s[3]]; 374 | f32 pa1 = interp_y1 * src[src_start00 + 7] + interp_y * src[src_start00 + src_width4s[7]]; 375 | 376 | dst[dst_start ] = interp_x1 * pr0 + interp_x * pr1; 377 | dst[dst_start + 1] = interp_x1 * pg0 + interp_x * 
pg1; 378 | dst[dst_start + 2] = interp_x1 * pb0 + interp_x * pb1; 379 | dst[dst_start + 3] = interp_x1 * pa0 + interp_x * pa1; 380 | } 381 | } 382 | } 383 | 384 | void scale_area_4f32(f32 src[], f32 dst[], i32 src_width, i32 src_height, i32 dst_width, i32 dst_height) { 385 | f32 ratio_x = (f32)src_width / (f32)dst_width; 386 | f32 ratio_y = (f32)src_height / (f32)dst_height; 387 | i32 src_width4 = src_width << 2; 388 | i32 dst_width4 = dst_width << 2; 389 | 390 | for (i32 dst_y = 0; dst_y < dst_height; dst_y++) { 391 | f32 src_lower_y_f = dst_y * ratio_y; 392 | f32 src_upper_y_f = (dst_y + 1.0f) * ratio_y; 393 | 394 | i32 src_lower_y = max(0, (i32)src_lower_y_f); 395 | i32 src_upper_y = min(src_height, (i32)src_upper_y_f + 1); 396 | 397 | f32 over_height = src_lower_y_f - (i32)src_lower_y_f; 398 | f32 over_height1 = 1.0f - (src_upper_y_f - (i32)src_lower_y_f); 399 | 400 | i32 dst_offset4 = dst_width4 * dst_y; 401 | 402 | for (i32 dst_x = 0; dst_x < dst_width; dst_x++) { 403 | f32 src_lower_x_f = dst_x * ratio_x; 404 | f32 src_upper_x_f = (dst_x + 1.0f) * ratio_x; 405 | 406 | i32 src_lower_x = max(0, (i32)src_lower_x_f); 407 | i32 src_upper_x = min(src_width, (i32)src_upper_x_f + 1); 408 | 409 | f32 over_width = src_lower_x_f - (i32)src_lower_x_f; 410 | f32 over_width1 = 1.0f - (src_upper_x_f - (i32)src_lower_x_f); 411 | 412 | i32 dst_start = (dst_x << 2) + dst_offset4; 413 | 414 | f32 r = 0.0f; 415 | f32 g = 0.0f; 416 | f32 b = 0.0f; 417 | f32 a = 0.0f; 418 | 419 | f32 weight_total = 0.0f; 420 | 421 | for (i32 area_y = src_lower_y; area_y < src_upper_y; area_y++) { 422 | i32 src_offset4 = src_width4 * area_y; 423 | 424 | for (i32 area_x = src_lower_x; area_x < src_upper_x; area_x++) { 425 | f32 weight = (1.0f - (area_y == src_lower_y) * over_height) * (1.0f - (area_x == src_lower_x) * over_width) + 426 | (1.0f - (area_y == src_upper_y) * over_height1) * (1.0f - (area_x == src_upper_x) * over_width1); 427 | 428 | i32 src_start = (area_x << 2) + src_offset4; 429 | 
430 | r += weight * src[src_start ]; 431 | g += weight * src[src_start + 1]; 432 | b += weight * src[src_start + 2]; 433 | a += weight * src[src_start + 3]; 434 | weight_total += weight; 435 | } 436 | } 437 | 438 | f32 div = 1.0f / weight_total; 439 | 440 | dst[dst_start ] = r * div; 441 | dst[dst_start + 1] = g * div; 442 | dst[dst_start + 2] = b * div; 443 | dst[dst_start + 3] = a * div; 444 | } 445 | } 446 | } 447 | 448 | #endif 449 | --------------------------------------------------------------------------------