├── .bazelrc ├── .bcr ├── metadata.template.json ├── presubmit.yml └── source.template.json ├── .clang-format ├── .github ├── dependabot.yml └── workflows │ ├── build_test.yml │ ├── docs_pages_workflow.yml │ └── multiarch.yml ├── .gitignore ├── BUILD ├── CMakeLists.txt ├── CMakeLists.txt.in ├── CONTRIBUTING ├── LICENSE ├── LICENSE-BSD3 ├── MODULE.bazel ├── README.md ├── WORKSPACE ├── cmake └── FindAtomics.cmake ├── debian ├── changelog ├── compat ├── control ├── copyright ├── rules └── source │ └── format ├── docs ├── .gitignore ├── Makefile ├── _static │ ├── css │ │ ├── dark.css │ │ └── toggle.css │ └── js │ │ └── toggle.js ├── _templates │ ├── layout.html │ └── versions.html ├── buildDocs.sh ├── conf.py ├── images │ ├── logo-32x32.ico │ └── logo.png ├── index.rst ├── locales │ └── zh │ │ └── index.rst ├── make.bat └── mm-converter.py ├── g3doc ├── design_philosophy.md ├── faq.md ├── highway_intro.pdf ├── impl_details.md ├── instruction_matrix.pdf ├── op_wishlist.md ├── quick_reference.md └── release_testing_process.md ├── hwy.gni ├── hwy ├── abort.cc ├── abort.h ├── abort_header_only_test.cc ├── abort_test.cc ├── aligned_allocator.cc ├── aligned_allocator.h ├── aligned_allocator_test.cc ├── auto_tune.h ├── auto_tune_test.cc ├── base.h ├── base_test.cc ├── bit_set.h ├── bit_set_test.cc ├── cache_control.h ├── contrib │ ├── algo │ │ ├── copy-inl.h │ │ ├── copy_test.cc │ │ ├── find-inl.h │ │ ├── find_test.cc │ │ ├── transform-inl.h │ │ └── transform_test.cc │ ├── bit_pack │ │ ├── bit_pack-inl.h │ │ └── bit_pack_test.cc │ ├── dot │ │ ├── dot-inl.h │ │ └── dot_test.cc │ ├── image │ │ ├── image.cc │ │ ├── image.h │ │ └── image_test.cc │ ├── math │ │ ├── math-inl.h │ │ └── math_test.cc │ ├── matvec │ │ ├── matvec-inl.h │ │ └── matvec_test.cc │ ├── random │ │ ├── random-inl.h │ │ └── random_test.cc │ ├── sort │ │ ├── BUILD │ │ ├── README.md │ │ ├── algo-inl.h │ │ ├── bench_parallel.cc │ │ ├── bench_sort.cc │ │ ├── order.h │ │ ├── print_network.cc │ │ ├── result-inl.h │ │ 
├── shared-inl.h │ │ ├── sort_test.cc │ │ ├── sort_unit_test.cc │ │ ├── sorting_networks-inl.h │ │ ├── traits-inl.h │ │ ├── traits128-inl.h │ │ ├── vqsort-inl.h │ │ ├── vqsort.cc │ │ ├── vqsort.h │ │ ├── vqsort_128a.cc │ │ ├── vqsort_128d.cc │ │ ├── vqsort_f16a.cc │ │ ├── vqsort_f16d.cc │ │ ├── vqsort_f32a.cc │ │ ├── vqsort_f32d.cc │ │ ├── vqsort_f64a.cc │ │ ├── vqsort_f64d.cc │ │ ├── vqsort_i16a.cc │ │ ├── vqsort_i16d.cc │ │ ├── vqsort_i32a.cc │ │ ├── vqsort_i32d.cc │ │ ├── vqsort_i64a.cc │ │ ├── vqsort_i64d.cc │ │ ├── vqsort_kv128a.cc │ │ ├── vqsort_kv128d.cc │ │ ├── vqsort_kv64a.cc │ │ ├── vqsort_kv64d.cc │ │ ├── vqsort_u16a.cc │ │ ├── vqsort_u16d.cc │ │ ├── vqsort_u32a.cc │ │ ├── vqsort_u32d.cc │ │ ├── vqsort_u64a.cc │ │ └── vqsort_u64d.cc │ ├── thread_pool │ │ ├── futex.h │ │ ├── spin.h │ │ ├── spin_test.cc │ │ ├── thread_pool.h │ │ ├── thread_pool_test.cc │ │ ├── topology.cc │ │ ├── topology.h │ │ └── topology_test.cc │ └── unroller │ │ ├── README.md │ │ ├── unroller-inl.h │ │ └── unroller_test.cc ├── detect_compiler_arch.h ├── detect_targets.h ├── examples │ ├── benchmark.cc │ ├── profiler_example.cc │ ├── skeleton-inl.h │ ├── skeleton.cc │ ├── skeleton.h │ └── skeleton_test.cc ├── foreach_target.h ├── highway.h ├── highway_export.h ├── highway_test.cc ├── hwy.version ├── nanobenchmark.cc ├── nanobenchmark.h ├── nanobenchmark_test.cc ├── ops │ ├── arm_neon-inl.h │ ├── arm_sve-inl.h │ ├── emu128-inl.h │ ├── generic_ops-inl.h │ ├── inside-inl.h │ ├── loongarch_lasx-inl.h │ ├── loongarch_lsx-inl.h │ ├── ppc_vsx-inl.h │ ├── rvv-inl.h │ ├── scalar-inl.h │ ├── set_macros-inl.h │ ├── shared-inl.h │ ├── wasm_128-inl.h │ ├── wasm_256-inl.h │ ├── x86_128-inl.h │ ├── x86_256-inl.h │ ├── x86_512-inl.h │ └── x86_avx3-inl.h ├── per_target.cc ├── per_target.h ├── perf_counters.cc ├── perf_counters.h ├── perf_counters_test.cc ├── print-inl.h ├── print.cc ├── print.h ├── profiler.h ├── robust_statistics.h ├── stats.cc ├── stats.h ├── targets.cc ├── targets.h ├── 
targets_test.cc ├── tests │ ├── arithmetic_test.cc │ ├── bit_permute_test.cc │ ├── blockwise_combine_test.cc │ ├── blockwise_shift_test.cc │ ├── blockwise_test.cc │ ├── cast_test.cc │ ├── combine_test.cc │ ├── compare_test.cc │ ├── complex_arithmetic_test.cc │ ├── compress_test.cc │ ├── concat_test.cc │ ├── convert_test.cc │ ├── count_test.cc │ ├── crypto_test.cc │ ├── demote_test.cc │ ├── div_test.cc │ ├── dup128_vec_test.cc │ ├── expand_test.cc │ ├── float_test.cc │ ├── fma_test.cc │ ├── foreach_vec_test.cc │ ├── hwy_gtest.h │ ├── if_test.cc │ ├── in_range_float_to_int_conv_test.cc │ ├── interleaved_test.cc │ ├── list_targets.cc │ ├── logical_test.cc │ ├── mask_combine_test.cc │ ├── mask_convert_test.cc │ ├── mask_mem_test.cc │ ├── mask_set_test.cc │ ├── mask_slide_test.cc │ ├── mask_test.cc │ ├── masked_arithmetic_test.cc │ ├── masked_minmax_test.cc │ ├── memory_test.cc │ ├── minmax128_test.cc │ ├── minmax_magnitude_test.cc │ ├── minmax_test.cc │ ├── mul_by_pow2_test.cc │ ├── mul_pairwise_test.cc │ ├── mul_test.cc │ ├── reduction_test.cc │ ├── resize_test.cc │ ├── reverse_test.cc │ ├── rotate_test.cc │ ├── saturated_test.cc │ ├── shift_test.cc │ ├── shuffle4_test.cc │ ├── sign_test.cc │ ├── slide_up_down_test.cc │ ├── sums_abs_diff_test.cc │ ├── swizzle_block_test.cc │ ├── swizzle_test.cc │ ├── table_test.cc │ ├── test_util-inl.h │ ├── test_util.cc │ ├── test_util.h │ ├── test_util_test.cc │ ├── truncate_test.cc │ ├── tuple_test.cc │ └── widen_mul_test.cc ├── timer-inl.h ├── timer.cc ├── timer.h └── x86_cpuid.h ├── libhwy-contrib.pc.in ├── libhwy-test.pc.in ├── libhwy.pc.in ├── preamble.js.lds ├── run_tests.bat └── run_tests.sh /.bazelrc: -------------------------------------------------------------------------------- 1 | common --enable_bzlmod 2 | -------------------------------------------------------------------------------- /.bcr/metadata.template.json: -------------------------------------------------------------------------------- 1 | { 2 | "homepage": 
"https://github.com/google/highway", 3 | "maintainers": [ 4 | { 5 | "email": "janwas@google.com", 6 | "github": "jan-wassenberg", 7 | "name": "Jan Wassenberg" 8 | }, 9 | { 10 | "email": "eustas@google.com", 11 | "github": "eustas", 12 | "name": "Eugene Kliuchnikov" 13 | }, 14 | { 15 | "email": "rhettstucki@google.com", 16 | "github": "rhettstucki", 17 | "name": "Rhett Stucki" 18 | } 19 | ], 20 | "repository": [ 21 | "github:google/highway" 22 | ], 23 | "versions": [], 24 | "yanked_versions": {} 25 | } 26 | -------------------------------------------------------------------------------- /.bcr/presubmit.yml: -------------------------------------------------------------------------------- 1 | bcr_test_module: 2 | module_path: "bzlmod" 3 | matrix: 4 | platform: ["debian10", "macos", "ubuntu2004", "windows"] 5 | tasks: 6 | run_tests: 7 | name: "Run test module" 8 | platform: ${{ platform }} 9 | test_targets: 10 | - ":hwy_ops_tests" 11 | -------------------------------------------------------------------------------- /.bcr/source.template.json: -------------------------------------------------------------------------------- 1 | { 2 | "integrity": "", 3 | "strip_prefix": "{REPO}-{VERSION}", 4 | "url": "https://github.com/{OWNER}/{REPO}/archive/refs/tags/{TAG}.tar.gz" 5 | } 6 | -------------------------------------------------------------------------------- /.clang-format: -------------------------------------------------------------------------------- 1 | BasedOnStyle: Google 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: daily -------------------------------------------------------------------------------- /.github/workflows/docs_pages_workflow.yml: 
-------------------------------------------------------------------------------- 1 | name: docs_pages_workflow 2 | 3 | # execute this workflow automatically when we push to master 4 | on: 5 | push: 6 | permissions: 7 | contents: write 8 | pages: write 9 | 10 | jobs: 11 | 12 | build_docs_job: 13 | runs-on: ubuntu-latest 14 | container: debian:buster-slim 15 | 16 | steps: 17 | 18 | - name: Prereqs 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | run: | 22 | apt-get update 23 | apt-get install -y git 24 | git clone "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" . 25 | shell: bash 26 | 27 | - name: Execute script to build our documentation and update pages 28 | env: 29 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 30 | run: "cp -r g3doc docs && cp README.md docs/g3doc && docs/buildDocs.sh" 31 | shell: bash 32 | -------------------------------------------------------------------------------- /.github/workflows/multiarch.yml: -------------------------------------------------------------------------------- 1 | # https://github.com/marketplace/actions/run-on-architecture 2 | name: Foreign architectures 3 | 4 | on: [push, pull_request] 5 | concurrency: 6 | group: ${{ github.workflow }}-${{ github.ref }} 7 | cancel-in-progress: true 8 | 9 | permissions: 10 | contents: read 11 | 12 | jobs: 13 | multiarch: 14 | runs-on: ubuntu-22.04 15 | strategy: 16 | fail-fast: true 17 | matrix: 18 | include: 19 | - arch: armv7 20 | distro: ubuntu_latest 21 | cxx_flags: -Wno-psabi 22 | cmake_flags: -DHWY_CMAKE_ARM7:BOOL=ON 23 | - arch: ppc64le 24 | distro: ubuntu_latest 25 | steps: 26 | - name: Checkout code 27 | uses: actions/checkout@v4 28 | - name: Build and test 29 | uses: uraimo/run-on-arch-action@v3.0.1 30 | id: build 31 | with: 32 | arch: ${{ matrix.arch }} 33 | distro: ${{ matrix.distro }} 34 | # Not required, but speeds up builds 35 | githubToken: ${{ github.token }} 36 | install: | 37 | apt-get update -q -y 38 | apt-get install -q -y 
--no-install-recommends \ 39 | build-essential \ 40 | cmake \ 41 | libgtest-dev \ 42 | ninja-build \ 43 | ; 44 | run: | 45 | export CMAKE_BUILD_PARALLEL_LEVEL=2 46 | export CTEST_PARALLEL_LEVEL=2 47 | CXXFLAGS=${{ matrix.cxx_flags }} cmake -GNinja ${{ matrix.cmake_flags }} -DHWY_SYSTEM_GTEST=ON -DHWY_WARNINGS_ARE_ERRORS=ON -B out . 48 | cmake --build out 49 | ctest --test-dir out 50 | aarch64_cmake: 51 | name: Build and test ${{ matrix.name }} on AArch64 52 | runs-on: ubuntu-24.04-arm 53 | strategy: 54 | matrix: 55 | include: 56 | - name: Clang-18 57 | extra_deps: clang-18 58 | c_compiler: clang-18 59 | cxx_compiler: clang++-18 60 | cxx_standard: 17 61 | 62 | - name: GCC-14 63 | extra_deps: g++-14 64 | c_compiler: gcc-14 65 | cxx_compiler: g++-14 66 | cxx_flags: -ftrapv 67 | cxx_standard: 17 68 | 69 | steps: 70 | - name: Harden Runner 71 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 72 | with: 73 | egress-policy: audit # cannot be block - runner does git checkout 74 | 75 | - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.0.0 76 | 77 | - name: Install deps 78 | run: sudo apt-get install ${{ matrix.extra_deps }} 79 | 80 | - name: Build and test 81 | run: | 82 | export CMAKE_BUILD_PARALLEL_LEVEL=2 83 | export CTEST_PARALLEL_LEVEL=2 84 | CXXFLAGS="${{ matrix.cxx_flags }}" CC=${{ matrix.c_compiler }} CXX=${{ matrix.cxx_compiler }} cmake -DHWY_WARNINGS_ARE_ERRORS=ON -DCMAKE_CXX_STANDARD=${{ matrix.cxx_standard }} ${{ matrix.extra_cmake_flags }} -B out . 
85 | cmake --build out 86 | ctest --test-dir out 87 | 88 | loongarch64_cmake: 89 | name: Build and test ${{ matrix.name }} on LoongArch64 90 | runs-on: ubuntu-24.04 91 | strategy: 92 | matrix: 93 | include: 94 | - name: GCC-14 95 | extra_deps: qemu-loongarch64 96 | c_compiler: loongarch64-unknown-linux-gnu-gcc 97 | cxx_compiler: loongarch64-unknown-linux-gnu-g++ 98 | cxx_standard: 17 99 | steps: 100 | - name: get cross-tools evn 101 | run: | 102 | wget https://github.com/loongson/build-tools/releases/download/2025.02.21/x86_64-cross-tools-loongarch64-binutils_2.44-gcc_14.2.0-glibc_2.41.tar.xz 103 | sudo tar -xvf x86_64-cross-tools-loongarch64-binutils_2.44-gcc_14.2.0-glibc_2.41.tar.xz -C /opt 104 | - name: Harden Runner 105 | uses: step-security/harden-runner@0634a2670c59f64b4a01f0f96f84700a4088b9f0 # v2.12.0 106 | with: 107 | egress-policy: audit # cannot be block - runner does git checkout 108 | 109 | - uses: actions/checkout@8ade135a41bc03ea155e62e844d188df1ea18608 # v4.0.0 110 | 111 | - name: Install deps 112 | run: | 113 | wget https://github.com/loongson/build-tools/releases/download/2025.02.21/${{ matrix.extra_deps }} 114 | chmod +x ${{ matrix.extra_deps }} 115 | sudo mv ${{ matrix.extra_deps }} /opt/cross-tools/bin 116 | - name: Build and test 117 | run: | 118 | export CMAKE_BUILD_PARALLEL_LEVEL=2 119 | export CTEST_PARALLEL_LEVEL=2 120 | export PATH="/opt/cross-tools/bin:$PATH" 121 | export LD_LIBRARY_PATH="/opt/cross-tools/loongarch64-unknown-linux-gnu/lib:$LD_LIBRARY_PATH" 122 | export CC=${{ matrix.c_compiler }} CXX=${{ matrix.cxx_compiler }} 123 | cmake -DCMAKE_C_COMPILER_TARGET="loongarch64-unknown-linux-gnu" -DCMAKE_CXX_COMPILER_TARGET="loongarch64-unknown-linux-gnu" -DCMAKE_CROSSCOMPILING=true -DCMAKE_CROSSCOMPILING_EMULATOR="qemu-loongarch64;-cpu;max;-L;/opt/cross-tools/target" -DCMAKE_SYSTEM_NAME=Linux -DCMAKE_SYSTEM_PROCESSOR=loongarch64 -B out . 
124 | cmake --build out 125 | ctest --test-dir out 126 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | build 2 | bazel-bin 3 | bazel-highway 4 | bazel-out 5 | bazel-testlogs 6 | MODULE.bazel.lock 7 | docs/g3doc/* 8 | docs/html/* 9 | docs/md/* 10 | docs/rst/* 11 | -------------------------------------------------------------------------------- /CMakeLists.txt.in: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | 3 | project(googletest-download NONE) 4 | 5 | include(ExternalProject) 6 | ExternalProject_Add(googletest 7 | GIT_REPOSITORY https://github.com/google/googletest.git 8 | GIT_TAG 43efa0a4efd40c78b9210d15373112081899a97c 9 | SOURCE_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-src" 10 | BINARY_DIR "${CMAKE_CURRENT_BINARY_DIR}/googletest-build" 11 | CONFIGURE_COMMAND "" 12 | BUILD_COMMAND "" 13 | INSTALL_COMMAND "" 14 | TEST_COMMAND "" 15 | ) 16 | -------------------------------------------------------------------------------- /CONTRIBUTING: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. 4 | Pull requests are very welcome. Bug reports (via issue) are also appreciated. 5 | 6 | Looking for a starter project? See the [wishlist](g3doc/op_wishlist.md) for 7 | some ideas for extensions and codegen improvements. 8 | 9 | There are just a few small guidelines you need to follow. 10 | 11 | ## Contributor License Agreement 12 | 13 | Contributions to this project must be accompanied by a Contributor License 14 | Agreement. You (or your employer) retain the copyright to your contribution; 15 | this simply gives us permission to use and redistribute your contributions as 16 | part of the project. 
Head over to <https://cla.developers.google.com/> to see 17 | your current agreements on file or to sign a new one. 18 | 19 | You generally only need to submit a CLA once, so if you've already submitted one 20 | (even if it was for a different project), you probably don't need to do it 21 | again. 22 | 23 | ## Code reviews 24 | 25 | All submissions, including submissions by project members, require review. We 26 | use GitHub pull requests for this purpose. Consult 27 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 28 | information on using pull requests. 29 | 30 | ## API changes 31 | 32 | Highway promises to be backwards-compatible to the current documented API. If 33 | you would like to propose a change, please raise an issue to discuss how we can 34 | retain compatibility. 35 | 36 | ## Community Guidelines 37 | 38 | This project follows 39 | [Google's Open Source Community Guidelines](https://opensource.google.com/conduct/). 40 | -------------------------------------------------------------------------------- /LICENSE-BSD3: -------------------------------------------------------------------------------- 1 | Copyright (c) The Highway Project Authors. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without modification, 4 | are permitted provided that the following conditions are met: 5 | 6 | 1. Redistributions of source code must retain the above copyright notice, this 7 | list of conditions and the following disclaimer. 8 | 9 | 2. Redistributions in binary form must reproduce the above copyright notice, 10 | this list of conditions and the following disclaimer in the documentation 11 | and/or other materials provided with the distribution. 12 | 13 | 3. Neither the name of the copyright holder nor the names of its 14 | contributors may be used to endorse or promote products derived from 15 | this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 21 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 23 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 24 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 25 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /MODULE.bazel: -------------------------------------------------------------------------------- 1 | module( 2 | name = "highway", 3 | version = "1.2.0", 4 | ) 5 | 6 | bazel_dep(name = "bazel_skylib", version = "1.6.1") 7 | bazel_dep(name = "googletest", version = "1.15.2", repo_name = "com_google_googletest") 8 | bazel_dep(name = "rules_cc", version = "0.0.9") 9 | bazel_dep(name = "rules_license", version = "0.0.7") 10 | bazel_dep(name = "platforms", version = "0.0.10") 11 | -------------------------------------------------------------------------------- /WORKSPACE: -------------------------------------------------------------------------------- 1 | workspace(name = "highway") 2 | 3 | load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") 4 | load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") 5 | 6 | # Bazel platform rules. 
7 | maybe( 8 | http_archive, 9 | name = "platforms", 10 | urls = [ 11 | "https://github.com/bazelbuild/platforms/releases/download/0.0.7/platforms-0.0.7.tar.gz", 12 | ], 13 | sha256 = "3a561c99e7bdbe9173aa653fd579fe849f1d8d67395780ab4770b1f381431d51", 14 | ) 15 | 16 | maybe( 17 | http_archive, 18 | name = "com_google_googletest", 19 | urls = ["https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip"], 20 | sha256 = "8daa1a71395892f7c1ec5f7cb5b099a02e606be720d62f1a6a98f8f8898ec826", 21 | strip_prefix = "googletest-e2239ee6043f73722e7aa812a459f54a28552929", 22 | ) 23 | 24 | # See https://google.github.io/googletest/quickstart-bazel.html 25 | maybe( 26 | http_archive, 27 | name = "rules_cc", 28 | urls = ["https://github.com/bazelbuild/rules_cc/releases/download/0.0.9/rules_cc-0.0.9.tar.gz"], 29 | sha256 = "2037875b9a4456dce4a79d112a8ae885bbc4aad968e6587dca6e64f3a0900cdf", 30 | strip_prefix = "rules_cc-0.0.9", 31 | ) 32 | 33 | # Need recent version for config_setting_group 34 | maybe( 35 | http_archive, 36 | name = "bazel_skylib", 37 | urls = ["https://github.com/bazelbuild/bazel-skylib/releases/download/0.9.0/bazel_skylib-0.9.0.tar.gz"], 38 | sha256 = "1dde365491125a3db70731e25658dfdd3bc5dbdfd11b840b3e987ecf043c7ca0", 39 | ) 40 | 41 | maybe( 42 | http_archive, 43 | name = "rules_license", 44 | urls = [ 45 | "https://github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz", 46 | ], 47 | sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360", 48 | ) 49 | -------------------------------------------------------------------------------- /cmake/FindAtomics.cmake: -------------------------------------------------------------------------------- 1 | # Original issue: 2 | # * https://gitlab.kitware.com/cmake/cmake/-/issues/23021#note_1098733 3 | # 4 | # For reference: 5 | # * https://gcc.gnu.org/wiki/Atomic/GCCMM 6 | # 7 | # riscv64 specific: 8 | # * 
https://lists.debian.org/debian-riscv/2022/01/msg00009.html 9 | # 10 | # ATOMICS_FOUND - system has c++ atomics 11 | # ATOMICS_LIBRARIES - libraries needed to use c++ atomics 12 | 13 | include(CheckCXXSourceCompiles) 14 | 15 | # RISC-V only has 32-bit and 64-bit atomic instructions. GCC is supposed 16 | # to convert smaller atomics to those larger ones via masking and 17 | # shifting like LLVM, but it’s a known bug that it does not. This means 18 | # anything that wants to use atomics on 1-byte or 2-byte types needs 19 | # -latomic, but not 4-byte or 8-byte (though it does no harm). 20 | set(atomic_code 21 | " 22 | #include <atomic> 23 | #include <cstdint> 24 | std::atomic<uint8_t> n8 (0); // riscv64 25 | std::atomic<uint64_t> n64 (0); // armel, mipsel, powerpc 26 | int main() { 27 | ++n8; 28 | ++n64; 29 | return 0; 30 | }") 31 | 32 | # https://gitlab.kitware.com/cmake/cmake/-/issues/24063 33 | set(CMAKE_CXX_STANDARD 11) 34 | check_cxx_source_compiles("${atomic_code}" ATOMICS_LOCK_FREE_INSTRUCTIONS) 35 | 36 | if(ATOMICS_LOCK_FREE_INSTRUCTIONS) 37 | set(ATOMICS_FOUND TRUE) 38 | set(ATOMICS_LIBRARIES) 39 | else() 40 | set(CMAKE_REQUIRED_LIBRARIES "-latomic") 41 | check_cxx_source_compiles("${atomic_code}" ATOMICS_IN_LIBRARY) 42 | set(CMAKE_REQUIRED_LIBRARIES) 43 | if(ATOMICS_IN_LIBRARY) 44 | set(ATOMICS_LIBRARY atomic) 45 | include(FindPackageHandleStandardArgs) 46 | find_package_handle_standard_args(Atomics DEFAULT_MSG ATOMICS_LIBRARY) 47 | set(ATOMICS_LIBRARIES ${ATOMICS_LIBRARY}) 48 | unset(ATOMICS_LIBRARY) 49 | else() 50 | if(Atomics_FIND_REQUIRED) 51 | message(FATAL_ERROR "Neither lock free instructions nor -latomic found.") 52 | endif() 53 | endif() 54 | endif() 55 | unset(atomic_code) 56 | unset(CMAKE_CXX_STANDARD) 57 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 10 2 | -------------------------------------------------------------------------------- 
/debian/control: -------------------------------------------------------------------------------- 1 | Source: highway 2 | Maintainer: JPEG XL Maintainers 3 | Section: misc 4 | Priority: optional 5 | Standards-Version: 3.9.8 6 | Build-Depends: cmake, 7 | debhelper (>= 9), 8 | libgtest-dev 9 | Homepage: https://github.com/google/highway 10 | 11 | Package: libhwy-dev 12 | Architecture: any 13 | Section: libdevel 14 | Depends: ${misc:Depends} 15 | Description: Efficient and performance-portable SIMD wrapper (developer files) 16 | This library provides type-safe and source-code portable wrappers over 17 | existing platform-specific intrinsics. Its design aims for simplicity, 18 | reliable efficiency across platforms, and immediate usability with current 19 | compilers. 20 | . 21 | This package installs the development files. There's no runtime library 22 | since most of Highway is implemented in headers and only a very small 23 | static library is needed. 24 | -------------------------------------------------------------------------------- /debian/copyright: -------------------------------------------------------------------------------- 1 | Format: https://www.debian.org/doc/packaging-manuals/copyright-format/1.0/ 2 | Upstream-Name: highway 3 | 4 | Files: * 5 | Copyright: 2020 Google LLC 6 | License: Apache-2.0 7 | Licensed under the Apache License, Version 2.0 (the "License"); 8 | you may not use this file except in compliance with the License. 9 | You may obtain a copy of the License at 10 | . 11 | http://www.apache.org/licenses/LICENSE-2.0 12 | . 13 | Unless required by applicable law or agreed to in writing, software 14 | distributed under the License is distributed on an "AS IS" BASIS, 15 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | See the License for the specific language governing permissions and 17 | limitations under the License. 18 | . 
19 | On Debian systems, the complete text of the Apache License, Version 2 20 | can be found in "/usr/share/common-licenses/Apache-2.0". 21 | -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | 3 | # This variable could be set via --set-envvar=HWY_EXTRA_CONFIG= 4 | # Safeguard: turn undefined to empty 5 | HWY_EXTRA_CONFIG ?= 6 | 7 | %: 8 | dh $@ --buildsystem=cmake 9 | 10 | override_dh_auto_configure: 11 | dh_auto_configure -- -DHWY_SYSTEM_GTEST=ON $(HWY_EXTRA_CONFIG) 12 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (quilt) 2 | -------------------------------------------------------------------------------- /docs/.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | /_build 3 | /doctrees 4 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/css/toggle.css: -------------------------------------------------------------------------------- 1 | input[type=checkbox] { 2 | visibility: hidden; 3 | height: 0; 4 | width: 0; 5 | margin: 0; 6 | } 7 | 8 | .rst-versions .rst-current-version { 9 | padding: 10px; 10 | display: flex; 11 | justify-content: space-between; 12 | } 13 | 14 | .rst-versions .rst-current-version .fa-book, 15 | .rst-versions .rst-current-version .fa-v, 16 | .rst-versions .rst-current-version .fa-caret-down { 17 | height: 24px; 18 | line-height: 24px; 19 | vertical-align: middle; 20 | } 21 | 22 | .rst-versions .rst-current-version .fa-element { 23 | width: 80px; 24 | text-align: center; 25 | } 26 | 27 | .rst-versions .rst-current-version .fa-book { 28 | text-align: left; 29 | } 30 | 31 | .rst-versions .rst-current-version .fa-v { 32 | color: #27AE60; 33 | text-align: right; 34 | } 35 | 36 | label { 37 | margin: 0 auto; 38 | display: inline-block; 39 | justify-content: center; 40 | align-items: right; 41 | border-radius: 100px; 42 | position: relative; 43 | cursor: pointer; 44 | text-indent: -9999px; 45 | width: 50px; 46 | height: 21px; 47 | background: #000; 48 | } 49 | 50 | label:after { 51 | border-radius: 50%; 52 | position: absolute; 53 | content: ''; 54 | background: #fff; 55 | width: 15px; 56 | height: 15px; 57 | top: 3px; 58 | left: 3px; 59 | transition: ease-in-out 200ms; 60 | } 61 | 62 | input:checked+label { 63 | background: #3a7ca8; 64 | } 65 | 66 | input:checked+label:after { 67 | left: calc(100% - 5px); 68 | transform: translateX(-100%); 69 | } 70 | 71 | html.transition, 72 | html.transition *, 73 | html.transition *:before, 74 | html.transition *:after { 75 | transition: ease-in-out 200ms !important; 76 | transition-delay: 0 !important; 77 | } 78 | 
-------------------------------------------------------------------------------- /docs/_static/js/toggle.js: -------------------------------------------------------------------------------- 1 | document.addEventListener('DOMContentLoaded', function() { 2 | 3 | function toggleCssMode(isDay) { 4 | var mode = (isDay ? "Day" : "Night"); 5 | localStorage.setItem("css-mode", mode); 6 | 7 | var daysheet = $('link[href="_static/pygments.css"]')[0].sheet; 8 | daysheet.disabled = !isDay; 9 | 10 | var nightsheet = $('link[href="_static/css/dark.css"]')[0]; 11 | if (!isDay && nightsheet === undefined) { 12 | var element = document.createElement("link"); 13 | element.setAttribute("rel", "stylesheet"); 14 | element.setAttribute("type", "text/css"); 15 | element.setAttribute("href", "_static/css/dark.css"); 16 | document.getElementsByTagName("head")[0].appendChild(element); 17 | return; 18 | } 19 | if (nightsheet !== undefined) { 20 | nightsheet.sheet.disabled = isDay; 21 | } 22 | } 23 | 24 | var initial = localStorage.getItem("css-mode") != "Night"; 25 | var checkbox = document.querySelector('input[name=mode]'); 26 | 27 | toggleCssMode(initial); 28 | checkbox.checked = initial; 29 | 30 | checkbox.addEventListener('change', function() { 31 | document.documentElement.classList.add('transition'); 32 | window.setTimeout(() => { 33 | document.documentElement.classList.remove('transition'); 34 | }, 1000) 35 | toggleCssMode(this.checked); 36 | }) 37 | 38 | }); 39 | -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | {% block footer %} {{ super() }} 3 | 4 | 7 | 8 | {% endblock %} 9 | 10 | -------------------------------------------------------------------------------- /docs/_templates/versions.html: -------------------------------------------------------------------------------- 1 | {% if READTHEDOCS or 
display_lower_left %} 2 | {# Add rst-badge after rst-versions for small badge style. #} 3 |
4 | 5 | Read the Docs 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | v: {{ current_version }} 14 | 15 | 16 |
17 | {% if languages|length >= 1 %} 18 |
19 |
{{ _('Languages') }}
20 | {% for slug, url in languages %} 21 | {% if slug == current_language %} {% endif %} 22 |
{{ slug }}
23 | {% if slug == current_language %}
{% endif %} 24 | {% endfor %} 25 |
26 | {% endif %} 27 | {% if versions|length >= 1 %} 28 |
29 |
{{ _('Versions') }}
30 | {% for slug, url in versions %} 31 | {% if slug == current_version %} {% endif %} 32 |
{{ slug }}
33 | {% if slug == current_version %}
{% endif %} 34 | {% endfor %} 35 |
36 | {% endif %} 37 | {% if downloads|length >= 1 %} 38 |
39 |
{{ _('Downloads') }}
40 | {% for type, url in downloads %} 41 |
{{ type }}
42 | {% endfor %} 43 |
44 | {% endif %} 45 | {% if READTHEDOCS %} 46 |
47 |
{{ _('On Read the Docs') }}
48 |
49 | {{ _('Project Home') }} 50 |
51 |
52 | {{ _('Builds') }} 53 |
54 |
55 | {% endif %} 56 |
57 | {% trans %}Free document hosting provided by GitHub Pages.{% endtrans %} 58 | 59 |
60 |
61 | {% endif %} 62 | 63 | -------------------------------------------------------------------------------- /docs/buildDocs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -x 3 | ################################################################################ 4 | # File: buildDocs.sh 5 | # Purpose: Script that builds our documentation using sphinx and updates GitHub 6 | # Pages. This script is executed by: 7 | # .github/workflows/docs_pages_workflow.yml 8 | # 9 | # Authors: Michael Altfield 10 | # Created: 2020-07-17 11 | # Updated: 2020-07-23 12 | # Version: 0.2 13 | ################################################################################ 14 | 15 | ################### 16 | # INSTALL DEPENDS # 17 | ################### 18 | 19 | apt-get update 20 | apt-get -y install git rsync pandoc python3-sphinx python3-sphinx-rtd-theme python3-stemmer python3-git python3-pip python3-virtualenv python3-setuptools 21 | 22 | python3 -m pip install --upgrade rinohtype pygments sphinx-rtd-theme sphinx-tabs docutils==0.16 pandoc 23 | python3 -m pip list 24 | 25 | # get rid of all these safe dir warnings 26 | git config --global --add safe.directory '*' 27 | 28 | ##################### 29 | # DECLARE VARIABLES # 30 | ##################### 31 | 32 | pwd 33 | ls -lah 34 | export SOURCE_DATE_EPOCH=$(git log -1 --pretty=%ct) 35 | 36 | # make a new temp dir which will be our GitHub Pages docroot 37 | docroot=`mktemp -d` 38 | 39 | export REPO_NAME="${GITHUB_REPOSITORY##*/}" 40 | 41 | ############## 42 | # BUILD DOCS # 43 | ############## 44 | 45 | # first, cleanup any old builds' static assets 46 | make -C docs clean 47 | 48 | # get a list of branches, excluding 'HEAD' and 'gh-pages' 49 | versions="`git for-each-ref '--format=%(refname:lstrip=-1)' refs/remotes/origin/ | grep -viE '^(HEAD|gh-pages)$'`" 50 | ls 51 | for current_version in ${versions}; do 52 | 53 | # make the current language available to conf.py 54 | export 
current_version 55 | git checkout ${current_version} 56 | 57 | echo "INFO: Building sites for ${current_version}" 58 | 59 | # run the converter in a subshell so the cwd stays at the repo root even if 'cd docs' fails on a branch without docs/ 60 | (cd docs && python3 mm-converter.py) 61 | 62 | # skip this branch if it doesn't have our docs dir & sphinx config 63 | if [ ! -e 'docs/conf.py' ]; then 64 | echo -e "\tINFO: Couldn't find 'docs/conf.py' (skipped)" 65 | continue 66 | fi 67 | 68 | languages="en" 69 | for current_language in ${languages}; do 70 | 71 | # make the current language available to conf.py 72 | export current_language 73 | 74 | ########## 75 | # BUILDS # 76 | ########## 77 | echo "INFO: Building for ${current_language}" 78 | 79 | # HTML # 80 | sphinx-build -b html docs/ docs/_build/html/${current_language}/${current_version} -D language="${current_language}" 81 | 82 | # copy the static assets produced by the above build into our docroot 83 | rsync -av "docs/_build/html/" "${docroot}/" 84 | 85 | done 86 | 87 | done 88 | 89 | # return to master branch 90 | git checkout master 91 | 92 | ####################### 93 | # Update GitHub Pages # 94 | ####################### 95 | 96 | git config --global user.name "${GITHUB_ACTOR}" 97 | git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" 98 | 99 | pushd "${docroot}" 100 | 101 | # don't bother maintaining history; just generate fresh 102 | git init 103 | git remote add deploy "https://token:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" 104 | git checkout -b gh-pages 105 | 106 | # add .nojekyll to the root so that github won't 404 on content added to dirs 107 | # that start with an underscore (_), such as our "_content" dir.. 108 | touch .nojekyll 109 | 110 | # add redirect from the docroot to our default docs language/version 111 | cat > index.html < 113 | 114 | 115 | ${REPO_NAME} Docs 116 | 117 | 118 | 119 |

Please wait while you're redirected to our documentation.

120 | 121 | 122 | EOF 123 | 124 | # Add README 125 | cat > README.md <NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/mm-converter.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # set correct links (pandoc did not deal with github links properly) 4 | 5 | import os 6 | import re 7 | import subprocess 8 | 9 | regex_pdf_links1 = re.compile(r'`(.*)\`__', 10 | re.M | re.X) # Multiline and Verbose 11 | regex_md_links = re.compile(r'`(.*)\`__', 12 | re.M | re.X) # Multiline and Verbose 13 | regex_md_links2 = re.compile(r'`(.*)\n(.*)\`__', 14 | re.M | re.X) # Multiline and Verbose 15 | regex_pdf_links2 = re.compile(r'`(.*)\n\s+(.*)\`__', 16 | re.M | re.X) # Multiline and Verbose 17 | 18 | def remove_links_to_index2(data): 19 | # remove liks to the index, they are useless in py4web docs 20 | data = data 21 | print(re.search(regex_pdf_links2, data)) 22 | return re.sub(regex_pdf_links2, 23 | r':download:`\1 \2`', 24 | data) 25 | 26 | def remove_links_to_index(data): 27 | # remove liks to the index, they are useless in py4web docs 28 | data = data 29 | print(re.search(regex_pdf_links1, data)) 30 | return re.sub(regex_pdf_links1, 31 | r':download:`\1`', 32 | data) 33 | 34 | def rewrite_md_links(data): 35 | # remove liks to the 
index, they are useless in py4web docs 36 | data = data 37 | print(re.search(regex_md_links, data)) 38 | data = re.sub(regex_md_links, 39 | r'`\1<\2.html>`__', 40 | data) 41 | data = re.sub(regex_md_links2, 42 | r'`\1 \2<\3.html>`__', 43 | data) 44 | return data 45 | 46 | 47 | docs_on_pages = [ 48 | 'README.md', 49 | 'quick_reference.md', 50 | 'design_philosophy.md', 51 | 'impl_details.md', 52 | 'faq.md', 53 | 'release_testing_process.md' 54 | ] 55 | 56 | def convert2md(file): 57 | print(f" Working on file {file}") 58 | file = os.path.join('g3doc', file) 59 | data = open(file, 'r').read() 60 | write_files(file, data) 61 | 62 | def write_files(file, data): 63 | for extension in ['rst']: 64 | ext_dir = os.getcwd() 65 | md_dir = os.path.join(os.getcwd(), 'g3doc') 66 | if not os.path.isdir(ext_dir): 67 | os.mkdir(ext_dir) 68 | ext_file = os.path.join(ext_dir , os.path.splitext(os.path.basename(file))[0] + "." + extension) 69 | md_file = os.path.join(md_dir , os.path.splitext(os.path.basename(file))[0] + ".md") 70 | print(f'writing {ext_file}') 71 | if os.path.exists(ext_file): 72 | os.unlink(ext_file) 73 | with open(ext_file, 'w') as handler: 74 | write_format(extension, ext_file, handler, md_file, data) 75 | 76 | 77 | def write_format(extension, ext_file, handler, md_file, data): 78 | if extension =='md': 79 | handler.write(data) 80 | elif extension =='rst': 81 | try: 82 | subprocess.call(['pandoc', '-s', md_file, '-f', 'markdown', '-t', 'rst', '-o', ext_file]) 83 | data = open(ext_file, 'r').read() 84 | data = remove_links_to_index(data) 85 | data = remove_links_to_index2(data) 86 | data = rewrite_md_links(data) 87 | handler.write(data) 88 | # Open a file for writing 89 | # with open('tmp.txt', 'w') as f: 90 | # Call the subprocess and redirect the output to the file 91 | # subprocess.call(['awk', '{ gsub(/ 3 | // SPDX-License-Identifier: Apache-2.0 4 | // SPDX-License-Identifier: BSD-3-Clause 5 | 6 | #include "hwy/abort.h" 7 | 8 | #include 9 | #include 10 | #include 
11 | 12 | #include 13 | #include 14 | 15 | #include "hwy/base.h" 16 | 17 | #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 18 | #include "sanitizer/common_interface_defs.h" // __sanitizer_print_stack_trace 19 | #endif 20 | 21 | namespace hwy { 22 | 23 | namespace { 24 | 25 | std::atomic& AtomicWarnFunc() { 26 | static std::atomic func; 27 | return func; 28 | } 29 | 30 | std::atomic& AtomicAbortFunc() { 31 | static std::atomic func; 32 | return func; 33 | } 34 | 35 | std::string GetBaseName(std::string const& file_name) { 36 | auto last_slash = file_name.find_last_of("/\\"); 37 | return file_name.substr(last_slash + 1); 38 | } 39 | 40 | } // namespace 41 | 42 | // Returning a reference is unfortunately incompatible with `std::atomic`, which 43 | // is required to safely implement `SetWarnFunc`. As a workaround, we store a 44 | // copy here, update it when called, and return a reference to the copy. This 45 | // has the added benefit of protecting the actual pointer from modification. 46 | HWY_DLLEXPORT WarnFunc& GetWarnFunc() { 47 | static WarnFunc func; 48 | func = AtomicWarnFunc().load(); 49 | return func; 50 | } 51 | 52 | HWY_DLLEXPORT AbortFunc& GetAbortFunc() { 53 | static AbortFunc func; 54 | func = AtomicAbortFunc().load(); 55 | return func; 56 | } 57 | 58 | HWY_DLLEXPORT WarnFunc SetWarnFunc(WarnFunc func) { 59 | return AtomicWarnFunc().exchange(func); 60 | } 61 | 62 | HWY_DLLEXPORT AbortFunc SetAbortFunc(AbortFunc func) { 63 | return AtomicAbortFunc().exchange(func); 64 | } 65 | 66 | HWY_DLLEXPORT void HWY_FORMAT(3, 4) 67 | Warn(const char* file, int line, const char* format, ...) 
{ 68 | char buf[800]; 69 | va_list args; 70 | va_start(args, format); 71 | vsnprintf(buf, sizeof(buf), format, args); 72 | va_end(args); 73 | 74 | WarnFunc handler = AtomicWarnFunc().load(); 75 | if (handler != nullptr) { 76 | handler(file, line, buf); 77 | } else { 78 | fprintf(stderr, "Warn at %s:%d: %s\n", GetBaseName(file).data(), line, buf); 79 | } 80 | } 81 | 82 | HWY_DLLEXPORT HWY_NORETURN void HWY_FORMAT(3, 4) 83 | Abort(const char* file, int line, const char* format, ...) { 84 | char buf[800]; 85 | va_list args; 86 | va_start(args, format); 87 | vsnprintf(buf, sizeof(buf), format, args); 88 | va_end(args); 89 | 90 | AbortFunc handler = AtomicAbortFunc().load(); 91 | if (handler != nullptr) { 92 | handler(file, line, buf); 93 | } else { 94 | fprintf(stderr, "Abort at %s:%d: %s\n", GetBaseName(file).data(), line, 95 | buf); 96 | } 97 | 98 | // If compiled with any sanitizer, they can also print a stack trace. 99 | #if HWY_IS_ASAN || HWY_IS_MSAN || HWY_IS_TSAN 100 | __sanitizer_print_stack_trace(); 101 | #endif // HWY_IS_* 102 | fflush(stderr); 103 | 104 | // Now terminate the program: 105 | #if HWY_ARCH_RISCV 106 | exit(1); // trap/abort just freeze Spike. 107 | #elif HWY_IS_DEBUG_BUILD && !HWY_COMPILER_MSVC && !HWY_ARCH_ARM 108 | // Facilitates breaking into a debugger, but don't use this in non-debug 109 | // builds because it looks like "illegal instruction", which is misleading. 110 | // Also does not work on Arm. 111 | __builtin_trap(); 112 | #else 113 | abort(); // Compile error without this due to HWY_NORETURN. 
114 | #endif 115 | } 116 | 117 | } // namespace hwy 118 | -------------------------------------------------------------------------------- /hwy/abort.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Arm Limited and/or its affiliates 2 | // SPDX-License-Identifier: Apache-2.0 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #ifndef HIGHWAY_HWY_ABORT_H_ 6 | #define HIGHWAY_HWY_ABORT_H_ 7 | 8 | // Empty header for compatibility. 9 | // All Abort/Warn functionalities are in base.h. 10 | 11 | #endif // HIGHWAY_HWY_ABORT_H_ 12 | -------------------------------------------------------------------------------- /hwy/abort_header_only_test.cc: -------------------------------------------------------------------------------- 1 | 2 | #define HWY_HEADER_ONLY 3 | 4 | #include 5 | #include "hwy/base.h" 6 | #include "hwy/tests/hwy_gtest.h" 7 | #include "hwy/tests/test_util-inl.h" // HWY_ASSERT_EQ 8 | 9 | namespace hwy { 10 | namespace { 11 | 12 | #ifdef GTEST_HAS_DEATH_TEST 13 | 14 | TEST(AbortDeathTest, AbortDefault) { 15 | std::string expected = std::string("Abort at ") + __FILE__ + ":" + 16 | std::to_string(__LINE__ + 1) + ": Test Abort"; 17 | ASSERT_DEATH(HWY_ABORT("Test %s", "Abort"), expected); 18 | } 19 | #endif // GTEST_HAS_DEATH_TEST 20 | 21 | } // namespace 22 | } // namespace hwy 23 | -------------------------------------------------------------------------------- /hwy/abort_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Arm Limited and/or its affiliates 2 | // SPDX-License-Identifier: Apache-2.0 3 | // SPDX-License-Identifier: BSD-3-Clause 4 | 5 | #include "hwy/abort.h" 6 | 7 | #include 8 | 9 | #include 10 | 11 | #include "hwy/base.h" 12 | #include "hwy/tests/hwy_gtest.h" 13 | #include "hwy/tests/test_util-inl.h" // HWY_ASSERT_EQ 14 | 15 | namespace hwy { 16 | namespace { 17 | 18 | TEST(AbortTest, WarnOverrideChain) { 19 | WarnFunc FirstHandler = 
[](const char* file, int line, 20 | const char* formatted_err) -> void { 21 | fprintf(stderr, "%s from %d of %s", formatted_err, line, file); 22 | }; 23 | WarnFunc SecondHandler = [](const char* file, int line, 24 | const char* formatted_err) -> void { 25 | fprintf(stderr, "%s from %d of %s", formatted_err, line, file); 26 | }; 27 | 28 | // Do not check that the first SetWarnFunc returns nullptr, because it is 29 | // not guaranteed to be the first call - other TEST may come first. 30 | (void)SetWarnFunc(FirstHandler); 31 | HWY_ASSERT(GetWarnFunc() == FirstHandler); 32 | HWY_ASSERT(SetWarnFunc(SecondHandler) == FirstHandler); 33 | HWY_ASSERT(GetWarnFunc() == SecondHandler); 34 | HWY_ASSERT(SetWarnFunc(nullptr) == SecondHandler); 35 | HWY_ASSERT(GetWarnFunc() == nullptr); 36 | } 37 | 38 | #ifdef GTEST_HAS_DEATH_TEST 39 | 40 | std::string GetBaseName(std::string const& file_name) { 41 | auto last_slash = file_name.find_last_of("/\\"); 42 | return file_name.substr(last_slash + 1); 43 | } 44 | 45 | TEST(AbortDeathTest, AbortDefault) { 46 | std::string expected = std::string("Abort at ") + GetBaseName(__FILE__) + 47 | ":" + std::to_string(__LINE__ + 1) + ": Test Abort"; 48 | ASSERT_DEATH(HWY_ABORT("Test %s", "Abort"), expected); 49 | } 50 | 51 | TEST(AbortDeathTest, AbortOverride) { 52 | const AbortFunc CustomAbortHandler = [](const char* file, int line, 53 | const char* formatted_err) -> void { 54 | fprintf(stderr, "%s from %02d of %s", formatted_err, line, 55 | GetBaseName(file).data()); 56 | }; 57 | 58 | SetAbortFunc(CustomAbortHandler); 59 | 60 | // googletest regex does not support `+` for digits on Windows?! 61 | // https://google.github.io/googletest/advanced.html#regular-expression-syntax 62 | // Hence we insert the expected line number manually. 
63 | char buf[100]; 64 | const std::string file = GetBaseName(__FILE__); 65 | const int line = __LINE__ + 2; // from which HWY_ABORT is called 66 | snprintf(buf, sizeof(buf), "Test Abort from %02d of %s", line, file.c_str()); 67 | ASSERT_DEATH({ HWY_ABORT("Test %s", "Abort"); }, buf); 68 | } 69 | #endif // GTEST_HAS_DEATH_TEST 70 | 71 | TEST(AbortTest, AbortOverrideChain) { 72 | AbortFunc FirstHandler = [](const char* file, int line, 73 | const char* formatted_err) -> void { 74 | fprintf(stderr, "%s from %d of %s", formatted_err, line, file); 75 | }; 76 | AbortFunc SecondHandler = [](const char* file, int line, 77 | const char* formatted_err) -> void { 78 | fprintf(stderr, "%s from %d of %s", formatted_err, line, file); 79 | }; 80 | 81 | // Do not check that the first SetAbortFunc returns nullptr, because it is 82 | // not guaranteed to be the first call - other TEST may come first. 83 | (void)SetAbortFunc(FirstHandler); 84 | HWY_ASSERT(GetAbortFunc() == FirstHandler); 85 | HWY_ASSERT(SetAbortFunc(SecondHandler) == FirstHandler); 86 | HWY_ASSERT(GetAbortFunc() == SecondHandler); 87 | HWY_ASSERT(SetAbortFunc(nullptr) == SecondHandler); 88 | HWY_ASSERT(GetAbortFunc() == nullptr); 89 | } 90 | 91 | } // namespace 92 | } // namespace hwy 93 | 94 | HWY_TEST_MAIN(); 95 | -------------------------------------------------------------------------------- /hwy/auto_tune_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2025 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/auto_tune.h" 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | #include 23 | #include 24 | 25 | #include "hwy/base.h" // HWY_ASSERT 26 | #include "hwy/nanobenchmark.h" // Unpredictable1 27 | #include "hwy/tests/hwy_gtest.h" 28 | #include "hwy/tests/test_util-inl.h" 29 | 30 | namespace hwy { 31 | namespace { 32 | 33 | // Returns random floating-point number in [-8, 8). 34 | static double Random(RandomState& rng) { 35 | const int32_t bits = static_cast(Random32(&rng)) & 1023; 36 | return (bits - 512) / 64.0; 37 | } 38 | 39 | TEST(AutoTuneTest, TestCostDistribution) { 40 | // All equal exercises the MAD=0 trimming case. 41 | const size_t kMaxValues = CostDistribution::kMaxValues; 42 | for (size_t num : {size_t{3}, kMaxValues - 1, kMaxValues, kMaxValues + 1}) { 43 | const double kVal = 6.5; 44 | for (double outlier : {0.0, 1000.0}) { 45 | CostDistribution cd; 46 | const size_t num_outliers = HWY_MAX(num / 4, size_t{1}); 47 | for (size_t i = 0; i < num - num_outliers; ++i) cd.Notify(kVal); 48 | for (size_t i = 0; i < num_outliers; ++i) cd.Notify(outlier); 49 | const double cost = cd.EstimateCost(); 50 | // Winsorization allows outliers to shift the central tendency a bit. 51 | HWY_ASSERT(cost >= kVal - 0.25); 52 | HWY_ASSERT(cost <= kVal + 0.25); 53 | } 54 | } 55 | 56 | // Gaussian distribution with additive+multiplicative noise. 
57 | RandomState rng; 58 | for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { 59 | CostDistribution cd; 60 | const size_t num = 1000; // enough for stable variance 61 | for (size_t i = 0; i < num; ++i) { 62 | // Central limit theorem: sum of independent random is Gaussian. 63 | double sum = 500.0; 64 | for (size_t sum_idx = 0; sum_idx < 100; ++sum_idx) sum += Random(rng); 65 | 66 | // 16% noise: mostly additive, some lucky shots. 67 | const uint32_t r = Random32(&rng); 68 | if (r < (1u << 28)) { 69 | static constexpr double kPowers[4] = {0.0, 1E3, 1E4, 1E5}; 70 | static constexpr double kMul[4] = {0.50, 0.75, 0.85, 0.90}; 71 | if (r & 3) { // 75% chance of large additive noise 72 | sum += kPowers[r & 3]; 73 | } else { // 25% chance of small multiplicative reduction 74 | sum *= kMul[(r >> 2) & 3]; 75 | } 76 | } 77 | cd.Notify(sum); 78 | } 79 | const double cost = cd.EstimateCost(); 80 | if (!(490.0 <= cost && cost <= 540.0)) { 81 | HWY_ABORT("Cost %f outside expected range.", cost); 82 | } 83 | } 84 | } 85 | 86 | TEST(AutoTuneTest, TestNextEdges) { 87 | NextWithSkip list(123); 88 | HWY_ASSERT_EQ(0, list.Next(122)); // Check wrap-around 89 | HWY_ASSERT_EQ(1, list.Next(0)); 90 | list.Skip(1); 91 | HWY_ASSERT_EQ(2, list.Next(0)); 92 | list.Skip(2); 93 | HWY_ASSERT_EQ(3, list.Next(0)); 94 | 95 | // Skip last 96 | list.Skip(122); 97 | HWY_ASSERT_EQ(0, list.Next(121)); 98 | 99 | // Skip first 100 | list.Skip(0); 101 | HWY_ASSERT_EQ(3, list.Next(121)); 102 | } 103 | 104 | TEST(AutoTuneTest, TestNextSkipAllButOne) { 105 | // Prime, pow2 +/- 1 106 | for (size_t num : {size_t{37}, size_t{63}, size_t{513}}) { 107 | NextWithSkip list(num); 108 | std::vector pos; 109 | pos.reserve(num); 110 | for (size_t i = 0; i < num; ++i) { 111 | pos.push_back(static_cast(i)); 112 | } 113 | std::mt19937 rng(static_cast(129 * Unpredictable1())); 114 | std::shuffle(pos.begin(), pos.end(), rng); 115 | for (size_t i = 0; i < num - 1; ++i) { 116 | list.Skip(pos[i]); 117 | } 118 | 
HWY_ASSERT_EQ(pos.back(), list.Next(pos.back())); // only one left 119 | } 120 | } 121 | 122 | } // namespace 123 | } // namespace hwy 124 | 125 | HWY_TEST_MAIN(); 126 | -------------------------------------------------------------------------------- /hwy/bit_set.h: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef HIGHWAY_HWY_BIT_SET_H_ 17 | #define HIGHWAY_HWY_BIT_SET_H_ 18 | 19 | // BitSet with fast Foreach for up to 64 and 4096 members. 20 | 21 | #include 22 | 23 | #include "hwy/base.h" 24 | 25 | namespace hwy { 26 | 27 | // 64-bit specialization of std::bitset, which lacks Foreach. 28 | class BitSet64 { 29 | public: 30 | // No harm if `i` is already set. 31 | void Set(size_t i) { 32 | HWY_DASSERT(i < 64); 33 | bits_ |= (1ULL << i); 34 | HWY_DASSERT(Get(i)); 35 | } 36 | 37 | // Equivalent to Set(i) for i in [0, 64) where (bits >> i) & 1. This does 38 | // not clear any existing bits. 39 | void SetNonzeroBitsFrom64(uint64_t bits) { bits_ |= bits; } 40 | 41 | void Clear(size_t i) { 42 | HWY_DASSERT(i < 64); 43 | bits_ &= ~(1ULL << i); 44 | } 45 | 46 | bool Get(size_t i) const { 47 | HWY_DASSERT(i < 64); 48 | return (bits_ & (1ULL << i)) != 0; 49 | } 50 | 51 | // Returns true if any Get(i) would return true for i in [0, 64). 
52 | bool Any() const { return bits_ != 0; } 53 | 54 | // Returns lowest i such that Get(i). Caller must ensure Any() beforehand! 55 | size_t First() const { 56 | HWY_DASSERT(Any()); 57 | return Num0BitsBelowLS1Bit_Nonzero64(bits_); 58 | } 59 | 60 | // Returns uint64_t(Get(i)) << i for i in [0, 64). 61 | uint64_t Get64() const { return bits_; } 62 | 63 | // Calls `func(i)` for each `i` in the set. It is safe for `func` to modify 64 | // the set, but the current Foreach call is unaffected. 65 | template 66 | void Foreach(const Func& func) const { 67 | uint64_t remaining_bits = bits_; 68 | while (remaining_bits != 0) { 69 | const size_t i = Num0BitsBelowLS1Bit_Nonzero64(remaining_bits); 70 | remaining_bits &= remaining_bits - 1; // clear LSB 71 | func(i); 72 | } 73 | } 74 | 75 | size_t Count() const { return PopCount(bits_); } 76 | 77 | private: 78 | uint64_t bits_ = 0; 79 | }; 80 | 81 | // Two-level bitset for up to kMaxSize <= 4096 values. 82 | template 83 | class BitSet4096 { 84 | public: 85 | // No harm if `i` is already set. 86 | void Set(size_t i) { 87 | HWY_DASSERT(i < kMaxSize); 88 | const size_t idx = i / 64; 89 | const size_t mod = i % 64; 90 | bits_[idx].Set(mod); 91 | nonzero_.Set(idx); 92 | HWY_DASSERT(Get(i)); 93 | } 94 | 95 | // Equivalent to Set(i) for i in [0, 64) where (bits >> i) & 1. This does 96 | // not clear any existing bits. 
97 | void SetNonzeroBitsFrom64(uint64_t bits) { 98 | bits_[0].SetNonzeroBitsFrom64(bits); 99 | if (bits) nonzero_.Set(0); // word 0 is now non-empty: record it in the summary 100 | } 101 | 102 | void Clear(size_t i) { 103 | HWY_DASSERT(i < kMaxSize); 104 | const size_t idx = i / 64; 105 | const size_t mod = i % 64; 106 | bits_[idx].Clear(mod); 107 | if (!bits_[idx].Any()) { // word became empty: drop its summary bit 108 | nonzero_.Clear(idx); 109 | } 110 | HWY_DASSERT(!Get(i)); 111 | } 112 | 113 | bool Get(size_t i) const { 114 | HWY_DASSERT(i < kMaxSize); 115 | const size_t idx = i / 64; 116 | const size_t mod = i % 64; 117 | return bits_[idx].Get(mod); 118 | } 119 | 120 | // Returns true if any Get(i) would return true for i in [0, kMaxSize). 121 | bool Any() const { return nonzero_.Any(); } 122 | 123 | // Returns lowest i such that Get(i). Caller must ensure Any() beforehand! 124 | size_t First() const { 125 | HWY_DASSERT(Any()); 126 | const size_t idx = nonzero_.First(); 127 | return idx * 64 + bits_[idx].First(); 128 | } 129 | 130 | // Returns uint64_t(Get(i)) << i for i in [0, 64). 131 | uint64_t Get64() const { return bits_[0].Get64(); } 132 | 133 | // Calls `func(i)` for each `i` in the set. It is safe for `func` to modify 134 | // the set, but the current Foreach call is only affected if changing one of 135 | // the not yet visited BitSet64 for which Any() is true.
136 | template <class Func> 137 | void Foreach(const Func& func) const { 138 | nonzero_.Foreach([&func, this](size_t idx) { 139 | bits_[idx].Foreach([idx, &func](size_t mod) { func(idx * 64 + mod); }); 140 | }); 141 | } 142 | 143 | size_t Count() const { 144 | size_t total = 0; 145 | nonzero_.Foreach( 146 | [&total, this](size_t idx) { total += bits_[idx].Count(); }); 147 | return total; 148 | } 149 | 150 | private: 151 | static_assert(kMaxSize <= 64 * 64, "One BitSet64 insufficient"); 152 | BitSet64 nonzero_; 153 | BitSet64 bits_[(kMaxSize + 63) / 64]; // round up: a partial final word still needs storage 154 | }; 155 | 156 | } // namespace hwy 157 | 158 | #endif // HIGHWAY_HWY_BIT_SET_H_ 159 | -------------------------------------------------------------------------------- /hwy/cache_control.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef HIGHWAY_HWY_CACHE_CONTROL_H_ 17 | #define HIGHWAY_HWY_CACHE_CONTROL_H_ 18 | 19 | #include "hwy/base.h" 20 | 21 | // Requires SSE2; fails to compile on 32-bit Clang 7 (see 22 | // https://github.com/gperftools/gperftools/issues/946).
23 | #if !defined(__SSE2__) || (HWY_COMPILER_CLANG && HWY_ARCH_X86_32) 24 | #undef HWY_DISABLE_CACHE_CONTROL 25 | #define HWY_DISABLE_CACHE_CONTROL 26 | #endif 27 | 28 | #ifndef HWY_DISABLE_CACHE_CONTROL 29 | // intrin.h is sufficient on MSVC and already included by base.h. 30 | #if HWY_ARCH_X86 && !HWY_COMPILER_MSVC 31 | #include // SSE2 32 | #include // _mm_prefetch 33 | #elif HWY_ARCH_ARM_A64 34 | #include 35 | #endif 36 | #endif // HWY_DISABLE_CACHE_CONTROL 37 | 38 | namespace hwy { 39 | 40 | // Even if N*sizeof(T) is smaller, Stream may write a multiple of this size. 41 | #define HWY_STREAM_MULTIPLE 16 42 | 43 | // The following functions may also require an attribute. 44 | #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) && !HWY_COMPILER_MSVC 45 | #define HWY_ATTR_CACHE __attribute__((target("sse2"))) 46 | #else 47 | #define HWY_ATTR_CACHE 48 | #endif 49 | 50 | // Windows.h #defines this, which causes infinite recursion. Temporarily 51 | // undefine to avoid conflict with our function. 52 | // TODO(janwas): remove when this function is removed. 53 | #pragma push_macro("LoadFence") 54 | #undef LoadFence 55 | 56 | // Delays subsequent loads until prior loads are visible. Beware of potentially 57 | // differing behavior across architectures and vendors: on Intel but not 58 | // AMD CPUs, also serves as a full fence (waits for all prior instructions to 59 | // complete). 60 | HWY_INLINE HWY_ATTR_CACHE void LoadFence() { 61 | #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) 62 | _mm_lfence(); 63 | #endif 64 | } 65 | 66 | // TODO(janwas): remove when this function is removed. (See above.) 67 | #pragma pop_macro("LoadFence") 68 | 69 | // Ensures values written by previous `Stream` calls are visible on the current 70 | // core. This is NOT sufficient for synchronizing across cores; when `Stream` 71 | // outputs are to be consumed by other core(s), the producer must publish 72 | // availability (e.g. via mutex or atomic_flag) after `FlushStream`. 
73 | HWY_INLINE HWY_ATTR_CACHE void FlushStream() { 74 | #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) 75 | _mm_sfence(); 76 | #endif 77 | } 78 | 79 | // Optionally begins loading the cache line containing "p" to reduce latency of 80 | // subsequent actual loads. 81 | template 82 | HWY_INLINE HWY_ATTR_CACHE void Prefetch(const T* p) { 83 | (void)p; 84 | #ifndef HWY_DISABLE_CACHE_CONTROL 85 | #if HWY_ARCH_X86 86 | _mm_prefetch(reinterpret_cast(p), _MM_HINT_T0); 87 | #elif HWY_COMPILER_GCC // includes clang 88 | // Hint=0 (NTA) behavior differs, but skipping outer caches is probably not 89 | // desirable, so use the default 3 (keep in caches). 90 | __builtin_prefetch(p, /*write=*/0, /*hint=*/3); 91 | #endif 92 | #endif // HWY_DISABLE_CACHE_CONTROL 93 | } 94 | 95 | // Invalidates and flushes the cache line containing "p", if possible. 96 | HWY_INLINE HWY_ATTR_CACHE void FlushCacheline(const void* p) { 97 | #if HWY_ARCH_X86 && !defined(HWY_DISABLE_CACHE_CONTROL) 98 | _mm_clflush(p); 99 | #else 100 | (void)p; 101 | #endif 102 | } 103 | 104 | // Hints that we are inside a spin loop and potentially reduces power 105 | // consumption and coherency traffic. For example, x86 avoids multiple 106 | // outstanding load requests, which reduces the memory order violation penalty 107 | // when exiting the loop. 108 | HWY_INLINE HWY_ATTR_CACHE void Pause() { 109 | #ifndef HWY_DISABLE_CACHE_CONTROL 110 | #if HWY_ARCH_X86 111 | _mm_pause(); 112 | #elif HWY_ARCH_ARM_A64 && HWY_COMPILER_CLANG 113 | // This is documented in ACLE and the YIELD instruction is also available in 114 | // Armv7, but the intrinsic is broken for Armv7 clang, hence A64 only. 
115 | __yield(); 116 | #elif HWY_ARCH_ARM && HWY_COMPILER_GCC // includes clang 117 | __asm__ volatile("yield" ::: "memory"); 118 | #elif HWY_ARCH_PPC && HWY_COMPILER_GCC // includes clang 119 | __asm__ volatile("or 27,27,27" ::: "memory"); 120 | #endif 121 | #endif // HWY_DISABLE_CACHE_CONTROL 122 | } 123 | 124 | } // namespace hwy 125 | 126 | #endif // HIGHWAY_HWY_CACHE_CONTROL_H_ 127 | -------------------------------------------------------------------------------- /hwy/contrib/algo/find-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Per-target include guard 17 | #if defined(HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_) == \ 18 | defined(HWY_TARGET_TOGGLE) // NOLINT 19 | #ifdef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_ 20 | #undef HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_ 21 | #else 22 | #define HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_ 23 | #endif 24 | 25 | #include "hwy/highway.h" 26 | 27 | HWY_BEFORE_NAMESPACE(); 28 | namespace hwy { 29 | namespace HWY_NAMESPACE { 30 | 31 | // Returns index of the first element equal to `value` in `in[0, count)`, or 32 | // `count` if not found. 
33 | template > 34 | size_t Find(D d, T value, const T* HWY_RESTRICT in, size_t count) { 35 | const size_t N = Lanes(d); 36 | const Vec broadcasted = Set(d, value); 37 | 38 | size_t i = 0; 39 | if (count >= N) { 40 | for (; i <= count - N; i += N) { 41 | const intptr_t pos = FindFirstTrue(d, Eq(broadcasted, LoadU(d, in + i))); 42 | if (pos >= 0) return i + static_cast(pos); 43 | } 44 | } 45 | 46 | if (i != count) { 47 | #if HWY_MEM_OPS_MIGHT_FAULT 48 | // Scan single elements. 49 | const CappedTag d1; 50 | using V1 = Vec; 51 | const V1 broadcasted1 = Set(d1, GetLane(broadcasted)); 52 | for (; i < count; ++i) { 53 | if (AllTrue(d1, Eq(broadcasted1, LoadU(d1, in + i)))) { 54 | return i; 55 | } 56 | } 57 | #else 58 | const size_t remaining = count - i; 59 | HWY_DASSERT(0 != remaining && remaining < N); 60 | const Mask mask = FirstN(d, remaining); 61 | const Vec v = MaskedLoad(mask, d, in + i); 62 | // Apply mask so that we don't 'find' the zero-padding from MaskedLoad. 63 | const intptr_t pos = FindFirstTrue(d, And(Eq(broadcasted, v), mask)); 64 | if (pos >= 0) return i + static_cast(pos); 65 | #endif // HWY_MEM_OPS_MIGHT_FAULT 66 | } 67 | 68 | return count; // not found 69 | } 70 | 71 | // Returns index of the first element in `in[0, count)` for which `func(d, vec)` 72 | // returns true, otherwise `count`. 73 | template > 74 | size_t FindIf(D d, const T* HWY_RESTRICT in, size_t count, const Func& func) { 75 | const size_t N = Lanes(d); 76 | 77 | size_t i = 0; 78 | if (count >= N) { 79 | for (; i <= count - N; i += N) { 80 | const intptr_t pos = FindFirstTrue(d, func(d, LoadU(d, in + i))); 81 | if (pos >= 0) return i + static_cast(pos); 82 | } 83 | } 84 | 85 | if (i != count) { 86 | #if HWY_MEM_OPS_MIGHT_FAULT 87 | // Scan single elements. 
88 | const CappedTag d1; 89 | for (; i < count; ++i) { 90 | if (AllTrue(d1, func(d1, LoadU(d1, in + i)))) { 91 | return i; 92 | } 93 | } 94 | #else 95 | const size_t remaining = count - i; 96 | HWY_DASSERT(0 != remaining && remaining < N); 97 | const Mask mask = FirstN(d, remaining); 98 | const Vec v = MaskedLoad(mask, d, in + i); 99 | // Apply mask so that we don't 'find' the zero-padding from MaskedLoad. 100 | const intptr_t pos = FindFirstTrue(d, And(func(d, v), mask)); 101 | if (pos >= 0) return i + static_cast(pos); 102 | #endif // HWY_MEM_OPS_MIGHT_FAULT 103 | } 104 | 105 | return count; // not found 106 | } 107 | 108 | // NOLINTNEXTLINE(google-readability-namespace-comments) 109 | } // namespace HWY_NAMESPACE 110 | } // namespace hwy 111 | HWY_AFTER_NAMESPACE(); 112 | 113 | #endif // HIGHWAY_HWY_CONTRIB_ALGO_FIND_INL_H_ 114 | -------------------------------------------------------------------------------- /hwy/contrib/image/image.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/image/image.h" 17 | 18 | #include 19 | #include 20 | 21 | #include // std::swap 22 | 23 | #include "hwy/aligned_allocator.h" 24 | #include "hwy/base.h" 25 | #include "hwy/per_target.h" 26 | 27 | namespace hwy { 28 | 29 | size_t ImageBase::VectorSize() { 30 | // Do not cache result - must return the current value, which may be greater 31 | // than the first call if it was subject to DisableTargets! 32 | return VectorBytes(); 33 | } 34 | 35 | size_t ImageBase::BytesPerRow(const size_t xsize, const size_t sizeof_t) { 36 | const size_t vec_size = VectorSize(); 37 | size_t valid_bytes = xsize * sizeof_t; 38 | 39 | // Allow unaligned accesses starting at the last valid value - this may raise 40 | // msan errors unless the user calls InitializePaddingForUnalignedAccesses. 41 | // Skip for the scalar case because no extra lanes will be loaded. 42 | if (vec_size != 1) { 43 | HWY_DASSERT(vec_size >= sizeof_t); 44 | valid_bytes += vec_size - sizeof_t; 45 | } 46 | 47 | // Round up to vector and cache line size. 48 | const size_t align = HWY_MAX(vec_size, HWY_ALIGNMENT); 49 | size_t bytes_per_row = RoundUpTo(valid_bytes, align); 50 | 51 | // During the lengthy window before writes are committed to memory, CPUs 52 | // guard against read after write hazards by checking the address, but 53 | // only the lower 11 bits. We avoid a false dependency between writes to 54 | // consecutive rows by ensuring their sizes are not multiples of 2 KiB. 55 | // Avoid2K prevents the same problem for the planes of an Image3. 
56 | if (bytes_per_row % HWY_ALIGNMENT == 0) { 57 | bytes_per_row += align; 58 | } 59 | 60 | HWY_DASSERT(bytes_per_row % align == 0); 61 | return bytes_per_row; 62 | } 63 | 64 | ImageBase::ImageBase(const size_t xsize, const size_t ysize, 65 | const size_t sizeof_t) 66 | : xsize_(static_cast(xsize)), 67 | ysize_(static_cast(ysize)), 68 | bytes_(nullptr, AlignedFreer(&AlignedFreer::DoNothing, nullptr)) { 69 | HWY_ASSERT(sizeof_t == 1 || sizeof_t == 2 || sizeof_t == 4 || sizeof_t == 8); 70 | 71 | bytes_per_row_ = 0; 72 | // Dimensions can be zero, e.g. for lazily-allocated images. Only allocate 73 | // if nonzero, because "zero" bytes still have padding/bookkeeping overhead. 74 | if (xsize != 0 && ysize != 0) { 75 | bytes_per_row_ = BytesPerRow(xsize, sizeof_t); 76 | bytes_ = AllocateAligned(bytes_per_row_ * ysize); 77 | HWY_ASSERT(bytes_.get() != nullptr); 78 | InitializePadding(sizeof_t, Padding::kRoundUp); 79 | } 80 | } 81 | 82 | ImageBase::ImageBase(const size_t xsize, const size_t ysize, 83 | const size_t bytes_per_row, void* const aligned) 84 | : xsize_(static_cast(xsize)), 85 | ysize_(static_cast(ysize)), 86 | bytes_per_row_(bytes_per_row), 87 | bytes_(static_cast(aligned), 88 | AlignedFreer(&AlignedFreer::DoNothing, nullptr)) { 89 | const size_t vec_size = VectorSize(); 90 | HWY_ASSERT(bytes_per_row % vec_size == 0); 91 | HWY_ASSERT(reinterpret_cast(aligned) % vec_size == 0); 92 | } 93 | 94 | void ImageBase::InitializePadding(const size_t sizeof_t, Padding padding) { 95 | #if HWY_IS_MSAN || HWY_IDE 96 | if (xsize_ == 0 || ysize_ == 0) return; 97 | 98 | const size_t vec_size = VectorSize(); // Bytes, independent of sizeof_t! 99 | if (vec_size == 1) return; // Scalar mode: no padding needed 100 | 101 | const size_t valid_size = xsize_ * sizeof_t; 102 | const size_t initialize_size = padding == Padding::kRoundUp 103 | ? 
RoundUpTo(valid_size, vec_size) 104 | : valid_size + vec_size - sizeof_t; 105 | if (valid_size == initialize_size) return; 106 | 107 | for (size_t y = 0; y < ysize_; ++y) { 108 | uint8_t* HWY_RESTRICT row = static_cast(VoidRow(y)); 109 | #if defined(__clang__) && (__clang_major__ <= 6) 110 | // There's a bug in msan in clang-6 when handling AVX2 operations. This 111 | // workaround allows tests to pass on msan, although it is slower and 112 | // prevents msan warnings from uninitialized images. 113 | memset(row, 0, initialize_size); 114 | #else 115 | hwy::ZeroBytes(row + valid_size, initialize_size - valid_size); 116 | #endif // clang6 117 | } 118 | #else 119 | (void)sizeof_t; 120 | (void)padding; 121 | #endif // HWY_IS_MSAN 122 | } 123 | 124 | void ImageBase::Swap(ImageBase& other) { 125 | std::swap(xsize_, other.xsize_); 126 | std::swap(ysize_, other.ysize_); 127 | std::swap(bytes_per_row_, other.bytes_per_row_); 128 | std::swap(bytes_, other.bytes_); 129 | } 130 | 131 | } // namespace hwy 132 | -------------------------------------------------------------------------------- /hwy/contrib/sort/order.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Tag arguments that determine the sort order. 
Used by both vqsort.h and the 17 | // VQSortStatic in vqsort-inl.h. Moved to a separate header so that the latter 18 | // can be used without pulling in the dllimport statements in vqsort.h. 19 | 20 | #ifndef HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_ 21 | #define HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_ 22 | 23 | namespace hwy { 24 | 25 | struct SortAscending { 26 | static constexpr bool IsAscending() { return true; } 27 | }; 28 | struct SortDescending { 29 | static constexpr bool IsAscending() { return false; } 30 | }; 31 | 32 | } // namespace hwy 33 | 34 | #endif // HIGHWAY_HWY_CONTRIB_SORT_ORDER_H_ 35 | -------------------------------------------------------------------------------- /hwy/contrib/sort/print_network.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | 18 | #include 19 | 20 | #include "hwy/base.h" 21 | 22 | // Based on A.7 in "Entwurf und Implementierung vektorisierter 23 | // Sortieralgorithmen" and code by Mark Blacher. 
24 | static void PrintMergeNetwork(int rows, int cols) { 25 | printf("\n%d x %d:\n", rows, cols); 26 | // Powers of two 27 | HWY_ASSERT(rows != 0 && (rows & (rows - 1)) == 0); 28 | HWY_ASSERT(cols != 0 && (cols & (cols - 1)) == 0); 29 | HWY_ASSERT(rows >= 4); 30 | HWY_ASSERT(cols >= 2); // otherwise no cross-column merging required 31 | HWY_ASSERT(cols <= 16); // SortTraits lacks Reverse32 32 | 33 | // Log(rows) times: sort half of the vectors with reversed groups of the 34 | // other half. Group size halves until we are sorting adjacent vectors. 35 | int group_size = rows; 36 | int num_groups = 1; 37 | for (; group_size >= 2; group_size /= 2, num_groups *= 2) { 38 | // All vectors except those being reversed. Allows us to group the 39 | // ReverseKeys and Sort2 operations, which is easier to read and may help 40 | // in-order machines with high-latency ReverseKeys. 41 | std::vector all_vi; 42 | for (int group = 0; group < num_groups; ++group) { 43 | for (int i = 0; i < group_size / 2; ++i) { 44 | all_vi.push_back(group * group_size + i); 45 | } 46 | } 47 | for (int vi : all_vi) { 48 | const int vr = vi ^ (group_size - 1); 49 | printf("v%x = st.ReverseKeys%d(d, v%x);\n", vr, cols, vr); 50 | } 51 | for (int vi : all_vi) { 52 | const int vr = vi ^ (group_size - 1); 53 | printf("st.Sort2(d, v%x, v%x);\n", vi, vr); 54 | } 55 | printf("\n"); 56 | } 57 | 58 | // Now merge across columns in all vectors. 
59 | if (cols > 2) { 60 | for (int i = 0; i < rows; ++i) { 61 | printf("v%x = st.SortPairsReverse%d(d, v%x);\n", i, cols, i); 62 | } 63 | printf("\n"); 64 | } 65 | if (cols >= 16) { 66 | for (int i = 0; i < rows; ++i) { 67 | printf("v%x = st.SortPairsDistance4(d, v%x);\n", i, i); 68 | } 69 | printf("\n"); 70 | } 71 | if (cols >= 8) { 72 | for (int i = 0; i < rows; ++i) { 73 | printf("v%x = st.SortPairsDistance2(d, v%x);\n", i, i); 74 | } 75 | printf("\n"); 76 | } 77 | for (int i = 0; i < rows; ++i) { 78 | printf("v%x = st.SortPairsDistance1(d, v%x);\n", i, i); 79 | } 80 | printf("\n"); 81 | } 82 | 83 | int main(int argc, char** argv) { 84 | PrintMergeNetwork(8, 2); 85 | PrintMergeNetwork(8, 4); 86 | PrintMergeNetwork(16, 4); 87 | PrintMergeNetwork(16, 8); 88 | PrintMergeNetwork(16, 16); 89 | return 0; 90 | } 91 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_128a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_128a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void Sort128Asc(uint128_t* HWY_RESTRICT keys, const size_t num) { 31 | // 128-bit keys require 128-bit SIMD. 32 | #if HWY_TARGET != HWY_SCALAR 33 | return VQSortStatic(keys, num, SortAscending()); 34 | #else 35 | (void)keys; 36 | (void)num; 37 | #endif 38 | } 39 | 40 | void PartialSort128Asc(uint128_t* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | // 128-bit keys require 128-bit SIMD. 43 | #if HWY_TARGET != HWY_SCALAR 44 | return VQPartialSortStatic(keys, num, k, SortAscending()); 45 | #else 46 | (void)keys; 47 | (void)num; 48 | (void)k; 49 | #endif 50 | } 51 | 52 | void Select128Asc(uint128_t* HWY_RESTRICT keys, const size_t num, 53 | const size_t k) { 54 | // 128-bit keys require 128-bit SIMD. 
55 | #if HWY_TARGET != HWY_SCALAR 56 | return VQSelectStatic(keys, num, k, SortAscending()); 57 | #else 58 | (void)keys; 59 | (void)num; 60 | (void)k; 61 | #endif 62 | } 63 | 64 | } // namespace 65 | // NOLINTNEXTLINE(google-readability-namespace-comments) 66 | } // namespace HWY_NAMESPACE 67 | } // namespace hwy 68 | HWY_AFTER_NAMESPACE(); 69 | 70 | #if HWY_ONCE 71 | namespace hwy { 72 | namespace { 73 | HWY_EXPORT(Sort128Asc); 74 | HWY_EXPORT(PartialSort128Asc); 75 | HWY_EXPORT(Select128Asc); 76 | } // namespace 77 | 78 | void VQSort(uint128_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 79 | HWY_DYNAMIC_DISPATCH(Sort128Asc)(keys, n); 80 | } 81 | 82 | void VQPartialSort(uint128_t* HWY_RESTRICT keys, const size_t n, const size_t k, 83 | SortAscending) { 84 | HWY_DYNAMIC_DISPATCH(PartialSort128Asc)(keys, n, k); 85 | } 86 | 87 | void VQSelect(uint128_t* HWY_RESTRICT keys, const size_t n, const size_t k, 88 | SortAscending) { 89 | HWY_DYNAMIC_DISPATCH(Select128Asc)(keys, n, k); 90 | } 91 | 92 | } // namespace hwy 93 | #endif // HWY_ONCE 94 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_128d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_128d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void Sort128Desc(uint128_t* HWY_RESTRICT keys, const size_t num) { 31 | // 128-bit keys require 128-bit SIMD. 32 | #if HWY_TARGET != HWY_SCALAR 33 | return VQSortStatic(keys, num, SortDescending()); 34 | #else 35 | (void)keys; 36 | (void)num; 37 | #endif 38 | } 39 | 40 | void PartialSort128Desc(uint128_t* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | // 128-bit keys require 128-bit SIMD. 43 | #if HWY_TARGET != HWY_SCALAR 44 | return VQPartialSortStatic(keys, num, k, SortDescending()); 45 | #else 46 | (void)keys; 47 | (void)num; 48 | (void)k; 49 | #endif 50 | } 51 | 52 | void Select128Desc(uint128_t* HWY_RESTRICT keys, const size_t num, 53 | const size_t k) { 54 | // 128-bit keys require 128-bit SIMD. 
55 | #if HWY_TARGET != HWY_SCALAR 56 | return VQSelectStatic(keys, num, k, SortDescending()); 57 | #else 58 | (void)keys; 59 | (void)num; 60 | (void)k; 61 | #endif 62 | } 63 | 64 | } // namespace 65 | // NOLINTNEXTLINE(google-readability-namespace-comments) 66 | } // namespace HWY_NAMESPACE 67 | } // namespace hwy 68 | HWY_AFTER_NAMESPACE(); 69 | 70 | #if HWY_ONCE 71 | namespace hwy { 72 | namespace { 73 | HWY_EXPORT(Sort128Desc); 74 | HWY_EXPORT(PartialSort128Desc); 75 | HWY_EXPORT(Select128Desc); 76 | } // namespace 77 | 78 | void VQSort(uint128_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 79 | HWY_DYNAMIC_DISPATCH(Sort128Desc)(keys, n); 80 | } 81 | 82 | void VQPartialSort(uint128_t* HWY_RESTRICT keys, const size_t n, const size_t k, 83 | SortDescending) { 84 | HWY_DYNAMIC_DISPATCH(PartialSort128Desc)(keys, n, k); 85 | } 86 | 87 | void VQSelect(uint128_t* HWY_RESTRICT keys, const size_t n, const size_t k, 88 | SortDescending) { 89 | HWY_DYNAMIC_DISPATCH(Select128Desc)(keys, n, k); 90 | } 91 | 92 | } // namespace hwy 93 | #endif // HWY_ONCE 94 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f16a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF16Asc(float16_t* HWY_RESTRICT keys, const size_t num) { 31 | #if HWY_HAVE_FLOAT16 32 | return VQSortStatic(keys, num, SortAscending()); 33 | #else 34 | (void)keys; 35 | (void)num; 36 | HWY_ASSERT(0); 37 | #endif 38 | } 39 | 40 | void PartialSortF16Asc(float16_t* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | #if HWY_HAVE_FLOAT16 43 | return VQPartialSortStatic(keys, num, k, SortAscending()); 44 | #else 45 | (void)keys; 46 | (void)num; 47 | (void)k; 48 | HWY_ASSERT(0); 49 | #endif 50 | } 51 | 52 | void SelectF16Asc(float16_t* HWY_RESTRICT keys, const size_t num, 53 | const size_t k) { 54 | #if HWY_HAVE_FLOAT16 55 | return VQSelectStatic(keys, num, k, SortAscending()); 56 | #else 57 | (void)keys; 58 | (void)num; 59 | (void)k; 60 | HWY_ASSERT(0); 61 | #endif 62 | } 63 | 64 | } // namespace 65 | // NOLINTNEXTLINE(google-readability-namespace-comments) 66 | } // namespace HWY_NAMESPACE 67 | } // namespace hwy 68 | HWY_AFTER_NAMESPACE(); 69 | 70 | #if HWY_ONCE 71 | namespace hwy { 72 | namespace { 73 | HWY_EXPORT(SortF16Asc); 74 | HWY_EXPORT(PartialSortF16Asc); 75 | HWY_EXPORT(SelectF16Asc); 76 | } // namespace 77 | 78 | void VQSort(float16_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 79 | HWY_DYNAMIC_DISPATCH(SortF16Asc)(keys, n); 80 | } 81 | 82 | void VQPartialSort(float16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 83 | SortAscending) { 84 | HWY_DYNAMIC_DISPATCH(PartialSortF16Asc)(keys, n, k); 85 | } 86 | 87 | void VQSelect(float16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 88 | SortAscending) { 89 | 
HWY_DYNAMIC_DISPATCH(SelectF16Asc)(keys, n, k); 90 | } 91 | 92 | } // namespace hwy 93 | #endif // HWY_ONCE 94 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f16d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f16d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF16Desc(float16_t* HWY_RESTRICT keys, const size_t num) { 31 | #if HWY_HAVE_FLOAT16 32 | return VQSortStatic(keys, num, SortDescending()); 33 | #else 34 | (void)keys; 35 | (void)num; 36 | HWY_ASSERT(0); 37 | #endif 38 | } 39 | 40 | void PartialSortF16Desc(float16_t* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | #if HWY_HAVE_FLOAT16 43 | return VQPartialSortStatic(keys, num, k, SortDescending()); 44 | #else 45 | (void)keys; 46 | (void)num; 47 | (void)k; 48 | HWY_ASSERT(0); 49 | #endif 50 | } 51 | 52 | void SelectF16Desc(float16_t* HWY_RESTRICT keys, const size_t 
num, 53 | const size_t k) { 54 | #if HWY_HAVE_FLOAT16 55 | return VQSelectStatic(keys, num, k, SortDescending()); 56 | #else 57 | (void)keys; 58 | (void)num; 59 | (void)k; 60 | HWY_ASSERT(0); 61 | #endif 62 | } 63 | 64 | } // namespace 65 | // NOLINTNEXTLINE(google-readability-namespace-comments) 66 | } // namespace HWY_NAMESPACE 67 | } // namespace hwy 68 | HWY_AFTER_NAMESPACE(); 69 | 70 | #if HWY_ONCE 71 | namespace hwy { 72 | namespace { 73 | HWY_EXPORT(SortF16Desc); 74 | HWY_EXPORT(PartialSortF16Desc); 75 | HWY_EXPORT(SelectF16Desc); 76 | } // namespace 77 | 78 | void VQSort(float16_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 79 | HWY_DYNAMIC_DISPATCH(SortF16Desc)(keys, n); 80 | } 81 | 82 | void VQPartialSort(float16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 83 | SortDescending) { 84 | HWY_DYNAMIC_DISPATCH(PartialSortF16Desc)(keys, n, k); 85 | } 86 | 87 | void VQSelect(float16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 88 | SortDescending) { 89 | HWY_DYNAMIC_DISPATCH(SelectF16Desc)(keys, n, k); 90 | } 91 | 92 | } // namespace hwy 93 | #endif // HWY_ONCE 94 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f32a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f32a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF32Asc(float* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortF32Asc(float* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectF32Asc(float* HWY_RESTRICT keys, const size_t num, const size_t k) { 40 | return VQSelectStatic(keys, num, k, SortAscending()); 41 | } 42 | 43 | } // namespace 44 | // NOLINTNEXTLINE(google-readability-namespace-comments) 45 | } // namespace HWY_NAMESPACE 46 | } // namespace hwy 47 | HWY_AFTER_NAMESPACE(); 48 | 49 | #if HWY_ONCE 50 | namespace hwy { 51 | namespace { 52 | HWY_EXPORT(SortF32Asc); 53 | HWY_EXPORT(PartialSortF32Asc); 54 | HWY_EXPORT(SelectF32Asc); 55 | } // namespace 56 | 57 | void VQSort(float* HWY_RESTRICT keys, const size_t n, SortAscending) { 58 | HWY_DYNAMIC_DISPATCH(SortF32Asc)(keys, n); 59 | } 60 | 61 | void VQPartialSort(float* HWY_RESTRICT keys, const size_t n, const size_t k, 62 | SortAscending) { 63 | HWY_DYNAMIC_DISPATCH(PartialSortF32Asc)(keys, n, k); 64 | } 65 | 66 | void VQSelect(float* HWY_RESTRICT keys, const size_t n, const size_t k, 67 | SortAscending) { 68 | HWY_DYNAMIC_DISPATCH(SelectF32Asc)(keys, n, k); 69 | } 70 | 71 | } // namespace hwy 72 | #endif // HWY_ONCE 73 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f32d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f32d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF32Desc(float* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortF32Desc(float* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectF32Desc(float* HWY_RESTRICT keys, const size_t num, const size_t k) { 40 | return VQSelectStatic(keys, num, k, SortDescending()); 41 | } 42 | 43 | } // namespace 44 | // NOLINTNEXTLINE(google-readability-namespace-comments) 45 | } // namespace HWY_NAMESPACE 46 | } // namespace hwy 47 | HWY_AFTER_NAMESPACE(); 48 | 49 | #if HWY_ONCE 50 | namespace hwy { 51 | namespace { 52 | HWY_EXPORT(SortF32Desc); 53 | HWY_EXPORT(PartialSortF32Desc); 54 | HWY_EXPORT(SelectF32Desc); 55 | } // namespace 56 | 57 | void VQSort(float* HWY_RESTRICT keys, const size_t n, SortDescending) { 58 | HWY_DYNAMIC_DISPATCH(SortF32Desc)(keys, n); 59 | } 
60 | 61 | void VQPartialSort(float* HWY_RESTRICT keys, const size_t n, const size_t k, 62 | SortDescending) { 63 | HWY_DYNAMIC_DISPATCH(PartialSortF32Desc)(keys, n, k); 64 | } 65 | 66 | void VQSelect(float* HWY_RESTRICT keys, const size_t n, const size_t k, 67 | SortDescending) { 68 | HWY_DYNAMIC_DISPATCH(SelectF32Desc)(keys, n, k); 69 | } 70 | 71 | } // namespace hwy 72 | #endif // HWY_ONCE 73 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f64a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f64a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF64Asc(double* HWY_RESTRICT keys, const size_t num) { 31 | #if HWY_HAVE_FLOAT64 32 | return VQSortStatic(keys, num, SortAscending()); 33 | #else 34 | (void)keys; 35 | (void)num; 36 | HWY_ASSERT(0); 37 | #endif 38 | } 39 | 40 | void PartialSortF64Asc(double* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | #if HWY_HAVE_FLOAT64 43 | return VQPartialSortStatic(keys, num, k, SortAscending()); 44 | #else 45 | (void)keys; 46 | (void)num; 47 | (void)k; 48 | HWY_ASSERT(0); 49 | #endif 50 | } 51 | 52 | void SelectF64Asc(double* HWY_RESTRICT keys, const size_t num, const size_t k) { 53 | #if HWY_HAVE_FLOAT64 54 | return VQSelectStatic(keys, num, k, SortAscending()); 55 | #else 56 | (void)keys; 57 | (void)num; 58 | (void)k; 59 | HWY_ASSERT(0); 60 | #endif 61 | } 62 | 63 | } // namespace 64 | // NOLINTNEXTLINE(google-readability-namespace-comments) 65 | } // namespace HWY_NAMESPACE 66 | } // namespace hwy 67 | HWY_AFTER_NAMESPACE(); 68 | 69 | #if HWY_ONCE 70 | namespace hwy { 71 | namespace { 72 | HWY_EXPORT(SortF64Asc); 73 | HWY_EXPORT(PartialSortF64Asc); 74 | HWY_EXPORT(SelectF64Asc); 75 | } // namespace 76 | 77 | void VQSort(double* HWY_RESTRICT keys, const size_t n, SortAscending) { 78 | HWY_DYNAMIC_DISPATCH(SortF64Asc)(keys, n); 79 | } 80 | 81 | void VQPartialSort(double* HWY_RESTRICT keys, const size_t n, const size_t k, 82 | SortAscending) { 83 | HWY_DYNAMIC_DISPATCH(PartialSortF64Asc)(keys, n, k); 84 | } 85 | 86 | void VQSelect(double* HWY_RESTRICT keys, const size_t n, const size_t k, 87 | SortAscending) { 88 | 
HWY_DYNAMIC_DISPATCH(SelectF64Asc)(keys, n, k); 89 | } 90 | 91 | } // namespace hwy 92 | #endif // HWY_ONCE 93 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_f64d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_f64d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortF64Desc(double* HWY_RESTRICT keys, const size_t num) { 31 | #if HWY_HAVE_FLOAT64 32 | return VQSortStatic(keys, num, SortDescending()); 33 | #else 34 | (void)keys; 35 | (void)num; 36 | HWY_ASSERT(0); 37 | #endif 38 | } 39 | 40 | void PartialSortF64Desc(double* HWY_RESTRICT keys, const size_t num, 41 | const size_t k) { 42 | #if HWY_HAVE_FLOAT64 43 | return VQPartialSortStatic(keys, num, k, SortDescending()); 44 | #else 45 | (void)keys; 46 | (void)num; 47 | (void)k; 48 | HWY_ASSERT(0); 49 | #endif 50 | } 51 | 52 | void SelectF64Desc(double* HWY_RESTRICT keys, const size_t num, 53 | 
const size_t k) { 54 | #if HWY_HAVE_FLOAT64 55 | return VQSelectStatic(keys, num, k, SortDescending()); 56 | #else 57 | (void)keys; 58 | (void)num; 59 | (void)k; 60 | HWY_ASSERT(0); 61 | #endif 62 | } 63 | 64 | } // namespace 65 | // NOLINTNEXTLINE(google-readability-namespace-comments) 66 | } // namespace HWY_NAMESPACE 67 | } // namespace hwy 68 | HWY_AFTER_NAMESPACE(); 69 | 70 | #if HWY_ONCE 71 | namespace hwy { 72 | namespace { 73 | HWY_EXPORT(SortF64Desc); 74 | HWY_EXPORT(PartialSortF64Desc); 75 | HWY_EXPORT(SelectF64Desc); 76 | } // namespace 77 | 78 | void VQSort(double* HWY_RESTRICT keys, const size_t n, SortDescending) { 79 | HWY_DYNAMIC_DISPATCH(SortF64Desc)(keys, n); 80 | } 81 | 82 | void VQPartialSort(double* HWY_RESTRICT keys, const size_t n, const size_t k, 83 | SortDescending) { 84 | HWY_DYNAMIC_DISPATCH(PartialSortF64Desc)(keys, n, k); 85 | } 86 | 87 | void VQSelect(double* HWY_RESTRICT keys, const size_t n, const size_t k, 88 | SortDescending) { 89 | HWY_DYNAMIC_DISPATCH(SelectF64Desc)(keys, n, k); 90 | } 91 | 92 | } // namespace hwy 93 | #endif // HWY_ONCE 94 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i16a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i16a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI16Asc(int16_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortI16Asc(int16_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectI16Asc(int16_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI16Asc); 54 | HWY_EXPORT(PartialSortI16Asc); 55 | HWY_EXPORT(SelectI16Asc); 56 | } // namespace 57 | 58 | void VQSort(int16_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortI16Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI16Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI16Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i16d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i16d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI16Desc(int16_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortI16Desc(int16_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectI16Desc(int16_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI16Desc); 54 | HWY_EXPORT(PartialSortI16Desc); 55 | HWY_EXPORT(SelectI16Desc); 56 | } // namespace 57 | 58 | void VQSort(int16_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortI16Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI16Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI16Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i32a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i32a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI32Asc(int32_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortI32Asc(int32_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectI32Asc(int32_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI32Asc); 54 | HWY_EXPORT(PartialSortI32Asc); 55 | HWY_EXPORT(SelectI32Asc); 56 | } // namespace 57 | 58 | void VQSort(int32_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortI32Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI32Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI32Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i32d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i32d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI32Desc(int32_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortI32Desc(int32_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectI32Desc(int32_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI32Desc); 54 | HWY_EXPORT(PartialSortI32Desc); 55 | HWY_EXPORT(SelectI32Desc); 56 | } // namespace 57 | 58 | void VQSort(int32_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortI32Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI32Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI32Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i64a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i64a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI64Asc(int64_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortI64Asc(int64_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectI64Asc(int64_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI64Asc); 54 | HWY_EXPORT(PartialSortI64Asc); 55 | HWY_EXPORT(SelectI64Asc); 56 | } // namespace 57 | 58 | void VQSort(int64_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortI64Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI64Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI64Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_i64d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // 
SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_i64d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortI64Desc(int64_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortI64Desc(int64_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectI64Desc(int64_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortI64Desc); 54 | HWY_EXPORT(PartialSortI64Desc); 55 | HWY_EXPORT(SelectI64Desc); 56 | } // namespace 57 | 58 | void VQSort(int64_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortI64Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(int64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortI64Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(int64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectI64Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_kv128a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | // clang-format off 20 | // (avoid line break, which would prevent Copybara rules from matching) 21 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv128a.cc" //NOLINT 22 | // clang-format on 23 | #include "hwy/foreach_target.h" // IWYU pragma: keep 24 | 25 | // After foreach_target 26 | #include "hwy/contrib/sort/vqsort-inl.h" 27 | 28 | HWY_BEFORE_NAMESPACE(); 29 | namespace hwy { 30 | namespace HWY_NAMESPACE { 31 | namespace { 32 | 33 | void SortKV128Asc(K64V64* HWY_RESTRICT keys, const size_t num) { 34 | // 128-bit keys require 128-bit SIMD. 
35 | #if HWY_TARGET != HWY_SCALAR 36 | return VQSortStatic(keys, num, SortAscending()); 37 | #else 38 | (void)keys; 39 | (void)num; 40 | #endif 41 | } 42 | 43 | void PartialSortKV128Asc(K64V64* HWY_RESTRICT keys, const size_t num, 44 | const size_t k) { 45 | // 128-bit keys require 128-bit SIMD. 46 | #if HWY_TARGET != HWY_SCALAR 47 | return VQPartialSortStatic(keys, num, k, SortAscending()); 48 | #else 49 | (void)keys; 50 | (void)num; 51 | (void)k; 52 | #endif 53 | } 54 | 55 | void SelectKV128Asc(K64V64* HWY_RESTRICT keys, const size_t num, 56 | const size_t k) { 57 | // 128-bit keys require 128-bit SIMD. 58 | #if HWY_TARGET != HWY_SCALAR 59 | return VQSelectStatic(keys, num, k, SortAscending()); 60 | #else 61 | (void)keys; 62 | (void)num; 63 | (void)k; 64 | #endif 65 | } 66 | 67 | } // namespace 68 | // NOLINTNEXTLINE(google-readability-namespace-comments) 69 | } // namespace HWY_NAMESPACE 70 | } // namespace hwy 71 | HWY_AFTER_NAMESPACE(); 72 | 73 | #if HWY_ONCE 74 | namespace hwy { 75 | namespace { 76 | HWY_EXPORT(SortKV128Asc); 77 | HWY_EXPORT(PartialSortKV128Asc); 78 | HWY_EXPORT(SelectKV128Asc); 79 | } // namespace 80 | 81 | void VQSort(K64V64* HWY_RESTRICT keys, const size_t n, SortAscending) { 82 | HWY_DYNAMIC_DISPATCH(SortKV128Asc)(keys, n); 83 | } 84 | 85 | void VQPartialSort(K64V64* HWY_RESTRICT keys, const size_t n, const size_t k, 86 | SortAscending) { 87 | HWY_DYNAMIC_DISPATCH(PartialSortKV128Asc)(keys, n, k); 88 | } 89 | 90 | void VQSelect(K64V64* HWY_RESTRICT keys, const size_t n, const size_t k, 91 | SortAscending) { 92 | HWY_DYNAMIC_DISPATCH(SelectKV128Asc)(keys, n, k); 93 | } 94 | 95 | } // namespace hwy 96 | #endif // HWY_ONCE 97 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_kv128d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache 
License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | // clang-format off 20 | // (avoid line break, which would prevent Copybara rules from matching) 21 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv128d.cc" //NOLINT 22 | // clang-format on 23 | #include "hwy/foreach_target.h" // IWYU pragma: keep 24 | 25 | // After foreach_target 26 | #include "hwy/contrib/sort/vqsort-inl.h" 27 | 28 | HWY_BEFORE_NAMESPACE(); 29 | namespace hwy { 30 | namespace HWY_NAMESPACE { 31 | namespace { 32 | 33 | void SortKV128Desc(K64V64* HWY_RESTRICT keys, const size_t num) { 34 | // 128-bit keys require 128-bit SIMD. 35 | #if HWY_TARGET != HWY_SCALAR 36 | return VQSortStatic(keys, num, SortDescending()); 37 | #else 38 | (void)keys; 39 | (void)num; 40 | #endif 41 | } 42 | 43 | void PartialSortKV128Desc(K64V64* HWY_RESTRICT keys, const size_t num, 44 | const size_t k) { 45 | // 128-bit keys require 128-bit SIMD. 46 | #if HWY_TARGET != HWY_SCALAR 47 | return VQPartialSortStatic(keys, num, k, SortDescending()); 48 | #else 49 | (void)keys; 50 | (void)num; 51 | (void)k; 52 | #endif 53 | } 54 | 55 | void SelectKV128Desc(K64V64* HWY_RESTRICT keys, const size_t num, 56 | const size_t k) { 57 | // 128-bit keys require 128-bit SIMD. 
58 | #if HWY_TARGET != HWY_SCALAR 59 | return VQSelectStatic(keys, num, k, SortDescending()); 60 | #else 61 | (void)keys; 62 | (void)num; 63 | (void)k; 64 | #endif 65 | } 66 | 67 | } // namespace 68 | // NOLINTNEXTLINE(google-readability-namespace-comments) 69 | } // namespace HWY_NAMESPACE 70 | } // namespace hwy 71 | HWY_AFTER_NAMESPACE(); 72 | 73 | #if HWY_ONCE 74 | namespace hwy { 75 | namespace { 76 | HWY_EXPORT(SortKV128Desc); 77 | HWY_EXPORT(PartialSortKV128Desc); 78 | HWY_EXPORT(SelectKV128Desc); 79 | } // namespace 80 | 81 | void VQSort(K64V64* HWY_RESTRICT keys, const size_t n, SortDescending) { 82 | HWY_DYNAMIC_DISPATCH(SortKV128Desc)(keys, n); 83 | } 84 | 85 | void VQPartialSort(K64V64* HWY_RESTRICT keys, const size_t n, const size_t k, 86 | SortDescending) { 87 | HWY_DYNAMIC_DISPATCH(PartialSortKV128Desc)(keys, n, k); 88 | } 89 | 90 | void VQSelect(K64V64* HWY_RESTRICT keys, const size_t n, const size_t k, 91 | SortDescending) { 92 | HWY_DYNAMIC_DISPATCH(SelectKV128Desc)(keys, n, k); 93 | } 94 | 95 | } // namespace hwy 96 | #endif // HWY_ONCE 97 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_kv64a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | // clang-format off 20 | // (avoid line break, which would prevent Copybara rules from matching) 21 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv64a.cc" //NOLINT 22 | // clang-format on 23 | #include "hwy/foreach_target.h" // IWYU pragma: keep 24 | 25 | // After foreach_target 26 | #include "hwy/contrib/sort/vqsort-inl.h" 27 | 28 | HWY_BEFORE_NAMESPACE(); 29 | namespace hwy { 30 | namespace HWY_NAMESPACE { 31 | namespace { 32 | 33 | void SortKV64Asc(K32V32* HWY_RESTRICT keys, const size_t num) { 34 | return VQSortStatic(keys, num, SortAscending()); 35 | } 36 | 37 | void PartialSortKV64Asc(K32V32* HWY_RESTRICT keys, const size_t num, 38 | const size_t k) { 39 | return VQPartialSortStatic(keys, num, k, SortAscending()); 40 | } 41 | 42 | void SelectKV64Asc(K32V32* HWY_RESTRICT keys, const size_t num, 43 | const size_t k) { 44 | return VQSelectStatic(keys, num, k, SortAscending()); 45 | } 46 | 47 | } // namespace 48 | // NOLINTNEXTLINE(google-readability-namespace-comments) 49 | } // namespace HWY_NAMESPACE 50 | } // namespace hwy 51 | HWY_AFTER_NAMESPACE(); 52 | 53 | #if HWY_ONCE 54 | namespace hwy { 55 | namespace { 56 | HWY_EXPORT(SortKV64Asc); 57 | HWY_EXPORT(PartialSortKV64Asc); 58 | HWY_EXPORT(SelectKV64Asc); 59 | } // namespace 60 | 61 | void VQSort(K32V32* HWY_RESTRICT keys, const size_t n, SortAscending) { 62 | HWY_DYNAMIC_DISPATCH(SortKV64Asc)(keys, n); 63 | } 64 | 65 | void VQPartialSort(K32V32* HWY_RESTRICT keys, const size_t n, const size_t k, 66 | SortAscending) { 67 | HWY_DYNAMIC_DISPATCH(PartialSortKV64Asc)(keys, n, k); 68 | } 69 | 70 | void VQSelect(K32V32* HWY_RESTRICT keys, const size_t n, const size_t k, 71 | SortAscending) { 72 | HWY_DYNAMIC_DISPATCH(SelectKV64Asc)(keys, n, k); 73 | } 74 | 75 | } // namespace hwy 76 | #endif // HWY_ONCE 77 | -------------------------------------------------------------------------------- 
/hwy/contrib/sort/vqsort_kv64d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | // clang-format off 20 | // (avoid line break, which would prevent Copybara rules from matching) 21 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_kv64d.cc" //NOLINT 22 | // clang-format on 23 | #include "hwy/foreach_target.h" // IWYU pragma: keep 24 | 25 | // After foreach_target 26 | #include "hwy/contrib/sort/vqsort-inl.h" 27 | 28 | HWY_BEFORE_NAMESPACE(); 29 | namespace hwy { 30 | namespace HWY_NAMESPACE { 31 | namespace { 32 | 33 | void SortKV64Desc(K32V32* HWY_RESTRICT keys, const size_t num) { 34 | return VQSortStatic(keys, num, SortDescending()); 35 | } 36 | 37 | void PartialSortKV64Desc(K32V32* HWY_RESTRICT keys, const size_t num, 38 | const size_t k) { 39 | return VQPartialSortStatic(keys, num, k, SortDescending()); 40 | } 41 | 42 | void SelectKV64Desc(K32V32* HWY_RESTRICT keys, const size_t num, 43 | const size_t k) { 44 | return VQSelectStatic(keys, num, k, SortDescending()); 45 | } 46 | 47 | } // namespace 48 | // NOLINTNEXTLINE(google-readability-namespace-comments) 49 | } // namespace HWY_NAMESPACE 50 | } // namespace hwy 51 | HWY_AFTER_NAMESPACE(); 52 | 53 | #if 
HWY_ONCE 54 | namespace hwy { 55 | namespace { 56 | HWY_EXPORT(SortKV64Desc); 57 | HWY_EXPORT(PartialSortKV64Desc); 58 | HWY_EXPORT(SelectKV64Desc); 59 | } // namespace 60 | 61 | void VQSort(K32V32* HWY_RESTRICT keys, const size_t n, SortDescending) { 62 | HWY_DYNAMIC_DISPATCH(SortKV64Desc)(keys, n); 63 | } 64 | 65 | void VQPartialSort(K32V32* HWY_RESTRICT keys, const size_t n, const size_t k, 66 | SortDescending) { 67 | HWY_DYNAMIC_DISPATCH(PartialSortKV64Desc)(keys, n, k); 68 | } 69 | 70 | void VQSelect(K32V32* HWY_RESTRICT keys, const size_t n, const size_t k, 71 | SortDescending) { 72 | HWY_DYNAMIC_DISPATCH(SelectKV64Desc)(keys, n, k); 73 | } 74 | 75 | } // namespace hwy 76 | #endif // HWY_ONCE 77 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u16a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u16a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU16Asc(uint16_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortU16Asc(uint16_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectU16Asc(uint16_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU16Asc); 54 | HWY_EXPORT(PartialSortU16Asc); 55 | HWY_EXPORT(SelectU16Asc); 56 | } // namespace 57 | 58 | void VQSort(uint16_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortU16Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU16Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU16Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u16d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | 
// SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u16d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU16Desc(uint16_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortU16Desc(uint16_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectU16Desc(uint16_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU16Desc); 54 | HWY_EXPORT(PartialSortU16Desc); 55 | HWY_EXPORT(SelectU16Desc); 56 | } // namespace 57 | 58 | void VQSort(uint16_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortU16Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU16Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint16_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU16Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u32a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u32a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU32Asc(uint32_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortU32Asc(uint32_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectU32Asc(uint32_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU32Asc); 54 | HWY_EXPORT(PartialSortU32Asc); 55 | HWY_EXPORT(SelectU32Asc); 56 | } // namespace 57 | 58 | void VQSort(uint32_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortU32Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU32Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU32Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u32d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | 
// SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u32d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU32Desc(uint32_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortU32Desc(uint32_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectU32Desc(uint32_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU32Desc); 54 | HWY_EXPORT(PartialSortU32Desc); 55 | HWY_EXPORT(SelectU32Desc); 56 | } // namespace 57 | 58 | void VQSort(uint32_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortU32Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU32Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint32_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU32Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u64a.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u64a.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU64Asc(uint64_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortAscending()); 32 | } 33 | 34 | void PartialSortU64Asc(uint64_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortAscending()); 37 | } 38 | 39 | void SelectU64Asc(uint64_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortAscending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU64Asc); 54 | HWY_EXPORT(PartialSortU64Asc); 55 | HWY_EXPORT(SelectU64Asc); 56 | } // namespace 57 | 58 | void VQSort(uint64_t* HWY_RESTRICT keys, const size_t n, SortAscending) { 59 | HWY_DYNAMIC_DISPATCH(SortU64Asc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortAscending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU64Asc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortAscending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU64Asc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/sort/vqsort_u64d.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | 
// SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/contrib/sort/vqsort.h" // VQSort 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "hwy/contrib/sort/vqsort_u64d.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | 22 | // After foreach_target 23 | #include "hwy/contrib/sort/vqsort-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | void SortU64Desc(uint64_t* HWY_RESTRICT keys, const size_t num) { 31 | return VQSortStatic(keys, num, SortDescending()); 32 | } 33 | 34 | void PartialSortU64Desc(uint64_t* HWY_RESTRICT keys, const size_t num, 35 | const size_t k) { 36 | return VQPartialSortStatic(keys, num, k, SortDescending()); 37 | } 38 | 39 | void SelectU64Desc(uint64_t* HWY_RESTRICT keys, const size_t num, 40 | const size_t k) { 41 | return VQSelectStatic(keys, num, k, SortDescending()); 42 | } 43 | 44 | } // namespace 45 | // NOLINTNEXTLINE(google-readability-namespace-comments) 46 | } // namespace HWY_NAMESPACE 47 | } // namespace hwy 48 | HWY_AFTER_NAMESPACE(); 49 | 50 | #if HWY_ONCE 51 | namespace hwy { 52 | namespace { 53 | HWY_EXPORT(SortU64Desc); 54 | HWY_EXPORT(PartialSortU64Desc); 55 | HWY_EXPORT(SelectU64Desc); 56 | } // namespace 57 | 58 | void VQSort(uint64_t* HWY_RESTRICT keys, const size_t n, SortDescending) { 59 | 
HWY_DYNAMIC_DISPATCH(SortU64Desc)(keys, n); 60 | } 61 | 62 | void VQPartialSort(uint64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 63 | SortDescending) { 64 | HWY_DYNAMIC_DISPATCH(PartialSortU64Desc)(keys, n, k); 65 | } 66 | 67 | void VQSelect(uint64_t* HWY_RESTRICT keys, const size_t n, const size_t k, 68 | SortDescending) { 69 | HWY_DYNAMIC_DISPATCH(SelectU64Desc)(keys, n, k); 70 | } 71 | 72 | } // namespace hwy 73 | #endif // HWY_ONCE 74 | -------------------------------------------------------------------------------- /hwy/contrib/thread_pool/spin_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2025 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/thread_pool/spin.h" 17 | 18 | #include 19 | #include 20 | #include 21 | 22 | #include 23 | 24 | #include "hwy/aligned_allocator.h" // HWY_ALIGNMENT 25 | #include "hwy/contrib/thread_pool/futex.h" // NanoSleep 26 | #include "hwy/contrib/thread_pool/thread_pool.h" 27 | #include "hwy/contrib/thread_pool/topology.h" 28 | #include "hwy/tests/hwy_gtest.h" 29 | #include "hwy/tests/test_util-inl.h" 30 | #include "hwy/timer.h" 31 | 32 | namespace hwy { 33 | namespace { 34 | 35 | struct TestPingPongT { 36 | template 37 | void operator()(const Spin& spin) const { 38 | constexpr size_t kU32PerLine = HWY_ALIGNMENT / 4; 39 | constexpr size_t kF64PerLine = HWY_ALIGNMENT / 8; 40 | alignas(HWY_ALIGNMENT) std::atomic thread_active[kU32PerLine]; 41 | alignas(HWY_ALIGNMENT) std::atomic thread_done[kU32PerLine]; 42 | 43 | thread_active[0].store(0, std::memory_order_release); 44 | thread_done[0].store(0, std::memory_order_release); 45 | hwy::ThreadPool pool(1); 46 | HWY_ASSERT(pool.NumWorkers() == 2); 47 | 48 | const double t0 = hwy::platform::Now(); 49 | std::atomic_flag error = ATOMIC_FLAG_INIT; 50 | 51 | alignas(HWY_ALIGNMENT) std::atomic reps1; 52 | alignas(HWY_ALIGNMENT) std::atomic reps2; 53 | 54 | alignas(HWY_ALIGNMENT) std::atomic before_thread_done[kF64PerLine]; 55 | alignas(HWY_ALIGNMENT) std::atomic before_thread_go[kF64PerLine]; 56 | alignas(HWY_ALIGNMENT) std::atomic ack_thread_done[kF64PerLine]; 57 | alignas(HWY_ALIGNMENT) std::atomic ack_thread_release[kF64PerLine]; 58 | 59 | const auto kAcq = std::memory_order_acquire; 60 | const auto kRel = std::memory_order_release; 61 | pool.Run(0, 2, [&](uint64_t task, size_t thread) { 62 | HWY_ASSERT(task == thread); 63 | if (task == 0) { // new thread 64 | SpinResult result = spin.UntilDifferent(0, thread_active[0]); 65 | ack_thread_release[0].store(hwy::platform::Now(), kRel); 66 | reps1.store(result.reps); 67 | if (!NanoSleep(20 * 1000 * 1000)) { 68 | error.test_and_set(); 69 | } 70 | 
before_thread_done[0].store(hwy::platform::Now(), kRel); 71 | thread_done[0].store(1, kRel); 72 | } else { // main thread 73 | if (!NanoSleep(30 * 1000 * 1000)) { 74 | error.test_and_set(); 75 | } 76 | // Release the thread. 77 | before_thread_go[0].store(hwy::platform::Now(), kRel); 78 | thread_active[0].store(1, kRel); 79 | // Wait for it to finish. 80 | const size_t reps = spin.UntilEqual(1, thread_done[0]); 81 | ack_thread_done[0].store(hwy::platform::Now(), kRel); 82 | reps2.store(reps); 83 | } 84 | }); 85 | 86 | const double t1 = hwy::platform::Now(); 87 | const double elapsed = t1 - t0; 88 | const double latency1 = 89 | ack_thread_release[0].load(kAcq) - before_thread_go[0].load(kAcq); 90 | const double latency2 = 91 | ack_thread_done[0].load(kAcq) - before_thread_done[0].load(kAcq); 92 | fprintf(stderr, 93 | "Elapsed time: %f us; reps1=%zu, reps2=%zu, latency=%f %f us\n", 94 | elapsed * 1E6, reps1.load(), reps2.load(), latency1 * 1E6, 95 | latency2 * 1E6); 96 | // Unless NanoSleep failed to sleep, this should take 50ms+epsilon. 97 | HWY_ASSERT(error.test_and_set() || elapsed > 25E-3); 98 | } 99 | }; // namespace hwy 100 | 101 | // Simple mutex. 
102 | TEST(SpinTest, TestPingPong) { 103 | if (!HaveThreadingSupport()) { 104 | HWY_WARN("Threads not supported, skipping test\n"); 105 | return; 106 | } 107 | 108 | const SpinType spin_type = DetectSpin(); 109 | fprintf(stderr, "Spin method : %s\n", ToString(spin_type)); 110 | CallWithSpin(spin_type, TestPingPongT()); 111 | } 112 | 113 | } // namespace 114 | } // namespace hwy 115 | 116 | HWY_TEST_MAIN(); 117 | -------------------------------------------------------------------------------- /hwy/contrib/thread_pool/topology_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include "hwy/contrib/thread_pool/topology.h" 17 | 18 | #include 19 | #include 20 | 21 | #include 22 | 23 | #include "hwy/base.h" 24 | #include "hwy/tests/hwy_gtest.h" 25 | #include "hwy/tests/test_util-inl.h" 26 | #include "hwy/timer.h" 27 | 28 | namespace hwy { 29 | namespace { 30 | 31 | TEST(TopologyTest, TestNum) { 32 | const size_t total = TotalLogicalProcessors(); 33 | fprintf(stderr, "TotalLogical %zu\n", total); 34 | 35 | LogicalProcessorSet lps; 36 | if (GetThreadAffinity(lps)) { 37 | fprintf(stderr, "Active %zu\n", lps.Count()); 38 | HWY_ASSERT(lps.Count() <= total); 39 | } 40 | } 41 | 42 | TEST(TopologyTest, TestTopology) { 43 | char cpu100[100]; 44 | if (hwy::platform::GetCpuString(cpu100)) { 45 | fprintf(stderr, "%s\n", cpu100); 46 | } 47 | 48 | Topology topology; 49 | if (topology.packages.empty()) return; 50 | 51 | fprintf(stderr, "Topology: %zuP %zuX %zuC\n", topology.packages.size(), 52 | topology.packages[0].clusters.size(), 53 | topology.packages[0].clusters[0].lps.Count()); 54 | 55 | HWY_ASSERT(!topology.lps.empty()); 56 | LogicalProcessorSet nodes; 57 | for (size_t lp = 0; lp < topology.lps.size(); ++lp) { 58 | const size_t node = static_cast(topology.lps[lp].node); 59 | if (!nodes.Get(node)) { 60 | fprintf(stderr, "Found NUMA node %zu, LP %zu\n", node, lp); 61 | nodes.Set(node); 62 | } 63 | } 64 | 65 | size_t lps_by_cluster = 0; 66 | size_t lps_by_core = 0; 67 | LogicalProcessorSet all_lps; 68 | for (const Topology::Package& pkg : topology.packages) { 69 | HWY_ASSERT(!pkg.clusters.empty()); 70 | HWY_ASSERT(!pkg.cores.empty()); 71 | HWY_ASSERT(pkg.clusters.size() <= pkg.cores.size()); 72 | 73 | for (const Topology::Cluster& c : pkg.clusters) { 74 | lps_by_cluster += c.lps.Count(); 75 | c.lps.Foreach([&all_lps](size_t lp) { all_lps.Set(lp); }); 76 | } 77 | for (const Topology::Core& c : pkg.cores) { 78 | lps_by_core += c.lps.Count(); 79 | c.lps.Foreach([&all_lps](size_t lp) { all_lps.Set(lp); }); 80 | } 81 | } 82 | // Ensure the 
per-cluster and per-core sets sum to the total. 83 | HWY_ASSERT(lps_by_cluster == topology.lps.size()); 84 | HWY_ASSERT(lps_by_core == topology.lps.size()); 85 | // .. and are a partition of unity (all LPs are covered) 86 | HWY_ASSERT(all_lps.Count() == topology.lps.size()); 87 | } 88 | 89 | void PrintCache(const Cache& c, size_t level) { 90 | fprintf(stderr, 91 | "L%zu: size %u KiB, line size %u, assoc %u, sets %u, cores %u\n", 92 | level, c.size_kib, c.bytes_per_line, c.associativity, c.sets, 93 | c.cores_sharing); 94 | } 95 | 96 | static void CheckCache(const Cache& c, size_t level) { 97 | // L1-L2 must exist, L3 is not guaranteed. 98 | if (level == 3 && c.size_kib == 0) { 99 | HWY_ASSERT(c.associativity == 0 && c.bytes_per_line == 0 && c.sets == 0); 100 | return; 101 | } 102 | 103 | // size and thus sets are not necessarily powers of two. 104 | HWY_ASSERT(c.size_kib != 0); 105 | HWY_ASSERT(c.sets != 0); 106 | 107 | // Intel Skylake has non-pow2 L3 associativity, and Apple L2 also, so we can 108 | // only check loose bounds. 109 | HWY_ASSERT(c.associativity >= 2); 110 | HWY_ASSERT(c.associativity <= Cache::kMaxAssociativity); 111 | 112 | // line sizes are always powers of two because CPUs partition addresses into 113 | // line offsets (the lower bits), set, and tag. 114 | const auto is_pow2 = [](uint32_t x) { return x != 0 && (x & (x - 1)) == 0; }; 115 | HWY_ASSERT(is_pow2(c.bytes_per_line)); 116 | HWY_ASSERT(32 <= c.bytes_per_line && c.bytes_per_line <= 1024); 117 | 118 | HWY_ASSERT(c.cores_sharing != 0); 119 | // +1 observed on RISC-V. 
 120 | HWY_ASSERT(c.cores_sharing <= TotalLogicalProcessors() + 1); 121 | } 122 | 123 | TEST(TopologyTest, TestCaches) { 124 | const Cache* caches = DataCaches(); 125 | if (!caches) return; 126 | for (size_t level = 1; level <= 3; ++level) { 127 | PrintCache(caches[level], level); 128 | CheckCache(caches[level], level); 129 | } 130 | } 131 | 132 | } // namespace 133 | } // namespace hwy 134 | 135 | HWY_TEST_MAIN(); 136 | -------------------------------------------------------------------------------- /hwy/contrib/unroller/README.md: -------------------------------------------------------------------------------- 1 | # Unroller 2 | 3 | All contents of the `unroller` folder are experimental and subject to change. 4 | 5 | `Unroller` is a templated function that automatically implements common optimizations that are usually handled by compilers when writing scalar code. Modern CPUs operate much more efficiently when non-dependent calculations are packed into an instruction pipeline. For scalar code, this often means a compiler will take a one-line loop, and compile it down to hundreds of lines of machine code in order to fully capture these efficiencies. 6 | 7 | As of today (2023-07-06), compilers are not nearly as good at implementing these optimizations for code written in SIMD intrinsics. `Unroller` is a templated function that takes in an `UnrollerUnit` of SIMD instructions, and then implements unrolling, reordering, hoisting and tail-handling (URHT optimizations) of arrays of data being processed with SIMD intrinsics. 8 | 9 | ### `UnrollerUnit` 10 | 11 | `UnrollerUnit` and `UnrollerUnit2D` are base classes that declare the functions `Unroller` needs implemented in order to properly handle URHT. `UnrollerUnit` has default implementations for all but the `Func` method, which defines the SIMD operation to be applied. Many examples of how to implement these functions are in the tests. 
 12 | 13 | ### Doubling values of an array example 14 | 15 | ``` 16 | struct DoubleUnit : UnrollerUnit<DoubleUnit, int, int> { 17 | using TT = ScalableTag<int>; 18 | inline Vec<TT> Func(ptrdiff_t idx, Vec<TT> x, Vec<TT> y) { 19 | TT d; 20 | return Mul(x, Set(d, 2)); 21 | } 22 | }; 23 | ``` 24 | 25 | Leaving all other methods in their default state, the following code will double all the values in array `a` and place them in `r`. 26 | 27 | ``` 28 | DoubleUnit dblunit; 29 | int r[N]; 30 | Unroller(dblunit, a, r, N); 31 | ``` -------------------------------------------------------------------------------- /hwy/examples/profiler_example.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2017 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 
14 | 15 | #include 16 | 17 | #include "hwy/base.h" // Abort 18 | #include "hwy/profiler.h" 19 | #include "hwy/timer.h" 20 | 21 | namespace hwy { 22 | namespace { 23 | 24 | void Spin(const double min_time) { 25 | const double t0 = hwy::platform::Now(); 26 | for (;;) { 27 | const double elapsed = hwy::platform::Now() - t0; 28 | if (elapsed > min_time) { 29 | break; 30 | } 31 | } 32 | } 33 | 34 | void Spin10() { 35 | PROFILER_FUNC; 36 | Spin(10E-6); 37 | } 38 | 39 | void Spin20() { 40 | PROFILER_FUNC; 41 | Spin(20E-6); 42 | } 43 | 44 | void Spin3060() { 45 | { 46 | PROFILER_ZONE("spin30"); 47 | Spin(30E-6); 48 | } 49 | { 50 | PROFILER_ZONE("spin60"); 51 | Spin(60E-6); 52 | } 53 | } 54 | 55 | void Level3() { 56 | PROFILER_FUNC; 57 | for (int rep = 0; rep < 10; ++rep) { 58 | double total = 0.0; 59 | for (int i = 0; i < 100 - rep; ++i) { 60 | total += std::pow(0.9, i); 61 | } 62 | if (std::abs(total - 9.999) > 1E-2) { 63 | HWY_ABORT("unexpected total %f", total); 64 | } 65 | } 66 | } 67 | 68 | void Level2() { 69 | PROFILER_FUNC; 70 | Level3(); 71 | } 72 | 73 | void Level1() { 74 | PROFILER_FUNC; 75 | Level2(); 76 | } 77 | 78 | void ProfilerExample() { 79 | { 80 | PROFILER_FUNC; 81 | Spin10(); 82 | Spin20(); 83 | Spin3060(); 84 | Level1(); 85 | } 86 | PROFILER_PRINT_RESULTS(); 87 | } 88 | 89 | } // namespace 90 | } // namespace hwy 91 | 92 | int main(int /*argc*/, char* /*argv*/[]) { 93 | hwy::ProfilerExample(); 94 | return 0; 95 | } 96 | -------------------------------------------------------------------------------- /hwy/examples/skeleton-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Demo of functions that might be called from multiple SIMD modules (either 17 | // other -inl.h files, or a .cc file between begin/end_target-inl). This is 18 | // optional - all SIMD code can reside in .cc files. However, this allows 19 | // splitting code into different files while still inlining instead of requiring 20 | // calling through function pointers. 21 | 22 | // Per-target include guard. This is only required when using dynamic dispatch, 23 | // i.e. including foreach_target.h. For static dispatch, a normal include 24 | // guard would be fine because the header is only compiled once. 25 | #if defined(HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_) == defined(HWY_TARGET_TOGGLE) 26 | #ifdef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 27 | #undef HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 28 | #else 29 | #define HIGHWAY_HWY_EXAMPLES_SKELETON_INL_H_ 30 | #endif 31 | 32 | // It is fine to #include normal or *-inl headers. 33 | #include "hwy/highway.h" 34 | 35 | HWY_BEFORE_NAMESPACE(); 36 | namespace skeleton { 37 | namespace HWY_NAMESPACE { 38 | 39 | // Highway ops reside here; ADL does not find templates nor builtins. 40 | namespace hn = hwy::HWY_NAMESPACE; 41 | 42 | // Example of a type-agnostic (caller-specified lane type) and width-agnostic 43 | // (uses best available instruction set) function in a header. 44 | // 45 | // Computes x[i] = mul_array[i] * x_array[i] + add_array[i] for i < size. 
46 | template 47 | HWY_MAYBE_UNUSED void MulAddLoop(const D d, const T* HWY_RESTRICT mul_array, 48 | const T* HWY_RESTRICT add_array, 49 | const size_t size, T* HWY_RESTRICT x_array) { 50 | for (size_t i = 0; i < size; i += hn::Lanes(d)) { 51 | const auto mul = hn::Load(d, mul_array + i); 52 | const auto add = hn::Load(d, add_array + i); 53 | auto x = hn::Load(d, x_array + i); 54 | x = hn::MulAdd(mul, x, add); 55 | hn::Store(x, d, x_array + i); 56 | } 57 | } 58 | 59 | // NOLINTNEXTLINE(google-readability-namespace-comments) 60 | } // namespace HWY_NAMESPACE 61 | } // namespace skeleton 62 | HWY_AFTER_NAMESPACE(); 63 | 64 | #endif // include guard 65 | -------------------------------------------------------------------------------- /hwy/examples/skeleton.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/examples/skeleton.h" 17 | 18 | #include 19 | 20 | // >>>> for dynamic dispatch only, skip if you want static dispatch 21 | 22 | // First undef to prevent error when re-included. 23 | #undef HWY_TARGET_INCLUDE 24 | // For dynamic dispatch, specify the name of the current file (unfortunately 25 | // __FILE__ is not reliable) so that foreach_target.h can re-include it. 
26 | #define HWY_TARGET_INCLUDE "hwy/examples/skeleton.cc" 27 | // Generates code for each enabled target by re-including this source file. 28 | #include "hwy/foreach_target.h" // IWYU pragma: keep 29 | 30 | // <<<< end of dynamic dispatch 31 | 32 | // Must come after foreach_target.h to avoid redefinition errors. 33 | #include "hwy/highway.h" 34 | 35 | // Optional, can instead add HWY_ATTR to all functions. 36 | HWY_BEFORE_NAMESPACE(); 37 | 38 | namespace skeleton { 39 | // This namespace name is unique per target, which allows code for multiple 40 | // targets to co-exist in the same translation unit. Required when using dynamic 41 | // dispatch, otherwise optional. 42 | namespace HWY_NAMESPACE { 43 | namespace { 44 | 45 | // Highway ops reside here; ADL does not find templates nor builtins. 46 | namespace hn = hwy::HWY_NAMESPACE; 47 | 48 | // Computes log2 by converting to a vector of floats. Compiled once per target. 49 | template 50 | HWY_ATTR_NO_MSAN void OneFloorLog2(const DF df, 51 | const uint8_t* HWY_RESTRICT values, 52 | uint8_t* HWY_RESTRICT log2) { 53 | // Type tags for converting to other element types (Rebind = same count). 
54 | const hn::RebindToSigned d32; 55 | const hn::Rebind d8; 56 | using VI32 = hn::Vec; 57 | 58 | const VI32 vi32 = hn::PromoteTo(d32, hn::Load(d8, values)); 59 | const VI32 bits = hn::BitCast(d32, hn::ConvertTo(df, vi32)); 60 | const VI32 exponent = hn::Sub(hn::ShiftRight<23>(bits), hn::Set(d32, 127)); 61 | hn::Store(hn::DemoteTo(d8, exponent), d8, log2); 62 | } 63 | 64 | void CodepathDemo() { 65 | // Highway defaults to portability, but per-target codepaths may be selected 66 | // via #if HWY_TARGET == HWY_SSE4 or by testing capability macros: 67 | #if HWY_HAVE_INTEGER64 68 | const char* gather = "Has int64"; 69 | #else 70 | const char* gather = "No int64"; 71 | #endif 72 | printf("Target %s: %s\n", hwy::TargetName(HWY_TARGET), gather); 73 | } 74 | 75 | void FloorLog2(const uint8_t* HWY_RESTRICT values, size_t count, 76 | uint8_t* HWY_RESTRICT log2) { 77 | CodepathDemo(); 78 | 79 | const hn::ScalableTag df; 80 | const size_t N = hn::Lanes(df); 81 | size_t i = 0; 82 | for (; i + N <= count; i += N) { 83 | OneFloorLog2(df, values + i, log2 + i); 84 | } 85 | for (; i < count; ++i) { 86 | hn::CappedTag d1; 87 | OneFloorLog2(d1, values + i, log2 + i); 88 | } 89 | } 90 | 91 | } // namespace 92 | // NOLINTNEXTLINE(google-readability-namespace-comments) 93 | } // namespace HWY_NAMESPACE 94 | } // namespace skeleton 95 | HWY_AFTER_NAMESPACE(); 96 | 97 | // The table of pointers to the various implementations in HWY_NAMESPACE must 98 | // be compiled only once (foreach_target #includes this file multiple times). 99 | // HWY_ONCE is true for only one of these 'compilation passes'. 100 | #if HWY_ONCE 101 | 102 | namespace skeleton { 103 | 104 | // This macro declares a static array used for dynamic dispatch; it resides in 105 | // the same outer namespace that contains FloorLog2. 106 | HWY_EXPORT(FloorLog2); 107 | 108 | // This function is optional and only needed in the case of exposing it in the 109 | // header file. 
Otherwise using HWY_DYNAMIC_DISPATCH(FloorLog2) in this module 110 | // is equivalent to inlining this function. 111 | HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in, 112 | const size_t count, 113 | uint8_t* HWY_RESTRICT out) { 114 | // This must reside outside of HWY_NAMESPACE because it references (calls the 115 | // appropriate one from) the per-target implementations there. 116 | // For static dispatch, use HWY_STATIC_DISPATCH. 117 | return HWY_DYNAMIC_DISPATCH(FloorLog2)(in, count, out); 118 | } 119 | 120 | HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in, 121 | const size_t count, 122 | uint8_t* HWY_RESTRICT out) { 123 | const auto ptr = HWY_DYNAMIC_POINTER(FloorLog2); 124 | return ptr(in, count, out); 125 | } 126 | 127 | // Optional: anything to compile only once, e.g. non-SIMD implementations of 128 | // public functions provided by this module, can go inside #if HWY_ONCE. 129 | 130 | } // namespace skeleton 131 | #endif // HWY_ONCE 132 | -------------------------------------------------------------------------------- /hwy/examples/skeleton.h: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Demo interface to target-specific code in skeleton.cc 17 | 18 | // Normal header with include guard and namespace. 
19 | #ifndef HIGHWAY_HWY_EXAMPLES_SKELETON_H_ 20 | #define HIGHWAY_HWY_EXAMPLES_SKELETON_H_ 21 | 22 | // Platform-specific definitions used for declaring an interface, independent of 23 | // the SIMD instruction set. 24 | #include "hwy/base.h" // HWY_RESTRICT 25 | 26 | namespace skeleton { 27 | 28 | // Computes base-2 logarithm by converting to float. Supports dynamic dispatch. 29 | HWY_DLLEXPORT void CallFloorLog2(const uint8_t* HWY_RESTRICT in, size_t count, 30 | uint8_t* HWY_RESTRICT out); 31 | 32 | // Same, but uses HWY_DYNAMIC_POINTER to save a function pointer and call it. 33 | HWY_DLLEXPORT void SavedCallFloorLog2(const uint8_t* HWY_RESTRICT in, 34 | size_t count, uint8_t* HWY_RESTRICT out); 35 | 36 | } // namespace skeleton 37 | 38 | #endif // HIGHWAY_HWY_EXAMPLES_SKELETON_H_ 39 | -------------------------------------------------------------------------------- /hwy/highway_export.h: -------------------------------------------------------------------------------- 1 | // Pseudo-generated file to handle both cmake & bazel build system. 
2 | 3 | // Initial generation done using cmake code: 4 | // include(GenerateExportHeader) 5 | // generate_export_header(hwy EXPORT_MACRO_NAME HWY_DLLEXPORT EXPORT_FILE_NAME 6 | // hwy/highway_export.h) 7 | // code reformatted using clang-format --style=Google 8 | 9 | #ifndef HWY_DLLEXPORT_H 10 | #define HWY_DLLEXPORT_H 11 | 12 | #if !defined(HWY_SHARED_DEFINE) 13 | #define HWY_DLLEXPORT 14 | #define HWY_CONTRIB_DLLEXPORT 15 | #define HWY_TEST_DLLEXPORT 16 | #else // !HWY_SHARED_DEFINE 17 | 18 | #ifndef HWY_DLLEXPORT 19 | #if defined(hwy_EXPORTS) 20 | /* We are building this library */ 21 | #ifdef _WIN32 22 | #define HWY_DLLEXPORT __declspec(dllexport) 23 | #else 24 | #define HWY_DLLEXPORT __attribute__((visibility("default"))) 25 | #endif 26 | #else // defined(hwy_EXPORTS) 27 | /* We are using this library */ 28 | #ifdef _WIN32 29 | #define HWY_DLLEXPORT __declspec(dllimport) 30 | #else 31 | #define HWY_DLLEXPORT __attribute__((visibility("default"))) 32 | #endif 33 | #endif // defined(hwy_EXPORTS) 34 | #endif // HWY_DLLEXPORT 35 | 36 | #ifndef HWY_CONTRIB_DLLEXPORT 37 | #if defined(hwy_contrib_EXPORTS) 38 | /* We are building this library */ 39 | #ifdef _WIN32 40 | #define HWY_CONTRIB_DLLEXPORT __declspec(dllexport) 41 | #else 42 | #define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default"))) 43 | #endif 44 | #else // defined(hwy_contrib_EXPORTS) 45 | /* We are using this library */ 46 | #ifdef _WIN32 47 | #define HWY_CONTRIB_DLLEXPORT __declspec(dllimport) 48 | #else 49 | #define HWY_CONTRIB_DLLEXPORT __attribute__((visibility("default"))) 50 | #endif 51 | #endif // defined(hwy_contrib_EXPORTS) 52 | #endif // HWY_CONTRIB_DLLEXPORT 53 | 54 | #ifndef HWY_TEST_DLLEXPORT 55 | #if defined(hwy_test_EXPORTS) 56 | /* We are building this library */ 57 | #ifdef _WIN32 58 | #define HWY_TEST_DLLEXPORT __declspec(dllexport) 59 | #else 60 | #define HWY_TEST_DLLEXPORT __attribute__((visibility("default"))) 61 | #endif 62 | #else // defined(hwy_test_EXPORTS) 63 | /* We 
are using this library */ 64 | #ifdef _WIN32 65 | #define HWY_TEST_DLLEXPORT __declspec(dllimport) 66 | #else 67 | #define HWY_TEST_DLLEXPORT __attribute__((visibility("default"))) 68 | #endif 69 | #endif // defined(hwy_test_EXPORTS) 70 | #endif // HWY_TEST_DLLEXPORT 71 | 72 | #endif // !HWY_SHARED_DEFINE 73 | 74 | #endif /* HWY_DLLEXPORT_H */ 75 | -------------------------------------------------------------------------------- /hwy/hwy.version: -------------------------------------------------------------------------------- 1 | HWY_0 { 2 | global: 3 | extern "C++" { 4 | *hwy::*; 5 | }; 6 | 7 | local: 8 | # Hide all the std namespace symbols. std namespace is explicitly marked 9 | # as visibility(default) and header-only functions or methods (such as those 10 | # from templates) should be exposed in shared libraries as weak symbols but 11 | # this is only needed when we expose those types in the shared library API 12 | # in any way. We don't use C++ std types in the API and we also don't 13 | # support exceptions in the library. 14 | # See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=36022 for a discussion 15 | # about this. 16 | extern "C++" { 17 | *std::*; 18 | }; 19 | }; 20 | -------------------------------------------------------------------------------- /hwy/nanobenchmark_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/nanobenchmark.h" 17 | 18 | #include 19 | #include 20 | 21 | #include "hwy/tests/hwy_gtest.h" 22 | #include "hwy/tests/test_util-inl.h" 23 | 24 | namespace hwy { 25 | namespace { 26 | 27 | // Governs duration of test; avoid timeout in debug builds. 28 | #if HWY_IS_DEBUG_BUILD 29 | constexpr size_t kMaxEvals = 3; 30 | #else 31 | constexpr size_t kMaxEvals = 4; 32 | #endif 33 | 34 | FuncOutput Div(const void*, FuncInput in) { 35 | // Here we're measuring the throughput because benchmark invocations are 36 | // independent. Any dividend will do; the divisor is nonzero. 37 | return 0xFFFFF / in; 38 | } 39 | 40 | template 41 | void MeasureDiv(const FuncInput (&inputs)[N]) { 42 | printf("Measuring integer division (output on final two lines)\n"); 43 | Result results[N]; 44 | Params params; 45 | params.max_evals = kMaxEvals; 46 | const size_t num_results = Measure(&Div, nullptr, inputs, N, results, params); 47 | for (size_t i = 0; i < num_results; ++i) { 48 | printf("%5d: %6.2f ticks; MAD=%4.2f%%\n", 49 | static_cast(results[i].input), results[i].ticks, 50 | results[i].variability * 100.0); 51 | } 52 | } 53 | 54 | RandomState rng; 55 | 56 | // A function whose runtime depends on rng. 57 | FuncOutput Random(const void* /*arg*/, FuncInput in) { 58 | const size_t r = rng() & 0xF; 59 | FuncOutput ret = static_cast(in); 60 | for (size_t i = 0; i < r; ++i) { 61 | ret /= ((rng() & 1) + 2); 62 | } 63 | return ret; 64 | } 65 | 66 | // Ensure the measured variability is high. 
67 | template 68 | void MeasureRandom(const FuncInput (&inputs)[N]) { 69 | Result results[N]; 70 | Params p; 71 | p.max_evals = kMaxEvals; 72 | p.verbose = false; 73 | const size_t num_results = Measure(&Random, nullptr, inputs, N, results, p); 74 | for (size_t i = 0; i < num_results; ++i) { 75 | HWY_ASSERT(results[i].variability > 1E-3); 76 | } 77 | } 78 | 79 | TEST(NanobenchmarkTest, RunTest) { 80 | const int unpredictable = Unpredictable1(); // == 1, unknown to compiler. 81 | static const FuncInput inputs[] = {static_cast(unpredictable) + 2, 82 | static_cast(unpredictable + 9)}; 83 | 84 | MeasureDiv(inputs); 85 | MeasureRandom(inputs); 86 | } 87 | 88 | } // namespace 89 | } // namespace hwy 90 | 91 | HWY_TEST_MAIN(); 92 | -------------------------------------------------------------------------------- /hwy/per_target.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Enable all targets so that calling Have* does not call into a null pointer. 
17 | #ifndef HWY_COMPILE_ALL_ATTAINABLE 18 | #define HWY_COMPILE_ALL_ATTAINABLE 19 | #endif 20 | #include "hwy/per_target.h" 21 | 22 | #include 23 | #include 24 | 25 | #undef HWY_TARGET_INCLUDE 26 | #define HWY_TARGET_INCLUDE "hwy/per_target.cc" 27 | #include "hwy/foreach_target.h" // IWYU pragma: keep 28 | #include "hwy/highway.h" 29 | 30 | HWY_BEFORE_NAMESPACE(); 31 | namespace hwy { 32 | namespace HWY_NAMESPACE { 33 | namespace { 34 | int64_t GetTarget() { return HWY_TARGET; } 35 | size_t GetVectorBytes() { return Lanes(ScalableTag()); } 36 | bool GetHaveInteger64() { return HWY_HAVE_INTEGER64 != 0; } 37 | bool GetHaveFloat16() { return HWY_HAVE_FLOAT16 != 0; } 38 | bool GetHaveFloat64() { return HWY_HAVE_FLOAT64 != 0; } 39 | } // namespace 40 | // NOLINTNEXTLINE(google-readability-namespace-comments) 41 | } // namespace HWY_NAMESPACE 42 | 43 | } // namespace hwy 44 | HWY_AFTER_NAMESPACE(); 45 | 46 | #if HWY_ONCE 47 | namespace hwy { 48 | namespace { 49 | HWY_EXPORT(GetTarget); 50 | HWY_EXPORT(GetVectorBytes); 51 | HWY_EXPORT(GetHaveInteger64); 52 | HWY_EXPORT(GetHaveFloat16); 53 | HWY_EXPORT(GetHaveFloat64); 54 | } // namespace 55 | 56 | HWY_DLLEXPORT int64_t DispatchedTarget() { 57 | return HWY_DYNAMIC_DISPATCH(GetTarget)(); 58 | } 59 | 60 | HWY_DLLEXPORT size_t VectorBytes() { 61 | return HWY_DYNAMIC_DISPATCH(GetVectorBytes)(); 62 | } 63 | 64 | HWY_DLLEXPORT bool HaveInteger64() { 65 | return HWY_DYNAMIC_DISPATCH(GetHaveInteger64)(); 66 | } 67 | 68 | HWY_DLLEXPORT bool HaveFloat16() { 69 | return HWY_DYNAMIC_DISPATCH(GetHaveFloat16)(); 70 | } 71 | 72 | HWY_DLLEXPORT bool HaveFloat64() { 73 | return HWY_DYNAMIC_DISPATCH(GetHaveFloat64)(); 74 | } 75 | 76 | } // namespace hwy 77 | #endif // HWY_ONCE 78 | -------------------------------------------------------------------------------- /hwy/per_target.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | 
// 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef HIGHWAY_HWY_PER_TARGET_H_ 17 | #define HIGHWAY_HWY_PER_TARGET_H_ 18 | 19 | #include 20 | #include 21 | 22 | #include "hwy/highway_export.h" 23 | 24 | // Functions to query the capabilities of the target that will be called by 25 | // HWY_DYNAMIC_DISPATCH, which is not necessarily the current target. 26 | 27 | namespace hwy { 28 | 29 | // Returns the HWY_TARGET which HWY_DYNAMIC_DISPATCH selected. 30 | HWY_DLLEXPORT int64_t DispatchedTarget(); 31 | 32 | // Returns size in bytes of a vector, i.e. `Lanes(ScalableTag())`. 33 | // 34 | // Do not cache the result, which may change after calling DisableTargets, or 35 | // if software requests a different vector size (e.g. when entering/exiting SME 36 | // streaming mode). Instead call this right before the code that depends on the 37 | // result, without any DisableTargets or SME transition in-between. Note that 38 | // this involves an indirect call, so prefer not to call this frequently nor 39 | // unnecessarily. 40 | HWY_DLLEXPORT size_t VectorBytes(); 41 | 42 | // Returns whether 64-bit integers, 16/64-bit floats are a supported lane type. 
43 | HWY_DLLEXPORT bool HaveInteger64(); 44 | HWY_DLLEXPORT bool HaveFloat16(); 45 | HWY_DLLEXPORT bool HaveFloat64(); 46 | 47 | } // namespace hwy 48 | 49 | #endif // HIGHWAY_HWY_PER_TARGET_H_ 50 | -------------------------------------------------------------------------------- /hwy/print-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Print() function 17 | 18 | #include "hwy/highway.h" 19 | #include "hwy/print.h" 20 | 21 | // Per-target include guard 22 | #if defined(HIGHWAY_HWY_PRINT_INL_H_) == defined(HWY_TARGET_TOGGLE) 23 | #ifdef HIGHWAY_HWY_PRINT_INL_H_ 24 | #undef HIGHWAY_HWY_PRINT_INL_H_ 25 | #else 26 | #define HIGHWAY_HWY_PRINT_INL_H_ 27 | #endif 28 | 29 | #if HWY_TARGET == HWY_RVV 30 | #include "hwy/aligned_allocator.h" 31 | #endif 32 | 33 | HWY_BEFORE_NAMESPACE(); 34 | namespace hwy { 35 | namespace HWY_NAMESPACE { 36 | 37 | // Prints lanes around `lane`, in memory order. 
38 | template > 39 | HWY_API void Print(const D d, const char* caption, V v, size_t lane_u = 0, 40 | size_t max_lanes = 7) { 41 | const size_t N = Lanes(d); 42 | using T = TFromD; 43 | #if HWY_TARGET == HWY_RVV 44 | auto storage = AllocateAligned(N); 45 | T* HWY_RESTRICT lanes = storage.get(); 46 | #else 47 | // This works around an SVE compile error on GCC 11 and 12. Calling 48 | // AllocateAligned here would seem to require it be marked with HWY_ATTR. 49 | HWY_ALIGN T lanes[MaxLanes(d)]; 50 | #endif 51 | Store(v, d, lanes); 52 | 53 | const auto info = hwy::detail::MakeTypeInfo(); 54 | hwy::detail::PrintArray(info, caption, lanes, N, lane_u, max_lanes); 55 | } 56 | 57 | // NOLINTNEXTLINE(google-readability-namespace-comments) 58 | } // namespace HWY_NAMESPACE 59 | } // namespace hwy 60 | HWY_AFTER_NAMESPACE(); 61 | 62 | #endif // per-target include guard 63 | -------------------------------------------------------------------------------- /hwy/print.h: -------------------------------------------------------------------------------- 1 | // Copyright 2022 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef HWY_PRINT_H_ 17 | #define HWY_PRINT_H_ 18 | 19 | // Helpers for printing vector lanes. 
20 | 21 | #include 22 | #include 23 | 24 | #include "hwy/base.h" 25 | #include "hwy/highway_export.h" 26 | 27 | namespace hwy { 28 | 29 | namespace detail { 30 | 31 | // For implementing value comparisons etc. as type-erased functions to reduce 32 | // template bloat. 33 | struct TypeInfo { 34 | size_t sizeof_t; 35 | bool is_float; 36 | bool is_signed; 37 | bool is_bf16; 38 | }; 39 | 40 | template 41 | HWY_INLINE TypeInfo MakeTypeInfo() { 42 | TypeInfo info; 43 | info.sizeof_t = sizeof(T); 44 | info.is_float = IsFloat(); 45 | info.is_signed = IsSigned(); 46 | info.is_bf16 = IsSame(); 47 | return info; 48 | } 49 | 50 | HWY_DLLEXPORT void TypeName(const TypeInfo& info, size_t N, char* string100); 51 | HWY_DLLEXPORT void ToString(const TypeInfo& info, const void* ptr, 52 | char* string100); 53 | 54 | HWY_DLLEXPORT void PrintArray(const TypeInfo& info, const char* caption, 55 | const void* array_void, size_t N, 56 | size_t lane_u = 0, size_t max_lanes = 7); 57 | 58 | } // namespace detail 59 | 60 | template 61 | HWY_NOINLINE void PrintValue(T value) { 62 | char str[100]; 63 | detail::ToString(hwy::detail::MakeTypeInfo(), &value, str); 64 | fprintf(stderr, "%s,", str); 65 | } 66 | 67 | template 68 | HWY_NOINLINE void PrintArray(const T* value, size_t count) { 69 | detail::PrintArray(hwy::detail::MakeTypeInfo(), "", value, count, 0, 70 | count); 71 | } 72 | 73 | } // namespace hwy 74 | 75 | #endif // HWY_PRINT_H_ 76 | -------------------------------------------------------------------------------- /hwy/stats.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2024 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // https://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/stats.h" 17 | 18 | #include 19 | 20 | #include // std::min 21 | #include 22 | 23 | #include "hwy/base.h" // HWY_ASSERT 24 | 25 | namespace hwy { 26 | 27 | void Stats::Assimilate(const Stats& other) { 28 | const int64_t total_n = n_ + other.n_; 29 | if (total_n == 0) return; // Nothing to do; prevents div by zero. 30 | 31 | min_ = std::min(min_, other.min_); 32 | max_ = std::max(max_, other.max_); 33 | 34 | sum_log_ += other.sum_log_; 35 | 36 | const double product_n = n_ * other.n_; 37 | const double n2 = n_ * n_; 38 | const double other_n2 = other.n_ * other.n_; 39 | const int64_t total_n2 = total_n * total_n; 40 | const double total_n3 = static_cast(total_n2) * total_n; 41 | // Precompute reciprocal for speed - used at least twice. 
42 | const double inv_total_n = 1.0 / total_n; 43 | const double inv_total_n2 = 1.0 / total_n2; 44 | 45 | const double delta = other.m1_ - m1_; 46 | const double delta2 = delta * delta; 47 | const double delta3 = delta * delta2; 48 | const double delta4 = delta2 * delta2; 49 | 50 | m1_ = (n_ * m1_ + other.n_ * other.m1_) * inv_total_n; 51 | 52 | const double new_m2 = m2_ + other.m2_ + delta2 * product_n * inv_total_n; 53 | 54 | const double new_m3 = 55 | m3_ + other.m3_ + delta3 * product_n * (n_ - other.n_) * inv_total_n2 + 56 | 3.0 * delta * (n_ * other.m2_ - other.n_ * m2_) * inv_total_n; 57 | 58 | m4_ += other.m4_ + 59 | delta4 * product_n * (n2 - product_n + other_n2) / total_n3 + 60 | 6.0 * delta2 * (n2 * other.m2_ + other_n2 * m2_) * inv_total_n2 + 61 | 4.0 * delta * (n_ * other.m3_ - other.n_ * m3_) * inv_total_n; 62 | 63 | m2_ = new_m2; 64 | m3_ = new_m3; 65 | n_ = total_n; 66 | } 67 | 68 | std::string Stats::ToString(int exclude) const { 69 | if (Count() == 0) return std::string("(none)"); 70 | 71 | char buf[300]; 72 | int pos = 0; 73 | int ret; // snprintf - bytes written or negative for error. 
74 | 75 | if ((exclude & kNoCount) == 0) { 76 | ret = snprintf(buf + pos, sizeof(buf) - pos, "Count=%9zu ", 77 | static_cast(Count())); 78 | HWY_ASSERT(ret > 0); 79 | pos += ret; 80 | } 81 | 82 | if ((exclude & kNoMeanSD) == 0) { 83 | const float sd = StandardDeviation(); 84 | if (sd > 100) { 85 | ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.2e SD=%7.1e ", 86 | Mean(), sd); 87 | } else { 88 | ret = snprintf(buf + pos, sizeof(buf) - pos, "Mean=%8.6e SD=%7.5e ", 89 | Mean(), sd); 90 | } 91 | HWY_ASSERT(ret > 0); 92 | pos += ret; 93 | } 94 | 95 | if ((exclude & kNoMinMax) == 0) { 96 | ret = snprintf(buf + pos, sizeof(buf) - pos, "Min=%8.5e Max=%8.5e ", Min(), 97 | Max()); 98 | HWY_ASSERT(ret > 0); 99 | pos += ret; 100 | } 101 | 102 | if ((exclude & kNoSkewKurt) == 0) { 103 | ret = snprintf(buf + pos, sizeof(buf) - pos, "Skew=%5.2f Kurt=%7.2f ", 104 | Skewness(), Kurtosis()); 105 | HWY_ASSERT(ret > 0); 106 | pos += ret; 107 | } 108 | 109 | if ((exclude & kNoGeomean) == 0) { 110 | ret = snprintf(buf + pos, sizeof(buf) - pos, "GeoMean=%9.6f ", 111 | GeometricMean()); 112 | HWY_ASSERT(ret > 0); 113 | pos += ret; 114 | } 115 | 116 | HWY_ASSERT(pos < static_cast(sizeof(buf))); 117 | return buf; 118 | } 119 | 120 | } // namespace hwy 121 | -------------------------------------------------------------------------------- /hwy/tests/bit_permute_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | 19 | #undef HWY_TARGET_INCLUDE 20 | #define HWY_TARGET_INCLUDE "tests/bit_permute_test.cc" 21 | #include "hwy/foreach_target.h" // IWYU pragma: keep 22 | #include "hwy/highway.h" 23 | #include "hwy/tests/test_util-inl.h" 24 | 25 | HWY_BEFORE_NAMESPACE(); 26 | namespace hwy { 27 | namespace HWY_NAMESPACE { 28 | namespace { 29 | 30 | struct TestBitShuffle { 31 | template 32 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 33 | #if HWY_TARGET == HWY_SCALAR 34 | (void)d; 35 | #else // HWY_TARGET != HWY_SCALAR 36 | using TU = MakeUnsigned; 37 | 38 | const size_t N = Lanes(d); 39 | 40 | auto in1_lanes = AllocateAligned(N); 41 | auto in2_lanes = AllocateAligned(N * sizeof(T)); 42 | auto expected = AllocateAligned(N); 43 | HWY_ASSERT(in1_lanes && in2_lanes && expected); 44 | 45 | constexpr uint8_t kBitIdxMask = static_cast((sizeof(T) * 8) - 1); 46 | 47 | const Repartition du8; 48 | const RebindToSigned di8; 49 | 50 | RandomState rng; 51 | for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { 52 | for (size_t i = 0; i < N; i++) { 53 | TU src_val = static_cast(rng()); 54 | TU expected_result = static_cast(0); 55 | for (size_t j = 0; j < sizeof(T); j++) { 56 | const uint8_t bit_idx = static_cast(rng() & kBitIdxMask); 57 | 58 | in2_lanes[i * sizeof(T) + j] = bit_idx; 59 | expected_result = static_cast(expected_result | 60 | (((src_val >> bit_idx) & 1) << j)); 61 | } 62 | 63 | in1_lanes[i] = static_cast(src_val); 64 | expected[i] = static_cast(expected_result); 65 | } 66 | 67 | const 
auto in1 = Load(d, in1_lanes.get()); 68 | const auto in2 = Load(du8, in2_lanes.get()); 69 | HWY_ASSERT_VEC_EQ(d, expected.get(), BitShuffle(in1, in2)); 70 | HWY_ASSERT_VEC_EQ(d, expected.get(), BitShuffle(in1, BitCast(di8, in2))); 71 | } 72 | #endif // HWY_TARGET == HWY_SCALAR 73 | } 74 | }; 75 | 76 | HWY_NOINLINE void TestAllBitShuffle() { 77 | #if HWY_HAVE_INTEGER64 78 | ForPartialFixedOrFullScalableVectors()(int64_t()); 79 | ForPartialFixedOrFullScalableVectors()(uint64_t()); 80 | #endif 81 | } 82 | 83 | } // namespace 84 | // NOLINTNEXTLINE(google-readability-namespace-comments) 85 | } // namespace HWY_NAMESPACE 86 | } // namespace hwy 87 | HWY_AFTER_NAMESPACE(); 88 | 89 | #if HWY_ONCE 90 | namespace hwy { 91 | namespace { 92 | HWY_BEFORE_TEST(HwyBitPermuteTest); 93 | HWY_EXPORT_AND_TEST_P(HwyBitPermuteTest, TestAllBitShuffle); 94 | HWY_AFTER_TEST(); 95 | } // namespace 96 | } // namespace hwy 97 | HWY_TEST_MAIN(); 98 | #endif // HWY_ONCE 99 | -------------------------------------------------------------------------------- /hwy/tests/list_targets.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2020 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // Simple tool to print the list of targets that were compiled in when building 17 | // this tool. 
18 | 19 | #include 20 | #include 21 | 22 | #include "hwy/detect_compiler_arch.h" 23 | #include "hwy/highway.h" 24 | 25 | namespace { 26 | // Prints to stderr the first nonzero HWY_COMPILER_* macro and its value. 27 | void PrintCompiler() { 28 | if (HWY_COMPILER_CLANG) { 29 | fprintf(stderr, "Compiler: Clang %d\n", HWY_COMPILER_CLANG); 30 | } else if (HWY_COMPILER_CLANGCL) { 31 | fprintf(stderr, "Compiler: Clang-cl %d\n", HWY_COMPILER_CLANGCL); 32 | } else if (HWY_COMPILER_GCC_ACTUAL) { 33 | fprintf(stderr, "Compiler: GCC %d\n", HWY_COMPILER_GCC_ACTUAL); 34 | } else if (HWY_COMPILER_ICC) { 35 | fprintf(stderr, "Compiler: ICC %d\n", HWY_COMPILER_ICC); 36 | } else if (HWY_COMPILER_ICX) { 37 | fprintf(stderr, "Compiler: ICX %d\n", HWY_COMPILER_ICX); 38 | } else if (HWY_COMPILER_MSVC) { 39 | fprintf(stderr, "Compiler: MSVC %d\n", HWY_COMPILER_MSVC); 40 | } else { 41 | fprintf(stderr, "Compiler unknown!\n"); 42 | } 43 | } 44 | // Prints whether each build-configuration macro is defined (1) or not (0). 45 | void PrintConfig() { 46 | #ifdef HWY_COMPILE_ONLY_EMU128 47 | const int only_emu128 = 1; 48 | #else 49 | const int only_emu128 = 0; 50 | #endif 51 | #ifdef HWY_COMPILE_ONLY_SCALAR 52 | const int only_scalar = 1; 53 | #else 54 | const int only_scalar = 0; 55 | #endif 56 | #ifdef HWY_COMPILE_ONLY_STATIC 57 | const int only_static = 1; 58 | #else 59 | const int only_static = 0; 60 | #endif 61 | #ifdef HWY_COMPILE_ALL_ATTAINABLE 62 | const int all_attain = 1; 63 | #else 64 | const int all_attain = 0; 65 | #endif 66 | #ifdef HWY_IS_TEST 67 | const int is_test = 1; 68 | #else 69 | const int is_test = 0; 70 | #endif 71 | fprintf(stderr, 72 | "Config: emu128:%d scalar:%d static:%d all_attain:%d is_test:%d\n", 73 | only_emu128, only_scalar, only_static, all_attain, is_test); 74 | } 75 | // Prints the HWY_HAVE_* capability macros to stderr on one line. 76 | void PrintHave() { 77 | fprintf(stderr, 78 | "Have: constexpr_lanes:%d runtime_dispatch:%d auxv:%d " 79 | "f16 type:%d/ops%d bf16 type:%d/ops%d\n", 80 | HWY_HAVE_CONSTEXPR_LANES, HWY_HAVE_RUNTIME_DISPATCH, HWY_HAVE_AUXV, 81 | HWY_HAVE_SCALAR_F16_TYPE, HWY_HAVE_SCALAR_F16_OPERATORS, 82 | HWY_HAVE_SCALAR_BF16_TYPE, 
HWY_HAVE_SCALAR_BF16_OPERATORS); 83 | } 84 | 85 | void PrintTargets(const char* msg, int64_t targets) { 86 | fprintf(stderr, "%s", msg); 87 | // For each bit other than the sign bit: 88 | for (int64_t x = targets & hwy::LimitsMax(); x != 0; 89 | x = x & (x - 1)) { 90 | // Extract value of least-significant bit. 91 | fprintf(stderr, " %s", hwy::TargetName(x & (~x + 1))); 92 | } 93 | fprintf(stderr, "\n"); 94 | } 95 | 96 | void TestVisitor() { 97 | long long enabled = 0; // NOLINT 98 | #define PER_TARGET(TARGET, NAMESPACE) enabled |= TARGET; 99 | HWY_VISIT_TARGETS(PER_TARGET) 100 | if (enabled != HWY_TARGETS) { 101 | HWY_ABORT("Enabled %llx != HWY_TARGETS %llx\n", enabled, HWY_TARGETS); 102 | } 103 | } 104 | 105 | } // namespace 106 | 107 | int main() { 108 | PrintCompiler(); 109 | PrintConfig(); 110 | PrintHave(); 111 | 112 | PrintTargets("Compiled HWY_TARGETS: ", HWY_TARGETS); 113 | PrintTargets("HWY_ATTAINABLE_TARGETS:", HWY_ATTAINABLE_TARGETS); 114 | PrintTargets("HWY_BASELINE_TARGETS: ", HWY_BASELINE_TARGETS); 115 | PrintTargets("HWY_STATIC_TARGET: ", HWY_STATIC_TARGET); 116 | PrintTargets("HWY_BROKEN_TARGETS: ", HWY_BROKEN_TARGETS); 117 | PrintTargets("HWY_DISABLED_TARGETS: ", HWY_DISABLED_TARGETS); 118 | PrintTargets("Current CPU supports: ", hwy::SupportedTargets()); 119 | TestVisitor(); 120 | return 0; 121 | } 122 | -------------------------------------------------------------------------------- /hwy/tests/mask_slide_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #undef HWY_TARGET_INCLUDE 17 | #define HWY_TARGET_INCLUDE "tests/mask_slide_test.cc" 18 | #include "hwy/foreach_target.h" // IWYU pragma: keep 19 | #include "hwy/highway.h" 20 | #include "hwy/tests/test_util-inl.h" 21 | 22 | HWY_BEFORE_NAMESPACE(); 23 | namespace hwy { 24 | namespace HWY_NAMESPACE { 25 | namespace { 26 | 27 | struct TestSlideMaskDownLanes { 28 | template 29 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 30 | #if HWY_TARGET != HWY_SCALAR 31 | using TI = MakeSigned; 32 | 33 | const RebindToSigned di; 34 | 35 | const size_t N = Lanes(d); 36 | if (N < 2) { 37 | return; 38 | } 39 | 40 | auto bool_lanes = AllocateAligned(N); 41 | auto expected = AllocateAligned(N); 42 | HWY_ASSERT(bool_lanes && expected); 43 | 44 | // For all combinations of zero/nonzero state of subset of lanes: 45 | const size_t max_lanes = AdjustedLog2Reps(HWY_MIN(N, size_t(6))); 46 | 47 | ZeroBytes(bool_lanes.get(), max_lanes * sizeof(TI)); 48 | for (size_t i = max_lanes; i < N; i++) { 49 | bool_lanes[i] = TI(-1); 50 | } 51 | 52 | for (size_t code = 0; code < (1ull << max_lanes); ++code) { 53 | for (size_t i = 0; i < max_lanes; ++i) { 54 | bool_lanes[i] = (code & (1ull << i)) ? 
TI(-1) : TI(0); 55 | } 56 | 57 | for (size_t i = 0; i < max_lanes; i++) { 58 | ZeroBytes(expected.get() + N - i, i * sizeof(TI)); 59 | for (size_t j = 0; j < N - i; j++) { 60 | expected[j] = bool_lanes[j + i]; 61 | } 62 | 63 | const auto src_mask = 64 | MaskFromVec(BitCast(d, Load(di, bool_lanes.get()))); 65 | const auto expected_mask = 66 | MaskFromVec(BitCast(d, Load(di, expected.get()))); 67 | const auto actual_mask = SlideMaskDownLanes(d, src_mask, i); 68 | HWY_ASSERT_MASK_EQ(d, expected_mask, actual_mask); 69 | 70 | if (i == 1) { 71 | HWY_ASSERT_MASK_EQ(d, expected_mask, SlideMask1Down(d, src_mask)); 72 | } 73 | } 74 | } 75 | #else 76 | (void)d; 77 | #endif 78 | } 79 | }; 80 | 81 | HWY_NOINLINE void TestAllSlideMaskDownLanes() { 82 | ForAllTypes(ForPartialVectors()); 83 | } 84 | 85 | struct TestSlideMaskUpLanes { 86 | template 87 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 88 | #if HWY_TARGET != HWY_SCALAR 89 | using TI = MakeSigned; 90 | 91 | const RebindToSigned di; 92 | 93 | const size_t N = Lanes(d); 94 | if (N < 2) { 95 | return; 96 | } 97 | 98 | auto bool_lanes = AllocateAligned(N); 99 | auto expected = AllocateAligned(N); 100 | HWY_ASSERT(bool_lanes && expected); 101 | 102 | // For all combinations of zero/nonzero state of subset of lanes: 103 | const size_t max_lanes = AdjustedLog2Reps(HWY_MIN(N, size_t(6))); 104 | 105 | ZeroBytes(bool_lanes.get(), max_lanes * sizeof(TI)); 106 | for (size_t i = max_lanes; i < N; i++) { 107 | bool_lanes[i] = TI(-1); 108 | } 109 | 110 | for (size_t code = 0; code < (1ull << max_lanes); ++code) { 111 | for (size_t i = 0; i < max_lanes; ++i) { 112 | bool_lanes[i] = (code & (1ull << i)) ? 
TI(-1) : TI(0); 113 | } 114 | 115 | for (size_t i = 0; i < max_lanes; i++) { 116 | ZeroBytes(expected.get(), i * sizeof(TI)); 117 | for (size_t j = 0; j < N - i; j++) { 118 | expected[j + i] = bool_lanes[j]; 119 | } 120 | 121 | const auto src_mask = 122 | MaskFromVec(BitCast(d, Load(di, bool_lanes.get()))); 123 | const auto expected_mask = 124 | MaskFromVec(BitCast(d, Load(di, expected.get()))); 125 | const auto actual_mask = SlideMaskUpLanes(d, src_mask, i); 126 | HWY_ASSERT_MASK_EQ(d, expected_mask, actual_mask); 127 | 128 | if (i == 1) { 129 | HWY_ASSERT_MASK_EQ(d, expected_mask, SlideMask1Up(d, src_mask)); 130 | } 131 | } 132 | } 133 | #else 134 | (void)d; 135 | #endif 136 | } 137 | }; 138 | 139 | HWY_NOINLINE void TestAllSlideMaskUpLanes() { 140 | ForAllTypes(ForPartialVectors()); 141 | } 142 | 143 | } // namespace 144 | // NOLINTNEXTLINE(google-readability-namespace-comments) 145 | } // namespace HWY_NAMESPACE 146 | } // namespace hwy 147 | HWY_AFTER_NAMESPACE(); 148 | 149 | #if HWY_ONCE 150 | namespace hwy { 151 | namespace { 152 | HWY_BEFORE_TEST(HwyMaskSlideTest); 153 | HWY_EXPORT_AND_TEST_P(HwyMaskSlideTest, TestAllSlideMaskDownLanes); 154 | HWY_EXPORT_AND_TEST_P(HwyMaskSlideTest, TestAllSlideMaskUpLanes); 155 | HWY_AFTER_TEST(); 156 | } // namespace 157 | } // namespace hwy 158 | HWY_TEST_MAIN(); 159 | #endif // HWY_ONCE 160 | -------------------------------------------------------------------------------- /hwy/tests/sign_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "tests/sign_test.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | #include "hwy/highway.h" 22 | #include "hwy/tests/test_util-inl.h" 23 | 24 | HWY_BEFORE_NAMESPACE(); 25 | namespace hwy { 26 | namespace HWY_NAMESPACE { 27 | namespace { 28 | 29 | struct TestCopySign { 30 | template 31 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 32 | const auto v0 = Zero(d); 33 | const auto vp = Iota(d, 1); 34 | const auto vn = Iota(d, -1E5); // assumes N < 10^5 35 | 36 | // Zero remains zero regardless of sign 37 | HWY_ASSERT_VEC_EQ(d, v0, CopySign(v0, v0)); 38 | HWY_ASSERT_VEC_EQ(d, v0, CopySign(v0, vp)); 39 | HWY_ASSERT_VEC_EQ(d, v0, CopySign(v0, vn)); 40 | HWY_ASSERT_VEC_EQ(d, v0, CopySignToAbs(v0, v0)); 41 | HWY_ASSERT_VEC_EQ(d, v0, CopySignToAbs(v0, vp)); 42 | HWY_ASSERT_VEC_EQ(d, v0, CopySignToAbs(v0, vn)); 43 | 44 | // Positive input, positive sign => unchanged 45 | HWY_ASSERT_VEC_EQ(d, vp, CopySign(vp, vp)); 46 | HWY_ASSERT_VEC_EQ(d, vp, CopySignToAbs(vp, vp)); 47 | 48 | // Positive input, negative sign => negated 49 | HWY_ASSERT_VEC_EQ(d, Neg(vp), CopySign(vp, vn)); 50 | HWY_ASSERT_VEC_EQ(d, Neg(vp), CopySignToAbs(vp, vn)); 51 | 52 | // Negative input, negative sign => unchanged 53 | HWY_ASSERT_VEC_EQ(d, vn, CopySign(vn, vn)); 54 | 55 | // Negative input, positive sign => negated 56 | HWY_ASSERT_VEC_EQ(d, Neg(vn), CopySign(vn, vp)); 57 | } 58 | }; 59 | 60 | HWY_NOINLINE void TestAllCopySign() { 61 | 
ForFloatTypes(ForPartialVectors()); 62 | } 63 | 64 | struct TestBroadcastSignBit { 65 | template 66 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 67 | const auto s0 = Zero(d); 68 | const auto s1 = Set(d, -1); // all bit set 69 | const auto vpos = And(Iota(d, 0), Set(d, LimitsMax())); 70 | const auto vneg = Sub(s1, vpos); 71 | 72 | HWY_ASSERT_VEC_EQ(d, s0, BroadcastSignBit(vpos)); 73 | HWY_ASSERT_VEC_EQ(d, s0, BroadcastSignBit(Set(d, LimitsMax()))); 74 | 75 | HWY_ASSERT_VEC_EQ(d, s1, BroadcastSignBit(vneg)); 76 | HWY_ASSERT_VEC_EQ(d, s1, BroadcastSignBit(Set(d, LimitsMin()))); 77 | HWY_ASSERT_VEC_EQ(d, s1, BroadcastSignBit(Set(d, LimitsMin() / 2))); 78 | } 79 | }; 80 | 81 | HWY_NOINLINE void TestAllBroadcastSignBit() { 82 | ForSignedTypes(ForPartialVectors()); 83 | } 84 | 85 | } // namespace 86 | // NOLINTNEXTLINE(google-readability-namespace-comments) 87 | } // namespace HWY_NAMESPACE 88 | } // namespace hwy 89 | HWY_AFTER_NAMESPACE(); 90 | 91 | #if HWY_ONCE 92 | namespace hwy { 93 | namespace { 94 | HWY_BEFORE_TEST(HwySignTest); 95 | HWY_EXPORT_AND_TEST_P(HwySignTest, TestAllCopySign); 96 | HWY_EXPORT_AND_TEST_P(HwySignTest, TestAllBroadcastSignBit); 97 | HWY_AFTER_TEST(); 98 | } // namespace 99 | } // namespace hwy 100 | HWY_TEST_MAIN(); 101 | #endif // HWY_ONCE 102 | -------------------------------------------------------------------------------- /hwy/tests/test_util.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2021 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include "hwy/tests/test_util.h" 17 | 18 | #include 19 | 20 | #include "hwy/base.h" 21 | #include "hwy/print.h" 22 | 23 | namespace hwy { 24 | 25 | HWY_TEST_DLLEXPORT bool BytesEqual(const void* p1, const void* p2, 26 | const size_t size, size_t* pos) { 27 | const uint8_t* bytes1 = reinterpret_cast(p1); 28 | const uint8_t* bytes2 = reinterpret_cast(p2); 29 | for (size_t i = 0; i < size; ++i) { 30 | if (bytes1[i] != bytes2[i]) { 31 | if (pos != nullptr) { 32 | *pos = i; 33 | } 34 | return false; 35 | } 36 | } 37 | return true; 38 | } 39 | // Returns if the two strings are equal, else aborts printing both in full. 40 | void AssertStringEqual(const char* expected, const char* actual, 41 | const char* target_name, const char* filename, 42 | int line) { 43 | // Compare by index so the arguments stay intact for the message below. 44 | for (size_t i = 0; expected[i] == actual[i]; ++i) { 45 | if (expected[i] == '\0') return; 46 | } 47 | Abort(filename, line, "%s string mismatch: expected '%s', got '%s'.\n", 48 | target_name, expected, actual); 49 | } 50 | 51 | namespace detail { 52 | 53 | HWY_TEST_DLLEXPORT bool IsEqual(const TypeInfo& info, const void* expected_ptr, 54 | const void* actual_ptr) { 55 | if (!info.is_float) { 56 | return BytesEqual(expected_ptr, actual_ptr, info.sizeof_t); 57 | } 58 | if (info.sizeof_t == 2) { 59 | const float expected = info.is_bf16 ? F32FromBF16Mem(expected_ptr) 60 | : F32FromF16Mem(expected_ptr); 61 | const float actual = 62 | info.is_bf16 ? 
F32FromBF16Mem(actual_ptr) : F32FromF16Mem(actual_ptr); 63 | return ComputeUlpDelta(expected, actual) <= 1; 64 | } else if (info.sizeof_t == 4) { 65 | float expected, actual; 66 | CopyBytes<4>(expected_ptr, &expected); 67 | CopyBytes<4>(actual_ptr, &actual); 68 | return ComputeUlpDelta(expected, actual) <= 1; 69 | } else if (info.sizeof_t == 8) { 70 | double expected, actual; 71 | CopyBytes<8>(expected_ptr, &expected); 72 | CopyBytes<8>(actual_ptr, &actual); 73 | return ComputeUlpDelta(expected, actual) <= 1; 74 | } else { 75 | HWY_ABORT("Unexpected float size %d\n", static_cast(info.sizeof_t)); 76 | } 77 | } 78 | 79 | HWY_TEST_DLLEXPORT HWY_NORETURN void PrintMismatchAndAbort( 80 | const TypeInfo& info, const void* expected_ptr, const void* actual_ptr, 81 | const char* target_name, const char* filename, int line, size_t lane, 82 | size_t num_lanes) { 83 | char type_name[100]; 84 | TypeName(info, 1, type_name); 85 | char expected_str[100]; 86 | ToString(info, expected_ptr, expected_str); 87 | char actual_str[100]; 88 | ToString(info, actual_ptr, actual_str); 89 | Abort(filename, line, 90 | "%s, %sx%d lane %d mismatch: expected '%s', got '%s'.\n", target_name, 91 | type_name, static_cast(num_lanes), static_cast(lane), 92 | expected_str, actual_str); 93 | } 94 | 95 | HWY_TEST_DLLEXPORT void AssertArrayEqual(const TypeInfo& info, 96 | const void* expected_void, 97 | const void* actual_void, size_t N, 98 | const char* target_name, 99 | const char* filename, int line) { 100 | const uint8_t* expected_array = 101 | reinterpret_cast(expected_void); 102 | const uint8_t* actual_array = reinterpret_cast(actual_void); 103 | for (size_t i = 0; i < N; ++i) { 104 | const void* expected_ptr = expected_array + i * info.sizeof_t; 105 | const void* actual_ptr = actual_array + i * info.sizeof_t; 106 | if (!IsEqual(info, expected_ptr, actual_ptr)) { 107 | fprintf(stderr, "\n\n"); 108 | PrintArray(info, "expect", expected_array, N, i); 109 | PrintArray(info, "actual", actual_array, N, 
i); 110 | 111 | PrintMismatchAndAbort(info, expected_ptr, actual_ptr, target_name, 112 | filename, line, i, N); 113 | } 114 | } 115 | } 116 | 117 | } // namespace detail 118 | } // namespace hwy 119 | -------------------------------------------------------------------------------- /hwy/tests/test_util_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | #include 18 | 19 | #include 20 | 21 | #include "hwy/base.h" 22 | 23 | #undef HWY_TARGET_INCLUDE 24 | #define HWY_TARGET_INCLUDE "tests/test_util_test.cc" 25 | #include "hwy/foreach_target.h" // IWYU pragma: keep 26 | #include "hwy/highway.h" 27 | #include "hwy/tests/test_util-inl.h" 28 | 29 | HWY_BEFORE_NAMESPACE(); 30 | namespace hwy { 31 | namespace HWY_NAMESPACE { 32 | namespace { 33 | 34 | struct TestName { 35 | template 36 | HWY_NOINLINE void operator()(T t, D d) { 37 | char num[10]; 38 | std::string expected = IsFloat() ? "f" : (IsSigned() ? 
"i" : "u"); 39 | snprintf(num, sizeof(num), "%u", static_cast(sizeof(T) * 8)); 40 | expected += num; 41 | 42 | const size_t N = Lanes(d); 43 | if (N != 1) { 44 | expected += 'x'; 45 | snprintf(num, sizeof(num), "%u", static_cast(N)); 46 | expected += num; 47 | } 48 | const std::string actual = TypeName(t, N); 49 | if (expected != actual) { 50 | HWY_ABORT("%s mismatch: expected '%s', got '%s'.\n", 51 | hwy::TargetName(HWY_TARGET), expected.c_str(), actual.c_str()); 52 | } 53 | } 54 | }; 55 | 56 | HWY_NOINLINE void TestAllName() { ForAllTypes(ForPartialVectors()); } 57 | 58 | struct TestEqualInteger { 59 | template 60 | HWY_NOINLINE void operator()(T /*t*/) const { 61 | HWY_ASSERT_EQ(0, 0); 62 | HWY_ASSERT_EQ(1, 1); 63 | HWY_ASSERT_EQ(-1, -1); 64 | HWY_ASSERT_EQ(LimitsMin(), LimitsMin()); 65 | 66 | HWY_ASSERT(!IsEqual(0, 1)); 67 | HWY_ASSERT(!IsEqual(1, 0)); 68 | HWY_ASSERT(!IsEqual(1, -1)); 69 | HWY_ASSERT(!IsEqual(-1, 1)); 70 | HWY_ASSERT(!IsEqual(LimitsMin(), LimitsMax())); 71 | HWY_ASSERT(!IsEqual(LimitsMax(), LimitsMin())); 72 | } 73 | }; 74 | 75 | struct TestEqualFloat { 76 | template 77 | HWY_NOINLINE void operator()(T /*t*/) const { 78 | const T k0 = ConvertScalarTo(0); 79 | const T p1 = ConvertScalarTo(1); 80 | const T n1 = ConvertScalarTo(-1); 81 | HWY_ASSERT(IsEqual(k0, k0)); 82 | HWY_ASSERT(IsEqual(p1, p1)); 83 | HWY_ASSERT(IsEqual(n1, n1)); 84 | HWY_ASSERT(IsEqual(MantissaEnd(), MantissaEnd())); 85 | 86 | HWY_ASSERT(!IsEqual(k0, p1)); 87 | HWY_ASSERT(!IsEqual(p1, k0)); 88 | HWY_ASSERT(!IsEqual(p1, n1)); 89 | HWY_ASSERT(!IsEqual(n1, p1)); 90 | HWY_ASSERT(!IsEqual(LowestValue(), HighestValue())); 91 | HWY_ASSERT(!IsEqual(HighestValue(), LowestValue())); 92 | } 93 | }; 94 | 95 | HWY_NOINLINE void TestAllEqual() { 96 | ForIntegerTypes(TestEqualInteger()); 97 | ForFloatTypes(TestEqualFloat()); 98 | } 99 | 100 | } // namespace 101 | // NOLINTNEXTLINE(google-readability-namespace-comments) 102 | } // namespace HWY_NAMESPACE 103 | } // namespace hwy 104 | 
HWY_AFTER_NAMESPACE(); 105 | 106 | #if HWY_ONCE 107 | namespace hwy { 108 | namespace { 109 | HWY_BEFORE_TEST(TestUtilTest); 110 | HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllName); 111 | HWY_EXPORT_AND_TEST_P(TestUtilTest, TestAllEqual); 112 | HWY_AFTER_TEST(); 113 | } // namespace 114 | } // namespace hwy 115 | HWY_TEST_MAIN(); 116 | #endif // HWY_ONCE 117 | -------------------------------------------------------------------------------- /hwy/tests/truncate_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2019 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 
15 | 16 | #include 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "tests/truncate_test.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | #include "hwy/highway.h" 22 | #include "hwy/tests/test_util-inl.h" 23 | 24 | HWY_BEFORE_NAMESPACE(); 25 | namespace hwy { 26 | namespace HWY_NAMESPACE { 27 | namespace { 28 | 29 | template 30 | constexpr bool IsSupportedTruncation() { 31 | return (sizeof(To) < sizeof(From) && Rebind().Pow2() >= -3 && 32 | Rebind().Pow2() + 4 >= static_cast(CeilLog2(sizeof(To)))); 33 | } 34 | 35 | struct TestTruncateTo { 36 | template ()>* = nullptr> 38 | HWY_NOINLINE void testTo(From, To, const D) { 39 | // do nothing 40 | } 41 | 42 | template ()>* = nullptr> 44 | HWY_NOINLINE void testTo(From, To, const D d) { 45 | constexpr uint32_t base = 0xFA578D00; 46 | const Rebind dTo; 47 | const Vec src = Iota(d, base & hwy::LimitsMax()); 48 | const Vec expected = Iota(dTo, base & hwy::LimitsMax()); 49 | const VFromD actual = TruncateTo(dTo, src); 50 | HWY_ASSERT_VEC_EQ(dTo, expected, actual); 51 | } 52 | 53 | template 54 | HWY_NOINLINE void operator()(T from, const D d) { 55 | testTo(from, uint8_t(), d); 56 | testTo(from, uint16_t(), d); 57 | testTo(from, uint32_t(), d); 58 | } 59 | }; 60 | 61 | HWY_NOINLINE void TestAllTruncate() { 62 | ForU163264(ForDemoteVectors()); 63 | } 64 | 65 | struct TestOrderedTruncate2To { 66 | template 67 | HWY_NOINLINE void operator()(T /*t*/, D d) { 68 | #if HWY_TARGET != HWY_SCALAR 69 | const Repartition, decltype(d)> dn; 70 | using TN = TFromD; 71 | 72 | const size_t N = Lanes(d); 73 | const size_t twiceN = N * 2; 74 | auto from = AllocateAligned(twiceN); 75 | auto expected = AllocateAligned(twiceN); 76 | HWY_ASSERT(from && expected); 77 | 78 | const T max = LimitsMax(); 79 | 80 | constexpr uint32_t iota_base = 0xFA578D00; 81 | const auto src_iota_a = Iota(d, iota_base); 82 | const auto src_iota_b = Iota(d, iota_base + N); 83 | const auto expected_iota_trunc_result = Iota(dn, 
iota_base); 84 | const auto actual_iota_trunc_result = 85 | OrderedTruncate2To(dn, src_iota_a, src_iota_b); 86 | HWY_ASSERT_VEC_EQ(dn, expected_iota_trunc_result, actual_iota_trunc_result); 87 | 88 | RandomState rng; 89 | for (size_t rep = 0; rep < AdjustedReps(1000); ++rep) { 90 | for (size_t i = 0; i < twiceN; ++i) { 91 | const uint64_t bits = rng(); 92 | CopyBytes(&bits, &from[i]); // not same size 93 | expected[i] = static_cast(from[i] & max); 94 | } 95 | 96 | const auto in_1 = Load(d, from.get()); 97 | const auto in_2 = Load(d, from.get() + N); 98 | const auto actual = OrderedTruncate2To(dn, in_1, in_2); 99 | HWY_ASSERT_VEC_EQ(dn, expected.get(), actual); 100 | } 101 | #else 102 | (void)d; 103 | #endif 104 | } 105 | }; 106 | 107 | HWY_NOINLINE void TestAllOrderedTruncate2To() { 108 | ForU163264(ForShrinkableVectors()); 109 | } 110 | 111 | } // namespace 112 | // NOLINTNEXTLINE(google-readability-namespace-comments) 113 | } // namespace HWY_NAMESPACE 114 | } // namespace hwy 115 | HWY_AFTER_NAMESPACE(); 116 | 117 | #if HWY_ONCE 118 | namespace hwy { 119 | namespace { 120 | HWY_BEFORE_TEST(HwyTruncateTest); 121 | HWY_EXPORT_AND_TEST_P(HwyTruncateTest, TestAllTruncate); 122 | HWY_EXPORT_AND_TEST_P(HwyTruncateTest, TestAllOrderedTruncate2To); 123 | HWY_AFTER_TEST(); 124 | } // namespace 125 | } // namespace hwy 126 | HWY_TEST_MAIN(); 127 | #endif // HWY_ONCE 128 | -------------------------------------------------------------------------------- /hwy/tests/tuple_test.cc: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 
6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #include 17 | 18 | #undef HWY_TARGET_INCLUDE 19 | #define HWY_TARGET_INCLUDE "tests/tuple_test.cc" 20 | #include "hwy/foreach_target.h" // IWYU pragma: keep 21 | #include "hwy/highway.h" 22 | #include "hwy/tests/test_util-inl.h" 23 | 24 | HWY_BEFORE_NAMESPACE(); 25 | namespace hwy { 26 | namespace HWY_NAMESPACE { 27 | namespace { 28 | 29 | struct TestCreateAndSet { 30 | template 31 | HWY_NOINLINE void operator()(T /*unused*/, D d) { 32 | #if HWY_HAVE_TUPLE 33 | const Vec v0 = Zero(d); 34 | const Vec vi = Iota(d, 1); 35 | const Vec v2 = Set(d, ConvertScalarTo(2)); 36 | const Vec v3 = Set(d, ConvertScalarTo(3)); 37 | 38 | Vec2 t2 = Create2(d, v0, vi); 39 | HWY_ASSERT_VEC_EQ(d, v0, Get2<0>(t2)); 40 | HWY_ASSERT_VEC_EQ(d, vi, Get2<1>(t2)); 41 | 42 | t2 = Set2<0>(t2, vi); 43 | t2 = Set2<1>(t2, v0); 44 | HWY_ASSERT_VEC_EQ(d, vi, Get2<0>(t2)); 45 | HWY_ASSERT_VEC_EQ(d, v0, Get2<1>(t2)); 46 | 47 | Vec3 t3 = Create3(d, v0, vi, v2); 48 | HWY_ASSERT_VEC_EQ(d, v0, Get3<0>(t3)); 49 | HWY_ASSERT_VEC_EQ(d, vi, Get3<1>(t3)); 50 | HWY_ASSERT_VEC_EQ(d, v2, Get3<2>(t3)); 51 | 52 | t3 = Set3<0>(t3, v2); 53 | t3 = Set3<1>(t3, vi); 54 | t3 = Set3<2>(t3, v0); 55 | HWY_ASSERT_VEC_EQ(d, v2, Get3<0>(t3)); 56 | HWY_ASSERT_VEC_EQ(d, vi, Get3<1>(t3)); 57 | HWY_ASSERT_VEC_EQ(d, v0, Get3<2>(t3)); 58 | 59 | Vec4 t4 = Create4(d, v0, vi, v2, v3); 60 | HWY_ASSERT_VEC_EQ(d, v0, Get4<0>(t4)); 61 | HWY_ASSERT_VEC_EQ(d, vi, Get4<1>(t4)); 62 | HWY_ASSERT_VEC_EQ(d, v2, Get4<2>(t4)); 63 | HWY_ASSERT_VEC_EQ(d, v3, Get4<3>(t4)); 64 | 65 | t4 = 
Set4<0>(t4, v3); 66 | t4 = Set4<1>(t4, v2); 67 | t4 = Set4<2>(t4, vi); 68 | t4 = Set4<3>(t4, v0); 69 | HWY_ASSERT_VEC_EQ(d, v3, Get4<0>(t4)); 70 | HWY_ASSERT_VEC_EQ(d, v2, Get4<1>(t4)); 71 | HWY_ASSERT_VEC_EQ(d, vi, Get4<2>(t4)); 72 | HWY_ASSERT_VEC_EQ(d, v0, Get4<3>(t4)); 73 | #else 74 | (void)d; 75 | HWY_WARN("Tuples disabled for target %s\n", hwy::TargetName(HWY_TARGET)); 76 | #endif // HWY_HAVE_TUPLE 77 | } 78 | }; 79 | 80 | HWY_NOINLINE void TestAllCreate() { 81 | // RVV can only do tuples up to LMUL=2. 82 | ForAllTypes(ForMaxPow2()); 83 | } 84 | 85 | } // namespace 86 | // NOLINTNEXTLINE(google-readability-namespace-comments) 87 | } // namespace HWY_NAMESPACE 88 | } // namespace hwy 89 | HWY_AFTER_NAMESPACE(); 90 | 91 | #if HWY_ONCE 92 | namespace hwy { 93 | namespace { 94 | HWY_BEFORE_TEST(TupleTest); 95 | HWY_EXPORT_AND_TEST_P(TupleTest, TestAllCreate); 96 | HWY_AFTER_TEST(); 97 | } // namespace 98 | } // namespace hwy 99 | HWY_TEST_MAIN(); 100 | #endif // HWY_ONCE 101 | -------------------------------------------------------------------------------- /hwy/timer-inl.h: -------------------------------------------------------------------------------- 1 | // Copyright 2023 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | // DEPRECATED, use timer.h instead. 
17 | 18 | #include "hwy/timer.h" 19 | 20 | #if defined(HIGHWAY_HWY_TIMER_INL_H_) == defined(HWY_TARGET_TOGGLE) 21 | #ifdef HIGHWAY_HWY_TIMER_INL_H_ 22 | #undef HIGHWAY_HWY_TIMER_INL_H_ 23 | #else 24 | #define HIGHWAY_HWY_TIMER_INL_H_ 25 | #endif 26 | 27 | #include "hwy/highway.h" 28 | 29 | HWY_BEFORE_NAMESPACE(); 30 | namespace hwy { 31 | namespace HWY_NAMESPACE { 32 | namespace timer { 33 | 34 | // Deprecated aliases so that old code still compiles. Prefer to use 35 | // `hwy::timer::*` from timer.h because that does not require highway.h. 36 | using Ticks = hwy::timer::Ticks; 37 | 38 | inline Ticks Start() { return hwy::timer::Start(); } 39 | inline Ticks Stop() { return hwy::timer::Stop(); } 40 | 41 | } // namespace timer 42 | 43 | // NOLINTNEXTLINE(google-readability-namespace-comments) 44 | } // namespace HWY_NAMESPACE 45 | } // namespace hwy 46 | HWY_AFTER_NAMESPACE(); 47 | 48 | #endif // per-target include guard 49 | -------------------------------------------------------------------------------- /hwy/x86_cpuid.h: -------------------------------------------------------------------------------- 1 | // Copyright 2025 Google LLC 2 | // SPDX-License-Identifier: Apache-2.0 3 | // 4 | // Licensed under the Apache License, Version 2.0 (the "License"); 5 | // you may not use this file except in compliance with the License. 6 | // You may obtain a copy of the License at 7 | // 8 | // http://www.apache.org/licenses/LICENSE-2.0 9 | // 10 | // Unless required by applicable law or agreed to in writing, software 11 | // distributed under the License is distributed on an "AS IS" BASIS, 12 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | // See the License for the specific language governing permissions and 14 | // limitations under the License. 15 | 16 | #ifndef HIGHWAY_HWY_X86_CPUID_H_ 17 | #define HIGHWAY_HWY_X86_CPUID_H_ 18 | 19 | // Wrapper for x86 CPUID intrinsics. Empty on other platforms. 
20 | 21 | #include 22 | 23 | #include "hwy/base.h" 24 | 25 | #if HWY_ARCH_X86 26 | 27 | #if HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL 28 | #include 29 | #else 30 | #include 31 | #endif 32 | 33 | namespace hwy { 34 | namespace x86 { 35 | 36 | // Calls CPUID instruction with eax=level and ecx=count and returns the result 37 | // in abcd array where abcd = {eax, ebx, ecx, edx} (hence the name abcd). 38 | static inline void Cpuid(const uint32_t level, const uint32_t count, 39 | uint32_t* HWY_RESTRICT abcd) { 40 | #if HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL 41 | int regs[4]; 42 | __cpuidex(regs, static_cast(level), static_cast(count)); 43 | for (int i = 0; i < 4; ++i) { 44 | abcd[i] = static_cast(regs[i]); 45 | } 46 | #else // HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL 47 | uint32_t a; 48 | uint32_t b; 49 | uint32_t c; 50 | uint32_t d; 51 | __cpuid_count(level, count, a, b, c, d); 52 | abcd[0] = a; 53 | abcd[1] = b; 54 | abcd[2] = c; 55 | abcd[3] = d; 56 | #endif // HWY_COMPILER_MSVC || HWY_COMPILER_CLANGCL 57 | } 58 | 59 | static inline bool IsBitSet(const uint32_t reg, const int index) { 60 | return (reg & (1U << index)) != 0; 61 | } 62 | 63 | static inline uint32_t MaxLevel() { 64 | uint32_t abcd[4]; 65 | Cpuid(0, 0, abcd); 66 | return abcd[0]; 67 | } 68 | 69 | static inline bool IsAMD() { 70 | uint32_t abcd[4]; 71 | Cpuid(0, 0, abcd); 72 | const uint32_t max_level = abcd[0]; 73 | return max_level >= 1 && abcd[1] == 0x68747541 && abcd[2] == 0x444d4163 && 74 | abcd[3] == 0x69746e65; 75 | } 76 | 77 | } // namespace x86 78 | } // namespace hwy 79 | 80 | #endif // HWY_ARCH_X86 81 | #endif // HIGHWAY_HWY_X86_CPUID_H_ 82 | -------------------------------------------------------------------------------- /libhwy-contrib.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@ 4 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ 5 | 6 | 
Name: libhwy-contrib 7 | Description: Additions to Highway: dot product, image, math, sort 8 | Version: @HWY_LIBRARY_VERSION@ 9 | Libs: -L${libdir} -lhwy_contrib @HWY_PC_WIN32_SYNCHRONIZATION_LIBS@ 10 | Cflags: -I${includedir} @HWY_PC_DISABLE_FUTEX_CFLAGS@ 11 | -------------------------------------------------------------------------------- /libhwy-test.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@ 4 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ 5 | 6 | Name: libhwy-test 7 | Description: Efficient and performance-portable SIMD wrapper, test helpers. 8 | Requires: @HWY_PC_HWY_TEST_REQUIRES@ 9 | Version: @HWY_LIBRARY_VERSION@ 10 | Libs: -L${libdir} -lhwy_test 11 | Cflags: -I${includedir} @HWY_PC_HWY_TEST_CFLAGS@ 12 | -------------------------------------------------------------------------------- /libhwy.pc.in: -------------------------------------------------------------------------------- 1 | prefix=@CMAKE_INSTALL_PREFIX@ 2 | exec_prefix=${prefix} 3 | libdir=${exec_prefix}/@CMAKE_INSTALL_LIBDIR@ 4 | includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ 5 | 6 | Name: libhwy 7 | Description: Efficient and performance-portable SIMD wrapper 8 | Version: @HWY_LIBRARY_VERSION@ 9 | Libs: -L${libdir} -lhwy 10 | Cflags: -I${includedir} -D@DLLEXPORT_TO_DEFINE@ 11 | -------------------------------------------------------------------------------- /preamble.js.lds: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2019 Google LLC 3 | * 4 | * This source code is licensed under the BSD-style license found in the 5 | * LICENSE file in the root directory of this source tree. 
6 | */ 7 | 8 | /* mock crypto module for benchmarks and unit tests or std::random_device fails at runtime */ 9 | var crypto = { getRandomValues: function(array) { for (var i = 0; i < array.length; i++) array[i] = (Math.random()*256)|0 } }; -------------------------------------------------------------------------------- /run_tests.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | REM Switch directory of this batch file 3 | cd %~dp0 4 | 5 | if not exist build_win mkdir build_win 6 | 7 | cd build_win 8 | cmake .. -DHWY_WARNINGS_ARE_ERRORS:BOOL=ON -G Ninja || goto error 9 | ninja || goto error 10 | ctest -j || goto error 11 | 12 | cd .. 13 | echo Success 14 | goto end 15 | 16 | :error 17 | echo Failure 18 | exit /b 1 19 | 20 | :end 21 | --------------------------------------------------------------------------------