├── .gitignore ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── _config.yml ├── check_c_compiles.py ├── images └── pysimd.png ├── include ├── core_simd_info.h ├── simd_vec.h ├── simd_vec_arith.h ├── simd_vec_filter.h ├── simd_vec_type.h └── vec_macros.h ├── setup.cfg ├── setup.py ├── src └── pymain.c └── tests ├── __main__.py ├── import_test.c └── size_test.c /.gitignore: -------------------------------------------------------------------------------- 1 | ## 2 | ## This gitignore addresses windows and unix systems. 3 | ## Ignore Visual Studio temporary files, build results, and 4 | ## files generated by popular Visual Studio add-ons. 5 | 6 | .DS_Store 7 | 8 | # User-specific files 9 | *.suo 10 | *.user 11 | *.userosscache 12 | *.sln.docstates 13 | 14 | # User-specific files (MonoDevelop/Xamarin Studio) 15 | *.userprefs 16 | 17 | # Build results 18 | [Dd]ebug/ 19 | [Dd]ebugPublic/ 20 | [Rr]elease/ 21 | [Rr]eleases/ 22 | x64/ 23 | x86/ 24 | bld/ 25 | [Bb]in/ 26 | [Oo]bj/ 27 | [Ll]og/ 28 | build/ 29 | dist/ 30 | MANIFEST 31 | 32 | # Visual Studio 2015 cache/options directory 33 | .vs/ 34 | # Uncomment if you have tasks that create the project's static files in wwwroot 35 | #wwwroot/ 36 | 37 | # MSTest test Results 38 | [Tt]est[Rr]esult*/ 39 | [Bb]uild[Ll]og.* 40 | 41 | # NUNIT 42 | *.VisualState.xml 43 | TestResult.xml 44 | 45 | # Build Results of an ATL Project 46 | [Dd]ebugPS/ 47 | [Rr]eleasePS/ 48 | dlldata.c 49 | 50 | # DNX 51 | project.lock.json 52 | project.fragment.lock.json 53 | artifacts/ 54 | 55 | *.ilk 56 | *.meta 57 | *.obj 58 | *.pch 59 | *.pdb 60 | *.pgc 61 | *.pgd 62 | *.rsp 63 | *.sbr 64 | *.tlb 65 | *.tli 66 | *.tlh 67 | *.tmp 68 | *.tmp_proj 69 | *.log 70 | *.vspscc 71 | *.vssscc 72 | .builds 73 | *.pidb 74 | *.svclog 75 | *.scc 76 | 77 | # Chutzpah Test files 78 | _Chutzpah* 79 | 80 | # Visual C++ cache files 81 | ipch/ 82 | *.aps 83 | *.ncb 84 | *.opendb 85 | *.opensdf 86 | *.sdf 87 | *.cachefile 88 | *.VC.db 89 | *.VC.VC.opendb 90 | 91 | # Visual Studio profiler 92 | *.psess 93 | *.vsp 94 | *.vspx 95 | *.sap 96 | 97 | # TFS 2012 Local Workspace 98 | $tf/ 99 | 100 | # Guidance Automation Toolkit 101 | *.gpState 102 | 103 | # ReSharper is a .NET coding add-in 104 | _ReSharper*/ 105 | *.[Rr]e[Ss]harper 106 | *.DotSettings.user 107 | 108 | # JustCode is a .NET coding add-in 109 | .JustCode 110 | 111 | # TeamCity is a build add-in 112 | _TeamCity* 113 | 114 | # DotCover is a Code Coverage Tool 115 | *.dotCover 116 | 117 | # NCrunch 118 | _NCrunch_* 119 | .*crunch*.local.xml 120 | nCrunchTemp_* 121 | 122 | # MightyMoose 123 | *.mm.* 124 | AutoTest.Net/ 125 | 126 | # Web workbench (sass) 127 | .sass-cache/ 128 | 129 | # Installshield output folder 130 | [Ee]xpress/ 131 | 132 | # DocProject is a documentation generator add-in 133 | DocProject/buildhelp/ 134 | DocProject/Help/*.HxT 135 | DocProject/Help/*.HxC 136 | DocProject/Help/*.hhc 137 | DocProject/Help/*.hhk 138 | DocProject/Help/*.hhp 139 | DocProject/Help/Html2 140 | DocProject/Help/html 141 | 142 | # Click-Once directory 143 | publish/ 144 | 145 | # Publish Web Output 146 | *.[Pp]ublish.xml 147 | *.azurePubxml 148 | # TODO: Comment the next line if you want to checkin your web deploy settings 149 | # but database connection strings (with potential passwords) will be unencrypted 150 | #*.pubxml 151 | *.publishproj 152 | 153 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 154 | # checkin your Azure Web App publish settings, but sensitive information contained 155 | # in these scripts will be unencrypted 156 | PublishScripts/ 157 | 158 | # NuGet Packages 159 | *.nupkg 160 | # The packages folder can be ignored because of Package Restore 161 | **/packages/* 162 | # except build/, which is used as an MSBuild target. 163 | !**/packages/build/ 164 | # Uncomment if necessary however generally it will be regenerated when needed 165 | #!**/packages/repositories.config 166 | # NuGet v3's project.json files produces more ignoreable files 167 | *.nuget.props 168 | *.nuget.targets 169 | 170 | # Microsoft Azure Build Output 171 | csx/ 172 | *.build.csdef 173 | 174 | # Microsoft Azure Emulator 175 | ecf/ 176 | rcf/ 177 | 178 | # Windows Store app package directories and files 179 | AppPackages/ 180 | BundleArtifacts/ 181 | Package.StoreAssociation.xml 182 | _pkginfo.txt 183 | 184 | # Visual Studio cache files 185 | # files ending in .cache can be ignored 186 | *.[Cc]ache 187 | # but keep track of directories ending in .cache 188 | !*.[Cc]ache/ 189 | 190 | # Others 191 | ClientBin/ 192 | ~$* 193 | *~ 194 | *.dbmdl 195 | *.dbproj.schemaview 196 | *.jfm 197 | *.pfx 198 | *.publishsettings 199 | node_modules/ 200 | orleans.codegen.cs 201 | 202 | # Since there are multiple workflows, uncomment next line to ignore bower_components 203 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 204 | #bower_components/ 205 | 206 | # RIA/Silverlight projects 207 | Generated_Code/ 208 | 209 | # Backup & report files from converting an old project file 210 | # to a newer Visual Studio version. Backup files are not needed, 211 | # because we have git ;-) 212 | _UpgradeReport_Files/ 213 | Backup*/ 214 | UpgradeLog*.XML 215 | UpgradeLog*.htm 216 | 217 | # SQL Server files 218 | *.mdf 219 | *.ldf 220 | 221 | # Business Intelligence projects 222 | *.rdl.data 223 | *.bim.layout 224 | *.bim_*.settings 225 | 226 | # Microsoft Fakes 227 | FakesAssemblies/ 228 | 229 | # GhostDoc plugin setting file 230 | *.GhostDoc.xml 231 | 232 | # Node.js Tools for Visual Studio 233 | .ntvs_analysis.dat 234 | 235 | # Visual Studio 6 build log 236 | *.plg 237 | 238 | # Visual Studio 6 workspace options file 239 | *.opt 240 | 241 | # Visual Studio LightSwitch build output 242 | **/*.HTMLClient/GeneratedArtifacts 243 | **/*.DesktopClient/GeneratedArtifacts 244 | **/*.DesktopClient/ModelManifest.xml 245 | **/*.Server/GeneratedArtifacts 246 | **/*.Server/ModelManifest.xml 247 | _Pvt_Extensions 248 | 249 | # Paket dependency manager 250 | .paket/paket.exe 251 | paket-files/ 252 | 253 | # FAKE - F# Make 254 | .fake/ 255 | 256 | # JetBrains Rider 257 | .idea/ 258 | *.sln.iml 259 | 260 | # CodeRush 261 | .cr/ 262 | 263 | # Python Tools for Visual Studio (PTVS) 264 | __pycache__/ 265 | *.pyc 266 | 267 | # ignore bin folder 268 | bin/ 269 | 270 | 271 | # Prerequisites 272 | *.d 273 | 274 | # Object files 275 | *.o 276 | *.ko 277 | *.obj 278 | *.elf 279 | 280 | # Linker output 281 | *.ilk 282 | *.map 283 | *.exp 284 | 285 | # Precompiled Headers 286 | *.gch 287 | *.pch 288 | 289 | # Libraries 290 | *.lib 291 | *.a 292 | *.la 293 | *.lo 294 | 295 | # Shared objects (inc. Windows DLLs) 296 | *.dll 297 | *.so 298 | *.so.* 299 | *.dylib 300 | 301 | # Executables 302 | *.exe 303 | *.out 304 | *.app 305 | *.i*86 306 | *.x86_64 307 | *.hex 308 | 309 | # Debug files 310 | *.dSYM/ 311 | *.su 312 | *.idb 313 | *.pdb 314 | 315 | # Kernel Module Compile Results 316 | *.mod* 317 | *.cmd 318 | .tmp_versions/ 319 | modules.order 320 | Module.symvers 321 | Mkfile.old 322 | dkms.conf 323 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Josh Weinstein 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | global-exclude *.py[cod] 2 | graft include 3 | include tests/*.py 4 | include tests/*.c 5 | include README.rst 6 | include LICENSE.txt 7 | include check_c_compiles.py -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PySIMD 2 | ====== 3 | 4 | .. image:: images/pysimd.png 5 | :width: 800 6 | :alt: PySimd logo 7 | 8 | ``simd`` is the python module for SIMD computing and programming. It 9 | prodives an extensive interface to SIMD instruction sets on several different 10 | architectures, and fallback scalar implementations when no SIMD instructions 11 | are available for a particular operation. ``simd`` provides vector objects, 12 | which are collections of bytes aligned on a minimum boundary. The main advantage 13 | of using SIMD instructions is the potentially masive performance 14 | improvements over regular, scalar instructions. 15 | 16 | ``simd`` is a C extension, that is only compatible with Python 3. When 17 | built, it will do compile time checks to see what SIMD instructions are 18 | available on the current CPU. The advantage of using ``simd`` over other 19 | libraries or implementations is this module auto-detects the best 20 | instructions available for specific operations at install time and uses 21 | them anytime the module is imported into a Python program. 22 | 23 | Installation 24 | ------------ 25 | 26 | To install ``simd``, run the following ``pip`` command 27 | 28 | :: 29 | 30 | $ pip install simd 31 | 32 | Tests 33 | ----- 34 | 35 | To run the tests, run the following command 36 | 37 | :: 38 | 39 | $ python tests 40 | 41 | This will build the ``.c`` test files , link the python library into each one, and run them. 42 | 43 | Usage 44 | ----- 45 | 46 | The ``simd`` module can be used primarily through vector objects. Vector 47 | objects are special C objects that contain a portion of bytes aligned on 48 | at least a 16 byte boundary. This alignment allows SIMD operations to be 49 | performed at a higher scale, without needing to worry about the leftover 50 | bytes at the end of a data segment. 51 | 52 | Creation 53 | ~~~~~~~~ 54 | 55 | You can make an empty vector with a size 56 | 57 | .. code:: py 58 | 59 | >>> import simd 60 | >>> a = simd.Vec(size=32) 61 | >>> a 62 | [0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0] 63 | 64 | Or make a vector from a repeated value 65 | 66 | .. code:: py 67 | 68 | >>> a = simd.Vec(size=32, repeat_value=64, repeat_size=2) 69 | >>> a 70 | [40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0,40,0] 71 | 72 | Note: the ``__repr__`` method of ``Vec`` , implemented in C, displays a 73 | hexadecimal byte representation of the vector. 74 | 75 | However, if a size used cannot be aligned by 16 bytes, an error is 76 | thrown 77 | 78 | .. code:: py 79 | 80 | >>> a = simd.Vec(size=31, repeat_value=64, repeat_size=2) 81 | Traceback (most recent call last): 82 | File "", line 1, in 83 | simd.SimdError: The size '31' cannot be aligned by at least 16 bytes 84 | 85 | Operations 86 | ~~~~~~~~~~ 87 | 88 | The ``simd`` module supports a wide variety of operations, such as plain 89 | addition: 90 | 91 | .. code:: py 92 | 93 | >>> a = simd.Vec(size=32, repeat_value=1, repeat_size=1) 94 | >>> b = simd.Vec(size=32, repeat_value=1, repeat_size=1) 95 | >>> a 96 | [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 97 | >>> b 98 | [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] 99 | >>> a.add(b, width=1) 100 | >>> a 101 | [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2] 102 | 103 | There are also operations to retrieve the data in the vector back as 104 | python types, such as with ``as_bytes()`` 105 | 106 | .. code:: py 107 | 108 | >>> a = simd.Vec(size=16, repeat_value=4, repeat_size=2) 109 | >>> a 110 | [4,0,4,0,4,0,4,0,4,0,4,0,4,0,4,0] 111 | >>> a.as_bytes(start = 3) 112 | b'\x00\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00' 113 | >>> a.as_bytes() 114 | b'\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00\x04\x00' 115 | >>> a.as_bytes(start=40) 116 | Traceback (most recent call last): 117 | File "", line 1, in 118 | simd.SimdError: start: '40', is out of bounds for vector of size 16 119 | 120 | The data inside a vector can also be retrieved as a collection type, like a ``tuple`` , 121 | 122 | .. code:: py 123 | 124 | >>> a = simd.Vec(size=32, repeat_value=5, repeat_size=4) 125 | >>> a 126 | [5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0,5,0,0,0] 127 | >>> a.as_tuple(type=int, width=4) 128 | (5, 5, 5, 5, 5, 5, 5, 5) 129 | >>> a.as_tuple(type=int, width=1) 130 | (5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0, 5, 0, 0, 0) 131 | >>> a.as_tuple(type=int, width=8) 132 | (21474836485, 21474836485, 21474836485, 21474836485) 133 | 134 | The above example shows the pure ``__repr__`` method of ``Vec`` only depicts a hexadecimal, byte level representation of the vector data, but a method like ``as_tuple`` allows the viewing of data with different types. One unique aspect of the ``simd`` module is it treats data and memory similar to that of C, where a chunk of 16 bytes could be two 64 bit integers, four 32 bit integers, and so on. 135 | 136 | 137 | Math 138 | ~~~~ 139 | 140 | The ``simd`` module supports simd operations that involve artihmetic and math on integers and floating point numbers. Operations like ``add`` or ``sub`` work off another vector and a ``width``. The ``width`` indicates the width of the data lane the simd instruction applies to, such as ``8`` for 64 bit operations. Here are a few examples: 141 | 142 | .. code:: py 143 | 144 | >>> v = simd.Vec(size=16, repeat_value=5, repeat_size=4) 145 | >>> v2 = simd.Vec(size=16, repeat_value=10, repeat_size=4) 146 | >>> v.add(v2) 147 | Traceback (most recent call last): 148 | File "", line 1, in 149 | TypeError: function missing required argument 'width' (pos 2) 150 | >>> v.add(v2, width=4) 151 | >>> v.as_tuple(type=int, width=4) 152 | (15, 15, 15, 15) 153 | >>> v.sub(v2, width=4) 154 | >>> v.sub(v2, width=4) 155 | >>> v.as_tuple(type=int, width=4) 156 | (-5, -5, -5, -5) 157 | 158 | 159 | -------------------------------------------------------------------------------- /_config.yml: -------------------------------------------------------------------------------- 1 | theme: jekyll-theme-minimal -------------------------------------------------------------------------------- /check_c_compiles.py: -------------------------------------------------------------------------------- 1 | import distutils.ccompiler 2 | import os 3 | import random 4 | import subprocess 5 | 6 | """ 7 | These classes allow a test to see if source code with the C compiler actually 8 | compiles. 9 | """ 10 | 11 | DEFAULT_COMPILER = distutils.ccompiler.get_default_compiler() 12 | 13 | C_EXTENSION = ".c" 14 | 15 | def create_file_with_rand_name(source): 16 | cur_dir = os.getcwd() 17 | rand_file = os.path.join(cur_dir, "c_" + str(random.getrandbits(72))) 18 | while os.path.exists(rand_file): 19 | rand_file = os.path.join(cur_dir, "c_" + str(random.getrandbits(72))) 20 | with open(rand_file + C_EXTENSION, "w") as c_file: 21 | c_file.write(source) 22 | return rand_file 23 | 24 | class CheckCCompiles(object): 25 | 26 | def __init__(self, name = "", source_code = ""): 27 | self.name = name 28 | self.source_code = source_code 29 | self.compiler = distutils.ccompiler.new_compiler() 30 | if DEFAULT_COMPILER == 'unix': 31 | # The idea here is that we want to have the compiler try and generate all the possible 32 | # simd instructions, then see by running it, if we get an illegal hardware instruction 33 | self.extra_args = ["-m" + self.name] 34 | elif DEFAULT_COMPILER == 'msvc': 35 | self.extra_args = ['/arch:AVX', '/arch:AVX2', '/arch:AVX512'] 36 | else: 37 | self.extra_args = [] 38 | self.works = False 39 | 40 | def try_run(self): 41 | try: 42 | self.run_result = subprocess.run(self.file_name, check=False) 43 | self.works = self.run_result.returncode == 0 44 | except Exception: 45 | self.works = False 46 | return self.works 47 | 48 | def __enter__(self): 49 | self.file_name = create_file_with_rand_name(self.source_code) 50 | self.c_name = self.file_name + C_EXTENSION 51 | try: 52 | self.obj_names = self.compiler.compile([self.c_name], extra_preargs=self.extra_args) 53 | except Exception as exc: 54 | print("FAILED " + self.name + " compile check: " + str(exc)) 55 | return self 56 | self.compiles = True 57 | try: 58 | self.compiler.link_executable(self.obj_names, self.file_name) 59 | except Exception as exc: 60 | print("FAILED " + self.name + " link check: " + str(exc)) 61 | return self 62 | self.links = True 63 | if self.try_run(): 64 | print("PASSED " + self.name) 65 | else: 66 | print("FAILED " + self.name + " run check: " + str(self.run_result.stderr)) 67 | return self 68 | 69 | def __exit__(self, exc_type, exc_val, exc_tb): 70 | try: 71 | os.remove(self.c_name) 72 | if os.name == 'nt': 73 | os.remove(self.file_name + ".exe") 74 | else: 75 | os.remove(self.file_name) 76 | for objfile in self.obj_names: 77 | os.remove(objfile) 78 | except Exception as exc: 79 | # Avoid noise for non existant files 80 | return -------------------------------------------------------------------------------- /images/pysimd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jweinst1/pysimd/62bf99f8b9f4ca3d06bf59add960ff408f5781f7/images/pysimd.png -------------------------------------------------------------------------------- /include/core_simd_info.h: -------------------------------------------------------------------------------- 1 | #ifndef CORE_SIMD_INFO_H 2 | #define CORE_SIMD_INFO_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | 10 | #ifdef __GNUC__ 11 | # define PYSIMD_CC_GCC 12 | #endif // __GNUC__ 13 | 14 | #ifdef _MSC_VER 15 | # define PYSIMD_CC_MSVC 16 | #endif // _MSC_VER 17 | 18 | #ifdef __clang__ 19 | # define PYSIMD_CC_CLANG 20 | #endif // __clang__ 21 | 22 | // Architecture detection 23 | #if (defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64) || defined(_AMD64_)) 24 | # define PYSIMD_ARCH_X86_64 25 | #elif defined(__arm__) && defined(__aarch64__) 26 | # define PYSIMD_ARCH_ARM_64 27 | #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) 28 | # define PYSIMD_ARCH_MIPS 29 | #elif defined(__sparc) || defined(__sparc__) 30 | # define PYSIMD_ARCH_SPARC 31 | #else 32 | # define PYSIMD_ARCH_UNKNOWN 33 | #endif 34 | 35 | // Operating System Detection 36 | #if defined(_WIN32) 37 | # define PYSIMD_OS_WINDOWS 38 | #elif defined(__linux__) || defined(linux) || defined(__linux) 39 | # define PYSIMD_OS_LINUX 40 | #elif defined(macintosh) || defined(Macintosh) || (defined(__APPLE__) && defined(__MACH__)) 41 | # define PYSIMD_OS_MAC 42 | #else 43 | # define PYSIMD_OS_UNKNOWN 44 | #endif 45 | 46 | //Mobile OS detection (android is shared with linux) 47 | #ifdef __ANDROID__ 48 | # define PYSIMD_OSM_ANDROID 49 | #endif // __ANDROID__ 50 | 51 | #if defined(PYSIMD_ARCH_X86_64) 52 | # if defined(PYSIMD_OS_WINDOWS) 53 | # define PYSIMD_X86_CPUID(info, x) __cpuidex(info, x, 0) 54 | # elif defined(PYSIMD_CC_GCC) 55 | # include 56 | # define PYSIMD_X86_CPUID(info, x) __cpuid_count(x, 0, (info)[0], (info)[1], (info)[2], (info)[3]) 57 | # endif 58 | // intrinsic headers 59 | # if defined(PYSIMD_CC_GCC) || defined(PYSIMD_CC_CLANG) 60 | # include 61 | # else // defined(PYSIMD_CC_GCC) || defined(PYSIMD_CC_CLANG) 62 | # include 63 | # endif // !defined(PYSIMD_CC_GCC) || defined(PYSIMD_CC_CLANG) 64 | #endif 65 | 66 | // Exact width integer types 67 | #if defined(PYSIMD_OS_WINDOWS) && defined(_MSC_VER) 68 | typedef __int8 int8_t; 69 | typedef __int16 int16_t; 70 | typedef __int32 int32_t; 71 | typedef __int64 int64_t; 72 | typedef unsigned __int8 uint8_t; 73 | typedef unsigned __int16 uint16_t; 74 | typedef unsigned __int32 uint32_t; 75 | typedef unsigned __int64 uint64_t; 76 | #else 77 | # include 78 | #endif 79 | 80 | enum pysimd_arch { 81 | PYSIMD_ARCH_TYPE_UNKNOWN, 82 | PYSIMD_ARCH_TYPE_X86, 83 | PYSIMD_ARCH_TYPE_ARM, 84 | PYSIMD_ARCH_TYPE_MIPS, 85 | PYSIMD_ARCH_TYPE_SPARC 86 | }; 87 | 88 | static const char* pysimd_arch_stringify(enum pysimd_arch arch) 89 | { 90 | static const char* pysimd_x86 = "x86"; 91 | static const char* pysimd_arm = "arm"; 92 | static const char* pysimd_mips = "mips"; 93 | static const char* pysimd_sparc = "sparc"; 94 | static const char* pysimd_unknown = "unknown"; 95 | 96 | switch (arch) { 97 | case PYSIMD_ARCH_TYPE_X86: return pysimd_x86; 98 | case PYSIMD_ARCH_TYPE_ARM: return pysimd_arm; 99 | case PYSIMD_ARCH_TYPE_MIPS: return pysimd_mips; 100 | case PYSIMD_ARCH_TYPE_SPARC: return pysimd_sparc; 101 | case PYSIMD_ARCH_TYPE_UNKNOWN: return pysimd_unknown; 102 | default: 103 | return pysimd_unknown; 104 | } 105 | } 106 | 107 | enum pysimd_cc { 108 | PYSIMD_CC_TYPE_UNKNOWN, 109 | PYSIMD_CC_TYPE_GCC, 110 | PYSIMD_CC_TYPE_MSVC, 111 | PYSIMD_CC_TYPE_CLANG 112 | }; 113 | 114 | static const char* pysimd_cc_stringify(enum pysimd_cc cc) 115 | { 116 | static const char* pysimd_unknown = "unknown"; 117 | static const char* pysimd_gcc = "gcc"; 118 | static const char* pysimd_clang = "clang"; 119 | static const char* pysimd_msvc = "msvc"; 120 | 121 | switch (cc) { 122 | case PYSIMD_CC_TYPE_GCC: return pysimd_gcc; 123 | case PYSIMD_CC_TYPE_CLANG: return pysimd_clang; 124 | case PYSIMD_CC_TYPE_MSVC: return pysimd_msvc; 125 | default: 126 | return pysimd_unknown; 127 | } 128 | } 129 | 130 | #ifdef PYSIMD_ARCH_X86_64 131 | struct pysimd_x86_features { 132 | int mmx; 133 | int popcnt; 134 | int sse; 135 | int sse2; 136 | int sse3; 137 | int ssse3; 138 | int sse41; 139 | int sse42; 140 | int sse4a; 141 | int avx; 142 | int avx2; 143 | int fma; 144 | int fma4; 145 | int xop; 146 | int bmi; 147 | int bmi2; 148 | int avx512f; 149 | int avx512vl; 150 | int avx512bw; 151 | int avx512dq; 152 | int avx512cd; 153 | int avx512pf; 154 | int avx512er; 155 | int avx512ifma; 156 | int avx512vbmi; 157 | }; 158 | 159 | int pysimd_x86_features_init(struct pysimd_x86_features* feat) 160 | { 161 | #ifdef PYSIMD_CC_GCC 162 | #define FORMAT_CPU_FEATURE(name) feat->name = __builtin_cpu_supports(#name) 163 | FORMAT_CPU_FEATURE(mmx); 164 | FORMAT_CPU_FEATURE(sse); 165 | FORMAT_CPU_FEATURE(sse2); 166 | FORMAT_CPU_FEATURE(sse3); 167 | FORMAT_CPU_FEATURE(ssse3); 168 | feat->sse41 = __builtin_cpu_supports("sse4.1"); 169 | feat->sse42 = __builtin_cpu_supports("sse4.2"); 170 | FORMAT_CPU_FEATURE(sse4a); 171 | FORMAT_CPU_FEATURE(avx); 172 | FORMAT_CPU_FEATURE(avx2); 173 | FORMAT_CPU_FEATURE(fma); 174 | FORMAT_CPU_FEATURE(fma4); 175 | FORMAT_CPU_FEATURE(xop); 176 | FORMAT_CPU_FEATURE(bmi); 177 | FORMAT_CPU_FEATURE(bmi2); 178 | FORMAT_CPU_FEATURE(avx512f); 179 | FORMAT_CPU_FEATURE(avx512vl); 180 | FORMAT_CPU_FEATURE(avx512bw); 181 | FORMAT_CPU_FEATURE(avx512dq); 182 | FORMAT_CPU_FEATURE(avx512cd); 183 | FORMAT_CPU_FEATURE(avx512er); 184 | FORMAT_CPU_FEATURE(avx512pf); 185 | FORMAT_CPU_FEATURE(avx512ifma); 186 | FORMAT_CPU_FEATURE(avx512vbmi); 187 | #undef FORMAT_CPU_FEATURE 188 | return 1; 189 | #elif defined(PYSIMD_CC_MSVC) 190 | int infos[4]; 191 | PYSIMD_X86_CPUID(infos, 0); 192 | int nIds = infos[0]; 193 | 194 | PYSIMD_X86_CPUID(infos, 0x80000000); 195 | unsigned nExIds = (unsigned)infos[0]; 196 | 197 | // Detect Features 198 | if (nIds >= 0x00000001){ 199 | PYSIMD_X86_CPUID(infos,0x00000001); 200 | feat->mmx = (infos[3] & ((int)1 << 23)) != 0; 201 | feat->sse = (infos[3] & ((int)1 << 25)) != 0; 202 | feat->sse2 = (infos[3] & ((int)1 << 26)) != 0; 203 | feat->sse3 = (infos[2] & ((int)1 << 0)) != 0; 204 | feat->ssse3 = (infos[2] & ((int)1 << 9)) != 0; 205 | feat->sse41 = (infos[2] & ((int)1 << 19)) != 0; 206 | feat->sse42 = (infos[2] & ((int)1 << 20)) != 0; 207 | feat->popcnt = (infos[2] & ((int)1 << 23)) != 0; 208 | //info->HW_AES = (infos[2] & ((int)1 << 25)) != 0; 209 | 210 | feat->avx = (infos[2] & ((int)1 << 28)) != 0; 211 | feat->fma = (infos[2] & ((int)1 << 12)) != 0; 212 | 213 | //info->HW_RDRAND = (infos[2] & ((int)1 << 30)) != 0; 214 | } 215 | if (nIds >= 0x00000007){ 216 | PYSIMD_X86_CPUID(infos,0x00000007); 217 | feat->avx2 = (infos[1] & ((int)1 << 5)) != 0; 218 | 219 | feat->bmi = (infos[1] & ((int)1 << 3)) != 0; 220 | feat->bmi2 = (infos[1] & ((int)1 << 8)) != 0; 221 | //info->HW_PREFETCHWT1 = (infos[2] & ((int)1 << 0)) != 0; 222 | 223 | feat->avx512f = (infos[1] & ((int)1 << 16)) != 0; 224 | feat->avx512cd = (infos[1] & ((int)1 << 28)) != 0; 225 | feat->avx512pf = (infos[1] & ((int)1 << 26)) != 0; 226 | feat->avx512er = (infos[1] & ((int)1 << 27)) != 0; 227 | feat->avx512vl = (infos[1] & ((int)1 << 31)) != 0; 228 | feat->avx512bw = (infos[1] & ((int)1 << 30)) != 0; 229 | feat->avx512dq = (infos[1] & ((int)1 << 17)) != 0; 230 | feat->avx512ifma = (infos[1] & ((int)1 << 21)) != 0; 231 | feat->avx512vbmi = (infos[2] & ((int)1 << 1)) != 0; 232 | } 233 | if (nExIds >= 0x80000001){ 234 | PYSIMD_X86_CPUID(infos,0x80000001); 235 | //info->HW_ABM = (infos[2] & ((int)1 << 5)) != 0; 236 | feat->sse4a = (infos[2] & ((int)1 << 6)) != 0; 237 | feat->fma4 = (infos[2] & ((int)1 << 16)) != 0; 238 | feat->xop = (infos[2] & ((int)1 << 11)) != 0; 239 | } 240 | return 1; 241 | #else 242 | // Unsupported x86 243 | fprintf(stderr, "WARN %s\n", "Compiler does not support cpuid feature detection\n"); 244 | return 0; 245 | #endif 246 | } 247 | #endif // PYSIMD_ARCH_X86_64 248 | 249 | struct pysimd_sys_info { 250 | enum pysimd_arch arch; 251 | enum pysimd_cc compiler; 252 | #ifdef PYSIMD_ARCH_X86_64 253 | struct pysimd_x86_features features; 254 | #endif // PYSIMD_ARCH_X86_64 255 | }; 256 | 257 | static void pysimd_sys_info_init(struct pysimd_sys_info* sinfo) 258 | { 259 | #if defined(PYSIMD_ARCH_X86_64) 260 | sinfo->arch = PYSIMD_ARCH_TYPE_X86; 261 | (void)pysimd_x86_features_init(&(sinfo->features)); 262 | #elif defined(PYSIMD_ARCH_ARM_64) 263 | sinfo->arch = PYSIMD_ARCH_TYPE_ARM; 264 | #elif defined(PYSIMD_ARCH_MIPS) 265 | sinfo->arch = PYSIMD_ARCH_TYPE_MIPS; 266 | #elif defined(PYSIMD_ARCH_SPARC) 267 | sinfo->arch = PYSIMD_ARCH_TYPE_SPARC; 268 | #else 269 | sinfo->arch = PYSIMD_ARCH_TYPE_UNKNOWN; 270 | #endif 271 | 272 | #if defined(PYSIMD_CC_MSVC) 273 | sinfo->compiler = PYSIMD_CC_TYPE_MSVC; 274 | #elif defined(PYSIMD_CC_GCC) 275 | sinfo->compiler = PYSIMD_CC_TYPE_GCC; 276 | #elif defined(PYSIMD_CC_CLANG) 277 | sinfo->compiler = PYSIMD_CC_TYPE_CLANG; 278 | #else 279 | sinfo->compiler = PYSIMD_CC_TYPE_UNKNOWN; 280 | #endif 281 | } 282 | 283 | #endif // CORE_SIMD_INFO_H -------------------------------------------------------------------------------- /include/simd_vec.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMD_VEC_H 2 | #define SIMD_VEC_H 3 | 4 | #include "simd_vec_type.h" 5 | 6 | static inline void pysimd_vec_clear(struct pysimd_vec_t* vec) { 7 | vec->size = 0; 8 | vec->data = NULL; 9 | } 10 | 11 | static inline void pysimd_vec_clear_data(struct pysimd_vec_t* vec) { 12 | unsigned char* cleaner = vec->data; 13 | const unsigned char* end = cleaner + vec->size; 14 | while (cleaner != end) { 15 | *cleaner++ = 0; 16 | } 17 | } 18 | 19 | static void pysimd_vec_init(struct pysimd_vec_t* buf, size_t capacity) 20 | { 21 | buf->size = capacity; 22 | buf->data = calloc(1, capacity); 23 | } 24 | 25 | static int pysimd_vec_resize(struct pysimd_vec_t* buf, size_t new_size) { 26 | if (new_size == 0) 27 | return 0; 28 | size_t old_size = buf->size; 29 | buf->size = new_size; 30 | buf->data = realloc(buf->data, new_size); 31 | while (old_size < new_size) { 32 | // Must make sure upstream is zeroed 33 | buf->data[old_size++] = 0; 34 | } 35 | return 1; 36 | } 37 | 38 | static void pysimd_vec_deinit(struct pysimd_vec_t* buf) 39 | { 40 | buf->size = 0; 41 | free(buf->data); 42 | } 43 | 44 | static char* pysimd_vec_repr(const struct pysimd_vec_t* buf) 45 | { 46 | char* repr_str = calloc(1, buf->size * 4 + 2); 47 | char* writer = repr_str; 48 | *writer++ = '['; 49 | size_t i = 0; 50 | for (; i < buf->size; ++i) 51 | { 52 | writer += sprintf(writer, "%x", buf->data[i]); 53 | if (i != (buf->size - 1)) 54 | *writer++ = ','; 55 | } 56 | *writer++ = ']'; 57 | return repr_str; 58 | } 59 | 60 | static int pysimd_vec_copy(struct pysimd_vec_t* dst, 61 | const struct pysimd_vec_t* src, 62 | size_t start, 63 | size_t end) 64 | { 65 | size_t diff = end - start; 66 | if (diff % 16 != 0) 67 | return 0; 68 | pysimd_vec_init(dst, diff); 69 | const unsigned char* reader = src->data + start; 70 | const unsigned char* read_end = src->data + end; 71 | unsigned char* writer = dst->data; 72 | while (reader < read_end) { 73 | #if defined(PYSIMD_X86_SSE2) 74 | _mm_store_si128((__m128i*)writer, _mm_load_si128((__m128i const*)reader)); 75 | reader += 16; 76 | writer += 16; 77 | #else 78 | *(long long*)writer = *(long long*)reader; 79 | writer += sizeof(long long); 80 | reader += sizeof(long long); 81 | #endif 82 | } 83 | return 1; 84 | } 85 | 86 | static int pysimd_vec_fill(struct pysimd_vec_t* buf, size_t val, unsigned char sizer) 87 | { 88 | unsigned char* data_ptr = buf->data; 89 | const unsigned char* data_end = buf->data + buf->size; 90 | #if defined(PYSIMD_X86_SSE2) 91 | __m128i filler; 92 | switch (sizer) { 93 | case 1: 94 | filler = _mm_set1_epi8((char)val); 95 | break; 96 | case 2: 97 | filler = _mm_set1_epi16((short)val); 98 | break; 99 | case 4: 100 | filler = _mm_set1_epi32((int)val); 101 | break; 102 | case 8: 103 | filler = _mm_set1_epi64x(val); 104 | break; 105 | default: 106 | return 0; 107 | } 108 | while (data_ptr < data_end) { 109 | _mm_store_si128((__m128i*)data_ptr, filler); 110 | data_ptr += 16; 111 | } 112 | #else 113 | char filler[16] = {0}; 114 | switch (sizer) { 115 | case 1: 116 | filler[0] = (char)val; filler[1] = (char)val; filler[2] = (char)val; filler[3] = (char)val; 117 | filler[4] = (char)val; filler[5] = (char)val; filler[6] = (char)val; filler[7] = (char)val; 118 | filler[8] = (char)val; filler[9] = (char)val; filler[10] = (char)val; filler[11] = (char)val; 119 | filler[12] = (char)val; filler[13] = (char)val; filler[14] = (char)val; filler[15] = (char)val; 120 | break; 121 | case 2: 122 | *(short*)filler = (short)val; *(short*)(filler + 2) = (short)val; 123 | *(short*)(filler + 4) = (short)val; *(short*)(filler + 6) = (short)val; 124 | *(short*)(filler + 8) = (short)val; *(short*)(filler + 10) = (short)val; 125 | *(short*)(filler + 12) = (short)val; *(short*)(filler + 14) = (short)val; 126 | break; 127 | case 4: 128 | *(int*)filler = (int)val; *(int*)(filler + 4) = (int)val; 129 | *(int*)(filler + 8) = (int)val; *(int*)(filler + 12) = (int)val; 130 | break; 131 | case 8: 132 | *(long long*)filler = (long long)val; 133 | *(long long*)(filler + 8) = (long long)val; 134 | break; 135 | default: 136 | return 0; 137 | } 138 | while(data_ptr < data_end) { 139 | *(long long*)data_ptr = *(long long*)filler; 140 | data_ptr += 8; 141 | } 142 | #endif 143 | return 1; 144 | } 145 | 146 | static int pysimd_vec_fill_float(struct pysimd_vec_t* buf, double val, unsigned char sizer) { 147 | unsigned char* data_ptr = buf->data; 148 | const unsigned char* data_end = buf->data + buf->size; 149 | #if defined(PYSIMD_X86_SSE2) 150 | switch (sizer) { 151 | case 4: 152 | { 153 | __m128 filler = _mm_set1_ps((float)val); 154 | while (data_ptr < data_end) { 155 | _mm_store_ps((float*)data_ptr, filler); 156 | data_ptr += 16; 157 | } 158 | } 159 | break; 160 | case 8: 161 | { 162 | __m128d filler = _mm_set1_pd(val); 163 | while (data_ptr < data_end) { 164 | _mm_store_pd((double*)data_ptr, filler); 165 | data_ptr += 16; 166 | } 167 | } 168 | break; 169 | default: 170 | return 0; 171 | } 172 | #else 173 | switch (sizer) { 174 | case 4: 175 | { 176 | float filler = (float)val; 177 | while (data_ptr < data_end) { 178 | *(float*)(data_ptr) = filler; 179 | *(float*)(data_ptr + 4) = filler; 180 | *(float*)(data_ptr + 8) = filler; 181 | *(float*)(data_ptr + 12) = filler; 182 | data_ptr += 16; 183 | } 184 | } 185 | break; 186 | case 8: 187 | { 188 | double filler = val; 189 | while (data_ptr < data_end) { 190 | *(double*)(data_ptr) = filler; 191 | *(double*)(data_ptr + 8) = filler; 192 | data_ptr += 16; 193 | } 194 | } 195 | break; 196 | default: 197 | return 0; 198 | } 199 | #endif 200 | return 1; 201 | } 202 | 203 | #endif // SIMD_DATA_OBJECT_H -------------------------------------------------------------------------------- /include/simd_vec_arith.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMD_VEC_ARITH_H 2 | #define SIMD_VEC_ARITH_H 3 | 4 | #include "simd_vec_type.h" 5 | #include "vec_macros.h" 6 | 7 | static void simd_vec_add_i8(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 8 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 9 | size_t i = 0; 10 | while (i < oper_region) { 11 | #if defined(PYSIMD_X86_SSE2) 12 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 13 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 14 | __m128i added = _mm_add_epi8 (v1seg, v2seg); 15 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 16 | i += 16; 17 | #else 18 | v1->data[i] = v1->data[i] + v2->data[i]; 19 | ++i; 20 | #endif 21 | } 22 | } 23 | 24 | static void simd_vec_add_i16(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 25 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 26 | size_t i = 0; 27 | while (i < oper_region) { 28 | #if defined(PYSIMD_X86_SSE2) 29 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 30 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 31 | __m128i added = _mm_add_epi16(v1seg, v2seg); 32 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 33 | i += 16; 34 | #else 35 | *(short*)(v1->data + i) = (*(short*)(v1->data + i)) + (*(short*)(v2->data + i)); 36 | i += 2; 37 | #endif 38 | } 39 | } 40 | 41 | static void simd_vec_add_i32(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 42 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 43 | size_t i = 0; 44 | while (i < oper_region) { 45 | #if defined(PYSIMD_X86_SSE2) 46 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 47 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 48 | __m128i added = _mm_add_epi32(v1seg, v2seg); 49 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 50 | i += 16; 51 | #else 52 | *(int*)(v1->data + i) = (*(int*)(v1->data + i)) + (*(int*)(v2->data + i)); 53 | i += 4; 54 | #endif 55 | } 56 | } 57 | 58 | static void simd_vec_add_i64(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 59 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 60 | size_t i = 0; 61 | while (i < oper_region) { 62 | #if defined(PYSIMD_X86_SSE2) 63 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 64 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 65 | __m128i added = _mm_add_epi64(v1seg, v2seg); 66 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 67 | i += 16; 68 | #else 69 | *(long long*)(v1->data + i) = (*(long long*)(v1->data + i)) + (*(long long*)(v2->data + i)); 70 | i += 8; 71 | #endif 72 | } 73 | } 74 | 75 | static void simd_vec_add_f32(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 76 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 77 | size_t i = 0; 78 | while (i < oper_region) { 79 | #if defined(PYSIMD_X86_SSE2) 80 | __m128 v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 81 | __m128 v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 82 | __m128 added = _mm_add_ps(v1seg, v2seg); 83 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 84 | i += 16; 85 | #else 86 | *(float*)(v1->data + i) = (*(float*)(v1->data + i)) + (*(float*)(v2->data + i)); 87 | i += 4; 88 | #endif 89 | } 90 | } 91 | 92 | static void simd_vec_add_f64(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 93 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 94 | size_t i = 0; 95 | while (i < oper_region) { 96 | #if defined(PYSIMD_X86_SSE2) 97 | __m128d v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 98 | __m128d v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 99 | __m128d added = _mm_add_pd(v1seg, v2seg); 100 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 101 | i += 16; 102 | #else 103 | *(double*)(v1->data + i) = (*(double*)(v1->data + i)) + (*(double*)(v2->data + i)); 104 | i += 8; 105 | #endif 106 | } 107 | } 108 | 109 | static void simd_vec_sub_i8(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 110 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 111 | size_t i = 0; 112 | while (i < oper_region) { 113 | #if defined(PYSIMD_X86_SSE2) 114 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 115 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 116 | __m128i added = _mm_sub_epi8 (v1seg, v2seg); 117 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 118 | i += 16; 119 | #else 120 | v1->data[i] = v1->data[i] - v2->data[i]; 121 | ++i; 122 | #endif 123 | } 124 | } 125 | 126 | static void simd_vec_sub_i16(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 127 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 128 | size_t i = 0; 129 | while (i < oper_region) { 130 | #if defined(PYSIMD_X86_SSE2) 131 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 132 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 133 | __m128i added = _mm_sub_epi16(v1seg, v2seg); 134 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 135 | i += 16; 136 | #else 137 | *(short*)(v1->data + i) = (*(short*)(v1->data + i)) - (*(short*)(v2->data + i)); 138 | i += 2; 139 | #endif 140 | } 141 | } 142 | 143 | static void simd_vec_sub_i32(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 144 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 145 | size_t i = 0; 146 | while (i < oper_region) { 147 | #if defined(PYSIMD_X86_SSE2) 148 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 149 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 150 | __m128i added = _mm_sub_epi32(v1seg, v2seg); 151 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 152 | i += 16; 153 | #else 154 | *(int*)(v1->data + i) = (*(int*)(v1->data + i)) - (*(int*)(v2->data + i)); 155 | i += 4; 156 | #endif 157 | } 158 | } 159 | 160 | static void simd_vec_sub_i64(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 161 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 162 | size_t i = 0; 163 | while (i < oper_region) { 164 | #if defined(PYSIMD_X86_SSE2) 165 | __m128i v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 166 | __m128i v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 167 | __m128i added = _mm_sub_epi64(v1seg, v2seg); 168 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 169 | i += 16; 170 | #else 171 | *(long long*)(v1->data + i) = (*(long long*)(v1->data + i)) - (*(long long*)(v2->data + i)); 172 | i += 8; 173 | #endif 174 | } 175 | } 176 | 177 | static void simd_vec_sub_f32(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 178 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 179 | size_t i = 0; 180 | while (i < oper_region) { 181 | #if defined(PYSIMD_X86_SSE2) 182 | __m128 v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 183 | __m128 v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 184 | __m128 added = _mm_sub_ps(v1seg, v2seg); 185 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 186 | i += 16; 187 | #else 188 | *(float*)(v1->data + i) = (*(float*)(v1->data + i)) - (*(float*)(v2->data + i)); 189 | i += 4; 190 | #endif 191 | } 192 | } 193 | 194 | static void simd_vec_sub_f64(struct pysimd_vec_t* v1, const struct pysimd_vec_t* v2) { 195 | const size_t oper_region = PYSIMD_MIN_VEC_SIZE(v1, v2); 196 | size_t i = 0; 197 | while (i < oper_region) { 198 | #if defined(PYSIMD_X86_SSE2) 199 | __m128d v1seg = _mm_load_si128((__m128i const*)(v1->data + i)); 200 | __m128d v2seg = _mm_load_si128((__m128i const*)(v2->data + i)); 201 | __m128d added = _mm_sub_pd(v1seg, v2seg); 202 | _mm_storeu_si128 ((__m128i*)(v1->data + i), added); 203 | i += 16; 204 | #else 205 | *(double*)(v1->data + i) = (*(double*)(v1->data + i)) - (*(double*)(v2->data + i)); 206 | i += 8; 207 | #endif 208 | } 209 | } 210 | 211 | #endif // SIMD_VEC_ARITH_H 212 | -------------------------------------------------------------------------------- /include/simd_vec_filter.h: -------------------------------------------------------------------------------- 1 | #ifndef PYSIMD_VEC_FILTER_H 2 | #define PYSIMD_VEC_FILTER_H 3 | 4 | #include "simd_vec_type.h" 5 | #include "vec_macros.h" 6 | 7 | 8 | static int pysimd_vec_filter_32(struct pysimd_vec_t* vec, int* gt, 9 | int* lt, 10 | int* eq) { 11 | #if defined(PYSIMD_X86_SSE2) 12 | // pass 13 | unsigned char* ptr = vec->data; 14 | const unsigned char* ptr_end = ptr + vec->size; 15 | while (ptr < ptr_end) { 16 | __m128i loaded = _mm_load_si128((__m128i const*)ptr); 17 | __m128i mask = _mm_set1_epi8(0xff); 18 | if (gt != NULL) { 19 | __m128i gtnum = _mm_set1_epi32(*gt); 20 | __m128i gtres = _mm_cmpgt_epi32 (loaded, gtnum); 21 | mask = _mm_and_si128(mask, gtres); 22 | } 23 | if (lt != NULL) { 24 | __m128i ltnum = _mm_set1_epi32(*lt); 25 | __m128i ltres = _mm_cmplt_epi32 (loaded, ltnum); 26 | mask = _mm_and_si128(mask, ltres); 27 | } 28 | if (eq != NULL) { 29 | __m128i eqnum = _mm_set1_epi32(*eq); 30 | __m128i eqres = _mm_cmpeq_epi32 (loaded, eqnum); 31 | mask = _mm_and_si128(mask, eqres); 32 | } 33 | __m128i final_result = _mm_and_si128(mask, loaded); 34 | int mask_result = _mm_movemask_epi8 (final_result); 35 | size_t to_advance = 0; 36 | switch (mask_result) { 37 | case 0xFFFF: 38 | case 0x0FFF: 39 | case 0x00FF: 40 | case 0x000F: 41 | case 0x0: 42 | // no filtering needed 43 | to_advance = 16; 44 | break; 45 | case 0xFF0F: 46 | final_result = _mm_shuffle_epi32(final_result, 0x78); 47 | to_advance = 12; 48 | break; 49 | case 0xF00F: 50 | // shuffle, reverse order of 0b10101100 51 | final_result = _mm_shuffle_epi32(final_result, 0xac); 52 | to_advance = 8; 53 | break; 54 | case 0x0FF0: 55 | // shuffle, reverse order of 0b11001001 56 | final_result = _mm_shuffle_epi32(final_result, 0xc9); 57 | to_advance = 8; 58 | break; 59 | case 0xF0F0: 60 | // shuffle, reverse order of 0b10001101 61 | final_result = _mm_shuffle_epi32(final_result, 0x8d); 62 | to_advance = 8; 63 | break; 64 | case 0x0F0F: 65 | // shuffle, reverse order of 0b11011000 66 | final_result = _mm_shuffle_epi32(final_result, 0xd8); 67 | to_advance = 8; 68 | break; 69 | case 0xF0FF: 70 | final_result = _mm_shuffle_epi32(final_result, 0xb4); 71 | to_advance = 12; 72 | break; 73 | case 0xFFF0: 74 | // shuffle, reversed order of 00011011 75 | final_result = _mm_shuffle_epi32(final_result, 0x1b); 76 | to_advance = 12; 77 | break; 78 | case 0xFF00: 79 | // shuffle, reversed order of 00001110 80 | final_result = _mm_shuffle_epi32(final_result, 0xe); 81 | to_advance = 8; 82 | break; 83 | case 0xF000: 84 | // shuffle, reversed order of 00000011 85 | final_result = _mm_shuffle_epi32(final_result, 0x3); 86 | to_advance = 4; 87 | break; 88 | case 0x0F00: 89 | // shuffle, reversed order of 0b00000010 90 | final_result = _mm_shuffle_epi32(final_result, 0x2); 91 | to_advance = 4; 92 | break; 93 | case 0x00F0: 94 | // shuffle, reversed order of 0b00000001 95 | final_result = _mm_shuffle_epi32(final_result, 0x1); 96 | to_advance = 4; 97 | break; 98 | default: 99 | fprintf(stderr, "Got impossible mask value: 0x%x, aborting ...\n", mask_result); 100 | abort(); 101 | } 102 | _mm_store_si128 ((__m128i*)ptr, final_result); 103 | ptr += to_advance; 104 | } 105 | #else 106 | const unsigned char* reader = vec->data; 107 | const unsigned char* read_end = reader + vec->size; 108 | void* new_buf = calloc(1, vec->size); 109 | unsigned char* writer = new_buf; 110 | while (reader < read_end) { 111 | if (*reader) { 112 | *writer++ = *reader; 113 | } 114 | ++reader; 115 | } 116 | free(vec->data); 117 | vec->data = new_buf; 118 | #endif 119 | return 1; 120 | } 121 | 122 | static int pysimd_vec_filter_64(struct pysimd_vec_t* vec, long long* gt, 123 | long long* lt, 124 | long long* eq) { 125 | #if defined(PYSIMD_X86_SSE2) 126 | // pass 127 | unsigned char* ptr = vec->data; 128 | const unsigned char* ptr_end = ptr + vec->size; 129 | while (ptr < ptr_end) { 130 | __m128i loaded = _mm_load_si128((__m128i const*)ptr); 131 | __m128i mask = _mm_set1_epi8(0xff); 132 | if (gt != NULL) { 133 | __m128i gtnum = _mm_set1_epi64(*gt); 134 | __m128i gtres = _mm_cmpgt_epi64(loaded, gtnum); 135 | mask = _mm_and_si128(mask, gtres); 136 | } 137 | if (lt != NULL) { 138 | __m128i ltnum = _mm_set1_epi64(*lt); 139 | __m128i ltres = _mm_cmplt_epi64(loaded, ltnum); 140 | mask = _mm_and_si128(mask, ltres); 141 | } 142 | if (eq != NULL) { 143 | __m128i eqnum = _mm_set1_epi64(*eq); 144 | __m128i eqres = _mm_cmpeq_epi64(loaded, eqnum); 145 | mask = _mm_and_si128(mask, eqres); 146 | } 147 | __m128i final_result = _mm_and_si128(mask, loaded); 148 | int mask_result = _mm_movemask_epi8 (final_result); 149 | size_t to_advance = 0; 150 | switch (mask_result) { 151 | case 0xFFFF: 152 | case 0x0FFF: 153 | case 0x00FF: 154 | case 0x000F: 155 | case 0x0: 156 | // no filtering needed 157 | to_advance = 16; 158 | break; 159 | case 0xFF0F: 160 | final_result = _mm_shuffle_epi32(final_result, 0x78); 161 | to_advance = 12; 162 | break; 163 | case 0xF00F: 164 | // shuffle, reverse order of 0b10101100 165 | final_result = _mm_shuffle_epi32(final_result, 0xac); 166 | to_advance = 8; 167 | break; 168 | case 0x0FF0: 169 | // shuffle, reverse order of 0b11001001 170 | final_result = _mm_shuffle_epi32(final_result, 0xc9); 171 | to_advance = 8; 172 | break; 173 | case 0xF0F0: 174 | // shuffle, reverse order of 0b10001101 175 | final_result = _mm_shuffle_epi32(final_result, 0x8d); 176 | to_advance = 8; 177 | break; 178 | case 0x0F0F: 179 | // shuffle, reverse order of 0b11011000 180 | final_result = _mm_shuffle_epi32(final_result, 0xd8); 181 | to_advance = 8; 182 | break; 183 | case 0xF0FF: 184 | final_result = _mm_shuffle_epi32(final_result, 0xb4); 185 | to_advance = 12; 186 | break; 187 | case 0xFFF0: 188 | // shuffle, reversed order of 00011011 189 | final_result = _mm_shuffle_epi32(final_result, 0x1b); 190 | to_advance = 12; 191 | break; 192 | case 0xFF00: 193 | // shuffle, reversed order of 00001110 194 | final_result = _mm_shuffle_epi32(final_result, 0xe); 195 | to_advance = 8; 196 | break; 197 | case 0xF000: 198 | // shuffle, reversed order of 00000011 199 | final_result = _mm_shuffle_epi32(final_result, 0x3); 200 | to_advance = 4; 201 | break; 202 | case 0x0F00: 203 | // shuffle, reversed order of 0b00000010 204 | final_result = _mm_shuffle_epi32(final_result, 0x2); 205 | to_advance = 4; 206 | break; 207 | case 0x00F0: 208 | // shuffle, reversed order of 0b00000001 209 | final_result = _mm_shuffle_epi32(final_result, 0x1); 210 | to_advance = 4; 211 | break; 212 | default: 213 | fprintf(stderr, "Got impossible mask value: 0x%x, aborting ...\n", mask_result); 214 | abort(); 215 | } 216 | _mm_store_si128 ((__m128i*)ptr, final_result); 217 | ptr += to_advance; 218 | } 219 | #else 220 | const unsigned char* reader = vec->data; 221 | const unsigned char* read_end = reader + vec->size; 222 | void* new_buf = calloc(1, vec->size); 223 | unsigned char* writer = new_buf; 224 | while (reader < read_end) { 225 | if (*reader) { 226 | *writer++ = *reader; 227 | } 228 | ++reader; 229 | } 230 | free(vec->data); 231 | vec->data = new_buf; 232 | #endif 233 | return 1; 234 | } 235 | 236 | #endif // PYSIMD_VEC_FILTER_H 237 | -------------------------------------------------------------------------------- /include/simd_vec_type.h: -------------------------------------------------------------------------------- 1 | #ifndef SIMD_VEC_TYPE_H 2 | #define SIMD_VEC_TYPE_H 3 | 4 | #include "core_simd_info.h" 5 | 6 | struct pysimd_vec_t { 7 | size_t size; 8 | uint8_t* data; 9 | }; 10 | 11 | #endif // SIMD_VEC_TYPE_H -------------------------------------------------------------------------------- /include/vec_macros.h: -------------------------------------------------------------------------------- 1 | #ifndef PYSIMD_VEC_MACROS_H 2 | #define PYSIMD_VEC_MACROS_H 3 | 4 | 5 | #define PYSIMD_MIN_VEC_SIZE(v1, v2) (((v1)->size) < ((v2)->size)) ? ((v1)->size) : ((v2)->size) 6 | 7 | 8 | #endif // PYSIMD_VEC_MACROS_H 9 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.rst -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup, Extension 2 | from distutils.ccompiler import get_default_compiler 3 | import os 4 | from check_c_compiles import CheckCCompiles 5 | 6 | DEFAULT_COMPILER = get_default_compiler() 7 | 8 | # This attribute determines the minimum alignment required by sizes of a simd.Vec object 9 | # The intention is that, the minimum allows any simd instruction available to be executed 10 | # on vector object without needing to check the length/size of it 11 | pysimd_minimum_align = 8 12 | 13 | pysimd_patch_version = 0 14 | pysimd_minor_version = 1 15 | pysimd_major_version = 0 16 | 17 | pysimd_version = [pysimd_major_version, 18 | pysimd_minor_version, 19 | pysimd_patch_version] 20 | 21 | keyword_list = [ 22 | 'simd', 23 | 'x86', 24 | 'arm', 25 | 'sse2', 26 | 'avx', 27 | 'avx512', 28 | 'performance', 29 | 'big data', 30 | 'data science' 31 | ] 32 | 33 | classifers_list = [ 34 | 'Development Status :: 2 - Pre-Alpha', 35 | 'Operating System :: OS Independent', 36 | 'Programming Language :: C', 37 | 'Programming Language :: Python', 38 | 'Topic :: Scientific/Engineering', 39 | 'Topic :: System :: Hardware' 40 | ] 41 | 42 | macro_defs = [ 43 | ('SIMDPY_VERSION_MAJOR', str(pysimd_major_version)), 44 | ('SIMDPY_VERSION_MINOR', str(pysimd_minor_version)), 45 | ('SIMDPY_VERSION_PATCH', str(pysimd_patch_version)) 46 | ] 47 | 48 | # extra_compile_args 49 | compiler_flags = [] 50 | 51 | x86_header_string = """ 52 | #ifdef _WIN32 53 | # include 54 | #else 55 | # include 56 | #endif 57 | """ 58 | 59 | with CheckCCompiles("sse2", x86_header_string + """ 60 | int main(void) { 61 | __m128i foo = _mm_set1_epi8(8); 62 | __m128i new_vec = _mm_add_epi8(foo, _mm_setzero_si128()); 63 | (void)new_vec; 64 | return 0; 65 | } 66 | 67 | """) as sse2_test: 68 | if sse2_test.works: 69 | macro_defs.append(('PYSIMD_X86_SSE2', '1')) 70 | pysimd_minimum_align = 16 71 | 72 | with CheckCCompiles("sse3", x86_header_string + """ 73 | int main(void) { 74 | float lst[4] = {1.0, 2.0, 3.0, 4.0}; 75 | __m128 lstv = _mm_load_ps((float const*)lst); 76 | __m128 hadded = _mm_hadd_ps(lstv, lstv); 77 | (void)hadded; 78 | return 0; 79 | } 80 | """) as sse3_test: 81 | if sse3_test.works: 82 | macro_defs.append(('PYSIMD_X86_SSE3', '1')) 83 | if DEFAULT_COMPILER == 'unix': 84 | compiler_flags.append('-msse3') 85 | 86 | with CheckCCompiles("ssse3", x86_header_string + """ 87 | 88 | #include 89 | 90 | int main(void) { 91 | unsigned char nums[16] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16}; 92 | __m128i loaded = _mm_load_si128((__m128i const*)nums); 93 | __m128i direction = _mm_set1_epi8(1); 94 | __m128i shuffled = _mm_shuffle_epi8(loaded, direction); 95 | _mm_store_si128((__m128i*)nums, shuffled); 96 | assert(nums[0] == 2); 97 | return 0; 98 | } 99 | """) as ssse3_test: 100 | if ssse3_test.works: 101 | macro_defs.append(('PYSIMD_X86_SSSE3', '1')) 102 | if DEFAULT_COMPILER == 'unix': 103 | compiler_flags.append('-mssse3') 104 | 105 | with CheckCCompiles("avx", x86_header_string + """ 106 | 107 | int main(void) { 108 | __m256d a = _mm256_set1_pd(3.0); 109 | __m256d b = _mm256_set1_pd(3.0); 110 | __m256d added = _mm256_add_pd(a, b); 111 | (void)added; 112 | return 0; 113 | } 114 | """) as avx_test: 115 | if avx_test.works: 116 | macro_defs.append(('PYSIMD_X86_AVX', '1')) 117 | pysimd_minimum_align = 32 118 | if DEFAULT_COMPILER == 'unix': 119 | compiler_flags.append('-mavx') 120 | 121 | with CheckCCompiles("avx2", x86_header_string + """ 122 | 123 | int main(void) { 124 | __m256i a = _mm256_set1_epi32(-20); 125 | __m256i abs_a = _mm256_abs_epi16(a); 126 | __m256i and_a = _mm256_and_si256(a, abs_a); 127 | (void)and_a; 128 | return 0; 129 | } 130 | """) as avx2_test: 131 | if avx2_test.works: 132 | macro_defs.append(('PYSIMD_X86_AVX2', '1')) 133 | pysimd_minimum_align = 32 134 | if DEFAULT_COMPILER == 'unix': 135 | compiler_flags.append('-mavx2') 136 | 137 | with CheckCCompiles("avx512f", x86_header_string + """ 138 | 139 | #include 140 | 141 | static char storedata[256]; 142 | 143 | int main(void) { 144 | __m512i a = _mm512_set1_epi16(3); 145 | __m512i b = _mm512_set1_epi16(3); 146 | __m512i added = _mm512_add_epi32(a, b); 147 | _mm512_store_si512((void*)storedata, added); 148 | return 0; 149 | } 150 | """) as avx512f_test: 151 | if avx512f_test.works: 152 | macro_defs.append(('PYSIMD_X86_AVX512F', '1')) 153 | pysimd_minimum_align = 64 154 | if DEFAULT_COMPILER == 'unix': 155 | compiler_flags.append('-mavx512f') 156 | 157 | macro_defs.append(('PYSIMD_MIN_ALIGN', str(pysimd_minimum_align))) 158 | 159 | if os.name == 'nt': 160 | macro_defs.append(('_CRT_SECURE_NO_WARNINGS', '1')) 161 | 162 | if DEFAULT_COMPILER == 'unix': 163 | # shut off not so useful warnings 164 | compiler_flags.append('-Wno-sign-compare') 165 | 166 | # A Python package may have multiple extensions, but this 167 | # template has one. 168 | module1 = Extension('simd', 169 | define_macros = macro_defs, 170 | include_dirs = ['include'], 171 | sources = ['src/pymain.c'], 172 | extra_compile_args=compiler_flags) 173 | 174 | setup (name = 'simd', 175 | version = ".".join([str(elem) for elem in pysimd_version]), 176 | description = 'The SIMD Python Module', 177 | author = 'Joshua Weinstein', 178 | author_email = 'jweinst1@berkeley.edu', 179 | url = 'https://github.com/jweinst1/pysimd', 180 | license = 'MIT', 181 | keywords = keyword_list, 182 | classifiers = classifers_list, 183 | long_description = open('README.rst').read(), 184 | ext_modules = [module1]) -------------------------------------------------------------------------------- /src/pymain.c: -------------------------------------------------------------------------------- 1 | #include "core_simd_info.h" 2 | #include "simd_vec.h" 3 | #include "simd_vec_arith.h" 4 | //#include "simd_vec_filter.h" 5 | #define PY_SSIZE_T_CLEAN 6 | #include 7 | #include "structmember.h" 8 | 9 | #define RETURN_OR_SYS_ERROR(variable) \ 10 | if (variable == NULL) { \ 11 | return PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); \ 12 | } \ 13 | return variable 14 | 15 | typedef struct { 16 | PyObject_HEAD 17 | struct pysimd_vec_t vec; 18 | } SimdObject; 19 | 20 | extern PyTypeObject SimdObjectType; 21 | static PyObject *SimdError; 22 | 23 | static void SimdObject_dealloc(SimdObject* self) 24 | { 25 | pysimd_vec_deinit(&(self->vec)); 26 | Py_TYPE(self)->tp_free((PyObject*)self); 27 | } 28 | 29 | static PyObject* 30 | SimdObject_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 31 | { 32 | SimdObject *self; 33 | self = (SimdObject*) type->tp_alloc(type, 0); 34 | if (self != NULL) { 35 | pysimd_vec_clear(&(self->vec)); 36 | } 37 | return (PyObject *) self; 38 | } 39 | 40 | static int SimdObject_init(SimdObject* self, PyObject *args, PyObject *kwds) 41 | { 42 | static char *kwlist[] = {"size", "repeat_value", "repeat_size", NULL}; 43 | Py_ssize_t param_size = 0; 44 | PyObject* param_rep_val = NULL; 45 | unsigned char param_rep_size = 0; 46 | 47 | if (!PyArg_ParseTupleAndKeywords(args, kwds, "|nOb", kwlist, 48 | ¶m_size, ¶m_rep_val, ¶m_rep_size)) 49 | return -1; 50 | if (param_size > 0 && param_size % 16 != 0) { 51 | PyErr_Format(SimdError, "The size '%zu' cannot be aligned by at least 16 bytes", (size_t)param_size); 52 | return -1; 53 | } 54 | param_size = param_size == 0 ? /*default*/ 64 : param_size; 55 | pysimd_vec_init(&(self->vec), (size_t)param_size); 56 | if (param_rep_val != NULL && param_rep_size != 0) { 57 | if (PyLong_Check(param_rep_val)) { 58 | size_t rep_value = PyLong_AsSize_t(param_rep_val); 59 | if (!pysimd_vec_fill(&(self->vec), rep_value, param_rep_size)) { 60 | PyErr_Format(SimdError, "Invalid repeat parameters, value: %zu, size: %u", rep_value, param_rep_size); 61 | return -1; 62 | } 63 | } else if (PyFloat_Check(param_rep_val)) { 64 | double rep_value = PyFloat_AsDouble(param_rep_val); 65 | if (!pysimd_vec_fill_float(&(self->vec), rep_value, param_rep_size)) { 66 | PyErr_Format(SimdError, "Invalid repeat parameters, value: %f, size: %u", rep_value, param_rep_size); 67 | return -1; 68 | } 69 | } else { 70 | PyErr_Format(SimdError, "The type '%s' is not supported for 'repeat_value' option", param_rep_val->ob_type->tp_name); 71 | return -1; 72 | } 73 | } 74 | return 0; 75 | } 76 | 77 | static PyObject* 78 | SimdObject_copy(SimdObject *self, PyObject *args, PyObject *kwargs) 79 | { 80 | static char *kwlist[] = {"start", "end", NULL}; 81 | Py_ssize_t param_start = -1; 82 | Py_ssize_t param_end = -1; 83 | size_t actual_start = 0; 84 | size_t actual_end = self->vec.size; 85 | PyObject* copied = NULL; 86 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|nn", kwlist, 87 | ¶m_start, ¶m_end)) { 88 | return NULL; 89 | } 90 | 91 | if (param_start > -1) { 92 | if (param_start >= self->vec.size) { 93 | PyErr_Format(SimdError, "'start' option: %ld is out of bounds", param_start); 94 | return NULL; 95 | } 96 | actual_start = (size_t)param_start; 97 | } 98 | 99 | if (param_end > -1) { 100 | if (param_end >= self->vec.size || param_end <= param_start) { 101 | PyErr_Format(SimdError, "'end' option: %ld is out of bounds", param_start); 102 | return NULL; 103 | } 104 | actual_end = (size_t)param_end; 105 | } 106 | 107 | // Alignment check 108 | if ((actual_end - actual_start) % 16 != 0) { 109 | PyErr_Format(SimdError, "requested copy size: %zu is not aligned on a 16 byte boundary", actual_end - actual_start); 110 | return NULL; 111 | } 112 | copied = SimdObjectType.tp_alloc(&SimdObjectType, 0); 113 | if (copied == NULL) { 114 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 115 | return NULL; 116 | } 117 | if (!pysimd_vec_copy( &((SimdObject*)copied)->vec, &self->vec, actual_start, actual_end)) { 118 | PyErr_SetString(SimdError, "Internal vector copy failure"); 119 | SimdObject_dealloc((SimdObject*)copied); 120 | return NULL; 121 | } 122 | return copied; 123 | } 124 | 125 | static PyObject* SimdObject_repr(SimdObject* self) 126 | { 127 | char* representation = pysimd_vec_repr(&(self->vec)); 128 | PyObject* printed = NULL; 129 | printed = PyUnicode_FromString(representation); 130 | free(representation); 131 | RETURN_OR_SYS_ERROR(printed); 132 | } 133 | 134 | static PyObject * 135 | SimdObject_size(SimdObject *self, PyObject *Py_UNUSED(ignored)) 136 | { 137 | PyObject* size_val = NULL; 138 | size_val = PyLong_FromSize_t(self->vec.size); 139 | RETURN_OR_SYS_ERROR(size_val); 140 | } 141 | 142 | static PyObject* 143 | SimdObject_resize(SimdObject *self, PyObject *args, PyObject *kwargs) 144 | { 145 | Py_ssize_t resize_to = 0; 146 | PyObject* size_val = NULL; 147 | if (!PyArg_ParseTuple(args, "n", &resize_to)) { 148 | return NULL; 149 | } 150 | if (resize_to == 0) { 151 | PyErr_SetString(SimdError, "vector cannot be resized to 0"); 152 | return NULL; 153 | } else if (resize_to % 16 != 0) { 154 | PyErr_SetString(SimdError, "vector can only be resized to 16-byte aligned size"); 155 | return NULL; 156 | } 157 | pysimd_vec_resize(&self->vec, (size_t)resize_to); 158 | size_val = PyLong_FromSize_t(self->vec.size); 159 | RETURN_OR_SYS_ERROR(size_val); 160 | } 161 | 162 | static PyObject* 163 | SimdObject_add(SimdObject *self, PyObject *args, PyObject *kwargs) 164 | { 165 | static char *kwlist[] = {"other", "width", NULL}; 166 | Py_ssize_t param_width = 0; 167 | PyObject* param_other = NULL; 168 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On", kwlist, 169 | ¶m_other, ¶m_width)) { 170 | return NULL; 171 | } 172 | 173 | if (param_other->ob_type != &SimdObjectType) { 174 | PyErr_Format(SimdError, "Expected vector, got type '%s'", param_other->ob_type->tp_name); 175 | return NULL; 176 | } 177 | 178 | switch (param_width) { 179 | case 1: 180 | simd_vec_add_i8(&(self->vec), &(((SimdObject*)param_other)->vec)); 181 | break; 182 | case 2: 183 | simd_vec_add_i16(&(self->vec), &(((SimdObject*)param_other)->vec)); 184 | break; 185 | case 4: 186 | simd_vec_add_i32(&(self->vec), &(((SimdObject*)param_other)->vec)); 187 | break; 188 | case 8: 189 | simd_vec_add_i64(&(self->vec), &(((SimdObject*)param_other)->vec)); 190 | break; 191 | default: 192 | PyErr_Format(SimdError, "Unrecognized width: %zu for add operation", (size_t)param_width); 193 | return NULL; 194 | } 195 | Py_INCREF(Py_None); 196 | return Py_None; 197 | } 198 | 199 | static PyObject* 200 | SimdObject_fadd(SimdObject *self, PyObject *args, PyObject *kwargs) 201 | { 202 | static char *kwlist[] = {"other", "width", NULL}; 203 | Py_ssize_t param_width = 0; 204 | PyObject* param_other = NULL; 205 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On", kwlist, 206 | ¶m_other, ¶m_width)) { 207 | return NULL; 208 | } 209 | 210 | if (param_other->ob_type != &SimdObjectType) { 211 | PyErr_Format(SimdError, "Expected vector, got type '%s'", param_other->ob_type->tp_name); 212 | return NULL; 213 | } 214 | 215 | switch (param_width) { 216 | case 4: 217 | simd_vec_add_f32(&(self->vec), &(((SimdObject*)param_other)->vec)); 218 | break; 219 | case 8: 220 | simd_vec_add_f64(&(self->vec), &(((SimdObject*)param_other)->vec)); 221 | break; 222 | default: 223 | PyErr_Format(SimdError, "Unrecognized width: %zu for fadd operation", (size_t)param_width); 224 | return NULL; 225 | } 226 | Py_INCREF(Py_None); 227 | return Py_None; 228 | } 229 | 230 | static PyObject* 231 | SimdObject_sub(SimdObject *self, PyObject *args, PyObject *kwargs) 232 | { 233 | static char *kwlist[] = {"other", "width", NULL}; 234 | Py_ssize_t param_width = 0; 235 | PyObject* param_other = NULL; 236 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On", kwlist, 237 | ¶m_other, ¶m_width)) { 238 | return NULL; 239 | } 240 | 241 | if (param_other->ob_type != &SimdObjectType) { 242 | PyErr_Format(SimdError, "Expected vector, got type '%s'", param_other->ob_type->tp_name); 243 | return NULL; 244 | } 245 | 246 | switch (param_width) { 247 | case 1: 248 | simd_vec_sub_i8(&(self->vec), &(((SimdObject*)param_other)->vec)); 249 | break; 250 | case 2: 251 | simd_vec_sub_i16(&(self->vec), &(((SimdObject*)param_other)->vec)); 252 | break; 253 | case 4: 254 | simd_vec_sub_i32(&(self->vec), &(((SimdObject*)param_other)->vec)); 255 | break; 256 | case 8: 257 | simd_vec_sub_i64(&(self->vec), &(((SimdObject*)param_other)->vec)); 258 | break; 259 | default: 260 | PyErr_Format(SimdError, "Unrecognized width: %zu for sub operation", (size_t)param_width); 261 | return NULL; 262 | } 263 | Py_INCREF(Py_None); 264 | return Py_None; 265 | } 266 | 267 | static PyObject* 268 | SimdObject_fsub(SimdObject *self, PyObject *args, PyObject *kwargs) 269 | { 270 | static char *kwlist[] = {"other", "width", NULL}; 271 | Py_ssize_t param_width = 0; 272 | PyObject* param_other = NULL; 273 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On", kwlist, 274 | ¶m_other, ¶m_width)) { 275 | return NULL; 276 | } 277 | 278 | if (param_other->ob_type != &SimdObjectType) { 279 | PyErr_Format(SimdError, "Expected vector, got type '%s'", param_other->ob_type->tp_name); 280 | return NULL; 281 | } 282 | 283 | switch (param_width) { 284 | case 4: 285 | simd_vec_sub_f32(&(self->vec), &(((SimdObject*)param_other)->vec)); 286 | break; 287 | case 8: 288 | simd_vec_sub_f64(&(self->vec), &(((SimdObject*)param_other)->vec)); 289 | break; 290 | default: 291 | PyErr_Format(SimdError, "Unrecognized width: %zu for fsub operation", (size_t)param_width); 292 | return NULL; 293 | } 294 | Py_INCREF(Py_None); 295 | return Py_None; 296 | } 297 | 298 | static PyObject* 299 | SimdObject_as_bytes(SimdObject *self, PyObject *args, PyObject *kwargs) 300 | { 301 | static char *kwlist[] = {"start", "end", NULL}; 302 | Py_ssize_t param_start = 0; 303 | Py_ssize_t param_end = 0; 304 | size_t actual_start = 0; 305 | size_t actual_end = self->vec.size; 306 | PyObject* bytes_made = NULL; 307 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|nn", kwlist, 308 | ¶m_start, ¶m_end)) { 309 | return NULL; 310 | } 311 | 312 | if (param_start != 0) { 313 | if (param_start > self->vec.size || param_start < 0) { 314 | PyErr_Format(SimdError, "start: '%ld', is out of bounds for vector of size %zu", param_start, self->vec.size); 315 | return NULL; 316 | } 317 | actual_start = (size_t)param_start; 318 | } 319 | 320 | if (param_end != 0) { 321 | if (param_end > self->vec.size || param_end < 0) { 322 | PyErr_Format(SimdError, "end: '%ld', is out of bounds for vector of size %zu", param_end, self->vec.size); 323 | return NULL; 324 | } 325 | actual_end = (size_t)param_end; 326 | } 327 | 328 | bytes_made = PyBytes_FromStringAndSize((const char*)self->vec.data + actual_start, actual_end - actual_start); 329 | RETURN_OR_SYS_ERROR(bytes_made); 330 | 331 | } 332 | 333 | static PyObject* 334 | SimdObject_as_tuple(SimdObject *self, PyObject *args, PyObject *kwargs) 335 | { 336 | static char *kwlist[] = {"type", "width", NULL}; 337 | PyObject* tuple_to_give = NULL; 338 | PyObject* param_type = NULL; 339 | Py_ssize_t param_width = 0; 340 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, "On", kwlist, 341 | ¶m_type, ¶m_width)) { 342 | return NULL; 343 | } 344 | 345 | size_t actual_width = (size_t)param_width; 346 | if (actual_width != 1 && actual_width != 2 && actual_width != 4 && actual_width != 8) { 347 | PyErr_Format(SimdError, "The width '%zu' is not supported for method 'as_tuple'", actual_width); 348 | return NULL; 349 | } 350 | size_t n_members = self->vec.size / actual_width; 351 | tuple_to_give = PyTuple_New(n_members); 352 | 353 | if ((PyTypeObject*)param_type == &PyLong_Type) { 354 | if (actual_width == 1) { 355 | char* reader = (char*)(self->vec.data); 356 | for (size_t i = 0; i < n_members; ++i) { 357 | PyObject* to_put = PyLong_FromLong(reader[i]); 358 | if (to_put == NULL) { 359 | Py_DECREF(tuple_to_give); 360 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 361 | return NULL; 362 | } 363 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 364 | } 365 | } else if (actual_width == 2) { 366 | short* reader = (short*)(self->vec.data); 367 | for (size_t i = 0; i < n_members; ++i) { 368 | PyObject* to_put = PyLong_FromLong(reader[i]); 369 | if (to_put == NULL) { 370 | Py_DECREF(tuple_to_give); 371 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 372 | return NULL; 373 | } 374 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 375 | } 376 | } else if (actual_width == 4) { 377 | int* reader = (int*)(self->vec.data); 378 | for (size_t i = 0; i < n_members; ++i) { 379 | PyObject* to_put = PyLong_FromLong(reader[i]); 380 | if (to_put == NULL) { 381 | Py_DECREF(tuple_to_give); 382 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 383 | return NULL; 384 | } 385 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 386 | } 387 | } else if (actual_width == 8) { 388 | long long* reader = (long long*)(self->vec.data); 389 | for (size_t i = 0; i < n_members; ++i) { 390 | PyObject* to_put = PyLong_FromLongLong(reader[i]); 391 | if (to_put == NULL) { 392 | Py_DECREF(tuple_to_give); 393 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 394 | return NULL; 395 | } 396 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 397 | } 398 | } else { 399 | Py_FatalError("Should not reach this point in 'as_tuple', width error"); 400 | } 401 | } else if ((PyTypeObject*)param_type == &PyFloat_Type) { 402 | if (actual_width == 4) { 403 | float* reader = (float*)(self->vec.data); 404 | for (size_t i = 0; i < n_members; ++i) { 405 | PyObject* to_put = PyFloat_FromDouble((double)reader[i]); 406 | if (to_put == NULL) { 407 | Py_DECREF(tuple_to_give); 408 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 409 | return NULL; 410 | } 411 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 412 | } 413 | } else if (actual_width == 8) { 414 | double* reader = (double*)(self->vec.data); 415 | for (size_t i = 0; i < n_members; ++i) { 416 | PyObject* to_put = PyFloat_FromDouble(reader[i]); 417 | if (to_put == NULL) { 418 | Py_DECREF(tuple_to_give); 419 | PyErr_Format(PyExc_SystemError, "Internal object failure line: %u", __LINE__); 420 | return NULL; 421 | } 422 | PyTuple_SET_ITEM(tuple_to_give, i, to_put); 423 | } 424 | } else { 425 | if (actual_width == 1 || actual_width == 2) { 426 | PyErr_Format(SimdError, "The width '%zu' is not supported for floats for 'as_tuple'", actual_width); 427 | Py_DECREF(tuple_to_give); 428 | return NULL; 429 | } else { 430 | Py_FatalError("Should not reach invalid state for float in 'as_tuple"); 431 | } 432 | } 433 | } else { 434 | Py_DECREF(tuple_to_give); 435 | PyErr_Format(SimdError, "The type '%s' is not supported for method 'as_tuple'", param_type->ob_type->tp_name); 436 | return NULL; 437 | } 438 | return tuple_to_give; 439 | } 440 | 441 | static PyObject * 442 | SimdObject_clear(SimdObject *self, PyObject *Py_UNUSED(ignored)) 443 | { 444 | pysimd_vec_clear_data(&(self->vec)); 445 | Py_INCREF(Py_None); 446 | return Py_None; 447 | } 448 | 449 | static PyMethodDef SimdObject_methods[] = { 450 | {"clear", (PyCFunction) SimdObject_clear, METH_NOARGS, 451 | "Sets all bytes in the vector to 0" 452 | }, 453 | {"size", (PyCFunction) SimdObject_size, METH_NOARGS, 454 | "Returns the current size of the vector" 455 | }, 456 | {"resize", (PyCFunction) SimdObject_resize, METH_VARARGS | METH_KEYWORDS, 457 | "Resizes the vector to the desired capacity" 458 | }, 459 | {"add", (PyCFunction) SimdObject_add, METH_VARARGS | METH_KEYWORDS, 460 | "Adds a vector into another vector, without creating a new vector" 461 | }, 462 | {"fadd", (PyCFunction) SimdObject_fadd, METH_VARARGS | METH_KEYWORDS, 463 | "Adds a vector into another vector as floating point numbers" 464 | }, 465 | {"sub", (PyCFunction) SimdObject_sub, METH_VARARGS | METH_KEYWORDS, 466 | "Subtracts a vector from another vector, without creating a new vector" 467 | }, 468 | {"fsub", (PyCFunction) SimdObject_fsub, METH_VARARGS | METH_KEYWORDS, 469 | "Subtracts a vector from another vector as floating point numbers" 470 | }, 471 | {"as_bytes", (PyCFunction) SimdObject_as_bytes, METH_VARARGS | METH_KEYWORDS, 472 | "Returns a bytes object representing the internal bytes of the vector" 473 | }, 474 | {"as_tuple", (PyCFunction) SimdObject_as_tuple, METH_VARARGS | METH_KEYWORDS, 475 | "Returns a tuple populated with members of the vector, defaults to 32 bit integers" 476 | }, 477 | {"copy", (PyCFunction) SimdObject_copy, METH_VARARGS | METH_KEYWORDS, 478 | "Returns a copy of the vector" 479 | }, 480 | {NULL} /* Sentinel */ 481 | }; 482 | 483 | PyTypeObject SimdObjectType = { 484 | PyVarObject_HEAD_INIT(NULL, 0) 485 | .tp_name = "simd.Vec", 486 | .tp_doc = "A vector containing simd data", 487 | .tp_basicsize = sizeof(SimdObject), 488 | .tp_itemsize = 0, 489 | .tp_flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, 490 | .tp_new = SimdObject_new, 491 | .tp_init = (initproc) SimdObject_init, 492 | .tp_dealloc = (destructor) SimdObject_dealloc, 493 | .tp_repr = (reprfunc) SimdObject_repr, 494 | .tp_methods = SimdObject_methods, 495 | }; 496 | 497 | 498 | static PyObject* _system_info(PyObject* self, PyObject *Py_UNUSED(ignored)) 499 | { 500 | PyObject* info_dict = NULL; 501 | PyObject* arch_str = NULL; 502 | PyObject* cc_str = NULL; 503 | PyObject* features_dict = NULL; 504 | struct pysimd_sys_info sinfo; 505 | pysimd_sys_info_init(&sinfo); 506 | info_dict = PyDict_New(); 507 | if (info_dict == NULL) { 508 | // Can't allocate for dict 509 | return NULL; 510 | } 511 | arch_str = PyUnicode_FromString(pysimd_arch_stringify(sinfo.arch)); 512 | if (arch_str == NULL) { 513 | goto DICT_ERRCLEAN; 514 | } 515 | 516 | cc_str = PyUnicode_FromString(pysimd_cc_stringify(sinfo.compiler)); 517 | if (cc_str == NULL) { 518 | goto DICT_ERRCLEAN; 519 | } 520 | 521 | if (0 != PyDict_SetItemString(info_dict, "arch", arch_str)) { 522 | goto DICT_ERRCLEAN; 523 | } 524 | Py_DECREF(arch_str); 525 | if (0 != PyDict_SetItemString(info_dict, "compiler", cc_str)) { 526 | goto DICT_ERRCLEAN; 527 | } 528 | Py_DECREF(cc_str); 529 | 530 | features_dict = PyDict_New(); 531 | if (features_dict == NULL) { 532 | goto DICT_ERRCLEAN; 533 | } 534 | #ifdef PYSIMD_ARCH_X86_64 535 | #define X86_PYDICT_SETTER(ftname) \ 536 | if (0 != PyDict_SetItemString(features_dict, #ftname, PyBool_FromLong(sinfo.features.ftname))) { \ 537 | goto DICT_ERRCLEAN; \ 538 | } 539 | X86_PYDICT_SETTER(mmx) 540 | X86_PYDICT_SETTER(popcnt) 541 | X86_PYDICT_SETTER(sse) 542 | X86_PYDICT_SETTER(sse2) 543 | X86_PYDICT_SETTER(sse3) 544 | X86_PYDICT_SETTER(ssse3) 545 | X86_PYDICT_SETTER(sse41) 546 | X86_PYDICT_SETTER(sse42) 547 | X86_PYDICT_SETTER(sse4a) 548 | X86_PYDICT_SETTER(avx) 549 | X86_PYDICT_SETTER(avx2) 550 | X86_PYDICT_SETTER(fma) 551 | X86_PYDICT_SETTER(fma4) 552 | X86_PYDICT_SETTER(xop) 553 | X86_PYDICT_SETTER(bmi) 554 | X86_PYDICT_SETTER(bmi2) 555 | X86_PYDICT_SETTER(avx512f) 556 | X86_PYDICT_SETTER(avx512vl) 557 | X86_PYDICT_SETTER(avx512bw) 558 | X86_PYDICT_SETTER(avx512dq) 559 | X86_PYDICT_SETTER(avx512cd) 560 | X86_PYDICT_SETTER(avx512pf) 561 | X86_PYDICT_SETTER(avx512er) 562 | X86_PYDICT_SETTER(avx512ifma) 563 | X86_PYDICT_SETTER(avx512vbmi) 564 | #undef X86_PYDICT_SETTER 565 | #endif // PYSIMD_ARCH_X86_64 566 | if (0 != PyDict_SetItemString(info_dict, "features", features_dict)) { 567 | goto DICT_ERRCLEAN; 568 | } 569 | Py_DECREF(features_dict); 570 | return info_dict; 571 | DICT_ERRCLEAN: 572 | Py_XDECREF(info_dict); 573 | Py_XDECREF(arch_str); 574 | Py_XDECREF(cc_str); 575 | Py_XDECREF(features_dict); 576 | return NULL; 577 | } 578 | 579 | static PyObject* _simd_verion(PyObject* self, PyObject *Py_UNUSED(ignored)) 580 | { 581 | return Py_BuildValue("III", 0, 0, 1); 582 | 583 | } 584 | 585 | static PyMethodDef myMethods[] = { 586 | { "system_info", (PyCFunction)_system_info, METH_NOARGS, 587 | "Returns a dictionary containing information on the system architecture and features." 588 | }, 589 | { "version", (PyCFunction)_simd_verion, METH_NOARGS, 590 | "Returns the version of pysimd." 591 | }, 592 | { NULL, NULL, 0, NULL } 593 | }; 594 | 595 | // Our Module Definition struct 596 | static struct PyModuleDef simdModule = { 597 | PyModuleDef_HEAD_INIT, 598 | "simd", 599 | "The Python SIMD Module", 600 | -1, 601 | myMethods 602 | }; 603 | 604 | // Initializes our module using our above struct 605 | PyMODINIT_FUNC PyInit_simd(void) 606 | { 607 | PyObject *m; 608 | if (PyType_Ready(&SimdObjectType) < 0) 609 | return NULL; 610 | 611 | m = PyModule_Create(&simdModule); 612 | if (m == NULL) 613 | return NULL; 614 | 615 | Py_INCREF(&SimdObjectType); 616 | if (PyModule_AddObject(m, "Vec", (PyObject *) &SimdObjectType) < 0) { 617 | Py_DECREF(&SimdObjectType); 618 | Py_DECREF(m); 619 | return NULL; 620 | } 621 | 622 | SimdError = PyErr_NewException("simd.SimdError", NULL, NULL); 623 | Py_XINCREF(SimdError); 624 | if (PyModule_AddObject(m, "error", SimdError) < 0) { 625 | Py_XDECREF(SimdError); 626 | Py_CLEAR(SimdError); 627 | Py_DECREF(&SimdObjectType); 628 | Py_DECREF(m); 629 | return NULL; 630 | } 631 | 632 | return m; 633 | } -------------------------------------------------------------------------------- /tests/__main__.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import shutil 4 | import subprocess 5 | import unittest 6 | import distutils.ccompiler 7 | import sysconfig 8 | from distutils.spawn import find_executable 9 | 10 | CURRENT_DIR = os.path.dirname(os.path.abspath(__file__)) 11 | BUILT_TEST_DIR = os.path.join(CURRENT_DIR, 'bin') 12 | PROJECT_DIR = os.path.dirname(CURRENT_DIR) 13 | BUILD_DIR = os.path.join(PROJECT_DIR, 'build') 14 | BINPY_DIR = os.path.dirname(sys.executable) 15 | 16 | os.makedirs(BUILT_TEST_DIR, exist_ok=True) 17 | 18 | if not os.path.isdir(BUILD_DIR): 19 | try: 20 | import simd 21 | print("Testing extension already importable in " + CURRENT_DIR) 22 | except Exception as exc: 23 | raise Exception("Cannot find directory with built C extension, nor is it importable: " + str(exc)) 24 | else: 25 | lib_paths = [path for path in os.listdir(BUILD_DIR) if path.startswith('lib.')] 26 | extend_dir = os.path.join(BUILD_DIR, lib_paths[0]) 27 | extensions = [path for path in os.listdir(extend_dir) if path.startswith('simd')] 28 | if len(extensions) > 0: 29 | testing_ext = os.path.join(BUILT_TEST_DIR, extensions[0]) 30 | shutil.copy(os.path.join(extend_dir, extensions[0]), testing_ext) 31 | print("Testing extension at setup path " + testing_ext) 32 | else: 33 | raise Exception("Found no extensions in " + extend_dir) 34 | 35 | # Try to import extension 36 | try: 37 | import simd 38 | except Exception as exc: 39 | raise("Could not import extension, reason: " + str(exc)) 40 | 41 | compiler_includes_and_libs = sysconfig.get_config_vars('INCLUDEPY', 'LIBPL', 'LIBRARY') 42 | if len(compiler_includes_and_libs) != 3: 43 | raise Exception("Missing or unexpected system compiler configuration: " + str(compiler_includes_and_libs)) 44 | (cc_includes, cc_lib_dirs, cc_libs) = compiler_includes_and_libs 45 | 46 | if cc_libs.startswith("lib"): 47 | cc_libs = cc_libs[3:] 48 | if cc_libs.endswith(".a"): 49 | cc_libs = cc_libs[:-2] 50 | 51 | py3_embed_cc = distutils.ccompiler.new_compiler() 52 | 53 | current_dir_cfiles = [os.path.join(CURRENT_DIR, path) for path in os.listdir(CURRENT_DIR) if path.endswith(".c")] 54 | built_tests = [] 55 | 56 | for cfile in current_dir_cfiles: 57 | cfile_dir = os.path.dirname(cfile) 58 | built_name = os.path.join(cfile_dir, 'bin', os.path.basename(cfile)[:-2]) 59 | print("Building: " + cfile) 60 | obj_file = py3_embed_cc.compile([cfile], include_dirs=[cc_includes], macros=[('TESTING_BIN_PATH', "\"" + cfile_dir + "\"")]) 61 | py3_embed_cc.link_executable(obj_file, library_dirs=[cc_lib_dirs], libraries=[cc_libs], 62 | output_progname=built_name) 63 | built_tests.append(built_name) 64 | 65 | # Run the tests 66 | tests_passed = [] 67 | tests_failed = [] 68 | 69 | for cmdtest in built_tests: 70 | try: 71 | result = subprocess.run(cmdtest, check=False, timeout=600) 72 | if result.returncode == 0: 73 | tests_passed.append(cmdtest) 74 | else: 75 | print("Test {} FAILED with return_code {}".format(cmdtest, str(result.returncode))) 76 | tests_failed.append(cmdtest) 77 | except subprocess.TimeoutExpired as exc: 78 | print("Test {} FAILED due to timeout error: {}".format(cmdtest, str(exc))) 79 | tests_failed.append(cmdtest) 80 | except Exception as exc: 81 | print("Test {} FAILED due to error: {}".format(cmdtest, str(exc))) 82 | tests_failed.append(cmdtest) 83 | 84 | print("------------------------------------------------------") 85 | print("{} tests failed out of {} total tests".format(len(tests_failed), len(built_tests))) 86 | print("------------------------------------------------------") 87 | -------------------------------------------------------------------------------- /tests/import_test.c: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | 4 | static const char* EXT_MOD_NAME = "simd"; 5 | static const char* VEC_TYPE = "Vec"; 6 | 7 | static PyObject* args_list = NULL; 8 | static PyObject* kw_list = NULL; 9 | 10 | int 11 | main(int argc, char *argv[]) 12 | { 13 | PyObject* pModule, *pVecType, *pNewed; 14 | wchar_t *program = Py_DecodeLocale(argv[0], NULL); 15 | if (program == NULL) { 16 | fprintf(stderr, "Fatal error: cannot decode argv[0]\n"); 17 | exit(1); 18 | } 19 | Py_SetProgramName(program); /* optional but recommended */ 20 | Py_Initialize(); 21 | PyObject * sys_path = PySys_GetObject("path"); 22 | PyList_Append(sys_path, PyUnicode_FromString(TESTING_BIN_PATH)); 23 | 24 | args_list = PyList_New(0); 25 | kw_list = PyDict_New(); 26 | 27 | if (args_list == NULL || kw_list == NULL) { 28 | Py_FatalError("Cannot initialize empty args or kwlist, something is really wrong"); 29 | } 30 | 31 | pModule = PyImport_ImportModule(EXT_MOD_NAME); 32 | 33 | if (pModule != NULL) { 34 | printf("Imported module: %s\n", EXT_MOD_NAME); 35 | pVecType = PyObject_GetAttrString(pModule, VEC_TYPE); 36 | if (pVecType != NULL) { 37 | printf("Found type '%s'\n", VEC_TYPE); 38 | pNewed = PyType_GenericNew((PyTypeObject*)pVecType, NULL, NULL); 39 | if (pNewed != NULL) { 40 | printf("Calling new worked on type: %s\n", VEC_TYPE); 41 | Py_DECREF(pNewed); 42 | Py_DECREF(pModule); 43 | Py_DECREF(pVecType); 44 | } else { 45 | PyErr_Print(); 46 | fprintf(stderr, "Failed to call new on type: %s\n", VEC_TYPE); 47 | Py_DECREF(pModule); 48 | Py_DECREF(pVecType); 49 | PyMem_RawFree(program); 50 | return 1; 51 | } 52 | } else { 53 | PyErr_Print(); 54 | fprintf(stderr, "Module %s did not contain type '%s'\n", EXT_MOD_NAME, VEC_TYPE); 55 | Py_DECREF(pModule); 56 | PyMem_RawFree(program); 57 | return 1; 58 | } 59 | } else { 60 | PyErr_Print(); 61 | PyMem_RawFree(program); 62 | return 1; 63 | } 64 | 65 | if (Py_FinalizeEx() < 0) { 66 | exit(120); 67 | } 68 | 69 | PyMem_RawFree(program); 70 | return 0; 71 | } 72 | -------------------------------------------------------------------------------- /tests/size_test.c: -------------------------------------------------------------------------------- 1 | #define PY_SSIZE_T_CLEAN 2 | #include 3 | 4 | #define TARGET_SIZE 128 5 | 6 | static const char* EXT_MOD_NAME = "simd"; 7 | static const char* VEC_TYPE = "Vec"; 8 | static const char* SIZE_FUNC = "size"; 9 | 10 | static inline void print_and_dec(PyObject* obj, const char* label) 11 | { 12 | printf("REF '%s' = %ld\n", label, Py_REFCNT(obj)); 13 | Py_DECREF(obj); 14 | } 15 | 16 | int 17 | main(int argc, char *argv[]) 18 | { 19 | PyObject *pModule, *pVecType, *pNewed, *pSized, *pFName, *pArgs; 20 | PyObject *pInitArg; 21 | Py_Initialize(); 22 | PyObject * sys_path = PySys_GetObject("path"); 23 | PyList_Append(sys_path, PyUnicode_FromString(TESTING_BIN_PATH)); 24 | 25 | pModule = PyImport_ImportModule(EXT_MOD_NAME); 26 | if (pModule != NULL) { 27 | pVecType = PyObject_GetAttrString(pModule, VEC_TYPE); 28 | if (pVecType != NULL) { 29 | pArgs = PyTuple_New(1); 30 | pInitArg = PyLong_FromSize_t(TARGET_SIZE); 31 | if (pArgs == NULL || pInitArg == NULL) { 32 | Py_FatalError("Cannot initialize single argument list, something is really wrong"); 33 | } 34 | PyTuple_SetItem(pArgs, 0, pInitArg); 35 | pNewed = PyObject_CallObject(pVecType, pArgs); 36 | Py_DECREF(pArgs); 37 | if (pNewed != NULL) { 38 | pFName = PyUnicode_FromString(SIZE_FUNC); 39 | if (pFName != NULL) { 40 | pSized = PyObject_CallMethodObjArgs(pNewed, pFName, NULL); 41 | if (pSized != NULL && PyLong_Check(pSized)) { 42 | size_t oSize = PyLong_AsSize_t(pSized); 43 | if (oSize != (size_t)-1) { 44 | printf("Succesfully determined size of vec is %zu\n", oSize); 45 | print_and_dec(pSized, "size of vec"); 46 | print_and_dec(pFName, "size func name"); 47 | print_and_dec(pNewed, "Vec obj"); 48 | print_and_dec(pVecType, "Vec type"); 49 | print_and_dec(pModule, "simd mod"); 50 | if (oSize != TARGET_SIZE) { 51 | fprintf(stderr, "Expected size to be %zu\n", (size_t)TARGET_SIZE); 52 | return 1; 53 | } 54 | } else { 55 | PyErr_Print(); 56 | print_and_dec(pSized, "size of vec"); 57 | print_and_dec(pFName, "size func name"); 58 | print_and_dec(pNewed, "Vec obj"); 59 | print_and_dec(pVecType, "Vec type"); 60 | print_and_dec(pModule, "simd mod"); 61 | return 1; 62 | } 63 | } else { 64 | PyErr_Print(); 65 | print_and_dec(pFName, "size func name"); 66 | print_and_dec(pNewed, "Vec obj"); 67 | print_and_dec(pVecType, "Vec type"); 68 | print_and_dec(pModule, "simd mod"); 69 | return 1; 70 | } 71 | } else { 72 | PyErr_Print(); 73 | print_and_dec(pNewed, "Vec obj"); 74 | print_and_dec(pVecType, "Vec type"); 75 | print_and_dec(pModule, "simd mod"); 76 | return 1; 77 | } 78 | } else { 79 | PyErr_Print(); 80 | print_and_dec(pVecType, "Vec type"); 81 | print_and_dec(pModule, "simd mod"); 82 | return 1; 83 | } 84 | } else { 85 | PyErr_Print(); 86 | print_and_dec(pModule, "simd mod"); 87 | return 1; 88 | } 89 | } else { 90 | PyErr_Print(); 91 | return 1; 92 | } 93 | 94 | if (Py_FinalizeEx() < 0) { 95 | exit(120); 96 | } 97 | return 0; 98 | } 99 | 100 | #undef TARGET_SIZE --------------------------------------------------------------------------------