├── tests ├── gencl.sh ├── BUILDVC11.BAT ├── time_opencl_kernel.ocl ├── kat_opencl_kernel.ocl ├── kat_metal_kernel.metal ├── BUILDVC.BAT ├── kat_c.c ├── time_misc.h ├── rngNxW.h ├── ut_ReinterpretCtr.cpp ├── util_print.h ├── ut_uniform_reference.hpp ├── ut_ua.cpp ├── kat_dev_execute.h ├── kat_cuda.cu ├── kat.h ├── util_expandtpl.h ├── util_demangle.hpp ├── ut_ars.c ├── util_m128.h ├── kat_metal.m ├── util_metal.h ├── ut_gsl.c ├── ut_aes.cpp ├── time_boxmuller.cpp ├── kat_opencl.c ├── ut_M128.cpp ├── time_random123.h ├── time_initkeyctr.h ├── util_cuda.h └── time_serial.c ├── .gitignore ├── .travis.yml ├── docs └── Doxyfile ├── LICENSE ├── examples ├── gencl.sh ├── simplepp.cpp ├── example_seeds.h ├── pi_opencl_kernel.ocl ├── pi_gsl.c ├── simple.c ├── pi_metal_kernel.metal ├── pi_check.h ├── pi_capi.c ├── pi_cppapi.cpp ├── README ├── pi_aes.cpp ├── pi_opencl.c ├── pi_metal.m ├── pi_cuda.cu ├── pi_cudapp.cu ├── pi_microurng.cpp ├── GNUmakefile └── pi_uniform.cpp ├── include └── Random123 │ ├── features │ ├── fujitsufeatures.h │ ├── open64features.h │ ├── openclfeatures.h │ ├── clangfeatures.h │ ├── metalfeatures.h │ ├── nvccfeatures.h │ ├── sunprofeatures.h │ └── msvcfeatures.h │ ├── ReinterpretCtr.hpp │ ├── boxmuller.hpp │ └── MicroURNG.hpp └── GNUmakefile /tests/gencl.sh: -------------------------------------------------------------------------------- 1 | ../examples/gencl.sh -------------------------------------------------------------------------------- /tests/BUILDVC11.BAT: -------------------------------------------------------------------------------- 1 | rem /DR123_USE_CXX11=1 will not work because VS2012 is not fully C++11 compliant yet. 
2 | setlocal 3 | set CFLAGS=/I..\include /W3 /Ox /EHs /nologo /favor:INTEL64 4 | set VCBAT="c:\Program Files (x86)\Microsoft Visual Studio 11.0\vc\vcvarsall.bat" 5 | call BUILDVC.bat %1 6 | endlocal 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.[oa] 3 | *.pyc 4 | *.metallib 5 | *.air 6 | /.sconsign.dblite 7 | /objs 8 | /docs/html 9 | /docs/main.md 10 | # make leaves a lot of junk in examples/ and tests/ 11 | # We really should create objects in a subdir! 12 | **/kat_c 13 | **/kat_cpp 14 | **/kat_metal 15 | **/pi_aes 16 | **/pi_capi 17 | **/pi_cppapi 18 | **/pi_gsl 19 | **/pi_microurng 20 | **/pi_uniform 21 | **/pi_metal 22 | **/pi_opencl 23 | **/simple 24 | **/simplepp 25 | **/time_boxmuller 26 | **/time_serial 27 | **/time_thread 28 | **/timers 29 | **/ut_Engine 30 | **/ut_M128 31 | **/ut_ReinterpretCtr 32 | **/ut_aes 33 | **/ut_ars 34 | **/ut_carray 35 | **/ut_features 36 | **/ut_gsl 37 | **/ut_ua 38 | **/ut_uniform 39 | **/ut_uniform_IEEEkat 40 | **/kat_cuda 41 | **/pi_cuda 42 | **/pi_cudapp 43 | **/pi_opencl 44 | **/time_boxmuller_cuda 45 | **/time_cuda 46 | **/*.i 47 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | version: ~> 1.0 3 | language: cpp 4 | os: linux 5 | dist: focal 6 | compiler: 7 | - gcc 8 | - clang 9 | 10 | before_install: 11 | - sudo apt-get update -y 12 | - sudo DEBIAN_FRONTEND=noninteractive apt-get install -y doxygen graphviz 13 | 14 | script: 15 | - make check 16 | - make html 17 | 18 | deploy: 19 | edge: true # opt into deployment v2 20 | provider: pages:git 21 | local_dir: docs/html 22 | token: 23 | secure: 
"p16xEv9Aeny8UkXVLjhL1mIT32PqikuJtidHEUIUE7fHqw7HVfBOLi++o1/FYupQD7E//c4oGTpyswycIjd8z7yGKcyiOgKtkgxc3Gigp+I8sUwFRVp6t21tGMjbrWQKWPQyOsCZv5fl+EwZNNGzzRT8SEQ70ylxotjD9ZOFT3lX3c0btuk7kxA00c4GtX3JWlR9o7kB0KV7Tm1VzdjUJ78tp9GENj1Y9qffLCJb5h+DbR1ESM3hhJMVU9ImzCfX8xohm0hhpbUqXaE6OUy5PPutYoEq97RjsdHy/efKY+jDqZwKNDQpDqwuQZ/+G/cSh+Kypqy7qMVnp4zmapI2VbYjdCHUtvjVr8j2LHL1lD9a66+dunEi9SZjTa7lqLY+3aLFGQSnrCtmd6UMaow++EXOKVTa0CnMU693AD1E+rOetB4JEWNsOsJYaz4yrHhUJMnT/1JYtGfFxOyojgn/eIYwdVCTmZwWf4PMuZ4IaYV/4wniRLmTlHa4E+P2ab3V0AgAdI+l9wEkO/MZBYmjUkHQ3tyO7INJXGBLocatjrvRZOc4qLLpNslCb8/36ZvODERSmlboRB0lGeA3Nn7sW604x9NgN39Gy9sXe8xGYX8UB50AHd7XiPfp6+2AW9uEa8YBQxWB8gwlzwLyWQt8gKPZfzdJOHvY0/feCHGxdrA=" 24 | on: 25 | branch: main 26 | condition: $CC = gcc 27 | -------------------------------------------------------------------------------- /docs/Doxyfile: -------------------------------------------------------------------------------- 1 | # generated with doxygen -g and edited 2 | # Doxyfile 1.8.11 3 | 4 | PROJECT_NAME = "Random123" 5 | STRIP_FROM_PATH = ../include .. 
6 | STRIP_FROM_INC_PATH = ../include/ 7 | TAB_SIZE = 8 8 | EXTENSION_MAPPING = .h=C++ 9 | MARKDOWN_SUPPORT = NO 10 | AUTOLINK_SUPPORT = NO 11 | EXTRACT_ALL = YES 12 | EXTRACT_STATIC = YES 13 | INPUT = main.md cbrng.dox releasenotes.dox \ 14 | ../include/Random123 \ 15 | ../include/Random123/conventional \ 16 | ../include/Random123/features/sse.h \ 17 | ../include/Random123/features/compilerfeatures.h \ 18 | ../tests/README \ 19 | ../examples/README \ 20 | ../LICENSE 21 | 22 | #USE_MDFILE_AS_MAINPAGE = 23 | 24 | #HTML_HEADER = header.html 25 | HTML_TIMESTAMP = YES 26 | GENERATE_LATEX = NO 27 | MACRO_EXPANSION = YES 28 | 29 | PREDEFINED = \ 30 | "R123_STATIC_ASSERT(e,m)= " \ 31 | "R123_FORCE_INLINE(decl)= decl " \ 32 | "R123_STATIC_INLINE= static inline " \ 33 | "R123_CUDA_DEVICE= " \ 34 | "__cplusplus " \ 35 | "R123_USE_SSE= 1" \ 36 | "R123_USE_AES_NI= 1" \ 37 | "R123_USE_U01_DOUBLE= 1" \ 38 | "R123_USE_PHILOX_64BIT= 1" \ 39 | "R123_USE_64BIT= 1" \ 40 | "R123_USE_CXX11_STD_ARRAY= 1" 41 | EXPAND_AS_DEFINED = \ 42 | R123_ULONG_LONG \ 43 | R123_STATIC_INLINE \ 44 | R123_CUDA_DEVICE 45 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | /** @page LICENSE 2 | Copyright 2010-2012, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | -------------------------------------------------------------------------------- /tests/time_opencl_kernel.ocl: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __OPENCL_VERSION__ 33 | #define __OPENCL_VERSION__ 1 34 | #endif 35 | #include "time_random123.h" 36 | -------------------------------------------------------------------------------- /tests/kat_opencl_kernel.ocl: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "kat.h" 33 | 34 | #define KAT_KERNEL __kernel 35 | #define KAT_GLOBAL __global 36 | 37 | #include "kat_dev_execute.h" 38 | -------------------------------------------------------------------------------- /tests/kat_metal_kernel.metal: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "kat.h" 33 | 34 | #define KAT_KERNEL kernel 35 | #define KAT_GLOBAL device 36 | #define KAT_METAL_BUFFER0 [[ buffer(0) ]] 37 | 38 | #include "kat_dev_execute.h" 39 | -------------------------------------------------------------------------------- /tests/BUILDVC.BAT: -------------------------------------------------------------------------------- 1 | :: Call this with either x86 (for 32bit compile on a 32bit machine), 2 | :: amd64 (if you want to do a 64bit compile on a 64bit machine), 3 | :: or x86_amd64 (if you want to compile for amd64 on an x86) 4 | :: With no argument, will default to amd64 on Win64 and x86 otherwise. 5 | :: Call this with the argument "run" if you want to only run the 6 | :: previously compiled executables. 
7 | @echo off 8 | setlocal 9 | if not "%1"=="run" goto :Default 10 | set CC=echo 11 | set CFLAGS= 12 | goto :Loop 13 | 14 | :Default 15 | if "%~1"=="" goto :Guess 16 | set NEWBUILDVC=%1 17 | goto :Next 18 | 19 | :Guess 20 | set RegQry=HKLM\Hardware\Description\System\CentralProcessor\0 21 | REG.exe Query %RegQry% > hwdesc.o 22 | FIND /i "x86" < hwdesc.o > hwcheck.o 23 | if %errorlevel% == 0 ( 24 | set NEWBUILDVC=x86 25 | ) else ( 26 | set NEWBUILDVC=amd64 27 | ) 28 | 29 | :Next 30 | if "%BUILDVC%"=="%NEWBUILDVC%" goto :Continue 31 | if NOT DEFINED VCBAT set VCBAT="c:\Program Files (x86)\Microsoft Visual Studio 10.0\vc\vcvarsall.bat" 32 | call %VCBAT% %NEWBUILDVC% 33 | if errorlevel 1 exit /b 1 34 | set BUILDVC=%NEWBUILDVC% 35 | 36 | :Continue 37 | :: /Zi for debug. /favor:INTEL64 is ignored for 32bit compiles. 38 | if NOT DEFINED CFLAGS set CFLAGS=/DR123_NO_SINCOS=1 /I..\include /W3 /Ox /EHs /nologo /favor:INTEL64 39 | set CC=cl 40 | echo Using %VCBAT% 41 | echo Building for %BUILDVC% with %CC% %CFLAGS% 42 | 43 | :Loop 44 | set BUILDFILES= ( kat_c.c kat_cpp.cpp pi_aes.cpp pi_capi.c pi_cppapi.cpp pi_microurng.cpp simple.c simplepp.cpp time_serial.c time_boxmuller.cpp timers.cpp ut_Engine.cpp ut_M128.cpp ut_ReinterpretCtr.cpp ut_aes.cpp ut_ars.c ut_carray.cpp ut_features.cpp ut_uniform.cpp ) 45 | FOR %%A IN %BUILDFILES% DO ( 46 | %CC% %CFLAGS% %%A 47 | if errorlevel 1 exit /b 1 48 | %%~nA 49 | if errorlevel 1 exit /b 1 ) 50 | endlocal 51 | -------------------------------------------------------------------------------- /tests/kat_c.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #include "kat_main.h" 33 | 34 | #define KAT_KERNEL 35 | #define KAT_GLOBAL 36 | #include "kat_dev_execute.h" 37 | 38 | void host_execute_tests(kat_instance *tests, unsigned ntests){ 39 | (void)ntests; /* unused */ 40 | dev_execute_tests(tests); 41 | } 42 | -------------------------------------------------------------------------------- /examples/gencl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Run the C preprocessor on an OpenCL kernel to generate a C string array 3 | # suitable for clCreateProgramWithSource. This allows us to create 4 | # standalone OpenCL programs that do not depend on paths to the source 5 | # tree (the programs will still run the OpenCL run-time compiler to 6 | # compile the kernel, but the kernel is a string within the program, with 7 | # no external include dependencies) 8 | # Mark Moraes, D. E. Shaw Research 9 | 10 | # indenting the cpp output makes errors from the OpenCL runtime compiler 11 | # much more understandable. User can override with whatever they want. 12 | # The classic BSD indent (yes, the one that lived in /usr/ucb/indent once) 13 | # defaults to -br, but recent GNU indent versions do not. Both appear to 14 | # accept -br, fortunately... (BSD indent does not accept -kr or -linux, alas) 15 | 16 | PATH=$PATH:/usr/bin 17 | export PATH 18 | if type indent > /dev/null 2>&1; then 19 | : ${GENCL_INDENT=indent} 20 | else 21 | : ${GENCL_INDENT=cat} 22 | fi 23 | 24 | # We rely on gsub in awk, which exists in everything except classic 25 | # old V7 awk (Solaris!). If we can find gawk or nawk, we prefer those. 
26 | # http://www.shelldorado.com/articles/awkcompat.html 27 | for f in gawk nawk awk; do 28 | if type "$f" > /dev/null 2>&1; then 29 | : ${GENCL_AWK="$f"} 30 | break 31 | fi 32 | done 33 | [ ${GENCL_AWK:+set} ] || { echo "$0: could not find awk!">&2; exit 1; } 34 | 35 | usage="Usage: $0 inputoclfilename" 36 | case $# in 37 | 1) ;; 38 | *) echo "$usage" >&2; exit 1;; 39 | esac 40 | case "$1" in 41 | ''|-*) echo "Invalid or empty inputoclfilename: $1 42 | $usage" >&2; exit 1;; 43 | esac 44 | set -e 45 | ${CC-cc} -xc -E -P -nostdinc -D__OPENCL_VERSION__=1 $CPPFLAGS "$1" | 46 | ${GENCL_INDENT} | 47 | ${GENCL_AWK} 'BEGIN {print "\"\\n\\"} 48 | {gsub("\\", "\\\\", $0); gsub("\"", "\\\"", $0); print $0 "\\n\\"} 49 | END {print "\""}' 50 | -------------------------------------------------------------------------------- /tests/time_misc.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef TIME_MISC_H__ 33 | #define TIME_MISC_H__ 1 34 | /* Miscellaneous common definitions for time_*.c */ 35 | 36 | const char *progname; 37 | int verbose = 0; 38 | int debug = 0; 39 | int numtrials = 5; 40 | double sec_per_trial = 0.2; 41 | 42 | #define PREFIX "test_" 43 | 44 | #endif /* TIME_MISC_H__ */ 45 | -------------------------------------------------------------------------------- /tests/rngNxW.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | RNGNxW_TPL(philox, 2, 32) 34 | RNGNxW_TPL(philox, 4, 32) 35 | RNGNxW_TPL(threefry, 2, 32) 36 | RNGNxW_TPL(threefry, 4, 32) 37 | #if R123_USE_64BIT 38 | #if R123_USE_PHILOX_64BIT 39 | RNGNxW_TPL(philox, 2, 64) 40 | RNGNxW_TPL(philox, 4, 64) 41 | #endif 42 | RNGNxW_TPL(threefry, 2, 64) 43 | RNGNxW_TPL(threefry, 4, 64) 44 | #endif 45 | #if R123_USE_AES_NI 46 | RNGNxW_TPL(ars, 4, 32) 47 | RNGNxW_TPL(aesni, 4, 32) 48 | #endif 49 | -------------------------------------------------------------------------------- /include/Random123/features/fujitsufeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Note: Minimum/Initial version derived from openclfeatures.h to work 3 | with fujitsu compiler (_FCC). 
4 | */ 5 | 6 | #ifndef __fujitsufeatures_dot_hpp 7 | #define __fujitsufeatures_dot_hpp 8 | 9 | #ifndef R123_STATIC_INLINE 10 | #define R123_STATIC_INLINE static __inline 11 | #endif 12 | 13 | #ifndef R123_FORCE_INLINE 14 | #define R123_FORCE_INLINE(decl) decl 15 | #endif 16 | 17 | #ifndef R123_CUDA_DEVICE 18 | #define R123_CUDA_DEVICE 19 | #endif 20 | 21 | #ifndef R123_ASSERT 22 | #include <assert.h> 23 | #define R123_ASSERT(x) assert(x) 24 | #endif 25 | 26 | #ifndef R123_BUILTIN_EXPECT 27 | #define R123_BUILTIN_EXPECT(expr,likely) expr 28 | #endif 29 | 30 | #ifndef R123_USE_WMMINTRIN_H 31 | #define R123_USE_WMMINTRIN_H 0 32 | #endif 33 | 34 | #ifndef R123_USE_INTRIN_H 35 | #define R123_USE_INTRIN_H 0 36 | #endif 37 | 38 | #ifndef R123_USE_MULHILO32_ASM 39 | #define R123_USE_MULHILO32_ASM 0 40 | #endif 41 | 42 | #ifndef R123_USE_MULHILO64_ASM 43 | #define R123_USE_MULHILO64_ASM 0 44 | #endif 45 | 46 | #ifndef R123_USE_MULHILO64_MSVC_INTRIN 47 | #define R123_USE_MULHILO64_MSVC_INTRIN 0 48 | #endif 49 | 50 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 51 | #define R123_USE_MULHILO64_CUDA_INTRIN 0 52 | #endif 53 | 54 | #ifndef R123_USE_MULHILO64_OPENCL_INTRIN 55 | #define R123_USE_MULHILO64_OPENCL_INTRIN 0 56 | #endif 57 | 58 | #ifndef R123_USE_MULHILO64_MULHI_INTRIN 59 | #if (defined(__powerpc64__)) 60 | #define R123_USE_MULHILO64_MULHI_INTRIN 1 61 | #else 62 | #define R123_USE_MULHILO64_MULHI_INTRIN 0 63 | #endif 64 | #endif 65 | 66 | #ifndef R123_MULHILO64_MULHI_INTRIN 67 | #define R123_MULHILO64_MULHI_INTRIN __mulhdu 68 | #endif 69 | 70 | #ifndef R123_USE_MULHILO32_MULHI_INTRIN 71 | #define R123_USE_MULHILO32_MULHI_INTRIN 0 72 | #endif 73 | 74 | #ifndef R123_MULHILO32_MULHI_INTRIN 75 | #define R123_MULHILO32_MULHI_INTRIN __mulhwu 76 | #endif 77 | 78 | #ifndef __STDC_CONSTANT_MACROS 79 | #define __STDC_CONSTANT_MACROS 80 | #endif 81 | #include <stdint.h> 82 | #ifndef UINT64_C 83 | #error UINT64_C not defined. 
You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> 84 | #endif 85 | 86 | #endif 87 | -------------------------------------------------------------------------------- /GNUmakefile: -------------------------------------------------------------------------------- 1 | # This Makefile is EXTREMELY simple. Let's keep it that way. 2 | 3 | all: 4 | @echo Random123 is a header-only package. There is nothing to build. 5 | @echo 'However, "make install" understands prefix, DESTDIR, etc.,' 6 | @echo 'and "make check" understands CFLAGS, CXXFLAGS, LDFLAGS, etc.' 7 | @echo '"make html" will run doxygen to create docs/html' 8 | .PHONY: all 9 | 10 | check: 11 | cd tests && $(MAKE) runcore 12 | .PHONY: check 13 | 14 | prefix?=/usr/local 15 | includedir?=$(prefix)/include 16 | datarootdir?=$(prefix)/share 17 | datadir?=$(datarootdir) 18 | docdir?=$(datarootdir)/doc/Random123 19 | export prefix includedir datarootdir datadir docdir 20 | 21 | install: install-html install-include install-examples install-tests 22 | .PHONY: install 23 | 24 | # recursively copy include/Random123 to $(includedir) 25 | install-include: 26 | mkdir -p $(DESTDIR)$(includedir) 27 | cp -dr include/Random123 $(DESTDIR)$(includedir) 28 | .PHONY: install-include 29 | 30 | # docs/main.md is the same as README.md, but it has a @mainpage 31 | # directive, and the @ref directives are *not* commented out. 32 | docs/main.md : README.md 33 | echo @mainpage Random123: a Library of Counter-Based Random Number Generators > docs/main.md 34 | echo '' >> docs/main.md 35 | sed -e 's//\1/g' README.md >> docs/main.md 36 | 37 | # the html target removes and then recreates docs/html. 
38 | html: docs/main.md 39 | -[ -d docs/html ] && rm -rf docs/html 40 | cd docs && doxygen 41 | .PHONY: html 42 | 43 | install-html: html 44 | mkdir -p $(DESTDIR)$(docdir) 45 | -[ -d $(DESTDIR)$(docdir)/html ] && rm -rf $(DESTDIR)$(docdir)/html 46 | cp -a docs/html $(DESTDIR)$(docdir) 47 | .PHONY: install-html 48 | 49 | # install-examples and install-tests copy files to 50 | # $(DESTDIR)$(docdir). Since 'make check' (or other devel activity) 51 | # might "pollute" the examples/ and tests/ directories, the files to 52 | # be copied are enumerated in {examples,tests}/GNUmakefile. 53 | install-examples: 54 | cd examples; $(MAKE) install 55 | .PHONY: install-examples 56 | 57 | install-tests: 58 | cd tests; $(MAKE) install 59 | .PHONY: install-tests 60 | 61 | -------------------------------------------------------------------------------- /tests/ut_ReinterpretCtr.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include 35 | #include "util_demangle.hpp" 36 | 37 | using namespace r123; 38 | int main(int, char **){ 39 | r123array4x32 c = {{}}; 40 | r123array4x32 r; 41 | 42 | #if R123_USE_64BIT 43 | ReinterpretCtr p; 44 | Threefry2x64::key_type kp = {{}}; 45 | r = p(c, kp); 46 | std::cout << demangle(p) << ": " << r << "\n"; 47 | #else 48 | std::cout << "Can't test ReinterpretCtr with R123_USE_64BIT = 0\n"; 49 | #endif 50 | return 0; 51 | } 52 | -------------------------------------------------------------------------------- /include/Random123/features/open64features.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __open64features_dot_hpp 33 | #define __open64features_dot_hpp 34 | 35 | /* The gcc features are mostly right. We just override a few and then include gccfeatures.h */ 36 | 37 | /* Open64 4.2.3 and 4.2.4 accept the __uint128_t code without complaint 38 | but produce incorrect code for 64-bit philox. 
The MULHILO64_ASM 39 | seems to work fine */ 40 | #ifndef R123_USE_GNU_UINT128 41 | #define R123_USE_GNU_UINT128 0 42 | #endif 43 | 44 | #ifndef R123_USE_MULHILO64_ASM 45 | #define R123_USE_MULHILO64_ASM 1 46 | #endif 47 | 48 | #include "gccfeatures.h" 49 | 50 | #endif 51 | -------------------------------------------------------------------------------- /examples/simplepp.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include "example_seeds.h" 35 | 36 | int main(int, char **){ 37 | typedef r123::Threefry2x64 CBRNG; 38 | CBRNG::key_type::value_type seed = example_seed_u64(EXAMPLE_SEED1_U64); // example of user-settable seed 39 | CBRNG::key_type key = {{seed}}; 40 | CBRNG::ctr_type ctr = {{0,0}}; 41 | CBRNG g; 42 | std::cout << std::hex << "The first few 2x64 randoms from Threefry2x64 with hex key " << key << "\n"; 43 | for(int i=0; i<10; ++i){ 44 | ctr[0] = i; 45 | CBRNG::ctr_type rand = g(ctr, key); 46 | std::cout << "ctr: " << ctr << " Threefry2x64<>(ctr, key): " << rand << "\n"; 47 | } 48 | return 0; 49 | } 50 | -------------------------------------------------------------------------------- /tests/util_print.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #ifndef UTIL_PRINT_H__ 33 | #define UTIL_PRINT_H__ 34 | 35 | #include 36 | 37 | extern int verbose; 38 | 39 | #define TEST_TPL(NAME, N, W, R) \ 40 | void printline_##NAME##N##x##W##_##R(NAME##N##x##W##_ukey_t ukey, NAME##N##x##W##_ctr_t ictr, NAME##N##x##W##_ctr_t *octrs, size_t nctr) \ 41 | { \ 42 | size_t i; \ 43 | for (i = 0; i < nctr; i++) { \ 44 | if (i > 0) { \ 45 | printf(" [%lu]", (unsigned long)i); \ 46 | PRINTARRAY(octrs[i], stdout); \ 47 | putc('\n', stdout); \ 48 | fflush(stdout); \ 49 | } else { \ 50 | PRINTLINE(NAME, N, W, R, ictr, ukey, octrs[0], stdout); \ 51 | } \ 52 | if (verbose < 2) break; \ 53 | } \ 54 | } 55 | 56 | #include "util_expandtpl.h" 57 | 58 | #endif /* UTIL_PRINT_H__ */ 59 | -------------------------------------------------------------------------------- /tests/ut_uniform_reference.hpp: -------------------------------------------------------------------------------- 1 | RefHist("u01 Threefry4x32 float", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 2 | RefHist("u01 Threefry4x32 double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 3 | RefHist("u01 Threefry4x32 long double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 4 | RefHist("u01 Threefry4x64 float", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 5 | RefHist("u01 Threefry4x64 double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 6 | RefHist("u01 Threefry4x64 long double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 7 | RefHist("uneg11 Threefry4x32 float", " 156 139 148 146 159 148 159 168 142 160 156 161 153 143 158 150 180 174 152 163 157 129 166 151 140 142"); 8 | RefHist("uneg11 Threefry4x32 double", " 156 139 148 146 159 148 159 168 142 160 156 161 153 143 158 150 180 174 152 163 157 129 166 151 140 142"); 9 | RefHist("uneg11 Threefry4x32 
long double", " 156 139 148 146 159 148 159 168 142 160 156 161 153 143 158 150 180 174 152 163 157 129 166 151 140 142"); 10 | RefHist("uneg11 Threefry4x64 float", " 159 141 148 184 162 142 155 137 173 187 153 140 135 164 144 146 149 151 171 152 148 137 179 146 145 152"); 11 | RefHist("uneg11 Threefry4x64 double", " 159 141 148 184 162 142 155 137 173 187 153 140 135 164 144 146 149 151 171 152 148 137 179 146 145 152"); 12 | RefHist("uneg11 Threefry4x64 long double", " 159 141 148 184 162 142 155 137 173 187 153 140 135 164 144 146 149 151 171 152 148 137 179 146 145 152"); 13 | RefHist("u01fixedpt Threefry4x32 float", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 14 | RefHist("u01fixedpt Threefry4x32 double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 15 | RefHist("u01fixedpt Threefry4x32 long double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 301 330 326 320 295 291 298 287 305 307 310 316 314"); 16 | RefHist("u01fixedpt Threefry4x64 float", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 17 | RefHist("u01fixedpt Threefry4x64 double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 18 | RefHist("u01fixedpt Threefry4x64 long double", " 0 0 0 0 0 0 0 0 0 0 0 0 0 308 295 322 300 316 291 311 289 346 297 310 340 275"); 19 | // ./ut_uniform: SUCCESS 20 | -------------------------------------------------------------------------------- /tests/ut_ua.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2013, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | #if __cplusplus<201103L 34 | #include 35 | int main(int, char**){ 36 | std::cout << "ua.hpp requires C++11. 
No tests performed\n"; 37 | return 0; 38 | } 39 | #else 40 | 41 | #include 42 | #include 43 | #include 44 | 45 | using namespace r123; 46 | int main(int, char **){ 47 | Threefry4x32 rng; 48 | Threefry4x32::ctr_type c = {{1, 2, 3, 4}}; 49 | Threefry4x32::ukey_type uk = {{5, 6, 7, 8}}; 50 | Threefry4x32::key_type k = uk; 51 | auto a = u01all(rng(c, k)); // returns std::array 52 | for(auto e : a){ 53 | std::cout << e << "\n"; 54 | } 55 | c.incr(); 56 | auto b = u01all(rng(c, k)); 57 | for(auto e : b){ 58 | std::cout << e << "\n"; 59 | } 60 | 61 | return 0; 62 | } 63 | #endif 64 | -------------------------------------------------------------------------------- /tests/kat_dev_execute.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "kat.h" 33 | #ifndef KAT_METAL_BUFFER0 34 | #define KAT_METAL_BUFFER0 /* non-empty only when compiling Metal kernel */ 35 | #endif 36 | KAT_KERNEL void dev_execute_tests(KAT_GLOBAL kat_instance *tests KAT_METAL_BUFFER0){ 37 | size_t i; 38 | int done = 0; 39 | for(i=0; !done; ++i){ 40 | KAT_GLOBAL kat_instance *ti = &tests[i]; 41 | switch(tests[i].method){ 42 | //case philox2x32_e: ti->u.philox2x32_data.computed = philox2x32_R(ti->rounds, ti->u.philox2x32_data.ctr, ti->u.philox2x32_data.key); 43 | #define RNGNxW_TPL(base, N, W) case base##N##x##W##_e: ti->u.base##N##x##W##_data.computed = base##N##x##W##_R(ti->nrounds, ti->u.base##N##x##W##_data.ctr, base##N##x##W##keyinit(ti->u.base##N##x##W##_data.ukey)); break; 44 | #include "rngNxW.h" 45 | #undef RNGNxW_TPL 46 | case last: 47 | done = 1; 48 | break; 49 | } 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /examples/example_seeds.h: -------------------------------------------------------------------------------- 1 | #ifndef EXAMPLE_SEEDS_H__ 2 | #define EXAMPLE_SEEDS_H__ 1 3 | 4 | /* 5 | * This entire file is overkill to allow seeds to be set in Random123 6 | * example and test programs via a R123EXAMPLE_ENVCONF_SEED environment 7 | * variable, mainly to illustrate and test how one might use a user-set 8 | * seed to produce different random streams for different runs. 
9 | * None of this code is needed for the correct functioning or 10 | * use of the Random123 library. 11 | */ 12 | 13 | #include // for strtoul 14 | #include // for ULONG_MAX 15 | #include // for errno 16 | #include // for strerror 17 | #include // for stderr 18 | 19 | /* 20 | * The following arbitrary values for sample seeds (used to 21 | * initialize keys and counters in the examples) have no 22 | * particular meaning. They could equally easily all be 0. 23 | */ 24 | #define EXAMPLE_SEED1_U32 0x11111111U 25 | #define EXAMPLE_SEED2_U32 0x22222222U 26 | #define EXAMPLE_SEED3_U32 0x33333333U 27 | #define EXAMPLE_SEED4_U32 0x44444444U 28 | 29 | #define EXAMPLE_SEED5_U32 0xdeadbeefU 30 | #define EXAMPLE_SEED6_U32 0xbeadcafeU 31 | #define EXAMPLE_SEED7_U32 0x12345678U 32 | #define EXAMPLE_SEED8_U32 0x90abcdefU 33 | #define EXAMPLE_SEED9_U32 0xdecafbadU 34 | 35 | #if R123_USE_64BIT 36 | #define EXAMPLE_SEED1_U64 R123_64BIT(0xdeadbeef12345678) 37 | #define EXAMPLE_SEED2_U64 R123_64BIT(0xdecafbadbeadfeed) 38 | #endif 39 | 40 | static inline unsigned long example_seed_u64(uint64_t defaultseed) { 41 | const char *e = "R123EXAMPLE_ENVCONF_SEED"; 42 | const char *cp = getenv(e); 43 | unsigned long u; 44 | char *ep; 45 | errno = 0; 46 | if (cp) { 47 | u = strtoul(cp, &ep, 0); 48 | if (u == ULONG_MAX && errno != 0) { 49 | fprintf(stderr, "strtoul failed to convert environment variable %s=\"%s\" to unsigned long: %s\n", 50 | e, cp, strerror(errno)); 51 | exit(1); 52 | } else if (*ep != '\0') { 53 | fprintf(stderr, "strtoul failed to fully convert environment variable %s=\"%s\" to unsigned long, got 0x%lu\n", 54 | e, cp, u); 55 | exit(1); 56 | } 57 | } else { 58 | u = defaultseed; 59 | } 60 | return u; 61 | } 62 | 63 | static inline uint32_t example_seed_u32(uint32_t defaultseed) { 64 | uint64_t u64 = example_seed_u64(defaultseed); 65 | if (u64 > 0xFFFFFFFFUL /* UINT32_MAX, which clang29 does not have, sigh */) { 66 | fprintf(stderr, "Warning: truncating seed 0x%lu to 
uint32_t\n", (unsigned long)u64); 67 | } 68 | return (uint32_t)u64; 69 | } 70 | 71 | 72 | #endif /* EXAMPLE_SEEDS_H__ */ 73 | -------------------------------------------------------------------------------- /tests/kat_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
#include "util_cuda.h"
#include "kat_main.h"

#define KAT_KERNEL __global__
#define KAT_GLOBAL
#include "kat_dev_execute.h"

/*
 * Run the known-answer tests on the CUDA device.
 *
 * tests_host holds ntests entries followed by one terminating entry whose
 * method is `last`; all ntests+1 entries are copied to the device,
 * processed by dev_execute_tests on a single thread, and copied back so
 * that the caller can inspect the computed fields.
 */
void host_execute_tests(kat_instance *tests_host, unsigned ntests){
    CUDAInfo *info = cuda_init(NULL);
    kat_instance *dev_tests;
    // +1 for sentinel test with method==last
    const size_t nbytes = sizeof(tests_host[0]) * (ntests+1);

    CHECKCALL(cudaMalloc(&dev_tests, nbytes));
    CHECKCALL(cudaMemcpy(dev_tests, tests_host, nbytes, cudaMemcpyHostToDevice));

    printf("starting %u tests on 1 blocks with 1 threads/block\n", ntests);
    fflush(stdout);

    // TO DO: call this with parallelism,
    //   <<<infop->blocks_per_grid, infop->threads_per_block>>>
    // and then ensure that each of the threads got the same result.
    dev_execute_tests<<<1, 1>>>(dev_tests);

    CHECKCALL(cudaDeviceSynchronize());
    CHECKCALL(cudaMemcpy(tests_host, dev_tests, nbytes, cudaMemcpyDeviceToHost));
    CHECKCALL(cudaFree(dev_tests));
    cuda_done(info);
}

--------------------------------------------------------------------------------
/tests/kat.h:
--------------------------------------------------------------------------------
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions, and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions, and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name of D. E.
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __katdoth__ 33 | #define __katdoth__ 34 | 35 | #include 36 | #include 37 | #include 38 | #include 39 | 40 | enum method_e{ 41 | #define RNGNxW_TPL(base, N, W) base##N##x##W##_e, 42 | #include "rngNxW.h" 43 | #undef RNGNxW_TPL 44 | last 45 | }; 46 | 47 | #define RNGNxW_TPL(base, N, W) \ 48 | typedef struct { \ 49 | base##N##x##W##_ctr_t ctr; \ 50 | base##N##x##W##_ukey_t ukey; \ 51 | base##N##x##W##_ctr_t expected; \ 52 | base##N##x##W##_ctr_t computed; \ 53 | } base##N##x##W##_kat; 54 | #include "rngNxW.h" 55 | #undef RNGNxW_TPL 56 | 57 | typedef struct{ 58 | enum method_e method; 59 | unsigned nrounds; 60 | union{ 61 | #define RNGNxW_TPL(base, N, W) base##N##x##W##_kat base##N##x##W##_data; 62 | #include "rngNxW.h" 63 | #undef RNGNxW_TPL 64 | /* Sigh... For those platforms that lack uint64_t, carve 65 | out 128 bytes for the counter, key, expected, and computed. 
*/ 66 | char justbytes[128]; 67 | }u; 68 | } kat_instance; 69 | 70 | #endif 71 | -------------------------------------------------------------------------------- /examples/pi_opencl_kernel.ocl: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | /* 33 | * This file is the OpenCL kernel. 
It gets preprocessed, munged
 * into a C string declaration and included in pi_opencl, so that
 * running the compiled pi_opencl does not depend on any include
 * files, paths etc.
 */

/* NOTE(review): the header name was missing from this copy (angle-bracketed
   text had been stripped); it is reconstructed from the threefry4x32 names
   used below.  Confirm against the upstream sources. */
#include <Random123/threefry.h>

/*
 * counthits tests x,y points against a circle, two points per
 * threefry4x32 call, until at least n points have been tried on this
 * work-item.  The four 32-bit outputs are read as signed ints; a point
 * is a hit when x*x + y*y < 2^62, i.e. it lies within radius 2^31.
 *
 *   n      - minimum number of points to try per work-item
 *   useed  - second key word; the first key word is the work-item id
 *   hitsp  - per-work-item result: hitsp[tid].x receives the hit count
 *            and hitsp[tid].y the number of points actually tried
 */
__kernel void counthits(unsigned n, unsigned useed, __global uint2 *hitsp) {
    unsigned tid = get_global_id(0);
    unsigned hits = 0, tries = 0;
    threefry4x32_key_t k = {{tid, useed}};
    threefry4x32_ctr_t c = {{}}; // start counter from 0
    while (tries < n) {
        union {
            threefry4x32_ctr_t c;
            int4 i;
        } u;
        c.v[0]++;
        u.c = threefry4x32(c, k);
        long x1 = u.i.x, y1 = u.i.y;
        long x2 = u.i.z, y2 = u.i.w;
        if ((x1*x1 + y1*y1) < (1L<<62)) {
            hits++;
        }
        tries++;
        if ((x2*x2 + y2*y2) < (1L<<62)) {
            hits++;
        }
        tries++;
    }
    hitsp[tid].x = hits;
    hitsp[tid].y = tries;
}
--------------------------------------------------------------------------------
/tests/util_expandtpl.h:
--------------------------------------------------------------------------------
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions, and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions, and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name of D. E.
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef TEST_TPL 33 | #error "TEST_TPL not defined before including util_expandtpl.h" 34 | #else 35 | /* 36 | * This is included by various files after defining TEST_TPL to 37 | * expand TEST_TPL for each of the RNGs we want to test. 38 | * TEST_TPL args are the name of the RNG, N, W, and R, 39 | * N being the number of words, W being the wordsize in bits, 40 | * and R being the number of rounds. 
41 | */ 42 | 43 | #if TRY_PHILOX2X32 44 | TEST_TPL(philox, 2, 32, 7) 45 | TEST_TPL(philox, 2, 32, 10) 46 | #endif 47 | TEST_TPL(philox, 4, 32, 7) 48 | TEST_TPL(philox, 4, 32, 10) 49 | #if R123_USE_PHILOX_64BIT 50 | TEST_TPL(philox, 2, 64, 6) 51 | TEST_TPL(philox, 2, 64, 10) 52 | TEST_TPL(philox, 4, 64, 7) 53 | TEST_TPL(philox, 4, 64, 10) 54 | #endif 55 | #if R123_USE_64BIT 56 | TEST_TPL(threefry, 2, 64, 13) 57 | TEST_TPL(threefry, 2, 64, 20) 58 | TEST_TPL(threefry, 4, 64, 12) 59 | TEST_TPL(threefry, 4, 64, 20) 60 | TEST_TPL(threefry, 4, 64, 72) 61 | #endif 62 | TEST_TPL(threefry, 4, 32, 12) 63 | TEST_TPL(threefry, 4, 32, 20) 64 | 65 | #if R123_USE_AES_NI 66 | TEST_TPL(ars, 4, 32, 5) 67 | TEST_TPL(ars, 4, 32, 7) 68 | TEST_TPL(aesni, 4, 32, 10) 69 | #endif 70 | 71 | #undef TEST_TPL 72 | #endif /* TEST_TPL */ 73 | -------------------------------------------------------------------------------- /examples/pi_gsl.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include "Random123/philox.h" 35 | #include "Random123/threefry.h" 36 | #include "Random123/gsl_microrng.h" 37 | 38 | /* Compute pi, using the gsl_ran_flat distribution with 39 | an underlying threefry4x64 counter-based rng (cbrng). 
40 | We can call cbrng 8 times between calls to cbrng_reset */ 41 | 42 | GSL_MICRORNG(cbrng, threefry4x64); /* creates gsl_rng_cbrng */ 43 | 44 | #include "pi_check.h" 45 | 46 | int main(int argc, char **argv){ 47 | unsigned long hits = 0, tries = 0; 48 | gsl_rng *r; 49 | (void)argc; (void)argv; /* unused */ 50 | 51 | threefry4x64_ctr_t c = {{0}}; 52 | threefry4x64_key_t k = {{0}}; 53 | r = gsl_rng_alloc(gsl_rng_cbrng); 54 | printf("%lu uniforms from %s\n", NTRIES, gsl_rng_name(r)); 55 | while (tries < NTRIES) { 56 | double x, y; 57 | c.v[0]++; /* increment the counter */ 58 | cbrng_reset(r, c, k); /* reset the rng to the new counter */ 59 | x = gsl_ran_flat (r, -1.0, 1.0); 60 | y = gsl_ran_flat (r, -1.0, 1.0); 61 | if( x*x + y*y < 1.0 ) 62 | hits++; 63 | tries++; 64 | } 65 | gsl_rng_free (r); 66 | return pi_check(hits, tries); 67 | } 68 | -------------------------------------------------------------------------------- /examples/simple.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include "example_seeds.h" 35 | 36 | int main(int argc, char **argv){ 37 | int i; 38 | uint32_t seed = example_seed_u32(EXAMPLE_SEED1_U32); // example of user-settable seed 39 | 40 | /* while this example starts the counter from 0 and then increments by 0, 41 | this could start anywhere and increment by any stride or pattern that 42 | makes sense for the application as long as it produces a non-repeating stream */ 43 | threefry4x32_ctr_t ctr = {{0,0,0,0}}; 44 | /* we illustrate one user-specified seed and one constant as the key */ 45 | threefry4x32_key_t key = {{seed, EXAMPLE_SEED2_U32,0,0}}; 46 | (void)argc; (void)argv; /* unused */ 47 | printf( "The first few randoms with key 0x%llx 0x%llx\n", 48 | (unsigned long long)key.v[0], (unsigned long long)key.v[1]); 49 | for(i=0; i<10; ++i){ 50 | ctr.v[0] = i; 51 | { 52 | threefry4x32_ctr_t rand = threefry4x32(ctr, key); 53 | printf("ctr: %llx %llx threefry4x32(20, ctr, key): %llx %llx\n", 54 | (unsigned long long)ctr.v[0], (unsigned long long)ctr.v[1], 55 | (unsigned long long)rand.v[0], (unsigned long long)rand.v[1]); 56 | } 57 | } 58 | return 0; 59 | } 60 | 
-------------------------------------------------------------------------------- /examples/pi_metal_kernel.metal: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | /* 33 | * This file is the Metal kernel. 
34 | * 35 | * Written by Tom Schoonjans 36 | */ 37 | 38 | #include 39 | 40 | /* 41 | * counthits generates at least n (x,y) points on each thread and stores in 42 | * hitsp[tid] the number of those points that fall within the unit circle 43 | * and in triesp[tid] the number of points that were generated. 44 | */ 45 | kernel void counthits(const device unsigned &n [[ buffer(0) ]], 46 | device uint *hitsp [[ buffer(1) ]], 47 | device uint *triesp [[ buffer(2) ]], 48 | uint tid [[thread_position_in_grid]]) { 49 | unsigned hits = 0, tries = 0; 50 | threefry4x32_key_t k = {{tid, 0xdecafbad, 0xfacebead, 0x12345678}}; 51 | threefry4x32_ctr_t c = {{0, 0xf00dcafe, 0xdeadbeef, 0xbeeff00d}}; 52 | const float uint_max_fl = (float) UINT_MAX; 53 | while (tries < n) { 54 | union { 55 | threefry4x32_ctr_t c; 56 | uint4 i; 57 | } u; /* the threefry4x32 result is read back as a uint4 through u.i */ 58 | c.v[0]++; /* a new counter value for every call */ 59 | u.c = threefry4x32(c, k); /* one call yields two (x,y) points */ 60 | float x1 = ((float) u.i.x) / uint_max_fl, y1 = ((float) u.i.y) / uint_max_fl; 61 | float x2 = ((float) u.i.z) / uint_max_fl, y2 = ((float) u.i.w) / uint_max_fl; 62 | if ((x1*x1 + y1*y1) < (1.0)) { 63 | hits++; 64 | } 65 | tries++; 66 | if ((x2*x2 + y2*y2) < (1.0)) { 67 | hits++; 68 | } 69 | tries++; /* tries grows by 2 per iteration, so it ends at n+1 when n is odd */ 70 | } 71 | hitsp[tid] = hits; 72 | triesp[tid] = tries; 73 | } 74 | -------------------------------------------------------------------------------- /tests/util_demangle.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution.
15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef demangle_dot_hpp_ 33 | #define demangle_dot_hpp_ 34 | #include <stdlib.h> 35 | #include <string> 36 | 37 | // Every compiler has a demangling library *somewhere*. Unfortunately, they're 38 | // all different... 39 | 40 | #ifdef __GNUC__ 41 | // Clang defines __GNUC__, but clang3.1 with -stdlib=libc++ can't 42 | // find a <cxxabi.h> even though it *can* find the symbols at link 43 | // time. I suspect this is a bug/oversight in the installation 44 | // process (which, in June 2012 is still pretty fluid for libc++), so 45 | // it might be fixed in the future. On the other hand, the API in 46 | // cxxabi.h is locked down pretty tightly, so writing out an explicit 47 | // extern declaration is pretty safe, and avoids a rats nest of 48 | // ifdefs. It is tempting to use clang's __has_include(<cxxabi.h>), 49 | // but it feels like more #ifdefs with no obvious upside.
50 | // 51 | // #include <cxxabi.h> 52 | extern "C"{ 53 | char* 54 | __cxa_demangle(const char* __mangled_name, char* __output_buffer, 55 | size_t* __length, int* __status); 56 | } 57 | #endif 58 | #include <typeinfo> 59 | 60 | // demangle(x) returns a human-readable name for the type of x. With 61 | // __GNUC__ it runs typeid(x).name() through __cxa_demangle and falls back 62 | // to the raw typeid name if demangling fails; otherwise it returns the 63 | // typeid name unchanged. The argument is only used as the operand of typeid. 64 | template <typename T> 65 | std::string demangle(const T& ignored){ 66 | #ifdef __GNUC__ 67 | int status; 68 | char *realname = __cxa_demangle(typeid(ignored).name(), 0, 0, &status); 69 | std::string ret; 70 | if(status!=0 || realname==0) 71 | ret = typeid(ignored).name(); 72 | else 73 | ret = realname; 74 | free(realname); // the result buffer is malloc'ed by __cxa_demangle; free(0) is a no-op 75 | return ret; 76 | #else 77 | return typeid(ignored).name(); 78 | #endif 79 | } 80 | 81 | #endif 82 | -------------------------------------------------------------------------------- /include/Random123/features/openclfeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __openclfeatures_dot_hpp 33 | #define __openclfeatures_dot_hpp 34 | 35 | #ifndef R123_STATIC_INLINE 36 | #define R123_STATIC_INLINE inline 37 | #endif 38 | 39 | #ifndef R123_FORCE_INLINE 40 | #define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) 41 | #endif 42 | 43 | #ifndef R123_CUDA_DEVICE 44 | #define R123_CUDA_DEVICE 45 | #endif 46 | 47 | #ifndef R123_ASSERT 48 | #define R123_ASSERT(x) 49 | #endif 50 | 51 | #ifndef R123_BUILTIN_EXPECT 52 | #define R123_BUILTIN_EXPECT(expr,likely) expr 53 | #endif 54 | 55 | #ifndef R123_USE_GNU_UINT128 56 | #define R123_USE_GNU_UINT128 0 57 | #endif 58 | 59 | #ifndef R123_USE_MULHILO64_ASM 60 | #define R123_USE_MULHILO64_ASM 0 61 | #endif 62 | 63 | #ifndef R123_USE_MULHILO64_MSVC_INTRIN 64 | #define R123_USE_MULHILO64_MSVC_INTRIN 0 65 | #endif 66 | 67 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 68 | #define R123_USE_MULHILO64_CUDA_INTRIN 0 69 | #endif 70 | 71 | #ifndef R123_USE_MULHILO64_OPENCL_INTRIN 72 | #define R123_USE_MULHILO64_OPENCL_INTRIN 1 73 | #endif 74 | 75 | #ifndef R123_USE_AES_NI 76 | #define R123_USE_AES_NI 0 77 | #endif 78 | 79 | // XXX ATI APP SDK 2.4 clBuildProgram SEGVs if one uses uint64_t instead of 80 | // ulong to mul_hi. And gets lots of complaints from stdint.h 81 | // on some machines. 82 | // But these typedefs mean we cannot include stdint.h with 83 | // these headers? Do we need R123_64T, R123_32T, R123_8T? 
84 | typedef ulong uint64_t; 85 | typedef uint uint32_t; 86 | typedef uchar uint8_t; 87 | #define UINT64_C(x) ((ulong)(x##UL)) 88 | 89 | #endif 90 | -------------------------------------------------------------------------------- /examples/pi_check.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #ifndef PI_CHECK_H__ 33 | #define PI_CHECK_H__ 1 34 | 35 | #include <stdio.h> 36 | 37 | const unsigned long NTRIES = 10000000UL; 38 | 39 | /* pi_check prints the estimate 4*hits/tries of pi and compares hits with the 40 | binomial expectation for tries darts. Returns 0 if chisquared < 9 (within 41 | three 'sigma'), 1 otherwise. 42 | XX Cannot make this static, is included in some files that only use it 43 | under ifdef-conditionally, and we do not want to ifdef this to match. */ 44 | int pi_check(unsigned long hits, unsigned long tries) 45 | { 46 | const double PI = 3.14159265358979323846; 47 | double ourpi, mean, var, delta, chisq; 48 | printf("%lu out of %lu darts thrown at a square board hit the inscribed circle\n", 49 | hits, tries); 50 | ourpi = 4.*hits/tries; 51 | printf("pi is approximately %.8g (diff = %.2g %%)\n", ourpi, (ourpi - PI)*100./PI); 52 | mean = tries*(PI/4.); 53 | var = tries * (PI/4.)*(1. - (PI/4.)); 54 | delta = hits - mean; 55 | chisq = delta*delta/var; 56 | /* Sigh. Compare chisquared with 1, 4 and 9 so we don't have to link with -lm for sqrt */ 57 | if( chisq < 1. ) 58 | printf("OK, # of hits is less than one 'sigma' away from expectation\n(chisquared = %.2g)\n", chisq); 59 | else if(chisq < 4.) 60 | printf("OK, # of hits is between one and two 'sigma' away from expectation\n(chisquared = %.2g)\n", chisq); 61 | else if(chisq < 9.) 62 | printf("Maybe OK, # of hits is between two and three 'sigma' away from expectation\n(chisquared = %.2g)\n", chisq); 63 | else { 64 | printf("May not be OK, # of hits is more than three 'sigma'. Worth looking into.\n(chisquared = %.2g)\n", chisq); 65 | return 1; 66 | } 67 | return 0; 68 | } 69 | 70 | #endif /* PI_CHECK_H__ */ 71 | -------------------------------------------------------------------------------- /examples/pi_capi.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved.
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include 35 | 36 | /* Everyone's favorite PRNG example: calculate pi/4 by throwing darts 37 | // at a square board and counting the fraction that are inside the 38 | // inscribed circle. 39 | 40 | // This version uses the C API to threefry2x64. 
*/ 41 | 42 | #include "pi_check.h" 43 | 44 | int main(int argc, char **argv){ 45 | unsigned long hits = 0, tries = 0; 46 | const int64_t two_to_the_62 = ((int64_t)1)<<62; 47 | 48 | threefry2x64_key_t key = {{0, 0}}; 49 | threefry2x64_ctr_t ctr = {{0, 0}}; 50 | enum { int32s_per_counter = sizeof(ctr)/sizeof(int32_t) }; 51 | (void)argc;(void)argv; /* unused */ 52 | 53 | printf("Throwing %lu darts at a square board using threefry2x64\n", NTRIES); 54 | 55 | /* make the most of each bijection by looping over as many 56 | int32_t's as we can find in the ctr_type. */ 57 | assert( int32s_per_counter%2 == 0 ); 58 | while(tries < NTRIES){ 59 | /* Use a union to avoid strict aliasing issues. */ 60 | union{ 61 | threefry2x64_ctr_t ct; 62 | int32_t i32[int32s_per_counter]; 63 | }u; 64 | size_t j; 65 | /* Don't worry about the 'carry'. We're not going to loop 66 | more than 2^64 times. */ 67 | ctr.v[0]++; 68 | u.ct = threefry2x64(ctr, key); 69 | for(j=0; j 33 | #include 34 | #include 35 | #include 36 | 37 | // Everyone's favorite PRNG example: calculate pi/4 by throwing darts 38 | // at a square board and counting the fraction that are inside the 39 | // inscribed circle. 40 | 41 | // This version uses the C++ API to Threefry4x64, and the 42 | // ReinterpretCtr template to get 32-bit values. 43 | 44 | // Note - by using ReinterpretCtr, the result depends on the 45 | // endianness of the hardware it runs on even though the underlying 46 | // generator is endian-independent. An easy way to make the result 47 | // endian-independent would be to eliminate ReinterpretCtr and to use 48 | // a generator that works natively with 32-bit quantities, e.g., 49 | // Threefry4x32 or Philox4x32. 
50 | 51 | using namespace r123; 52 | 53 | #include "pi_check.h" 54 | 55 | int main(int, char **){ 56 | unsigned long hits = 0, tries = 0; 57 | const int64_t two_to_the_62 = ((int64_t)1)<<62; 58 | 59 | typedef ReinterpretCtr G; 60 | G generator; 61 | G::key_type key = {{}}; // initialize with zeros 62 | G::ctr_type ctr = {{}}; 63 | 64 | printf("Throwing %lu darts at a square board using Threefry4x64\n", NTRIES); 65 | 66 | while(tries < NTRIES){ 67 | ctr.incr(); 68 | G::ctr_type r = generator(ctr, key); 69 | for(size_t j=0; j 33 | #include 34 | #include 35 | #include 36 | 37 | #if !R123_USE_SSE 38 | int main(int argc, char **argv){ 39 | (void)argc; (void)argv; /* unused */ 40 | printf("No SSE support. This test is not compiled\n"); 41 | return 0; 42 | } 43 | #else 44 | #include "util_m128.h" 45 | 46 | int 47 | main(int argc, char **argv) 48 | { 49 | #if R123_USE_AES_NI 50 | struct r123array1xm128i c, k, ret; 51 | char m128str[M128_STR_SIZE], *kat; 52 | 53 | if (haveAESNI()) { 54 | c.v[0].m = m128i_from_charbuf("01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"); 55 | k.v[0].m = m128i_from_charbuf("01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"); 56 | ret = ars1xm128i_R(7, c, k); 57 | kat = "2b1623350cd214dc 7740187993411872"; 58 | if (strcmp(m128i_to_charbuf(ret.v[0].m, m128str), kat) != 0) { 59 | fprintf(stderr, "%s: error, expected %s, got %s\n", argv[0], kat, m128str); 60 | exit(1); 61 | } 62 | printf("%s: OK, got %s\n", argv[0], kat); 63 | c.v[0].m = m128i_from_charbuf("00 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00"); 64 | k.v[0].m = m128i_from_charbuf("01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00"); 65 | ret = ars1xm128i_R(7, c, k); 66 | kat = "2de6b66fa461b668 f380126f32b9cd22"; 67 | if (strcmp(m128i_to_charbuf(ret.v[0].m, m128str), kat) != 0) { 68 | fprintf(stderr, "%s: error, expected %s, got %s\n", argv[0], kat, m128str); 69 | exit(1); 70 | } 71 | printf("%s: OK, got %s\n", argv[0], kat); 72 | } else { 73 | printf("%s: no AES-NI on this machine\n", 
argv[0]); 74 | } 75 | #else 76 | printf("%s: no AES-NI compiled into this program\n", argv[0]); 77 | #endif 78 | (void)argc; (void)argv; /* unused */ 79 | return 0; 80 | } 81 | 82 | #endif 83 | -------------------------------------------------------------------------------- /tests/util_m128.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef UTIL_M128_H__ 33 | #define UTIL_M128_H__ 34 | #include <stdio.h> 35 | 36 | // The formatting in fips-197 seems to correspond to 37 | // byte[15] [14] ... [0] 38 | __m128i m128i_from_charbuf(const char *s){ /* parse 16 two-digit hex values from s into a __m128i */ 39 | unsigned int bytes[16] = {0}; /* zeroed so that a short or malformed s cannot leave entries uninitialized */ 40 | sscanf(s, "%02x%02x%02x%02x" "%02x%02x%02x%02x" "%02x%02x%02x%02x" "%02x%02x%02x%02x", 41 | &bytes[0], &bytes[1], &bytes[2], &bytes[3], 42 | &bytes[4], &bytes[5], &bytes[6], &bytes[7], 43 | &bytes[8], &bytes[9], &bytes[10], &bytes[11], 44 | &bytes[12], &bytes[13], &bytes[14], &bytes[15]); 45 | return _mm_set_epi8( 46 | bytes[15], bytes[14], bytes[13], bytes[12], 47 | bytes[11], bytes[10], bytes[9], bytes[8], 48 | bytes[7], bytes[6], bytes[5], bytes[4], 49 | bytes[3], bytes[2], bytes[1], bytes[0] 50 | ); 51 | } 52 | 53 | #define M128_STR_SIZE 34 /* minimum size of the charbuf "hex" argument */ 54 | 55 | char *m128i_to_charbuf(__m128i m, char *hex){ /* write m as 32 hex digits with one space in the middle; hex needs M128_STR_SIZE chars; returns hex */ 56 | union { 57 | unsigned char bytes[16]; 58 | __m128i m; 59 | } u; 60 | _mm_storeu_si128((__m128i*)&u.bytes[0], m); 61 | sprintf(hex, "%02x%02x%02x%02x" "%02x%02x%02x%02x" 62 | " " 63 | "%02x%02x%02x%02x""%02x%02x%02x%02x", 64 | u.bytes[0], u.bytes[1], u.bytes[2], u.bytes[3], 65 | u.bytes[4], u.bytes[5], u.bytes[6], u.bytes[7], 66 | u.bytes[8], u.bytes[9], u.bytes[10], u.bytes[11], 67 | u.bytes[12], u.bytes[13], u.bytes[14], u.bytes[15]); 68 | 69 | return hex; 70 | } 71 | 72 | #ifdef __cplusplus 73 | #include <string> 74 | 75 | __m128i
m128i_from_string(const std::string& s) { /* std::string front end for m128i_from_charbuf */ 76 | return m128i_from_charbuf(s.c_str()); 77 | } 78 | 79 | std::string m128i_to_string(__m128i m) { /* std::string front end for m128i_to_charbuf */ 80 | char hex[M128_STR_SIZE]; 81 | 82 | m128i_to_charbuf(m, hex); 83 | return std::string(hex); 84 | } 85 | #endif /* __cplusplus */ 86 | 87 | #endif /* UTIL_M128_H__ */ 88 | -------------------------------------------------------------------------------- /include/Random123/features/clangfeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2016, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __clangfeatures_dot_hpp 33 | #define __clangfeatures_dot_hpp 34 | 35 | #ifndef R123_USE_X86INTRIN_H 36 | #if (defined(__x86_64__)||defined(__i386__)) 37 | #define R123_USE_X86INTRIN_H 1 38 | #else 39 | #define R123_USE_X86INTRIN_H 0 40 | #endif 41 | #endif 42 | 43 | #ifndef R123_USE_CXX11_UNRESTRICTED_UNIONS 44 | #define R123_USE_CXX11_UNRESTRICTED_UNIONS __has_feature(cxx_unrestricted_unions) 45 | #endif 46 | 47 | #ifndef R123_USE_CXX11_STATIC_ASSERT 48 | #define R123_USE_CXX11_STATIC_ASSERT __has_feature(cxx_static_assert) 49 | #endif 50 | 51 | // With clang-3.6, -Wall warns about unused-local-typedefs. 52 | // The "obvious" thing to do is to ignore -Wunused-local-typedefs, 53 | // but that doesn't work because earlier versions of clang blow 54 | // up on an 'unknown warning group'. So we briefly ignore -Wall... 55 | // It's tempting to just give up on static assertions in pre-c++11 code. 
56 | #if !R123_USE_CXX11_STATIC_ASSERT && !defined(R123_STATIC_ASSERT) 57 | #define R123_STATIC_ASSERT(expr, msg) \ 58 | _Pragma("clang diagnostic push") \ 59 | _Pragma("clang diagnostic ignored \"-Wall\"") \ 60 | typedef char static_assertion[(!!(expr))*2-1] \ 61 | _Pragma("clang diagnostic pop") 62 | #endif 63 | 64 | #ifndef R123_USE_CXX11_CONSTEXPR 65 | #define R123_USE_CXX11_CONSTEXPR __has_feature(cxx_constexpr) 66 | #endif 67 | 68 | #ifndef R123_USE_CXX11_EXPLICIT_CONVERSIONS 69 | #define R123_USE_CXX11_EXPLICIT_CONVERSIONS __has_feature(cxx_explicit_conversions) 70 | #endif 71 | 72 | // With clang-3.0, the apparently simpler: 73 | // #define R123_USE_CXX11_RANDOM __has_include(<random>) 74 | // dumps core. 75 | #ifndef R123_USE_CXX11_RANDOM 76 | #if __cplusplus>=201103L && __has_include(<random>) 77 | #define R123_USE_CXX11_RANDOM 1 78 | #else 79 | #define R123_USE_CXX11_RANDOM 0 80 | #endif 81 | #endif 82 | 83 | #ifndef R123_USE_CXX11_TYPE_TRAITS 84 | #if __cplusplus>=201103L && __has_include(<type_traits>) 85 | #define R123_USE_CXX11_TYPE_TRAITS 1 86 | #else 87 | #define R123_USE_CXX11_TYPE_TRAITS 0 88 | #endif 89 | #endif 90 | 91 | #include "gccfeatures.h" 92 | 93 | #endif 94 | -------------------------------------------------------------------------------- /include/Random123/features/metalfeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution.
15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | 33 | /* 34 | * Written by Tom Schoonjans 35 | */ 36 | 37 | #ifndef __metalfeatures_dot_hpp 38 | #define __metalfeatures_dot_hpp 39 | 40 | #ifndef R123_STATIC_INLINE 41 | #define R123_STATIC_INLINE inline 42 | #endif 43 | 44 | #ifndef R123_FORCE_INLINE 45 | #define R123_FORCE_INLINE(decl) decl __attribute__((always_inline)) 46 | #endif 47 | 48 | #ifndef R123_CUDA_DEVICE 49 | #define R123_CUDA_DEVICE 50 | #endif 51 | 52 | #ifndef R123_METAL_THREAD_ADDRESS_SPACE 53 | #define R123_METAL_THREAD_ADDRESS_SPACE thread 54 | #endif 55 | 56 | #ifndef R123_METAL_CONSTANT_ADDRESS_SPACE 57 | #define R123_METAL_CONSTANT_ADDRESS_SPACE constant 58 | #endif 59 | 60 | #ifndef R123_ASSERT 61 | #define R123_ASSERT(x) 62 | #endif 63 | 64 | #ifndef R123_BUILTIN_EXPECT 65 | #define R123_BUILTIN_EXPECT(expr,likely) expr 66 | #endif 67 | 68 | #ifndef R123_USE_GNU_UINT128 69 | #define R123_USE_GNU_UINT128 0 70 | #endif 71 | 72 | #ifndef R123_USE_MULHILO64_ASM 73 | #define R123_USE_MULHILO64_ASM 0 74 | #endif 75 | 76 | #ifndef R123_USE_MULHILO64_MSVC_INTRIN 77 | #define R123_USE_MULHILO64_MSVC_INTRIN 0 78 | #endif 79 | 80 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 81 | #define R123_USE_MULHILO64_CUDA_INTRIN 0 82 | #endif 83 | 84 | #ifndef R123_USE_MULHILO64_OPENCL_INTRIN 85 | #define R123_USE_MULHILO64_OPENCL_INTRIN 0 86 | #endif 87 | 88 | #ifndef R123_USE_MULHILO32_MULHI_INTRIN 89 | #define R123_USE_MULHILO32_MULHI_INTRIN 1 90 | #endif 91 | 92 | #if R123_USE_MULHILO32_MULHI_INTRIN 93 | #include 94 | #define R123_MULHILO32_MULHI_INTRIN metal::mulhi 95 | #endif 96 | 97 | #ifndef R123_USE_AES_NI 98 | #define R123_USE_AES_NI 0 99 | #endif 100 | 101 | #ifndef R123_USE_64BIT 102 | #define R123_USE_64BIT 0 /* Metal currently (Feb 2019, Specification-2) does not support 64-bit variable types */ 103 | #endif 104 | 105 | #ifndef R123_ULONG_LONG 106 | /* the longest integer type in Metal (Feb 2019, Specification-2) is a 107 | * 32-bit unsigned int. Let's hope for the best... 
*/ 108 | #define R123_ULONG_LONG unsigned int 109 | #endif 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /tests/kat_metal.m: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | // Simple Metal device kernel and host main program to 33 | // compute pi via random darts at a square 34 | // 35 | // Written by Tom Schoonjans 36 | 37 | // We're compiling on the host, so we don't include metalfeatures.h, 38 | // but metal doesn't have 64-bit arithmetic, so we turn it off here. 39 | #define R123_USE_64BIT 0 40 | // functions to do boilerplate Metal begin and end 41 | #include "../tests/util_metal.h" 42 | #include "kat_main.h" 43 | 44 | void host_execute_tests(kat_instance *tests, unsigned ntests){ 45 | UMetalInfo *infop; 46 | size_t i, nthreads = 1024, hits_sz; 47 | uint *tests_host; 48 | NSString *kernelname = @"dev_execute_tests"; 49 | NSError *err = nil; 50 | id function; 51 | id tests_dev; 52 | size_t tests_sz; 53 | id buffer; 54 | id encoder; 55 | id pipeline; 56 | 57 | infop = metal_init(NULL, "kat_metal_kernel.metallib"); 58 | CHECKNOTZERO(function = [infop->library newFunctionWithName: kernelname]); 59 | tests_sz = sizeof(kat_instance) * (ntests+1); // +1 for sentinel test with method==last 60 | CHECKNOTZERO(tests_dev = [infop->device newBufferWithBytes: tests length: tests_sz options:0]); 61 | 62 | CHECKNOTZERO(buffer = [infop->queue commandBuffer]); 63 | CHECKNOTZERO(encoder = [buffer computeCommandEncoder]); 64 | CHECKERR(pipeline = [infop->device newComputePipelineStateWithFunction:function error:&err]); 65 | [encoder setComputePipelineState:pipeline]; 66 | [encoder setBuffer:tests_dev offset:0 atIndex:0]; 67 | 68 | MTLSize threadsPerThreadgroup = MTLSizeMake([pipeline maxTotalThreadsPerThreadgroup], 1, 1); 69 | MTLSize grid = MTLSizeMake(nthreads, 1, 1); 70 | 71 | [encoder dispatchThreads:grid threadsPerThreadgroup:threadsPerThreadgroup]; 72 | [encoder endEncoding]; 73 | [buffer commit]; 74 | [buffer waitUntilCompleted]; 75 | 76 | tests_host = [tests_dev contents]; 77 | 78 | memcpy(tests, tests_host, tests_sz); 79 | metal_done(infop); 80 | [function release]; 81 | [tests_dev release]; 82 | [buffer release]; 83 | [encoder 
release]; 84 | [pipeline release]; 85 | } 86 | -------------------------------------------------------------------------------- /examples/README: -------------------------------------------------------------------------------- 1 | This file is examples/README and is also linked to from the doxygen main page. 2 | 3 | /** 4 | @page ExamplesREADME Examples 5 | 6 | The examples/ directory contains usage examples 7 | for the components of the Random123 library. 8 | 9 | @section building Compiling and Running the code 10 | 11 | Installing and using Random123 requires only the use 12 | of the header files, and has no prerequisites other than 13 | a reasonable C99 or C++98 compiler. 14 | 15 | With a modern GNU make (3.80 or newer), building and running the core tests 16 | and examples can be as easy as running gmake with no arguments. 17 | Note, though, that the provided examples/GNUmakefile intentionally avoids setting 18 | any of the standard make variables: CC, CXX, CPPFLAGS, CFLAGS, 19 | CXXFLAGS, TARGET_ARCH, LDFLAGS, LOADLIBES, LDLIBS. GNU make 20 | will inherit settings for these variables from the environment, 21 | or they may be set on the command line. If none are set, 22 | compilation will proceed using system-wide default flags, generally 23 | without advanced optimization, architectural tuning, warnings, or other 24 | common options. 25 | 26 | Before putting the Random123 library to use in an application, 27 | it is important to test it using the same compiler flags and 28 | features that the application will use. In other words, 29 | the conventional make variables should be set 30 | the same way when testing the library as they will be set when the 31 | library is actually compiled into your application. 
32 | Something like: 33 | @code 34 | gmake CFLAGS="-std=c99" CXXFLAGS="-std=c++0x" CPPFLAGS="-I/alternate/location/include -O3 -Wall -Wstrict-aliasing=2" TARGET_ARCH="-march=native" 35 | @endcode 36 | would confirm that all is well with optimization on, and output targeted at 37 | an architecture with the same capabilities as the machine running the compilation. 38 | 39 | Very old versions of GNU make (pre-2002) or non-GNU 40 | make will not work with examples/GNUmakefile. Lacking a suitably modern GNU make, 41 | our advice is to invoke the 42 | C or C++ compiler directly on the source files in the examples/ directory. 43 | 44 | @section examples Examples 45 | 46 | @subsection simple Simple examples in C and C++ 47 | 48 | There are two extremely short examples that show all the code necessary to 49 | obtain and print a few random numbers in C and C++: 50 |
    51 |
  • simple.c 52 |
  • simplepp.cpp 53 |
54 | 55 | @subsection pi Estimating pi using different APIs 56 | 57 | Using random numbers to estimate pi is a classic example. The idea 58 | is to choose points at random in a square and to count how many of 59 | them lie within the inscribed circle. Since the area of the square 60 | is 4*r^2 and the area of the circle is pi*r^2, the ratio of the 61 | number of points in the circle to the total number of points should 62 | approach pi/4 as the number of points grows. 63 | 64 | We give several examples of pi estimation, each of 65 | which illustrates a slightly different API 66 | 67 |
    68 |
  • pi_capi - using only the basic C API 69 |
  • pi_cppapi - using only the basic C++ API 70 |
  • pi_uniform - using the C++ API and uniform.hpp 71 |
  • pi_gsl - using a Random123 generator, but a gsl distribution to obtain real-valued random numbers. Requires the GNU Scientific Library 72 |
  • pi_microurng - using a Random123 generator, but a C++11 \<random\> distribution to obtain real-valued random numbers 73 |
  • pi_cuda - using the Random123 library with CUDA, runnable on an NVIDIA GPU 74 |
  • pi_cudapp - using the C++ API with CUDA, runnable on an NVIDIA GPU 75 |
  • pi_opencl - using the Random123 library with OpenCL, runnable on any OpenCL platform: e.g. NVIDIA or ATI GPUs or Intel or AMD CPUs. The actual 76 | compute kernel lives in the \c pi_opencl_kernel.ocl file and is transformed by \c gencl.sh into strings that get included in \c pi_opencl.c, since 77 | the OpenCL kernels get compiled for the target OpenCL platform at run-time. Note that Apple deprecated OpenCL in MacOS 10.14 (2018). See pi_metal. 78 |
  • pi_aes - uses the AESNI4x32 Random123 generator 79 |
  • pi_metal - uses Apple's Metal framework (replacement for OpenCL). 80 |
81 | 82 | 83 | */ 84 | -------------------------------------------------------------------------------- /examples/pi_aes.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #include 33 | #include 34 | #include 35 | using namespace r123; 36 | 37 | // Everyone's favorite PRNG example: calculate pi/4 by throwing darts 38 | // at a square board and counting the fraction that are inside the 39 | // inscribed circle. 40 | 41 | // This version uses the C++ API to AESNI. 42 | 43 | #include "pi_check.h" 44 | #include "example_seeds.h" 45 | 46 | int main(int, char **){ 47 | #if R123_USE_AES_NI 48 | unsigned long seed = example_seed_u32(EXAMPLE_SEED1_U32); // example user-settable seed 49 | unsigned long hits = 0, tries = 0; 50 | const int64_t two_to_the_62 = ((int64_t)1)<<62; 51 | 52 | if (!haveAESNI()) { 53 | std::cerr << "AES-NI instructions not available on this hardware, skipping the pi_aes test." << std::endl; 54 | return 0; 55 | } 56 | typedef AESNI4x32 G; 57 | G generator; 58 | // As an example, we illustrate one user-provided seed word and the rest as arbitrary constants 59 | G::ukey_type ukey = {{(G::ukey_type::value_type)seed, EXAMPLE_SEED2_U32, EXAMPLE_SEED3_U32, EXAMPLE_SEED4_U32}}; 60 | // The key_type constructor transforms the 128bit AES ukey_type to an expanded (1408bit) form. 61 | G::key_type key = ukey; 62 | // start ctr from an arbitrary point. 
0 would work fine too, this is just to show that it does 63 | // not matter where it starts from (or for that matter, whether it increments by 1, or some other 64 | // arbitrary stride or in some other way, as long as it never repeats a key,ctr combination) 65 | G::ctr_type ctr = {{EXAMPLE_SEED5_U32, EXAMPLE_SEED6_U32, EXAMPLE_SEED7_U32, EXAMPLE_SEED8_U32}}; 66 | 67 | printf("Throwing %lu darts at a square board using AESNI4x32\n", NTRIES); 68 | std::cout << "Initializing AES key with hex userkey: " << std::hex << ukey << " ctr: " << ctr << std::endl; 69 | 70 | while(tries < NTRIES){ 71 | ctr.incr(); 72 | G::ctr_type r = generator(ctr, key); 73 | if (tries == 0) { 74 | std::cout << "first random from AESNI is " << std::hex << r << std::endl;; 75 | } 76 | for(size_t j=0; j 37 | 38 | namespace r123{ 39 | /*! 40 | ReinterpretCtr uses memcpy to map back and forth 41 | between a CBRNG's ctr_type and the specified ToType. For example, 42 | after: 43 | 44 | typedef ReinterpretCtr G; 45 | 46 | G is a bona fide CBRNG with ctr_type r123array4x32. 47 | 48 | WARNING: ReinterpretCtr is endian dependent. The 49 | values returned by G, declared as above, 50 | will depend on the endianness of the machine on which it runs. 51 | */ 52 | 53 | template 54 | struct ReinterpretCtr{ 55 | typedef ToType ctr_type; 56 | typedef typename CBRNG::key_type key_type; 57 | typedef typename CBRNG::ctr_type bctype; 58 | typedef typename CBRNG::ukey_type ukey_type; 59 | R123_STATIC_ASSERT(sizeof(ToType) == sizeof(bctype) && sizeof(typename bctype::value_type) != 16, 60 | "ReinterpretCtr: sizeof(ToType) is not the same as sizeof(CBRNG::ctr_type) or CBRNG::ctr_type::value_type looks like it might be __m128i"); 61 | // It's amazingly difficult to safely do conversions with __m128i. 
62 | // If we use the operator() implementation below with a CBRNG 63 | // whose ctr_type is r123array1xm128i, gcc4.6 optimizes away the 64 | // memcpys, inlines the operator()(c,k), and produces assembly 65 | // language that ends with an aesenclast instruction with a 66 | // destination operand pointing to an unaligned memory address ... 67 | // Segfault! See: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=50444 68 | // MSVC also produces code that crashes. We suspect a 69 | // similar mechanism but haven't done the debugging necessary to 70 | // be sure. We were able to 'fix' gcc4.6 by making bc a mutable 71 | // data member rather than declaring it in the scope of 72 | // operator(). That didn't fix the MSVC problems, though. 73 | // 74 | // Conclusion - don't touch __m128i, at least for now. The 75 | // easiest (but highly imprecise) way to do that is the static 76 | // assertion above that rejects bctype::value_types of size 16. - 77 | // Sep 2011. 78 | ctr_type operator()(ctr_type c, key_type k){ 79 | bctype bc; 80 | std::memcpy(&bc, &c, sizeof(c)); 81 | CBRNG b; 82 | bc = b(bc, k); 83 | std::memcpy(&c, &bc, sizeof(bc)); 84 | return c; 85 | } 86 | }; 87 | } // namespace r123 88 | #endif 89 | -------------------------------------------------------------------------------- /tests/util_metal.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef UTIL_METAL_H__ 33 | #define UTIL_METAL_H__ 34 | /* 35 | * has a couple of utility functions to setup and teardown Metal. 
36 | * Avoid much boilerplate in every Metal program 37 | * 38 | * Written by Tom Schoonjans 39 | */ 40 | 41 | #import 42 | #import 43 | 44 | #include "util.h" 45 | 46 | typedef struct umetal_info { 47 | id device; 48 | id queue; 49 | id library; 50 | } UMetalInfo; 51 | 52 | /* Miscellaneous checking macros for convenience */ 53 | static char *print_metal_errstring(NSError *err) { 54 | if (err == nil) 55 | return nil; 56 | return strdup(err.localizedDescription.UTF8String); 57 | } 58 | 59 | #define CHECKERR(x) do { \ 60 | (x); \ 61 | if (err != nil) { \ 62 | fprintf(stderr, "%s: error %s from %s\n", progname, print_metal_errstring(err), #x); \ 63 | exit(1); \ 64 | } \ 65 | } while(0) 66 | 67 | //#define CHECK(x) CHECKERR(err = (x)) 68 | 69 | static UMetalInfo *metal_init(const char *devstr, const char *metallib) 70 | { 71 | UMetalInfo *tp; 72 | NSError *err = nil; 73 | NSArray> *devices; 74 | unsigned i; 75 | unsigned long ndevices; 76 | 77 | /* get list of platforms */ 78 | CHECKNOTZERO(devices = MTLCopyAllDevices()); 79 | ndevices = (unsigned long) devices.count; 80 | dprintf(("ndevices = %lu\n", ndevices)); 81 | if (ndevices == 0) { 82 | fprintf(stderr, "No Metal devices available\n"); 83 | return NULL; 84 | } 85 | dprintf(("found %lu device%s\n", ndevices, ndevices == 1 ? 
"" : "s")); 86 | CHECKNOTZERO(tp = (UMetalInfo *) malloc(sizeof(UMetalInfo))); 87 | for (i = 0; i < devices.count; i++) { 88 | dprintf(("device %d: %s\n", i, devices[i].name.UTF8String)); 89 | } 90 | 91 | // get default device 92 | tp->device = MTLCreateSystemDefaultDevice(); 93 | 94 | dprintf(("create Metal command queue for device %s\n", tp->device.name.UTF8String)); 95 | CHECKNOTZERO(tp->queue = [tp->device newCommandQueue]); 96 | dprintf(("create Metal library from %s\n", metallib)); 97 | CHECKERR(tp->library = [tp->device newLibraryWithFile: [[NSString alloc] initWithUTF8String:metallib] error:&err]); 98 | 99 | return tp; 100 | } 101 | 102 | 103 | static void metal_done(UMetalInfo *tp) { 104 | 105 | dprintf(("metal_done\n")); 106 | [tp->library release]; 107 | [tp->queue release]; 108 | [tp->device release]; 109 | free(tp); 110 | } 111 | 112 | 113 | #endif /* UTIL_METAL_H__ */ 114 | -------------------------------------------------------------------------------- /examples/pi_opencl.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Simple OpenCL device kernel and host main program to 33 | // compute pi via random darts at a square 34 | 35 | // util_opencl.h has a large amount of OpenCL boilerplate. 36 | // It contains nothing RNG-specific. 37 | #include "../tests/util_opencl.h" 38 | #include "pi_check.h" 39 | #include "example_seeds.h" 40 | 41 | static const char *opencl_src = 42 | // pi_opencl_kernel.i contains a literal string 43 | // with the kernel source code. It's generated 44 | // by ./gencl.sh opencl_kernel.ocl 45 | #include "pi_opencl_kernel.i" 46 | ; 47 | 48 | const char *progname; 49 | int verbose = 0; 50 | int debug = 0; 51 | 52 | int 53 | main(int argc, char **argv) 54 | { 55 | unsigned seed = example_seed_u32(EXAMPLE_SEED9_U32); // example user-settable seed 56 | unsigned count = argc > 1 ? atoi(argv[1]) : 0; 57 | UCLInfo *infop; 58 | size_t i, nthreads, hits_sz; 59 | cl_mem hits_dev; 60 | cl_uint2 *hits_host; 61 | const char *kernelname = "counthits"; 62 | cl_int err; 63 | cl_kernel kern; 64 | double d = 0.; 65 | 66 | d = timer(&d); 67 | progname = argv[0]; 68 | verbose = debug = argc > 2 ? atoi(argv[2]): 0; 69 | infop = opencl_init(argc > 3 ? 
argv[3] : NULL, opencl_src, argc > 4 ? argv[4] : ""); 70 | CHECKERR(kern = clCreateKernel(infop->prog, kernelname, &err)); 71 | if (infop->wgsize > 64) infop->wgsize /= 2; 72 | nthreads = infop->cores * infop->wgsize; 73 | if (count == 0) 74 | count = NTRIES/nthreads; 75 | hits_sz = nthreads * sizeof(hits_host[0]); 76 | CHECKNOTZERO(hits_host = (cl_uint2 *)malloc(hits_sz)); 77 | CHECKERR(hits_dev = clCreateBuffer(infop->ctx, CL_MEM_WRITE_ONLY, hits_sz, 0, &err)); 78 | CHECK(clSetKernelArg(kern, 0, sizeof(unsigned), (void*)&count)); 79 | CHECK(clSetKernelArg(kern, 1, sizeof(unsigned), (void*)&seed)); 80 | CHECK(clSetKernelArg(kern, 2, sizeof(cl_mem), (void*)&hits_dev)); 81 | printf("queuing kernel for %lu threads with %lu work group size, %u points with seed 0x%x\n", 82 | (unsigned long)nthreads, (unsigned long)infop->wgsize, count, seed); 83 | CHECK(clEnqueueNDRangeKernel(infop->cmdq, kern, 1, 0, &nthreads, &infop->wgsize, 0, 0, 0)); 84 | CHECK(clFinish(infop->cmdq)); 85 | CHECK(clEnqueueReadBuffer(infop->cmdq, hits_dev, CL_TRUE, 0, hits_sz, hits_host, 0, 0, 0)); 86 | 87 | unsigned long hits = 0, tries = 0; 88 | for (i = 0; i < nthreads; i++) { 89 | if (debug) 90 | printf("%lu %u %u\n", (unsigned long)i, hits_host[i].x, hits_host[i].y); 91 | hits += hits_host[i].x; 92 | tries += hits_host[i].y; 93 | } 94 | CHECK(clReleaseMemObject(hits_dev)); 95 | CHECK(clReleaseKernel(kern)); 96 | opencl_done(infop); 97 | return pi_check(hits, tries); 98 | } 99 | -------------------------------------------------------------------------------- /tests/ut_gsl.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #include 33 | #include 34 | #include "Random123/philox.h" 35 | #include "Random123/threefry.h" 36 | #include "Random123/conventional/gsl_cbrng.h" 37 | #include 38 | 39 | /* Exercise the GSL_CBRNG macro */ 40 | 41 | GSL_CBRNG(cbrng, threefry4x64); /* creates gsl_rng_cbrng */ 42 | 43 | int main(int argc, char **argv){ 44 | int i; 45 | gsl_rng *r; 46 | gsl_rng *rcopy; 47 | unsigned long save, x; 48 | unsigned long saved[5]; 49 | double sum = 0.; 50 | (void)argc; (void)argv; /* unused */ 51 | 52 | r = gsl_rng_alloc(gsl_rng_cbrng); 53 | assert (gsl_rng_min(r) == 0); 54 | assert (gsl_rng_max(r) == 0xffffffffUL); // Not necessarily ~0UL 55 | assert (gsl_rng_size(r) > 0); 56 | 57 | printf("%s\nulongs from %s in initial state\n", argv[0], gsl_rng_name(r)); 58 | for (i = 0; i < 5; i++) { 59 | x = gsl_rng_get(r); 60 | saved[i] = x; 61 | printf("%d: 0x%lx\n", i, x); 62 | assert(x != 0); 63 | } 64 | printf("uniforms from %s\n", gsl_rng_name(r)); 65 | for (i = 0; i < 5; i++) { 66 | double z = gsl_rng_uniform(r); 67 | sum += z; 68 | printf("%d: %.4g\n", i, z); 69 | } 70 | assert( sum < 0.9*5 && sum > 0.1*5 && (long)"sum must be reasonably close to 0.5*number of trials"); 71 | save = gsl_rng_get(r); 72 | 73 | gsl_rng_set(r, 0xdeadbeef); /* set a non-zero seed */ 74 | printf("ulongs from %s after seed\n", gsl_rng_name(r)); 75 | for (i = 0; i < 5; i++) { 76 | x = gsl_rng_get(r); 77 | printf("%d: 0x%lx\n", i, x); 78 | assert(x != 0); 79 | } 80 | /* make a copy of the total state */ 81 | rcopy = gsl_rng_alloc(gsl_rng_cbrng); 82 | gsl_rng_memcpy(rcopy, r); 83 | printf("uniforms from %s\n", gsl_rng_name(r)); 84 | sum = 0.; 85 | for (i = 0; i < 5; i++) { 86 | double x = gsl_rng_uniform(r); 87 | double y = gsl_rng_uniform(rcopy); 88 | printf("%d: %.4g\n", i, x); 89 | sum += x; 90 | assert(x == y); 91 | } 92 | assert(gsl_rng_get(r) != save); 93 | assert( sum < 0.9*5 && sum > 0.1*5 && (long)"sum must be reasonably close to 0.5*number of trials"); 94 | 95 | /* gsl_rng_set(*, 0) 
is supposed to recover the default seed */ 96 | gsl_rng_set(r, 0); 97 | printf("ulongs from %s after restore to initial\n", gsl_rng_name(r)); 98 | for (i = 0; i < 5; i++) { 99 | x = gsl_rng_get(r); 100 | assert( x == saved[i] ); 101 | printf("%d: 0x%lx\n", i, x); 102 | assert(x != 0); 103 | } 104 | printf("uniforms from %s\n", gsl_rng_name(r)); 105 | for (i = 0; i < 5; i++) { 106 | printf("%d: %.4g\n", i, gsl_rng_uniform(r)); 107 | } 108 | assert(gsl_rng_get(r) == save); 109 | 110 | gsl_rng_free (r); 111 | return 0; 112 | } 113 | -------------------------------------------------------------------------------- /tests/ut_aes.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Check our AES implementation against the example in FIPS-197 33 | 34 | #include 35 | #include 36 | #if R123_USE_AES_OPENSSL 37 | #include 38 | #endif 39 | #include 40 | #include 41 | #include 42 | #include 43 | #include 44 | 45 | using namespace std; 46 | using namespace r123; 47 | 48 | #if !R123_USE_SSE 49 | int main(int, char **){ 50 | std::cout << "No SSE support. This test is not compiled\n"; 51 | return 0; 52 | } 53 | #else 54 | 55 | #include "util_m128.h" 56 | 57 | int main(int, char **){ 58 | r123array1xm128i IN, K; 59 | 60 | K.v[0].m = m128i_from_charbuf("0001020304050607 08090a0b0c0d0e0f"); 61 | IN.v[0].m = m128i_from_charbuf("0011223344556677 8899aabbccddeeff"); 62 | // From FIPS-197, this is the official "right answer" 63 | r123array1xm128i right_answer; 64 | right_answer[0] = m128i_from_charbuf("69c4 e0d8 6a7b 0430 d8cd b780 70b4 c55a"); 65 | (void)right_answer; /* don't complain about an unused variable if neither NI nor OPENSSL are enabled. */ 66 | #if R123_USE_AES_NI 67 | if( haveAESNI() ){ 68 | AESNI1xm128i::key_type xk(K); 69 | AESNI1xm128i bx; 70 | AESNI1xm128i::ctr_type x = bx(IN, xk); 71 | 72 | assert( x==right_answer ); 73 | cout << "IN: " << m128i_to_string(IN[0]) << "\n"; 74 | cout << "K : " << m128i_to_string(K[0]) << "\n"; 75 | cout << "AES:" << m128i_to_string(x[0]) << "\n"; 76 | cout << "Hooray! 
AESNI1xm128i(IN, K) matches the published test vector!\n"; 77 | }else{ 78 | cout << "The AES-NI instructions are not available on this hardware. Skipping AES-NI tests\n"; 79 | } 80 | #else 81 | cout << "The AES-NI Bijections are not compiled into this binary. Skipping AES-NI tests\n"; 82 | #endif 83 | 84 | // And let's do it with AESOpenSSL. But since AESOpenSSL has its own 85 | // format for keys and counters we make a union for the key types and 86 | // use ReinterpretCtr to wrap a union around the counter types. 87 | #if R123_USE_AES_OPENSSL 88 | #if R123_USE_AES_NI 89 | typedef AESNI1xm128i::ctr_type nictype; 90 | #else 91 | typedef r123array1xm128i nictype; 92 | #endif 93 | AESOpenSSL16x8::ukey_type ouk; 94 | _mm_storeu_si128((__m128i*)&ouk.v[0], K.v[0].m); 95 | AESOpenSSL16x8::key_type okey(ouk); 96 | ReinterpretCtr osslb; 97 | assert( osslb(IN, okey) == right_answer ); 98 | cout << "Hooray! AESOpenSSL16x8(IN, K) matches the published test vector!\n"; 99 | #else 100 | cout << "The OpenSSL AES implementation is not linked with this binary. Skipping the AESOpenSSL16x8\n"; 101 | #endif // R123_USE_AES_OPENSSL 102 | 103 | return 0; 104 | } 105 | 106 | #endif 107 | -------------------------------------------------------------------------------- /examples/pi_metal.m: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 
15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Simple Metal device kernel and host main program to 33 | // compute pi via random darts at a square 34 | // 35 | // Written by Tom Schoonjans 36 | 37 | // functions to do boilerplate Metal begin and end 38 | #include "../tests/util_metal.h" 39 | #include "pi_check.h" 40 | 41 | const char *progname; 42 | int verbose = 0; 43 | int debug = 0; 44 | 45 | int 46 | main(int argc, char **argv) 47 | { 48 | unsigned count = argc > 1 ? atoi(argv[1]) : 0; 49 | UMetalInfo *infop; 50 | size_t i, nthreads = 1024, hits_sz; 51 | uint *hits_host; 52 | uint *tries_host; 53 | NSString *kernelname = @"counthits"; 54 | NSError *err = nil; 55 | double d = 0.; 56 | id function; 57 | id hits_dev, tries_dev; 58 | id buffer; 59 | id encoder; 60 | id pipeline; 61 | 62 | d = timer(&d); 63 | progname = argv[0]; 64 | verbose = debug = argc > 2 ? atoi(argv[2]): 0; 65 | infop = metal_init(argc > 3 ? 
argv[3] : NULL, "pi_metal_kernel.metallib"); 66 | CHECKNOTZERO(function = [infop->library newFunctionWithName: kernelname]); 67 | CHECKNOTZERO(hits_dev = [infop->device newBufferWithLength: sizeof(uint) * nthreads options:0]); 68 | CHECKNOTZERO(tries_dev = [infop->device newBufferWithLength: sizeof(uint) * nthreads options:0]); 69 | 70 | if (count == 0) 71 | count = NTRIES/nthreads; 72 | 73 | CHECKNOTZERO(buffer = [infop->queue commandBuffer]); 74 | CHECKNOTZERO(encoder = [buffer computeCommandEncoder]); 75 | CHECKERR(pipeline = [infop->device newComputePipelineStateWithFunction:function error:&err]); 76 | [encoder setComputePipelineState:pipeline]; 77 | [encoder setBytes:&count length:sizeof(uint) atIndex:0]; 78 | [encoder setBuffer:hits_dev offset:0 atIndex:1]; 79 | [encoder setBuffer:tries_dev offset:0 atIndex:2]; 80 | 81 | MTLSize threadsPerThreadgroup = MTLSizeMake([pipeline maxTotalThreadsPerThreadgroup], 1, 1); 82 | MTLSize grid = MTLSizeMake(nthreads, 1, 1); 83 | 84 | [encoder dispatchThreads:grid threadsPerThreadgroup:threadsPerThreadgroup]; 85 | [encoder endEncoding]; 86 | [buffer commit]; 87 | [buffer waitUntilCompleted]; 88 | 89 | hits_host = [hits_dev contents]; 90 | tries_host = [tries_dev contents]; 91 | 92 | unsigned long hits = 0, tries = 0; 93 | for (i = 0; i < nthreads; i++) { 94 | if (debug) 95 | printf("%lu %u %u\n", (unsigned long)i, hits_host[i], tries_host[i]); 96 | hits += hits_host[i]; 97 | tries += tries_host[i]; 98 | } 99 | metal_done(infop); 100 | [function release]; 101 | [hits_dev release]; 102 | [tries_dev release]; 103 | [buffer release]; 104 | [encoder release]; 105 | [pipeline release]; 106 | return pi_check(hits, tries); 107 | } 108 | -------------------------------------------------------------------------------- /examples/pi_cuda.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Simple CUDA device kernel and host main program to 33 | // compute pi via random darts at a square 34 | 35 | // functions for boilerplate CUDA init and done 36 | #include "../tests/util_cuda.h" 37 | 38 | #include 39 | 40 | int debug = 0; 41 | const char *progname; 42 | 43 | // CUDA Kernel: 44 | // generates n x,y points and returns hits[tid] with the count of number 45 | // of those points within the unit circle on each thread. 
46 | __global__ void counthits(unsigned n, unsigned useed, uint2 *hitsp) 47 | { 48 | unsigned tid = blockDim.x * blockIdx.x + threadIdx.x; 49 | unsigned hits = 0, tries = 0; 50 | philox4x32_key_t k = {{tid, useed}}; 51 | philox4x32_ctr_t c = {{}}; // start counter from 0 52 | 53 | while (tries < n) { 54 | union { 55 | philox4x32_ctr_t c; 56 | int4 i; 57 | }u; 58 | c.v[0] = tries; 59 | u.c = philox4x32(c, k); 60 | int64_t x1 = u.i.x, y1 = u.i.y; 61 | int64_t x2 = u.i.z, y2 = u.i.w; 62 | if ((x1*x1 + y1*y1) < (1LL<<62)) { 63 | hits++; 64 | } 65 | tries++; 66 | if ((x2*x2 + y2*y2) < (1LL<<62)) { 67 | hits++; 68 | } 69 | tries++; 70 | } 71 | hitsp[tid] = make_uint2(hits, tries); 72 | } 73 | 74 | #include "pi_check.h" 75 | #include "example_seeds.h" 76 | 77 | int 78 | main(int argc, char **argv) 79 | { 80 | unsigned seed = example_seed_u32(EXAMPLE_SEED9_U32); // example user-settable seed 81 | CUDAInfo *infop; 82 | uint2 *hits_host, *hits_dev; 83 | size_t hits_sz; 84 | unsigned nthreads; 85 | unsigned count = argc > 1 ? atoi(argv[1]) : 0; 86 | double d = 0.; 87 | 88 | d = timer(&d); 89 | progname = argv[0]; 90 | debug = argc > 2 ? atoi(argv[2]): 0; 91 | 92 | infop = cuda_init(argc > 3 ? 
argv[3] : NULL); 93 | nthreads = infop->blocks_per_grid * infop->threads_per_block; 94 | if (count == 0) 95 | count = NTRIES/nthreads; 96 | 97 | hits_sz = nthreads * sizeof(hits_host[0]); 98 | CHECKCALL(cudaMalloc(&hits_dev, hits_sz)); 99 | CHECKNOTZERO((hits_host = (uint2 *)malloc(hits_sz))); 100 | 101 | printf("starting %u blocks with %u threads/block for %u points each with seed 0x%x\n", 102 | infop->blocks_per_grid, infop->threads_per_block, count, seed); 103 | fflush(stdout); 104 | 105 | counthits<<blocks_per_grid, infop->threads_per_block>>>(count, seed, hits_dev); 106 | 107 | CHECKCALL(cudaDeviceSynchronize()); 108 | CHECKCALL(cudaMemcpy(hits_host, hits_dev, hits_sz, cudaMemcpyDeviceToHost)); 109 | 110 | unsigned long long hits = 0, tries = 0; 111 | for (unsigned i = 0; i < nthreads; i++) { 112 | if (debug) 113 | printf("%u %u %u\n", i, hits_host[i].x, hits_host[i].y); 114 | hits += hits_host[i].x; 115 | tries += hits_host[i].y; 116 | } 117 | CHECKCALL(cudaFree(hits_dev)); 118 | free(hits_host); 119 | cuda_done(infop); 120 | return pi_check(hits, tries); 121 | } 122 | -------------------------------------------------------------------------------- /examples/pi_cudapp.cu: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Simple CUDA device kernel and host main program to 33 | // compute pi via random darts at a square 34 | 35 | // functions for boilerplate CUDA init and done 36 | #include "../tests/util_cuda.h" 37 | 38 | #include 39 | 40 | using namespace r123; 41 | 42 | int debug = 0; 43 | const char *progname; 44 | 45 | 46 | // CUDA Kernel: 47 | // generates n x,y points and returns hits[tid] with the count of number 48 | // of those points within the unit circle on each thread. 
49 | __global__ void counthits(unsigned n, unsigned useed, uint2 *hitsp) 50 | { 51 | unsigned tid = blockDim.x * blockIdx.x + threadIdx.x; 52 | unsigned hits = 0, tries = 0; 53 | typedef Philox4x32 G; 54 | G rng; 55 | G::key_type k = {{tid, useed}}; 56 | G::ctr_type c = {{}}; 57 | 58 | while (tries < n) { 59 | union { 60 | G::ctr_type c; 61 | int4 i; 62 | }u; 63 | c.incr(); 64 | u.c = rng(c, k); 65 | int64_t x1 = u.i.x, y1 = u.i.y; 66 | int64_t x2 = u.i.z, y2 = u.i.w; 67 | if ((x1*x1 + y1*y1) < (1LL<<62)) { 68 | hits++; 69 | } 70 | tries++; 71 | if ((x2*x2 + y2*y2) < (1LL<<62)) { 72 | hits++; 73 | } 74 | tries++; 75 | } 76 | hitsp[tid] = make_uint2(hits, tries); 77 | } 78 | 79 | #include "pi_check.h" 80 | #include "example_seeds.h" 81 | 82 | int 83 | main(int argc, char **argv) 84 | { 85 | unsigned seed = example_seed_u32(EXAMPLE_SEED9_U32); // example user-settable seed 86 | CUDAInfo *infop; 87 | uint2 *hits_host, *hits_dev; 88 | size_t hits_sz; 89 | unsigned nthreads; 90 | unsigned count = argc > 1 ? atoi(argv[1]) : 0; 91 | double d = 0.; 92 | 93 | d = timer(&d); 94 | progname = argv[0]; 95 | debug = argc > 2 ? atoi(argv[2]): 0; 96 | 97 | infop = cuda_init(argc > 3 ? 
argv[3] : NULL); 98 | nthreads = infop->blocks_per_grid * infop->threads_per_block; 99 | if (count == 0) 100 | count = NTRIES/nthreads; 101 | 102 | hits_sz = nthreads * sizeof(hits_host[0]); 103 | CHECKCALL(cudaMalloc(&hits_dev, hits_sz)); 104 | CHECKNOTZERO((hits_host = (uint2 *)malloc(hits_sz))); 105 | 106 | printf("starting %u blocks with %u threads/block for %u points each with seed 0x%x\n", 107 | infop->blocks_per_grid, infop->threads_per_block, count, seed); 108 | fflush(stdout); 109 | 110 | counthits<<blocks_per_grid, infop->threads_per_block>>>(count, seed, hits_dev); 111 | 112 | CHECKCALL(cudaDeviceSynchronize()); 113 | CHECKCALL(cudaMemcpy(hits_host, hits_dev, nthreads*sizeof(hits_dev[0]), 114 | cudaMemcpyDeviceToHost)); 115 | 116 | unsigned long hits = 0, tries = 0; 117 | for (unsigned i = 0; i < nthreads; i++) { 118 | if (debug) 119 | printf("%u %u %u\n", i, hits_host[i].x, hits_host[i].y); 120 | hits += hits_host[i].x; 121 | tries += hits_host[i].y; 122 | } 123 | CHECKCALL(cudaFree(hits_dev)); 124 | free(hits_host); 125 | cuda_done(infop); 126 | return pi_check(hits, tries); 127 | } 128 | -------------------------------------------------------------------------------- /examples/pi_microurng.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | // Everyone's favorite PRNG example: calculate pi/4 by throwing darts 33 | // at a square board and counting the fraction that are inside the 34 | // inscribed circle. 35 | 36 | // This version uses Philox4x32 with a MicroURNG and the C++11 standard 37 | // library std::uniform_real distribution to generate floats in [-1..1] 38 | 39 | // N.B. The results are hardware dependent even though the underlying 40 | // counter based RNG is hardware and endian-invariant. On x86, 41 | // floating point temporaries, e.g., x, y, x*x, etc., are stored in 42 | // 80-bit extended precision registers. On x86-64 (and other IEEE-754 43 | // systems), temporaries are stored in 32-bit SSE registers. 
44 | 45 | #include 46 | #include 47 | #include 48 | #if R123_USE_CXX11_RANDOM 49 | #include 50 | #endif 51 | #include 52 | #include 53 | #include "pi_check.h" 54 | 55 | using namespace r123; 56 | 57 | int main(int, char**){ 58 | typedef Philox4x32 RNG; 59 | RNG::ctr_type c = {{}}; 60 | RNG::key_type k = {{}}; 61 | MicroURNG longmurng(c.incr(), k); 62 | #if R123_USE_STD_RANDOM 63 | std::uniform_real_distribution u(-1., 1.); 64 | 65 | // First, compute pi with a nice long MicroURNG that we can call 66 | // billions of times (2^31) before it runs out of state: 67 | unsigned long hits=0; 68 | std::cout << "Calling a single MicroURNG " << NTRIES << " times" << std::endl; 69 | for(unsigned long i=0; i shorturng(c.incr(), k); 84 | float x = u(shorturng); 85 | float y = u(shorturng); 86 | if( (x*x + y*y) < 1.0f ) 87 | hits++; 88 | } 89 | return pi_check(hits, NTRIES); 90 | #else 91 | // MicroURNG's are interesting because they allow us to use std::distributions, 92 | // as in the above code. Std::distributions are nice, but if all we need is 93 | // a uniform integer, we can do without such fancy C++11 features: 94 | unsigned long hits=0; 95 | std::cout << "Calling a single MicroURNG " << NTRIES << " times" << std::endl; 96 | for(unsigned long i=0; i::max() - 1.; 98 | float y = 2.*longmurng()/(double)std::numeric_limits::max() - 1.; 99 | if( (x*x + y*y) < 1.0f ) 100 | hits++; 101 | } 102 | if (pi_check(hits, NTRIES) != 0) { 103 | return 1; 104 | } 105 | #endif 106 | 107 | 108 | } 109 | -------------------------------------------------------------------------------- /tests/time_boxmuller.cpp: -------------------------------------------------------------------------------- 1 | // Test for boxmuller.h on CPU 2 | #include 3 | #include 4 | #include 5 | #include "util.h" // for timer() 6 | 7 | #if __GNUC__>=7 8 | #pragma GCC diagnostic ignored "-Wimplicit-fallthrough" 9 | #endif 10 | 11 | typedef r123::Philox4x32 CBRNGF; 12 | #if R123_USE_64BIT 13 | typedef r123::Threefry2x64 
CBRNGD; 14 | #endif 15 | 16 | const char *progname = "time_boxmuller"; 17 | 18 | // Each call to boxmuller() returns a pair of values in the .x and .y 19 | // members, which we add up into sum just to avoid being optimized away. 20 | template 21 | F timedloop(typename CBRNG::ukey_type k, size_t Ntry){ 22 | F sum = 0.f; 23 | typename CBRNG::ctr_type ctr = {{}}; 24 | const size_t csize = sizeof(ctr)/sizeof(ctr[0]); 25 | CBRNG rng; 26 | 27 | for(size_t i=0; i 45 | void dumploop(FILE *fp, typename CBRNG::ukey_type k, size_t Ntry){ 46 | typename CBRNG::ctr_type ctr = {{}}; 47 | const size_t csize = sizeof(ctr)/sizeof(ctr[0]); 48 | CBRNG rng; 49 | 50 | for(size_t i=0; i 68 | void timedcall(const char *tname, typename CBRNG::ukey_type k, size_t Ntry, char *out_fname) { 69 | double cur_time, dt; 70 | F sums[NREPEAT]; 71 | int i; 72 | FILE *fp; 73 | char *fname; 74 | if (out_fname) { 75 | fname = (char *) malloc(strlen(out_fname) + strlen(tname) + 2); 76 | CHECKNOTZERO(fname); 77 | sprintf(fname, "%s-%s", out_fname, tname); 78 | fp = fopen(fname, "w"); 79 | CHECKNOTZERO(fp); 80 | } else { 81 | fname = NULL; 82 | fp = NULL; 83 | } 84 | (void) timer(&cur_time); 85 | /* 86 | * we call timedloop NREPEAT times so that it is easy to keep 87 | * Ntry the same for boxmuller.cu and boxmuller.cpp, so sum[0] 88 | * can be checked. 
89 | */ 90 | for (i = 0; i < NREPEAT; i++) { 91 | k.v[sizeof(k)/sizeof(k.v[0])-1] = i; 92 | if (fp) 93 | dumploop(fp, k, Ntry); 94 | else 95 | sums[i] = timedloop(k, Ntry); 96 | } 97 | dt = timer(&cur_time); 98 | if (fp) { 99 | printf("%s %lu written to %s in %g sec: %gM/sec\n", tname, (unsigned long)(Ntry*NREPEAT), fname, dt, Ntry*NREPEAT*1.e-6/dt); 100 | fclose(fp); 101 | free(fname); 102 | } else { 103 | printf("%s %lu in %g sec: %gM/sec, sum = %g\n", tname, (unsigned long)(Ntry*NREPEAT), dt, Ntry*NREPEAT*1.e-6/dt, sums[0]); 104 | for (i = 1; i < NREPEAT; i++) { 105 | printf(" %g", sums[i]); 106 | } 107 | printf("\n"); 108 | } 109 | } 110 | 111 | const size_t DEF_N = 200000; 112 | 113 | int main(int argc, char **argv){ 114 | CBRNGF::ukey_type keyf = {{}}; 115 | #if R123_USE_64BIT 116 | CBRNGD::ukey_type keyd = {{}}; 117 | #endif 118 | size_t Ntry = DEF_N; 119 | char *dumpfname; 120 | 121 | dumpfname = getenv("BOXMULLER_DUMPFILE"); 122 | if(argc>1) { 123 | if (argv[1][0] == '-') { 124 | fprintf(stderr, "Usage: %s [iterations_per_thread [key0 [key1]]]\n", argv[0]); 125 | exit(1); 126 | } 127 | Ntry = atol(argv[1]); 128 | } 129 | for (int i = 0; i < (int)(sizeof(keyf)/sizeof(keyf[0])-1) && 2+i < argc; i++) { 130 | keyf.v[i] = atol(argv[2+i]); 131 | } 132 | timedcall("float", keyf, Ntry, dumpfname); 133 | 134 | #if R123_USE_64BIT 135 | for (int i = 0; i < (int)(sizeof(keyd)/sizeof(keyd[0])-1) && 2+i < argc; i++) { 136 | keyd.v[i] = atol(argv[2+i]); 137 | } 138 | timedcall("double", keyd, Ntry, dumpfname); 139 | #endif 140 | return 0; 141 | } 142 | 143 | -------------------------------------------------------------------------------- /tests/kat_opencl.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 
4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include "util_opencl.h" 33 | #include "kat_main.h" 34 | 35 | // USE_GENCL: gencl.sh is a small shell script 36 | // that pre-processes foo.ocl into foo.i, containing 37 | // a definition like: 38 | // const char opencl_src[] = "preprocessed text of foo.ocl" 39 | // Thus, with gencl, this file says 40 | // #include 41 | // and the binary obtained by compiling it 42 | // is fully "baked". 
Runtime behavior doesn't depend 43 | // on the contents of some file (e.g., foo.ocl or some 44 | // header that it includes) that might have changed long after this 45 | // file was compiled. 46 | // 47 | // The alternative (USE_GENCL 0) seems to be more along 48 | // the lines of what OpenCL designers imagine. It makes the text of the 49 | // kernel program the string "#include ". This eliminates 50 | // the need for the extra machinery in gencl.sh, but runtime 51 | // behavior is susceptible to changes in foo.c, or files included 52 | // by foo.c long after this file is compiled. It also requires some 53 | // hocus pocus to get absolute paths for the -I options needed 54 | // to compile the code at runtime. Something like: 55 | // override CFLAGS += -DSRCDIR=\"$(dir $(abspath $<)).\" 56 | #define USE_GENCL 1 57 | 58 | #if USE_GENCL 59 | const char *opencl_src = 60 | #include "kat_opencl_kernel.i" 61 | ; 62 | #else 63 | #ifndef SRCDIR 64 | #error -DSRCDIR="/absolute/path/to/examples" should have been put on the command-line by GNUmakefile 65 | #endif 66 | #endif 67 | 68 | void host_execute_tests(kat_instance *tests, unsigned ntests){ 69 | UCLInfo *infop; 70 | cl_kernel kern; 71 | size_t nthreads, tests_sz; 72 | cl_mem tests_dev; 73 | const char *kernelname = "dev_execute_tests"; 74 | cl_int err; 75 | 76 | #if USE_GENCL 77 | infop = opencl_init(NULL, opencl_src, ""); 78 | #else 79 | infop = opencl_init(NULL, "#include ", 80 | " -I" SRCDIR 81 | " -I" SRCDIR "/../include " 82 | " -DKAT_KERNEL=__kernel " 83 | " -DKAT_GLOBAL=__global "); 84 | #endif 85 | CHECKERR(kern = clCreateKernel(infop->prog, kernelname, &err)); 86 | if (infop->wgsize > 64) infop->wgsize /= 2; 87 | nthreads = infop->cores * infop->wgsize; 88 | tests_sz = sizeof(*tests) * (ntests+1); // +1 for sentinel test with method==last 89 | CHECKERR(tests_dev = clCreateBuffer(infop->ctx, CL_MEM_READ_WRITE|CL_MEM_USE_HOST_PTR, tests_sz, tests, &err)); 90 | CHECK(clEnqueueWriteBuffer(infop->cmdq, tests_dev, 
CL_TRUE, 0, tests_sz, tests, 0, 0, 0)); 91 | CHECK(clSetKernelArg(kern, 0, sizeof(cl_mem), (void*)&tests_dev)); 92 | printf("queuing kernel for %lu threads with %lu work group size, %u tests\n", 93 | (unsigned long)nthreads, (unsigned long)infop->wgsize, ntests); 94 | CHECK(clEnqueueNDRangeKernel(infop->cmdq, kern, 1, 0, &nthreads, &infop->wgsize, 0, 0, 0)); 95 | CHECK(clFinish(infop->cmdq)); 96 | CHECK(clEnqueueReadBuffer(infop->cmdq, tests_dev, CL_TRUE, 0, tests_sz, tests, 0, 0, 0)); 97 | CHECK(clReleaseMemObject(tests_dev)); 98 | CHECK(clReleaseKernel(kern)); 99 | opencl_done(infop); 100 | } 101 | -------------------------------------------------------------------------------- /include/Random123/features/nvccfeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __r123_nvcc_features_dot_h__ 33 | #define __r123_nvcc_features_dot_h__ 34 | 35 | #if !defined(CUDART_VERSION) 36 | #error "why are we in nvccfeatures.h if CUDART_VERSION is not defined" 37 | #endif 38 | 39 | #if CUDART_VERSION < 4010 40 | #error "CUDA versions earlier than 4.1 produce incorrect results for some templated functions in namespaces. Random123 isunsupported. See comments in nvccfeatures.h" 41 | // This test was added in Random123-1.08 (August, 2013) because we 42 | // discovered that Ftype(maxTvalue()) with Ftype=double and 43 | // T=uint64_t in examples/uniform.hpp produces -1 for CUDA4.0 and 44 | // earlier. We can't be sure this bug doesn't also affect invocations 45 | // of other templated functions, e.g., essentially all of Random123. 46 | // Thus, we no longer trust CUDA versions earlier than 4.1 even though 47 | // we had previously tested and timed Random123 with CUDA 3.x and 4.0. 48 | // If you feel lucky or desperate, you can change #error to #warning, but 49 | // please take extra care to be sure that you are getting correct 50 | // results. 51 | #endif 52 | 53 | // nvcc falls through to gcc or msvc. 
So first define 54 | // a couple of things and then include either gccfeatures.h 55 | // or msvcfeatures.h 56 | 57 | //#ifdef __CUDA_ARCH__ allows Philox32 and Philox64 to be compiled 58 | //for both device and host functions in CUDA by setting compiler flags 59 | //for the device function 60 | #ifdef __CUDA_ARCH__ 61 | #ifndef R123_CUDA_DEVICE 62 | #define R123_CUDA_DEVICE __device__ 63 | #endif 64 | 65 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 66 | #define R123_USE_MULHILO64_CUDA_INTRIN 1 67 | #endif 68 | 69 | #ifndef R123_THROW 70 | // No exceptions in CUDA, at least up to 4.0 71 | #define R123_THROW(x) R123_ASSERT(0) 72 | #endif 73 | 74 | #ifndef R123_ASSERT 75 | #define R123_ASSERT(x) if((x)) ; else asm("trap;") 76 | #endif 77 | 78 | #ifndef R123_BUILTIN_EXPECT 79 | #define R123_BUILTIN_EXPECT(expr,likely) expr 80 | #endif 81 | 82 | #ifndef R123_USE_AES_NI 83 | #define R123_USE_AES_NI 0 84 | #endif 85 | 86 | #ifndef R123_USE_SSE4_2 87 | #define R123_USE_SSE4_2 0 88 | #endif 89 | 90 | #ifndef R123_USE_SSE4_1 91 | #define R123_USE_SSE4_1 0 92 | #endif 93 | 94 | #ifndef R123_USE_SSE 95 | #define R123_USE_SSE 0 96 | #endif 97 | 98 | #ifndef R123_USE_GNU_UINT128 99 | #define R123_USE_GNU_UINT128 0 100 | #endif 101 | 102 | #ifndef R123_ULONG_LONG 103 | // uint64_t, which is what we'd get without this, is 104 | // not the same as unsigned long long 105 | #define R123_ULONG_LONG unsigned long long 106 | #endif 107 | 108 | #else // ! __CUDA_ARCH__ 109 | 110 | // If we're using nvcc, but not compiling for the CUDA architecture, 111 | // then we must be compiling for the host. But host-compilation might 112 | // use gcc, msvc, or xlc. This #else/#endif used to be higher up, 113 | // mistakenly turning off all kinds of things on the host that are really 114 | // problematic only in device code. It's not clear that we need to do 115 | // anything special for host-code that we wouldn't otherwise do in 116 | // xlcfeatures, gccfeatures or msvcfeatures. 
But if we do, this is 117 | // the place to do it. 118 | 119 | #endif // __CUDA_ARCH__ 120 | 121 | #if defined(__xlC__) || defined(__ibmxl__) 122 | #include "xlcfeatures.h" 123 | #elif defined(__GNUC__) 124 | #include "gccfeatures.h" 125 | #elif defined(_MSC_FULL_VER) 126 | #include "msvcfeatures.h" 127 | #endif 128 | 129 | #endif 130 | -------------------------------------------------------------------------------- /include/Random123/features/sunprofeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __sunprofeatures_dot_hpp 33 | #define __sunprofeatures_dot_hpp 34 | 35 | #ifndef R123_STATIC_INLINE 36 | #define R123_STATIC_INLINE static inline 37 | #endif 38 | 39 | #ifndef R123_FORCE_INLINE 40 | #define R123_FORCE_INLINE(decl) decl 41 | #endif 42 | 43 | #ifndef R123_CUDA_DEVICE 44 | #define R123_CUDA_DEVICE 45 | #endif 46 | 47 | #ifndef R123_ASSERT 48 | #include 49 | #define R123_ASSERT(x) assert(x) 50 | #endif 51 | 52 | #ifndef R123_BUILTIN_EXPECT 53 | #define R123_BUILTIN_EXPECT(expr,likely) expr 54 | #endif 55 | 56 | // The basic idiom is: 57 | // #ifndef R123_SOMETHING 58 | // #if some condition 59 | // #define R123_SOMETHING 1 60 | // #else 61 | // #define R123_SOMETHING 0 62 | // #endif 63 | // #endif 64 | // This idiom allows an external user to override any decision 65 | // in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHING=0 66 | 67 | // An alternative idiom is: 68 | // #ifndef R123_SOMETHING 69 | // #define R123_SOMETHING (some boolean expression) 70 | // #endif 71 | // where the boolean expression might contain previously-defined R123_SOMETHING_ELSE 72 | // pp-symbols. 
73 | 74 | #ifndef R123_USE_AES_NI 75 | #define R123_USE_AES_NI 0 76 | #endif 77 | 78 | #ifndef R123_USE_SSE4_2 79 | #define R123_USE_SSE4_2 0 80 | #endif 81 | 82 | #ifndef R123_USE_SSE4_1 83 | #define R123_USE_SSE4_1 0 84 | #endif 85 | 86 | #ifndef R123_USE_SSE 87 | #define R123_USE_SSE 0 88 | #endif 89 | 90 | #ifndef R123_USE_AES_OPENSSL 91 | #define R123_USE_AES_OPENSSL 0 92 | #endif 93 | 94 | #ifndef R123_USE_GNU_UINT128 95 | #define R123_USE_GNU_UINT128 0 96 | #endif 97 | 98 | #ifndef R123_USE_ASM_GNU 99 | #define R123_USE_ASM_GNU 0 100 | #endif 101 | 102 | #ifndef R123_USE_CPUID_MSVC 103 | #define R123_USE_CPUID_MSVC 0 104 | #endif 105 | 106 | #ifndef R123_USE_X86INTRIN_H 107 | #define R123_USE_X86INTRIN_H 0 108 | #endif 109 | 110 | #ifndef R123_USE_IA32INTRIN_H 111 | #define R123_USE_IA32INTRIN_H 0 112 | #endif 113 | 114 | #ifndef R123_USE_XMMINTRIN_H 115 | #define R123_USE_XMMINTRIN_H 0 116 | #endif 117 | 118 | #ifndef R123_USE_EMMINTRIN_H 119 | #define R123_USE_EMMINTRIN_H 0 120 | #endif 121 | 122 | #ifndef R123_USE_SMMINTRIN_H 123 | #define R123_USE_SMMINTRIN_H 0 124 | #endif 125 | 126 | #ifndef R123_USE_WMMINTRIN_H 127 | #define R123_USE_WMMINTRIN_H 0 128 | #endif 129 | 130 | #ifndef R123_USE_INTRIN_H 131 | #define R123_USE_INTRIN_H 0 132 | #endif 133 | 134 | #ifndef R123_USE_MULHILO16_ASM 135 | #define R123_USE_MULHILO16_ASM 0 136 | #endif 137 | 138 | #ifndef R123_USE_MULHILO32_ASM 139 | #define R123_USE_MULHILO32_ASM 0 140 | #endif 141 | 142 | #ifndef R123_USE_MULHILO64_ASM 143 | #define R123_USE_MULHILO64_ASM 0 144 | #endif 145 | 146 | #ifndef R123_USE_MULHILO64_MSVC_INTRIN 147 | #define R123_USE_MULHILO64_MSVC_INTRIN 0 148 | #endif 149 | 150 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 151 | #define R123_USE_MULHILO64_CUDA_INTRIN 0 152 | #endif 153 | 154 | #ifndef R123_USE_MULHILO64_OPENCL_INTRIN 155 | #define R123_USE_MULHILO64_OPENCL_INTRIN 0 156 | #endif 157 | 158 | #ifndef R123_USE_PHILOX_64BIT 159 | #define R123_USE_PHILOX_64BIT 0 160 | #endif 161 | 
162 | #ifndef __STDC_CONSTANT_MACROS 163 | #define __STDC_CONSTANT_MACROS 164 | #endif 165 | #include 166 | #ifndef UINT64_C 167 | #error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include 168 | #endif 169 | 170 | // If you add something, it must go in all the other XXfeatures.hpp 171 | // and in ../ut_features.cpp 172 | #endif 173 | -------------------------------------------------------------------------------- /tests/ut_M128.cpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2016, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #if !R123_USE_SSE 34 | #include 35 | int main(){ printf("No SSE. Nothing to check\n"); return 0; } 36 | #else 37 | 38 | #include 39 | #include 40 | 41 | int main(int, char **){ 42 | r123m128i uninitialized; 43 | __m128i zm = _mm_setzero_si128(); 44 | #if R123_USE_CXX1X_UNRESTRICTED_UNIONS 45 | r123m128i zM(zm); 46 | #else 47 | r123m128i zM; zM.m = zm; 48 | #endif 49 | uninitialized.m = _mm_setzero_si128(); 50 | 51 | // operator bool (or maybe void*) 52 | assert(!uninitialized); 53 | assert(!zM); 54 | 55 | // operator=(__m128i) 56 | // conversion to __m128i 57 | __m128i one = _mm_set_epi32(0, 0, 0, 1); 58 | __m128i two = _mm_set_epi32(0, 0, 0, 2); 59 | r123m128i One, Two; 60 | One = one; 61 | Two = two; 62 | assert(!!One); 63 | assert(!!Two); 64 | r123m128i AnotherOne; 65 | AnotherOne = one; 66 | 67 | assert( AnotherOne == One ); 68 | assert( Two != One ); 69 | __m128i m = One; 70 | AnotherOne = m; 71 | assert( AnotherOne == One ); 72 | 73 | // operator++ (prefix) 74 | ++One; 75 | assert( One == Two ); 76 | assert( One != AnotherOne ); 77 | 78 | // operator+=(R123_ULONG_LONG) 79 | // operator==(R123_ULONG_LONG, r123m128i) 80 | R123_ULONG_LONG ull = 2; 81 | AnotherOne += 1; 82 | for(int i=0; i<1000; ++i){ 83 | AnotherOne += i; 84 | ull += i; 85 | for(int j=0; j> YetAnother; 109 | 110 | assert( YetAnother == AnotherOne ); 111 | } 112 | #endif 113 | 114 | // Sep 2011 - clang in the fink build of 
llvm-2.9.1 on MacOS 10.5.8 115 | // fails to catch anything, and hence fails this test. I suspect 116 | // a problem with the packaging/installation rather than a bug 117 | // in llvm. However, if it shows up in other contexts, some 118 | // kind of #ifndef might be appropriate. N.B. There's a similar 119 | // exception test in ut_carray.cpp 120 | bool caught; 121 | caught = false; 122 | try{ 123 | (void)(One < AnotherOne); 124 | }catch(std::runtime_error& ){ caught = true; } 125 | assert(caught); 126 | 127 | caught = false; 128 | try{ 129 | (void)(One <= AnotherOne); 130 | }catch(std::runtime_error& ){ caught = true; } 131 | assert(caught); 132 | 133 | caught = false; 134 | try{ 135 | (void)(One > AnotherOne); 136 | }catch(std::runtime_error& ){ caught = true; } 137 | assert(caught); 138 | 139 | caught = false; 140 | try{ 141 | (void)(One >= AnotherOne); 142 | }catch(std::runtime_error& ){ caught = true; } 143 | assert(caught); 144 | 145 | // assemble_from_u32 146 | 147 | std::cout << "ut_M128: OK\n"; 148 | return 0; 149 | } 150 | 151 | #endif 152 | -------------------------------------------------------------------------------- /include/Random123/boxmuller.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | 33 | // This file implements the Box-Muller method for generating gaussian 34 | // random variables (GRVs). Box-Muller has the advantage of 35 | // deterministically requiring exactly two uniform random variables as 36 | // input and producing exactly two GRVs as output, which makes it 37 | // especially well-suited to the counter-based generators in 38 | // Random123. Other methods (e.g., Ziggurat, polar) require an 39 | // indeterminate number of inputs for each output and so require a 40 | // 'MicroURNG' to be used with Random123. The down side of Box-Muller 41 | // is that it calls sincos, log and sqrt, which may be slow. However, 42 | // on GPUs, these functions are remarkably fast, which makes 43 | // Box-Muller the fastest GRV generator we know of on GPUs. 
44 | // 45 | // This file exports two structs and one overloaded function, 46 | // all in the r123 namespace: 47 | // struct r123::float2{ float x,y; } 48 | // struct r123::double2{ double x,y; } 49 | // 50 | // r123::float2 r123::boxmuller(uint32_t u0, uint32_t u1); 51 | // r123::double2 r123::boxmuller(uint64_t u0, uint64_t u1); 52 | // 53 | // float2 and double2 are identical to their synonymous global- 54 | // namespace structures in CUDA. 55 | // 56 | // This file may not be as portable, and has not been tested as 57 | // rigorously as other files in the library, e.g., the generators. 58 | // Nevertheless, we hope it is useful and we encourage developers to 59 | // copy it and modify it for their own use. We invite comments and 60 | // improvements. 61 | 62 | #ifndef _r123_BOXMULLER_HPP__ 63 | #define _r123_BOXMULLER_HPP__ 64 | 65 | #include 66 | #include 67 | #include 68 | 69 | namespace r123{ 70 | 71 | #if !defined(__CUDACC__) 72 | typedef struct { float x, y; } float2; 73 | typedef struct { double x, y; } double2; 74 | #else 75 | typedef ::float2 float2; 76 | typedef ::double2 double2; 77 | #endif 78 | 79 | #if !defined(R123_NO_SINCOS) && defined(__APPLE__) 80 | /* MacOS X 10.10.5 (2015) doesn't have sincosf */ 81 | #define R123_NO_SINCOS 1 82 | #endif 83 | 84 | #if R123_NO_SINCOS /* enable this if sincos and sincosf are not in the math library */ 85 | R123_CUDA_DEVICE R123_STATIC_INLINE void sincosf(float x, float *s, float *c) { 86 | *s = sinf(x); 87 | *c = cosf(x); 88 | } 89 | 90 | R123_CUDA_DEVICE R123_STATIC_INLINE void sincos(double x, double *s, double *c) { 91 | *s = sin(x); 92 | *c = cos(x); 93 | } 94 | #endif /* sincos is not in the math library */ 95 | 96 | #if !defined(CUDART_VERSION) || CUDART_VERSION < 5000 /* enabled if sincospi and sincospif are not in math lib */ 97 | 98 | R123_CUDA_DEVICE R123_STATIC_INLINE void sincospif(float x, float *s, float *c){ 99 | const float PIf = 3.1415926535897932f; 100 | sincosf(PIf*x, s, c); 101 | } 102 | 103 
| R123_CUDA_DEVICE R123_STATIC_INLINE void sincospi(double x, double *s, double *c) { 104 | const double PI = 3.1415926535897932; 105 | sincos(PI*x, s, c); 106 | } 107 | #endif /* sincospi is not in math lib */ 108 | 109 | /* 110 | * take two 32bit unsigned random values and return a float2 with 111 | * two random floats in a normal distribution via a Box-Muller transform 112 | */ 113 | R123_CUDA_DEVICE R123_STATIC_INLINE float2 boxmuller(uint32_t u0, uint32_t u1) { 114 | float r; 115 | float2 f; 116 | sincospif(uneg11(u0), &f.x, &f.y); 117 | r = sqrtf(-2.f * logf(u01(u1))); // u01 is guaranteed to avoid 0. 118 | f.x *= r; 119 | f.y *= r; 120 | return f; 121 | } 122 | 123 | /* 124 | * take two 64bit unsigned random values and return a double2 with 125 | * two random doubles in a normal distribution via a Box-Muller transform 126 | */ 127 | R123_CUDA_DEVICE R123_STATIC_INLINE double2 boxmuller(uint64_t u0, uint64_t u1) { 128 | double r; 129 | double2 f; 130 | 131 | sincospi(uneg11(u0), &f.x, &f.y); 132 | r = sqrt(-2. * log(u01(u1))); // u01 is guaranteed to avoid 0. 133 | f.x *= r; 134 | f.y *= r; 135 | return f; 136 | } 137 | } // namespace r123 138 | 139 | #endif /* BOXMULLER_H__ */ 140 | -------------------------------------------------------------------------------- /examples/GNUmakefile: -------------------------------------------------------------------------------- 1 | no_target_specified: runcore 2 | @echo 3 | @echo The default make rule is equivalent to \'make runcore\' which builds 4 | @echo and runs most of the examples. 5 | @echo The following \'meta-targets\' are available: 6 | @echo " " $(meta_targets) 7 | @echo Here is the complete list of individual program targets: 8 | @echo " " $(all_primary_targets) 9 | @echo Prepend \'run\' to any of the program targets or metatargets 10 | @echo to run the binary and check for a zero exit status. 11 | @echo Adding force=1 on the command line causes all targets to be considered out-of-date. 
.PHONY: no_target_specified

# metatargets are variables which get mapped by METATARGET_template
meta_targets:=core aesni c cpp gsl cuda opencl thread metal

# Platform metatargets: each one typically has specific requirements in the build environment.
# c is C99 (will work in MSVC), cpp is C++98, gsl requires the GNU Scientific Library
# (specifically, the gsl-config program in the PATH), thread requires POSIX threads,
# CUDA requires NVIDIA CUDA 3.x or newer, OpenCL requires OpenCL includes & libraries
# (e.g. AMD APP SDK, NVIDIA SDK)
c:=simple pi_capi
cpp:=simplepp pi_uniform pi_cppapi pi_microurng
gsl:=pi_gsl
cuda:=pi_cuda pi_cudapp
opencl:=pi_opencl
thread:= # ?? should we have a thread example?
metal:=pi_metal

# Convenience metatargets: these are to help developers test functional subsets across platforms
core:=$(c) $(cpp)
aesni:=pi_aes

$(gsl) : override LDLIBS += `gsl-config --libs`
$(gsl) : override CFLAGS += `gsl-config --cflags`

$(opencl) : % : %_kernel.i
# Or try this if we USE_GENCL in kat_opencl.c
#$(opencl) : override CPPFLAGS += -DSRCDIR=\"$(dir $(abspath $<)).\"

ifeq ($(shell uname),Darwin)
$(opencl) : override LDLIBS+=-framework OpenCL
else
$(opencl) : override LDLIBS+=-lOpenCL
endif
$(opencl) : override CFLAGS+=-I.
# Note, the Intel OpenCL SDK (1.5) has unresolved C++ symbols in its
# libOpenCL.so  Even though 'main' is a C program, you may need to link
# it with a C++ compiler-driver, e.g., g++.  Since this Makefile does
# compile-and-link in one step, use something like:
#     $(opencl) : CC=g++ -xc
# which will invoke the g++ compiler-driver, but will treat the
# program as C rather than C++.

$(metal) : % : %_kernel.metallib
$(metal) : override LDLIBS+=-framework Metal -framework Foundation -framework CoreGraphics

all_primary_targets += $(addsuffix _kernel.i, $(opencl))
all_primary_targets += $(addsuffix _kernel.metallib, $(metal))
all_primary_targets += $(addsuffix _kernel.air, $(metal))

################################################
# Generic boilerplate from here down:
# One vpath per source suffix that has a pattern rule below, so that
# out-of-tree builds (srcdir/ set) can find every kind of source.
vpath %.c $(srcdir/)
vpath %.cpp $(srcdir/)
vpath %.cu $(srcdir/)
vpath %.ocl $(srcdir/)
vpath %.metal $(srcdir/)
vpath %.m $(srcdir/)

# For metatarget X: 'X' builds its programs, 'runX' runs them, and the
# programs are added to all_primary_targets.  SKIP_TARGETS filters out
# programs that should not be built or run.
define METATARGET_template
.PHONY: $(1)
$(1) : $(filter-out $(SKIP_TARGETS), $($(1)))
.PHONY: run$(1)
run$(1) : $(addprefix run, $(filter-out $(SKIP_TARGETS), $($(1))))
all_primary_targets += $($(1))
endef

$(foreach T,$(meta_targets), $(eval $(call METATARGET_template,$(T))))

# sort also does 'uniq'
all_primary_targets:=$(sort $(all_primary_targets))

INC=$(srcdir/)../include
override CPPFLAGS += -I$(INC)

ifndef NVCC
NVCC:=nvcc
endif
# The rngs are *very* slow without optimization.  In the simplest case,
# where the user just calls 'make', we don't want them to see terrible
# performance.  Unfortunately, this might surprise someone
# who says, e.g., make CPPFLAGS=-O0.  Oh well...
ifndef CFLAGS
CFLAGS:=-O
endif
ifndef CXXFLAGS
CXXFLAGS:=-O
endif

%.i : %.ocl
	CC="$(CC)" CPPFLAGS="$(CPPFLAGS)" $(srcdir/)./gencl.sh $< > $@

% : %.cu
	$(NVCC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LOADLIBES) $(LDLIBS) -o $@

% : %.c
	$(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LOADLIBES) $(LDLIBS) -o $@

% : %.cpp
	$(CXX) $(CXXFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LOADLIBES) $(LDLIBS) -o $@

% : %.m
	$(CC) $(OBJCFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LOADLIBES) $(LDLIBS) -o $@

%.air : %.metal
	xcrun --sdk macosx metal $(CPPFLAGS) -c $< -o $@

%.metallib : %.air
	xcrun --sdk macosx metallib $< -o $@

run% : %
	./$^ $(RUN_ARGS)

# In lieu of autodepends, just say that all the compilation targets depend on all the headers.
hdrs:=$(wildcard $(srcdir/)*.h $(srcdir/)gsl/*.c $(INC)/Random123/*.h $(INC)/Random123/*.hpp $(INC)/Random123/*/*.h $(INC)/Random123/*/*.hpp)
misc:=$(wildcard $(srcdir/)*.cu $(srcdir/)*.ocl)
$(all_primary_targets) : $(hdrs)
$(misc) : $(hdrs)

# If you put force=y on the command line, then $(all_primary_targets) will
# depend on FORCE, and hence will not be up-to-date.
ifdef force
$(all_primary_targets) : FORCE
FORCE:
endif

# Use $(MAKE) rather than a literal 'make' so the sub-make is the same
# program (and sees the same command-line variables) as the one running
# this makefile.
.PHONY : echo_build_commands
echo_build_commands:
	$(MAKE) -n force=1 $(all_primary_targets) | grep -v 'is up to date'

.PHONY : clean veryclean
clean:
	rm -f $(all_primary_targets)

veryclean:
	rm -f $(all_primary_targets) *.o \#* *~ *.pdb *.exe *.obj *.ilk *.suo

.PHONY : install

# N.B. normally these are exported by ../GNUmakefile
prefix?=/usr/local
datarootdir?=$(prefix)/share
docdir?=$(datarootdir)/doc/Random123
install:
	mkdir -p $(DESTDIR)$(docdir)/examples
	cp README GNUmakefile *.sh *.c *.h *.cpp *.cu *.metal *.m *.ocl $(DESTDIR)$(docdir)/examples
--------------------------------------------------------------------------------
/examples/pi_uniform.cpp:
--------------------------------------------------------------------------------
/*
Copyright 2010-2011, D. E. Shaw Research.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

* Redistributions of source code must retain the above copyright
  notice, this list of conditions, and the following disclaimer.

* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions, and the following disclaimer in the
  documentation and/or other materials provided with the distribution.

* Neither the name of D. E. Shaw Research nor the names of its
  contributors may be used to endorse or promote products derived from
  this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #include 33 | #include 34 | #include 35 | 36 | /* Compute pi, using the u01 conversion with threefry2x64 and threefry2x32 */ 37 | 38 | #include "pi_check.h" 39 | #include "example_seeds.h" 40 | 41 | using namespace r123; 42 | 43 | template 44 | void pi(typename CBRNG::key_type k); 45 | 46 | int errs = 0; 47 | int main(int, char **){ 48 | uint64_t seed64 = example_seed_u64(EXAMPLE_SEED1_U64); // example user-settable seed 49 | unsigned long hits = 0, tries = 0; 50 | 51 | // First, we demonstrate how to compute pi 52 | // using uneg11 to convert the integer output 53 | // of threefry2x64 to a double in (-1, 1). 54 | Threefry2x64::ctr_type c = {{0}}, r; 55 | Threefry2x64::ukey_type uk = {{seed64}}; 56 | Threefry2x64::key_type k = uk; 57 | printf("%lu uniform doubles from threefry2x64\n", NTRIES); 58 | while (tries < NTRIES) { 59 | double x, y; 60 | c.v[0]++; /* increment the counter */ 61 | r = threefry2x64(c, k); 62 | x = uneg11(r.v[0]); 63 | y = uneg11(r.v[1]); 64 | if( x*x + y*y < 1.0 ) 65 | hits++; 66 | tries++; 67 | } 68 | errs += pi_check(hits, tries); 69 | 70 | // Extra credit: use some template hackery to exercise various 71 | // combinations of float, double and long double, unit64_t and 72 | // uint32_t and the conversion functions u01, uneg11 and ufixed01. 73 | // This provides minimal testing of the conversion functions. 
74 | pi(k); 75 | pi(k); 76 | pi(k); 77 | uint32_t seed32 = example_seed_u32(EXAMPLE_SEED9_U32); 78 | 79 | Threefry2x32::ukey_type ukh = {{seed32}}; 80 | Threefry2x32::key_type kh = ukh; 81 | pi(kh); 82 | pi(kh); 83 | pi(kh); 84 | 85 | return !!errs; 86 | } 87 | 88 | template 89 | void pi(typename CBRNG::key_type k){ 90 | unsigned long hits = 0, tries = 0; 91 | CBRNG rng; 92 | 93 | printf("Compute pi with uneg11:\n"); 94 | typename CBRNG::ctr_type c = {{0}}, r; 95 | hits = tries = 0; 96 | while (tries < NTRIES) { 97 | Ftype x, y; 98 | c.v[0]++; /* increment the counter */ 99 | r = rng(c, k); 100 | // x and y in the entire square from (-1,-1) to (1,1) 101 | x = uneg11(r.v[0]); 102 | y = uneg11(r.v[1]); 103 | if( x*x + y*y < 1.0 ) 104 | hits++; 105 | tries++; 106 | } 107 | errs += pi_check(hits, tries); 108 | 109 | #if __cplusplus >= 201103L 110 | printf("Compute pi with uneg11all (requires C++11):\n"); 111 | hits = tries = 0; 112 | while (tries < NTRIES) { 113 | c.v[0]++; /* increment the counter */ 114 | r = rng(c, k); 115 | // x and y in the entire square from (-1,-1) to (1,1) 116 | auto a = uneg11all(r); 117 | if( a[0]*a[0] + a[1]*a[1] < 1.0 ) 118 | hits++; 119 | tries++; 120 | } 121 | errs += pi_check(hits, tries); 122 | #endif 123 | 124 | printf("Compute pi with u01:\n"); 125 | hits = tries = 0; 126 | while (tries < NTRIES) { 127 | Ftype x, y; 128 | c.v[0]++; /* increment the counter */ 129 | r = rng(c, k); 130 | // generate x and y in the first quadrant from (0,0) to (1,1) 131 | x = u01(r.v[0]); 132 | y = u01(r.v[1]); 133 | if( x*x + y*y < 1.0 ) 134 | hits++; 135 | tries++; 136 | } 137 | errs += pi_check(hits, tries); 138 | 139 | printf("Compute pi with u01fixedpt:\n"); 140 | hits = tries = 0; 141 | while (tries < NTRIES) { 142 | Ftype x, y; 143 | c.v[0]++; /* increment the counter */ 144 | r = rng(c, k); 145 | // generate x and y in the first quadrant from (0,0) to (1,1) 146 | x = u01fixedpt(r.v[0]); 147 | y = u01fixedpt(r.v[1]); 148 | if( x*x + y*y < 1.0 ) 
149 | hits++; 150 | tries++; 151 | } 152 | errs += pi_check(hits, tries); 153 | } 154 | 155 | -------------------------------------------------------------------------------- /include/Random123/MicroURNG.hpp: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #ifndef __MicroURNG_dot_hpp__ 33 | #define __MicroURNG_dot_hpp__ 34 | 35 | #include 36 | #include 37 | 38 | namespace r123{ 39 | /** 40 | Given a CBRNG whose ctr_type has an unsigned integral value_type, 41 | MicroURNG(c, k) is a type that satisfies the 42 | requirements of a C++11 Uniform Random Number Generator. 43 | 44 | The intended purpose is for a MicroURNG to be passed 45 | as an argument to a C++11 Distribution, e.g., 46 | std::normal_distribution. See examples/MicroURNG.cpp. 47 | 48 | The MicroURNG functor has a period of "only" 49 | 50 | ctr_type.size()*2^32, 51 | 52 | after which it will silently repeat. 53 | 54 | The high 32 bits of the highest word in the counter c, passed to 55 | the constructor must be zero. MicroURNG uses these bits to 56 | "count". 57 | 58 | Older versions of the library permitted a second template 59 | parameter by which the caller could control the number of 60 | bits devoted to the URNG's internal counter. This flexibility 61 | has been disabled because URNGs created with different 62 | numbers of counter bits could, conceivably "collide". 63 | 64 | \code 65 | typedef ?someCBRNG? RNG; 66 | RNG::ctr_type c = ...; // under application control 67 | RNG::key_type k = ...; // 68 | std::normal_distribution nd; 69 | MicroURNG urng(c, k); 70 | for(???){ 71 | ... 72 | nd(urng); // may be called several hundred times with BITS=10 73 | ... 74 | } 75 | \endcode 76 | */ 77 | 78 | template 79 | class MicroURNG{ 80 | // According to C++11, a URNG requires only a result_type, 81 | // operator()(), min() and max() methods. Everything else 82 | // (ctr_type, key_type, reset() method, etc.) is "value added" 83 | // for the benefit of users that "know" that they're dealing with 84 | // a MicroURNG. 
85 | public: 86 | typedef CBRNG cbrng_type; 87 | static const int BITS = 32; 88 | typedef typename cbrng_type::ctr_type ctr_type; 89 | typedef typename cbrng_type::key_type key_type; 90 | typedef typename cbrng_type::ukey_type ukey_type; 91 | typedef typename ctr_type::value_type result_type; 92 | 93 | R123_STATIC_ASSERT( std::numeric_limits::digits >= BITS, "The result_type must have at least 32 bits" ); 94 | 95 | result_type operator()(){ 96 | if(last_elem == 0){ 97 | // jam n into the high bits of c 98 | const size_t W = std::numeric_limits::digits; 99 | ctr_type c = c0; 100 | c[c0.size()-1] |= n<<(W-BITS); 101 | rdata = b(c,k); 102 | n++; 103 | last_elem = rdata.size(); 104 | } 105 | return rdata[--last_elem]; 106 | } 107 | MicroURNG(cbrng_type _b, ctr_type _c0, ukey_type _uk) : b(_b), c0(_c0), k(_uk), n(0), last_elem(0) { 108 | chkhighbits(); 109 | } 110 | MicroURNG(ctr_type _c0, ukey_type _uk) : b(), c0(_c0), k(_uk), n(0), last_elem(0) { 111 | chkhighbits(); 112 | } 113 | 114 | // _Min and _Max work around a bug in the library shipped with MacOS Xcode 4.5.2. 115 | // See the commment in conventional/Engine.hpp. 
116 | const static result_type _Min = 0; 117 | const static result_type _Max = ~((result_type)0); 118 | 119 | static R123_CONSTEXPR result_type min R123_NO_MACRO_SUBST () { return _Min; } 120 | static R123_CONSTEXPR result_type max R123_NO_MACRO_SUBST () { return _Max; } 121 | // extra methods: 122 | const ctr_type& counter() const{ return c0; } 123 | void reset(ctr_type _c0, ukey_type _uk){ 124 | c0 = _c0; 125 | chkhighbits(); 126 | k = _uk; 127 | n = 0; 128 | last_elem = 0; 129 | } 130 | 131 | private: 132 | cbrng_type b; 133 | ctr_type c0; 134 | key_type k; 135 | R123_ULONG_LONG n; 136 | size_t last_elem; 137 | ctr_type rdata; 138 | void chkhighbits(){ 139 | result_type r = c0[c0.size()-1]; 140 | result_type mask = ((uint64_t)std::numeric_limits::max R123_NO_MACRO_SUBST ())>>BITS; 141 | if((r&mask) != r) 142 | throw std::runtime_error("MicroURNG: c0, does not have high bits clear"); 143 | } 144 | }; 145 | } // namespace r123 146 | #endif 147 | -------------------------------------------------------------------------------- /include/Random123/features/msvcfeatures.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef __msvcfeatures_dot_hpp 33 | #define __msvcfeatures_dot_hpp 34 | 35 | //#if _MSVC_FULL_VER <= 15 36 | //#error "We've only tested MSVC_FULL_VER==15." 37 | //#endif 38 | 39 | #if !defined(_M_IX86) && !defined(_M_X64) 40 | # error "This code has only been tested on x86 platforms." 41 | { // maybe an unbalanced brace will terminate the compilation 42 | // You are invited to try Random123 on other architectures, by changing 43 | // the conditions that reach this error, but you should consider it a 44 | // porting exercise and expect to encounter bugs and deficiencies. 45 | // Please let the authors know of any successes (or failures). 
46 | #endif 47 | 48 | #ifndef R123_STATIC_INLINE 49 | #define R123_STATIC_INLINE static __inline 50 | #endif 51 | 52 | #ifndef R123_FORCE_INLINE 53 | #define R123_FORCE_INLINE(decl) _forceinline decl 54 | #endif 55 | 56 | #ifndef R123_CUDA_DEVICE 57 | #define R123_CUDA_DEVICE 58 | #endif 59 | 60 | #ifndef R123_ASSERT 61 | #include 62 | #define R123_ASSERT(x) assert(x) 63 | #endif 64 | 65 | #ifndef R123_BUILTIN_EXPECT 66 | #define R123_BUILTIN_EXPECT(expr,likely) expr 67 | #endif 68 | 69 | // The basic idiom is: 70 | // #ifndef R123_SOMETHING 71 | // #if some condition 72 | // #define R123_SOMETHING 1 73 | // #else 74 | // #define R123_SOMETHING 0 75 | // #endif 76 | // #endif 77 | // This idiom allows an external user to override any decision 78 | // in this file with a command-line -DR123_SOMETHING=1 or -DR123_SOMETHINE=0 79 | 80 | // An alternative idiom is: 81 | // #ifndef R123_SOMETHING 82 | // #define R123_SOMETHING (some boolean expression) 83 | // #endif 84 | // where the boolean expression might contain previously-defined R123_SOMETHING_ELSE 85 | // pp-symbols. 
86 | 87 | #ifndef R123_USE_AES_NI 88 | #if defined(_M_X64) 89 | #define R123_USE_AES_NI 1 90 | #else 91 | #define R123_USE_AES_NI 0 92 | #endif 93 | #endif 94 | 95 | #ifndef R123_USE_SSE4_2 96 | #if defined(_M_X64) || _MSC_VER > 1899 97 | #define R123_USE_SSE4_2 1 98 | #else 99 | #define R123_USE_SSE4_2 0 100 | #endif 101 | #endif 102 | 103 | #ifndef R123_USE_SSE4_1 104 | #if defined(_M_X64) || _MSC_VER > 1899 105 | #define R123_USE_SSE4_1 1 106 | #else 107 | #define R123_USE_SSE4_1 0 108 | #endif 109 | #endif 110 | 111 | #ifndef R123_USE_SSE 112 | #define R123_USE_SSE 1 113 | #endif 114 | 115 | #ifndef R123_USE_AES_OPENSSL 116 | #define R123_USE_AES_OPENSSL 0 117 | #endif 118 | 119 | #ifndef R123_USE_GNU_UINT128 120 | #define R123_USE_GNU_UINT128 0 121 | #endif 122 | 123 | #ifndef R123_USE_ASM_GNU 124 | #define R123_USE_ASM_GNU 0 125 | #endif 126 | 127 | #ifndef R123_USE_CPUID_MSVC 128 | #define R123_USE_CPUID_MSVC 1 129 | #endif 130 | 131 | #ifndef R123_USE_X86INTRIN_H 132 | #define R123_USE_X86INTRIN_H 0 133 | #endif 134 | 135 | #ifndef R123_USE_IA32INTRIN_H 136 | #define R123_USE_IA32INTRIN_H 0 137 | #endif 138 | 139 | #ifndef R123_USE_XMMINTRIN_H 140 | #define R123_USE_XMMINTRIN_H 0 141 | #endif 142 | 143 | #ifndef R123_USE_EMMINTRIN_H 144 | #define R123_USE_EMMINTRIN_H 1 145 | #endif 146 | 147 | #ifndef R123_USE_SMMINTRIN_H 148 | #define R123_USE_SMMINTRIN_H 1 149 | #endif 150 | 151 | #ifndef R123_USE_WMMINTRIN_H 152 | #define R123_USE_WMMINTRIN_H 1 153 | #endif 154 | 155 | #ifndef R123_USE_INTRIN_H 156 | #define R123_USE_INTRIN_H 1 157 | #endif 158 | 159 | #ifndef R123_USE_MULHILO16_ASM 160 | #define R123_USE_MULHILO16_ASM 0 161 | #endif 162 | 163 | #ifndef R123_USE_MULHILO32_ASM 164 | #define R123_USE_MULHILO32_ASM 0 165 | #endif 166 | 167 | #ifndef R123_USE_MULHILO64_ASM 168 | #define R123_USE_MULHILO64_ASM 0 169 | #endif 170 | 171 | #ifndef R123_USE_MULHILO64_MSVC_INTRIN 172 | #if defined(_M_X64) 173 | #define R123_USE_MULHILO64_MSVC_INTRIN 1 174 | #else 
175 | #define R123_USE_MULHILO64_MSVC_INTRIN 0 176 | #endif 177 | #endif 178 | 179 | #ifndef R123_USE_MULHILO64_CUDA_INTRIN 180 | #define R123_USE_MULHILO64_CUDA_INTRIN 0 181 | #endif 182 | 183 | #ifndef R123_USE_MULHILO64_OPENCL_INTRIN 184 | #define R123_USE_MULHILO64_OPENCL_INTRIN 0 185 | #endif 186 | 187 | #ifndef __STDC_CONSTANT_MACROS 188 | #define __STDC_CONSTANT_MACROS 189 | #endif 190 | #include <stdint.h> 191 | #ifndef UINT64_C 192 | #error UINT64_C not defined. You must define __STDC_CONSTANT_MACROS before you #include <stdint.h> 193 | #endif 194 | 195 | #pragma warning(disable:4244) 196 | #pragma warning(disable:4996) 197 | 198 | // If you add something, it must go in all the other XXfeatures.hpp 199 | // and in ../ut_features.cpp 200 | #endif 201 | -------------------------------------------------------------------------------- /tests/time_random123.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission.
19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef TIME_RANDOM123_H__ 33 | #define TIME_RANDOM123_H__ 1 34 | 35 | /* 36 | * This file contains the performance timing kernels for Random123 37 | * RNGs and a few conventional PRNGs, which have been hacked into 38 | * this framework. This code should NOT be considered as an 39 | * example of using PRNGs. A few macros try to keep it 40 | * cross-platform across C (serial or threads), CUDA and OpenCL. 41 | * The TEST_TPL + include util_expandtpl trick is used to 42 | * template-generate kernels for all the different RNGs across 43 | * NxW. 44 | */ 45 | 46 | #include 47 | #include 48 | 49 | #if defined(__OPENCL_VERSION__) 50 | #define KERNEL __kernel 51 | #define MEMTYPE __global 52 | #define xprintf(x) 53 | #elif defined(__CUDA_ARCH__) 54 | #define xprintf(x) 55 | #else 56 | #define xprintf(x) printf x 57 | #endif 58 | 59 | #ifndef KERNEL 60 | #define KERNEL 61 | #endif 62 | 63 | #ifndef MEMTYPE 64 | #define MEMTYPE 65 | #endif 66 | 67 | /* LOOK_AT forces the compiler to actually produce the code that 68 | computes the elements of A. 
Without it, optimizing compilers can 69 | elide some or all of the computation of A (i.e., the RNG we're 70 | trying to get timings for). There are many ways to do this. A 71 | perhaps more natural way would be to keep a running sum of all the 72 | results so far, but we found that gcc 4.5 and 4.6 could unroll that 73 | code into a fully SSE-ized loop, which appears to be 74 | unrepresentative of the kinds of optimizations that are possible in 75 | practice. I.e., we could not find any "real" use of the output of 76 | the RNG that permitted SSE-ization of the RNG. */
/* LOOK_AT(A, I, N):
     A  an RNG result; its words are read as A.v[0] .. A.v[N-1]
     I  an lvalue that is incremented when the XOR of those words is zero
     N  the number of words in A; only N == 4, 2 and 1 have a branch here,
        any other value makes the macro a no-op
   The zero test is wrapped in R123_BUILTIN_EXPECT(..., 0), i.e. it is
   hinted as the unlikely outcome.  The N>2?3:0 style subscripts collapse
   to index 0 whenever N is too small for the larger index, presumably so
   that the branches which cannot be taken still only name valid elements. */
#define LOOK_AT(A, I, N) do{ \
    if (N==4) if(R123_BUILTIN_EXPECT(!(A.v[N>2?3:0]^A.v[N>2?2:0]^A.v[N>1?1:0]^A.v[0]), 0)) ++I; \
    if (N==2) if(R123_BUILTIN_EXPECT(!(A.v[N>1?1:0]^A.v[0]), 0)) ++I; \
    if (N==1) if(R123_BUILTIN_EXPECT(!(A.v[0]), 0)) ++I; \
    }while(0)


84 | /* Macro that will expand later into all the Random123 PRNGs for NxW_R */ 85 | /* XXX AMDAPPSDK 2.4 seemed unhappy with the first arg being uint, but 86 | was ok when it was changed to uint64_t. It's now back to unsigned 87 | because that seems more correct and generic. Nobody's using 2.4 88 | any more, are they? Note that this macro is expanded into CPU, 89 | CUDA and OpenCL "kernels", so it has to be generic.
*/
/*
 * TEST_TPL(NAME, N, W, R) defines the timing kernel
 *
 *     KERNEL void test_<NAME><N>x<W>_<R>(n, ctrinit, uk, ctr)
 *
 *   n        loop bound for the timing loop
 *   ctrinit  starting counter value
 *   uk       user key; converted to a key with <NAME><N>x<W>keyinit()
 *   ctr      output array; the last result is stored at index
 *            get_global_id(0)
 *
 * When R equals <NAME><N>x<W>_rounds the loop calls the plain
 * <NAME><N>x<W>(c, k) entry point, otherwise the explicit-rounds form
 * <NAME><N>x<W>_R(R, c, k).  After every call the result is passed to
 * LOOK_AT together with the loop index i (which LOOK_AT may bump), and
 * word 0 of the counter is incremented.
 */
#define TEST_TPL(NAME, N, W, R) \
KERNEL void test_##NAME##N##x##W##_##R(unsigned n, NAME##N##x##W##_ctr_t ctrinit, NAME##N##x##W##_ukey_t uk, MEMTYPE NAME##N##x##W##_ctr_t *ctr) \
{ \
    unsigned tid = get_global_id(0); \
    unsigned i; \
    NAME##N##x##W##_ctr_t c, v={{0}}; \
    NAME##N##x##W##_key_t k=NAME##N##x##W##keyinit(uk); \
    c = ctrinit; \
    /* two copies of the loop: default-rounds entry point vs. explicit R */ \
    if( R == NAME##N##x##W##_rounds ){ \
        for (i = 0; i < n; ++i) { \
            v = NAME##N##x##W(c, k); \
            LOOK_AT(v, i, N); \
            c.v[0]++; \
        } \
    }else { \
        for (i = 0; i < n; ++i) { \
            v = NAME##N##x##W##_R(R, c, k); \
            /*xprintf(("1: %s k[0] %lx c[0] %lx v[0] %lx\n", #NAME #N "x" #W "_" #R, (unsigned long) k.v[0], (unsigned long) c.v[0], (unsigned long) v.v[0]));*/ \
            /*if (c.v[0] == 0) printline_##NAME##N##x##W##_##R(k, c, &v, 1);*/ \
            LOOK_AT(v, i, N); \
            c.v[0]++; \
        } \
    } \
    /* v keeps its {{0}} initializer if the loop body never ran (n == 0) */ \
    ctr[tid] = v; \
}

/* Now expand TEST_TPL for all the relevant RNGs */
#include "util_expandtpl.h"

#endif /* TIME_RANDOM123_H__ */
120 | -------------------------------------------------------------------------------- /tests/time_initkeyctr.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E.
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | #ifndef TIME_INITKEYCTR_H__ 33 | #define TIME_INITKEYCTR_H__ 1 34 | 35 | /* 36 | * EXAMPLE_KEY* and EXAMPLE_CTR* values are just arbitrary numbers 37 | * with some bits set, they have no special importance, they do 38 | * not even have to be different. If they are changed, then the 39 | * good_* values further below will need to be updated to match. 40 | */ 41 | 42 | #define EXAMPLE_KEY0 0xdeadbeefU 43 | #define EXAMPLE_KEY1 0x12345678U 44 | #define EXAMPLE_KEY2 0xc0debad1U 45 | #define EXAMPLE_KEY3 0x31415926U 46 | 47 | #define EXAMPLE_CTR0 0x00000000U 48 | #define EXAMPLE_CTR1 0x10000000U 49 | #define EXAMPLE_CTR2 0x20000000U 50 | #define EXAMPLE_CTR3 0x30000000U 51 | 52 | /* 53 | * The magic hex numbers below are just known good values that 54 | * result from the arbitrary EXAMPLE* inputs above. We check that 55 | * we got these results at the end of timing tests to ensure that 56 | * we didn't accidentally let a compiler optimize away some loop. 
57 | */ 58 | #if R123_USE_PHILOX_64BIT 59 | static philox2x64_ctr_t good_philox2x64_6 = {{R123_64BIT(0xdd40cdb81af968d2),R123_64BIT(0x0cb57d6d5f7b68dc)}}; 60 | static philox2x64_ctr_t good_philox2x64_10 = {{R123_64BIT(0x539e5b3d18faf5da),R123_64BIT(0x838ca1328d07d3ba)}}; 61 | static philox4x64_ctr_t good_philox4x64_7 = {{R123_64BIT(0xcf492074862957a2),R123_64BIT(0x7057627260938584),R123_64BIT(0x676e23214a14901d),R123_64BIT(0xefa2c5df3848e3fe)}}; 62 | static philox4x64_ctr_t good_philox4x64_10 = {{R123_64BIT(0x1b64f56b381a5a89),R123_64BIT(0x940a282a8add45e1),R123_64BIT(0x53c936376ac7d5df),R123_64BIT(0x6147e87ec9bd9caa)}}; 63 | #endif 64 | static philox4x32_ctr_t good_philox4x32_7 = {{0x40ba6a95,0x799e6a43,0x7dcabe10,0xa7a81636}}; 65 | static philox4x32_ctr_t good_philox4x32_10 = {{0xf16d828e,0xa1c5962d,0xacac820c,0x58113d7a}}; 66 | static threefry4x32_ctr_t good_threefry4x32_12 = {{0xe461db1c,0xfdfa62a7,0x0b10cd2a,0xa3679758}}; 67 | static threefry4x32_ctr_t good_threefry4x32_20 = {{0xf82cf576,0x162ca116,0x3afefe23,0x54cc64ac}}; 68 | #if R123_USE_64BIT 69 | static threefry2x64_ctr_t good_threefry2x64_13 = {{R123_64BIT(0xdf0f096c179ad798),R123_64BIT(0x077862fbaa1a0d11)}}; 70 | static threefry2x64_ctr_t good_threefry2x64_20 = {{R123_64BIT(0xb91153d59815d50e),R123_64BIT(0xdb0dd45e5b0eab81)}}; 71 | static threefry4x64_ctr_t good_threefry4x64_12 = {{R123_64BIT(0x416d1802da0a4a0f),R123_64BIT(0xabd4d80749306281),R123_64BIT(0x62c6b120b542bff0),R123_64BIT(0xefb28dc80c6fc36c)}}; 72 | static threefry4x64_ctr_t good_threefry4x64_20 = {{R123_64BIT(0xad8f0b8c18ed5187),R123_64BIT(0xd80146a6961e1880),R123_64BIT(0x7fce9d950d8acbc4),R123_64BIT(0x782948d5203519f1)}}; 73 | static threefry4x64_ctr_t good_threefry4x64_72 = {{R123_64BIT(0x73ff3f7a0b878f68),R123_64BIT(0x6668f6bbaba83f31),R123_64BIT(0x088eb85d40fbdb56),R123_64BIT(0xd1f39136adc96552)}}; 74 | #endif 75 | 76 | #if R123_USE_AES_NI 77 | static ars4x32_ctr_t good_ars4x32_5 = {{0x279f6b0b, 0xd0b1edf6, 0x6044b433, 0x66c06817}}; 78 
| static ars4x32_ctr_t good_ars4x32_7 = {{0xa9cd8055, 0x80272a47, 0x4b7ab914, 0x5351d78e}}; 79 | static aesni4x32_ctr_t good_aesni4x32_10 = {{0x1e68c9fd, 0x347b0858, 0x503d8d91, 0x9e73460a}}; 80 | #endif 81 |
/*
 * TEST_TPL(NAME, N, W, R) expands to a statement, not a function: it is
 * meant to be expanded inside a function body that already has
 * keyctroffset, count and infop in scope.
 *
 * The statement initializes a ukey and a counter to known values with a
 * known offset and calls the Random123 test function
 * <NAME><N>x<W>_<R>(ctr, ukey, good_<NAME><N>x<W>_<R>, count, infop),
 * i.e. with that ukey and ctr, the matching known-good result, a count
 * and a closure.  For generators whose name starts with "aes" or "ars"
 * the call is skipped with a message when haveAESNI() reports false.
 *
 * keyctroffset is a variable initialized from the runtime environment
 * (e.g. argv, argc, getenv(), etc) to avoid compile-time optimization
 * caused by constants, so we get worst-case numbers.  Users may, of
 * course, benefit from compile-time optimization if they have some
 * constants for key or ctr values.
 */
#define TEST_TPL(NAME, N, W, R) \
    if ((strncmp(#NAME, "aes", 3) == 0 || strncmp(#NAME, "ars", 3) == 0) && !haveAESNI()) { \
        printf("AESNI not available on this hardware\n"); \
    } else { \
        NAME##N##x##W##_ukey_t ukey={{0}}; \
        NAME##N##x##W##_ctr_t ctr={{0}}; \
        size_t xi; \
        /* the key may have a different word count than the counter, so */ \
        /* its loop bound comes from sizeof; words past index 3 keep 0  */ \
        for (xi = 0; xi < sizeof(ukey)/sizeof(ukey.v[0]); xi++) { \
            switch (xi) { \
            case 0: ukey.v[xi] = EXAMPLE_KEY0+keyctroffset; break; \
            case 1: ukey.v[xi] = EXAMPLE_KEY1+keyctroffset; break; \
            case 2: ukey.v[xi] = EXAMPLE_KEY2+keyctroffset; break; \
            case 3: ukey.v[xi] = EXAMPLE_KEY3+keyctroffset; break; \
            } \
        } \
        /* the counter has N words */ \
        for (xi = 0; xi < N; xi++) { \
            switch (xi) { \
            case 0: ctr.v[xi] = EXAMPLE_CTR0+keyctroffset; break; \
            case 1: ctr.v[xi] = EXAMPLE_CTR1+keyctroffset; break; \
            case 2: ctr.v[xi] = EXAMPLE_CTR2+keyctroffset; break; \
            case 3: ctr.v[xi] = EXAMPLE_CTR3+keyctroffset; break; \
            } \
        } \
        NAME##N##x##W##_##R(ctr, ukey, good_##NAME##N##x##W##_##R, count, infop); \
    }

#include "util_expandtpl.h"

#endif /* TIME_INITKEYCTR_H__ */
122 | 123 | 124 |
-------------------------------------------------------------------------------- /tests/util_cuda.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | #ifndef UTIL_CUDA_H__ 33 | #define UTIL_CUDA_H__ 34 | 35 | #include "util.h" 36 | 37 | #include 38 | #include 39 | 40 | // utility macros to check return codes and complain/exit on failure 41 | #define CHECKLAST(MSG) do { cudaError_t e = cudaGetLastError(); if (e != cudaSuccess) {fprintf(stderr, "%s:%d: CUDA Error: %s: %s\n", __FILE__, __LINE__, (MSG), cudaGetErrorString(e)); exit(1); }} while(0) 42 | #define CHECKCALL(RET) do { cudaError_t e = (RET); if (e != cudaSuccess) { fprintf(stderr, "%s:%d: CUDA Error: %s\n", __FILE__, __LINE__, cudaGetErrorString(e)); exit(1); } } while(0) 43 | 44 | typedef struct cuda_info { 45 | int devnum, cores, blocks_per_grid, threads_per_block; 46 | double cycles; 47 | struct cudaDeviceProp dev; 48 | } CUDAInfo; 49 | 50 | // If devstr is none, chooses device with most cores. 51 | static CUDAInfo *cuda_init(const char *devstr) 52 | { 53 | CUDAInfo *tp; 54 | int i, ndev, cores, devcores, devdev; 55 | double cycles; 56 | CHECKNOTZERO(tp = (CUDAInfo *) malloc(sizeof(CUDAInfo))); 57 | CHECKCALL( cudaGetDeviceCount(&ndev) ); 58 | devcores = 0; 59 | devdev = -1; 60 | if (devstr == NULL) { 61 | devstr = getenv("R123EXAMPLE_ENVCONF_CUDA_DEVICE"); 62 | if (devstr && devstr[0] >= '0' && devstr[0] <= '9' && devstr[1] == '\0') { 63 | devdev = devstr[0]-'0'; 64 | } 65 | } 66 | for (i = 0; i < ndev; i++) { 67 | struct cudaDeviceProp cu; 68 | CHECKCALL( cudaGetDeviceProperties (&cu, i) ); 69 | // Number of cores is not available from a query, have to hardwire 70 | // some knowledge here, from web articles about the various generations 71 | // SM or SMX, might also find this info in 72 | // CUDA SDK $CUDA_SAMPLES_DIR/common/inc/helper_cuda.h 73 | // or https://github.com/NVIDIA/nvidia-docker/blob/master/tools/src/cuda/cuda.go 74 | cores = cu.multiProcessorCount; 75 | if (cu.major == 1 && cu.minor >= 0 && cu.minor <= 3) { 76 | // 1.0 (G80, G92, aka GTX880, Tesla [CSD]870) to 1.3 (GT200, aka GTX280, Tesla [CS]10xx) have 8 cores per MP 77 
| cores *= 8; 78 | } else if (cu.major == 2 && cu.minor == 0) { 79 | // 2.0 (G100, aka GTX480, Tesla/Fermi [CSM]20[567]0, and GF110, aka GTX580, M2090) 80 | cores *= 32; 81 | } else if (cu.major == 2 && cu.minor == 1) { 82 | // 2.1 (GF104, GF114, GF116 aka GTX [45][56]0) 83 | cores *= 48; 84 | } else if (cu.major == 3) { 85 | // 3.0 (Kepler GK104 aka GTX 680), 3.2 (TK1), 3.5 (GK11x, GK20x), 3.7 (GK21x) 86 | cores *= 192; 87 | } else if (cu.major == 5) { 88 | // 5.0 (Maxwell GM10x), 5.2 (GM20x), 5.3 (TX1) 89 | cores *= 128; 90 | } else if (cu.major == 6 && cu.minor == 0) { 91 | // 6.0 (Pascal P100) 92 | cores *= 64; 93 | } else if (cu.major == 6) { 94 | // 6.1 (Pascal 10xx, Titan Xp, P40), 6.2 (Drive PX2 and Tegra) 95 | cores *= 128; 96 | } else if (cu.major == 7) { 97 | // 7.[05] (Volta and Turing RTX 20[678]0, Titan RTX, Quadro RTX), 7.2 (Xavier Jetson) 98 | cores *= 64; 99 | } else if (cu.major == 8 && cu.minor == 0) { 100 | // 8.0 (Ampere A100) 101 | cores *= 64; 102 | } else if (cu.major == 8 && cu.minor == 6) { 103 | // 8.6 (Ampere RTX 30[56789]0, A[23456]000, A45000) 104 | cores *= 128; 105 | } else { 106 | int coremultguess = 384; 107 | cores *= coremultguess; 108 | fprintf(stderr, "WARNING: Unknown number of cores per MP for this device: assuming %d, so cpb calculation will be wrong and choice of blocks/grid might be suboptimal\n", coremultguess); 109 | } 110 | /* clockrate is in KHz */ 111 | cycles = 1e3 * cu.clockRate * cores; 112 | printf(" %d: maj %d min %d %s%s ( %d units @ %g MHz ECC=%d %d cores %g Gcycles/s)\n", 113 | i, cu.major, cu.minor, cu.name, cu.integrated ? 
" integrated" : "", 114 | cu.multiProcessorCount, cu.clockRate*1e-3, cu.ECCEnabled, cores, cycles*1e-9); 115 | if ((devstr && strstr(cu.name, devstr) == NULL )|| 116 | devdev >= 0 && i != devdev) { 117 | dprintf(("skipping device %s\n", cu.name)); 118 | continue; 119 | } 120 | if (cores > devcores) { 121 | devcores = cores; 122 | tp->devnum = i; 123 | tp->cores = cores; 124 | tp->cycles = cycles; 125 | tp->dev = cu; 126 | } 127 | } 128 | if (devcores == 0) { 129 | fprintf(stderr, "could not find specified device\n"); 130 | exit(1); 131 | } 132 | tp->blocks_per_grid = tp->cores; /* seems like a good guess */ 133 | tp->threads_per_block = tp->dev.warpSize * 2; 134 | printf("Using CUDA device %d, %d cores, %g cycles, will try %d blocks/grid %d threads/block\n", 135 | tp->devnum, tp->cores, tp->cycles, tp->blocks_per_grid, tp->threads_per_block); 136 | CHECKCALL(cudaSetDevice(tp->devnum)); 137 | dprintf(("cuda_init done\n")); 138 | return tp; 139 | } 140 | 141 | static void cuda_done(CUDAInfo *tp) 142 | { 143 | dprintf(("cuda_done\n")); 144 | free(tp); 145 | } 146 | 147 | #endif /* UTIL_CUDA_H__ */ 148 | -------------------------------------------------------------------------------- /tests/time_serial.c: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright 2010-2011, D. E. Shaw Research. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | * Redistributions of source code must retain the above copyright 10 | notice, this list of conditions, and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions, and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | * Neither the name of D. E. 
Shaw Research nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 21 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 22 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 23 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 24 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 25 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 26 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 30 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | */ 32 | /* 33 | * Single-core CPU test and timing harness for Random123 RNGs. 34 | * Uses macros and util_expandtpl.h to "templatize" over all the 35 | * different permutations of RNGs and NxW and R. 
36 | */ 37 | #include "util.h" 38 | 39 | #include "Random123/philox.h" 40 | #include "Random123/threefry.h" 41 | #include "Random123/ars.h" 42 | #include "Random123/aes.h" 43 | 44 | #include "time_misc.h" 45 | #include "util_print.h" 46 | 47 | #include "util.h" 48 | #define KERNEL R123_STATIC_INLINE 49 | #define get_global_id(i) (i) 50 | #include "time_random123.h" 51 | 52 | static double cpu_hz = -1.; 53 |
/*
 * TEST_TPL(NAME, N, W, R) defines the host-side timing driver
 *
 *     void <NAME><N>x<W>_<R>(ctr, ukey, kactr, count, unused)
 *
 *   ctr, ukey  counter and user key handed to the kernel
 *   kactr      known-answer result expected from a single kernel step
 *   count      kernel loop count; 0 means "start from 1000000"
 *   unused     ignored
 *
 * The driver calls test_<NAME><N>x<W>_<R> numtrials + 2 times:
 *   n == -2  warm-up call with the initial count
 *   n == -1  count is rescaled from the warm-up time so that one trial
 *            takes roughly sec_per_trial, then a call with a loop count
 *            of 1 is made to measure the fixed overhead
 *   n >=  0  timed trials with a loop count of count + 1; before the
 *            first one, the result of the n == -1 call is compared word
 *            by word with kactr (a mismatch exits unless debug is set)
 * The best (smallest) trial time minus the overhead is reported as GB/s,
 * and also as cycles per byte when cpu_hz is positive.
 */
#define TEST_TPL(NAME, N, W, R) \
void NAME##N##x##W##_##R(NAME##N##x##W##_ctr_t ctr, NAME##N##x##W##_ukey_t ukey, NAME##N##x##W##_ctr_t kactr, unsigned count, void* unused) \
{ \
    const char *kernelname = #NAME #N "x" #W "_" #R; \
    double cur_time; \
    int n, niterations = numtrials; /* we make niterations + 2 (warmup, overhead) calls to the kernel */ \
    unsigned kcount = 0; \
    double basetime = 0., dt = 0., mindt = 0.; \
    NAME##N##x##W##_ctr_t C, *hC = &C; \
    (void)unused; /* suppress warning */ \
    \
    for (n = -2; n < niterations; n++) { \
        if (n == -2) { \
            if (count == 0) { \
                /* try to set a good guess for count */ \
                count = 1000000; \
                dprintf(("starting with count = %u\n", count)); \
            } \
            kcount = count; \
        } else if (n == -1) { \
            /* use first iteration time to calibrate count to get approximately sec_per_trial */ \
            /* dt > 0 guard: a zero timer reading would divide by zero and */ \
            /* make the conversion to unsigned undefined; keep count as is */ \
            if (count > 1 && dt > 0.) { \
                count = (unsigned)(count * sec_per_trial / dt); \
                dprintf(("scaled count = %u\n", count)); \
            } \
            /* second iteration is to calculate overhead after warmup */ \
            kcount = 1; \
        } else if (n == 0) { \
            int xj; \
            /* Check that we got the expected value */ \
            for (xj = 0; xj < N; xj++) { \
                if (kactr.v[xj] != hC[0].v[xj]) { \
                    printf("%s mismatch: xj = %d, expected\n", kernelname, xj); \
                    printline_##NAME##N##x##W##_##R(ukey, ctr, &kactr, 1); \
                    printf(" but got\n"); \
                    printline_##NAME##N##x##W##_##R(ukey, ctr, hC, 1); \
                    if(!debug) exit(1); \
                    else break; \
                } else { \
                    dprintf(("%s matched word %d\n", kernelname, xj)); \
                } \
            } \
            /* dt still holds the time of the single-step (overhead) call */ \
            basetime = dt; \
            if (debug||verbose) { \
                dprintf(("%s %.3f secs\n", kernelname, basetime)); \
                printline_##NAME##N##x##W##_##R(ukey, ctr, hC, 1); \
            } \
            kcount = count + 1; \
        } \
        /* calling timer *before* dprintf avoids an ARMv7 gcc 4.8.3 -O3 compiler bug! */ \
        (void)timer(&cur_time); \
        dprintf(("call function %s\n", kernelname)); \
        test_##NAME##N##x##W##_##R(kcount, ctr, ukey, hC); \
        dt = timer(&cur_time); \
        dprintf(("iteration %d took %.3f secs\n", n, dt)); \
        ALLZEROS(hC, 1, N); \
        if (n == 0 || dt < mindt) mindt = dt; \
    } \
    if (count > 1) { \
        /* seconds per byte: (kcount - 1) extra steps of N*W/8 bytes each */ \
        double tpB = (mindt - basetime) / ( (kcount - 1.) * (N * W / 8.) ); \
        if(cpu_hz > 0.) \
            printf("%-17s %#5.3g cpB %#5.3g GB/s %u B granularity (best %u in %.3f s - %.6f s)\n", \
                   kernelname, tpB*cpu_hz, 1e-9/tpB, \
                   (unsigned)(N*W/8), kcount, mindt, basetime ); \
        else \
            printf("%-17s %#5.3g GB/s %u B granularity (best %u in %.3f s - %.6f s)\n", \
                   kernelname, 1e-9/tpB, \
                   (unsigned)(N*W/8), kcount, mindt, basetime ); \
        fflush(stdout); \
    } \
}

#include "util_expandtpl.h"

/*
 * Usage: time_serial [COUNT]
 *
 * COUNT is the kernel loop count handed to every timing driver; when it
 * is omitted (or 0) each driver picks its own starting value.
 *
 * Environment variables read here:
 *   TIME_SERIAL_CPU_GHZ        CPU clock in GHz; enables cycles-per-byte output
 *   TIME_SERIAL_VERBOSE        sets verbose
 *   TIME_SERIAL_DEBUG          sets debug
 *   TIME_SERIAL_OFFSET         offset added to the example key/counter words
 *   TIME_SERIAL_NUMTRIALS      sets numtrials
 *   TIME_SERIAL_SEC_PER_TRIAL  sets sec_per_trial
 *
 * The tests themselves are the statements produced by including
 * time_initkeyctr.h inside the function body; they use the locals count,
 * keyctroffset and infop.
 */
int main(int argc, char **argv)
{
    char *cp;
    unsigned count = 0;
    int keyctroffset = 0;
    void* infop = NULL;
    int badargs;

    progname = argv[0];
    /* NOTE(review): only one positional argument is used, yet up to two
       are accepted; the original limit is kept so existing invocations
       keep working. */
    badargs = (argc > 3) || (argc > 1 && argv[1][0] == '-');
    if (!badargs && argc > 1) {
        /* strtoul instead of atoi: rejects trailing junk and values that
           do not fit in an unsigned instead of silently misparsing them */
        char *end;
        unsigned long parsed = strtoul(argv[1], &end, 10);
        if (end == argv[1] || *end != '\0' || parsed != (unsigned)parsed)
            badargs = 1;
        else
            count = (unsigned)parsed;
    }
    if (badargs) {
        fprintf(stderr, "Usage: %s [COUNT]\n", progname);
        exit(1);
    }
    if ((cp = getenv("TIME_SERIAL_CPU_GHZ")) != NULL) {
        cpu_hz = 1.e9 * atof(cp);
    }
    if ((cp = getenv("TIME_SERIAL_VERBOSE")) != NULL) {
        verbose = atoi(cp);
    }
    if ((cp = getenv("TIME_SERIAL_DEBUG")) != NULL) {
        debug = atoi(cp);
    }
    if ((cp = getenv("TIME_SERIAL_OFFSET")) != NULL) {
        keyctroffset = atoi(cp);
    }
    if ((cp = getenv("TIME_SERIAL_NUMTRIALS")) != NULL) {
        numtrials = atoi(cp);
    }
    if ((cp = getenv("TIME_SERIAL_SEC_PER_TRIAL")) != NULL) {
        sec_per_trial = atof(cp);
    }
#   include "time_initkeyctr.h"
    return 0;
}
163 | --------------------------------------------------------------------------------