├── .gitignore ├── README.md ├── exercises ├── sheet_01.md ├── sheet_02.md ├── sheet_03.md ├── sheet_04.md ├── sheet_05.md ├── sheet_06.md ├── sheet_07.md ├── sheet_08.md ├── sheet_09.md ├── sheet_10.md ├── sheet_11.md └── sheet_12.md ├── larger_samples ├── README.md ├── npb_bt │ ├── CMakeLists.txt │ └── src │ │ ├── a │ │ └── npbparams.h │ │ ├── add.c │ │ ├── adi.c │ │ ├── b │ │ └── npbparams.h │ │ ├── bt.c │ │ ├── c │ │ └── npbparams.h │ │ ├── c_print_results.c │ │ ├── c_timers.c │ │ ├── error.c │ │ ├── exact_rhs.c │ │ ├── exact_solution.c │ │ ├── header.h │ │ ├── initialize.c │ │ ├── print_results.c │ │ ├── print_results.h │ │ ├── randdp.c │ │ ├── randdp.h │ │ ├── rhs.c │ │ ├── s │ │ └── npbparams.h │ │ ├── set_constants.c │ │ ├── solve_subs.c │ │ ├── timers.h │ │ ├── type.h │ │ ├── verify.c │ │ ├── w │ │ └── npbparams.h │ │ ├── work_lhs.h │ │ ├── wtime.c │ │ ├── wtime.h │ │ ├── x_solve.c │ │ ├── y_solve.c │ │ └── z_solve.c └── ssca2 │ ├── CMakeLists.txt │ └── src │ ├── SSCA2.c │ ├── betweennessCentrality.c │ ├── computeGraph.c │ ├── defs.h │ ├── findSubGraphs.c │ ├── gen2DTorus.c │ ├── genScalData.c │ ├── getStartLists.c │ ├── init.c │ ├── sprng2.0 │ ├── checkid.c │ ├── checkid.h │ ├── cmrg.c │ ├── cmrg.h │ ├── communicate.c │ ├── cputime.c │ ├── cputime.h │ ├── fwrap.h │ ├── fwrap_.h │ ├── fwrap_mpi.c │ ├── include │ │ ├── sprng.h │ │ ├── sprng_f.h │ │ └── sprng_interface.h │ ├── int64.h │ ├── lcg.c │ ├── lcg.h │ ├── lcg64.c │ ├── lcg64.h │ ├── lfg.c │ ├── lfg.h │ ├── makeseed.c │ ├── memory.c │ ├── memory.h │ ├── mlfg.c │ ├── mlfg.h │ ├── multiply.h │ ├── primelist_32.h │ ├── primelist_64.h │ ├── primes_32.c │ ├── primes_32.h │ ├── primes_64.c │ ├── primes_64.h │ ├── simple_.h │ ├── simple_mpi.c │ ├── sprng.c │ ├── sprng.h │ ├── sprng_f.h │ ├── sprng_interface.h │ ├── store.c │ └── store.h │ └── utils.c ├── lcc3_helpers ├── README.md ├── job_example.sh └── modules.sh ├── lua └── fib.lua ├── slides ├── 01_introduction.pdf ├── 02_experimentation.pdf ├── 
03_perf_evaluation.pdf ├── 04_optimization_overview.pdf ├── 05_optimization_memory.pdf ├── 06_optimization_data_structures.pdf ├── 07_optimization_algorithms.pdf └── extra_lcc3_info.pdf ├── small_samples ├── CMakeLists.txt ├── README.md ├── delannoy │ └── delannoy.c ├── filegen │ └── filegen.c ├── filesearch │ └── filesearch.c ├── mmul │ └── mmul.c ├── nbody │ └── nbody.c └── qap │ ├── problems │ ├── chr10a.dat │ ├── chr12a.dat │ ├── chr12b.dat │ ├── chr12c.dat │ ├── chr15a.dat │ ├── chr15b.dat │ ├── chr15c.dat │ ├── chr18a.dat │ ├── chr18b.dat │ ├── chr20a.dat │ ├── chr20b.dat │ ├── chr20c.dat │ ├── chr22a.dat │ ├── chr22b.dat │ └── chr25a.dat │ └── qap.c └── tools ├── CMakeLists.txt ├── load_generator ├── exec_with_workstation_heavy.sh ├── loadcapture.c ├── loadgen.c ├── loadprofile.c ├── loadproplot.gnu ├── loadtest.c ├── loadutils.c ├── loadutils.h ├── mc3 │ ├── loadprofile_mc3.dat │ ├── loadprofile_mc3.png │ ├── loadprofile_mc3.xlsx │ ├── loadprofile_mc3_03.txt │ ├── loadprofile_mc3_09.txt │ ├── loadprofile_mc3_13.txt │ ├── loadprofile_mc3_17.txt │ └── loadprofile_mc3_summary.txt ├── synth │ └── 0_1_altern_static.txt └── workstation │ ├── Thumbs.db │ ├── sys_load_profile_workstation.txt │ ├── sys_load_profile_workstation_excerpt.txt │ └── sysload_workstation.png └── malloctest └── malloctest.c /.gitignore: -------------------------------------------------------------------------------- 1 | small_samples/build 2 | small_samples/build_debug 3 | small_samples/build_release 4 | larger_samples/npb_bt/build 5 | larger_samples/npb_bt/build_debug 6 | larger_samples/npb_bt/build_release 7 | larger_samples/ssca2/build 8 | larger_samples/ssca2/build_debug 9 | larger_samples/ssca2/build_release 10 | tools/build 11 | tools/build_debug 12 | tools/build_release 13 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Performance-oriented Computing 2 | 
============================== 3 | 4 | This repo contains resources for the UIBK course. -------------------------------------------------------------------------------- /exercises/sheet_01.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 1 2 | ================ 3 | 4 | A) Preparation 5 | -------------- 6 | 7 | Take a look at the programs in `small_samples`. Build them, determine how to run each program, what parameters it needs and how they are set, and how the workload can be scaled. 8 | 9 | For each program, measure the real (wall clock) time, CPU time, system time and maximum memory usage for specific executions. Empirically select a suitable set of execution parameters for each program. 10 | Document your results, and provide an argument for why you chose a specific set of parameters. 11 | 12 | > *Hint* 13 | > The program `/bin/time` can provide all the requested metrics. 14 | 15 | B) Experiments 16 | -------------- 17 | 18 | Create a simple automated experiment setup, e.g. using your favourite scripting language. All programs should be executed, each with a specified number of repetitions, and the output should include the mean of the requested performance metrics, as well as the variance. All raw data should also be stored in a structured fashion for later use. 19 | 20 | Provide the results for each benchmark, both on one of your personal compute platforms (describe it!) as well as on the LCC3 cluster. 21 | 22 | Submission 23 | ---------- 24 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet1]" in the subject line, before the start of the next VU at the latest. 25 | Try not to include attachments with a total size larger than 2 MiB. 
26 | -------------------------------------------------------------------------------- /exercises/sheet_02.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 2 2 | ================ 3 | 4 | **Don't run either of these two exercises on the LCC3 headnode!** 5 | 6 | A) External CPU load 7 | -------------------- 8 | 9 | Have a look at `loadgen` in the tools folder. 10 | Use it (e.g. `exec_with_workstation_heavy.sh`) to generate external load while benchmarking and track the changes this causes to your measured performance metrics. 11 | 12 | Improve your experiment setup/script from the first exercise to dynamically adjust the number of repetitions performed until some statistical confidence interval is reached, and present your results. 13 | 14 | 15 | B) External I/O load 16 | -------------------- 17 | 18 | Create a load generator which produces a configurable I/O load on the file system. 19 | Document its working principle, and demonstrate its impact on the speed of the I/O-bound benchmarks in `small_samples`. 20 | 21 | **On the cluster, use the local file system for all FS benchmarking!** 22 | Do **not** use `/home` or `/scratch`. 23 | 24 | Submission 25 | ---------- 26 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet2]" in the subject line, before the start of the next VU at the latest. 27 | Try not to include attachments with a total size larger than 2 MiB. 28 | -------------------------------------------------------------------------------- /exercises/sheet_03.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 3 2 | ================ 3 | 4 | A) Traditional profiling 5 | ------------------------ 6 | 7 | In `larger_samples/npb_bt` you find a somewhat larger C program. There are 5 variants generated at compile time, named `_s`, `_w`, `_a`, `_b`, `_c`, representing successively larger workloads in this order. 
8 | 9 | Use traditional profiling with `gprof` in order to gain insight into the CPU time distribution of this program, on both your own hardware and LCC3, for at least 2 different workload sizes. 10 | 11 | Summarize, interpret and report your findings. 12 | 13 | 14 | B) Hybrid trace profiling 15 | ------------------------- 16 | 17 | Use the Tracy profiler to profile the same application. Think about what might be a good mapping for the concept of a "frame" in the context of the given program. Also annotate all functions or individual code regions which seem interesting (e.g. based on the traditional flat profile). 18 | 19 | Try to measure the overhead of both gprof and Tracy, compared to the native execution without any profiling information. 20 | 21 | You do not necessarily need to do this on LCC3, but can do so for extra clout. 22 | 23 | 24 | Submission 25 | ---------- 26 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet3]" in the subject line, before the start of the next VU at the latest. 27 | Try not to include attachments with a total size larger than 2 MiB. 28 | -------------------------------------------------------------------------------- /exercises/sheet_04.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 4 2 | ================ 3 | 4 | For this exercise, perform all measurements on LCC3. You can of course (and probably should) run analysis tools (e.g. massif-visualizer) locally. 5 | 6 | A) Memory profiling 7 | ------------------- 8 | 9 | In `larger_samples` you find two somewhat larger C programs. The first one you are already familiar with. The second one, "SSCA2", takes a "scale" as its command line parameter -- use `ssca2 17` for this exercise. 10 | 11 | For both programs: 12 | * Use the "massif" tool in Valgrind to determine the largest sources of heap memory utilization, and visualize the results with "massif-visualizer". 
13 | * How significant is the perturbation in execution time caused by using massif? 14 | 15 | Summarize, interpret and report your findings. 16 | 17 | B) Measuring CPU counters 18 | ------------------------- 19 | 20 | For both programs, measure **all** events in the `[Hardware cache event]` category reported by `perf list`. Note that as discussed in the lecture, there is a limit on the number of hardware counters you can measure in a single run. 21 | 22 | For both programs: 23 | * Report the results in **relative** metrics, and compare these between the programs. 24 | * How significant is the perturbation in execution time caused by using perf to measure performance counters? 25 | 26 | Summarize, interpret and report your findings. 27 | 28 | 29 | Submission 30 | ---------- 31 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet4]" in the subject line, before the start of the next VU at the latest. 32 | Try not to include attachments with a total size larger than 2 MiB. 33 | -------------------------------------------------------------------------------- /exercises/sheet_05.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 5 2 | ================ 3 | 4 | Use the GCC 12 installation available in a module on LCC3 for this exercise (`gcc version 12.2.0 (Spack GCC)` / module `gcc/12.2.0-gcc-8.5.0-p4pe45v`). 5 | 6 | For benchmarking, use both of our `larger_samples` testing programs, as well as `mmul`, `nbody`, `qap` and `delannoy`. 7 | Use the following configurations: 8 | ``` 9 | mmul: default (S=1000) 10 | nbody: M=400, others default 11 | qap: chr15c.dat 12 | delannoy: 13 13 | 14 | npb_bt: W 15 | ssca: 15 16 | ``` 17 | 18 | **NOTE**: You can query the available optimization options in GCC with 19 | ```bash 20 | gcc -Q --help=optimizers 21 | ``` 22 | This will also list their default setting. 
23 | To get the setting of individual options at a given level of overall optimization, add the corresponding `-O` flag to the query. E.g. you can use 24 | ```bash 25 | gcc -Q --help=optimizers -O2 26 | ``` 27 | to see the default values at `-O2`. 28 | 29 | 30 | A) Basic Optimization Levels 31 | ---------------------------- 32 | 33 | Benchmark each of the test cases at the `-O0`, `-O1`, `-O2`, `-O3`, `-Os` and `-Ofast` optimization levels. 34 | 35 | Visualize and discuss the results. 36 | 37 | 38 | B) Individual Compiler Optimizations 39 | ------------------------------------ 40 | 41 | Determine the set of compiler flags which changes when going from `-O2` to `-O3`. It is relatively small (at least compared to the total number of available options). 42 | 43 | Write a script or program which, starting from `-O2`, individually toggles these options to their `-O3` levels, trying all the options in turn and recording the resulting performance. 44 | 45 | - Which of the options is the most meaningful for each of the programs? 46 | - For the top 3 most impactful options across all test cases, read up on exactly what they do and try to reason about why they are effective for the given test cases. 47 | 48 | 49 | C) Autotuning (optional) 50 | ------------------------ 51 | 52 | Extend your script to search for the best compiler configuration for each program. 53 | The exact mechanism of this search is up to you. 54 | 55 | Can you find a configuration for any of the programs which is better than any default (`-O*`) configuration to a statistically meaningful extent? 56 | 57 | 58 | Submission 59 | ---------- 60 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet5]" in the subject line, before the start of the next VU at the latest. 61 | Try not to include attachments with a total size larger than 2 MiB. 
62 | -------------------------------------------------------------------------------- /exercises/sheet_06.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 6 2 | ================ 3 | 4 | A) MMUL tiling 5 | -------------- 6 | 7 | Take the "mmul" small sample program and apply a tiling optimization to its main computation loop nest. 8 | Think about which loop(s) to tile in order to achieve a meaningful performance benefit, and argue why your choice makes sense in terms of reuse distance reduction. 9 | 10 | Test various tiling options on LCC3 (either manually or in an automated fashion) and report the results. Attempt to provide an explanation for the best parameter choices you found. 11 | 12 | Note: use a **2048²** matrix for this benchmark. 13 | 14 | 15 | B) Cache investigation 16 | ---------------------- 17 | 18 | Think about (and/or research) how you would implement a benchmark to measure cache latencies over progressively larger memory blocks, as seen in the lecture on memory optimization. Precisely explain its working principle and how it determines access latency while avoiding unintended effects. 19 | 20 | 21 | C) Cache benchmark (optional) 22 | ----------------------------- 23 | 24 | Implement your idea from B). Use the resulting program to measure and plot the access latency on LCC3 compute nodes for blocks of size 512 Byte to 16 MiB, in powers of 2. 25 | 26 | 27 | Submission 28 | ---------- 29 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet6]" in the subject line, before the start of the next VU at the latest. 30 | Try not to include attachments with a total size larger than 2 MiB. 
31 | -------------------------------------------------------------------------------- /exercises/sheet_07.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 7 2 | ================ 3 | 4 | All measurements and compilation for this sheet should be performed on LCC3 (as always, benchmarks should be scheduled using Slurm, with exclusive node assignment). 5 | 6 | 7 | A) Preloading General Allocators 8 | -------------------------------- 9 | 10 | Fetch and build RPMalloc and MiMalloc from GitHub: 11 | 12 | - https://github.com/mjansson/rpmalloc 13 | - https://github.com/microsoft/mimalloc 14 | 15 | We will use preloading (`LD_PRELOAD`) to replace the memory allocator in a large application with lots of small allocations, namely the Clang/LLVM compiler (`module load llvm/15.0.4-python-3.10.8-gcc-8.5.0-bq44zh7`). 16 | 17 | As our benchmark, we'll compile the allscale_api project. To prepare, run this (I suggest doing so in `scratch`): 18 | 19 | ```bash 20 | git clone https://github.com/allscale/allscale_api.git 21 | mkdir allscale_api/build && cd allscale_api/build 22 | cmake -DCMAKE_BUILD_TYPE=Release -G Ninja ../code 23 | ``` 24 | 25 | For each benchmark run, first perform a `ninja clean`, and then measure the time and peak memory consumption of `ninja`. 26 | **Note**: As always, you should benchmark by submitting a job to a node. To eliminate overhead from the shared network file system, 27 | copy the input source code to the local file system (i.e. `/tmp`) and run the compilation with that as the source. 28 | 29 | Perform 3 sets of benchmarks: 30 | 31 | 1. No allocator preloading 32 | 2. Preloading the RPMalloc allocator 33 | 3. Preloading the MiMalloc allocator 34 | 35 | Chart and report the CPU time, wall time and peak memory consumption for each of these. 36 | 37 | 38 | B) Implementing a special-purpose allocator 39 | ------------------------------------------- 40 | 41 | For this exercise, we are using the benchmark found in `tools/malloctest`. 
42 | This is a very simple memory allocator benchmark which repeatedly performs a sequence of random allocations, and then frees them all. 43 | 44 | Implement a bump allocator working on an arena, and use it with this benchmark. 45 | 46 | Report the benchmark time for the default allocator, and your bump allocator, for a call to 47 | ```bash 48 | ./malloctest 1 500 1000000 10 1000 49 | ``` 50 | 51 | 52 | Submission 53 | ---------- 54 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet7]" in the subject line, before the start of the next VU at the latest. 55 | Try not to include attachments with a total size larger than 2 MiB. 56 | -------------------------------------------------------------------------------- /exercises/sheet_08.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 8 2 | ================ 3 | 4 | A) False Sharing 5 | ---------------- 6 | 7 | Have a look at this git PR: 8 | https://github.com/KhronosGroup/Vulkan-ValidationLayers/pull/5587 9 | 10 | Explain the problem it tries to solve, and how it attempts to do so. 11 | 12 | 13 | B) Data Structure Selection 14 | --------------------------- 15 | 16 | Search on Github for a merged pull request in a reasonably sized and popular project (>100 stars) which replaces a data structure in order to improve performance. 17 | 18 | Examine the use of this data structure, evaluating all the decision criteria discussed in the lecture, and report your findings. 19 | Do these criteria help indicate that the change in data structure would be beneficial? 20 | 21 | 22 | Submission 23 | ---------- 24 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet8]" in the subject line, before the start of the next VU at the latest. 25 | Try not to include attachments with a total size larger than 2 MiB. 
26 | -------------------------------------------------------------------------------- /exercises/sheet_09.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 9 2 | ================ 3 | 4 | A) Data Structure Benchmark Design and Implementation 5 | ----------------------------------------------------- 6 | 7 | Create a benchmark setup which can measure a given set of mixes of insertion/deletion and read/write operations on different containers of arbitrary types and size. (Compare with the "The Dangers of Intuition" slide in the Optimization / Data Structures chapter) 8 | For the insertion/deletion and read/write categories, ensure a 50%/50% frequency each (i.e., after an even number of insertion/deletion operations the number of elements is the same as at the start). Also implement the requested instruction mix using the minimum spacing possible, that is, for a total of 100 instructions, a ratio of 20% ins/del and 80% read/write should be implemented as 9 | 10 | ``` 11 | read 12 | write 13 | read 14 | write 15 | insertion 16 | read 17 | write 18 | read 19 | write 20 | deletion 21 | ... repeat x 10 22 | ``` 23 | 24 | rather than e.g. 25 | 26 | ``` 27 | read 28 | write 29 | ... repeat x 40 30 | insertion 31 | deletion 32 | ... repeat x 10 33 | ``` 34 | 35 | Make sure to **follow all the best practices regarding benchmark design which we outlined in the lecture**, and try to avoid any perturbation unrelated to the effect we want to measure. 
36 | 37 | Some additional notes on implementation specifics: 38 | 39 | * Pre-initialize all the data structures to N elements (you may reserve N+1 for the array) 40 | * Perform linear traversal, wrapping around at the end (you can also, optionally and additionally, benchmark operations at random locations) 41 | * You are free to select a programming language, but please choose one that compiles ahead of time to machine code 42 | * For the write and read operations, ensure that the values are used in some way and not compile-time constants, and that the compiler cannot throw away the operations 43 | * Your number of benchmark iterations should not necessarily be equal to the number of elements of the data structure! E.g. for the 10 and 1000 element cases it should be much larger; one option is to implement a fixed time frame for each benchmark and measure the number of operations completed during that time 44 | * For linked lists, to simulate a more realistic use case, allocate and free a new element each time insertion/deletion happens (you can also, optionally and additionally, benchmark using a single re-used element) 45 | * For linked lists, implement 2 different allocation policies: one which allocates the elements in sequence (as in, how they will be connected and traversed), and one which allocates them in a random order. 46 | 47 | 48 | B) Array-like vs. Linked Lists 49 | ------------------------------ 50 | 51 | Apply your benchmark over the cross product of the following settings. Exclude combinations which would run out of memory. 52 | Run these benchmarks **on LCC3**, and also on some local hardware available to you. 
53 | 54 | **Data Structures**: Array, linked list 55 | 56 | **Instruction Mix**: 57 | | ins/del | read/write | 58 | | -------:| ----------:| 59 | | 0% | 100% | 60 | | 1% | 99% | 61 | | 10% | 90% | 62 | | 50% | 50% | 63 | 64 | **Element Size**: 8 Byte, 512 Byte, 8 MB 65 | 66 | **Number of Elements**: 10, 1000, 100000, 10000000 67 | 68 | Plot and discuss your results. 69 | 70 | Submission 71 | ---------- 72 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet9]" in the subject line, before the start of the next VU at the latest. 73 | Try not to include attachments with a total size larger than 2 MiB. 74 | -------------------------------------------------------------------------------- /exercises/sheet_10.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 10 2 | ================= 3 | 4 | This exercise sheet should be worked on in groups. 5 | Perform all benchmarks on LCC3, using the benchmark structure defined in sheet 9. 6 | 7 | 8 | A) Unrolled Linked Lists 9 | ------------------------ 10 | 11 | Implement an "unrolled linked list" data structure (i.e. a linked list of potentially partially empty fixed-size chunks). Evaluate different chunk sizes, and benchmark it compared to arrays and standard linked lists. 12 | 13 | 14 | B) Tiered Arrays 15 | ---------------- 16 | 17 | Implement a "tiered array" data structure (i.e. a fixed set of indirection layers [at minimum one] which links to chunks of an array). Evaluate different configurations, and benchmark it compared to arrays and standard linked lists. 18 | 19 | 20 | C) Extended Benchmarking 21 | ------------------------ 22 | 23 | Extend the list benchmark setup with an option to use a random access pattern (make sure to minimize the overhead of selecting the random locations, e.g. by precomputing them!). 
24 | 25 | Evaluate and plot how this change affects the performance of the two baselines (arrays and linked lists) as well as your new, more complex data structures. 26 | 27 | 28 | Submission 29 | ---------- 30 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet10]" in the subject line, before the start of the next VU at the latest. 31 | Include your group composition in the email. 32 | Try not to include attachments with a total size larger than 2 MiB. 33 | -------------------------------------------------------------------------------- /exercises/sheet_11.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 11 2 | ================= 3 | 4 | This exercise sheet should be worked on **individually**. 5 | Perform all benchmarks on LCC3. 6 | 7 | 8 | A) Applying Memoization (optional) 9 | ---------------------------------- 10 | 11 | Apply basic hash-based memoization to `small_samples/delannoy` and benchmark your implementation. 12 | 13 | * What level of performance improvement can you achieve, both theoretically and practically? 14 | * What is the space complexity of your optimized version in terms of the parameters `x` and `y`? 15 | 16 | 17 | B) Algorithm Tabulation (optional) 18 | ---------------------------------- 19 | 20 | Use dynamic programming tabulation to implement the `delannoy` benchmark while only requiring `O(x)` additional space and no hashing. Benchmark this solution and compare the results to basic hash-based memoization. 21 | 22 | 23 | Submission 24 | ---------- 25 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet11]" in the subject line, before the start of the next VU at the latest. 26 | Try not to include attachments with a total size larger than 2 MiB. 
27 | -------------------------------------------------------------------------------- /exercises/sheet_12.md: -------------------------------------------------------------------------------- 1 | Exercise Sheet 12 (final) 2 | ========================= 3 | 4 | This exercise sheet about the Lua interpreter should be worked on **in groups**. 5 | Perform all benchmarks on LCC3. 6 | 7 | 8 | A) Setup and Basic Execution 9 | ---------------------------- 10 | 11 | Get the latest stable baseline Lua interpreter sources here: https://www.lua.org/download.html 12 | Compile it, and use it to run the benchmarks in `lua/fib.lua` ["the benchmark"], i.e.: 13 | 14 | ``` 15 | lua perf-oriented-dev/lua/fib.lua 16 | ``` 17 | 18 | Note the timing results you get for these initial runs; we will use them as our baseline. 19 | 20 | 21 | B) Profiling 22 | ------------ 23 | 24 | * Apply a profiler of your choice to study the behaviour of the Lua interpreter while running the benchmark. 25 | * Is the result sufficient to base optimization decisions on? Why? If not, use other tools or manually track and report additional information about the execution. 26 | 27 | Report and discuss your results. 28 | 29 | 30 | C) Code Understanding 31 | --------------------- 32 | 33 | * Describe the overall process of Lua execution in the interpreter. What are the major phases, and how much time do they take for the benchmark? 34 | * What does the option `LUA_USE_JUMPTABLE` do? Measure its performance impact on the benchmark. 35 | 36 | 37 | D) Optimization 38 | --------------- 39 | 40 | Optimize the Lua interpreter to more efficiently execute the benchmark. 
41 | Valid strategies include: 42 | 43 | * Compiler optimizations or hints 44 | * Any manual procedural or algorithmic optimizations 45 | * Making suitable assumptions / implementing heuristics based on the properties of the benchmark 46 | 47 | **Invalid** strategies are: 48 | 49 | * Anything which checks the source code (or its hash etc) against a table of pre-built or pre-optimized solutions 50 | * Anything which touches the input program 51 | * Obviously, anything which breaks the interpreter for any other valid Lua program 52 | 53 | Your tuned interpreters' best times for all 3 benchmarks will be compared against all other groups' times. 54 | 55 | 56 | Submission 57 | ---------- 58 | Please submit your solutions by email to peter.thoman at UIBK, using the string "[Perf2025-sheet12]" in the subject line, before the start of the next VU at the latest. 59 | Include your group composition in the email. 60 | Try not to include attachments with a total size larger than 2 MiB. 61 | -------------------------------------------------------------------------------- /larger_samples/README.md: -------------------------------------------------------------------------------- 1 | This folder contains some slightly larger C benchmark codes. 2 | 3 | Basic build instructions (from each individual benchmark directory): 4 | 5 | ```bash 6 | mkdir build 7 | cd build 8 | cmake .. 
-G Ninja -DCMAKE_BUILD_TYPE=Release 9 | ninja 10 | ``` 11 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.11) 2 | project(perf_prog_npb_bt) 3 | 4 | set_property(GLOBAL PROPERTY C_STANDARD 11) 5 | 6 | if(MSVC) 7 | add_compile_options(/W4) 8 | else() 9 | add_compile_options(-Wall -Wextra -Wno-unknown-pragmas -Wno-unused-parameter) 10 | endif() 11 | 12 | file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS src/*.c src/*.h) 13 | 14 | link_libraries(m) 15 | 16 | add_executable(npb_bt_w ${SOURCE_FILES}) 17 | target_include_directories(npb_bt_w PRIVATE src/w) 18 | 19 | add_executable(npb_bt_s ${SOURCE_FILES}) 20 | target_include_directories(npb_bt_s PRIVATE src/s) 21 | 22 | add_executable(npb_bt_a ${SOURCE_FILES}) 23 | target_include_directories(npb_bt_a PRIVATE src/a) 24 | 25 | add_executable(npb_bt_b ${SOURCE_FILES}) 26 | target_include_directories(npb_bt_b PRIVATE src/b) 27 | 28 | add_executable(npb_bt_c ${SOURCE_FILES}) 29 | target_include_directories(npb_bt_c PRIVATE src/c) 30 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/a/npbparams.h: -------------------------------------------------------------------------------- 1 | /* CLASS = A */ 2 | /* 3 | This file is generated automatically by the setparams utility. 4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. 
6 | */ 7 | #define PROBLEM_SIZE 64 8 | #define NITER_DEFAULT 200 9 | #define DT_DEFAULT 0.0008 10 | 11 | #define CONVERTDOUBLE false 12 | #define COMPILETIME "11 Nov 2013" 13 | #define NPBVERSION "3.3.1" 14 | #define CS1 "gcc" 15 | #define CS2 "$(CC)" 16 | #define CS3 "-lm" 17 | #define CS4 "(none)" 18 | #define CS5 "(unknown)" 19 | #define CS6 "(unknown)" 20 | #define CS7 "(none)" 21 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/add.c: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------// 2 | // // 3 | // This benchmark is an OpenMP C version of the NPB BT code. This OpenMP // 4 | // C version is developed by the Center for Manycore Programming at Seoul // 5 | // National University and derived from the OpenMP Fortran versions in // 6 | // "NPB3.3-OMP" developed by NAS. // 7 | // // 8 | // Permission to use, copy, distribute and modify this software for any // 9 | // purpose with or without fee is hereby granted. This software is // 10 | // provided "as is" without express or implied warranty. 
// 11 | // // 12 | // Information on NPB 3.3, including the technical report, the original // 13 | // specifications, source code, results and information on how to submit // 14 | // new results, is available at: // 15 | // // 16 | // http://www.nas.nasa.gov/Software/NPB/ // 17 | // // 18 | // Send comments or suggestions for this OpenMP C version to // 19 | // cmp@aces.snu.ac.kr // 20 | // // 21 | // Center for Manycore Programming // 22 | // School of Computer Science and Engineering // 23 | // Seoul National University // 24 | // Seoul 151-744, Korea // 25 | // // 26 | // E-mail: cmp@aces.snu.ac.kr // 27 | // // 28 | //-------------------------------------------------------------------------// 29 | 30 | //-------------------------------------------------------------------------// 31 | // Authors: Sangmin Seo, Jungwon Kim, Jun Lee, Jeongho Nah, Gangwon Jo, // 32 | // and Jaejin Lee // 33 | //-------------------------------------------------------------------------// 34 | 35 | #include "header.h" 36 | #include "timers.h" 37 | 38 | //--------------------------------------------------------------------- 39 | // addition of update to the vector u 40 | //--------------------------------------------------------------------- 41 | void add() 42 | { 43 | int i, j, k, m; 44 | 45 | if (timeron) timer_start(t_add); 46 | #pragma omp parallel for default(shared) private(i,j,k,m) 47 | for (k = 1; k <= grid_points[2]-2; k++) { 48 | for (j = 1; j <= grid_points[1]-2; j++) { 49 | for (i = 1; i <= grid_points[0]-2; i++) { 50 | for (m = 0; m < 5; m++) { 51 | u[k][j][i][m] = u[k][j][i][m] + rhs[k][j][i][m]; 52 | } 53 | } 54 | } 55 | } 56 | if (timeron) timer_stop(t_add); 57 | } 58 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/adi.c: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------// 2 | // // 3 | // This 
benchmark is an OpenMP C version of the NPB BT code. This OpenMP // 4 | // C version is developed by the Center for Manycore Programming at Seoul // 5 | // National University and derived from the OpenMP Fortran versions in // 6 | // "NPB3.3-OMP" developed by NAS. // 7 | // // 8 | // Permission to use, copy, distribute and modify this software for any // 9 | // purpose with or without fee is hereby granted. This software is // 10 | // provided "as is" without express or implied warranty. // 11 | // // 12 | // Information on NPB 3.3, including the technical report, the original // 13 | // specifications, source code, results and information on how to submit // 14 | // new results, is available at: // 15 | // // 16 | // http://www.nas.nasa.gov/Software/NPB/ // 17 | // // 18 | // Send comments or suggestions for this OpenMP C version to // 19 | // cmp@aces.snu.ac.kr // 20 | // // 21 | // Center for Manycore Programming // 22 | // School of Computer Science and Engineering // 23 | // Seoul National University // 24 | // Seoul 151-744, Korea // 25 | // // 26 | // E-mail: cmp@aces.snu.ac.kr // 27 | // // 28 | //-------------------------------------------------------------------------// 29 | 30 | //-------------------------------------------------------------------------// 31 | // Authors: Sangmin Seo, Jungwon Kim, Jun Lee, Jeongho Nah, Gangwon Jo, // 32 | // and Jaejin Lee // 33 | //-------------------------------------------------------------------------// 34 | 35 | #include "header.h" 36 | 37 | void adi() 38 | { 39 | compute_rhs(); 40 | 41 | x_solve(); 42 | 43 | y_solve(); 44 | 45 | z_solve(); 46 | 47 | add(); 48 | } 49 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/b/npbparams.h: -------------------------------------------------------------------------------- 1 | /* CLASS = B */ 2 | /* 3 | This file is generated automatically by the setparams utility. 
4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. 6 | */ 7 | #define PROBLEM_SIZE 102 8 | #define NITER_DEFAULT 200 9 | #define DT_DEFAULT 0.0003 10 | 11 | #define CONVERTDOUBLE false 12 | #define COMPILETIME "11 Nov 2013" 13 | #define NPBVERSION "3.3.1" 14 | #define CS1 "gcc" 15 | #define CS2 "$(CC)" 16 | #define CS3 "-lm" 17 | #define CS4 "(none)" 18 | #define CS5 "(unknown)" 19 | #define CS6 "(unknown)" 20 | #define CS7 "(none)" 21 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/c/npbparams.h: -------------------------------------------------------------------------------- 1 | /* CLASS = C */ 2 | /* 3 | This file is generated automatically by the setparams utility. 4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. 6 | */ 7 | #define PROBLEM_SIZE 162 8 | #define NITER_DEFAULT 200 9 | #define DT_DEFAULT 0.0001 10 | 11 | #define CONVERTDOUBLE false 12 | #define COMPILETIME "11 Nov 2013" 13 | #define NPBVERSION "3.3.1" 14 | #define CS1 "gcc" 15 | #define CS2 "$(CC)" 16 | #define CS3 "-lm" 17 | #define CS4 "(none)" 18 | #define CS5 "(unknown)" 19 | #define CS6 "(unknown)" 20 | #define CS7 "(none)" 21 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/c_print_results.c: -------------------------------------------------------------------------------- 1 | /*****************************************************************/ 2 | /****** C _ P R I N T _ R E S U L T S ******/ 3 | /*****************************************************************/ 4 | #include 5 | #include 6 | #ifdef _OPENMP 7 | #include 8 | #endif 9 | 10 | void c_print_results( char *name, 11 | char class, 12 | int n1, 13 | int n2, 14 | int n3, 15 | int niter, 16 | double t, 17 | double mops, 18 | char *optype, 19 | int passed_verification, 20 | char *npbversion, 21 | 
char *compiletime, 22 | char *cc, 23 | char *clink, 24 | char *c_lib, 25 | char *c_inc, 26 | char *cflags, 27 | char *clinkflags ) 28 | { 29 | int num_threads, max_threads; 30 | 31 | 32 | max_threads = 1; 33 | num_threads = 1; 34 | 35 | /* figure out number of threads used */ 36 | #ifdef _OPENMP 37 | max_threads = omp_get_max_threads(); 38 | #pragma omp parallel shared(num_threads) 39 | { 40 | #pragma omp master 41 | num_threads = omp_get_num_threads(); 42 | } 43 | #endif 44 | 45 | 46 | printf( "\n\n %s Benchmark Completed\n", name ); 47 | 48 | printf( " Class = %c\n", class ); 49 | 50 | if( n3 == 0 ) { 51 | long nn = n1; 52 | if ( n2 != 0 ) nn *= n2; 53 | printf( " Size = %12ld\n", nn ); /* as in IS */ 54 | } 55 | else 56 | printf( " Size = %4dx%4dx%4d\n", n1,n2,n3 ); 57 | 58 | printf( " Iterations = %12d\n", niter ); 59 | 60 | // printf( " Time in seconds = %12.2f\n", t ); 61 | 62 | printf( " Total threads = %12d\n", num_threads); 63 | 64 | printf( " Avail threads = %12d\n", max_threads); 65 | 66 | if (num_threads != max_threads) 67 | printf( " Warning: Threads used differ from threads available\n"); 68 | 69 | // printf( " Mop/s total = %12.2f\n", mops ); 70 | 71 | // printf( " Mop/s/thread = %12.2f\n", 72 | //mops/(double)num_threads ); 73 | 74 | printf( " Operation type = %24s\n", optype); 75 | 76 | if( passed_verification < 0 ) 77 | printf( " Verification = NOT PERFORMED\n" ); 78 | else if( passed_verification ) 79 | printf( " Verification = SUCCESSFUL\n" ); 80 | else 81 | printf( " Verification = UNSUCCESSFUL\n" ); 82 | 83 | printf( " Version = %12s\n", npbversion ); 84 | 85 | printf( " Compile date = %12s\n", compiletime ); 86 | 87 | printf( "\n Compile options:\n" ); 88 | 89 | printf( " CC = %s\n", cc ); 90 | 91 | printf( " CLINK = %s\n", clink ); 92 | 93 | printf( " C_LIB = %s\n", c_lib ); 94 | 95 | printf( " C_INC = %s\n", c_inc ); 96 | 97 | printf( " CFLAGS = %s\n", cflags ); 98 | 99 | printf( " CLINKFLAGS = %s\n", clinkflags ); 100 | 101 | printf( 
"\n--------------------------------------\n"); 102 | printf( " Please send all errors/feedbacks to:\n"); 103 | printf( " Center for Manycore Programming\n"); 104 | printf( " cmp@aces.snu.ac.kr\n"); 105 | printf( " http://aces.snu.ac.kr\n"); 106 | printf( "--------------------------------------\n"); 107 | } 108 | 109 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/c_timers.c: -------------------------------------------------------------------------------- 1 | #include "wtime.h" 2 | #include 3 | #ifdef _OPENMP 4 | #include 5 | #endif 6 | 7 | /* Prototype */ 8 | void wtime( double * ); 9 | 10 | 11 | /*****************************************************************/ 12 | /****** E L A P S E D _ T I M E ******/ 13 | /*****************************************************************/ 14 | static double elapsed_time( void ) 15 | { 16 | double t; 17 | 18 | #if defined(_OPENMP) && (_OPENMP > 200010) 19 | /* Use the OpenMP timer if we can */ 20 | t = omp_get_wtime(); 21 | #else 22 | wtime( &t ); 23 | #endif 24 | return( t ); 25 | } 26 | 27 | 28 | static double start[64], elapsed[64]; 29 | static unsigned count[64]; 30 | #ifdef _OPENMP 31 | #pragma omp threadprivate(start, elapsed, count) 32 | #endif 33 | 34 | /*****************************************************************/ 35 | /****** T I M E R _ C L E A R ******/ 36 | /*****************************************************************/ 37 | void timer_clear( int n ) 38 | { 39 | elapsed[n] = 0.0; 40 | count[n] = 0; 41 | } 42 | 43 | 44 | /*****************************************************************/ 45 | /****** T I M E R _ S T A R T ******/ 46 | /*****************************************************************/ 47 | void timer_start( int n ) 48 | { 49 | start[n] = elapsed_time(); 50 | } 51 | 52 | 53 | /*****************************************************************/ 54 | /****** T I M E R _ S T O P ******/ 55 | 
/*****************************************************************/ 56 | void timer_stop( int n ) 57 | { 58 | double t, now; 59 | 60 | now = elapsed_time(); 61 | t = now - start[n]; 62 | elapsed[n] += t; 63 | count[n]++; 64 | 65 | } 66 | 67 | 68 | /*****************************************************************/ 69 | /****** T I M E R _ R E A D ******/ 70 | /*****************************************************************/ 71 | double timer_read( int n ) 72 | { 73 | return( elapsed[n] ); 74 | } 75 | 76 | unsigned timer_count( int n ) 77 | { 78 | return count[n]; 79 | } 80 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/error.c: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------// 2 | // // 3 | // This benchmark is an OpenMP C version of the NPB BT code. This OpenMP // 4 | // C version is developed by the Center for Manycore Programming at Seoul // 5 | // National University and derived from the OpenMP Fortran versions in // 6 | // "NPB3.3-OMP" developed by NAS. // 7 | // // 8 | // Permission to use, copy, distribute and modify this software for any // 9 | // purpose with or without fee is hereby granted. This software is // 10 | // provided "as is" without express or implied warranty. 
// 11 | // // 12 | // Information on NPB 3.3, including the technical report, the original // 13 | // specifications, source code, results and information on how to submit // 14 | // new results, is available at: // 15 | // // 16 | // http://www.nas.nasa.gov/Software/NPB/ // 17 | // // 18 | // Send comments or suggestions for this OpenMP C version to // 19 | // cmp@aces.snu.ac.kr // 20 | // // 21 | // Center for Manycore Programming // 22 | // School of Computer Science and Engineering // 23 | // Seoul National University // 24 | // Seoul 151-744, Korea // 25 | // // 26 | // E-mail: cmp@aces.snu.ac.kr // 27 | // // 28 | //-------------------------------------------------------------------------// 29 | 30 | //-------------------------------------------------------------------------// 31 | // Authors: Sangmin Seo, Jungwon Kim, Jun Lee, Jeongho Nah, Gangwon Jo, // 32 | // and Jaejin Lee // 33 | //-------------------------------------------------------------------------// 34 | 35 | #include 36 | #include "header.h" 37 | 38 | //--------------------------------------------------------------------- 39 | // this function computes the norm of the difference between the 40 | // computed solution and the exact solution 41 | //--------------------------------------------------------------------- 42 | void error_norm(double rms[5]) 43 | { 44 | int i, j, k, m, d; 45 | double xi, eta, zeta, u_exact[5], add; 46 | double rms_local[5]; 47 | 48 | for (m = 0; m < 5; m++) { 49 | rms[m] = 0.0; 50 | } 51 | 52 | #pragma omp parallel default(shared) \ 53 | private(i,j,k,m,zeta,eta,xi,add,u_exact,rms_local) shared(rms) 54 | { 55 | for (m = 0; m < 5; m++) { 56 | rms_local[m] = 0.0; 57 | } 58 | #pragma omp for nowait 59 | for (k = 0; k <= grid_points[2]-1; k++) { 60 | zeta = (double)(k) * dnzm1; 61 | for (j = 0; j <= grid_points[1]-1; j++) { 62 | eta = (double)(j) * dnym1; 63 | for (i = 0; i <= grid_points[0]-1; i++) { 64 | xi = (double)(i) * dnxm1; 65 | exact_solution(xi, eta, zeta, 
u_exact); 66 | 67 | for (m = 0; m < 5; m++) { 68 | add = u[k][j][i][m]-u_exact[m]; 69 | rms_local[m] = rms_local[m] + add*add; 70 | } 71 | } 72 | } 73 | } 74 | for (m = 0; m < 5; m++) { 75 | #pragma omp atomic 76 | rms[m] += rms_local[m]; 77 | } 78 | } //end parallel 79 | 80 | for (m = 0; m < 5; m++) { 81 | for (d = 0; d < 3; d++) { 82 | rms[m] = rms[m] / (double)(grid_points[d]-2); 83 | } 84 | rms[m] = sqrt(rms[m]); 85 | } 86 | } 87 | 88 | 89 | void rhs_norm(double rms[5]) 90 | { 91 | int i, j, k, d, m; 92 | double add; 93 | double rms_local[5]; 94 | 95 | for (m = 0; m < 5; m++) { 96 | rms[m] = 0.0; 97 | } 98 | 99 | #pragma omp parallel default(shared) private(i,j,k,m,add,rms_local) \ 100 | shared(rms) 101 | { 102 | for (m = 0; m < 5; m++) { 103 | rms_local[m] = 0.0; 104 | } 105 | #pragma omp for nowait 106 | for (k = 1; k <= grid_points[2]-2; k++) { 107 | for (j = 1; j <= grid_points[1]-2; j++) { 108 | for (i = 1; i <= grid_points[0]-2; i++) { 109 | for (m = 0; m < 5; m++) { 110 | add = rhs[k][j][i][m]; 111 | rms_local[m] = rms_local[m] + add*add; 112 | } 113 | } 114 | } 115 | } 116 | for (m = 0; m < 5; m++) { 117 | #pragma omp atomic 118 | rms[m] += rms_local[m]; 119 | } 120 | } //end parallel 121 | 122 | for (m = 0; m < 5; m++) { 123 | for (d = 0; d < 3; d++) { 124 | rms[m] = rms[m] / (double)(grid_points[d]-2); 125 | } 126 | rms[m] = sqrt(rms[m]); 127 | } 128 | } 129 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/exact_solution.c: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------// 2 | // // 3 | // This benchmark is an OpenMP C version of the NPB BT code. This OpenMP // 4 | // C version is developed by the Center for Manycore Programming at Seoul // 5 | // National University and derived from the OpenMP Fortran versions in // 6 | // "NPB3.3-OMP" developed by NAS. 
// 7 | // // 8 | // Permission to use, copy, distribute and modify this software for any // 9 | // purpose with or without fee is hereby granted. This software is // 10 | // provided "as is" without express or implied warranty. // 11 | // // 12 | // Information on NPB 3.3, including the technical report, the original // 13 | // specifications, source code, results and information on how to submit // 14 | // new results, is available at: // 15 | // // 16 | // http://www.nas.nasa.gov/Software/NPB/ // 17 | // // 18 | // Send comments or suggestions for this OpenMP C version to // 19 | // cmp@aces.snu.ac.kr // 20 | // // 21 | // Center for Manycore Programming // 22 | // School of Computer Science and Engineering // 23 | // Seoul National University // 24 | // Seoul 151-744, Korea // 25 | // // 26 | // E-mail: cmp@aces.snu.ac.kr // 27 | // // 28 | //-------------------------------------------------------------------------// 29 | 30 | //-------------------------------------------------------------------------// 31 | // Authors: Sangmin Seo, Jungwon Kim, Jun Lee, Jeongho Nah, Gangwon Jo, // 32 | // and Jaejin Lee // 33 | //-------------------------------------------------------------------------// 34 | 35 | #include "header.h" 36 | 37 | //--------------------------------------------------------------------- 38 | // this function returns the exact solution at point xi, eta, zeta 39 | //--------------------------------------------------------------------- 40 | void exact_solution(double xi, double eta, double zeta, double dtemp[5]) 41 | { 42 | int m; 43 | 44 | for (m = 0; m < 5; m++) { 45 | dtemp[m] = ce[m][0] + 46 | xi*(ce[m][1] + xi*(ce[m][4] + xi*(ce[m][7] + xi*ce[m][10]))) + 47 | eta*(ce[m][2] + eta*(ce[m][5] + eta*(ce[m][8] + eta*ce[m][11])))+ 48 | zeta*(ce[m][3] + zeta*(ce[m][6] + zeta*(ce[m][9] + 49 | zeta*ce[m][12]))); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/print_results.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #ifdef _OPENMP 4 | #include 5 | #endif 6 | 7 | #include "type.h" 8 | 9 | 10 | void print_results(char *name, char class, int n1, int n2, int n3, int niter, 11 | double t, double mops, char *optype, logical verified, char *npbversion, 12 | char *compiletime, char *cs1, char *cs2, char *cs3, char *cs4, char *cs5, 13 | char *cs6, char *cs7) 14 | { 15 | char size[16]; 16 | int j; 17 | int num_threads, max_threads; 18 | 19 | max_threads = 1; 20 | num_threads = 1; 21 | 22 | // figure out number of threads used 23 | #ifdef _OPENMP 24 | max_threads = omp_get_max_threads(); 25 | #pragma omp parallel shared(num_threads) 26 | { 27 | #pragma omp master 28 | num_threads = omp_get_num_threads(); 29 | } 30 | #endif 31 | 32 | 33 | printf( "\n\n %s Benchmark Completed.\n", name ); 34 | printf( " Class = %12c\n", class ); 35 | 36 | // If this is not a grid-based problem (EP, FT, CG), then 37 | // we only print n1, which contains some measure of the 38 | // problem size. In that case, n2 and n3 are both zero. 39 | // Otherwise, we print the grid size n1xn2xn3 40 | 41 | if ( ( n2 == 0 ) && ( n3 == 0 ) ) { 42 | if ( ( name[0] == 'E' ) && ( name[1] == 'P' ) ) { 43 | sprintf( size, "%15.0lf", pow(2.0, n1) ); 44 | j = 14; 45 | if ( size[j] == '.' 
) { 46 | size[j] = ' '; 47 | j--; 48 | } 49 | size[j+1] = '\0'; 50 | printf( " Size = %15s\n", size ); 51 | } else { 52 | printf( " Size = %12d\n", n1 ); 53 | } 54 | } else { 55 | printf( " Size = %4dx%4dx%4d\n", n1, n2, n3 ); 56 | } 57 | 58 | printf( " Iterations = %12d\n", niter ); 59 | // printf( " Time in seconds = %12.2lf\n", t ); 60 | 61 | printf( " Total threads = %12d\n", num_threads ); 62 | printf( " Avail threads = %12d\n", max_threads ); 63 | if ( num_threads != max_threads ) 64 | printf( " Warning: Threads used differ from threads available\n" ); 65 | 66 | //printf( " Mop/s total = %15.2lf\n", mops ); 67 | //printf( " Mop/s/thread = %15.2lf\n", mops/(double)num_threads ); 68 | 69 | printf( " Operation type = %24s\n", optype ); 70 | if ( verified ) 71 | printf( " Verification = %12s\n", "SUCCESSFUL" ); 72 | else 73 | printf( " Verification = %12s\n", "UNSUCCESSFUL" ); 74 | printf( " Version = %12s\n", npbversion ); 75 | printf( " Compile date = %12s\n", compiletime ); 76 | 77 | printf( "\n Compile options:\n" 78 | " CC = %s\n", cs1 ); 79 | printf( " CLINK = %s\n", cs2 ); 80 | printf( " C_LIB = %s\n", cs3 ); 81 | printf( " C_INC = %s\n", cs4 ); 82 | printf( " CFLAGS = %s\n", cs5 ); 83 | printf( " CLINKFLAGS = %s\n", cs6 ); 84 | printf( " RAND = %s\n", cs7 ); 85 | 86 | printf( "\n--------------------------------------\n" 87 | " Please send all errors/feedbacks to:\n" 88 | " Center for Manycore Programming\n" 89 | " cmp@aces.snu.ac.kr\n" 90 | " http://aces.snu.ac.kr\n" 91 | "--------------------------------------\n\n"); 92 | } 93 | 94 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/print_results.h: -------------------------------------------------------------------------------- 1 | #ifndef __PRINT_RESULTS_H__ 2 | #define __PRINT_RESULTS_H__ 3 | 4 | void print_results(char *name, char class, int n1, int n2, int n3, int niter, 5 | double t, double mops, char *optype, logical verified, char 
*npbversion, 6 | char *compiletime, char *cs1, char *cs2, char *cs3, char *cs4, char *cs5, 7 | char *cs6, char *cs7); 8 | 9 | #endif //__PRINT_RESULTS_H__ 10 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/randdp.h: -------------------------------------------------------------------------------- 1 | #ifndef __RANDDP_H__ 2 | #define __RANDDP_H__ 3 | 4 | double randlc( double *x, double a ); 5 | void vranlc( int n, double *x, double a, double y[] ); 6 | 7 | #endif 8 | 9 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/s/npbparams.h: -------------------------------------------------------------------------------- 1 | /* CLASS = S */ 2 | /* 3 | This file is generated automatically by the setparams utility. 4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. 6 | */ 7 | #define PROBLEM_SIZE 12 8 | #define NITER_DEFAULT 60 9 | #define DT_DEFAULT 0.010 10 | 11 | #define CONVERTDOUBLE false 12 | #define COMPILETIME "11 Nov 2013" 13 | #define NPBVERSION "3.3.1" 14 | #define CS1 "gcc" 15 | #define CS2 "$(CC)" 16 | #define CS3 "-lm" 17 | #define CS4 "(none)" 18 | #define CS5 "(unknown)" 19 | #define CS6 "(unknown)" 20 | #define CS7 "(none)" 21 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/timers.h: -------------------------------------------------------------------------------- 1 | #ifndef __TIMERS_H__ 2 | #define __TIMERS_H__ 3 | 4 | void timer_clear( int n ); 5 | void timer_start( int n ); 6 | void timer_stop( int n ); 7 | double timer_read( int n ); 8 | unsigned timer_count( int n ); 9 | 10 | #endif 11 | 12 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/type.h: -------------------------------------------------------------------------------- 1 | #ifndef __TYPE_H__ 2 | 
#define __TYPE_H__ 3 | 4 | typedef enum logical { false, true } logical; 5 | typedef struct { 6 | double real; 7 | double imag; 8 | } dcomplex; 9 | 10 | 11 | #define min(x,y) ((x) < (y) ? (x) : (y)) 12 | #define max(x,y) ((x) > (y) ? (x) : (y)) 13 | 14 | #endif //__TYPE_H__ 15 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/w/npbparams.h: -------------------------------------------------------------------------------- 1 | /* CLASS = W */ 2 | /* 3 | This file is generated automatically by the setparams utility. 4 | It sets the number of processors and the class of the NPB 5 | in this directory. Do not modify it by hand. 6 | */ 7 | #define PROBLEM_SIZE 24 8 | #define NITER_DEFAULT 200 9 | #define DT_DEFAULT 0.0008 10 | 11 | #define CONVERTDOUBLE false 12 | #define COMPILETIME "11 Nov 2013" 13 | #define NPBVERSION "3.3.1" 14 | #define CS1 "gcc" 15 | #define CS2 "$(CC)" 16 | #define CS3 "-lm" 17 | #define CS4 "(none)" 18 | #define CS5 "(unknown)" 19 | #define CS6 "(unknown)" 20 | #define CS7 "(none)" 21 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/work_lhs.h: -------------------------------------------------------------------------------- 1 | //-------------------------------------------------------------------------// 2 | // // 3 | // This benchmark is an OpenMP C version of the NPB BT code. This OpenMP // 4 | // C version is developed by the Center for Manycore Programming at Seoul // 5 | // National University and derived from the OpenMP Fortran versions in // 6 | // "NPB3.3-OMP" developed by NAS. // 7 | // // 8 | // Permission to use, copy, distribute and modify this software for any // 9 | // purpose with or without fee is hereby granted. This software is // 10 | // provided "as is" without express or implied warranty. 
// 11 | // // 12 | // Information on NPB 3.3, including the technical report, the original // 13 | // specifications, source code, results and information on how to submit // 14 | // new results, is available at: // 15 | // // 16 | // http://www.nas.nasa.gov/Software/NPB/ // 17 | // // 18 | // Send comments or suggestions for this OpenMP C version to // 19 | // cmp@aces.snu.ac.kr // 20 | // // 21 | // Center for Manycore Programming // 22 | // School of Computer Science and Engineering // 23 | // Seoul National University // 24 | // Seoul 151-744, Korea // 25 | // // 26 | // E-mail: cmp@aces.snu.ac.kr // 27 | // // 28 | //-------------------------------------------------------------------------// 29 | 30 | //-------------------------------------------------------------------------// 31 | // Authors: Sangmin Seo, Jungwon Kim, Jun Lee, Jeongho Nah, Gangwon Jo, // 32 | // and Jaejin Lee // 33 | //-------------------------------------------------------------------------// 34 | 35 | //--------------------------------------------------------------------- 36 | //--------------------------------------------------------------------- 37 | // 38 | // work_lhs.h 39 | // 40 | //--------------------------------------------------------------------- 41 | //--------------------------------------------------------------------- 42 | 43 | /* common /work_lhs/ */ 44 | extern double fjac[PROBLEM_SIZE+1][5][5]; 45 | extern double njac[PROBLEM_SIZE+1][5][5]; 46 | extern double lhs [PROBLEM_SIZE+1][3][5][5]; 47 | extern double tmp1, tmp2, tmp3; 48 | #pragma omp threadprivate(fjac,njac,lhs,tmp1,tmp2,tmp3) 49 | 50 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/wtime.c: -------------------------------------------------------------------------------- 1 | #include "wtime.h" 2 | #include 3 | #ifndef DOS 4 | #include 5 | #endif 6 | 7 | void wtime(double *t) 8 | { 9 | /* a generic timer */ 10 | static int sec = -1; 11 | struct timeval 
tv; 12 | gettimeofday(&tv, (void *)0); 13 | if (sec < 0) sec = tv.tv_sec; 14 | *t = (tv.tv_sec - sec) + 1.0e-6*tv.tv_usec; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /larger_samples/npb_bt/src/wtime.h: -------------------------------------------------------------------------------- 1 | /* C/Fortran interface is different on different machines. 2 | * You may need to tweak this. 3 | */ 4 | 5 | 6 | #if defined(IBM) 7 | #define wtime wtime 8 | #elif defined(CRAY) 9 | #define wtime WTIME 10 | #else 11 | #define wtime wtime_ 12 | #endif 13 | -------------------------------------------------------------------------------- /larger_samples/ssca2/CMakeLists.txt: --------------------------------------------------------------------------------
# Build script for the SSCA2 sample: a single executable `ssca2` built from
# the benchmark sources together with the bundled sprng2.0 generator.
cmake_minimum_required(VERSION 3.11)
project(perf_prog_npb_bt)

# C_STANDARD is a per-target property initialised from CMAKE_C_STANDARD;
# setting it as a GLOBAL property (as this file used to) has no effect.
set(CMAKE_C_STANDARD 11)

file(GLOB_RECURSE SOURCE_FILES CONFIGURE_DEPENDS src/*.c src/*.h src/sprng2.0/*.c src/sprng2.0/*.h)
include_directories(src/sprng2.0/include)

link_libraries(m)
# CMake arguments are separated by whitespace, not commas. The former
# "-DLittleEndian," was handed to the compiler with the comma attached
# instead of defining the macro LittleEndian cleanly.
add_definitions(-DLittleEndian -DAdd__)

add_executable(ssca2 ${SOURCE_FILES})
-------------------------------------------------------------------------------- /larger_samples/ssca2/src/defs.h: -------------------------------------------------------------------------------- 1 | #ifndef _DEFS_H 2 | #define _DEFS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #ifdef _OPENMP 11 | #include 12 | #endif 13 | #include "sprng.h" 14 | 15 | /* Uncomment this line, or use the flag -DDIAGNOSTIC for 16 | iverbose benchmark output */ 17 | /* #define DIAGNOSTIC */ 18 | 19 | /* Uncomment this line, or use the flag -DVERIFYK4 to 20 | generate a 2D torus */ 21 | /* #define VERIFYK4 */ 22 | 23 | #define INT_T int 24 | #define DOUBLE_T double 25 | 26 | #if defined(MASSIVE_GRAPH) 27 | #define VERT_T long 28 | #define LONG_T long 29 | #elif
defined(LARGE_GRAPH) 30 | #define VERT_T int 31 | #define LONG_T long 32 | #else 33 | #define VERT_T int 34 | #define LONG_T int 35 | #endif 36 | 37 | #define WEIGHT_T VERT_T 38 | 39 | #ifdef _OPENMP 40 | extern int NUM_THREADS; 41 | #endif 42 | 43 | /* Data structure for representing tuples 44 | * in the Scalable Data Generator */ 45 | typedef struct 46 | { 47 | /* Edge lists */ 48 | VERT_T* startVertex; 49 | VERT_T* endVertex; 50 | WEIGHT_T* weight; 51 | 52 | /* No. of edges */ 53 | LONG_T m; 54 | 55 | /* No. of vertices */ 56 | LONG_T n; 57 | 58 | } graphSDG; 59 | 60 | /* The graph data structure*/ 61 | typedef struct 62 | { 63 | LONG_T n; 64 | LONG_T m; 65 | 66 | /* Directed edges out of vertex vi (say, k edges -- v1, v2, ... vk) 67 | * are stored in the contiguous block endV[numEdges[i] .. numEdges[i+1]] 68 | * So, numEdges[i+1] - numEdges[i] = k in this case */ 69 | VERT_T* endV; 70 | LONG_T* numEdges; 71 | WEIGHT_T* weight; 72 | 73 | } graph; 74 | 75 | /* Edge data structure for Kernel 2 */ 76 | typedef struct 77 | { 78 | VERT_T startVertex; 79 | VERT_T endVertex; 80 | WEIGHT_T w; 81 | LONG_T e; 82 | } edge; 83 | 84 | /* Predecessor list data structure for Kernel 4 */ 85 | typedef struct 86 | { 87 | VERT_T* list; 88 | VERT_T count; 89 | LONG_T degree; 90 | } plist; 91 | 92 | 93 | /* Global variables */ 94 | extern INT_T SCALE; 95 | extern LONG_T N; 96 | extern LONG_T M; 97 | 98 | /* R-MAT (graph generator) parameters */ 99 | extern DOUBLE_T A; 100 | extern DOUBLE_T B; 101 | extern DOUBLE_T C; 102 | extern DOUBLE_T D; 103 | 104 | extern WEIGHT_T MaxIntWeight; 105 | extern INT_T SubGraphPathLength; 106 | extern INT_T K4approx; 107 | 108 | /* Function declarations */ 109 | 110 | void init(int); 111 | double genScalData(graphSDG*); 112 | double gen2DTorus(graphSDG*); 113 | 114 | /* The four kernels */ 115 | double computeGraph(graph*, graphSDG*); 116 | double getStartLists(graph*, edge**, INT_T*); 117 | double findSubGraphs(graph*, edge*, INT_T); 118 | double 
betweennessCentrality(graph*, DOUBLE_T *); 119 | 120 | /* other useful routines */ 121 | void prefix_sums(LONG_T*, LONG_T*, LONG_T*, LONG_T); 122 | DOUBLE_T get_seconds(void); 123 | #endif 124 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/findSubGraphs.c: -------------------------------------------------------------------------------- 1 | #include "defs.h" 2 | 3 | double findSubGraphs(graph* G, 4 | edge* maxIntWtList, int maxIntWtListSize) { 5 | 6 | VERT_T* S; 7 | LONG_T *start; 8 | char* visited; 9 | LONG_T *pSCount; 10 | #ifdef _OPENMP 11 | omp_lock_t* vLock; 12 | #endif 13 | 14 | LONG_T phase_num, numPhases; 15 | LONG_T count; 16 | 17 | double elapsed_time = get_seconds(); 18 | 19 | numPhases = SubGraphPathLength + 1; 20 | 21 | #ifdef _OPENMP 22 | omp_set_num_threads(NUM_THREADS); 23 | #pragma omp parallel 24 | { 25 | #endif 26 | 27 | VERT_T *pS, *pSt; 28 | LONG_T pCount, pS_size; 29 | LONG_T v, w, search_num; 30 | int tid, nthreads; 31 | 32 | LONG_T j, k, vert, n; 33 | 34 | #ifdef _OPENMP 35 | LONG_T i; 36 | tid = omp_get_thread_num(); 37 | nthreads = omp_get_num_threads(); 38 | #else 39 | tid = 0; 40 | nthreads = 1; 41 | #endif 42 | 43 | n = G->n; 44 | 45 | pS_size = n/nthreads + 1; 46 | pS = (VERT_T *) malloc(pS_size*sizeof(VERT_T)); 47 | assert(pS != NULL); 48 | 49 | if (tid == 0) { 50 | S = (VERT_T *) malloc(n*sizeof(VERT_T)); 51 | visited = (char *) calloc(n, sizeof(char)); 52 | start = (LONG_T *) calloc((numPhases+2), sizeof(LONG_T)); 53 | pSCount = (LONG_T *) malloc((nthreads+1)*sizeof(LONG_T)); 54 | #ifdef _OPENMP 55 | vLock = (omp_lock_t *) malloc(n*sizeof(omp_lock_t)); 56 | #endif 57 | } 58 | 59 | #ifdef _OPENMP 60 | #pragma omp barrier 61 | 62 | #pragma omp for 63 | for (i=0; inumEdges[v]; jnumEdges[v+1]; j++) { 105 | w = G->endV[j]; 106 | if (v == w) 107 | continue; 108 | #ifdef _OPENMP 109 | int myLock = omp_test_lock(&vLock[w]); 110 | if (myLock) { 111 | #endif 112 | if (visited[w] != 
(char) 1) { 113 | visited[w] = (char) 1; 114 | if (pCount == pS_size) { 115 | /* Resize pS */ 116 | pSt = (VERT_T *) 117 | malloc(2*pS_size*sizeof(VERT_T)); 118 | memcpy(pSt, pS, pS_size*sizeof(VERT_T)); 119 | free(pS); 120 | pS = pSt; 121 | pS_size = 2*pS_size; 122 | } 123 | pS[pCount++] = w; 124 | } 125 | #ifdef _OPENMP 126 | omp_unset_lock(&vLock[w]); 127 | } 128 | #endif 129 | } 130 | } 131 | 132 | 133 | #ifdef _OPENMP 134 | #pragma omp barrier 135 | #endif 136 | pSCount[tid+1] = pCount; 137 | 138 | #ifdef _OPENMP 139 | #pragma omp barrier 140 | #endif 141 | 142 | if (tid == 0) { 143 | pSCount[0] = start[phase_num+1]; 144 | for(k=1; k<=nthreads; k++) { 145 | pSCount[k] = pSCount[k-1] + pSCount[k]; 146 | } 147 | start[phase_num+2] = pSCount[nthreads]; 148 | count = pSCount[nthreads]; 149 | phase_num++; 150 | } 151 | 152 | #ifdef _OPENMP 153 | #pragma omp barrier 154 | #endif 155 | for (k = pSCount[tid]; k < pSCount[tid+1]; k++) { 156 | S[k] = pS[k-pSCount[tid]]; 157 | } 158 | 159 | 160 | #ifdef _OPENMP 161 | #pragma omp barrier 162 | #endif 163 | } /* End of search */ 164 | 165 | if (tid == 0) { 166 | fprintf(stderr, "Search from <%ld, %ld>, number of vertices visited:" 167 | " %ld\n", (long) S[0], (long) S[1], (long) count); 168 | } 169 | 170 | } /* End of outer loop */ 171 | 172 | free(pS); 173 | #ifdef _OPENMP 174 | #pragma omp barrier 175 | 176 | #pragma omp for 177 | for (i=0; i 0) { 107 | src[offset+count] = y*i + j; 108 | dest[offset+count] = y*i + j - 1; 109 | } else { 110 | src[offset+count] = y*i + j; 111 | dest[offset+count] = y*i + y - 1; 112 | } 113 | 114 | count++; 115 | 116 | /* go up */ 117 | if (j < y-1) { 118 | src[offset+count] = y*i + j; 119 | dest[offset+count] = y*i + j + 1; 120 | } else { 121 | src[offset+count] = y*i + j; 122 | dest[offset+count] = y*i; 123 | } 124 | 125 | count++; 126 | 127 | /* go left */ 128 | if (i > 0) { 129 | src[offset+count] = y*i + j; 130 | dest[offset+count] = y*(i-1) + j; 131 | } else { 132 | src[offset+count] 
= y*i + j; 133 | dest[offset+count] = y*(x-1) + j; 134 | } 135 | 136 | count++; 137 | 138 | /* go right */ 139 | if (i < x-1) { 140 | src[offset+count] = y*i + j; 141 | dest[offset+count] = y*(i+1) + j; 142 | } else { 143 | src[offset+count] = y*i + j; 144 | dest[offset+count] = j; 145 | } 146 | 147 | count++; 148 | 149 | } 150 | } 151 | 152 | // } 153 | 154 | #ifdef _OPENMP 155 | #pragma omp barrier 156 | #endif 157 | 158 | #ifdef DIAGNOSTIC 159 | if (tid == 0) { 160 | elapsed_time_part = get_seconds() -elapsed_time_part; 161 | fprintf(stderr, "Tuple generation time: %lf seconds\n", elapsed_time_part); 162 | elapsed_time_part = get_seconds(); 163 | } 164 | #endif 165 | 166 | #ifdef _OPENMP 167 | #pragma omp barrier 168 | 169 | #pragma omp for 170 | #endif 171 | for (i=0; in = n; 184 | SDGdata->m = m; 185 | SDGdata->startVertex = src; 186 | SDGdata->endVertex = dest; 187 | SDGdata->weight = wt; 188 | 189 | #ifdef _OPENMP 190 | } 191 | #endif 192 | 193 | elapsed_time = get_seconds() - elapsed_time; 194 | return elapsed_time; 195 | } 196 | 197 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/getStartLists.c: -------------------------------------------------------------------------------- 1 | #include "defs.h" 2 | 3 | double getStartLists(graph* G, edge** maxIntWtListPtr, 4 | INT_T* maxIntWtListSizePtr) { 5 | 6 | LONG_T *local_max, maxWeight; 7 | 8 | edge *maxIntWtList; 9 | LONG_T maxIntWtListSize; 10 | 11 | LONG_T *p_start, *p_end; 12 | double elapsed_time; 13 | elapsed_time = get_seconds(); 14 | 15 | #ifdef _OPENMP 16 | omp_set_num_threads(NUM_THREADS); 17 | #pragma omp parallel 18 | { 19 | #endif 20 | 21 | LONG_T i, j, n; 22 | edge* pList; 23 | LONG_T pCount, tmpListSize; 24 | int tid, nthreads; 25 | #ifdef DIAGNOSTIC 26 | double elapsed_time_part; 27 | #endif 28 | 29 | #ifdef _OPENMP 30 | tid = omp_get_thread_num(); 31 | nthreads = omp_get_num_threads(); 32 | #else 33 | tid = 0; 34 | nthreads = 1; 35 | 
#endif 36 | 37 | n = G->n; 38 | 39 | /* Determine the maximum edge weight */ 40 | 41 | if (tid == 0) { 42 | local_max = (LONG_T *) malloc(nthreads*sizeof(LONG_T)); 43 | } 44 | 45 | /* Allocate memory for partial edge list on each thread */ 46 | tmpListSize = 1000; 47 | pList = (edge *) malloc(tmpListSize*sizeof(edge)); 48 | pCount = 0; 49 | 50 | #ifdef _OPENMP 51 | #pragma omp barrier 52 | #endif 53 | 54 | local_max[tid] = -1; 55 | 56 | #ifdef DIAGNOSTIC 57 | if (tid == 0) { 58 | elapsed_time_part = get_seconds(); 59 | } 60 | #endif 61 | 62 | 63 | #ifdef _OPENMP 64 | #pragma omp for 65 | #endif 66 | for (i=0; inumEdges[i]; jnumEdges[i+1]; j++) { 68 | if (G->weight[j] > local_max[tid]) { 69 | local_max[tid] = G->weight[j]; 70 | pCount = 0; 71 | pList[pCount].startVertex = i; 72 | pList[pCount].endVertex = G->endV[j]; 73 | pList[pCount].w = local_max[tid]; 74 | pList[pCount].e = j; 75 | pCount++; 76 | } else if (G->weight[j] == local_max[tid]) { 77 | pList[pCount].startVertex = i; 78 | pList[pCount].endVertex = G->endV[j]; 79 | pList[pCount].w = local_max[tid]; 80 | pList[pCount].e = j; 81 | pCount++; 82 | } 83 | } 84 | } 85 | 86 | #ifdef _OPENMP 87 | #pragma omp barrier 88 | #endif 89 | 90 | if (tid == 0) { 91 | 92 | #ifdef DIAGNOSTIC 93 | if (tid == 0) { 94 | elapsed_time_part = get_seconds() - elapsed_time_part; 95 | fprintf(stderr, "Max. 
weight computation time: %lf seconds\n", elapsed_time_part); 96 | } 97 | #endif 98 | 99 | maxWeight = local_max[0]; 100 | 101 | for (i=1; i maxWeight) 103 | maxWeight = local_max[i]; 104 | } 105 | // free(local_max); 106 | } 107 | 108 | #ifdef _OPENMP 109 | #pragma omp barrier 110 | #endif 111 | 112 | if (local_max[tid] != maxWeight) { 113 | pCount = 0; 114 | } 115 | 116 | /* Merge all te partial edge lists */ 117 | if (tid == 0) { 118 | p_start = (LONG_T *) malloc(nthreads*sizeof(LONG_T)); 119 | p_end = (LONG_T *) malloc(nthreads*sizeof(LONG_T)); 120 | } 121 | 122 | #ifdef _OPENMP 123 | #pragma omp barrier 124 | #endif 125 | 126 | p_end[tid] = pCount; 127 | p_start[tid] = 0; 128 | 129 | #ifdef _OPENMP 130 | #pragma omp barrier 131 | #endif 132 | 133 | if (tid == 0) { 134 | for (i=1; i 2 | #include 3 | #include "memory.h" 4 | #include "checkid.h" 5 | 6 | #define CHECK_MASK 0x1c /* Assumes exactly last two bits are 0 ... */ 7 | /* ... If not, change checklist dimension */ 8 | 9 | Checklisttype checklist[(CHECK_MASK>>2)+1] = {{NULL,NULL},{NULL,NULL},{NULL,NULL},{NULL,NULL}}; 10 | 11 | #ifdef __STDC__ 12 | int *checkID( int *ptr) 13 | #else 14 | int *checkID(ptr) 15 | int *ptr; 16 | #endif 17 | { 18 | int bucket; 19 | Checklisttype *next; 20 | 21 | if(ptr == NULL) 22 | return NULL; 23 | 24 | bucket = (((long) ptr)&CHECK_MASK)>>2; 25 | next = checklist[bucket].next; 26 | 27 | while(next != NULL) 28 | { 29 | if(next->ID == ptr) 30 | { 31 | return (int *) ptr; 32 | } 33 | else 34 | { 35 | next = next->next; 36 | } 37 | 38 | } 39 | 40 | fprintf(stderr,"ERROR: Invalid generator ID %p\n", ptr); 41 | return NULL; 42 | } 43 | 44 | 45 | 46 | #ifdef __STDC__ 47 | int *deleteID( int *ptr) 48 | #else 49 | int *deleteID(ptr) 50 | int *ptr; 51 | #endif 52 | { 53 | int bucket; 54 | Checklisttype *next, *temp; 55 | 56 | 57 | if(ptr == NULL) 58 | return NULL; 59 | 60 | bucket = (((long) ptr)&CHECK_MASK)>>2; 61 | next = &checklist[bucket]; 62 | 63 | while(next->next != NULL) 64 | 
if(next->next->ID == ptr) 65 | { 66 | /* match found: unlink the node from the bucket list and free it, then hand ptr back to the caller */ temp = next->next; 67 | next->next = next->next->next; 68 | 69 | free(temp); 70 | return (int *) ptr; 71 | } 72 | else 73 | { 74 | next = next->next; 75 | } 76 | 77 | 78 | /* reached only when the loop ran off the end of the list without matching ptr. NOTE(review): ptr is an int * passed to %p without a cast to void * */ fprintf(stderr,"ERROR: Invalid generator ID %p\n", ptr); 79 | return NULL; 80 | } 81 | 82 | 83 | /* addID: record ptr in the checklist table. Returns ptr on success; returns NULL when ptr is NULL or when mymalloc returns NULL. */ #ifdef __STDC__ 84 | int *addID( int *ptr) 85 | #else 86 | int *addID(ptr) 87 | int *ptr; 88 | #endif 89 | { 90 | int bucket; 91 | /* next is declared but not used in this function */ Checklisttype *next, *temp; 92 | 93 | if(ptr == NULL) 94 | return NULL; 95 | 96 | 97 | /* bucket index: the address bits of ptr selected by CHECK_MASK, shifted right by 2 */ bucket = (((long) ptr)&CHECK_MASK)>>2; 98 | 99 | temp = (Checklisttype *) mymalloc(sizeof(Checklisttype)); 100 | if(temp == NULL) 101 | return NULL; 102 | 103 | /* push the new node at the front of the bucket's list */ temp->ID = (int *) ptr; 104 | temp->next = checklist[bucket].next; 105 | checklist[bucket].next = temp; 106 | 107 | 108 | return (int *) ptr; 109 | } 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/checkid.h: -------------------------------------------------------------------------------- 1 | /* Checklisttype: singly linked list node; ID is the stored pointer and next links to the following node */ typedef struct checkidstruct 2 | { 3 | int *ID; 4 | struct checkidstruct *next; 5 | } Checklisttype; 6 | 7 | int *checkID(int* ptr); 8 | int *deleteID( int *ptr); 9 | int *addID( int *ptr); 10 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/cmrg.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _cmrg_h 3 | #define _cmrg_h 4 | 5 | #ifndef ANSI_ARGS 6 | #ifdef __STDC__ 7 | #define ANSI_ARGS(args) args 8 | #else 9 | #define ANSI_ARGS(args) () 10 | #endif 11 | #endif 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | int cmrg_get_rn_int ANSI_ARGS((int *igenptr)); 18 | float cmrg_get_rn_flt ANSI_ARGS((int *igenptr)); 19 | double cmrg_get_rn_dbl ANSI_ARGS((int *igenptr)); 20 | int *cmrg_init_rng ANSI_ARGS((int rng_type, int gennum, int total_gen, int seed, 21 | int 
mult)); 22 | int cmrg_spawn_rng ANSI_ARGS((int *igenptr, int nspawned, int ***newgens, int checkid) ); 23 | int cmrg_get_seed_rng ANSI_ARGS((int *genptr)); 24 | int cmrg_free_rng ANSI_ARGS((int *genptr)); 25 | int cmrg_pack_rng ANSI_ARGS(( int *genptr, char **buffer)); 26 | int *cmrg_unpack_rng ANSI_ARGS(( char *packed)); 27 | int cmrg_print_rng ANSI_ARGS(( int *igen)); 28 | 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/communicate.c: -------------------------------------------------------------------------------- 1 | #ifdef SPRNG_MPI 2 | #include 3 | #endif 4 | #include 5 | #include "sprng_interface.h" 6 | 7 | 8 | /* get_proc_info_mpi: store the calling process's rank in *myid and the number of processes in *nprocs. With SPRNG_MPI defined both values are queried from MPI_COMM_WORLD; otherwise 0 and 1 are stored. */ #ifdef __STDC__ 9 | void get_proc_info_mpi(int *myid, int *nprocs) 10 | #else 11 | void get_proc_info_mpi(myid, nprocs) 12 | int *myid, *nprocs; 13 | #endif 14 | { 15 | #ifdef SPRNG_MPI 16 | MPI_Comm_rank(MPI_COMM_WORLD, myid); 17 | MPI_Comm_size(MPI_COMM_WORLD, nprocs); 18 | #else 19 | *myid = 0; 20 | *nprocs = 1; 21 | #endif 22 | } 23 | 24 | 25 | /* make_new_seed_mpi: return a seed value. With SPRNG_MPI defined, rank 0 of a duplicated communicator calls make_new_seed() and the result is broadcast, so every rank returns the value produced on rank 0. Without SPRNG_MPI it simply returns make_new_seed(). */ #ifdef __STDC__ 26 | int make_new_seed_mpi(void) 27 | #else 28 | int make_new_seed_mpi() 29 | #endif 30 | { 31 | #ifdef SPRNG_MPI 32 | unsigned int temp2; 33 | int myid, nprocs; 34 | MPI_Comm newcomm; 35 | 36 | MPI_Comm_dup(MPI_COMM_WORLD, &newcomm); /* create a temporary communicator */ 37 | 38 | MPI_Comm_rank(newcomm, &myid); 39 | MPI_Comm_size(newcomm, &nprocs); 40 | 41 | /* only rank 0 generates the seed; on the other ranks temp2 is filled in by the broadcast below */ if(myid == 0) 42 | temp2 = make_new_seed(); 43 | 44 | MPI_Bcast(&temp2,1,MPI_UNSIGNED,0,newcomm); 45 | 46 | MPI_Comm_free(&newcomm); 47 | 48 | return temp2; 49 | #else 50 | return make_new_seed(); 51 | #endif 52 | } 53 | 54 | 55 | /* disabled test driver */ #if 0 56 | main() 57 | { 58 | printf("%u\n", make_new_seed()); 59 | printf("%u\n", make_new_seed()); 60 | } 61 | #endif 62 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/cputime.c: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "fwrap.h" 5 | 6 | /* cputime: return a time reading in seconds as a double. When RUSAGE_SELF is defined the value is the user plus system time reported by getrusage(RUSAGE_SELF); otherwise, when CLOCKS_PER_SEC is defined, it is clock()/CLOCKS_PER_SEC; otherwise an error is printed to stderr and 0.0 is returned. */ #ifdef __STDC__ 7 | double cputime(void) 8 | #else 9 | double cputime() 10 | #endif 11 | { 12 | double current_time; 13 | 14 | #ifdef RUSAGE_SELF 15 | struct rusage temp; 16 | 17 | getrusage(RUSAGE_SELF, &temp); 18 | 19 | /* whole seconds of user and system time, plus their tv_usec parts scaled by 1.0e-6 */ current_time = (temp.ru_utime.tv_sec + temp.ru_stime.tv_sec + 20 | 1.0e-6*(temp.ru_utime.tv_usec + temp.ru_stime.tv_usec)); 21 | 22 | #elif defined(CLOCKS_PER_SEC) 23 | current_time = clock()/((double) CLOCKS_PER_SEC); 24 | 25 | #else 26 | fprintf(stderr,"\nERROR: Timing routines not available\n\n"); 27 | current_time = 0.0; 28 | #endif 29 | 30 | return (current_time); 31 | } 32 | 33 | 34 | 35 | 36 | 37 | /* FNAMEOF_fcpu_t: wrapper that returns cputime(). FNAMEOF_fcpu_t is a macro from fwrap.h that selects the exported symbol name. */ #ifdef __STDC__ 38 | double FNAMEOF_fcpu_t(void) 39 | #else 40 | double FNAMEOF_fcpu_t() 41 | #endif 42 | { 43 | return cputime(); 44 | } 45 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/cputime.h: -------------------------------------------------------------------------------- 1 | #ifdef __STDC__ 2 | double cputime(void); 3 | #else 4 | double cputime(); 5 | #endif 6 | 7 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/fwrap.h: -------------------------------------------------------------------------------- 1 | #ifndef _fwrap_h 2 | #define _fwrap_h 3 | 4 | /************************************************************************/ 5 | /************************************************************************/ 6 | /* Inter-language Naming Convention Problem Solution */ 7 | /* */ 8 | /* Note that with different compilers you may find that */ 9 | /* the linker fails to find certain modules due to the naming */ 10 | /* conventions implicit in particular compilers. 
Here the */ 11 | /* solution was to look at the object code produced by the FORTRAN */ 12 | /* compiler and modify this wrapper code so that the C routines */ 13 | /* compiled with the same routine names as produced in the FORTRAN */ 14 | /* program. */ 15 | /* */ 16 | /************************************************************************/ 17 | /************************************************************************/ 18 | 19 | 20 | /* 21 | Turn funcName (which must be all lower-case) into something callable from 22 | FORTRAN, typically by appending one or more underscores. 23 | */ 24 | #if defined(Add__) 25 | #define FORTRAN_CALLABLE(funcName) funcName ## __ 26 | #elif defined(NoChange) 27 | #define FORTRAN_CALLABLE(funcName) funcName 28 | #elif defined(Add_) 29 | #define FORTRAN_CALLABLE(funcName) funcName ## _ 30 | #endif 31 | 32 | #ifdef UpCase 33 | #define FNAMEOF_finit_rng FINIT_RNG 34 | #define FNAMEOF_fspawn_rng FSPAWN_RNG 35 | #define FNAMEOF_fget_rn_int FGET_RN_INT 36 | #define FNAMEOF_fget_rn_flt FGET_RN_FLT 37 | #define FNAMEOF_fget_rn_dbl FGET_RN_DBL 38 | #define FNAMEOF_fmake_new_seed FMAKE_NEW_SEED 39 | #define FNAMEOF_fseed_mpi FSEED_MPI 40 | #define FNAMEOF_ffree_rng FFREE_RNG 41 | #define FNAMEOF_fget_seed_rng FGET_SEED_RNG 42 | #define FNAMEOF_fpack_rng FPACK_RNG 43 | #define FNAMEOF_funpack_rng FUNPACK_RNG 44 | #define FNAMEOF_fprint_rng FPRINT_RNG 45 | 46 | #define FNAMEOF_finit_rng_sim FINIT_RNG_SIM 47 | #define FNAMEOF_fget_rn_int_sim FGET_RN_INT_SIM 48 | #define FNAMEOF_fget_rn_flt_sim FGET_RN_FLT_SIM 49 | #define FNAMEOF_fget_rn_dbl_sim FGET_RN_DBL_SIM 50 | #define FNAMEOF_finit_rng_simmpi FINIT_RNG_SIMMPI 51 | #define FNAMEOF_fget_rn_int_simmpi FGET_RN_INT_SIMMPI 52 | #define FNAMEOF_fget_rn_flt_simmpi FGET_RN_FLT_SIMMPI 53 | #define FNAMEOF_fget_rn_dbl_simmpi FGET_RN_DBL_SIMMPI 54 | #define FNAMEOF_fpack_rng_simple FPACK_RNG_SIMPLE 55 | #define FNAMEOF_funpack_rng_simple FUNPACK_RNG_SIMPLE 56 | #define FNAMEOF_fprint_rng_simple 
FPRINT_RNG_SIMPLE 57 | 58 | #define FNAMEOF_finit_rng_ptr FINIT_RNG_PTR 59 | #define FNAMEOF_fget_rn_int_ptr FGET_RN_INT_PTR 60 | #define FNAMEOF_fget_rn_flt_ptr FGET_RN_FLT_PTR 61 | #define FNAMEOF_fget_rn_dbl_ptr FGET_RN_DBL_PTR 62 | #define FNAMEOF_fpack_rng_ptr FPACK_RNG_PTR 63 | #define FNAMEOF_funpack_rng_ptr FUNPACK_RNG_PTR 64 | #define FNAMEOF_fprint_rng_ptr FPRINT_RNG_PTR 65 | #define FNAMEOF_ffree_rng_ptr FFREE_RNG_PTR 66 | #define FNAMEOF_fspawn_rng_ptr FSPAWN_RNG_PTR 67 | 68 | #define FNAMEOF_fcpu_t FCPU_T 69 | 70 | #else 71 | 72 | #define FNAMEOF_ffree_rng FORTRAN_CALLABLE(ffree_rng) 73 | #define FNAMEOF_fmake_new_seed FORTRAN_CALLABLE(fmake_new_seed) 74 | #define FNAMEOF_fseed_mpi FORTRAN_CALLABLE(fseed_mpi) 75 | #define FNAMEOF_finit_rng FORTRAN_CALLABLE(finit_rng) 76 | #define FNAMEOF_fspawn_rng FORTRAN_CALLABLE(fspawn_rng) 77 | #define FNAMEOF_fget_rn_int FORTRAN_CALLABLE(fget_rn_int) 78 | #define FNAMEOF_fget_rn_flt FORTRAN_CALLABLE(fget_rn_flt) 79 | #define FNAMEOF_fget_rn_dbl FORTRAN_CALLABLE(fget_rn_dbl) 80 | #define FNAMEOF_fget_seed_rng FORTRAN_CALLABLE(fget_seed_rng) 81 | #define FNAMEOF_fpack_rng FORTRAN_CALLABLE(fpack_rng) 82 | #define FNAMEOF_funpack_rng FORTRAN_CALLABLE(funpack_rng) 83 | #define FNAMEOF_fprint_rng FORTRAN_CALLABLE(fprint_rng) 84 | 85 | #define FNAMEOF_finit_rng_sim FORTRAN_CALLABLE(finit_rng_sim) 86 | #define FNAMEOF_fget_rn_int_sim FORTRAN_CALLABLE(fget_rn_int_sim) 87 | #define FNAMEOF_fget_rn_flt_sim FORTRAN_CALLABLE(fget_rn_flt_sim) 88 | #define FNAMEOF_fget_rn_dbl_sim FORTRAN_CALLABLE(fget_rn_dbl_sim) 89 | #define FNAMEOF_finit_rng_simmpi FORTRAN_CALLABLE(finit_rng_simmpi) 90 | #define FNAMEOF_fget_rn_int_simmpi FORTRAN_CALLABLE(fget_rn_int_simmpi) 91 | #define FNAMEOF_fget_rn_flt_simmpi FORTRAN_CALLABLE(fget_rn_flt_simmpi) 92 | #define FNAMEOF_fget_rn_dbl_simmpi FORTRAN_CALLABLE(fget_rn_dbl_simmpi) 93 | #define FNAMEOF_fpack_rng_simple FORTRAN_CALLABLE(fpack_rng_simple) 94 | #define FNAMEOF_funpack_rng_simple 
FORTRAN_CALLABLE(funpack_rng_simple) 95 | #define FNAMEOF_fprint_rng_simple FORTRAN_CALLABLE(fprint_rng_simple) 96 | 97 | #define FNAMEOF_finit_rng_ptr FORTRAN_CALLABLE(finit_rng_ptr) 98 | #define FNAMEOF_fget_rn_int_ptr FORTRAN_CALLABLE(fget_rn_int_ptr) 99 | #define FNAMEOF_fget_rn_flt_ptr FORTRAN_CALLABLE(fget_rn_flt_ptr) 100 | #define FNAMEOF_fget_rn_dbl_ptr FORTRAN_CALLABLE(fget_rn_dbl_ptr) 101 | #define FNAMEOF_fpack_rng_ptr FORTRAN_CALLABLE(fpack_rng_ptr) 102 | #define FNAMEOF_funpack_rng_ptr FORTRAN_CALLABLE(funpack_rng_ptr) 103 | #define FNAMEOF_fprint_rng_ptr FORTRAN_CALLABLE(fprint_rng_ptr) 104 | #define FNAMEOF_ffree_rng_ptr FORTRAN_CALLABLE(ffree_rng_ptr) 105 | #define FNAMEOF_fspawn_rng_ptr FORTRAN_CALLABLE(fspawn_rng_ptr) 106 | 107 | #define FNAMEOF_fcpu_t FORTRAN_CALLABLE(fcpu_t) 108 | 109 | #endif 110 | 111 | #endif 112 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/fwrap_mpi.c: -------------------------------------------------------------------------------- 1 | 2 | #include "fwrap.h" 3 | #include "sprng_interface.h" 4 | #include "memory.h" 5 | #include 6 | #include 7 | #include 8 | 9 | /************************************************************************/ 10 | /************************************************************************/ 11 | /* */ 12 | /* This package of C wrappers is intended to be called from a */ 13 | /* FORTRAN program. The main purpose of the package is to mediate */ 14 | /* between the call-by-address and call-by-value conventions in */ 15 | /* the two languages. In most cases, the arguments of the C */ 16 | /* routines and the wrappers are the same. There are two */ 17 | /* exceptions to this. The trivial exception is that the C number */ 18 | /* scheme of 0 thru N-1 is automatically converted to the FORTRAN */ 19 | /* scheme of 1 thru N, so when referring to a particular generator */ 20 | /* the FORTRAN user should number as is natural to that language. 
*/ 21 | /* */ 22 | /* */ 23 | /* The wrappers should be treated as FORTRAN function calls. */ 24 | /* */ 25 | /************************************************************************/ 26 | /************************************************************************/ 27 | 28 | 29 | /* FNAMEOF_fseed_mpi: returns make_new_seed_mpi() when SPRNG_MPI is defined, and -1 otherwise. */ #ifdef __STDC__ 30 | int FNAMEOF_fseed_mpi(void) 31 | #else 32 | int FNAMEOF_fseed_mpi() 33 | #endif 34 | { 35 | #ifdef SPRNG_MPI 36 | return make_new_seed_mpi(); 37 | #else 38 | return -1; 39 | #endif 40 | } 41 | 42 | /* the remaining wrappers are compiled only when SPRNG_MPI is defined */ #ifdef SPRNG_MPI 43 | 44 | /* FNAMEOF_finit_rng_simmpi: dereference the three pointer arguments and pass their values to init_rng_simple_mpi, returning its result. */ #ifdef __STDC__ 45 | int * FNAMEOF_finit_rng_simmpi(int *rng_type, int *seed, int *mult) 46 | #else 47 | int * FNAMEOF_finit_rng_simmpi(rng_type,seed,mult) 48 | int *rng_type,*mult,*seed; 49 | #endif 50 | { 51 | return init_rng_simple_mpi(*rng_type,*seed, *mult); 52 | } 53 | 54 | /* FNAMEOF_fget_rn_int_simmpi: returns get_rn_int_simple_mpi(). */ #ifdef __STDC__ 55 | int FNAMEOF_fget_rn_int_simmpi(void) 56 | #else 57 | int FNAMEOF_fget_rn_int_simmpi() 58 | #endif 59 | { 60 | return get_rn_int_simple_mpi(); 61 | } 62 | 63 | 64 | /* FNAMEOF_fget_rn_flt_simmpi: returns get_rn_flt_simple_mpi(). */ #ifdef __STDC__ 65 | float FNAMEOF_fget_rn_flt_simmpi(void) 66 | #else 67 | float FNAMEOF_fget_rn_flt_simmpi() 68 | #endif 69 | { 70 | return get_rn_flt_simple_mpi(); 71 | } 72 | 73 | 74 | 75 | /* FNAMEOF_fget_rn_dbl_simmpi: returns get_rn_dbl_simple_mpi(). */ #ifdef __STDC__ 76 | double FNAMEOF_fget_rn_dbl_simmpi(void) 77 | #else 78 | double FNAMEOF_fget_rn_dbl_simmpi() 79 | #endif 80 | { 81 | return get_rn_dbl_simple_mpi(); 82 | } 83 | #endif 84 | 85 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/include/sprng.h: -------------------------------------------------------------------------------- 1 | #ifndef _sprng_h_ 2 | #define _sprng_h_ 3 | 4 | 5 | #define SPRNG_LFG 0 6 | #define SPRNG_LCG 1 7 | #define SPRNG_LCG64 2 8 | #define SPRNG_CMRG 3 9 | #define SPRNG_MLFG 4 10 | #define SPRNG_PMLCG 5 11 | 12 | #include "sprng_interface.h" 13 | 14 | #define SPRNG_DEFAULT 0 15 | #define CRAYLCG 0 16 | #define DRAND48 1 17 | #define FISH1 2 18 | #define FISH2 3 19 | #define FISH3 4 20 | #define 
FISH4 5 21 | #define FISH5 6 22 | #define LECU1 0 23 | #define LECU2 1 24 | #define LECU3 2 25 | #define LAG1279 0 26 | #define LAG17 1 27 | #define LAG31 2 28 | #define LAG55 3 29 | #define LAG63 4 30 | #define LAG127 5 31 | #define LAG521 6 32 | #define LAG521B 7 33 | #define LAG607 8 34 | #define LAG607B 9 35 | #define LAG1279B 10 36 | 37 | #define CHECK 1 38 | 39 | #define MAX_PACKED_LENGTH 24000 40 | 41 | #ifdef USE_MPI 42 | #define MPINAME(A) A ## _mpi 43 | #else 44 | #define MPINAME(A) A 45 | #endif 46 | 47 | #define make_sprng_seed MPINAME(make_new_seed) 48 | 49 | #if defined(SIMPLE_SPRNG) 50 | 51 | #define pack_sprng pack_rng_simple 52 | #define unpack_sprng unpack_rng_simple 53 | #define isprng MPINAME(get_rn_int_simple) 54 | #define init_sprng MPINAME(init_rng_simple) 55 | #define print_sprng print_rng_simple 56 | 57 | #ifdef FLOAT_GEN 58 | #define sprng MPINAME(get_rn_flt_simple) 59 | #else 60 | #define sprng MPINAME(get_rn_dbl_simple) 61 | #endif 62 | 63 | #elif !defined(CHECK_POINTERS) 64 | 65 | #define free_sprng free_rng 66 | #define pack_sprng pack_rng 67 | #define unpack_sprng unpack_rng 68 | #define isprng get_rn_int 69 | #define spawn_sprng(A,B,C) spawn_rng(A,B,C,!CHECK) 70 | #define init_sprng init_rng 71 | #define print_sprng print_rng 72 | 73 | #ifdef FLOAT_GEN 74 | #define sprng get_rn_flt 75 | #else 76 | #define sprng get_rn_dbl 77 | #endif 78 | 79 | #else 80 | 81 | #define free_sprng(A) ((deleteID(A)==NULL) ? -1 : free_rng(A)) 82 | #define pack_sprng(A,B) ((checkID(A)==NULL) ? 0 : pack_rng(A,B)) 83 | #define unpack_sprng(A) addID(unpack_rng(A)) 84 | #define isprng(A) ((checkID(A)==NULL) ? -1 : get_rn_int(A)) 85 | #define spawn_sprng(A,B,C) ((checkID(A)==NULL) ? 0 : spawn_rng(A,B,C,CHECK)) 86 | #define init_sprng(A,B,C,D,E) addID(init_rng(A,B,C,D,E)) 87 | #define print_sprng(A) ((checkID(A)==NULL) ? 0 : print_rng(A)) 88 | 89 | #ifdef FLOAT_GEN 90 | #define sprng(A) ((checkID(A)==NULL) ? 
-1.0 : get_rn_flt(A)) 91 | #else 92 | #define sprng(A) ((checkID(A)==NULL) ? -1.0 : get_rn_dbl(A)) 93 | #endif 94 | 95 | #endif 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/include/sprng_f.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef _sprngf_h_ 4 | 5 | #define SPRNG_LFG 0 6 | #define SPRNG_LCG 1 7 | #define SPRNG_LCG64 2 8 | #define SPRNG_CMRG 3 9 | #define SPRNG_MLFG 4 10 | #define SPRNG_PMLCG 5 11 | #define DEFAULT_RNG_TYPE SPRNG_LFG 12 | 13 | #define SPRNG_DEFAULT 0 14 | #define CRAYLCG 0 15 | #define DRAND48 1 16 | #define FISH1 2 17 | #define FISH2 3 18 | #define FISH3 4 19 | #define FISH4 5 20 | #define FISH5 6 21 | #define LECU1 0 22 | #define LECU2 1 23 | #define LECU3 2 24 | #define LAG1279 0 25 | #define LAG17 1 26 | #define LAG31 2 27 | #define LAG55 3 28 | #define LAG63 4 29 | #define LAG127 5 30 | #define LAG521 6 31 | #define LAG521B 7 32 | #define LAG607 8 33 | #define LAG607B 9 34 | #define LAG1279B 10 35 | 36 | #ifdef CHECK_POINTERS 37 | #define CHECK 1 38 | #else 39 | #define CHECK 0 40 | #endif /* ifdef CHECK_POINTERS */ 41 | 42 | #define MAX_PACKED_LENGTH 24000 43 | 44 | #ifdef POINTER_SIZE 45 | #if POINTER_SIZE == 8 46 | #define SPRNG_POINTER integer*8 47 | #else 48 | #define SPRNG_POINTER integer*4 49 | #endif 50 | #else 51 | #define SPRNG_POINTER integer*4 52 | #endif /* ifdef POINTER_SIZE */ 53 | 54 | #ifdef USE_MPI 55 | #define make_sprng_seed fseed_mpi 56 | #else 57 | #define make_sprng_seed fmake_new_seed 58 | #endif 59 | 60 | #endif /* ifdef _sprng_h */ 61 | 62 | #ifdef USE_MPI 63 | external fseed_mpi 64 | integer fseed_mpi 65 | #else 66 | external fmake_new_seed 67 | integer fmake_new_seed 68 | #endif 69 | 70 | #ifndef DEFAULTINT 71 | #define DEFAULTINT 72 | #endif 73 | #ifndef FLOAT_GEN 74 | #define DBLGEN 75 | #endif 76 | 77 | #if defined(SIMPLE_SPRNG) 78 | #undef DEFAULTINT 79 | 80 | 
#ifndef _sprngf_h_ 81 | #define pack_sprng fpack_rng_simple 82 | #define unpack_sprng funpack_rng_simple 83 | #ifdef USE_MPI 84 | #define isprng fget_rn_int_simmpi 85 | #define init_sprng finit_rng_simmpi 86 | #else 87 | #define isprng fget_rn_int_sim 88 | #define init_sprng finit_rng_sim 89 | #endif /* ifdef USE_MPI */ 90 | #define print_sprng fprint_rng_simple 91 | 92 | #if defined(FLOAT_GEN) && defined(USE_MPI) 93 | #define sprng fget_rn_flt_simmpi 94 | #endif 95 | #if defined(FLOAT_GEN) && !defined(USE_MPI) 96 | #define sprng fget_rn_flt_sim 97 | #endif 98 | #if defined(DBLGEN) && defined(USE_MPI) 99 | #define sprng fget_rn_dbl_simmpi 100 | #endif 101 | #if defined(DBLGEN) && !defined(USE_MPI) 102 | #define sprng fget_rn_dbl_sim 103 | #endif 104 | 105 | #endif /* ifdef _sprng_h */ 106 | external isprng 107 | external fget_rn_dbl_sim, fget_rn_flt_sim 108 | external init_sprng, fpack_rng_simple 109 | external funpack_rng_simple, fprint_rng_simple 110 | #ifdef USE_MPI 111 | external fget_rn_flt_simmpi, fget_rn_dbl_simmpi 112 | real*4 fget_rn_flt_simmpi 113 | real*8 fget_rn_dbl_simmpi 114 | #endif 115 | integer isprng,fpack_rng_simple,fprint_rng_simple 116 | SPRNG_POINTER init_sprng, funpack_rng_simple 117 | real*4 fget_rn_flt_sim 118 | real*8 fget_rn_dbl_sim 119 | #endif 120 | 121 | #if defined(CHECK_POINTERS) 122 | #undef DEFAULTINT 123 | external fget_rn_int_ptr, fget_rn_flt_ptr, fget_rn_dbl_ptr 124 | external fspawn_rng_ptr, ffree_rng_ptr, finit_rng_ptr 125 | external fpack_rng_ptr, funpack_rng_ptr, fprint_rng_ptr 126 | 127 | integer fget_rn_int_ptr, ffree_rng_ptr, fpack_rng_ptr 128 | SPRNG_POINTER finit_rng_ptr, funpack_rng_ptr 129 | integer fspawn_rng_ptr, fprint_rng_ptr 130 | real*4 fget_rn_flt_ptr 131 | real*8 fget_rn_dbl_ptr 132 | 133 | #ifndef _sprngf_h_ 134 | #define isprng fget_rn_int_ptr 135 | #define free_sprng ffree_rng_ptr 136 | #define spawn_sprng(A,B,C) fspawn_rng_ptr(A,B,C,CHECK) 137 | #define pack_sprng fpack_rng_ptr 138 | #define unpack_sprng 
funpack_rng_ptr 139 | #define init_sprng finit_rng_ptr 140 | #define print_sprng fprint_rng_ptr 141 | #ifdef FLOAT_GEN 142 | #define sprng fget_rn_flt_ptr 143 | #else 144 | #define sprng fget_rn_dbl_ptr 145 | #endif 146 | #endif 147 | #endif 148 | 149 | #if defined(DEFAULTINT) 150 | external fget_rn_int, fget_rn_flt, fget_rn_dbl 151 | external fspawn_rng, ffree_rng, finit_rng 152 | external fpack_rng, funpack_rng, fprint_rng 153 | 154 | integer fget_rn_int, ffree_rng, fpack_rng 155 | SPRNG_POINTER finit_rng, funpack_rng 156 | integer fspawn_rng, fprint_rng 157 | real*4 fget_rn_flt 158 | real*8 fget_rn_dbl 159 | 160 | #ifndef _sprngf_h_ 161 | #define isprng fget_rn_int 162 | #define free_sprng ffree_rng 163 | #define spawn_sprng(A,B,C) fspawn_rng(A,B,C,CHECK) 164 | #define pack_sprng fpack_rng 165 | #define unpack_sprng funpack_rng 166 | #define init_sprng finit_rng 167 | #define print_sprng fprint_rng 168 | #ifdef FLOAT_GEN 169 | #define sprng fget_rn_flt 170 | #else 171 | #define sprng fget_rn_dbl 172 | #endif 173 | #endif 174 | 175 | #endif 176 | 177 | 178 | 179 | #ifndef _sprngf_h_ 180 | #define _sprngf_h_ 181 | #endif 182 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/include/sprng_interface.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _sprng_interface_h_ 3 | #define _sprng_interface_h_ 4 | 5 | #define DEFAULT_RNG_TYPE SPRNG_LFG 6 | 7 | #ifndef ANSI_ARGS 8 | #ifdef __STDC__ 9 | #define ANSI_ARGS(args) args 10 | #else 11 | #define ANSI_ARGS(args) () 12 | #endif 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | int get_rn_int ANSI_ARGS((int *igenptr)); 20 | float get_rn_flt ANSI_ARGS((int *igenptr)); 21 | double get_rn_dbl ANSI_ARGS((int *igenptr)); 22 | int *init_rng ANSI_ARGS((int rng_type, int gennum, int total_gen, int seed, 23 | int mult)); 24 | int spawn_rng ANSI_ARGS((int *igenptr, int nspawned, int 
***newgens, int checkid) ); 25 | int make_new_seed ANSI_ARGS((void)); 26 | int make_new_seed_mpi ANSI_ARGS((void)); 27 | int get_seed_rng ANSI_ARGS((int *genptr)); 28 | int free_rng ANSI_ARGS((int *genptr)); 29 | int pack_rng ANSI_ARGS(( int *genptr, char **buffer)); 30 | int *unpack_rng ANSI_ARGS(( char *packed)); 31 | int print_rng ANSI_ARGS(( int *igen)); 32 | int *checkID ANSI_ARGS(( int *igen)); 33 | int *addID ANSI_ARGS(( int *igen)); 34 | int *deleteID ANSI_ARGS(( int *igen)); 35 | 36 | 37 | int *init_rng_simple ANSI_ARGS((int rng_type, int seed, int mult)); 38 | int *init_rng_simple_mpi ANSI_ARGS((int rng_type, int seed, int mult)); 39 | int get_rn_int_simple ANSI_ARGS((void)); 40 | int get_rn_int_simple_mpi ANSI_ARGS((void)); 41 | float get_rn_flt_simple ANSI_ARGS((void)); 42 | float get_rn_flt_simple_mpi ANSI_ARGS((void)); 43 | double get_rn_dbl_simple ANSI_ARGS((void)); 44 | double get_rn_dbl_simple_mpi ANSI_ARGS((void)); 45 | int pack_rng_simple ANSI_ARGS((char **buffer)); 46 | int *unpack_rng_simple ANSI_ARGS(( char *packed)); 47 | int print_rng_simple ANSI_ARGS((void)); 48 | 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/int64.h: -------------------------------------------------------------------------------- 1 | #ifndef _INT64_H_ 2 | #define _INT64_H_ 3 | 4 | #include 5 | #include "store.h" 6 | 7 | #ifndef __STDC__ 8 | #define const /* old C does not have 'const' qualifier */ 9 | #endif 10 | 11 | 12 | #if LONG_MAX > 2147483647L 13 | #if LONG_MAX > 35184372088831L 14 | #if LONG_MAX >= 9223372036854775807L 15 | #define LONG_SPRNG 16 | #define LONG64 long /* 64 bit long */ 17 | #endif 18 | #endif 19 | #endif 20 | 21 | #if !defined(LONG_SPRNG) && defined(_LONG_LONG) 22 | #define LONG64 long long 23 | #endif 24 | 25 | 26 | #ifdef LONG64 27 | 28 | typedef unsigned LONG64 uint64; 29 | 30 | #ifdef LONG_SPRNG 31 | 
#define MINUS1 0xffffffffffffffffUL /* -1 (mod 2^(BITS-2)) */ 32 | #define ONE 0x1UL 33 | #define MASK64 0xffffffffffffffffUL 34 | #else 35 | #define MINUS1 0xffffffffffffffffULL /* -1 (mod 2^(BITS-2)) */ 36 | #define ONE 0x1ULL 37 | #define MASK64 0xffffffffffffffffULL 38 | #endif 39 | 40 | #define multiply(a,b,c) {c = (a)*(b); c &= MASK64;} 41 | #define add(a,b,c) {c = (a)+(b); c &= MASK64;} 42 | #define decrement(a,c) {c = (a)-1; c &= MASK64;} 43 | #define and(a,b,c) {c = (a)&(b);} 44 | #define or(a,b,c) {c = (a)|(b);} 45 | #define xor(a,b,c) {c = (a)^(b);} 46 | #define notzero(a) (a==0?0:1) 47 | #define lshift(a,b,c) {c = (a)<<(b); c &= MASK64;} /* b is an int */ 48 | #define rshift(a,b,c) {c = (a)>>(b); c &= MASK64;} /* b is an int */ 49 | #define highword(a) ((unsigned int)((a)>>32)) 50 | #define lowword(a) ((unsigned int)((a)&0xffffffff)) 51 | #define set(a,b) {b = (a)&MASK64;} 52 | #define seti(a,b) {b = (a)&MASK64;} /* b is an int */ 53 | #define seti2(a,b,c) {c = (b); c <<= 32; c |= (a); c &= MASK64;}/*a,b=+int*/ 54 | 55 | #else /* Simulate 64 bit arithmetic on 32 bit integers */ 56 | 57 | typedef unsigned int uint64[2]; 58 | 59 | static const uint64 MASK64={0xffffffffU,0xffffffffU}; 60 | static const uint64 MINUS1={0xffffffffU,0xffffffffU}; /* -1 (mod 2^(BITS-2)) */ 61 | static uint64 ONE={0x1U,0x0U}; 62 | #define TWO_M32 2.3283064365386962e-10 /* 2^(-32) */ 63 | 64 | #define and(a,b,c) {c[0] = a[0]&b[0]; c[1] = a[1]&b[1];} 65 | #define or(a,b,c) {c[0] = a[0]|b[0]; c[1] = a[1]|b[1];} 66 | #define xor(a,b,c) {c[0] = a[0]^b[0]; c[1] = a[1]^b[1];} 67 | #define notzero(a) ((a[0]==0 && a[1]==0)?0:1) 68 | #define multiply(a,b,c) {c[1] = a[0]*b[1]+a[1]*b[0];\ 69 | c[1] += (unsigned int) (((double)a[0]*(double)b[0])\ 70 | *TWO_M32);\ 71 | c[0] = a[0]*b[0]; and(c,MASK64,c);} 72 | #define add(a,b,c) {unsigned int t = a[0]+b[0]; \ 73 | c[1] = a[1]+b[1]+(t>(32-b)); c[0] = a[0]<<(b);} 82 | else {c[1]=a[0]<<(b-32);c[0]=0;} 83 | and(c,MASK64,c); 84 | } 85 | 86 | static 
void rshift(uint64 a,int b,uint64 c) 87 | { 88 | if(b<32) 89 | {c[0] = (a[0]>>b)|(a[1]<<(32-b));c[1] = a[1]>>(b);} 90 | else {c[0]=a[1]>>(b-32);c[1]=0;} 91 | and(c,MASK64,c); 92 | } 93 | 94 | #define highword(a) ((a)[1]) 95 | #define lowword(a) ((a)[0]) 96 | #define set(a,b) {b[0] = a[0];b[1]=a[1];and(b,MASK64,b);} 97 | #define seti(a,b) {b[0] = a;b[1]=0;} /* b is an int */ 98 | #define seti2(a,b,c) {c[1] = b; c[0] = a; and(c,MASK64,c);}/*a,b = +ve int*/ 99 | 100 | #endif /* LONG64 or 32 bit */ 101 | 102 | 103 | static int store_uint64(uint64 l, unsigned char *c) 104 | { 105 | int i; 106 | unsigned int m[2]; 107 | 108 | m[0] = highword(l); 109 | m[1] = lowword(l); 110 | 111 | c += store_intarray(m,2,4,c); 112 | 113 | return 8; /* return number of chars filled */ 114 | } 115 | 116 | 117 | static int store_uint64array(uint64 *l, int n, unsigned char *c) 118 | { 119 | int i; 120 | 121 | for(i=0; i 2 | 3 | #ifdef __STDC__ 4 | int make_new_seed() 5 | #else 6 | int make_new_seed() 7 | #endif 8 | { 9 | time_t tp; 10 | struct tm *temp; 11 | unsigned int temp2, temp3; 12 | static unsigned int temp4 = 0xe0e1; 13 | 14 | time(&tp); 15 | temp = localtime(&tp); 16 | 17 | temp2 = (temp->tm_sec<<26)+(temp->tm_min<<20)+(temp->tm_hour<<15)+ 18 | (temp->tm_mday<<10)+(temp->tm_mon<<6); 19 | temp3 = (temp->tm_year<<13)+(temp->tm_wday<<10)+(temp->tm_yday<<1)+ 20 | temp->tm_isdst; 21 | temp2 ^= clock()^temp3; 22 | 23 | temp4 = (temp4*0xeeee)%0xffff; 24 | temp2 ^= temp4<<16; 25 | temp4 = (temp4*0xaeee)%0xffff; 26 | temp2 ^= temp4; 27 | 28 | temp2 &= 0x7fffffff; 29 | 30 | return temp2; 31 | } 32 | 33 | 34 | #if 0 35 | main() 36 | { 37 | printf("%u\n", make_new_seed()); 38 | } 39 | #endif 40 | 41 | 42 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/memory.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #ifdef __STDC__ 5 | void *_mymalloc(long size, int 
line, char *message) 6 | #else 7 | void *_mymalloc(size, line, message) 8 | long size; 9 | int line; 10 | char *message; 11 | #endif 12 | { 13 | char *temp; 14 | 15 | if(size == 0) 16 | return NULL; 17 | 18 | temp = (char *) malloc(size); 19 | 20 | if(temp == NULL) 21 | { 22 | fprintf(stderr,"\nmemory allocation failure in file: %s at line number: %d\n", message, line); 23 | return NULL; 24 | } 25 | 26 | return (void *) temp; 27 | } 28 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/memory.h: -------------------------------------------------------------------------------- 1 | #define mymalloc(a) (_mymalloc((a), __LINE__, __FILE__)) 2 | 3 | #ifndef ANSI_ARGS 4 | #ifdef __STDC__ 5 | #define ANSI_ARGS(args) args 6 | #else 7 | #define ANSI_ARGS(args) () 8 | #endif 9 | #endif 10 | 11 | void *_mymalloc ANSI_ARGS((long size, int line, char *message)); 12 | 13 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/mlfg.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _mlfg_h 3 | #define _mlfg_h 4 | 5 | #ifndef ANSI_ARGS 6 | #ifdef __STDC__ 7 | #define ANSI_ARGS(args) args 8 | #else 9 | #define ANSI_ARGS(args) () 10 | #endif 11 | #endif 12 | 13 | #ifdef __cplusplus 14 | extern "C" { 15 | #endif 16 | 17 | int mlfg_get_rn_int ANSI_ARGS((int *igenptr)); 18 | float mlfg_get_rn_flt ANSI_ARGS((int *igenptr)); 19 | double mlfg_get_rn_dbl ANSI_ARGS((int *igenptr)); 20 | int *mlfg_init_rng ANSI_ARGS((int rng_type, int gennum, int total_gen, int seed, 21 | int mult)); 22 | int mlfg_spawn_rng ANSI_ARGS((int *igenptr, int nspawned, int ***newgens, int checkid) ); 23 | int mlfg_get_seed_rng ANSI_ARGS((int *genptr)); 24 | int mlfg_free_rng ANSI_ARGS((int *genptr)); 25 | int mlfg_pack_rng ANSI_ARGS(( int *genptr, char **buffer)); 26 | int *mlfg_unpack_rng ANSI_ARGS(( char *packed)); 27 | int mlfg_print_rng 
ANSI_ARGS(( int *igen)); 28 | 29 | 30 | #ifdef __cplusplus 31 | } 32 | #endif 33 | 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/multiply.h: -------------------------------------------------------------------------------- 1 | #ifdef LONG64 2 | #define mult_48_64(a,b,c) c = (a*b); 3 | 4 | #define multiply(genptr) mult_48_64(genptr->seed,genptr->multiplier,genptr->seed); genptr->seed += genptr->prime; genptr->seed &= LSB48; 5 | 6 | #else 7 | #define mult_48_32(a,b,c) c[0] = a[0]*b[0]; c[1] = a[1]*b[0]+a[0]*b[1];\ 8 | c[2] = a[0]*b[2]+a[1]*b[1]+a[2]*b[0];\ 9 | c[3] = a[3]*b[0]+a[2]*b[1]+a[1]*b[2]+a[0]*b[3]; 10 | 11 | #define multiply(genptr,m,s,res) s[3] = (unsigned int) genptr->seed[0] >> 12;\ 12 | s[2] = genptr->seed[0] & 4095; s[1] = genptr->seed[1] >> 12;\ 13 | s[0] = genptr->seed[1] & 4095;\ 14 | mult_48_32(m,s,res);\ 15 | genptr->seed[1] = res[0] + ((res[1]&4095) << 12) + genptr->prime;\ 16 | genptr->seed[0] = ( (unsigned int) genptr->seed[1] >> 24)\ 17 | + res[2] + ((unsigned int) res[1] >> 12 ) + (res[3] << 12);\ 18 | genptr->seed[1] &= 16777215; genptr->seed[0] &= 16777215; 19 | #endif 20 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/primes_32.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "primes_32.h" 5 | #include "primelist_32.h" 6 | 7 | #define YES 1 8 | #define NO 0 9 | #define NPRIMES 1000 10 | 11 | static int primes[NPRIMES]; 12 | 13 | #ifdef __STDC__ 14 | int init_prime_32(void) 15 | #else 16 | int init_prime_32() 17 | #endif 18 | { 19 | int i, j, obtained = 0, isprime; 20 | 21 | for(i=3; i < MINPRIME; i += 2) 22 | { 23 | isprime = YES; 24 | 25 | for(j=0; j < obtained; j++) 26 | if(i%primes[j] == 0) 27 | { 28 | isprime = NO; 29 | break; 30 | } 31 | else if(primes[j]*primes[j] > i) 32 | break; 33 | 34 | 
if(isprime == YES) 35 | { 36 | primes[obtained] = i; 37 | obtained++; 38 | } 39 | } 40 | 41 | return obtained; 42 | } 43 | 44 | 45 | 46 | 47 | #ifdef __STDC__ 48 | int getprime_32(int need, int *prime_array, int offset) 49 | #else 50 | int getprime_32(need, prime_array, offset) 51 | int need, *prime_array,offset; 52 | #endif 53 | { 54 | static int initiallized = NO, num_prime; 55 | int largest; 56 | int i, isprime, index, obtained = 0; 57 | 58 | if(need <= 0) 59 | { 60 | fprintf(stderr,"WARNING: Number of primes needed = %d < 1; None returned\n" 61 | , need); 62 | return 0; 63 | } 64 | 65 | if(offset < 0) 66 | { 67 | fprintf(stderr,"WARNING: Offset of prime = %d < 1; None returned\n" 68 | , offset); 69 | return 0; 70 | } 71 | 72 | 73 | if(offset+need-1 MAXPRIMEOFFSET) 89 | { 90 | fprintf(stderr,"WARNING: generator has branched maximum number of times;\nindependence of generators no longer guaranteed"); 91 | offset = offset % MAXPRIMEOFFSET; 92 | } 93 | 94 | if(offset < PRIMELISTSIZE1) /* search table for previous prime */ 95 | { 96 | largest = prime_list_32[offset] + 2; 97 | offset = 0; 98 | } 99 | else 100 | { 101 | index = (int) ((offset-PRIMELISTSIZE1+1)/STEP) + PRIMELISTSIZE1 - 1; 102 | largest = prime_list_32[index] + 2; 103 | offset -= (index-PRIMELISTSIZE1+1)*STEP + PRIMELISTSIZE1 - 1; 104 | } 105 | 106 | 107 | while(need > obtained && largest > MINPRIME) 108 | { 109 | isprime = YES; 110 | largest -= 2; 111 | for(i=0; i 0) 119 | offset--; 120 | else if(isprime == YES) 121 | prime_array[obtained++] = largest; 122 | } 123 | 124 | if(need > obtained) 125 | fprintf(stderr,"ERROR: Insufficient number of primes: needed %d, obtained %d\n", need, obtained); 126 | 127 | return obtained; 128 | } 129 | 130 | 131 | #if 0 132 | main() 133 | { 134 | int newprimes[1500], np, i; 135 | 136 | np = getprime_32(2,newprimes,0); 137 | np = getprime_32(2,newprimes+2,9); 138 | np = getprime_32(2,newprimes+4,12); 139 | 140 | for(i=0; i<6; i++) 141 | printf("%d. 
%d \n", i, newprimes[i]); 142 | 143 | /*while(np--) 144 | printf("New primes: %d\n", newprimes[np]); 145 | 146 | np = getprime_32(5,newprimes); 147 | 148 | printf("%d new primes obtained ...\n", np); 149 | 150 | while(np--) 151 | printf("New primes: %d\n", newprimes[np]);*/ 152 | } 153 | #endif 154 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/primes_32.h: -------------------------------------------------------------------------------- 1 | #ifndef _primes_32_h_ 2 | #define _primes_32_h_ 3 | 4 | #ifndef ANSI_ARGS 5 | #ifdef __STDC__ 6 | #define ANSI_ARGS(args) args 7 | #else 8 | #define ANSI_ARGS(args) () 9 | #endif 10 | #endif 11 | 12 | int getprime_32 ANSI_ARGS((int need, int *prime_array, int offset)); 13 | 14 | #define MAXPRIME 11863285 /* sqrt(2)*2^23 + 2 */ 15 | #define MINPRIME 3444 /* sqrt(MAXPRIME) */ 16 | #define MAXPRIMEOFFSET 779156 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/primes_64.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "primes_64.h" 5 | #include "primelist_64.h" 6 | 7 | #define YES 1 8 | #define NO 0 9 | #define NPRIMES 10000 10 | 11 | int primes[NPRIMES]; 12 | 13 | #ifdef __STDC__ 14 | int init_prime_64(void) 15 | #else 16 | int init_prime_64() 17 | #endif 18 | { 19 | int i, j, obtained = 0, isprime; 20 | 21 | for(i=3; i < MINPRIME; i += 2) 22 | { 23 | isprime = YES; 24 | 25 | for(j=0; j < obtained; j++) 26 | if(i%primes[j] == 0) 27 | { 28 | isprime = NO; 29 | break; 30 | } 31 | else if(primes[j]*primes[j] > i) 32 | break; 33 | 34 | if(isprime == YES) 35 | { 36 | primes[obtained] = i; 37 | obtained++; 38 | } 39 | } 40 | 41 | return obtained; 42 | } 43 | 44 | 45 | 46 | 47 | #ifdef __STDC__ 48 | int getprime_64(int need, unsigned int *prime_array, int offset) 49 | #else 50 | int 
getprime_64(need, prime_array, offset) 51 | int need, offset; 52 | unsigned int *prime_array; 53 | #endif 54 | { 55 | static int initiallized = NO, num_prime; 56 | unsigned int largest; 57 | int i, isprime, index, obtained = 0; 58 | 59 | if(need <= 0) 60 | { 61 | fprintf(stderr,"WARNING: Number of primes needed = %d < 1; None returned\n" 62 | , need); 63 | return 0; 64 | } 65 | 66 | if(offset < 0) 67 | { 68 | fprintf(stderr,"WARNING: Offset of prime = %d < 1; None returned\n" 69 | , offset); 70 | return 0; 71 | } 72 | 73 | 74 | if(offset+need-1 MAXPRIMEOFFSET) 90 | { 91 | fprintf(stderr,"WARNING: generator has branched maximum number of times;\nindependence of generators no longer guaranteed"); 92 | offset = offset % MAXPRIMEOFFSET; 93 | } 94 | 95 | if(offset < PRIMELISTSIZE1) /* search table for previous prime */ 96 | { 97 | largest = prime_list_64[offset] + 2; 98 | offset = 0; 99 | } 100 | else 101 | { 102 | index = (unsigned int) ((offset-PRIMELISTSIZE1+1)/STEP) + PRIMELISTSIZE1 - 1; 103 | largest = prime_list_64[index] + 2; 104 | offset -= (index-PRIMELISTSIZE1+1)*STEP + PRIMELISTSIZE1 - 1; 105 | } 106 | 107 | 108 | while(need > obtained && largest > MINPRIME) 109 | { 110 | isprime = YES; 111 | largest -= 2; 112 | for(i=0; i 0) 120 | offset--; 121 | else if(isprime == YES) 122 | prime_array[obtained++] = largest; 123 | } 124 | 125 | if(need > obtained) 126 | fprintf(stderr,"ERROR: Insufficient number of primes: needed %d, obtained %d\n", need, obtained); 127 | 128 | return obtained; 129 | } 130 | 131 | 132 | #if 0 133 | main() 134 | { 135 | unsigned int newprimes[1500], np, i; 136 | 137 | np = getprime(2,newprimes,0); 138 | np = getprime(2,newprimes+2,9); 139 | np = getprime(2,newprimes+4,12); 140 | 141 | for(i=0; i<6; i++) 142 | printf("%u. 
%u \n", i, newprimes[i]); 143 | 144 | /*while(np--) 145 | printf("New primes: %u\n", newprimes[np]); 146 | 147 | np = getprime(5,newprimes); 148 | 149 | printf("%d new primes obtained ...\n", np); 150 | 151 | while(np--) 152 | printf("New primes: %u\n", newprimes[np]);*/ 153 | } 154 | #endif 155 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/primes_64.h: -------------------------------------------------------------------------------- 1 | #ifndef _primes_64_h_ 2 | #define _primes_64_h_ 3 | 4 | #ifndef ANSI_ARGS 5 | #ifdef __STDC__ 6 | #define ANSI_ARGS(args) args 7 | #else 8 | #define ANSI_ARGS(args) () 9 | #endif 10 | #endif 11 | 12 | int getprime_64 ANSI_ARGS((int need, unsigned int *prime_array, int offset)); 13 | 14 | #define MAXPRIME 3037000501U /* largest odd # < sqrt(2)*2^31+2 */ 15 | #define MINPRIME 55108 /* sqrt(MAXPRIME) */ 16 | #define MAXPRIMEOFFSET 146138719U /* Total number of available primes */ 17 | 18 | #endif 19 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/simple_.h: -------------------------------------------------------------------------------- 1 | /*************************************************************************/ 2 | 3 | #include 4 | #include 5 | #include "memory.h" 6 | #include "sprng_interface.h" 7 | 8 | #ifndef ANSI_ARGS 9 | #ifdef __STDC__ 10 | #define ANSI_ARGS(args) args 11 | #else 12 | #define ANSI_ARGS(args) () 13 | #endif 14 | #endif 15 | 16 | 17 | int *defaultgen=NULL; 18 | int junk; /* pass useless pointer at times */ 19 | 20 | #ifdef __STDC__ 21 | int *init_rng_simple(int rng_type, int seed, int mult) 22 | #else 23 | int *init_rng_simple(rng_type,seed,mult) 24 | int rng_type,mult,seed; 25 | #endif 26 | { 27 | int myid=0, nprocs=1, *temp; 28 | 29 | temp = init_rng(rng_type,myid,nprocs,seed,mult); 30 | 31 | if(temp == NULL) 32 | return NULL; 33 | else 34 | { 35 | if(defaultgen != 
NULL) 36 | free_rng(defaultgen); 37 | defaultgen = temp; 38 | return &junk; /* return "garbage" value */ 39 | } 40 | } 41 | 42 | 43 | 44 | 45 | 46 | #ifdef __STDC__ 47 | int get_rn_int_simple(void) 48 | #else 49 | int get_rn_int_simple() 50 | #endif 51 | { 52 | if(defaultgen == NULL) 53 | if(init_rng_simple(DEFAULT_RNG_TYPE,0,0) == NULL) 54 | return -1.0; 55 | 56 | return get_rn_int(defaultgen); 57 | } 58 | 59 | 60 | 61 | 62 | #ifdef __STDC__ 63 | float get_rn_flt_simple(void) 64 | #else 65 | float get_rn_flt_simple() 66 | #endif 67 | { 68 | if(defaultgen == NULL) 69 | if(init_rng_simple(DEFAULT_RNG_TYPE,0,0) == NULL) 70 | return -1.0; 71 | 72 | return get_rn_flt(defaultgen); 73 | } 74 | 75 | 76 | #ifdef __STDC__ 77 | double get_rn_dbl_simple(void) 78 | #else 79 | double get_rn_dbl_simple() 80 | #endif 81 | { 82 | if(defaultgen == NULL) 83 | if(init_rng_simple(DEFAULT_RNG_TYPE,0,0) == NULL) 84 | return -1.0; 85 | 86 | return get_rn_dbl(defaultgen); 87 | } 88 | 89 | 90 | 91 | 92 | 93 | #ifdef __STDC__ 94 | int pack_rng_simple(char **buffer) 95 | #else 96 | int pack_rng_simple(buffer) 97 | char **buffer; 98 | #endif 99 | { 100 | if(defaultgen == NULL) 101 | return 0; 102 | 103 | return pack_rng(defaultgen,buffer); 104 | } 105 | 106 | 107 | 108 | #ifdef __STDC__ 109 | int *unpack_rng_simple( char *packed) 110 | #else 111 | int *unpack_rng_simple(packed) 112 | char *packed; 113 | #endif 114 | { 115 | int *temp; 116 | 117 | temp = unpack_rng(packed); 118 | 119 | if(temp == NULL) 120 | return NULL; 121 | else 122 | { 123 | if(defaultgen != NULL) 124 | free_rng(defaultgen); 125 | defaultgen = temp; 126 | return &junk; /* return "garbage" value */ 127 | } 128 | } 129 | 130 | 131 | 132 | #ifdef __STDC__ 133 | int print_rng_simple(void) 134 | #else 135 | int print_rng_simple() 136 | #endif 137 | { 138 | if(defaultgen == NULL) 139 | { 140 | fprintf(stderr,"WARNING: No generator initialized so far\n"); 141 | return 0; 142 | } 143 | 144 | return print_rng(defaultgen); 145 | } 
146 | 147 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/simple_mpi.c: -------------------------------------------------------------------------------- 1 | /*************************************************************************/ 2 | /*************************************************************************/ 3 | /* LINEAR CONGRUENTIAL RANDOM NUMBER GENERATION WITH PRIME ADDEND */ 4 | /* */ 5 | /* Author: Ashok Srinivasan, */ 6 | /* NCSA, University of Illinois, Urbana-Champaign */ 7 | /* E-Mail: ashoks@ncsa.uiuc.edu */ 8 | /* */ 9 | /* Note: This generator is based on the Cray YMP compatible random number*/ 10 | /* generator for 32-bit IEEE machines by William Magro, Cornell Theory */ 11 | /* Center */ 12 | /* */ 13 | /* Disclaimer: NCSA expressly disclaims any and all warranties, expressed*/ 14 | /* or implied, concerning the enclosed software. The intent in sharing */ 15 | /* this software is to promote the productive interchange of ideas */ 16 | /* throughout the research community. All software is furnished on an */ 17 | /* "as is" basis. No further updates to this software should be */ 18 | /* expected. Although this may occur, no commitment exists. The authors */ 19 | /* certainly invite your comments as well as the reporting of any bugs. */ 20 | /* NCSA cannot commit that any or all bugs will be fixed. 
*/ 21 | /*************************************************************************/ 22 | /*************************************************************************/ 23 | 24 | #include 25 | #include 26 | #include "memory.h" 27 | #include "sprng.h" 28 | 29 | #ifndef ANSI_ARGS 30 | #ifdef __STDC__ 31 | #define ANSI_ARGS(args) args 32 | #else 33 | #define ANSI_ARGS(args) () 34 | #endif 35 | #endif 36 | 37 | extern int *defaultgen; 38 | int junkmpi; /* pass useless pointer at times */ 39 | 40 | 41 | #ifdef SPRNG_MPI 42 | #ifdef __STDC__ 43 | int *init_rng_simple_mpi(int rng_type, int seed, int mult) 44 | #else 45 | int *init_rng_simple_mpi(rng_type,seed,mult) 46 | int rng_type,mult,seed; 47 | #endif 48 | { 49 | int myid=0, nprocs=1, *temp; 50 | 51 | get_proc_info_mpi(&myid,&nprocs); 52 | 53 | temp = init_rng(rng_type,myid,nprocs,seed,mult); 54 | 55 | if(temp == NULL) 56 | return NULL; 57 | else 58 | { 59 | if(defaultgen != NULL) 60 | free_rng(defaultgen); 61 | defaultgen = temp; 62 | return &junkmpi; /* return "garbage" value */ 63 | } 64 | } 65 | 66 | 67 | #ifdef __STDC__ 68 | int get_rn_int_simple_mpi(void) 69 | #else 70 | int get_rn_int_simple_mpi() 71 | #endif 72 | { 73 | if(defaultgen == NULL) 74 | if(init_rng_simple_mpi(DEFAULT_RNG_TYPE,0,0) == NULL) 75 | return -1.0; 76 | 77 | return get_rn_int(defaultgen); 78 | } 79 | 80 | 81 | #ifdef __STDC__ 82 | float get_rn_flt_simple_mpi(void) 83 | #else 84 | float get_rn_flt_simple_mpi() 85 | #endif 86 | { 87 | if(defaultgen == NULL) 88 | if(init_rng_simple_mpi(DEFAULT_RNG_TYPE,0,0) == NULL) 89 | return -1.0; 90 | 91 | return get_rn_flt(defaultgen); 92 | } 93 | 94 | 95 | 96 | #ifdef __STDC__ 97 | double get_rn_dbl_simple_mpi(void) 98 | #else 99 | double get_rn_dbl_simple_mpi() 100 | #endif 101 | { 102 | if(defaultgen == NULL) 103 | if(init_rng_simple_mpi(DEFAULT_RNG_TYPE,0,0) == NULL) 104 | return -1.0; 105 | 106 | return get_rn_dbl(defaultgen); 107 | } 108 | 109 | #endif 110 | 111 | 
-------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/sprng.h: -------------------------------------------------------------------------------- 1 | #ifndef _sprng_h_ 2 | #define _sprng_h_ 3 | 4 | 5 | #define SPRNG_LFG 0 6 | #define SPRNG_LCG 1 7 | #define SPRNG_LCG64 2 8 | #define SPRNG_CMRG 3 9 | #define SPRNG_MLFG 4 10 | #define SPRNG_PMLCG 5 11 | 12 | #include "sprng_interface.h" 13 | 14 | #define SPRNG_DEFAULT 0 15 | #define CRAYLCG 0 16 | #define DRAND48 1 17 | #define FISH1 2 18 | #define FISH2 3 19 | #define FISH3 4 20 | #define FISH4 5 21 | #define FISH5 6 22 | #define LECU1 0 23 | #define LECU2 1 24 | #define LECU3 2 25 | #define LAG1279 0 26 | #define LAG17 1 27 | #define LAG31 2 28 | #define LAG55 3 29 | #define LAG63 4 30 | #define LAG127 5 31 | #define LAG521 6 32 | #define LAG521B 7 33 | #define LAG607 8 34 | #define LAG607B 9 35 | #define LAG1279B 10 36 | 37 | #define CHECK 1 38 | 39 | #define MAX_PACKED_LENGTH 24000 40 | 41 | #ifdef USE_MPI 42 | #define MPINAME(A) A ## _mpi 43 | #else 44 | #define MPINAME(A) A 45 | #endif 46 | 47 | #define make_sprng_seed MPINAME(make_new_seed) 48 | 49 | #if defined(SIMPLE_SPRNG) 50 | 51 | #define pack_sprng pack_rng_simple 52 | #define unpack_sprng unpack_rng_simple 53 | #define isprng MPINAME(get_rn_int_simple) 54 | #define init_sprng MPINAME(init_rng_simple) 55 | #define print_sprng print_rng_simple 56 | 57 | #ifdef FLOAT_GEN 58 | #define sprng MPINAME(get_rn_flt_simple) 59 | #else 60 | #define sprng MPINAME(get_rn_dbl_simple) 61 | #endif 62 | 63 | #elif !defined(CHECK_POINTERS) 64 | 65 | #define free_sprng free_rng 66 | #define pack_sprng pack_rng 67 | #define unpack_sprng unpack_rng 68 | #define isprng get_rn_int 69 | #define spawn_sprng(A,B,C) spawn_rng(A,B,C,!CHECK) 70 | #define init_sprng init_rng 71 | #define print_sprng print_rng 72 | 73 | #ifdef FLOAT_GEN 74 | #define sprng get_rn_flt 75 | #else 76 | #define sprng get_rn_dbl 77 | #endif 78 | 79 
| #else 80 | 81 | #define free_sprng(A) ((deleteID(A)==NULL) ? -1 : free_rng(A)) 82 | #define pack_sprng(A,B) ((checkID(A)==NULL) ? 0 : pack_rng(A,B)) 83 | #define unpack_sprng(A) addID(unpack_rng(A)) 84 | #define isprng(A) ((checkID(A)==NULL) ? -1 : get_rn_int(A)) 85 | #define spawn_sprng(A,B,C) ((checkID(A)==NULL) ? 0 : spawn_rng(A,B,C,CHECK)) 86 | #define init_sprng(A,B,C,D,E) addID(init_rng(A,B,C,D,E)) 87 | #define print_sprng(A) ((checkID(A)==NULL) ? 0 : print_rng(A)) 88 | 89 | #ifdef FLOAT_GEN 90 | #define sprng(A) ((checkID(A)==NULL) ? -1.0 : get_rn_flt(A)) 91 | #else 92 | #define sprng(A) ((checkID(A)==NULL) ? -1.0 : get_rn_dbl(A)) 93 | #endif 94 | 95 | #endif 96 | 97 | #endif 98 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/sprng_f.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #ifndef _sprngf_h_ 4 | 5 | #define SPRNG_LFG 0 6 | #define SPRNG_LCG 1 7 | #define SPRNG_LCG64 2 8 | #define SPRNG_CMRG 3 9 | #define SPRNG_MLFG 4 10 | #define SPRNG_PMLCG 5 11 | #define DEFAULT_RNG_TYPE SPRNG_LFG 12 | 13 | #define SPRNG_DEFAULT 0 14 | #define CRAYLCG 0 15 | #define DRAND48 1 16 | #define FISH1 2 17 | #define FISH2 3 18 | #define FISH3 4 19 | #define FISH4 5 20 | #define FISH5 6 21 | #define LECU1 0 22 | #define LECU2 1 23 | #define LECU3 2 24 | #define LAG1279 0 25 | #define LAG17 1 26 | #define LAG31 2 27 | #define LAG55 3 28 | #define LAG63 4 29 | #define LAG127 5 30 | #define LAG521 6 31 | #define LAG521B 7 32 | #define LAG607 8 33 | #define LAG607B 9 34 | #define LAG1279B 10 35 | 36 | #ifdef CHECK_POINTERS 37 | #define CHECK 1 38 | #else 39 | #define CHECK 0 40 | #endif /* ifdef CHECK_POINTERS */ 41 | 42 | #define MAX_PACKED_LENGTH 24000 43 | 44 | #ifdef POINTER_SIZE 45 | #if POINTER_SIZE == 8 46 | #define SPRNG_POINTER integer*8 47 | #else 48 | #define SPRNG_POINTER integer*4 49 | #endif 50 | #else 51 | #define SPRNG_POINTER integer*4 52 | 
#endif /* ifdef POINTER_SIZE */ 53 | 54 | #ifdef USE_MPI 55 | #define make_sprng_seed fseed_mpi 56 | #else 57 | #define make_sprng_seed fmake_new_seed 58 | #endif 59 | 60 | #endif /* ifdef _sprng_h */ 61 | 62 | #ifdef USE_MPI 63 | external fseed_mpi 64 | integer fseed_mpi 65 | #else 66 | external fmake_new_seed 67 | integer fmake_new_seed 68 | #endif 69 | 70 | #ifndef DEFAULTINT 71 | #define DEFAULTINT 72 | #endif 73 | #ifndef FLOAT_GEN 74 | #define DBLGEN 75 | #endif 76 | 77 | #if defined(SIMPLE_SPRNG) 78 | #undef DEFAULTINT 79 | 80 | #ifndef _sprngf_h_ 81 | #define pack_sprng fpack_rng_simple 82 | #define unpack_sprng funpack_rng_simple 83 | #ifdef USE_MPI 84 | #define isprng fget_rn_int_simmpi 85 | #define init_sprng finit_rng_simmpi 86 | #else 87 | #define isprng fget_rn_int_sim 88 | #define init_sprng finit_rng_sim 89 | #endif /* ifdef USE_MPI */ 90 | #define print_sprng fprint_rng_simple 91 | 92 | #if defined(FLOAT_GEN) && defined(USE_MPI) 93 | #define sprng fget_rn_flt_simmpi 94 | #endif 95 | #if defined(FLOAT_GEN) && !defined(USE_MPI) 96 | #define sprng fget_rn_flt_sim 97 | #endif 98 | #if defined(DBLGEN) && defined(USE_MPI) 99 | #define sprng fget_rn_dbl_simmpi 100 | #endif 101 | #if defined(DBLGEN) && !defined(USE_MPI) 102 | #define sprng fget_rn_dbl_sim 103 | #endif 104 | 105 | #endif /* ifdef _sprng_h */ 106 | external isprng 107 | external fget_rn_dbl_sim, fget_rn_flt_sim 108 | external init_sprng, fpack_rng_simple 109 | external funpack_rng_simple, fprint_rng_simple 110 | #ifdef USE_MPI 111 | external fget_rn_flt_simmpi, fget_rn_dbl_simmpi 112 | real*4 fget_rn_flt_simmpi 113 | real*8 fget_rn_dbl_simmpi 114 | #endif 115 | integer isprng,fpack_rng_simple,fprint_rng_simple 116 | SPRNG_POINTER init_sprng, funpack_rng_simple 117 | real*4 fget_rn_flt_sim 118 | real*8 fget_rn_dbl_sim 119 | #endif 120 | 121 | #if defined(CHECK_POINTERS) 122 | #undef DEFAULTINT 123 | external fget_rn_int_ptr, fget_rn_flt_ptr, fget_rn_dbl_ptr 124 | external fspawn_rng_ptr, 
ffree_rng_ptr, finit_rng_ptr 125 | external fpack_rng_ptr, funpack_rng_ptr, fprint_rng_ptr 126 | 127 | integer fget_rn_int_ptr, ffree_rng_ptr, fpack_rng_ptr 128 | SPRNG_POINTER finit_rng_ptr, funpack_rng_ptr 129 | integer fspawn_rng_ptr, fprint_rng_ptr 130 | real*4 fget_rn_flt_ptr 131 | real*8 fget_rn_dbl_ptr 132 | 133 | #ifndef _sprngf_h_ 134 | #define isprng fget_rn_int_ptr 135 | #define free_sprng ffree_rng_ptr 136 | #define spawn_sprng(A,B,C) fspawn_rng_ptr(A,B,C,CHECK) 137 | #define pack_sprng fpack_rng_ptr 138 | #define unpack_sprng funpack_rng_ptr 139 | #define init_sprng finit_rng_ptr 140 | #define print_sprng fprint_rng_ptr 141 | #ifdef FLOAT_GEN 142 | #define sprng fget_rn_flt_ptr 143 | #else 144 | #define sprng fget_rn_dbl_ptr 145 | #endif 146 | #endif 147 | #endif 148 | 149 | #if defined(DEFAULTINT) 150 | external fget_rn_int, fget_rn_flt, fget_rn_dbl 151 | external fspawn_rng, ffree_rng, finit_rng 152 | external fpack_rng, funpack_rng, fprint_rng 153 | 154 | integer fget_rn_int, ffree_rng, fpack_rng 155 | SPRNG_POINTER finit_rng, funpack_rng 156 | integer fspawn_rng, fprint_rng 157 | real*4 fget_rn_flt 158 | real*8 fget_rn_dbl 159 | 160 | #ifndef _sprngf_h_ 161 | #define isprng fget_rn_int 162 | #define free_sprng ffree_rng 163 | #define spawn_sprng(A,B,C) fspawn_rng(A,B,C,CHECK) 164 | #define pack_sprng fpack_rng 165 | #define unpack_sprng funpack_rng 166 | #define init_sprng finit_rng 167 | #define print_sprng fprint_rng 168 | #ifdef FLOAT_GEN 169 | #define sprng fget_rn_flt 170 | #else 171 | #define sprng fget_rn_dbl 172 | #endif 173 | #endif 174 | 175 | #endif 176 | 177 | 178 | 179 | #ifndef _sprngf_h_ 180 | #define _sprngf_h_ 181 | #endif 182 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/sprng_interface.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef _sprng_interface_h_ 3 | #define _sprng_interface_h_ 4 | 5 | #define 
DEFAULT_RNG_TYPE SPRNG_LFG 6 | 7 | #ifndef ANSI_ARGS 8 | #ifdef __STDC__ 9 | #define ANSI_ARGS(args) args 10 | #else 11 | #define ANSI_ARGS(args) () 12 | #endif 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | int get_rn_int ANSI_ARGS((int *igenptr)); 20 | float get_rn_flt ANSI_ARGS((int *igenptr)); 21 | double get_rn_dbl ANSI_ARGS((int *igenptr)); 22 | int *init_rng ANSI_ARGS((int rng_type, int gennum, int total_gen, int seed, 23 | int mult)); 24 | int spawn_rng ANSI_ARGS((int *igenptr, int nspawned, int ***newgens, int checkid) ); 25 | int make_new_seed ANSI_ARGS((void)); 26 | int make_new_seed_mpi ANSI_ARGS((void)); 27 | int get_seed_rng ANSI_ARGS((int *genptr)); 28 | int free_rng ANSI_ARGS((int *genptr)); 29 | int pack_rng ANSI_ARGS(( int *genptr, char **buffer)); 30 | int *unpack_rng ANSI_ARGS(( char *packed)); 31 | int print_rng ANSI_ARGS(( int *igen)); 32 | int *checkID ANSI_ARGS(( int *igen)); 33 | int *addID ANSI_ARGS(( int *igen)); 34 | int *deleteID ANSI_ARGS(( int *igen)); 35 | 36 | 37 | int *init_rng_simple ANSI_ARGS((int rng_type, int seed, int mult)); 38 | int *init_rng_simple_mpi ANSI_ARGS((int rng_type, int seed, int mult)); 39 | int get_rn_int_simple ANSI_ARGS((void)); 40 | int get_rn_int_simple_mpi ANSI_ARGS((void)); 41 | float get_rn_flt_simple ANSI_ARGS((void)); 42 | float get_rn_flt_simple_mpi ANSI_ARGS((void)); 43 | double get_rn_dbl_simple ANSI_ARGS((void)); 44 | double get_rn_dbl_simple_mpi ANSI_ARGS((void)); 45 | int pack_rng_simple ANSI_ARGS((char **buffer)); 46 | int *unpack_rng_simple ANSI_ARGS(( char *packed)); 47 | int print_rng_simple ANSI_ARGS((void)); 48 | 49 | 50 | #ifdef __cplusplus 51 | } 52 | #endif 53 | 54 | 55 | #endif 56 | -------------------------------------------------------------------------------- /larger_samples/ssca2/src/sprng2.0/store.c: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "store.h" 4 | 5 | 6 | int 
store_long(unsigned long l, int nbytes, unsigned char *c) 7 | { 8 | int i; 9 | 10 | for(i=0; i>(8*(nbytes-i-1)))&0xff; 12 | 13 | return nbytes; /* return number of chars filled */ 14 | } 15 | 16 | 17 | int store_longarray(unsigned long *l, int n, int nbytes, unsigned char *c) 18 | { 19 | int i; 20 | 21 | for(i=0; i>(8*(nbytes-i-1)))&0xff; 59 | 60 | return nbytes; /* return number of chars filled */ 61 | } 62 | 63 | 64 | int store_longlongarray(unsigned long long *l, int n, int nbytes, unsigned char *c) 65 | { 66 | int i; 67 | 68 | for(i=0; i>(8*(nbytes-i-1)))&0xff; 105 | 106 | return nbytes; /* return number of chars filled */ 107 | } 108 | 109 | 110 | int store_intarray(unsigned int *l, int n, int nbytes, unsigned char *c) 111 | { 112 | int i; 113 | 114 | for(i=0; i0) { 48 | add_value=p[tid-1]; 49 | for (j=start-1; j 2 | #include 3 | #include 4 | 5 | typedef unsigned long dn; 6 | 7 | dn delannoy(dn x, dn y) { 8 | if(x==0 || y==0) return 1; 9 | 10 | dn a = delannoy(x-1, y ); 11 | dn b = delannoy(x-1, y-1); 12 | dn c = delannoy( x, y-1); 13 | 14 | return a + b + c; 15 | } 16 | 17 | dn DELANNOY_RESULTS[] = { 18 | 1, 3, 13, 63, 321, 1683, 8989, 48639, 265729, 1462563, 8097453, 45046719, 251595969, 1409933619, 19 | 7923848253, 44642381823, 252055236609, 1425834724419, 8079317057869, 45849429914943, 260543813797441, 20 | 1482376214227923, 8443414161166173}; 21 | 22 | int NUM_RESULTS = sizeof(DELANNOY_RESULTS) / sizeof(dn); 23 | 24 | int main(int argc, char **argv) { 25 | if(argc<2) { 26 | printf("Usage: delannoy N [+t]\n"); 27 | exit(-1); 28 | } 29 | 30 | int n = atoi(argv[1]); 31 | if(n >= NUM_RESULTS) { 32 | printf("N too large (can only check up to %d)\n", NUM_RESULTS); 33 | exit(-1); 34 | } 35 | 36 | dn result = 0; 37 | result = delannoy(n, n); 38 | 39 | if(result == DELANNOY_RESULTS[n]) { 40 | printf("Verification: OK\n"); 41 | return EXIT_SUCCESS; 42 | } 43 | printf("Verification: ERR\n"); 44 | return EXIT_FAILURE; 45 | } 46 | 
-------------------------------------------------------------------------------- /small_samples/filegen/filegen.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #define MAX_FILENAME_LENGTH 256 8 | 9 | void random_string(char *s, const int len) { 10 | static const char charset[] = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; 11 | for (int i = 0; i < len; i++) { 12 | int r = rand() % (int) (sizeof(charset) - 1); 13 | s[i] = charset[r]; 14 | } 15 | s[len-1] = '\0'; 16 | } 17 | 18 | int random_number(int min, int max) { 19 | return min + rand() % (max - min + 1); 20 | } 21 | 22 | void create_files(const char* dirname, int num_files, int min_file_size, int max_file_size) { 23 | char full_dirname[MAX_FILENAME_LENGTH]; 24 | snprintf(full_dirname, MAX_FILENAME_LENGTH, "generated/%s", dirname); 25 | mkdir(full_dirname, 0755); // Create the directory 26 | 27 | int j; 28 | for (j = 0; j < num_files; j++) { 29 | char filename[MAX_FILENAME_LENGTH]; 30 | int written = snprintf(filename, MAX_FILENAME_LENGTH, "%s/file_%d", full_dirname, j); 31 | if(written < 0 || written >= MAX_FILENAME_LENGTH) { 32 | fprintf(stderr, "Error building file path"); 33 | } 34 | 35 | int size = random_number(min_file_size, max_file_size); 36 | char *data = (char*)malloc(size); 37 | random_string(data, size); 38 | 39 | FILE *fp = fopen(filename, "wb"); 40 | fwrite(data, 1, size, fp); 41 | fclose(fp); 42 | 43 | free(data); 44 | } 45 | } 46 | 47 | int main(int argc, char** argv) { 48 | int num_directories, num_files, min_file_size, max_file_size, seed; 49 | 50 | // Set default values for the parameters 51 | num_directories = 1; 52 | num_files = 10; 53 | min_file_size = 1024; 54 | max_file_size = 1048576; 55 | seed = 1234; // Set default seed to 1234 56 | 57 | // Parse command line arguments 58 | switch (argc) { 59 | case 6: 60 | seed = atoi(argv[5]); 61 | // fall through 62 | 
case 5: 63 | num_directories = atoi(argv[1]); 64 | num_files = atoi(argv[2]); 65 | min_file_size = atoi(argv[3]); 66 | max_file_size = atoi(argv[4]); 67 | break; 68 | default: 69 | fprintf(stderr, "Usage: %s []\n", argv[0]); 70 | return 1; 71 | } 72 | 73 | srand(seed); // Seed the random number generator 74 | 75 | mkdir("generated", 0755); // Create the parent directory 76 | 77 | int i; 78 | for (i = 0; i < num_directories; i++) { 79 | char dirname[MAX_FILENAME_LENGTH]; 80 | snprintf(dirname, MAX_FILENAME_LENGTH, "dir_%d", i); 81 | create_files(dirname, num_files, min_file_size, max_file_size); 82 | } 83 | 84 | return 0; 85 | } 86 | -------------------------------------------------------------------------------- /small_samples/filesearch/filesearch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | void find_largest_file(char *dir_path, long long *max_size, char *max_file); 7 | 8 | int main() { 9 | long long max_size = 0; 10 | char max_file[256] = ""; 11 | 12 | // Start the search in the current directory 13 | find_largest_file(".", &max_size, max_file); 14 | 15 | // Print the name of the largest file 16 | printf("The largest file is %s with size %lld bytes\n", max_file, max_size); 17 | 18 | return 0; 19 | } 20 | 21 | void find_largest_file(char *dir_path, long long *max_size, char *max_file) { 22 | DIR *dir; 23 | struct dirent *entry; 24 | struct stat file_stat; 25 | char file_path[512]; 26 | 27 | // Open the directory 28 | dir = opendir(dir_path); 29 | if (dir == NULL) { 30 | perror("Error opening directory"); 31 | return; 32 | } 33 | 34 | // Loop through all the files in the directory 35 | while ((entry = readdir(dir)) != NULL) { 36 | // Skip "." and ".." 
directories 37 | if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) { 38 | continue; 39 | } 40 | // Build the path to the file 41 | snprintf(file_path, sizeof(file_path), "%s/%s", dir_path, entry->d_name); 42 | // Get information about the file 43 | if (stat(file_path, &file_stat) == -1) { 44 | perror("Error getting file stat"); 45 | continue; 46 | } 47 | // If it's a regular file and larger than the current maximum, update the maximum 48 | if (S_ISREG(file_stat.st_mode) && file_stat.st_size > *max_size) { 49 | *max_size = file_stat.st_size; 50 | strcpy(max_file, file_path); 51 | } 52 | // If it's a directory, recursively search it for the largest file 53 | if (S_ISDIR(file_stat.st_mode)) { 54 | find_largest_file(file_path, max_size, max_file); 55 | } 56 | } 57 | 58 | closedir(dir); 59 | } 60 | -------------------------------------------------------------------------------- /small_samples/mmul/mmul.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #define S 1000 5 | #define N S 6 | #define M S 7 | #define K S 8 | 9 | #define MIN(X,Y) ((X)<(Y)?(X):(Y)) 10 | #define MAX(X,Y) ((X)>(Y)?(X):(Y)) 11 | 12 | #define TYPE double 13 | #define MATRIX TYPE** 14 | 15 | // A utility function 16 | MATRIX createMatrix(unsigned x, unsigned y) { 17 | TYPE* data = malloc(x * y * sizeof(TYPE)); 18 | 19 | TYPE** index = malloc(x * sizeof(TYPE*)); 20 | index[0] = data; 21 | for (unsigned i = 1; i < x; ++i) { 22 | index[i] = &(data[i*y]); 23 | } 24 | return index; 25 | } 26 | 27 | void freeMatrix(MATRIX matrix) { 28 | free(matrix[0]); 29 | free(matrix); 30 | } 31 | 32 | 33 | int main(void) { 34 | 35 | // create the matrices 36 | MATRIX A = createMatrix(N, M); 37 | MATRIX B = createMatrix(M, K); 38 | MATRIX C = createMatrix(N, K); 39 | 40 | // initialize the matrices 41 | 42 | // A contains real values 43 | for (int i=0; i 2 | #include 3 | #include 4 | 5 | // the problem size - the number of 
particles 6 | #ifndef N 7 | #define N 1000 8 | #endif 9 | 10 | // the second problem size = number of iterations 11 | #ifndef M 12 | #define M 100 13 | #endif 14 | 15 | #ifndef L 16 | #define L 1000 17 | #endif 18 | 19 | #define SPACE_SIZE 1000 20 | 21 | // the type used to represent a triple of doubles 22 | typedef struct { 23 | double x, y, z; 24 | } triple; 25 | 26 | 27 | // the types used to model position, speed and forces 28 | typedef triple position; 29 | typedef triple velocity; 30 | typedef triple force; 31 | typedef triple impulse; 32 | 33 | // the type used to model one body 34 | typedef struct { 35 | double m; // the mass of the body 36 | position pos; // the position in space 37 | velocity v; // the velocity of the body 38 | } body; 39 | 40 | // the list of bodies 41 | body B[N]; 42 | 43 | // the forces affecting the particles 44 | force F[N]; 45 | 46 | // ----- utility functions ------ 47 | double rand_val(double min, double max) { 48 | return (rand() / (double) RAND_MAX) * (max - min) + min; 49 | } 50 | 51 | triple triple_zero() { 52 | return (position) {0.0, 0.0, 0.0}; 53 | } 54 | 55 | triple triple_rand() { 56 | return (position) { 57 | rand_val(-SPACE_SIZE,SPACE_SIZE), 58 | rand_val(-SPACE_SIZE,SPACE_SIZE), 59 | rand_val(-SPACE_SIZE,SPACE_SIZE) 60 | }; 61 | } 62 | 63 | void triple_print(triple t) { 64 | printf("(%f,%f,%f)", t.x, t.y, t.z); 65 | } 66 | 67 | // some operators 68 | #define eps 0.0001 69 | #define abs(V) (((V)<0)?-(V):(V)) 70 | #define min(A,B) (((A)<(B))?(A):(B)) 71 | 72 | #define ADD(T1,T2) (triple) { (T1).x + (T2).x, (T1).y + (T2).y, (T1).z + (T2).z } 73 | #define SUB(T1,T2) (triple) { (T1).x - (T2).x, (T1).y - (T2).y, (T1).z - (T2).z } 74 | #define DIV(T1,T2) (triple) { (T1).x / (T2).x, (T1).y / (T2).y, (T1).z / (T2).z } 75 | 76 | #define MULS(T,S) (triple) { (T).x * (S), (T).y * (S), (T).z * (S) } 77 | #define DIVS(T,S) (triple) { (T).x / (S), (T).y / (S), (T).z / (S) } 78 | 79 | #define EQ(T1,T2) (abs((T1).x-(T2).x) < eps && 
abs((T1).y-(T2).y) < eps && abs((T1).z-(T2).z) < eps) 80 | 81 | #define ABS(T) sqrt((T).x*(T).x + (T).y*(T).y + (T).z*(T).z) 82 | 83 | #define NORM(T) MULS(T,(1/ABS(T))) 84 | 85 | // --- main ---- 86 | 87 | int main() { 88 | 89 | // distribute bodies in space (randomly) 90 | for(int i=0; i 3 | #include 4 | #include 5 | 6 | #define bool int 7 | #define true 1 8 | #define false 0 9 | 10 | // A quadratic matrix, dynamically sized 11 | typedef struct _qmatrix { 12 | int size; 13 | int data[]; 14 | } qmatrix; 15 | 16 | qmatrix* qm_create(int size) { 17 | qmatrix* res = (qmatrix*)malloc(sizeof(qmatrix) + size * size * sizeof(int)); 18 | res->size = size; 19 | return res; 20 | } 21 | 22 | void qm_del(qmatrix* matrix) { 23 | free(matrix); 24 | } 25 | 26 | #define get(M,I,J) M->data[(I) * (M)->size + (J)] 27 | 28 | 29 | // a struct describing a QAP instance 30 | typedef struct _problem { 31 | int size; // the size of the problem 32 | qmatrix* A; // the weight matrix (size x size) 33 | qmatrix* B; // the distance matrix (size x size) 34 | int optimum; // the value of the optimal solution 35 | } problem; 36 | 37 | problem* qap_load(char* file); 38 | 39 | void qap_del(problem* problem) { 40 | qm_del(problem->A); 41 | qm_del(problem->B); 42 | free(problem); 43 | } 44 | 45 | #define getA(P,I,J) get(P->A,I,J) 46 | #define getB(P,I,J) get(P->B,I,J) 47 | 48 | 49 | // a struct representing a (partial) solution to the problem 50 | typedef struct _solution { 51 | struct _solution* head; // the solution is forming a linked list 52 | int pos; // the location the current facility is assigned to 53 | } solution; 54 | 55 | solution* empty() { return 0; } 56 | 57 | 58 | void print(solution* solution) { 59 | if(!solution) return; 60 | print(solution->head); 61 | printf("-%d", solution->pos); 62 | } 63 | 64 | int solve_rec(problem* problem, solution* partial, int plant, int used_mask, int cur_cost, int best_known) { 65 | // terminal case 66 | if (plant >= problem->size) { 67 | return 
cur_cost; 68 | } 69 | 70 | if (cur_cost >= best_known) { 71 | return best_known; 72 | } 73 | 74 | // fix current position 75 | int best = best_known; 76 | for(int i=0; isize; i++) { 77 | // check whether current spot is a free spot 78 | if(!(1<pos; 89 | 90 | // add costs between current pair of plants 91 | new_cost += getA(problem, plant, cur_plant) * getB(problem, i, other_pos); 92 | new_cost += getA(problem, cur_plant, plant) * getB(problem, other_pos, i); 93 | 94 | // go to next plant 95 | cur = cur->head; 96 | cur_plant--; 97 | } 98 | 99 | // compute recursive rest 100 | int cur_best = solve_rec(problem, &tmp, plant+1, used_mask | (1<= 2) { 117 | problem_file = argv[1]; 118 | } 119 | 120 | // load problem 121 | problem* p = qap_load(problem_file); 122 | 123 | // run solver 124 | printf("Run solver ...\n"); 125 | int best = solve(p); 126 | printf("Done!\n"); 127 | printf("Best Result: %d\n", best); 128 | 129 | // verify result 130 | bool success = (best == p->optimum); 131 | printf("Verification: %s\n", (success?"OK":"ERR")); 132 | 133 | // free problem 134 | qap_del(p); 135 | 136 | return success ? 
EXIT_SUCCESS : EXIT_FAILURE; 137 | } 138 | 139 | 140 | problem* qap_load(char* file) { 141 | 142 | FILE* fp = fopen(file, "r"); 143 | printf("Loading Problem File %s ..\n", file); 144 | 145 | // get problem size 146 | int problemSize; 147 | int read = fscanf(fp, "%d", &problemSize); 148 | assert(read > 0); 149 | printf(" - problem size: %d\n", problemSize); 150 | 151 | // create problem instance 152 | problem* res = (problem*)malloc(sizeof(problem)); 153 | res->size = problemSize; 154 | res->A = qm_create(problemSize); 155 | res->B = qm_create(problemSize); 156 | 157 | // load matrix A 158 | for(int i=0; i 0); 162 | } 163 | } 164 | 165 | // load matrix B 166 | for(int i=0; i 0); 170 | } 171 | } 172 | 173 | // load optimum 174 | read = fscanf(fp, "%d", &(res->optimum)); 175 | assert(read > 0); 176 | printf(" - optimum: %d\n", res->optimum); 177 | 178 | if(read == 0) { 179 | printf("Error reading input!\n"); 180 | } 181 | 182 | fclose(fp); 183 | 184 | return res; 185 | } 186 | 187 | 188 | -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.10) 2 | project(perf_oriented_prog_tools) 3 | 4 | set_property(GLOBAL PROPERTY C_STANDARD 11) 5 | 6 | find_package(OpenMP) 7 | if(OpenMP_C_FOUND) 8 | link_libraries(OpenMP::OpenMP_C) 9 | endif() 10 | 11 | link_libraries(m pthread) 12 | 13 | if(MSVC) 14 | add_compile_options(/W4 /WX) 15 | else() 16 | add_compile_options(-Wall -Wextra -Wpedantic -Werror) 17 | endif() 18 | 19 | add_executable(loadcapture load_generator/loadcapture.c load_generator/loadutils.c) 20 | add_executable(loadgen load_generator/loadgen.c load_generator/loadutils.c) 21 | add_executable(loadprofile load_generator/loadprofile.c load_generator/loadutils.c) 22 | add_executable(loadtest load_generator/loadtest.c load_generator/loadutils.c) 23 | 24 | add_executable(malloctest malloctest/malloctest.c) 
25 | -------------------------------------------------------------------------------- /tools/load_generator/exec_with_workstation_heavy.sh: -------------------------------------------------------------------------------- 1 | killall loadgen &> /dev/null 2 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 3 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 4 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 5 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 6 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 7 | ../build/loadgen mc3 workstation/sys_load_profile_workstation_excerpt.txt &> /dev/null & 8 | #time -p nice -n 100 $1 9 | nice -n 1000 $1 10 | killall loadgen &> /dev/null 11 | -------------------------------------------------------------------------------- /tools/load_generator/loadcapture.c: -------------------------------------------------------------------------------- 1 | #include "loadutils.h" 2 | 3 | #define MAXCORES 4096 4 | #define MAXLINE 1024 * 16 5 | 6 | typedef struct system_load_profile_ { 7 | core_load_profile cores[MAXCORES]; 8 | } system_load_profile; 9 | 10 | void capture_system_load(system_load_profile *external_profile) { 11 | system_load_profile current_profile; 12 | 13 | printf("time:% 16lld ms / load: ", time_ms()); 14 | 15 | FILE *file = fopen("/proc/stat", "r"); 16 | // skip first line 17 | skipline(file); 18 | // read relevant lines 19 | char scanBuffer[255]; 20 | ull num_cores = get_num_cpus(); 21 | for (ull core = 0; core < num_cores; ++core) { 22 | sprintf(scanBuffer, "cpu%llu %%llu %%llu %%llu %%llu", core); 23 | if (fscanf(file, scanBuffer, ¤t_profile.cores[core].total_user, 24 | ¤t_profile.cores[core].total_user_low, 25 | ¤t_profile.cores[core].total_system, 26 | ¤t_profile.cores[core].total_idle) != 4) { 27 | printf("Error 
parsing /proc/stat.\n"); 28 | exit(-1); 29 | } 30 | skipline(file); // skip rest of line 31 | 32 | // overflow "handling" 33 | ull user = 0, user_low = 0, system = 0, idle = 0; 34 | 35 | if (current_profile.cores[core].total_user < 36 | external_profile->cores[core].total_user || 37 | external_profile->cores[core].total_user == 0) 38 | user = current_profile.cores[core].total_user; 39 | else 40 | user = current_profile.cores[core].total_user - 41 | external_profile->cores[core].total_user; 42 | 43 | if (current_profile.cores[core].total_user_low < 44 | external_profile->cores[core].total_user_low || 45 | external_profile->cores[core].total_user_low == 0) 46 | user_low = current_profile.cores[core].total_user_low; 47 | else 48 | user_low = current_profile.cores[core].total_user_low - 49 | external_profile->cores[core].total_user_low; 50 | 51 | if (current_profile.cores[core].total_system < 52 | external_profile->cores[core].total_system || 53 | external_profile->cores[core].total_system == 0) 54 | system = current_profile.cores[core].total_system; 55 | else 56 | system = current_profile.cores[core].total_system - 57 | external_profile->cores[core].total_system; 58 | 59 | if (current_profile.cores[core].total_idle < 60 | external_profile->cores[core].total_idle || 61 | external_profile->cores[core].total_idle == 0) 62 | idle = current_profile.cores[core].total_idle; 63 | else 64 | idle = current_profile.cores[core].total_idle - 65 | external_profile->cores[core].total_idle; 66 | 67 | // calculate load 68 | double load; 69 | double cur_nonidle = user + user_low + system; 70 | double cur_idle = idle; 71 | if (cur_idle <= 0.0) 72 | load = 1.0; 73 | else 74 | load = cur_nonidle / (cur_nonidle + cur_idle); 75 | printf("% 8.4f", load); 76 | 77 | // update profile data 78 | external_profile->cores[core] = current_profile.cores[core]; 79 | } 80 | fclose(file); 81 | 82 | printf("\n"); 83 | fflush(stdout); 84 | } 85 | 86 | #include 87 | 88 | static bool run = true; 89 | void 
handler(int param) { 90 | (void)(param); 91 | run = false; 92 | } 93 | 94 | int main(int argc, char **argv) { 95 | signal(SIGINT, handler); 96 | signal(SIGTERM, handler); 97 | 98 | int core = 3; 99 | if (argc > 1) 100 | core = atoi(argv[1]); 101 | set_affinity(core); 102 | 103 | system_load_profile profile; 104 | while (run) { 105 | ull t = time_ms(); 106 | capture_system_load(&profile); 107 | // sleep remaining time to 1 second 108 | ull tn = time_ms(); 109 | if ((t + 1000) > tn) 110 | microsleep((t + 1000 - tn) * 1000); 111 | } 112 | 113 | return 0; 114 | } 115 | -------------------------------------------------------------------------------- /tools/load_generator/loadgen.c: -------------------------------------------------------------------------------- 1 | #include "loadutils.h" 2 | 3 | #include 4 | #include 5 | #include 6 | 7 | typedef struct _load_profile { 8 | ull cores; 9 | ull length; 10 | double **values; 11 | } load_profile; 12 | 13 | void load_load_profile(const char *filename, load_profile *prof) { 14 | FILE *in = fopen(filename, "r"); 15 | 16 | // check if file successfully opened 17 | if (in == NULL) { 18 | printf("Error opening file: %s\n", filename); 19 | exit(-1); 20 | } 21 | 22 | // read sizes 23 | int ret = fscanf(in, "cores: %llu", &prof->cores); 24 | (void)(ret); // suppress warning in release builds 25 | assert(ret == 1 && "Error reading load profile."); 26 | skipline(in); 27 | ret = fscanf(in, "length: %llu", &prof->length); 28 | assert(ret == 1 && "Error reading load profile."); 29 | skipline(in); 30 | 31 | // allocate memory for values 32 | prof->values = (double **)calloc(sizeof(double *), prof->cores); 33 | for (ull core = 0; core < prof->cores; ++core) { 34 | prof->values[core] = (double *)calloc(sizeof(double), prof->length); 35 | } 36 | 37 | // read in values 38 | for (ull line = 0; line < prof->length; ++line) { 39 | ret = fscanf(in, "time: %*u ms / load:"); 40 | assert(ret == 0 && "Error reading load profile."); 41 | for (ull core = 
0; core < prof->cores; ++core) { 42 | ret = fscanf(in, " %lf", &prof->values[core][line]); 43 | assert(ret == 1 && "Error reading load profile."); 44 | } 45 | skipline(in); 46 | } 47 | 48 | fclose(in); 49 | } 50 | 51 | void delete_load_profile(load_profile *prof) { 52 | for (ull core = 0; core < prof->cores; ++core) { 53 | free(prof->values[core]); 54 | } 55 | free(prof->values); 56 | } 57 | 58 | static bool run = true; 59 | void handler(int param) { 60 | (void)(param); 61 | run = false; 62 | } 63 | 64 | int main(int argc, char **argv) { 65 | signal(SIGINT, handler); 66 | signal(SIGTERM, handler); 67 | 68 | if (argc != 3) { 69 | printf("Usage: loadgen [machine] [profile]\n"); 70 | exit(-1); 71 | } 72 | 73 | load_characteristic characteristic; 74 | if (strcmp(argv[1], "mc3") == 0) 75 | characteristic = mc3_characteristic; 76 | else { 77 | printf("Unknown machine: %s\n", argv[1]); 78 | exit(-2); 79 | } 80 | 81 | load_profile profile; 82 | load_load_profile(argv[2], &profile); 83 | 84 | #pragma omp parallel 85 | { 86 | ull core = omp_get_thread_num(); 87 | set_affinity(core); 88 | 89 | ull core_index = core % profile.cores; 90 | ull pos_index = 0; 91 | while (run) { 92 | // #pragma omp barrier 93 | generate_load_timed(1, profile.values[core_index][pos_index], 94 | &characteristic); 95 | pos_index++; 96 | if (pos_index >= profile.length) 97 | pos_index = 0; 98 | } 99 | } 100 | 101 | delete_load_profile(&profile); 102 | exit(0); 103 | } 104 | -------------------------------------------------------------------------------- /tools/load_generator/loadprofile.c: -------------------------------------------------------------------------------- 1 | #include "loadutils.h" 2 | 3 | void full_coverage(int core, unsigned measurement_time) { 4 | set_affinity(core); 5 | core_load_profile profile; 6 | 7 | ull sleepphase_start = 0; 8 | ull sleepphase_step = 500; 9 | ull sleepphase_max = 25000; 10 | ull loopreps_start = 5000; 11 | ull loopreps_step = 5000; 12 | ull loopreps_max = 
200000; 13 | 14 | printf("loopreps\\sleep: "); 15 | for (ull sleepphase = sleepphase_start; sleepphase <= sleepphase_max; 16 | sleepphase += sleepphase_step) { 17 | printf("% 10lld", sleepphase); 18 | } 19 | printf("\n"); 20 | 21 | for (ull loopreps = loopreps_start; loopreps <= loopreps_max; 22 | loopreps += loopreps_step) { 23 | printf("% 16lld", loopreps); 24 | for (ull sleepphase = sleepphase_start; sleepphase <= sleepphase_max; 25 | sleepphase += sleepphase_step) { 26 | get_rel_load_on_core(core, &profile); 27 | loadgen_timed(measurement_time, loopreps, sleepphase); 28 | double load = get_rel_load_on_core(core, &profile); 29 | printf("% 10.4f", load); 30 | fflush(stdout); 31 | } 32 | printf("\n"); 33 | } 34 | } 35 | 36 | int main(int argc, char **argv) { 37 | 38 | core_load_profile profile; 39 | int core = 3; 40 | if (argc > 1) 41 | core = atoi(argv[1]); 42 | 43 | set_affinity(core); 44 | printf("Calibrating on core %d...\n", core); 45 | get_rel_load_on_core(core, &profile); 46 | microsleep(1000000); 47 | printf("Startup: %f\n\n", get_rel_load_on_core(core, &profile)); 48 | 49 | full_coverage(core, 3); 50 | 51 | return 0; 52 | } 53 | -------------------------------------------------------------------------------- /tools/load_generator/loadproplot.gnu: -------------------------------------------------------------------------------- 1 | #set terminal png enhanced transparent size 1600,1000 font vera 2 | #set output 'loadprofile_mc3.png' 3 | set view 60, 30, 0.85, 1.1 4 | set contour base 5 | set cntrparam bspline 6 | set cntrparam levels incremental 0,0.1,1.0 7 | set title "mc3 load generation profile" 8 | set xlabel "X axis" 9 | set ylabel "Y axis" 10 | set zlabel "Z axis" 11 | #set zlabel offset character 1, 0, 0 font "" textcolor lt -1 norotate 12 | set zrange [ 0.00000 : 1.00000 ] noreverse nowriteback 13 | set style data lines 14 | splot 'loadprofile_mc3.dat' matrix linewidth 2 15 | 
-------------------------------------------------------------------------------- /tools/load_generator/loadtest.c: -------------------------------------------------------------------------------- 1 | #include "loadutils.h" 2 | 3 | int main(int argc, char **argv) { 4 | 5 | core_load_profile profile; 6 | int core = 3; 7 | if (argc > 1) 8 | core = atoi(argv[1]); 9 | 10 | set_affinity(core); 11 | printf("Testing on core %d...\n", core); 12 | get_rel_load_on_core(core, &profile); 13 | microsleep(1000000); 14 | printf("Startup: %f\n\n", get_rel_load_on_core(core, &profile)); 15 | 16 | for (int i = 0; i <= 100; i += 5) { 17 | get_rel_load_on_core(core, &profile); 18 | double targetload = i / 100.0; 19 | generate_load_timed(2, targetload, &mc3_characteristic); 20 | printf("Tried to generate: % 8.2f load, generated % 8.2f.\n", targetload, 21 | get_rel_load_on_core(core, &profile)); 22 | } 23 | 24 | return 0; 25 | } 26 | -------------------------------------------------------------------------------- /tools/load_generator/loadutils.c: -------------------------------------------------------------------------------- 1 | #include "loadutils.h" 2 | 3 | ull get_num_cpus(void) { 4 | ull ret = 1; 5 | #ifdef _SC_NPROCESSORS_ONLN 6 | // Linux 7 | ret = sysconf(_SC_NPROCESSORS_ONLN); 8 | #elif defined(_SC_NPROC_ONLN) 9 | // Irix 10 | ret = sysconf(_SC_NPROC_ONLN); 11 | #elif defined(MPC_GETNUMSPUS) 12 | // HPUX 13 | ret = mpctl(MPC_GETNUMSPUS, NULL, NULL); 14 | #endif 15 | if (ret < 1) 16 | ret = 1; 17 | return ret; 18 | } 19 | 20 | void set_affinity(int core) { 21 | cpu_set_t mask; 22 | CPU_ZERO(&mask); 23 | CPU_SET(core, &mask); 24 | if (pthread_setaffinity_np(pthread_self(), sizeof(mask), &mask) != 0) { 25 | printf("Error setting thread affinity.\n"); 26 | exit(-1); 27 | } 28 | } 29 | 30 | ull time_ms(void) { 31 | struct timeval tv; 32 | ull time; 33 | gettimeofday(&tv, 0); 34 | time = tv.tv_sec * 1000 + tv.tv_usec / 1000; 35 | return time; 36 | } 37 | 38 | void skipline(FILE 
*file) { 39 | char tmp[1024 * 32]; 40 | char *skipped; 41 | skipped = fgets(tmp, sizeof(tmp), file); // skip rest of line 42 | (void)(skipped); // suppress "unused" warning 43 | } 44 | 45 | void microsleep(ull period) { 46 | struct timespec wait, remaining; 47 | wait.tv_sec = period / 1000000; 48 | wait.tv_nsec = (period % 1000000) * 1000; 49 | while (nanosleep(&wait, &remaining) != 0) { 50 | wait = remaining; 51 | } 52 | } 53 | 54 | double get_rel_load_on_core(unsigned core, core_load_profile *profile) { 55 | ull user = 0, user_low = 0, system = 0, idle = 0; 56 | double ret = 0.0; 57 | 58 | ull total_user, total_user_low, total_system, total_idle; 59 | FILE *file = fopen("/proc/stat", "r"); 60 | // skip lines 61 | for (unsigned i = 0; i <= core; ++i) 62 | skipline(file); 63 | // read relevant line 64 | char scanBuffer[255]; 65 | sprintf(scanBuffer, "cpu%u %%llu %%llu %%llu %%llu", core); 66 | if (fscanf(file, scanBuffer, &total_user, &total_user_low, &total_system, 67 | &total_idle) != 4) { 68 | printf("Error parsing /proc/stat.\n"); 69 | exit(-1); 70 | } 71 | fclose(file); 72 | 73 | // overflow "handling" 74 | if (total_user < profile->total_user || profile->total_user == 0) 75 | user = total_user; 76 | else 77 | user = total_user - profile->total_user; 78 | if (total_user_low < profile->total_user_low || profile->total_user_low == 0) 79 | user_low = total_user_low; 80 | else 81 | user_low = total_user_low - profile->total_user_low; 82 | if (total_system < profile->total_system || profile->total_system == 0) 83 | system = total_system; 84 | else 85 | system = total_system - profile->total_system; 86 | if (total_idle < profile->total_idle || profile->total_idle == 0) 87 | idle = total_idle; 88 | else 89 | idle = total_idle - profile->total_idle; 90 | 91 | // calculate return value 92 | double cur_nonidle = user + user_low + system; 93 | double cur_idle = idle; 94 | if (cur_idle <= 0.0) 95 | ret = 1.0; 96 | else 97 | ret = cur_nonidle / (cur_nonidle + cur_idle); 98 
| 99 | // update profile data 100 | profile->total_user = total_user; 101 | profile->total_user_low = total_user_low; 102 | profile->total_system = total_system; 103 | profile->total_idle = total_idle; 104 | 105 | return ret; 106 | } 107 | 108 | volatile long long __importantInt, __importantA = 12, __importantB = 98; 109 | volatile double __importantDouble, __importantAf = 4.56, __importantBf = 0.78; 110 | 111 | #pragma omp threadprivate(__importantInt, __importantA, __importantB, \ 112 | __importantDouble, __importantAf, __importantBf) 113 | 114 | void loadgen(ull repetitions, ull inner_repetitions, ull sleepyhead) { 115 | for (ull i = 0; i < repetitions; ++i) { 116 | for (ull j = 0; j < inner_repetitions; ++j) { 117 | __importantDouble += __importantAf * __importantBf; 118 | __importantDouble -= __importantBf; 119 | if (__importantInt < 0) 120 | printf("loadgen insanity int\n"); 121 | __importantInt += __importantA; 122 | __importantInt = 123 | (__importantInt % __importantB) * __importantA / __importantB; 124 | __importantDouble = cos(tan(sin(__importantBf))); 125 | __importantDouble += __importantAf + __importantInt; 126 | if (__importantDouble < 0) 127 | printf("loadgen insanity double\n"); 128 | } 129 | if (sleepyhead > 0) 130 | microsleep(sleepyhead); 131 | } 132 | } 133 | 134 | void loadgen_timed(unsigned seconds, ull inner_repetitions, 135 | ull sleepyhead) { 136 | ull ragnarok; 137 | ragnarok = time_ms() + seconds * 1000; 138 | while (time_ms() < ragnarok) { 139 | loadgen(1, inner_repetitions, sleepyhead); 140 | } 141 | } 142 | 143 | void generate_load_timed(unsigned seconds, double amount, 144 | const load_characteristic *lc) { 145 | double sampling_point = (lc->samples - 1) * amount; 146 | int int_sample1 = (int)sampling_point; 147 | int int_sample2 = (int)sampling_point + 1; 148 | if (int_sample2 >= (int)lc->samples) 149 | int_sample2 = lc->samples - 1; 150 | double dec_part = sampling_point - int_sample1; 151 | double weight1 = 1.0 - dec_part, weight2 
= dec_part; 152 | ull loopreps = 153 | weight1 * lc->loopreps[int_sample1] + weight2 * lc->loopreps[int_sample2]; 154 | ull sleeptime = weight1 * lc->sleeptimes[int_sample1] + 155 | weight2 * lc->sleeptimes[int_sample2]; 156 | loadgen_timed(seconds, loopreps, sleeptime); 157 | } 158 | -------------------------------------------------------------------------------- /tools/load_generator/loadutils.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #define _GNU_SOURCE 1 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | typedef unsigned long long ull; 16 | 17 | typedef struct _load_characteristic { 18 | unsigned samples; 19 | const ull *loopreps; 20 | const ull *sleeptimes; 21 | } load_characteristic; 22 | 23 | static const ull mc3_characteristic_loopreps[] = {0, 15000, 25000, 35000, 24 | 50000, 72000, 98000, 160000, 25 | 187000, 200000, 800000}; 26 | static const ull mc3_characteristic_sleeptimes[] = { 27 | 100000, 16000, 14000, 12500, 12000, 11000, 9000, 5900, 3000, 600, 0}; 28 | static const load_characteristic mc3_characteristic = { 29 | 11, mc3_characteristic_loopreps, mc3_characteristic_sleeptimes}; 30 | 31 | typedef struct core_load_profile_ { 32 | unsigned long total_user, total_user_low, total_system, total_idle; 33 | } core_load_profile; 34 | 35 | ull get_num_cpus(void); 36 | 37 | void set_affinity(int core); 38 | 39 | ull time_ms(void); 40 | 41 | void skipline(FILE *file); 42 | 43 | void microsleep(ull period); 44 | 45 | double get_rel_load_on_core(unsigned core, core_load_profile *profile); 46 | 47 | void loadgen(ull repetitions, ull inner_repetitions, ull sleepyhead); 48 | 49 | void loadgen_timed(unsigned seconds, ull inner_repetitions, ull sleepyhead); 50 | 51 | void generate_load_timed(unsigned seconds, double amount, 52 | const load_characteristic *lc); 53 | 
-------------------------------------------------------------------------------- /tools/load_generator/mc3/loadprofile_mc3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterTh/perf-oriented-dev/3f90467f1d4f17ccb3ea8b0996cf8a302eb24569/tools/load_generator/mc3/loadprofile_mc3.png -------------------------------------------------------------------------------- /tools/load_generator/mc3/loadprofile_mc3.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterTh/perf-oriented-dev/3f90467f1d4f17ccb3ea8b0996cf8a302eb24569/tools/load_generator/mc3/loadprofile_mc3.xlsx -------------------------------------------------------------------------------- /tools/load_generator/mc3/loadprofile_mc3_summary.txt: -------------------------------------------------------------------------------- 1 | load: 2 | 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0 3 | loopreps: 4 | 0, 15000, 25000, 35000, 50000, 75000, 100000, 165000, 185000, 200000, 800000 5 | sleeptimes: 6 | 100000, 16000, 14000, 12500, 12000, 11000, 9000, 6000, 3000, 500, 0 -------------------------------------------------------------------------------- /tools/load_generator/synth/0_1_altern_static.txt: -------------------------------------------------------------------------------- 1 | cores: 2 2 | length: 1 3 | time: 1328108775993 ms / load: 0.0 1.0 4 | -------------------------------------------------------------------------------- /tools/load_generator/workstation/Thumbs.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterTh/perf-oriented-dev/3f90467f1d4f17ccb3ea8b0996cf8a302eb24569/tools/load_generator/workstation/Thumbs.db -------------------------------------------------------------------------------- /tools/load_generator/workstation/sysload_workstation.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/PeterTh/perf-oriented-dev/3f90467f1d4f17ccb3ea8b0996cf8a302eb24569/tools/load_generator/workstation/sysload_workstation.png -------------------------------------------------------------------------------- /tools/malloctest/malloctest.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | typedef struct { 7 | int64_t repeats; 8 | int64_t iterations; 9 | int64_t lower, upper; 10 | } thread_args; 11 | 12 | void* benchmark_thread(void *args) { 13 | thread_args *t_args = (thread_args*)args; 14 | for(int64_t r = 0; r < t_args->repeats; ++r) { 15 | unsigned seed = 0; 16 | void **allocations = (void**)calloc(t_args->iterations, sizeof(void*)); 17 | for(int64_t i = 0; i < t_args->iterations; ++i) { 18 | int64_t to_alloc = rand_r(&seed) % (t_args->upper - t_args->lower) + t_args->lower; 19 | allocations[i] = malloc(to_alloc); 20 | } 21 | for(int64_t i = 0; i < t_args->iterations; ++i) { 22 | free(allocations[i]); 23 | } 24 | free(allocations); 25 | } 26 | return NULL; 27 | } 28 | 29 | int main(int argc, char** argv) { 30 | int64_t num_threads = 100; 31 | if(argc != 6) { 32 | printf("USAGE: ./malloctest [num_threads] [num_repeats] [num_iterations] [lower] [upper]\n"); 33 | return -1; 34 | } 35 | num_threads = atol(argv[1]); 36 | thread_args t_args; 37 | t_args.repeats = atol(argv[2]); 38 | t_args.iterations = atol(argv[3]); 39 | t_args.lower = atol(argv[4]); 40 | t_args.upper = atol(argv[5]); 41 | 42 | pthread_t* threads = (pthread_t*)calloc(num_threads, sizeof(pthread_t)); 43 | 44 | for(int64_t i = 0; i < num_threads; ++i) { 45 | pthread_create(&threads[i], NULL, benchmark_thread, &t_args); 46 | } 47 | 48 | for(int64_t i = 0; i < num_threads; ++i) { 49 | pthread_join(threads[i], NULL); 50 | } 51 | 52 | free(threads); 53 | } 54 | 
--------------------------------------------------------------------------------