├── .gitignore ├── Jenkinsfile ├── LICENSE ├── Makefile ├── README.rst ├── benchmarks └── lockhammer │ ├── LICENSE │ ├── Makefile │ ├── README.rst │ ├── TODO │ ├── cpuorders │ └── schema.txt │ ├── graphs │ ├── github_lockhammer_all_common_20181106_cas_event_mutex_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_cas_lockref_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_cas_rw_lock_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_empty_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_event_mutex_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_incdec_refcount_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_jvm_objectmonitor_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_osq_lock_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_queued_spinlock_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_swap_mutex_200ns_1000ns.png │ ├── github_lockhammer_all_common_20181106_tbb_spin_rw_mutex_200ns_1000ns.png │ └── github_lockhammer_all_common_20181106_ticket_spinlock_200ns_1000ns.png │ ├── include │ ├── alloc.h │ ├── args.h │ ├── atomics.h │ ├── cpu_relax.h │ ├── lockhammer.h │ ├── perf_timer.h │ └── verbose.h │ ├── scripts │ ├── lh_sweepdelay_cfg.yaml │ ├── lh_sweeptest_cfg.yaml │ ├── lh_unittest_cfg.yaml │ ├── lockhammer-all.csv.xz │ ├── lockhammer-jupyter-notebook.ipynb │ ├── run-tests.sh │ ├── run_sweep_delay.sh │ ├── runall.sh │ ├── show-per-thread-lock-acquires.sh │ ├── sweep.sh │ ├── test_lockhammer.py │ └── view-results-json.sh │ ├── src │ ├── alloc.c │ ├── args.c │ ├── cpufreq-scaling-detect.c │ ├── lockhammer.c │ ├── measure.c │ └── report.c │ └── tests │ ├── cas_lockref.h │ ├── cas_rw_lock.h │ ├── empty.h │ ├── incdec_refcount.h │ └── swap_mutex.h ├── contributing.rst ├── ext ├── jvm │ └── jvm_objectmonitor.h ├── linux │ ├── hybrid_spinlock.h │ ├── hybrid_spinlock_fastdequeue.h │ ├── hybrid_spinlock_old_fastdequeue.h │ ├── include │ │ ├── lk_atomics.h │ │ ├── lk_barrier.h │ │ └── lk_cmpxchg.h │ ├── osq_lock.h │ ├── queued_spinlock.h │ └── ticket_spinlock.h ├── mysql │ ├── cas_event_mutex.h │ ├── event_mutex.h │ └── include │ │ └── ut_atomics.h ├── pagemap │ └── include │ │ └── pagemap.h ├── sms │ ├── base │ │ ├── build_config.h │ │ ├── cpu.h │ │ └── llsc.h │ └── clh_spinlock.h └── tbb │ ├── include │ └── tbb.h │ └── tbb_spin_rw_mutex.h ├── hooks └── commit-msg └── tools └── .gitignore /.gitignore: -------------------------------------------------------------------------------- 1 | benchmarks/lockhammer/build/ 2 | benchmarks/lockhammer/build.*/ 3 | benchmarks/lockhammer/*.json 4 | *.log 5 | -------------------------------------------------------------------------------- /Jenkinsfile: -------------------------------------------------------------------------------- 1 | import static groovy.io.FileType.FILES 2 | import static groovy.io.FileType.DIRECTORIES 3 | 4 | def getRepoURL() { 5 | sh "git config --get remote.origin.url > .git/remote-url" 6 | return readFile(".git/remote-url").trim() 7 | } 8 | 9 | void setBuildStatus(String message, String state, String context) { 10 | repoUrl = getRepoURL(); 11 | step([ 12 | $class: "GitHubCommitStatusSetter", 13 | reposSource: [$class: "ManuallyEnteredRepositorySource", url: repoUrl], 14 | contextSource: [$class: "ManuallyEnteredCommitContextSource", context: context], 15 | errorHandlers: [[$class: "ChangingBuildStatusErrorHandler", result: "UNSTABLE"]], 16 | statusResultSource: [ $class: "ConditionalStatusResultSource", results: [[$class: 
"AnyBuildResult", message: message, state: state]] ], 17 | statusBackrefSource: [ $class: "ManuallyEnteredBackrefSource", backref: ""] 18 | ]); 19 | } 20 | 21 | node { 22 | stage('checkout') { 23 | checkout scm 24 | } 25 | 26 | stage('Build') { 27 | setBuildStatus("Building code", 'PENDING', 'Build'); 28 | def fails = [] 29 | def dir = new File("${env.WORKSPACE}/benchmarks/"); 30 | dir.traverse(type: DIRECTORIES, maxDepth: 0) { 31 | //build 32 | try { 33 | sh "make -C ${it}" 34 | } 35 | catch (exc) { 36 | fails.add(it.toString().substring(env.WORKSPACE.length())) 37 | } 38 | } 39 | if (fails) { 40 | setBuildStatus("${fails} failed to build", 'FAILURE', 'Build'); 41 | error "${fails} failed to build" 42 | } 43 | setBuildStatus("Build Successful!", 'SUCCESS', 'Build'); 44 | } 45 | 46 | stage('Test') { 47 | setBuildStatus("Testing code", 'PENDING', 'Test'); 48 | 49 | fails = [] 50 | dir = new File("${env.WORKSPACE}/benchmarks/"); 51 | // Run all scripts starting with the prefix "test" 52 | dir.traverse(type: DIRECTORIES, maxDepth: 0) { 53 | def scr = new File("${it}/scripts/") 54 | scr.traverse(type: FILES, filter: ~/.*\/test[^\/]*/) { 55 | try { 56 | sh "${it}" 57 | } 58 | catch (exc) { 59 | fails.add(it.toString().substring(env.WORKSPACE.length())) 60 | } 61 | } 62 | } 63 | if (fails) { 64 | setBuildStatus("Tests scripts: ${fails} Failed", 'FAILURE', 'Test'); 65 | error "Tests scripts: ${fails} Failed" 66 | } 67 | setBuildStatus("Tests Passed!", 'SUCCESS', 'Test'); 68 | } 69 | 70 | } 71 | 72 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, ARM Limited. All rights reserved. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | Redistributions of source code must retain the above copyright notice, this 9 | list of conditions and the following disclaimer. 10 | 11 | Redistributions in binary form must reproduce the above copyright notice, this 12 | list of conditions and the following disclaimer in the documentation and/or 13 | other materials provided with the distribution. 14 | 15 | Neither the name of ARM Limited nor the names of its contributors may be used 16 | to endorse or promote products derived from this software without specific 17 | prior written permission. 18 | 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 27 | TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | 2 | 3 | # SPDX-FileCopyrightText: Copyright 2019-2025 Arm Limited and/or its affiliates 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | 7 | .PHONY: help 8 | 9 | LOCKHAMMER_DIR=benchmarks/lockhammer 10 | 11 | help: 12 | @echo 13 | @echo "This Makefile passes targets through to $(LOCKHAMMER_DIR)/Makefile" 14 | @echo 15 | @echo "try:" 16 | @echo 17 | @echo " make -j 8 allvariants" 18 | @echo 19 | 20 | %:: 21 | $(MAKE) -C $(LOCKHAMMER_DIR) $(MAKEFLAGS) $(MAKECMDGOALS) 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Synchronization Benchmarks 2 | ========================== 3 | 4 | This is a micro-benchmark suite for evaluating the scalability and code overhead of synchronization 5 | primitives used primarily in data-center applications and system software. It contains synchronization 6 | primitives that are both independently developed and extracted from real software applications. 7 | 8 | License 9 | ------- 10 | 11 | The software is provided under a BSD-3-Clause `license`_. Contributions to this 12 | project are accepted under the same license with developer sign-off as 13 | described in the `Contributing Guidelines`_. 14 | 15 | This project contains code from other projects, the license information for which 16 | can be found in the relevant directories or files. Any contributions to third party 17 | open source projects are under the relevant license for that project or file. 18 | 19 | Repository Contents 20 | =================== 21 | 22 | The synchronization-benchmarks repository is divided into multiple directories with the following semantics: 23 | 24 | - tools/ -- Contains support tools for the micro-benchmarks contained in benchmarks/ such as application profilers or code 25 | analyzers. In general, support code that applies to multiple benchmarks should go here. 26 | - benchmarks/ -- Broken up into sub-directories, one for each micro-benchmark. Each sub-directory should generally be structured 27 | as: 28 | 29 | - <benchmark>/ -- The root of the directory should contain a README with build instructions, and a detailed 30 | description of the test: what it tests, how it tests it, and how to interpret the results. The root 31 | directory should also contain the build system files. 32 | - src/ 33 | - include/ 34 | - scripts/ -- Automation scripts for running and parsing the output of your micro-benchmark 35 | 36 | - ext/ -- This is a directory for third party code taken from other projects if, for instance, your micro-benchmark is 37 | meant to test example synchronization primitives from various sources. For each third party source, a sub-directory 38 | should be created that is descriptive of the origin of the imported code, and the imported code placed in that sub-directory. 39 | All imported code needs to retain the original license and copyright information from the source location. 40 | For more detail on how to include third party code, please consult the `Contributing Guidelines`_. 41 | 42 | Getting Started 43 | =============== 44 | 45 | Clone this repository and add the commit-msg hook from the hooks/ directory into your .git/hooks directory.
To build 46 | the microbenchmarks, follow the build and run instructions in the individual test sub-directories contained 47 | in benchmarks/. 48 | 49 | Feedback and support 50 | -------------------- 51 | 52 | Arm welcomes any feedback on this benchmark suite. If you find that this suite lacks important 53 | tests, please use the `GitHub issue tracker`_ to log the issue and initiate a pull request with your fixes as outlined in 54 | the `Contributing Guidelines`_. 55 | 56 | -------------- 57 | 58 | *Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.* 59 | 60 | .. _GitHub: https://www.github.com/ARM-software/synchronization-benchmarks 61 | .. _GitHub issue tracker: https://github.com/ARM-software/synchronization-benchmarks/issues 62 | .. _license: ./LICENSE 63 | .. _Contributing Guidelines: ./contributing.rst 64 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, The Linux Foundation. All rights reserved. 2 | 3 | SPDX-License-Identifier: BSD-3-Clause 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | * Redistributions of source code must retain the above copyright 9 | notice, this list of conditions and the following disclaimer. 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following 12 | disclaimer in the documentation and/or other materials provided 13 | with the distribution. 14 | * Neither the name of The Linux Foundation nor the names of its 15 | contributors may be used to endorse or promote products derived 16 | from this software without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 19 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 22 | BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 | BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 27 | OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 28 | IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/TODO: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Physical address accessing strategy 5 | - While a specified physical address can be obtained by mmap'ing /dev/mem with 6 | CONFIG_STRICT_DEVMEM=n (and nopat on x86), we cannot guarantee that the memory 7 | location is freely available for use. This may be OK for simulation, but not 8 | OK on a real system. 9 | 10 | To get around this problem, find a physical address in a persistent hugepage. 11 | This means not transparent hugepages, but HugeTLB pages. Using a persistent 12 | hugepage lets us access a physical memory location that persists after the 13 | program ends, so that runs are repeatable.
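(Illustrative sketch only, not part of the lockhammer sources; the harness's own translation helper is declared as get_phys_addr() in include/alloc.h and pagemap parsing support lives under ext/pagemap/. This standalone C program assumes a 2 MB default hugepage size, at least one page reserved in /proc/sys/vm/nr_hugepages, and root privileges, since unprivileged reads of /proc/self/pagemap return a zeroed PFN.)

    /* Hypothetical example: map an anonymous HugeTLB page and translate its
     * virtual address to a physical address via /proc/self/pagemap. */
    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <sys/mman.h>
    #include <unistd.h>

    static uintptr_t virt_to_phys(void * vaddr)      /* hypothetical helper */
    {
        long page_size = sysconf(_SC_PAGESIZE);      /* pagemap is indexed by base pages */
        uint64_t entry;
        int fd = open("/proc/self/pagemap", O_RDONLY);
        if (fd < 0) { perror("open pagemap"); exit(1); }
        off_t offset = ((uintptr_t) vaddr / page_size) * sizeof(entry);
        if (pread(fd, &entry, sizeof(entry), offset) != sizeof(entry)) {
            perror("pread pagemap"); exit(1);
        }
        close(fd);
        if (!(entry & (1ULL << 63))) {               /* bit 63 = page present */
            fprintf(stderr, "page not present (or not privileged)\n"); exit(1);
        }
        uint64_t pfn = entry & ((1ULL << 55) - 1);   /* bits 0-54 = page frame number */
        return pfn * page_size + (uintptr_t) vaddr % page_size;
    }

    int main(void)
    {
        size_t length = 2 * 1024 * 1024;             /* assumes 2 MB default hugepage size */
        void * p = mmap(NULL, length, PROT_READ | PROT_WRITE,
                        MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
        if (p == MAP_FAILED) { perror("mmap MAP_HUGETLB"); return 1; }
        memset(p, 0, length);                        /* touch it so the page is faulted in */
        printf("virtual %p -> physical 0x%lx\n", p, (unsigned long) virt_to_phys(p));
        return 0;
    }

Requesting the same physical page again on a later run (cf. the --hugepage-physaddr flag in the items below) is what makes experiments repeatable.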
14 | 15 | TODO: check that N=1 hugepages works with multiple NUMA domains (yes it 16 | does, but since hugepages are by default round-robin distributed ("interleaved") 17 | across NUMA domains, N=1 will place only one hugepage in the first NUMA domain. 18 | If other domains are to be tested, use --hugepage-physaddr to request the 19 | hugepage with that physical address in that NUMA domain.) 20 | TODO: use get_mempolicy() to determine the NUMA domain of a hugepage. 21 | TODO: use fewer hugepages (done) 22 | TODO: add a flag to specify the hugepage physical address, and to try remapping 23 | hugepages until it is obtained again. (done) 24 | TODO: use set_mempolicy(MPOL_BIND) to place a hugepage on a node instead of the above. 25 | TODO: respect hugepage size in bytes and in kilobytes by name; it only accepts the abbreviated form right now 26 | 27 | 28 | Update SpinPause() in ext/jvm/jvm_objectmonitor.h 29 | - The SpinPause() function returns 0. However, this is only the case 30 | in now-very-old versions of OpenJDK. Modern versions use pause 31 | on 64-bit x86 (amd64) and a parameterized choice of one or more 32 | ISBs, or NOP, on aarch64. 33 | 34 | 35 | ext/linux/hybrid_spinlock* 36 | - use the lockhammer lock pointer instead of malloc'ing mcs_pool for better reproducibility 37 | 38 | queued_spinlock 39 | - queued_spinlock uses the lock pointer as well as mcs_pool, so we need a way to have both be reproducible. 40 | 41 | tbb_spin_rw_mutex 42 | - instead of doing operations on the state variable, use the test harness lock pointer for better reproducibility 43 | 44 | clh_spinlock 45 | - instead of operating on global_clh_lock, use the test harness lock pointer for better reproducibility 46 | 47 | ticket_spinlock 48 | - Modify so that USE_RELAX is effective 49 | 50 | 51 | cpufreq check: 52 | - for the intel_pstate driver, warn if no_turbo is set to 0 53 | 54 | 55 | 56 | 57 | add a memory update in the critical section 58 | - optionally store update on the same cacheline as the lock 59 | - expect a lot of kernel locks to have this 60 | - optionally store update somewhere else than the lock cache line (GUPS) 61 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/cpuorders/schema.txt: -------------------------------------------------------------------------------- 1 | 2 | 3 | naming schema 4 | 5 | # cloud instance, should be a symlink to a system cpuorder 6 | ..cpuorder 7 | 8 | # system cpuorder 9 | ..cpuorder 10 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_event_mutex_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_event_mutex_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_lockref_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_lockref_200ns_1000ns.png --------------------------------------------------------------------------------
/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_rw_lock_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_cas_rw_lock_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_empty_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_empty_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_event_mutex_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_event_mutex_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_incdec_refcount_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_incdec_refcount_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_jvm_objectmonitor_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_jvm_objectmonitor_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_osq_lock_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_osq_lock_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_queued_spinlock_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_queued_spinlock_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_swap_mutex_200ns_1000ns.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_swap_mutex_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_tbb_spin_rw_mutex_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_tbb_spin_rw_mutex_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_ticket_spinlock_200ns_1000ns.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/graphs/github_lockhammer_all_common_20181106_ticket_spinlock_200ns_1000ns.png -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/alloc.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * Copyright (c) 2017-2025, The Linux Foundation. All rights reserved. 4 | * 5 | * SPDX-License-Identifier: BSD-3-Clause 6 | * 7 | * Redistribution and use in source and binary forms, with or without 8 | * modification, are permitted provided that the following conditions are 9 | * met: 10 | * * Redistributions of source code must retain the above copyright 11 | * notice, this list of conditions and the following disclaimer. 12 | * * Redistributions in binary form must reproduce the above 13 | * copyright notice, this list of conditions and the following 14 | * disclaimer in the documentation and/or other materials provided 15 | * with the distribution. 16 | * * Neither the name of The Linux Foundation nor the names of its 17 | * contributors may be used to endorse or promote products derived 18 | * from this software without specific prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 21 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 22 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 23 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 24 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 25 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 26 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 27 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 28 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 29 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 30 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
31 | */ 32 | 33 | 34 | #ifndef ALLOC_H 35 | #define ALLOC_H 36 | 37 | enum { 38 | HUGEPAGES_NONE, 39 | HUGEPAGES_DEFAULT, 40 | HUGEPAGES_64K, 41 | HUGEPAGES_2M, 42 | HUGEPAGES_32M, 43 | HUGEPAGES_512M, 44 | HUGEPAGES_1G, 45 | HUGEPAGES_16G, 46 | HUGEPAGES_MAX_ENUM 47 | }; 48 | 49 | 50 | void * do_hugepage_alloc(int use_hugepages, size_t hugepage_req_physaddr, int verbose); 51 | void * do_alloc(size_t length, int use_hugepages, size_t nonhuge_alignment, size_t hugepage_req_physaddr, int verbose); 52 | void print_hugepage_physaddr_and_exit(void * mmap_ret); 53 | 54 | // hugepage flag parameter parsing 55 | int parse_hugepage_parameter(const char * optarg); 56 | const char * hugepage_map (int enum_param_value); 57 | 58 | // function prototypes used by osq_lock 59 | uintptr_t get_phys_addr(uintptr_t vaddr); 60 | 61 | #endif 62 | 63 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 64 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/args.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2025, The Linux Foundation. All rights reserved. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are 8 | * met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef ARGS_H 33 | #define ARGS_H 34 | 35 | #include "lockhammer.h" 36 | 37 | int parse_args(int argc, char ** argv, test_args_t * pargs, const system_info_t * psysinfo); 38 | int init_sysinfo(system_info_t * psysinfo); 39 | void print_test_args(const test_args_t * p); 40 | 41 | #endif 42 | 43 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 44 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/cpu_relax.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2025, The Linux Foundation. All rights reserved. 
3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are 8 | * met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef CPU_RELAX_H 33 | #define CPU_RELAX_H 34 | 35 | 36 | #ifndef CPU_RELAX_ITERATIONS 37 | #define CPU_RELAX_ITERATIONS 1 38 | #endif 39 | 40 | static inline void __cpu_relax(void) { 41 | for (unsigned long i = 0; i < CPU_RELAX_ITERATIONS; i++) { 42 | #ifdef __aarch64__ 43 | #if defined(RELAX_IS_ISB) 44 | asm volatile ("isb" : : : "memory" ); 45 | #elif defined(RELAX_IS_NOP) 46 | asm volatile ("nop" : : : "memory"); 47 | #elif defined(RELAX_IS_EMPTY) 48 | asm volatile ("" : : : "memory"); 49 | #elif defined(RELAX_IS_NOTHING) 50 | 51 | #endif 52 | #endif // __aarch64__ 53 | 54 | #ifdef __x86_64__ 55 | 56 | #if defined(RELAX_IS_PAUSE) 57 | // RELAX_IS_PAUSE is the implementation for x86 in jdk-9 58 | asm volatile ("rep; nop"); // aka pause 59 | #elif defined(RELAX_IS_EMPTY) 60 | asm volatile ("" : : : "memory"); 61 | #elif defined(RELAX_IS_NOTHING) 62 | 63 | #endif 64 | #endif // __x86_64__ 65 | 66 | } 67 | } 68 | 69 | #endif // CPU_RELAX_H 70 | 71 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 72 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/lockhammer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017-2025, The Linux Foundation. All rights reserved. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are 8 | * met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 
15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #ifndef __LOCKHAMMER_H__ 33 | #define __LOCKHAMMER_H__ 34 | 35 | 36 | // PROGRESS_TICK_PROFILE - prints each thread's timer value at lock_acquires milestones to show thread concurrency 37 | #define PROGRESS_TICK_PROFILE 38 | 39 | enum units { NS, 40 | INSTS, NOT_SET }; 41 | typedef enum units Units; 42 | 43 | #define _stringify(x) #x 44 | #define stringify(x) _stringify(x) 45 | 46 | // per_thread_results_t - each thread returns its results in this struct (inside thread_args_t) 47 | typedef struct { 48 | unsigned long cpu_affined; // which CPU this was pinned on. 49 | 50 | unsigned long lock_acquires; // number of locks acquired-and-released per thread 51 | unsigned long cputime_ns; // this thread's CPU time in nanoseconds 52 | unsigned long walltime_ns; // this thread's wall clock time in nanoseconds 53 | unsigned long hmrdepth; // depth=lock-specific notion of contention 54 | 55 | unsigned long hwtimer_start; // timer value at start of measurement loop 56 | unsigned long hwtimer_end; // "" at end 57 | 58 | unsigned long hwtimer_10p; // timer value at 10% of work completion 59 | unsigned long hwtimer_25p; // "" at 25% 60 | unsigned long hwtimer_50p; // "" at 50% 61 | unsigned long hwtimer_75p; // "" at 75% 62 | unsigned long hwtimer_90p; // "" at 90% 63 | 64 | // hold/post durations from calibrate_timer() 65 | double hold_ns, post_ns; 66 | 67 | // metrics only for osq_lock 68 | unsigned long osq_lock_wait_next_spins; 69 | unsigned long osq_unlock_wait_next_spins; 70 | unsigned long osq_lock_locked_spins; 71 | unsigned long osq_lock_unqueue_spins; 72 | unsigned long osq_lock_acquire_backoffs; 73 | 74 | } per_thread_results_t; 75 | 76 | 77 | // thread_args_t -- pointer to an instance of this is passed to each thread 78 | typedef struct { 79 | unsigned long thread_num; // thread number, ordinal 0 80 | unsigned long num_threads; // number of worker threads in total for experiment 81 | unsigned long num_acquires; // -a flag, aka nacqrs, aka number of acquires per thread to do 82 | unsigned long *lock; // pointer to the lock variable 83 | 84 | unsigned long *p_start_ns; // marshal thread's monotonic start time, in ns, for computing wall_elapsed_ns; only the marshal thread sets this 85 | unsigned long hold, post; // ncrit, nparallel 86 | Units hold_unit, post_unit; // NS or INSTS, hold_unit = ncrit_units, post_unit = nparallel_units 87 | unsigned long hold_count; 88 | unsigned long post_count; 89 | 90 | double tickspns; // timer ticks per nanosecond 91 | 92 | unsigned long
run_on_this_cpu; // logical CPU on which a worker thread is to run 93 | 94 | unsigned long run_limit_ticks; // if non-zero, the number of timer ticks to run for when using --run-limit-ticks or --run-limit-seconds 95 | unsigned long run_limit_inner_loop_iters; // the number of lock acquire/release sequences to run before checking the hwtimer when using --run-limit-ticks or --run-limit-seconds 96 | unsigned long hwtimer_frequency; 97 | 98 | int verbose; 99 | unsigned long blackhole_numtries; 100 | 101 | per_thread_results_t results; // output data structure 102 | 103 | } thread_args_t; 104 | 105 | // pinorder_t - describes a set of CPUs on which to run worker threads 106 | typedef struct { 107 | int * cpu_list; // pointer to an array of int. index into this array is the thread number, each element is the logical CPU on which that thread is to run. 108 | size_t num_threads; // number of threads defined for this pinorder (i.e. the number of valid entries in the pinorder array). 109 | } pinorder_t; 110 | 111 | 112 | typedef struct { 113 | unsigned long t; // duration time, either in nanoseconds or iterations 114 | Units unit; // duration unit, either NS or INSTS 115 | } duration_t; 116 | 117 | // test_args_t - mostly command line parameters 118 | typedef struct { 119 | unsigned long num_acquires; // -a number of acquires (not documented?) 120 | duration_t * crits; // -c, --cn=, --ci= critical duration 121 | duration_t * pars; // -p, --pn=, --pi= parallel duration 122 | size_t num_crits; 123 | size_t num_pars; 124 | unsigned long ileave; // -i interleave value for SMT pinning 125 | int scheduling_policy; // -S use explicit scheduling policy 126 | size_t num_pinorders; 127 | pinorder_t * pinorders; // -o CPU pinning order 128 | unsigned long timeout_usec; // -A timeout_usec 129 | 130 | int hugepagesz; 131 | int use_mmap; 132 | int mmap_hugepage_offset_exists; 133 | int print_hugepage_physaddr; 134 | size_t mmap_hugepage_offset; 135 | size_t mmap_hugepage_physaddr; 136 | unsigned long hwtimer_frequency; 137 | unsigned long probed_hwtimer_frequency; 138 | long estimate_hwtimer_freq_cpu; 139 | 140 | double run_limit_seconds; 141 | unsigned long run_limit_ticks; 142 | unsigned long run_limit_inner_loop_iters; 143 | int ignore_unknown_scaling_governor; 144 | int suppress_cpu_frequency_warnings; 145 | const char * cpuorder_filename; 146 | #ifdef JSON_OUTPUT 147 | const char * json_output_filename; 148 | #endif 149 | #ifdef __aarch64__ 150 | char disable_outline_atomics_lse; 151 | #endif 152 | int verbose; 153 | size_t iterations; 154 | size_t blackhole_numtries; 155 | } test_args_t; 156 | 157 | // system_info_t - system configuration data 158 | typedef struct { 159 | unsigned long num_cores; // number of processors configured by the operating system 160 | size_t page_size_bytes; // page size in bytes 161 | size_t erg_bytes; // number of bytes per exclusive reservation granule (e.g.
cache line/block) 162 | 163 | cpu_set_t avail_cores; // cores that the CPU affinity mask allows us to run on 164 | size_t num_avail_cores; // number of cores that the CPU affinity mask allows us to run on 165 | size_t num_online_cores; // the number of cores that getconf _NPROCESSORS_ONLN returns 166 | 167 | // num_online_cores can be less than num_cores because some may be offline or not permitted by affinity mask 168 | // num_avail_cores may be less than num_online_cores because some online cores may be isolated 169 | } system_info_t; 170 | 171 | // locks_t -- pointers to the actual locks to be used 172 | typedef struct { 173 | unsigned long * p_test_lock; // address of main lock 174 | unsigned long * p_ready_lock; // lock to synchronize all threads' entry into hmr() 175 | unsigned long * p_sync_lock; // lock to synchronize before blackhole calibration 176 | unsigned long * p_calibrate_lock; // lock to synchronize after blackhole calibration 177 | } locks_t; 178 | 179 | // calibrate_blackhole -- (used in osq_lock) 180 | unsigned long calibrate_blackhole(unsigned long target, unsigned long tokens_low, unsigned long tokens_high, unsigned long core_id, unsigned long NUMTRIES); 181 | 182 | // evaluate_blackhole -- returns the average duration over NUMTRIES runs 183 | int64_t evaluate_blackhole( const unsigned long tokens_mid, const unsigned long NUMTRIES); 184 | 185 | // blackhole() -- runs a small loop to consume time (also used in osq_lock) 186 | void blackhole(unsigned long iters); 187 | 188 | // measure_setup_initialize_lock() -- calls lock-specific setup routine if it exists 189 | void measure_setup_initialize_lock(locks_t * p_locks, pinorder_t * pinorder); 190 | 191 | // measure_setup_parse_test_args() -- calls lock-specific parsing routine if it exists 192 | void measure_setup_parse_test_args(test_args_t * p_test_args, int argc, char ** argv); 193 | 194 | // convert the struct timespec to only nanoseconds 195 | unsigned long timespec_to_ns (struct timespec * ts); 196 | 197 | // selectively disable LSE instructions in outline atomics/libgcc; in measure.c 198 | void handle_disable_outline_atomics_lse(void); 199 | 200 | #if defined(__clang__) 201 | #define NOINLINE __attribute__((noinline)) 202 | #elif defined(__GNUC__) 203 | #define NOINLINE __attribute__((noinline)) 204 | #else 205 | #define NOINLINE 206 | #endif 207 | 208 | #if defined(__clang__) 209 | #define NO_UNROLL_LOOP _Pragma("clang loop unroll(disable)") 210 | #elif defined(__GNUC__) 211 | #define NO_UNROLL_LOOP _Pragma("GCC unroll 0") 212 | #else 213 | #define NO_UNROLL_LOOP 214 | #endif 215 | 216 | 217 | #endif 218 | 219 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 220 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/perf_timer.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2018, ARM Limited. All rights reserved. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are met: 8 | * 9 | * Redistributions of source code must retain the above copyright notice, this 10 | * list of conditions and the following disclaimer. 11 | * 12 | * Redistributions in binary form must reproduce the above copyright notice, this 13 | * list of conditions and the following disclaimer in the documentation and/or 14 | * other materials provided with the distribution.
15 | * 16 | * Neither the name of ARM Limited nor the names of its contributors may be used 17 | * to endorse or promote products derived from this software without specific 18 | * prior written permission. 19 | * 20 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 28 | * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | * 31 | * Authors: Rob Golshan, 32 | * James Yang (James.Yang@arm.com), 33 | * Geoffrey Blake (Geoffrey.Blake@arm.com) 34 | */ 35 | 36 | /* 37 | * perf_timer.h 38 | * Functions to read hardware timers and query timer frequency. 39 | * Supports x86 and AArch64 platforms 40 | * 41 | * Define DEBUG in makefile or here if you desire debug output, 42 | * define DDEBUG if you require detailed debug output. 43 | */ 44 | 45 | #ifndef __PERF_TIMER_H_ 46 | #define __PERF_TIMER_H_ 47 | 48 | #include <stdio.h> 49 | #include <stdint.h> 50 | 51 | #include <errno.h> 52 | #include <math.h> 53 | #include <unistd.h> /* for access() */ 54 | #include <string.h> 55 | 56 | #include "atomics.h" 57 | 58 | extern __thread uint64_t prev_tsc; 59 | 60 | #define MAX(x, y) (((x) > (y)) ? (x) : (y)) 61 | #define MIN(x, y) (((x) < (y)) ? (x) : (y)) 62 | 63 | /* Cautionary note about using the invariant TSC on x86: 64 | Depending upon the model of CPU, TSC may 65 | not count cycles representing the current 66 | operating frequency. It may, for example, 67 | count cycles at the maximum frequency of the 68 | device, even if the CPU core is running at a 69 | lower frequency, or it may count at a frequency 70 | unrelated to the operating frequency. Use 71 | the --estimate-hwtimer-frequency flag to measure 72 | the frequency and the --hwtimer-frequency flag to 73 | override the value detected by the code below.
74 | */ 75 | #ifdef __x86_64__ 76 | static inline uint64_t __attribute__((always_inline)) 77 | rdtsc(void) 78 | { 79 | union { 80 | uint64_t tsc_64; 81 | struct { 82 | uint32_t lo_32; 83 | uint32_t hi_32; 84 | }; 85 | } tsc; 86 | 87 | asm volatile("rdtsc" : 88 | "=a" (tsc.lo_32), 89 | "=d" (tsc.hi_32)); 90 | 91 | return tsc.tsc_64; 92 | } 93 | 94 | // rdtscp is serializing; rdtsc is not 95 | // NOTE: rdtscp cannot guarantee subsequent instructions do not begin execution 96 | // before the timer is read 97 | static inline uint64_t __attribute__((always_inline)) 98 | rdtscp(void) 99 | { 100 | union { 101 | uint64_t tsc_64; 102 | struct { 103 | uint32_t lo_32; 104 | uint32_t hi_32; 105 | }; 106 | } tsc; 107 | 108 | asm volatile("rdtscp" : 109 | "=a" (tsc.lo_32), 110 | "=d" (tsc.hi_32)); 111 | 112 | return tsc.tsc_64; 113 | } 114 | 115 | static inline void __attribute__((always_inline)) 116 | cpuid(void) 117 | { 118 | uint32_t a, b, c, d; 119 | asm volatile("CPUID": 120 | "=a" (a), 121 | "=b" (b), 122 | "=c" (c), 123 | "=d" (d)); 124 | } 125 | 126 | /* CPUID creates a barrier to avoid out of order execution before rdtsc 127 | */ 128 | static inline uint64_t __attribute__((always_inline)) 129 | rdtscp_start(void) 130 | { 131 | union { 132 | uint64_t tsc_64; 133 | struct { 134 | uint32_t lo_32; 135 | uint32_t hi_32; 136 | }; 137 | } tsc; 138 | 139 | asm volatile("CPUID\n\t" /* serialize */ 140 | "RDTSC\n\t" /* read clock */ 141 | "mov %%edx, %0\n\t" 142 | "mov %%eax, %1\n\t": 143 | "=r" (tsc.hi_32), 144 | "=r" (tsc.lo_32) 145 | ::"eax", "ebx", "ecx", "edx"); 146 | 147 | return tsc.tsc_64; 148 | } 149 | 150 | /* "RDTSCP instruction waits until all previous instructions have been executed 151 | * before reading the counter. However, subsequent instructions may begin execution 152 | * before the read operation is performed." 153 | * CPUID creates a barrier to avoid out of order execution 154 | */ 155 | static inline uint64_t __attribute__((always_inline)) 156 | rdtscp_end(void) 157 | { 158 | union { 159 | uint64_t tsc_64; 160 | struct { 161 | uint32_t lo_32; 162 | uint32_t hi_32; 163 | }; 164 | } tsc; 165 | 166 | asm volatile("RDTSCP\n\t" 167 | "mov %%edx, %0\n\t" 168 | "mov %%eax, %1\n\t" 169 | "CPUID\n\t": 170 | "=r" (tsc.hi_32), 171 | "=r" (tsc.lo_32) 172 | ::"eax", "ebx", "ecx", "edx"); 173 | 174 | return tsc.tsc_64; 175 | 176 | } 177 | 178 | 179 | static inline uint64_t __attribute__((always_inline)) 180 | get_raw_counter(void) { 181 | return rdtsc(); 182 | } 183 | #endif 184 | 185 | 186 | #ifdef __aarch64__ 187 | static inline uint64_t __attribute__((always_inline)) 188 | get_cntvct_el0(void) { 189 | uint64_t t; 190 | asm volatile ("ISB; mrs %0, cntvct_el0" : "=r" (t)); 191 | return t; 192 | } 193 | 194 | 195 | static inline uint64_t __attribute__((always_inline)) 196 | get_raw_counter(void) { 197 | return get_cntvct_el0(); 198 | } 199 | #endif 200 | 201 | 202 | static inline void __attribute__((always_inline)) 203 | timer_reset_counter() 204 | { 205 | #ifdef __aarch64__ 206 | __asm__ __volatile__ ("isb; mrs %0, cntvct_el0" : "=r" (prev_tsc)); 207 | #elif __x86_64__ 208 | prev_tsc = rdtscp(); 209 | #endif 210 | } 211 | 212 | 213 | /* Standard timer read functions */ 214 | static inline uint64_t __attribute__((always_inline)) 215 | timer_get_counter() 216 | { 217 | /* this returns the counter value from a constant-rate timer */ 218 | #ifdef __aarch64__ 219 | uint64_t counter_value; 220 | __asm__ __volatile__ ("isb; mrs %0, cntvct_el0" : "=r" (counter_value)); 221 | #elif __x86_64__ 222 |
uint64_t counter_value = rdtscp(); // assume constant_tsc 223 | #endif 224 | return counter_value; 225 | } 226 | 227 | /* Timer read for when at start of timing block 228 | */ 229 | static inline uint64_t __attribute__((always_inline)) 230 | timer_get_counter_start() 231 | { 232 | /* this returns the counter value from a constant-rate timer */ 233 | #ifdef __aarch64__ 234 | uint64_t counter_value; 235 | __asm__ __volatile__ ("dsb ish; isb; mrs %0, cntvct_el0" : "=r" (counter_value)); 236 | #elif __x86_64__ 237 | uint64_t counter_value = rdtscp_start(); // assume constant_tsc 238 | #endif 239 | return counter_value; 240 | } 241 | 242 | 243 | /* Timer read for when at end of timing block 244 | */ 245 | static inline uint64_t __attribute__((always_inline)) 246 | timer_get_counter_end() 247 | { 248 | /* this returns the counter value from a constant-rate timer */ 249 | #ifdef __aarch64__ 250 | uint64_t counter_value; 251 | __asm__ __volatile__ ("isb; mrs %0, cntvct_el0; isb" : "=r" (counter_value)); 252 | #elif __x86_64__ 253 | uint64_t counter_value = rdtscp_end(); // assume constant_tsc 254 | #endif 255 | return counter_value; 256 | } 257 | 258 | static inline void __attribute__((always_inline)) 259 | timer_reset_all() 260 | { 261 | timer_reset_counter(); 262 | } 263 | 264 | static inline void __attribute__((always_inline)) 265 | timer_init() { 266 | } 267 | 268 | static inline uint64_t __attribute__((always_inline)) 269 | timer_get_timer_freq(void) 270 | { 271 | extern unsigned long hwtimer_frequency; 272 | if (hwtimer_frequency) { return hwtimer_frequency; } 273 | 274 | uint64_t cnt_freq; 275 | #ifdef __aarch64__ 276 | __asm__ __volatile__ ("isb; mrs %0, cntfrq_el0" : "=r" (cnt_freq)); 277 | #elif __x86_64__ 278 | // This code attempts to get the TSC frequency. The assumption made 279 | // is TSC frequency equals the CPUFreq cpuinfo_max_freq attribute 280 | // value, which is the maximum operating frequency of the processor. 281 | // However, this equality is not always true, even less so in newer CPUs. 282 | // Also, the actual TSC frequency may not exactly match any nominal 283 | // frequency attribute value provided by CPUFreq, so the chances of 284 | // this returning the correct frequency have diminished. 285 | 286 | // If the CPUFreq cpuinfo_max_freq attribute is not available, this code 287 | // then tries to quickly measure it. 288 | 289 | // Use the --hwtimer-frequency flag to override the frequency value. 290 | // Use --estimate-hwtimer-frequency to explicitly measure it. 291 | 292 | char buf[100]; 293 | FILE * f = fopen("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq", "r"); 294 | if (f == NULL) { 295 | printf("Failed to open cpuinfo_max_freq, error %s\n", 296 | strerror(errno)); 297 | uint64_t iterations = 2; 298 | uint64_t time = 0; 299 | for (uint64_t i = 0; i < iterations; i++) { 300 | uint64_t start = rdtscp_start(); 301 | sleep(1); 302 | uint64_t end = rdtscp_end(); 303 | time += end - start; 304 | } 305 | 306 | // round down cycles 307 | uint64_t tmp = (time/iterations); 308 | unsigned long len = log10(tmp); 309 | double div = pow(10, len-2); 310 | return floor(tmp/div)*div; 311 | } 312 | while (! feof(f) && ! ferror(f)) { 313 | size_t end = fread(buf, 1, sizeof(buf) - 1, f); 314 | buf[end] = 0; 315 | } 316 | fclose(f); 317 | 318 | /* The ACPI cpufreq driver reports 'base' (aka non-turbo) frequency 319 | in cpuinfo_max_freq while the intel_pstate driver reports the 320 | turbo frequency. Warn if ACPI cpufreq is not found.
*/ 321 | if (access("/sys/devices/system/cpu/cpufreq", F_OK)) { 322 | printf("cpuinfo_max_freq is not from ACPI cpufreq driver! TSC frequency is probably turbo frequency.\n"); 323 | } 324 | 325 | cnt_freq = strtoul(buf, NULL, 0); 326 | cnt_freq = ((cnt_freq + 5000) / 10000) * 10000; /* round to nearest 10000 kHz */ 327 | cnt_freq *= 1000; /* convert kHz to Hz */ 328 | #endif 329 | return cnt_freq; 330 | } 331 | 332 | #define TOKENS_MAX_HIGH 1000000 /* good for ~41500 cntvct cycles */ 333 | #define THRESHOLD 1.05 // if the ratio of cycles for the total eval loop to the sum of the individual 334 | // calls exceeds this (e.g. due to a context switch), rerun 335 | 336 | 337 | #endif 338 | 339 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 340 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/include/verbose.h: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * SPDX-FileCopyrightText: Copyright 2024-2025 Arm Limited and/or its affiliates 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | */ 6 | 7 | #ifndef VERBOSE_H 8 | #define VERBOSE_H 9 | 10 | enum { 11 | VERBOSE_MORE=3, 12 | VERBOSE_YES=2, 13 | VERBOSE_LOW=1, // default 14 | VERBOSE_NONE=0 // to-be-implemented 15 | }; 16 | 17 | #endif 18 | 19 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 20 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/scripts/lh_sweepdelay_cfg.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ARM Limited. All rights reserved. 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # Redistributions in binary form must reproduce the above copyright notice, this 12 | # list of conditions and the following disclaimer in the documentation and/or 13 | # other materials provided with the distribution. 14 | # 15 | # Neither the name of ARM Limited nor the names of its contributors may be used 16 | # to endorse or promote products derived from this software without specific 17 | # prior written permission. 18 | # 19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 27 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | # 30 | # The views and conclusions contained in the software and documentation are those 31 | # of the authors and should not be interpreted as representing official policies, 32 | # either expressed or implied, of this project.
33 | 34 | 35 | ## Global Settings 36 | globalcfg: 37 | execdir: ../build 38 | logfile: lockhammer.csv 39 | 40 | 41 | ## Sweep Test Settings 42 | # 43 | # Common assumptions for sweep delay test: 44 | # The system should be able to handle CAS workload at any delay. 45 | # Using eBPF, we find that Nginx lockref inter-arrival delay is less than 32us, 46 | # and lockref_get to lockref_put_return delay is less than 2us. Therefore we 47 | # create this script to sweep the <32us delay range using the -p parameter and keep 48 | # the -c parameter at 1us because eBPF intrinsic overhead is about 1us. We also find 49 | # the ratio between -c and -p is relatively fixed, e.g. 1:8 ~ 1:16. We choose 50 | # 1:9 for the last case (-c=3000ns, -p=28000ns). 51 | # 52 | ## 53 | sweeptest: 54 | enabled: True 55 | safemode: True 56 | cmd: 57 | - lh_cas_lockref 58 | cmd_aarch64: 59 | cmd_x86_64: 60 | repeat: 9 61 | sweepargu: t 62 | argumax: 0 63 | skipsince: 48 64 | skipstep: 8 65 | argulist: 66 | - a: 5000 67 | c: 0ns 68 | p: 0ns 69 | o: lstopo 70 | - a: 5000 71 | c: 200ns 72 | p: 0ns 73 | o: lstopo 74 | - a: 5000 75 | c: 1000ns 76 | p: 0ns 77 | o: lstopo 78 | - a: 5000 79 | c: 200ns 80 | p: 1000ns 81 | o: lstopo 82 | - a: 5000 83 | c: 1000ns 84 | p: 1000ns 85 | o: lstopo 86 | - a: 5000 87 | c: 1000ns 88 | p: 2000ns 89 | o: lstopo 90 | - a: 5000 91 | c: 1000ns 92 | p: 3000ns 93 | o: lstopo 94 | - a: 5000 95 | c: 1000ns 96 | p: 4000ns 97 | o: lstopo 98 | - a: 5000 99 | c: 1000ns 100 | p: 5000ns 101 | o: lstopo 102 | - a: 5000 103 | c: 1000ns 104 | p: 6000ns 105 | o: lstopo 106 | - a: 5000 107 | c: 1000ns 108 | p: 7000ns 109 | o: lstopo 110 | - a: 5000 111 | c: 1000ns 112 | p: 8000ns 113 | o: lstopo 114 | - a: 5000 115 | c: 1000ns 116 | p: 9000ns 117 | o: lstopo 118 | - a: 5000 119 | c: 1000ns 120 | p: 10000ns 121 | o: lstopo 122 | - a: 5000 123 | c: 1000ns 124 | p: 11000ns 125 | o: lstopo 126 | - a: 5000 127 | c: 1000ns 128 | p: 12000ns 129 | o: lstopo 130 | - a: 5000 131 | c: 1000ns 132 | p: 13000ns 133 | o: lstopo 134 | - a: 5000 135 | c: 1000ns 136 | p: 14000ns 137 | o: lstopo 138 | - a: 5000 139 | c: 1000ns 140 | p: 15000ns 141 | o: lstopo 142 | - a: 5000 143 | c: 1000ns 144 | p: 16000ns 145 | o: lstopo 146 | - a: 5000 147 | c: 2000ns 148 | p: 20000ns 149 | o: lstopo 150 | - a: 5000 151 | c: 2000ns 152 | p: 24000ns 153 | o: lstopo 154 | - a: 5000 155 | c: 3000ns 156 | p: 28000ns 157 | o: lstopo 158 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/scripts/lh_sweeptest_cfg.yaml: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2018, ARM Limited. All rights reserved. 2 | # 3 | # SPDX-License-Identifier: BSD-3-Clause 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions are met: 7 | # 8 | # Redistributions of source code must retain the above copyright notice, this 9 | # list of conditions and the following disclaimer. 10 | # 11 | # Redistributions in binary form must reproduce the above copyright notice, this 12 | # list of conditions and the following disclaimer in the documentation and/or 13 | # other materials provided with the distribution. 14 | # 15 | # Neither the name of ARM Limited nor the names of its contributors may be used 16 | # to endorse or promote products derived from this software without specific 17 | # prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # The views and conclusions contained in the software and documentation are those
31 | # of the authors and should not be interpreted as representing official policies,
32 | # either expressed or implied, of this project.
33 | 
34 | 
35 | ## Global Settings
36 | globalcfg:
37 |     execdir: ../build
38 |     logfile: lockhammer.csv
39 | 
40 | 
41 | ## Sweep Test Settings
42 | #
43 | # Common assumptions for sweeptest:
44 | # CPU Frequency = 2GHz
45 | # CPU Cycle = 0.5ns
46 | # Remote DRAM (x86_64 or aarch64, NUMA) = 100ns ~ 300ns
47 | # Page Fault (4KB, x86_64) = 2000 cycles = 1000ns
48 | # Therefore we set the critical section delay (-c) to 0ns, 200ns and 1000ns.
49 | # We also set the post-critical-section delay (-p) to 5x the (-c) value.
50 | # By default, sweeptest sweeps sweepargu (-t) from 1 to the max core count.
51 | #
52 | ##
53 | sweeptest:
54 |     enabled: True
55 |     safemode: True
56 |     cmd:
57 |         - lh_cas_event_mutex
58 |         - lh_cas_lockref
59 |         - lh_cas_rw_lock
60 |         - lh_empty
61 |         - lh_event_mutex
62 |         - lh_incdec_refcount
63 |         - lh_jvm_objectmonitor
64 |         - lh_osq_lock
65 |         - lh_queued_spinlock
66 |         - lh_swap_mutex
67 |         - lh_tbb_spin_rw_mutex
68 |         - lh_ticket_spinlock
69 |         - lh_clh_spinlock
70 |     cmd_aarch64: [lh_hybrid_spinlock, lh_hybrid_spinlock_fastdequeue]
71 |     cmd_x86_64:
72 |     repeat: 9
73 |     sweepargu: t
74 |     argumax: 0
75 |     skipsince: 48
76 |     skipstep: 8
77 |     argulist:
78 |         - a: 5000
79 |           c: 0ns
80 |           p: 0ns
81 |           o: lstopo
82 |         - a: 5000
83 |           c: 200ns
84 |           p: 0ns
85 |           o: lstopo
86 |         - a: 5000
87 |           c: 1000ns
88 |           p: 0ns
89 |           o: lstopo
90 |         - a: 5000
91 |           c: 200ns
92 |           p: 1000ns
93 |           o: lstopo
94 |         - a: 5000
95 |           c: 1000ns
96 |           p: 5000ns
97 |           o: lstopo
98 | 
--------------------------------------------------------------------------------
/benchmarks/lockhammer/scripts/lh_unittest_cfg.yaml:
--------------------------------------------------------------------------------
1 | # Copyright (c) 2018, ARM Limited. All rights reserved.
2 | #
3 | # SPDX-License-Identifier: BSD-3-Clause
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are met:
7 | #
8 | # Redistributions of source code must retain the above copyright notice, this
9 | # list of conditions and the following disclaimer.
10 | #
11 | # Redistributions in binary form must reproduce the above copyright notice, this
12 | # list of conditions and the following disclaimer in the documentation and/or
13 | # other materials provided with the distribution.
14 | #
15 | # Neither the name of ARM Limited nor the names of its contributors may be used
16 | # to endorse or promote products derived from this software without specific
17 | # prior written permission.
18 | #
19 | # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 | # AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 | # DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 | # SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26 | # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
27 | # TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 | # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 | #
30 | # The views and conclusions contained in the software and documentation are those
31 | # of the authors and should not be interpreted as representing official policies,
32 | # either expressed or implied, of this project.
33 | 
34 | 
35 | ## Global Settings
36 | globalcfg:
37 |     execdir: ../build
38 |     logfile: lockhammer.csv
39 | 
40 | 
41 | ## Unittest Settings
42 | #
43 | # Common assumptions for unittest:
44 | # Only cover functional correctness, using as little time as possible.
45 | # Normal runtime should be around 1 minute.
46 | # t=0 means the maximum core count.
47 | # o=lstopo means using the lstopo output as the preferred thread pinning order.
48 | #
49 | ##
50 | unittest:
51 |     enabled: True
52 |     safemode: True
53 |     testcase:
54 |         - cmd:
55 |             - lh_cas_event_mutex
56 |             - lh_cas_lockref
57 |             - lh_cas_rw_lock
58 |             - lh_empty
59 |             - lh_event_mutex
60 |             - lh_incdec_refcount
61 |             - lh_jvm_objectmonitor
62 |             - lh_osq_lock
63 |             - lh_queued_spinlock
64 |             - lh_swap_mutex
65 |             - lh_tbb_spin_rw_mutex
66 |             - lh_ticket_spinlock
67 |           cmd_aarch64: [lh_hybrid_spinlock, lh_hybrid_spinlock_fastdequeue]
68 |           cmd_x86_64:
69 |           t: [1, 0]
70 |           a: 100
71 |           c: [0ns, 50ns]
72 |           p: [0ns, 50ns]
73 | 
74 |         - cmd: lh_osq_lock
75 |           t: [1, 0]
76 |           a: 100
77 |           c: 50ns
78 |           p: 0ns
79 |           o: lstopo
80 |           extra:
81 |               u: 10
82 |               s: 2
83 | 
84 |         - cmd: lh_tbb_spin_rw_mutex
85 |           t: [1, 0]
86 |           a: 100
87 |           c: 50ns
88 |           p: 0ns
89 |           i: 1
90 |           o: '0:1:2:3'
91 |           extra:
92 |               r: 4
93 |               m: 1
94 | 
--------------------------------------------------------------------------------
/benchmarks/lockhammer/scripts/lockhammer-all.csv.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/benchmarks/lockhammer/scripts/lockhammer-all.csv.xz
--------------------------------------------------------------------------------
/benchmarks/lockhammer/scripts/lockhammer-jupyter-notebook.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "markdown",
5 |    "metadata": {},
6 |    "source": [
7 |     "# License"
8 |    ]
9 |   },
10 |   {
11 |    "cell_type": "markdown",
12 |    "metadata": {},
13 |    "source": [
14 |     "Copyright (c) 2018, ARM Limited.
All rights reserved.\n", 15 | "\n", 16 | "SPDX-License-Identifier: BSD-3-Clause\n", 17 | "\n", 18 | "Redistribution and use in source and binary forms, with or without\n", 19 | "modification, are permitted provided that the following conditions are met:\n", 20 | "\n", 21 | "Redistributions of source code must retain the above copyright notice, this\n", 22 | "list of conditions and the following disclaimer.\n", 23 | "\n", 24 | "Redistributions in binary form must reproduce the above copyright notice, this\n", 25 | "list of conditions and the following disclaimer in the documentation and/or\n", 26 | "other materials provided with the distribution.\n", 27 | "\n", 28 | "Neither the name of ARM Limited nor the names of its contributors may be used\n", 29 | "to endorse or promote products derived from this software without specific\n", 30 | "prior written permission.\n", 31 | "\n", 32 | "THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\n", 33 | "AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n", 34 | "IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\n", 35 | "DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\n", 36 | "FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\n", 37 | "DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\n", 38 | "SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\n", 39 | "CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR\n", 40 | "TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n", 41 | "OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE." 42 | ] 43 | }, 44 | { 45 | "cell_type": "markdown", 46 | "metadata": {}, 47 | "source": [ 48 | "# Prerequisite Libraries" 49 | ] 50 | }, 51 | { 52 | "cell_type": "markdown", 53 | "metadata": {}, 54 | "source": [ 55 | "- ## python3\n", 56 | "`apt install python3 python3-pip`\n", 57 | "- ## jupyter-notebook\n", 58 | "`apt install jupyter-notebook`\n", 59 | "- ## matplotlib seaborn pandas numpy\n", 60 | "`pip3 install matplotlib seaborn pandas numpy`" 61 | ] 62 | }, 63 | { 64 | "cell_type": "markdown", 65 | "metadata": {}, 66 | "source": [ 67 | "# Matplotlib and Seaborn Settings" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": { 74 | "collapsed": false 75 | }, 76 | "outputs": [], 77 | "source": [ 78 | "%matplotlib inline\n", 79 | "import matplotlib\n", 80 | "import matplotlib.pyplot as plt\n", 81 | "import seaborn as sns\n", 82 | "import pandas as pd\n", 83 | "import numpy as np\n", 84 | "import warnings\n", 85 | "warnings.filterwarnings('ignore')\n", 86 | "sns.set()\n", 87 | "\n", 88 | "# default 12 colors and markers\n", 89 | "default_palette = [\n", 90 | " '#765f97', #Purple\n", 91 | " '#1b9e77', #Dark Green\n", 92 | " '#8c5c20', #Brown\n", 93 | " '#0038bd', #Blue\n", 94 | " '#cf364a', #Red\n", 95 | " '#343434', #Jet Black\n", 96 | " '#878681', #Titanium Gray\n", 97 | " '#f561dd', #Magenta\n", 98 | " '#a6cee3', #Calico Blue\n", 99 | " '#dea0dd', #Plum\n", 100 | " '#7fc97f', #Grass Green\n", 101 | " '#fdc086', #Pale Yellow\n", 102 | " ]\n", 103 | "\n", 104 | "default_markers=['^', '*', 'd', 'x', 'D', 'o', 'v', 's', 'p', '>', '<', '.']\n", 105 | "default_marker_size = 100\n", 106 | "\n", 107 | "# seaborn settings\n", 108 | "sns.set(context=\"notebook\", style=\"darkgrid\", font_scale=2, rc={\"lines.linewidth\": 3, \"xtick.major.size\": 
4, \"ytick.major.size\": 4})\n", 109 | "sns.set_palette(default_palette)\n", 110 | "sns.palplot(sns.color_palette())" 111 | ] 112 | }, 113 | { 114 | "cell_type": "markdown", 115 | "metadata": {}, 116 | "source": [ 117 | "# Lockhammer Common Settings" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": null, 123 | "metadata": { 124 | "collapsed": true 125 | }, 126 | "outputs": [], 127 | "source": [ 128 | "# common variables\n", 129 | "lock_workloads = [\"cas_event_mutex\", \"cas_lockref\", \"cas_rw_lock\", \"incdec_refcount\", \"osq_lock\", \"queued_spinlock\", \"ticket_spinlock\", \"jvm_objectmonitor\", \"swap_mutex\", \"tbb_spin_rw_mutex\", \"event_mutex\", \"empty\"]\n", 130 | "lock_hosts = [\"x86-1\", \"x86-2\", \"x86-3\", \"x86-4\"]\n", 131 | "lock_parameters = [[\"-c\", \"200ns\", \"-p\", \"1000ns\"]]\n", 132 | "exectx_count_max = 88\n", 133 | "exectx_count_gap = 4\n", 134 | "lh_csv_result_header = [\"num_threads\", \"avg_exectx\", \"scheduled_time_per_access\", \"real_time_per_access\", \"access_rate\", \"avg_lock_depth\",\n", 135 | " \"date\", \"fqdn\", \"exec\", \"t\", \"tv\", \"a\", \"av\", \"c\", \"cv\", \"p\", \"pv\", \"o\", \"ov\"]\n", 136 | "\n", 137 | "param_name = \"contended_system_latency (ns)\"\n", 138 | "lock_yaxis_name = param_name\n", 139 | "\n", 140 | "# lockhammer-all.csv.xz is a xz-compressed aggregated file of different machines' raw csv result\n", 141 | "raw_csv_filename = \"lockhammer-all.csv.xz\"\n", 142 | "raw_df = pd.read_csv(raw_csv_filename, sep=', ', header=None, names=lh_csv_result_header, engine='python')\n", 143 | "\n", 144 | "# common functions\n", 145 | "def plot_lines_only(dataf):\n", 146 | " # each test repeat 9 times, but we only plot the median latency (access_rate)\n", 147 | " median_list = []\n", 148 | " for hst, grp0 in dataf.groupby(\"host\"):\n", 149 | " for nth, grp1 in grp0.groupby(\"num_threads\"):\n", 150 | " median_list.append({\"host\": hst, \"num_threads\": nth, param_name: grp1.median()[param_name]})\n", 151 | " median_df = pd.DataFrame(median_list)\n", 152 | " \n", 153 | " from matplotlib.colors import ListedColormap\n", 154 | " cmap = ListedColormap(sns.color_palette(default_palette).as_hex())\n", 155 | " for i, (hst, grp2) in enumerate(median_df.groupby(\"host\")):\n", 156 | " plt.plot(\"num_threads\", param_name, data=grp2, color=cmap(i))" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "metadata": {}, 162 | "source": [ 163 | "# Lockhammer all workloads, raw contended system latency, 2018.11.06." 
164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": null, 169 | "metadata": { 170 | "collapsed": false, 171 | "scrolled": false 172 | }, 173 | "outputs": [], 174 | "source": [ 175 | "# use lmplot (not catplot) to plot raw system latencies\n", 176 | "for param in lock_parameters:\n", 177 | " for workload in lock_workloads:\n", 178 | " tidy_df = pd.DataFrame()\n", 179 | " for sut in sorted(lock_hosts):\n", 180 | " host_df = raw_df.loc[(raw_df['fqdn'].str.startswith(sut) & raw_df['exec'].str.endswith(workload))]\n", 181 | " test_df = host_df.loc[(host_df['cv'] == param[1]) & (host_df['pv'] == param[3])]\n", 182 | " copy_df = test_df.copy()\n", 183 | " copy_df['host'] = sut\n", 184 | " all_df = pd.melt(copy_df, id_vars=['host', 'num_threads'], value_vars=['access_rate'], value_name=param_name)\n", 185 | " tidy_df = pd.concat([tidy_df, all_df])\n", 186 | " \n", 187 | " # because lmplot doesn't plot lines, we have to use plot_lines_only to plot them\n", 188 | " sns.lmplot(x=\"num_threads\", y=param_name, hue=\"host\", data=tidy_df, x_estimator=np.median, x_ci=50,\n", 189 | " height=10, aspect=2, fit_reg=False, markers=default_markers[:len(lock_hosts)], scatter_kws={\"s\": default_marker_size})\n", 190 | " \n", 191 | " # plot lines which connect lmplot dots\n", 192 | " plot_lines_only(tidy_df)\n", 193 | " \n", 194 | " # change title / axis and save the figure\n", 195 | " plt.title(\"lockhammer workload: {}, critical_time: {}, parallel_time: {}\".format(workload, param[1], param[3]))\n", 196 | " plt.xlim(0, exectx_count_max)\n", 197 | " plt.xticks(np.arange(0, exectx_count_max+1, exectx_count_gap))\n", 198 | " plt.savefig(\"github_lockhammer_all_common_20181106_{}_{}_{}.png\".format(workload, param[1], param[3]))" 199 | ] 200 | }, 201 | { 202 | "cell_type": "code", 203 | "execution_count": null, 204 | "metadata": { 205 | "collapsed": true 206 | }, 207 | "outputs": [], 208 | "source": [] 209 | } 210 | ], 211 | "metadata": { 212 | "kernelspec": { 213 | "display_name": "Python 3", 214 | "language": "python", 215 | "name": "python3" 216 | }, 217 | "language_info": { 218 | "codemirror_mode": { 219 | "name": "ipython", 220 | "version": 3 221 | }, 222 | "file_extension": ".py", 223 | "mimetype": "text/x-python", 224 | "name": "python", 225 | "nbconvert_exporter": "python", 226 | "pygments_lexer": "ipython3", 227 | "version": "3.6.3" 228 | } 229 | }, 230 | "nbformat": 4, 231 | "nbformat_minor": 2 232 | } 233 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/scripts/run-tests.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # SPDX-FileCopyrightText: Copyright 2019-2025 Arm Limited and/or its affiliates 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | # This script invokes lockhammer tests. 7 | # This script is meant to be edited for customizing which tests to run. 8 | # Edit the *_LIST variables below to choose the configuration combinations. 9 | # A json file will be made for each variant and test combination. 
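# One illustrative way to run it (a sketch, not text from the script itself):
# the hugepage check further down requires a free 1GB hugepage, and the reserve
# command below mirrors the hint the script prints when none is available,
# using the path implied by HUGEPAGE_SIZE_KB=$((1024*1024)).
#
#   # reserve one more 1GB hugepage for the benchmark runs
#   echo $(( $(cat /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages) + 1 )) | sudo tee -a /sys/kernel/mm/hugepages/hugepages-1048576kB/nr_hugepages
#
#   # then run the selected variant/test/delay combinations
#   ./run-tests.sh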
10 | 
11 | set -e
12 | 
13 | usage() {
14 | cat<
47 | :) >&2 echo "ERROR: flag -$OPTARG required an argument, but none was given"
48 |    usage
49 |    ;;
50 | *) echo name=$name, OPTARG=$OPTARG
51 |    usage
52 |    ;;
53 | esac
54 | done
55 | 
56 | shift $((OPTIND-1))
57 | 
58 | # make the list of build variants to run; use # to comment out a variant
59 | 
60 | # TODO: update lists by arch
61 | 
62 | VARIANT_LIST=$(grep -v -E '\#|^$' <<'EOF1'
63 | builtin.cond_load.relax_empty
64 | builtin.cond_load.relax_nothing
65 | builtin.cond_load.relax_pause
66 | builtin.relax_empty
67 | builtin.relax_nothing
68 | builtin.relax_pause
69 | cond_load.relax_empty
70 | cond_load.relax_nothing
71 | cond_load.relax_pause
72 | relax_empty
73 | relax_nothing
74 | relax_pause
75 | 
76 | 
77 | #builtin.relax_nothing
78 | #builtin.relax_isb
79 | #builtin.cond_load.relax_nothing
80 | #builtin.cond_load.relax_isb
81 | 
82 | #lse.builtin.relax_nothing
83 | #lse.builtin.relax_isb
84 | #lse.builtin.cond_load.relax_nothing
85 | #lse.builtin.cond_load.relax_isb
86 | EOF1
87 | )
88 | 
89 | # make the list of tests to run; use # to comment out a test
90 | 
91 | TEST_LIST=$(grep -v -E '\#|^$' <<'EOF2'
92 | lh_cas_event_mutex
93 | lh_cas_lockref
94 | lh_cas_rw_lock
95 | #lh_clh_spinlock
96 | #lh_empty
97 | #lh_event_mutex
98 | #lh_hybrid_spinlock
99 | #lh_hybrid_spinlock_fastdequeue
100 | lh_incdec_refcount
101 | lh_jvm_objectmonitor
102 | lh_osq_lock
103 | #lh_queued_spinlock
104 | #lh_swap_mutex
105 | #lh_tbb_spin_rw_mutex
106 | lh_ticket_spinlock
107 | EOF2
108 | )
109 | 
110 | CRIT_NS_LIST=$(grep -v -E '\#|^$' <<'EOF_CRIT_NS'
111 | 0
112 | 500
113 | 1000
114 | EOF_CRIT_NS
115 | )
116 | 
117 | PAR_NS_LIST=$(grep -v -E '\#|^$' <<'EOF_PAR_NS'
118 | 0
119 | 500
120 | 1000
121 | 2000
122 | 4000
123 | EOF_PAR_NS
124 | )
125 | 
126 | PAR=""
127 | for a in $PAR_NS_LIST; do PAR+="-p${a}ns "; done
128 | 
129 | CRIT=""
130 | for a in $CRIT_NS_LIST; do CRIT+="-c${a}ns "; done
131 | 
132 | 
133 | # check that a hugepage is available
134 | #HUGEPAGE_SIZE=32MB
135 | #HUGEPAGE_SIZE_KB=$((32*1024))
136 | HUGEPAGE_SIZE=1GB
137 | HUGEPAGE_SIZE_KB=$((1024*1024))
138 | HUGEPAGES_DIR=/sys/kernel/mm/hugepages/hugepages-${HUGEPAGE_SIZE_KB}kB
139 | FREE_HUGEPAGES_FILE=$HUGEPAGES_DIR/free_hugepages
140 | NR_HUGEPAGES_FILE=$HUGEPAGES_DIR/nr_hugepages
141 | NR_HUGEPAGES=$(cat "$NR_HUGEPAGES_FILE")
142 | NR_HUGEPAGES_PLUS_ONE=$((NR_HUGEPAGES+1))
143 | if [ ! -e "$FREE_HUGEPAGES_FILE" ] || [ $(cat "$FREE_HUGEPAGES_FILE") -eq 0 ]; then
144 |     echo "ERROR: no free $HUGEPAGE_SIZE hugepages. Perhaps try running:"
145 |     echo "echo $NR_HUGEPAGES_PLUS_ONE | sudo tee -a $NR_HUGEPAGES_FILE"
146 |     exit -1
147 | fi
148 | HUGEPAGE_FLAGS="--hugepage-size $HUGEPAGE_SIZE"
149 | 
150 | 
151 | # determine cpuorder file to use based on hostname.
152 | HOSTNAME_S=$(hostname -s)
153 | CPUORDER_FLAGS=
154 | if [ -e hostname_to_cpuorder_type.sh ]; then
155 |     . hostname_to_cpuorder_type.sh
156 | 
157 |     CPUORDER_TYPE=$(hostname_to_cpuorder_type $HOSTNAME_S)
158 |     CPUORDER=cpuorders/$CPUORDER_TYPE.cpuorder
159 | 
160 |     if [ ! -e "$CPUORDER" ]; then
161 |         echo "ERROR: $CPUORDER does not exist!"
162 |         exit -1
163 |     fi
164 | 
165 |     CPUORDER_FLAGS="-C $CPUORDER"
166 | fi
167 | 
168 | # compute the number of threads using the number of available processors
169 | NPROC=$(nproc)
170 | TLIST=
171 | for num_threads in 2 4 $(eval echo "{8..$NPROC..$CPU_SKIP}")
172 | do
173 |     if [ $num_threads -gt $NPROC ]; then
174 |         break
175 |     fi
176 | 
177 |     TLIST+="-t $num_threads "
178 | done
179 | 
180 | 
181 | # compute the number of tests and variants
182 | NUM_TESTS=$(echo $TEST_LIST | wc -w)
183 | NUM_VARIANTS=$(echo $VARIANT_LIST | wc -w)
184 | #echo NUM_VARIANTS=$NUM_VARIANTS NUM_TESTS=$NUM_TESTS
185 | NUM_TEST_AND_VARIANTS=$((NUM_TESTS*NUM_VARIANTS))
186 | TEST_AND_VARIANT_COUNT=0
187 | #echo NUM_TEST_AND_VARIANTS=$NUM_TEST_AND_VARIANTS
188 | 
189 | #exit 0
190 | 
191 | # change newline to space for the summary
192 | TEST_LIST=${TEST_LIST//$'\n'/ }
193 | VARIANT_LIST=${VARIANT_LIST//$'\n'/ }
194 | PAR_NS_LIST=${PAR_NS_LIST//$'\n'/ }
195 | CRIT_NS_LIST=${CRIT_NS_LIST//$'\n'/ }
196 | 
197 | cat<
--------------------------------------------------------------------------------
/benchmarks/lockhammer/scripts/show-per-thread-lock-acquires.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # SPDX-FileCopyrightText: Copyright 2019-2025 Arm Limited and/or its affiliates
4 | # SPDX-License-Identifier: BSD-3-Clause
5 | 
6 | # This script shows the per-thread fairness of lock acquires in the result json(s).
7 | 
8 | # show-per-thread-lock-acquires.sh result1.json [result2.json ...]
9 | 
10 | # add this to filter only one set of crit/par
11 | #.results[]|select(.nominal_critical==0 and .nominal_parallel==0)|
12 | 
13 | read -r -d '' CMD <<'EOF'
14 | .results[]|"\(.nominal_critical)\t\(.nominal_parallel)\t\(.num_threads)\t\(.full_concurrency_fraction*10000|round/10000)\t\(.lock_acquires_mean | round )\t\(.lock_acquires_stddev_over_mean * 10000 | round / 10000)\t\(.per_thread_stats | map(.lock_acquires) | sort | join(","))"
15 | EOF
16 | 
17 | #echo "$CMD"
18 | #exit
19 | 
20 | (
21 | echo -e "crit\tpar\tnthrds\tfcf\tlock_acquires_mean\tlock_acquires_stddev/mean\tlock_acquires_each_thread\n"
22 | jq -r "$CMD" "$@"
23 | ) | column -t
24 | 
--------------------------------------------------------------------------------
/benchmarks/lockhammer/scripts/sweep.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | # Copyright (c) 2017, The Linux Foundation. All rights reserved.
4 | #
5 | # Redistribution and use in source and binary forms, with or without
6 | # modification, are permitted provided that the following conditions are
7 | # met:
8 | #     * Redistributions of source code must retain the above copyright
9 | #       notice, this list of conditions and the following disclaimer.
10 | #     * Redistributions in binary form must reproduce the above
11 | #       copyright notice, this list of conditions and the following
12 | #       disclaimer in the documentation and/or other materials provided
13 | #       with the distribution.
14 | #     * Neither the name of The Linux Foundation nor the names of its
15 | #       contributors may be used to endorse or promote products derived
16 | #       from this software without specific prior written permission.
17 | #
18 | # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
19 | # WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
20 | # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
21 | # ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 22 | # BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | # CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | # SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 25 | # BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 26 | # WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 27 | # OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 28 | # IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | 30 | cores=$(grep -c "^processor" /proc/cpuinfo) 31 | cores_q1=$(($cores / 4)) 32 | cores_q2=$(($cores / 2)) 33 | cores_q3=$(($cores_q1 + $cores_q2)) 34 | cores_all="`seq 48` `seq 8 8 $(($cores))` $cores_q1 $cores_q2 $cores_q3 $cores" 35 | cores_sort=$(echo $cores_all | tr ' ' '\n' | sort -nu) 36 | for c in $cores_sort 37 | do 38 | if (( $c <= $cores )) 39 | then 40 | acquires=50000 41 | if (( $c > 8 )) 42 | then 43 | acquires=$((${acquires}*8/$c)) 44 | if (( $acquires < 1000 )) 45 | then 46 | acquires=1000 47 | fi 48 | fi 49 | 50 | echo Test: ${1} CPU: exectx=$c Date: `date` 1>&2 51 | sudo ../build/lh_${1} -t $c -a ${acquires} -c ${2} -p ${3} 52 | sleep 5s 53 | fi 54 | done 55 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/scripts/view-results-json.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # SPDX-FileCopyrightText: Copyright 2019-2025 Arm Limited and/or its affiliates 4 | # SPDX-License-Identifier: BSD-3-Clause 5 | 6 | # This script displays the values from one or more lockhammer json files in a table format using jq. 7 | 8 | # XXX: can't differentiate between ns vs. inst for crit/par; please select only using the same units! 9 | 10 | SORT_STRING='.num_threads' 11 | REVERSE=0 12 | DUMP_DATA=0 13 | 14 | declare -a CRIT 15 | declare -a PAR 16 | declare -a NUM_THREADS 17 | declare -a VARIANT_NAMES 18 | 19 | usage() { 20 | cat<<"USAGE" 21 | 22 | ./view-results-json.sh [options] json [json ...] 
23 | 
24 | select options:
25 |     -c crit             nominal critical time/inst parameter (repeatable)
26 |     -p par              nominal parallel time/inst parameter (repeatable)
27 |     -t num_threads      number of threads (repeatable)
28 |     -v variant_name     variant name (repeatable)
29 | 
30 | sort options:
31 |     -s sort_string      sort string (default is by '.num_threads')
32 |     -s help             print short header to .key mapping
33 |     -r                  reverse the sort
34 | 
35 | output options:
36 |     -D                  dump the records in a json array
37 | 
38 |     -h                  print this usage help message
39 | 
40 | 
41 | Example:
42 | 
43 | # list all data with threads=8, parallel=1000 or parallel=500, and critical=0
44 | # from files *osq_lock*.json, sort by overhead %
45 | 
46 | ./view-results-json.sh -s overhead_% -t 8 -p 1000 -p 500 -c 0 *osq_lock*.json
47 | 
48 | USAGE
49 | exit 1
50 | }
51 | 
52 | 
53 | shopt -s extglob
54 | 
55 | while getopts ":c:p:t:v:s:rDh" name; do
56 |     case "${name}" in
57 |         c) CRIT+=(${OPTARG})
58 |            ;;
59 |         p) PAR+=(${OPTARG})
60 |            ;;
61 |         t) NUM_THREADS+=(${OPTARG})
62 |            ;;
63 |         v) VARIANT_NAMES+=(${OPTARG})
64 |            ;;
65 |         s) SORT_STRING=${OPTARG}
66 |            ;;
67 |         r) REVERSE=1
68 |            ;;
69 |         D) DUMP_DATA=1
70 |            ;;
71 |         h) usage
72 |            ;;
73 |         :) >&2 echo "ERROR: flag -$OPTARG required an argument, but none was given"
74 |            usage
75 |            ;;
76 |         *) echo "ERROR: unknown flag name=$name, OPTARG=$OPTARG"
77 |            usage
78 |            ;;
79 |     esac
80 | done
81 | 
82 | shift $((OPTIND-1))
83 | 
84 | FILES="$@"
85 | 
86 | if [ -z "$FILES" ]; then
87 |     echo "no json files given; run with -h for usage help"
88 |     exit -1
89 | fi
90 | 
91 | # -----------------------------------------------------------------------------
92 | # jq filter stages. Write as separate single-quoted strings so that escapes are not needed (i.e., do not use escapes!).
93 | #
94 | # reducer - puts data from all the json into an array with some modifications
95 | # selector - selects the data from the array that match the command line criteria
96 | # sorter - sort the selected data by the sorting criteria
97 | # filter - convert the sorted data into formatted output
98 | 
99 | # ----------------------------------
100 | # Reducer gets the .results[] array from each json, and, for each results
101 | # element/object, deletes the pinorder and per_thread_stats, and adds an
102 | # .input_filename to the object. The output is a single array of results
103 | # elements.
104 | 
105 | REDUCER='reduce inputs as $s ([]; . += [$s.results[] | del(.pinorder) | del(.per_thread_stats) | . += {"input_filename":input_filename}])'
+= {"input_filename":input_filename}])' 106 | 107 | 108 | # ---------------------------------- 109 | # Select the records with the requested element values 110 | 111 | make_selector() { 112 | local NAME="$1" 113 | shift 114 | local AS_STRING=0 115 | if [ "$1" = "as_string" ]; then 116 | AS_STRING=1 117 | shift 118 | elif [ "$1" = "as_number" ]; then 119 | AS_STRING=0 120 | shift 121 | fi 122 | 123 | local ARRAY=("$@") 124 | local ARRAY_SELECTOR= 125 | 126 | if [ ${#ARRAY[@]} -eq 0 ]; then 127 | return 128 | fi 129 | 130 | for a in ${ARRAY[@]}; do 131 | if [ -n "$ARRAY_SELECTOR" ]; then ARRAY_SELECTOR+=" or "; fi 132 | if [ $AS_STRING -eq 1 ]; then 133 | ARRAY_SELECTOR+=".${NAME}==\"${a}\"" 134 | else 135 | ARRAY_SELECTOR+=".${NAME}==${a}" 136 | fi 137 | done 138 | 139 | echo " and ($ARRAY_SELECTOR)" 140 | } 141 | 142 | SELECTOR_ARGLIST="true" 143 | SELECTOR_ARGLIST+=$(make_selector nominal_parallel "${PAR[@]}") 144 | SELECTOR_ARGLIST+=$(make_selector nominal_critical "${CRIT[@]}") 145 | SELECTOR_ARGLIST+=$(make_selector num_threads "${NUM_THREADS[@]}") 146 | SELECTOR_ARGLIST+=$(make_selector variant_name as_string "${VARIANT_NAMES[@]}") 147 | 148 | SELECTOR=' [.[] | select('$SELECTOR_ARGLIST')] ' 149 | 150 | 151 | # ---------------------------------- 152 | # Sort; output is an array 153 | 154 | # for -s sort_string flag, map it to these fields. TODO: reverse SPECIAL_HEADER array instead of hard-coding 155 | declare -A SHORT_HEADER 156 | SHORT_HEADER[cputime_ns/lock]=".cputime_ns_per_lock_acquire" 157 | SHORT_HEADER[cpu_ns/lock]=".cputime_ns_per_lock_acquire" 158 | SHORT_HEADER[wall_ns/lock]=".wall_elapsed_ns_per_lock_acquire" 159 | SHORT_HEADER[fcf]=".full_concurrency_fraction" 160 | SHORT_HEADER[nom_par]=".nominal_parallel" 161 | SHORT_HEADER[nom_crit]=".nominal_critical" 162 | SHORT_HEADER[par_ns]=".avg_parallel_ns_per_loop" 163 | SHORT_HEADER[crit_ns]=".avg_critical_ns_per_loop" 164 | SHORT_HEADER[overhead_ns]=".avg_lock_overhead_cputime_ns" 165 | SHORT_HEADER[overhead_%]=".lock_overhead_cputime_percent" 166 | SHORT_HEADER[locks/wall_sec]=".total_lock_acquires_per_second" 167 | SHORT_HEADER[num_threads]=".num_threads" 168 | SHORT_HEADER[json]=".input_filename" 169 | SHORT_HEADER[host]=".hostname" 170 | SHORT_HEADER[lasom]=".lock_acquires_stddev_over_mean" 171 | 172 | # print SHORT_HEADER as a table 173 | if [[ $SORT_STRING == "help" ]]; then 174 | (echo "sort_key sort_string"; 175 | for key in "${!SHORT_HEADER[@]}" ; do 176 | echo "$key ${SHORT_HEADER[$key]}" 177 | done) | column -t 178 | exit -1 179 | fi 180 | 181 | if [[ -v SHORT_HEADER[$SORT_STRING] ]]; then 182 | SORT_STRING="${SHORT_HEADER[$SORT_STRING]}" 183 | elif [[ ! $SORT_STRING =~ ^\. ]]; then 184 | # we check for this to allow for complex multikey comma-separated sort string to be passed in as an argument. 185 | echo "ERROR: SORT_STRING does not being with a . and is not one of the SHORT_HEADER keys, so it's probably not referring to a results variable." 186 | exit -1 187 | fi 188 | 189 | #SORTER='sort_by(.cputime_ns_per_lock_acquire) ' 190 | #SORTER='sort_by(.num_threads) ' 191 | SORTER='sort_by('$SORT_STRING')' 192 | if [ $REVERSE -eq 1 ]; then 193 | SORTER+=' | reverse' 194 | fi 195 | 196 | # json output from jq 197 | if [ $DUMP_DATA -eq 1 ]; then 198 | exec jq -n -r "$REDUCER | $SELECTOR | $SORTER | . " $FILES 199 | fi 200 | 201 | 202 | # the rest of this is for the tabulated output 203 | 204 | # ---------------------------------- 205 | # Construct KEY_LIST, an array defining the order of the columns. 
206 | # These are typically keynames from entries in the .results[] of a json or, if there's a corresponding entry in SPECIAL_HEADER or SPECIAL_FILTER, what to show instead.
207 | # If the row begins with #, the metric is omitted.
208 | read -r -d '' -a KEY_LIST <<'EOF_KEY_LIST'
209 | test_name
210 | variant_name
211 | num_threads
212 | nominal_critical
213 | nominal_parallel
214 | cputime_ns_per_lock_acquire
215 | avg_critical_ns_per_loop
216 | avg_parallel_ns_per_loop
217 | avg_lock_overhead_cputime_ns
218 | lock_overhead_cputime_percent
219 | full_concurrency_fraction
220 | lock_acquires_stddev_over_mean
221 | host
222 | #json
223 | wall_elapsed_ns_per_lock_acquire
224 | total_lock_acquires_per_second
225 | EOF_KEY_LIST
226 | 
227 | # SPECIAL_HEADER is what to print in the header for a key name. If the key does not exist, then the key name is used as the header.
228 | declare -A SPECIAL_HEADER
229 | SPECIAL_HEADER[cputime_ns_per_lock_acquire]="cpu_ns/lock"
230 | SPECIAL_HEADER[wall_elapsed_ns_per_lock_acquire]="wall_ns/lock"
231 | SPECIAL_HEADER[full_concurrency_fraction]="fcf"
232 | SPECIAL_HEADER[avg_parallel_ns_per_loop]="par_ns"
233 | SPECIAL_HEADER[avg_critical_ns_per_loop]="crit_ns"
234 | SPECIAL_HEADER[avg_lock_overhead_cputime_ns]="overhead_ns"
235 | SPECIAL_HEADER[lock_overhead_cputime_percent]="overhead_%"
236 | SPECIAL_HEADER[total_lock_acquires_per_second]="locks/wall_sec"
237 | SPECIAL_HEADER[lock_acquires_stddev_over_mean]="lasom"
238 | SPECIAL_HEADER[nominal_critical]="nom_crit"
239 | SPECIAL_HEADER[nominal_parallel]="nom_par"
240 | 
241 | # SPECIAL_FILTER is how to have jq format the element. If the key does not exist, then .key is used for the filter.
242 | declare -A SPECIAL_FILTER
243 | SPECIAL_FILTER[cputime_ns_per_lock_acquire]='\(.cputime_ns_per_lock_acquire|round)'
244 | SPECIAL_FILTER[wall_elapsed_ns_per_lock_acquire]='\(.wall_elapsed_ns_per_lock_acquire|round)'
245 | SPECIAL_FILTER[full_concurrency_fraction]='\(.full_concurrency_fraction * 100 | round / 100)'
246 | SPECIAL_FILTER[host]='\(.hostname | split(".") | .[0])'
247 | SPECIAL_FILTER[json]='\(.input_filename | split(".") | .[:-1] | join("."))'
248 | SPECIAL_FILTER[avg_critical_ns_per_loop]='\(.avg_critical_ns_per_loop | round)'
249 | SPECIAL_FILTER[avg_parallel_ns_per_loop]='\(.avg_parallel_ns_per_loop | round)'
250 | SPECIAL_FILTER[avg_lock_overhead_cputime_ns]='\(.avg_lock_overhead_cputime_ns | round)'
251 | SPECIAL_FILTER[lock_overhead_cputime_percent]='\(.lock_overhead_cputime_percent | round)'
252 | SPECIAL_FILTER[total_lock_acquires_per_second]='\(.total_lock_acquires_per_second|round)'
253 | SPECIAL_FILTER[lock_acquires_stddev_over_mean]='\(.lock_acquires_stddev_over_mean*10000|round/10000)'
254 | 
255 | # constructs the header or filter
256 | make_special() {
257 |     local -n pointer="$1"   # name reference to associative array, needs bash 4.3 or later
258 |     local normal_format_pre_eval=$2
259 |     local normal_format
260 |     local key
261 |     local list=
262 |     for key in "${KEY_LIST[@]}"
263 |     do
264 |         if [[ $key =~ ^\# ]]; then
265 |             continue
266 |         fi
267 |         if [ -n "$list" ]; then
268 |             list="$list\t"
269 |         fi
270 | 
271 |         normal_format=$(eval "echo \"$normal_format_pre_eval\"")
272 | 
273 |         if [[ -v pointer[$key] ]]; then
274 |             list+="${pointer[$key]}"
275 |         else
276 |             list+=$normal_format
277 |         fi
278 |     done
279 |     echo "$list"
280 | }
281 | 
282 | HEADER=$(make_special SPECIAL_HEADER '$key')
283 | FILTER=$(make_special SPECIAL_FILTER '\(.${key})')
284 | 
285 | # ----------------------------------
286 | # finally invoke jq for tabulated output using 'column' to pretty print.
287 | (
288 | echo -e "$HEADER"
289 | jq -n -r "$REDUCER | $SELECTOR | $SORTER | .[] | \"$FILTER\" " $FILES
290 | ) | column -t -o " "
291 | 
--------------------------------------------------------------------------------
/benchmarks/lockhammer/tests/cas_lockref.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2017, The Linux Foundation. All rights reserved.
3 |  *
4 |  * SPDX-License-Identifier: BSD-3-Clause
5 |  *
6 |  * Redistribution and use in source and binary forms, with or without
7 |  * modification, are permitted provided that the following conditions are
8 |  * met:
9 |  *    * Redistributions of source code must retain the above copyright
10 |  *      notice, this list of conditions and the following disclaimer.
11 |  *    * Redistributions in binary form must reproduce the above
12 |  *      copyright notice, this list of conditions and the following
13 |  *      disclaimer in the documentation and/or other materials provided
14 |  *      with the distribution.
15 |  *    * Neither the name of The Linux Foundation nor the names of its
16 |  *      contributors may be used to endorse or promote products derived
17 |  *      from this software without specific prior written permission.
18 |  *
19 |  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
20 |  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
22 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
23 |  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 |  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28 |  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
29 |  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |  */
31 | 
32 | #include "atomics.h"
33 | #include "cpu_relax.h"
34 | 
35 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) {
36 | 	unsigned long val, old;
37 | 
38 | 	do {
39 | 		old = *(volatile unsigned long *) lock;		/* lower 32 bits: lock flag; upper 32 bits: reference count */
40 | 		val = old + 0x100000000;			/* increment the reference count */
41 | 
42 | 		while ((old & 0xFFFFFFFF) && ((val >> 32) <= 32)) {	/* reread while the lock flag is held and the new count stays within the cap of 32 */
43 | 			old = *(volatile unsigned long *) lock;
44 | 			val = old + 0x100000000;
45 | 		}
46 | 
47 | 		val = cas64(lock, val, old);	/* returns the previous value, which equals old on success */
48 | 		if (val == old) {
49 | 			break;
50 | 		}
51 | 		__cpu_relax();
52 | 	} while (1);
53 | 
54 | 	return val >> 32;	/* the reference count observed just before the increment */
55 | }
56 | 
57 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) {
58 | 	unsigned long val, old;
59 | 
60 | 	do {
61 | 		old = *(volatile unsigned long *) lock;
62 | 		val = old - 0x100000000;	/* decrement the reference count */
63 | 
64 | 		while ((old & 0xFFFFFFFF) && ((val >> 32) > 0)) {
65 | 			old = *(volatile unsigned long *) lock;
66 | 			val = old - 0x100000000;
67 | 		}
68 | 
69 | 		val = cas64(lock, val, old);
70 | 		if (val == old) {
71 | 			return;
72 | 		}
73 | 		__cpu_relax();
74 | 	} while (1);
75 | }
76 | 
77 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */
78 | 
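The lock_acquire/lock_release pair above emulates the Linux lockref scheme on a
single 64-bit word. A minimal standalone sketch of the acquire side, assuming
that GCC's __sync_val_compare_and_swap stands in for cas64() (both return the
value the word held before the operation); the held-lock spin and the
32-reference cap from the test are omitted for brevity:

#include <stdint.h>
#include <stdio.h>

/* Sketch of the lockref layout used above: the lower 32 bits hold a lock
 * flag, the upper 32 bits hold the reference count. */
static uint64_t lockref_get(uint64_t *lock)
{
	for (;;) {
		uint64_t old = *(volatile uint64_t *) lock;
		uint64_t new = old + 0x100000000ULL;	/* count + 1 */
		/* __sync_val_compare_and_swap returns the prior value of *lock */
		if (__sync_val_compare_and_swap(lock, old, new) == old)
			return new >> 32;	/* the post-increment count */
	}
}

int main(void)
{
	uint64_t lock = 0;
	printf("count after get: %lu\n", (unsigned long) lockref_get(&lock));	/* prints 1 */
	return 0;
}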
--------------------------------------------------------------------------------
/benchmarks/lockhammer/tests/cas_rw_lock.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2017, The Linux Foundation. All rights reserved.
3 |  *
4 |  * SPDX-License-Identifier: BSD-3-Clause
5 |  *
6 |  * Redistribution and use in source and binary forms, with or without
7 |  * modification, are permitted provided that the following conditions are
8 |  * met:
9 |  *    * Redistributions of source code must retain the above copyright
10 |  *      notice, this list of conditions and the following disclaimer.
11 |  *    * Redistributions in binary form must reproduce the above
12 |  *      copyright notice, this list of conditions and the following
13 |  *      disclaimer in the documentation and/or other materials provided
14 |  *      with the distribution.
15 |  *    * Neither the name of The Linux Foundation nor the names of its
16 |  *      contributors may be used to endorse or promote products derived
17 |  *      from this software without specific prior written permission.
18 |  *
19 |  * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED
20 |  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
21 |  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT
22 |  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS
23 |  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 |  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 |  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
26 |  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
27 |  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
28 |  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
29 |  * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |  */
31 | 
32 | #ifdef initialize_lock
33 | #undef initialize_lock
34 | #endif
35 | 
36 | #define initialize_lock(lock, pinorder, threads) cas_rw_lock_init(lock, threads)
37 | #define CAS_RW_INIT_VAL 0x20000000
38 | #define CAS_RW_THRESHOLD 0
39 | 
40 | #include "atomics.h"
41 | 
42 | void cas_rw_lock_init(uint64_t *lock, uint64_t threads) {
43 | 	*lock = CAS_RW_INIT_VAL;
44 | }
45 | 
46 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) {
47 | 	unsigned long val, old;
48 | 
49 | 	old = *(volatile unsigned long *) lock;
50 | 	val = old - 1;	/* taking a read lock decrements the counter */
51 | 
52 | 	while (*((long *) &old) > CAS_RW_THRESHOLD) {	/* a positive count means no writer holds the lock */
53 | 		old = *(volatile unsigned long *) lock;
54 | 		val = old - 1;
55 | 		val = cas64_acquire(lock, val, old);	/* returns the previous value; success when it equals old */
56 | 
57 | 		if (val == old) {
58 | 			return CAS_RW_INIT_VAL - val;	/* depth: how far the counter sits below its initial value */
59 | 		}
60 | 	}
61 | 
62 | 	/* exclusive lock is held (should never actually happen in this test) */
63 | 	return 0;
64 | }
65 | 
66 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) {
67 | 	fetchadd64_release(lock, 1);	/* a reader leaves: increment the counter back */
68 | }
69 | 
70 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */
71 | 
--------------------------------------------------------------------------------
/benchmarks/lockhammer/tests/empty.h:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2017, The Linux Foundation. All rights reserved.
3 |  *
4 |  * SPDX-License-Identifier: BSD-3-Clause
5 |  *
6 |  * Redistribution and use in source and binary forms, with or without
7 |  * modification, are permitted provided that the following conditions are
8 |  * met:
9 |  *    * Redistributions of source code must retain the above copyright
10 |  *      notice, this list of conditions and the following disclaimer.
11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #include "atomics.h" 33 | 34 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) { 35 | return 0; 36 | } 37 | 38 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 39 | return; 40 | } 41 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/tests/incdec_refcount.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, The Linux Foundation. All rights reserved. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are 8 | * met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | */ 31 | 32 | #include "atomics.h" 33 | 34 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) { 35 | return fetchadd64(lock, 1ul); 36 | } 37 | 38 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 39 | fetchsub64(lock, 1ul); 40 | } 41 | 42 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */ 43 | -------------------------------------------------------------------------------- /benchmarks/lockhammer/tests/swap_mutex.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017, The Linux Foundation. All rights reserved. 3 | * 4 | * SPDX-License-Identifier: BSD-3-Clause 5 | * 6 | * Redistribution and use in source and binary forms, with or without 7 | * modification, are permitted provided that the following conditions are 8 | * met: 9 | * * Redistributions of source code must retain the above copyright 10 | * notice, this list of conditions and the following disclaimer. 11 | * * Redistributions in binary form must reproduce the above 12 | * copyright notice, this list of conditions and the following 13 | * disclaimer in the documentation and/or other materials provided 14 | * with the distribution. 15 | * * Neither the name of The Linux Foundation nor the names of its 16 | * contributors may be used to endorse or promote products derived 17 | * from this software without specific prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED "AS IS" AND ANY EXPRESS OR IMPLIED 20 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT 22 | * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS 23 | * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR 26 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE 28 | * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN 29 | * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | */ 31 | 32 | #include "atomics.h" 33 | 34 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) { 35 | unsigned long val = 1; 36 | 37 | while (val) { 38 | val = swap64 (lock, 1); // uses acquire-release semantics 39 | } 40 | 41 | return 0; 42 | } 43 | 44 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 45 | __atomic_store_n(lock, 0, __ATOMIC_RELEASE); 46 | } 47 | 48 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */ 49 | -------------------------------------------------------------------------------- /contributing.rst: -------------------------------------------------------------------------------- 1 | Contributing to Synchronization-Benchmarks 2 | ========================================== 3 | 4 | Getting Started 5 | --------------- 6 | 7 | - Make sure you have a `GitHub account`_. 8 | - Create an `issue`_ for your work if one does not already exist. This gives 9 | everyone visibility of whether others are working on something similar. 10 | 11 | - If you intend to include Third Party IP in your contribution, please 12 | raise a separate `issue`_ for this and ensure that the changes that 13 | include Third Party IP are made on a separate topic branch. 
14 | 15 | - `Fork`_ `synchronization-benchmarks`_ on GitHub. 16 | - Clone the fork to your own machine. 17 | - Create a local topic branch based on the `synchronization-benchmarks`_ ``master`` 18 | branch. 19 | - Make sure you have placed the hooks/commit-msg hook into your .git/hooks directory 20 | to append change IDs to your commits. 21 | 22 | Making Changes 23 | -------------- 24 | 25 | - Make commits of logical units. See these general `Git guidelines`_ for 26 | contributing to a project. 27 | - Keep the commits on topic. If you need to fix another bug or make another 28 | enhancement, please create a separate `issue`_ and address it on a separate 29 | topic branch. 30 | - Avoid long commit series. If you do have a long series, consider whether 31 | some commits should be squashed together or addressed in a separate topic. 32 | - Make sure your commit messages are in the proper format. If a commit fixes 33 | a GitHub `issue`_, include a reference; this ensures the `issue`_ is 34 | `automatically closed`_ when merged into the `synchronization-benchmarks`_ ``master`` 35 | branch. 36 | - Where appropriate, please update the documentation and license of files. 37 | 38 | - Ensure that each changed file has the correct copyright and license 39 | information. Files that entirely consist of contributions to this 40 | project should have the copyright notice and BSD-3-Clause SPDX license 41 | identifier as shown in `license.rst`_. Files that contain 42 | changes to imported Third Party IP should contain a notice as follows, 43 | with the original copyright and license text retained: 44 | 45 | :: 46 | 47 | Portions copyright (c) [XXXX-]YYYY, ARM Limited and Contributors. All rights reserved. 48 | 49 | where XXXX is the year of first contribution (if different to YYYY) and 50 | YYYY is the year of most recent contribution. 51 | - For topics with multiple commits, you should make all documentation 52 | changes (and nothing else) in the last commit of the series. Otherwise, 53 | include the documentation changes within the single commit. 54 | 55 | Submitting Changes 56 | ------------------ 57 | 58 | - We prefer that each commit in the series has at least one ``Signed-off-by:`` 59 | line, using your real name and email address, but it is not required. 60 | - Push your local changes to your fork of the repository. 61 | - Submit a `pull request`_ to the `synchronization-benchmarks`_ ``integration`` branch. 62 | 63 | - The changes in the `pull request`_ will then undergo further review. 64 | Any review comments will be made as comments on the `pull request`_. 65 | This may require you to do some rework. 66 | 67 | - When the changes are accepted, the maintainer of the repository will integrate them. 68 | 69 | - Typically, the Maintainers will merge the `pull request`_ into the 70 | ``integration`` branch within the GitHub UI, creating a merge commit. 71 | - Please avoid creating merge commits in the `pull request`_ itself. 72 | - If the `pull request`_ is not based on a recent commit, the Maintainers 73 | may rebase it onto the ``master`` branch first, or ask you to do this. 74 | - If the `pull request`_ cannot be automatically merged, the Maintainers 75 | will ask you to rebase it onto the ``master`` branch. 76 | - After final integration testing, the Maintainers will push your merge 77 | commit to the ``master`` branch. 
If a problem is found during integration, 78 | the merge commit will be removed from the ``integration`` branch and the 79 | Maintainers will ask you to create a new pull request to resolve the 80 | problem. 81 | - Please do not delete your topic branch until it is safely merged into 82 | the ``master`` branch. 83 | 84 | -------------- 85 | 86 | *Copyright (c) 2018, ARM Limited and Contributors. All rights reserved.* 87 | 88 | .. _GitHub account: https://github.com/signup/free 89 | .. _issue: https://github.com/ARM-software/synchronization-benchmarks/issues 90 | .. _Fork: https://help.github.com/articles/fork-a-repo 91 | .. _synchronization-benchmarks: https://github.com/ARM-software/synchronization-benchmarks 92 | .. _Git guidelines: http://git-scm.com/book/ch5-2.html 93 | .. _automatically closed: https://help.github.com/articles/closing-issues-via-commit-messages 94 | .. _license.rst: ./license.rst 95 | .. _pull request: https://help.github.com/articles/using-pull-requests 96 | -------------------------------------------------------------------------------- /ext/linux/hybrid_spinlock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 ARM Ltd. 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License version 2 as 6 | * published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | * GNU General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public License 14 | * along with this program. If not, see . 15 | */ 16 | 17 | #ifdef initialize_lock 18 | #undef initialize_lock 19 | #endif 20 | 21 | #define initialize_lock(lock, pinorder, threads) mcs_init_locks(lock, threads) 22 | 23 | #include "atomics.h" 24 | #include "lk_atomics.h" 25 | 26 | #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ 27 | << _Q_ ## type ## _OFFSET) 28 | 29 | #define _Q_TAIL_IDX_OFFSET 0 30 | #define _Q_TAIL_IDX_BITS 2 31 | #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) 32 | 33 | #define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) 34 | #define _Q_TAIL_CPU_BITS (16 - _Q_TAIL_CPU_OFFSET) 35 | #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) 36 | #define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET 37 | 38 | #define _Q_TAIL_MASK (_Q_TAIL_CPU_MASK | _Q_TAIL_IDX_MASK) 39 | 40 | #define _Q_THRESHOLD 4 41 | 42 | struct mcs_spinlock { 43 | struct mcs_spinlock *next; 44 | int locked; 45 | int count; 46 | }; 47 | 48 | struct mcs_spinlock *mcs_pool; 49 | 50 | void mcs_init_locks (uint64_t *lock, unsigned long cores) 51 | { 52 | size_t n = 4 * cores * sizeof(struct mcs_spinlock); 53 | if (mcs_pool) { free(mcs_pool); } 54 | mcs_pool = (struct mcs_spinlock *) malloc(n); 55 | if (! 
mcs_pool) { fprintf(stderr, "malloc failed in " __FILE__ " %s\n", __func__); exit(-1); } 56 | memset(mcs_pool, 0, n); 57 | } 58 | 59 | static inline unsigned ticket_depth (unsigned ticketval) 60 | { 61 | return (((ticketval & 0xff000000) >> 24) - ((ticketval & 0x00ff0000) >> 16)) & 0xff; 62 | } 63 | 64 | static inline __attribute((pure)) u32 encode_tail(int cpu, int idx) 65 | { 66 | u32 tail; 67 | 68 | #ifdef CONFIG_DEBUG_SPINLOCK 69 | BUG_ON(idx > 3); 70 | #endif 71 | tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; 72 | tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ 73 | 74 | return tail; 75 | } 76 | 77 | static inline __attribute((pure)) struct mcs_spinlock *decode_tail(u32 tail) 78 | { 79 | int cpu = ((tail & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET) - 1; 80 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; 81 | 82 | return &mcs_pool[4 * cpu + idx]; 83 | } 84 | 85 | static __always_inline u32 xchg_tail(uint64_t *lock, u32 tail) 86 | { 87 | /* 88 | * Use release semantics to make sure that the MCS node is properly 89 | * initialized before changing the tail code. 90 | */ 91 | return (u32)xchg_release16((uint16_t *) lock, 92 | tail & _Q_TAIL_MASK); 93 | } 94 | 95 | unsigned long hybrid_spinlock_slowpath(uint64_t *lock, unsigned long threadnum) 96 | { 97 | unsigned long depth = 0; 98 | struct mcs_spinlock *prev, *next, *node; 99 | 100 | u32 /* new, */ old, tail, val, ticketval; 101 | 102 | int idx; 103 | 104 | node = &mcs_pool[4 * threadnum]; 105 | idx = node->count++; 106 | 107 | tail = encode_tail(threadnum, idx); 108 | 109 | node += idx; 110 | node->locked = 0; 111 | node->next = NULL; 112 | 113 | old = xchg_tail(lock, tail); 114 | next = NULL; 115 | 116 | if (old & _Q_TAIL_MASK) { 117 | prev = decode_tail(old); 118 | smp_read_barrier_depends(); 119 | 120 | WRITE_ONCE(prev->next, node); 121 | 122 | arch_mcs_spin_lock_contended(&node->locked); 123 | 124 | next = READ_ONCE(node->next); 125 | if (next) 126 | prefetchw(next); 127 | } 128 | 129 | /* do ticket spin */ 130 | #if defined(__aarch64__) 131 | unsigned /* tmp, */ tmp2, tmp3; 132 | asm volatile ( 133 | "5: ldaxr %w[ticket], %[lock]\n" 134 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 135 | " stxr %w[tmp3], %w[tmp2], %[lock]\n" 136 | " cbnz %w[tmp3], 5b\n" 137 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 138 | [tmp3] "=&r" (tmp3), [lock] "+Q" (*lock) 139 | : [ticket_inc] "r" (0x01000000) 140 | : ); 141 | // printf("%d enqueued on %d behind %d (serving %d)\n", ticketval >> 24, tail >> 2, old >> 2, (ticketval >> 16) & 0xFF); 142 | 143 | depth = ticket_depth(ticketval); 144 | 145 | asm volatile ( 146 | " sevl\n" 147 | "7: wfe\n" 148 | " ldaxrb %w[tmp3], %[serving]\n" 149 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 150 | " cbnz %w[tmp2], 7b\n" 151 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 152 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 153 | : [tmp] "r" (ticketval >> 24) 154 | : ); 155 | #else 156 | #endif 157 | 158 | val = READ_ONCE(*lock); 159 | 160 | /* If we're the list tail then destroy the queue */ 161 | while ((val & _Q_TAIL_MASK) == tail) { 162 | old = atomic_cmpxchg_relaxed32((u32 *) lock, val, val & ~_Q_TAIL_MASK); 163 | 164 | if (old == val) 165 | goto release; 166 | 167 | val = old; 168 | } 169 | 170 | if (!next) { 171 | while (!(next = READ_ONCE(node->next))) 172 | cpu_relax(); 173 | } 174 | 175 | arch_mcs_spin_unlock_contended(&next->locked); 176 | 177 | release: 178 | 179 | mcs_pool[4 * threadnum].count--; 180 | 181 | return depth; 182 | } 183 | 184 | unsigned long __attribute__((noinline)) 
lock_acquire (uint64_t *lock, unsigned long threadnum) { 185 | unsigned long depth = 0; 186 | 187 | u32 ticketval; 188 | 189 | unsigned enqueue; 190 | 191 | #if defined(__aarch64__) 192 | unsigned /* tmp, */ tmp2, tmp3; 193 | asm volatile ( 194 | "1: ldaxr %w[ticket], %[lock]\n" 195 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 196 | " rev16 %w[enqueue], %w[ticket]\n" 197 | " eor %w[enqueue], %w[enqueue], %w[ticket]\n" 198 | " cbnz %w[enqueue], 2f\n" 199 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 200 | " cbnz %w[enqueue], 1b\n" 201 | "2:\n" 202 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 203 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 204 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24) 205 | : ); 206 | if (!enqueue) 207 | return 0; /* Ticket acquired immediately */ 208 | 209 | #else 210 | /* TODO: Generic C implementation of fastpath */ 211 | val = READ_ONCE(*lock); 212 | 213 | enqueue = val & _Q_TAIL_MASK; 214 | 215 | if (!enqueue) 216 | { 217 | } 218 | #endif 219 | 220 | #if defined (__aarch64__) 221 | asm volatile ( 222 | " mov %w[enqueue], #1\n" 223 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 224 | " rev16 %w[tmp2], %w[tmp3]\n" 225 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 226 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 227 | " cbz %w[tmp3], 4f\n" 228 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 229 | " cbnz %w[tmp3], 4f\n" 230 | "3: ldaxr %w[ticket], %[lock]\n" 231 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 232 | " rev16 %w[tmp2], %w[tmp3]\n" 233 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 234 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 235 | " cbz %w[tmp3], 4f\n" 236 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 237 | " cbnz %w[tmp3], 4f\n" 238 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 239 | " cbnz %w[enqueue], 3b\n" 240 | "4:\n" 241 | : [ticket] "+&r" (ticketval), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 242 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 243 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24), 244 | [qtailmask] "i" (_Q_TAIL_MASK) 245 | : ); 246 | #else 247 | #endif 248 | 249 | if (enqueue) 250 | { 251 | depth = hybrid_spinlock_slowpath(lock, threadnum); 252 | } 253 | else 254 | { 255 | depth = ticket_depth(ticketval); 256 | #if defined(__aarch64__) 257 | asm volatile ( 258 | " sevl\n" 259 | "9: wfe\n" 260 | " ldaxrb %w[tmp3], %[serving]\n" 261 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 262 | " cbnz %w[tmp2], 9b\n" 263 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 264 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 265 | : [tmp] "r" (ticketval >> 24) 266 | : ); 267 | #else 268 | #endif 269 | } 270 | 271 | return depth; 272 | } 273 | 274 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 275 | #if defined(__x86_64__) 276 | asm volatile ( 277 | " addw $0x2,%[lock]\n" 278 | : [lock] "+m" (*lock) 279 | : 280 | : "cc" ); 281 | #elif defined(__aarch64__) 282 | unsigned long tmp; 283 | asm volatile ( 284 | " ldrb %w[tmp], %[lock]\n" 285 | " add %w[tmp], %w[tmp], #0x1\n" 286 | " stlrb %w[tmp], %[lock]\n" 287 | : [tmp] "=&r" (tmp), [lock] "+Q" (*(((unsigned char *) lock) + 2)) 288 | : 289 | : ); 290 | 291 | #endif 292 | } 293 | 294 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab : */ 295 | -------------------------------------------------------------------------------- /ext/linux/hybrid_spinlock_fastdequeue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 ARM Ltd. 
3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License version 2 as 6 | * published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | * GNU General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public License 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 | */ 16 | 17 | #ifdef initialize_lock 18 | #undef initialize_lock 19 | #endif 20 | 21 | #define initialize_lock(lock, pinorder, threads) mcs_init_locks(lock, threads) 22 | 23 | #include "atomics.h" 24 | #include "lk_atomics.h" 25 | 26 | #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ 27 | << _Q_ ## type ## _OFFSET) 28 | 29 | #define _Q_TAIL_IDX_OFFSET 0 30 | #define _Q_TAIL_IDX_BITS 2 31 | #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) 32 | 33 | #define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) 34 | #define _Q_TAIL_CPU_BITS (16 - _Q_TAIL_CPU_OFFSET) 35 | #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) 36 | #define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET 37 | 38 | #define _Q_TAIL_MASK (_Q_TAIL_CPU_MASK | _Q_TAIL_IDX_MASK) 39 | 40 | /* Number of ticket waiters required before a queue is established */ 41 | #define _Q_THRESHOLD 6 42 | /* Maximum number of queued waiters allowed to exit queue early */ 43 | #define _Q_DEQUEUE_THRESHOLD 2 44 | 45 | struct mcs_spinlock { 46 | struct mcs_spinlock *next; 47 | int locked; 48 | int count; 49 | }; 50 | 51 | struct mcs_spinlock *mcs_pool; 52 | 53 | void mcs_init_locks (uint64_t *lock, unsigned long cores) 54 | { 55 | size_t n = 4 * cores * sizeof(struct mcs_spinlock); 56 | if (mcs_pool) { free(mcs_pool); } 57 | mcs_pool = (struct mcs_spinlock *) malloc(n); 58 | if (! mcs_pool) { fprintf(stderr, "malloc failed in " __FILE__ " %s\n", __func__); exit(-1); } 59 | memset(mcs_pool, 0, n); 60 | } 61 | 62 | static inline unsigned ticket_depth (unsigned ticketval) 63 | { 64 | return (((ticketval & 0xff000000) >> 24) - ((ticketval & 0x00ff0000) >> 16)) & 0xff; 65 | } 66 | 67 | static inline __attribute((pure)) u32 encode_tail(int cpu, int idx) 68 | { 69 | u32 tail; 70 | 71 | #ifdef CONFIG_DEBUG_SPINLOCK 72 | BUG_ON(idx > 3); 73 | #endif 74 | tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; 75 | tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ 76 | 77 | return tail; 78 | } 79 | 80 | static inline __attribute((pure)) struct mcs_spinlock *decode_tail(u32 tail) 81 | { 82 | int cpu = ((tail & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET) - 1; 83 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; 84 | 85 | return &mcs_pool[4 * cpu + idx]; 86 | } 87 | 88 | static __always_inline u32 xchg_tail(uint64_t *lock, u32 tail) 89 | { 90 | /* 91 | * Use release semantics to make sure that the MCS node is properly 92 | * initialized before changing the tail code.
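 * Only the low 16 bits (the queue tail halfword) are swapped here; the now-serving byte (bits 16-23) and the next-ticket byte (bits 24-31, see ticket_depth() above) stay untouched, so queue updates cannot corrupt the ticket state.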
93 | */ 94 | return (u32)xchg_release16((uint16_t *) lock, 95 | tail & _Q_TAIL_MASK); 96 | } 97 | 98 | unsigned long hybrid_spinlock_slowpath(uint64_t *lock, unsigned long threadnum) 99 | { 100 | unsigned long depth = 0; 101 | struct mcs_spinlock *prev, *next, *node; 102 | 103 | u32 /* new, */ old, tail, val, ticketval; 104 | 105 | int idx; 106 | 107 | node = &mcs_pool[4 * threadnum]; 108 | idx = node->count++; 109 | 110 | tail = encode_tail(threadnum, idx); 111 | 112 | node += idx; 113 | node->locked = 0; 114 | node->next = NULL; 115 | 116 | old = xchg_tail(lock, tail); 117 | next = NULL; 118 | 119 | if (old & _Q_TAIL_MASK) { 120 | prev = decode_tail(old); 121 | smp_read_barrier_depends(); 122 | 123 | WRITE_ONCE(prev->next, node); 124 | 125 | arch_mcs_spin_lock_contended(&node->locked); 126 | 127 | next = READ_ONCE(node->next); 128 | if (next) 129 | prefetchw(next); 130 | } 131 | 132 | /* do ticket spin */ 133 | #if defined(__aarch64__) 134 | unsigned /* tmp, */ tmp2, tmp3; 135 | #if _Q_DEQUEUE_THRESHOLD 136 | asm volatile ( 137 | " sevl\n" 138 | "44: wfe\n" 139 | "5: ldaxr %w[ticket], %[lock]\n" 140 | " sub %w[tmp3], %w[ticket], %w[ticket], lsl #8\n" 141 | " and %w[tmp3], %w[tmp3], #0xFF000000\n" 142 | " cmp %w[tmp3], %w[qthresh]\n" 143 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 144 | " bgt 44b\n" 145 | " stxr %w[tmp3], %w[tmp2], %[lock]\n" 146 | " cbnz %w[tmp3], 5b\n" 147 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 148 | [tmp3] "=&r" (tmp3), [lock] "+Q" (*lock) 149 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" ((_Q_DEQUEUE_THRESHOLD) << 24) 150 | : "cc" ); 151 | #else 152 | asm volatile ( 153 | "5: ldaxr %w[ticket], %[lock]\n" 154 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 155 | " stxr %w[tmp3], %w[tmp2], %[lock]\n" 156 | " cbnz %w[tmp3], 5b\n" 157 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 158 | [tmp3] "=&r" (tmp3), [lock] "+Q" (*lock) 159 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24) 160 | : ); 161 | asm volatile ( 162 | " sevl\n" 163 | "7: wfe\n" 164 | " ldaxrb %w[tmp3], %[serving]\n" 165 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 166 | " cbnz %w[tmp2], 7b\n" 167 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 168 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 169 | : [tmp] "r" (ticketval >> 24) 170 | : ); 171 | #endif 172 | 173 | depth = ticket_depth(ticketval); 174 | val = READ_ONCE(*lock); 175 | 176 | /* If we're the list tail then destroy the queue */ 177 | while ((val & _Q_TAIL_MASK) == tail) { 178 | old = atomic_cmpxchg_relaxed32((u32 *) lock, val, val & ~_Q_TAIL_MASK); 179 | 180 | if (old == val) 181 | goto release; 182 | 183 | val = old; 184 | } 185 | 186 | if (!next) { 187 | while (!(next = READ_ONCE(node->next))) 188 | cpu_relax(); 189 | } 190 | 191 | arch_mcs_spin_unlock_contended(&next->locked); 192 | 193 | release: 194 | 195 | mcs_pool[4 * threadnum].count--; 196 | 197 | #if _Q_DEQUEUE_THRESHOLD 198 | asm volatile ( 199 | " sevl\n" 200 | "7: wfe\n" 201 | " ldaxrb %w[tmp3], %[serving]\n" 202 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 203 | " cbnz %w[tmp2], 7b\n" 204 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 205 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 206 | : [tmp] "r" (ticketval >> 24) 207 | : ); 208 | #endif 209 | 210 | #else 211 | #endif 212 | 213 | return depth; 214 | 215 | } 216 | 217 | unsigned long __attribute__((noinline)) lock_acquire (uint64_t *lock, unsigned long threadnum) { 218 | unsigned long depth = 0; 219 | 220 | u32 ticketval; 221 | 222 | unsigned enqueue; 223 | 224 | #if defined(__aarch64__) 
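	/* Fastpath sketch: a single LDAXR/STXR pass below tries to take a ticket and acquire in one step. The rev16/eor pair compares each byte of the lock word with its halfword neighbour, so 'enqueue' ends up zero only when next-ticket == now-serving and the two tail bytes match (in practice, an empty queue); in that case the store publishes the incremented next-ticket byte (+0x01000000) and the lock is held with no waiting. */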
225 | unsigned /* tmp, */ tmp2, tmp3; 226 | asm volatile ( 227 | "1: ldaxr %w[ticket], %[lock]\n" 228 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 229 | " rev16 %w[enqueue], %w[ticket]\n" 230 | " eor %w[enqueue], %w[enqueue], %w[ticket]\n" 231 | " cbnz %w[enqueue], 2f\n" 232 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 233 | " cbnz %w[enqueue], 1b\n" 234 | "2:\n" 235 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 236 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 237 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24) 238 | : ); 239 | if (!enqueue) 240 | return 0; /* Ticket acquired immediately */ 241 | 242 | #else 243 | /* TODO: Generic C implementation of fastpath */ 244 | val = READ_ONCE(*lock); 245 | 246 | enqueue = val & _Q_TAIL_MASK; 247 | 248 | if (!enqueue) 249 | { 250 | } 251 | #endif 252 | 253 | #if defined (__aarch64__) 254 | asm volatile ( 255 | " mov %w[enqueue], #1\n" 256 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 257 | " rev16 %w[tmp2], %w[tmp3]\n" 258 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 259 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 260 | " cbz %w[tmp3], 4f\n" 261 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 262 | " cbnz %w[tmp3], 4f\n" 263 | "3: ldaxr %w[ticket], %[lock]\n" 264 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 265 | " rev16 %w[tmp2], %w[tmp3]\n" 266 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 267 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 268 | " cbz %w[tmp3], 4f\n" 269 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 270 | " cbnz %w[tmp3], 4f\n" 271 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 272 | " cbnz %w[enqueue], 3b\n" 273 | "4:\n" 274 | : [ticket] "+&r" (ticketval), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 275 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 276 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24), 277 | [qtailmask] "i" (_Q_TAIL_MASK) 278 | : ); 279 | #else 280 | #endif 281 | 282 | if (enqueue) 283 | { 284 | depth = hybrid_spinlock_slowpath(lock, threadnum); 285 | } 286 | else 287 | { 288 | depth = ticket_depth(ticketval); 289 | #if defined(__aarch64__) 290 | asm volatile ( 291 | " sevl\n" 292 | "9: wfe\n" 293 | " ldaxrb %w[tmp3], %[serving]\n" 294 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 295 | " cbnz %w[tmp2], 9b\n" 296 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 297 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 298 | : [tmp] "r" (ticketval >> 24) 299 | : ); 300 | #else 301 | #endif 302 | } 303 | 304 | return depth; 305 | } 306 | 307 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 308 | #if defined(__x86_64__) 309 | asm volatile ( 310 | " addw $0x2,%[lock]\n" 311 | : [lock] "+m" (*lock) 312 | : 313 | : "cc" ); 314 | #elif defined(__aarch64__) 315 | unsigned long tmp; 316 | asm volatile ( 317 | " ldrb %w[tmp], %[lock]\n" 318 | " add %w[tmp], %w[tmp], #0x1\n" 319 | " stlrb %w[tmp], %[lock]\n" 320 | : [tmp] "=&r" (tmp), [lock] "+Q" (*(((unsigned char *) lock) + 2)) 321 | : 322 | : ); 323 | 324 | #endif 325 | } 326 | 327 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab : */ 328 | -------------------------------------------------------------------------------- /ext/linux/hybrid_spinlock_old_fastdequeue.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 ARM Ltd. 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License version 2 as 6 | * published by the Free Software Foundation. 
7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | * GNU General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public License 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 | */ 16 | 17 | // 18 | // NOTE: This file is currently unused 19 | // 20 | 21 | 22 | #ifdef initialize_lock 23 | #undef initialize_lock 24 | #endif 25 | 26 | #define initialize_lock(lock, threads) mcs_init_locks(lock, threads) 27 | 28 | #include "atomics.h" 29 | #include "lk_atomics.h" 30 | 31 | #define _Q_SET_MASK(type) (((1U << _Q_ ## type ## _BITS) - 1)\ 32 | << _Q_ ## type ## _OFFSET) 33 | 34 | #define _Q_TAIL_IDX_OFFSET 0 35 | #define _Q_TAIL_IDX_BITS 2 36 | #define _Q_TAIL_IDX_MASK _Q_SET_MASK(TAIL_IDX) 37 | 38 | #define _Q_TAIL_CPU_OFFSET (_Q_TAIL_IDX_OFFSET + _Q_TAIL_IDX_BITS) 39 | #define _Q_TAIL_CPU_BITS (16 - _Q_TAIL_CPU_OFFSET) 40 | #define _Q_TAIL_CPU_MASK _Q_SET_MASK(TAIL_CPU) 41 | #define _Q_TAIL_OFFSET _Q_TAIL_IDX_OFFSET 42 | 43 | #define _Q_TAIL_MASK (_Q_TAIL_CPU_MASK | _Q_TAIL_IDX_MASK) 44 | 45 | #define _Q_THRESHOLD 4 46 | 47 | struct mcs_spinlock { 48 | struct mcs_spinlock *next; 49 | int locked; 50 | int count; 51 | }; 52 | 53 | struct mcs_spinlock *mcs_pool; 54 | 55 | void mcs_init_locks (uint64_t *lock, unsigned long cores) 56 | { 57 | size_t n = 4 * cores * sizeof(struct mcs_spinlock); 58 | if (mcs_pool) { free(mcs_pool); } 59 | mcs_pool = (struct mcs_spinlock *) malloc(n); 60 | if (! mcs_pool) { fprintf(stderr, "malloc failed in " __FILE__ " %s\n", __func__); exit(-1); } 61 | memset(mcs_pool, 0, n); 62 | } 63 | 64 | static inline __attribute((pure)) u32 encode_tail(int cpu, int idx) 65 | { 66 | u32 tail; 67 | 68 | #ifdef CONFIG_DEBUG_SPINLOCK 69 | BUG_ON(idx > 3); 70 | #endif 71 | tail = (cpu + 1) << _Q_TAIL_CPU_OFFSET; 72 | tail |= idx << _Q_TAIL_IDX_OFFSET; /* assume < 4 */ 73 | 74 | return tail; 75 | } 76 | 77 | static inline __attribute((pure)) struct mcs_spinlock *decode_tail(u32 tail) 78 | { 79 | int cpu = ((tail & _Q_TAIL_CPU_MASK) >> _Q_TAIL_CPU_OFFSET) - 1; 80 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; 81 | 82 | return &mcs_pool[4 * cpu + idx]; 83 | } 84 | 85 | static __always_inline u32 xchg_tail(uint64_t *lock, u32 tail) 86 | { 87 | /* 88 | * Use release semantics to make sure that the MCS node is properly 89 | * initialized before changing the tail code.
90 | */ 91 | return (u32)xchg_release16((uint16_t *) lock, 92 | tail & _Q_TAIL_MASK); 93 | } 94 | 95 | void hybrid_spinlock_slowpath(uint64_t *lock, unsigned long threadnum) 96 | { 97 | unsigned long depth = 0; 98 | struct mcs_spinlock *prev, *next, *node; 99 | 100 | u32 new, old, tail, val, ticketval; 101 | 102 | int idx; 103 | 104 | node = &mcs_pool[4 * threadnum]; 105 | idx = node->count++; 106 | 107 | tail = encode_tail(threadnum, idx); 108 | 109 | node += idx; 110 | node->locked = 0; 111 | node->next = NULL; 112 | 113 | old = xchg_tail(lock, tail); 114 | next = NULL; 115 | 116 | if (old & _Q_TAIL_MASK) { 117 | prev = decode_tail(old); 118 | smp_read_barrier_depends(); 119 | 120 | WRITE_ONCE(prev->next, node); 121 | 122 | arch_mcs_spin_lock_contended(&node->locked); 123 | 124 | next = READ_ONCE(node->next); 125 | if (next) 126 | prefetchw(next); 127 | } 128 | 129 | /* do ticket spin */ 130 | #if defined(__aarch64__) 131 | unsigned tmp, tmp2, tmp3; 132 | asm volatile ( 133 | " sevl\n" 134 | "44: wfe\n" 135 | "5: ldaxr %w[ticket], %[lock]\n" 136 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 137 | " rev16 %w[tmp2], %w[tmp3]\n" 138 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 139 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 140 | " cbz %w[tmp3], 44b\n" 141 | " stxr %w[tmp3], %w[tmp2], %[lock]\n" 142 | " cbnz %w[tmp3], 5b\n" 143 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 144 | [tmp3] "=&r" (tmp3), [lock] "+Q" (*lock) 145 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24) 146 | : ); 147 | // printf("%d enqueued on %d behind %d (serving %d)\n", ticketval >> 24, tail >> 2, old >> 2, (ticketval >> 16) & 0xFF); 148 | val = READ_ONCE(*lock); 149 | 150 | /* If we're the list tail then destroy the queue */ 151 | while ((val & _Q_TAIL_MASK) == tail) { 152 | old = atomic_cmpxchg_relaxed32((u32 *) lock, val, val & ~_Q_TAIL_MASK); 153 | 154 | if (old == val) 155 | goto release; 156 | 157 | val = old; 158 | } 159 | 160 | if (!next) { 161 | while (!(next = READ_ONCE(node->next))) 162 | cpu_relax(); 163 | } 164 | 165 | arch_mcs_spin_unlock_contended(&next->locked); 166 | 167 | release: 168 | 169 | mcs_pool[4 * threadnum].count--; 170 | asm volatile ( 171 | " sevl\n" 172 | "7: wfe\n" 173 | " ldaxrb %w[tmp3], %[serving]\n" 174 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 175 | " cbnz %w[tmp2], 7b\n" 176 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 177 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 178 | : [tmp] "r" (ticketval >> 24) 179 | : ); 180 | #else 181 | #endif 182 | 183 | } 184 | 185 | unsigned long __attribute__((noinline)) lock_acquire (uint64_t *lock, unsigned long threadnum) { 186 | unsigned long depth = 0; 187 | 188 | u32 ticketval; 189 | 190 | unsigned enqueue; 191 | 192 | #if defined(__aarch64__) 193 | unsigned tmp, tmp2, tmp3; 194 | asm volatile ( 195 | "1: ldaxr %w[ticket], %[lock]\n" 196 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 197 | " rev16 %w[enqueue], %w[ticket]\n" 198 | " eor %w[enqueue], %w[enqueue], %w[ticket]\n" 199 | " cbnz %w[enqueue], 2f\n" 200 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 201 | " cbnz %w[enqueue], 1b\n" 202 | "2:\n" 203 | : [ticket] "=&r" (ticketval), [tmp2] "=&r" (tmp2), 204 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 205 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24) 206 | : ); 207 | if (!enqueue) 208 | return 0; /* Ticket acquired immediately */ 209 | 210 | depth = ((ticketval >> 24) - (ticketval >> 16)) & 0xFF; 211 | #else 212 | /* TODO: Generic C implementation of fastpath */ 213 | val = 
READ_ONCE(*lock); 214 | 215 | enqueue = val & _Q_TAIL_MASK; 216 | 217 | if (!(val & _Q_TAIL_MASK)) 218 | { 219 | } 220 | #endif 221 | 222 | #if defined (__aarch64__) 223 | asm volatile ( 224 | " mov %w[enqueue], #1\n" 225 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 226 | " rev16 %w[tmp2], %w[tmp3]\n" 227 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 228 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 229 | " cbz %w[tmp3], 4f\n" 230 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 231 | " cbnz %w[tmp3], 4f\n" 232 | "3: ldaxr %w[ticket], %[lock]\n" 233 | " sub %w[tmp3], %w[ticket], %w[qthresh]\n" 234 | " rev16 %w[tmp2], %w[tmp3]\n" 235 | " eor %w[tmp3], %w[tmp2], %w[tmp3]\n" 236 | " add %w[tmp2], %w[ticket], %w[ticket_inc]\n" 237 | " cbz %w[tmp3], 4f\n" 238 | " and %w[tmp3], %w[ticket], %w[qtailmask]\n" 239 | " cbnz %w[tmp3], 4f\n" 240 | " stxr %w[enqueue], %w[tmp2], %[lock]\n" 241 | " cbnz %w[enqueue], 3b\n" 242 | "4:\n" 243 | : [ticket] "+&r" (ticketval), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 244 | [enqueue] "=&r" (enqueue), [lock] "+Q" (*lock) 245 | : [ticket_inc] "r" (0x01000000), [qthresh] "r" (_Q_THRESHOLD << 24), 246 | [qtailmask] "i" (_Q_TAIL_MASK) 247 | : ); 248 | #else 249 | #endif 250 | 251 | if (enqueue) 252 | { 253 | hybrid_spinlock_slowpath(lock, threadnum); 254 | } 255 | else 256 | { 257 | depth = 0; 258 | #if defined(__aarch64__) 259 | asm volatile ( 260 | " sevl\n" 261 | "9: wfe\n" 262 | " ldaxrb %w[tmp3], %[serving]\n" 263 | " eor %w[tmp2], %w[tmp], %w[tmp3]\n" 264 | " cbnz %w[tmp2], 9b\n" 265 | : [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), 266 | [serving] "+Q" (*(((unsigned char *) lock) + 2)) 267 | : [tmp] "r" (ticketval >> 24) 268 | : ); 269 | #else 270 | #endif 271 | } 272 | 273 | return depth; 274 | } 275 | 276 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 277 | #if defined(__x86_64__) 278 | asm volatile ( 279 | " addw $0x2,%[lock]\n" 280 | : [lock] "+m" (*lock) 281 | : 282 | : "cc" ); 283 | #elif defined(__aarch64__) 284 | unsigned long tmp; 285 | asm volatile ( 286 | " ldrb %w[tmp], %[lock]\n" 287 | " add %w[tmp], %w[tmp], #0x1\n" 288 | " stlrb %w[tmp], %[lock]\n" 289 | : [tmp] "=&r" (tmp), [lock] "+Q" (*(((unsigned char *) lock) + 2)) 290 | : 291 | : ); 292 | 293 | #endif 294 | } 295 | 296 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab : */ 297 | -------------------------------------------------------------------------------- /ext/linux/include/lk_barrier.h: -------------------------------------------------------------------------------- 1 | /* SPDX-License-Identifier: GPL-2.0 */ 2 | 3 | /* Based on Linux kernel 4.16.10 4 | * arch/arm64/include/asm/barrier.h 5 | * arch/x86/include/asm/barrier.h 6 | * https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git/commit/?h=v4.16.10&id=b3fdf8284efbc5020dfbd0a28150637189076115 7 | */ 8 | 9 | #ifndef __ASM_BARRIER_H 10 | #define __ASM_BARRIER_H 11 | 12 | #include "lk_cmpxchg.h" 13 | 14 | #if defined(__x86_64__) 15 | 16 | #define mb() asm volatile("mfence":::"memory") 17 | #define rmb() asm volatile("lfence":::"memory") 18 | #define wmb() asm volatile("sfence" ::: "memory") 19 | #define dma_rmb() barrier() 20 | #define dma_wmb() barrier() 21 | #define smp_mb() asm volatile("lock; addl $0,-4(%%rsp)" ::: "memory", "cc") 22 | #define smp_rmb() dma_rmb() 23 | #define smp_wmb() barrier() 24 | #define smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0) 25 | 26 | 27 | /* Atomic operations are already serializing on x86 */ 28 | #define __smp_mb__before_atomic()
barrier() 29 | #define __smp_mb__after_atomic() barrier() 30 | 31 | 32 | #elif defined(__aarch64__) 33 | 34 | #define isb() asm volatile("isb" : : : "memory") 35 | #define dmb(opt) asm volatile("dmb " #opt : : : "memory") 36 | #define dsb(opt) asm volatile("dsb " #opt : : : "memory") 37 | #define psb_csync() asm volatile("hint #17" : : : "memory") 38 | #define csdb() asm volatile("hint #20" : : : "memory") 39 | #define mb() dsb(sy) 40 | #define rmb() dsb(ld) 41 | #define wmb() dsb(st) 42 | #define dma_rmb() dmb(oshld) 43 | #define dma_wmb() dmb(oshst) 44 | #define smp_mb() dmb(ish) 45 | #define smp_rmb() dmb(ishld) 46 | #define smp_wmb() dmb(ishst) 47 | 48 | #else /* No Arch */ 49 | /* TODO: No Arch Default */ 50 | #endif /* __x86_64__ */ 51 | 52 | #endif /* __ASM_BARRIER_H */ 53 | -------------------------------------------------------------------------------- /ext/linux/ticket_spinlock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (C) 2012 ARM Ltd. 3 | * 4 | * This program is free software; you can redistribute it and/or modify 5 | * it under the terms of the GNU General Public License version 2 as 6 | * published by the Free Software Foundation. 7 | * 8 | * This program is distributed in the hope that it will be useful, 9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 11 | * GNU General Public License for more details. 12 | * 13 | * You should have received a copy of the GNU General Public License 14 | * along with this program. If not, see <http://www.gnu.org/licenses/>. 15 | */ 16 | 17 | /* aarch64 version is based on Linux 3.13 */ 18 | 19 | #include "atomics.h" 20 | 21 | unsigned long __attribute__((noinline)) lock_acquire (uint64_t *lock, unsigned long threadnum) { 22 | unsigned long depth = 0; 23 | #if defined(__x86_64__) 24 | asm volatile ( 25 | " movw $0,%[depth]\n" 26 | " nop\n" 27 | " nop\n" 28 | " nop\n" 29 | " mov $0x20000,%%eax\n" 30 | " lock xadd %%eax,%[lock]\n" 31 | " mov %%eax,%%edx\n" 32 | " mov %%eax,%[depth]\n" 33 | " shr $0x10,%%edx\n" 34 | " cmp %%ax,%%dx\n" 35 | " jne 2f\n" 36 | "1: nop\n" 37 | " jmp 4f\n" 38 | "2: movzwl %[lock],%%eax\n" 39 | " mov %%edx,%%ecx\n" 40 | " cmp %%dx,%%ax\n" 41 | " je 1b\n" 42 | "3: pause\n" 43 | " movzwl %[lock],%%eax\n" 44 | " cmp %%cx,%%ax\n" 45 | " jne 3b\n" 46 | "4:\n" 47 | : [lock] "+m" (*lock), [depth] "=m" (depth) 48 | : 49 | : "cc", "eax", "ecx", "edx", "ax", "cx", "dx" ); 50 | depth = (((depth >> 16) - (depth & 0xFFFF)) & 0xFFFF) >> 2; 51 | #elif defined(__aarch64__) 52 | unsigned tmp, tmp2, tmp3; 53 | asm volatile ( 54 | " mov %w[depth], #0\n" 55 | #if defined(USE_LSE) 56 | " mov %w[tmp3], #0x10000\n" 57 | " ldadda %w[tmp3], %w[tmp], %[lock]\n" 58 | " nop\n" 59 | " nop\n" 60 | #else 61 | "1: ldaxr %w[tmp], %[lock]\n" 62 | " add %w[tmp2], %w[tmp], #0x10, lsl #12\n" 63 | " stxr %w[tmp3], %w[tmp2], %[lock]\n" 64 | " cbnz %w[tmp3], 1b\n" 65 | #endif 66 | " eor %w[tmp2], %w[tmp], %w[tmp], ror #16\n" 67 | " cbz %w[tmp2], 3f\n" 68 | " and %w[tmp3], %w[tmp], #0xFFFF\n" 69 | " lsr %w[depth], %w[tmp], #16\n" 70 | " sub %w[depth], %w[depth], %w[tmp3]\n" 71 | " and %w[depth], %w[depth], #0xFFFF\n" 72 | " sevl\n" 73 | "2: wfe\n" 74 | " ldaxrh %w[tmp3], %[lock]\n" 75 | " eor %w[tmp2], %w[tmp3], %w[tmp], lsr #16\n" 76 | " cbnz %w[tmp2], 2b\n" 77 | "3:\n" 78 | : [tmp] "=&r" (tmp), [tmp2] "=&r" (tmp2), 79 | [tmp3] "=&r" (tmp3), [lock] "+Q" (*lock), 80 | [depth] "=&r" (depth) 81 | : 82 | : ); 83 | #endif 84 | 85 | return
depth; 86 | } 87 | 88 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 89 | #if defined(__x86_64__) 90 | asm volatile ( 91 | " addw $0x2,%[lock]\n" 92 | : [lock] "+m" (*lock) 93 | : 94 | : "cc" ); 95 | #elif defined(__aarch64__) 96 | unsigned long tmp; 97 | asm volatile ( 98 | #if defined(USE_LSE) 99 | " mov %w[tmp], #1\n" 100 | " staddlh %w[tmp], %[lock]\n" 101 | " nop\n" 102 | #else 103 | " ldrh %w[tmp], %[lock]\n" 104 | " add %w[tmp], %w[tmp], #0x1\n" 105 | " stlrh %w[tmp], %[lock]\n" 106 | #endif 107 | : [tmp] "=&r" (tmp), [lock] "+Q" (*lock) 108 | : 109 | : ); 110 | #endif 111 | } 112 | 113 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */ 114 | -------------------------------------------------------------------------------- /ext/mysql/cas_event_mutex.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Copyright (c) 1994, 2017, Oracle and/or its affiliates. All Rights Reserved. 4 | Copyright (c) 2017, The Linux Foundation. All rights reserved. 5 | 6 | This program is free software; you can redistribute it and/or modify it under 7 | the terms of the GNU General Public License as published by the Free Software 8 | Foundation; version 2 of the License. 9 | 10 | This program is distributed in the hope that it will be useful, but WITHOUT 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License along with 15 | this program; if not, write to the Free Software Foundation, Inc., 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 17 | 18 | *****************************************************************************/ 19 | 20 | /* Based on MySQL 5.7 */ 21 | #ifdef initialize_lock 22 | #undef initialize_lock 23 | #endif 24 | 25 | #define initialize_lock(lock, pinorder, threads) event_mutex_init(lock, threads) 26 | 27 | #include "atomics.h" 28 | #include "ut_atomics.h" 29 | 30 | unsigned long ev_generation = 0; 31 | 32 | typedef unsigned long ulint; 33 | 34 | /** Mutex states. */ 35 | enum mutex_state_t { 36 | /** Mutex is free */ 37 | MUTEX_STATE_UNLOCKED = 0, 38 | 39 | /** Mutex is acquired by some thread. */ 40 | MUTEX_STATE_LOCKED = 1, 41 | 42 | /** Mutex is contended and there are threads waiting on the lock. */ 43 | MUTEX_STATE_WAITERS = 2 44 | }; 45 | 46 | #define UT_RND1 151117737 // 901DFA9 47 | #define UT_RND2 119785373 // 723C79D 48 | #define UT_RND3 85689495 // 51B8497 49 | #define UT_RND4 76595339 // 490C08B 50 | #define UT_SUM_RND2 98781234 // 5E34832 51 | #define UT_SUM_RND3 126792457 // 78EB309 52 | #define UT_SUM_RND4 63498502 // 3C8E906 53 | #define UT_XOR_RND1 187678878 // B2FC09E 54 | #define UT_XOR_RND2 143537923 // 88E3703 55 | 56 | /** Seed value of ut_rnd_gen_ulint() */ 57 | ulint ut_rnd_ulint_counter = 65654363; 58 | 59 | /** Wakeup any waiting thread(s). */ 60 | 61 | void lock_signal(void) 62 | { 63 | unsigned long version = *((volatile unsigned long *) &ev_generation); 64 | 65 | 66 | *((volatile unsigned long *) &ev_generation) = (version + 1); 67 | } 68 | 69 | /** Try and acquire the lock using TestAndSet. 
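On aarch64 (unless USE_BUILTIN is defined) the swap is open-coded below as an LDAXR/STXR pair, so a held lock is detected right after the load without issuing the store; other builds fall back to a plain 64-bit atomic swap (swap64).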
70 | @return true if lock succeeded */ 71 | int tas_lock(uint64_t *lock) 72 | { 73 | #if defined(__aarch64__) && !defined(USE_BUILTIN) 74 | 75 | uint64_t lockValue; 76 | 77 | __asm__ __volatile__ ("ldaxr %[lockValue],[%[lockAddr]]" 78 | : [lockValue] "=r" (lockValue) 79 | : [lockAddr] "r" (lock) 80 | : "memory"); 81 | if (lockValue != MUTEX_STATE_UNLOCKED) 82 | return 0; 83 | 84 | uint32_t exResult; 85 | 86 | __asm__ __volatile__ ("stxr %w[exResult], %[lockValue], [%[lockAddr]]" 87 | : [exResult] "=&r" (exResult) 88 | : [lockAddr] "r" (lock), [lockValue] "r" ((long) MUTEX_STATE_LOCKED) 89 | : "memory"); 90 | 91 | return exResult == 0; 92 | #else 93 | return(swap64(lock, MUTEX_STATE_LOCKED) 94 | == MUTEX_STATE_UNLOCKED); 95 | #endif 96 | } 97 | 98 | /** In theory __sync_lock_release should be used to release the lock. 99 | Unfortunately, it does not work properly alone. The workaround is 100 | that more conservative __sync_lock_test_and_set is used instead. */ 101 | void tas_unlock(uint64_t *lock) 102 | { 103 | #if defined(__aarch64__) && !defined(USE_BUILTIN) 104 | __asm__ __volatile__ ("stlr %[lockValue],[%[lockAddr]]" 105 | : 106 | : [lockAddr] "r" (lock), [lockValue] "r" ((long) MUTEX_STATE_UNLOCKED) 107 | : "memory"); 108 | os_wmb; 109 | #else 110 | swap64(lock, MUTEX_STATE_UNLOCKED); 111 | #endif 112 | } 113 | 114 | 115 | 116 | /********************************************************//** 117 | The following function generates a series of 'random' ulint integers. 118 | @return the next 'random' number */ 119 | static inline 120 | ulint 121 | ut_rnd_gen_next_ulint( 122 | /*==================*/ 123 | ulint rnd) /*!< in: the previous random number value */ 124 | { 125 | ulint n_bits; 126 | 127 | n_bits = 8 * sizeof(ulint); 128 | 129 | rnd = UT_RND2 * rnd + UT_SUM_RND3; 130 | rnd = UT_XOR_RND1 ^ rnd; 131 | rnd = (rnd << 20) + (rnd >> (n_bits - 20)); 132 | rnd = UT_RND3 * rnd + UT_SUM_RND4; 133 | rnd = UT_XOR_RND2 ^ rnd; 134 | rnd = (rnd << 20) + (rnd >> (n_bits - 20)); 135 | rnd = UT_RND1 * rnd + UT_SUM_RND2; 136 | 137 | return(rnd); 138 | } 139 | 140 | /********************************************************//** 141 | The following function generates 'random' ulint integers which 142 | enumerate the value space of ulint integers in a pseudo random 143 | fashion. Note that the same integer is repeated always after 144 | 2 to power 32 calls to the generator (if ulint is 32-bit). 145 | @return the 'random' number */ 146 | static inline ulint 147 | ut_rnd_gen_ulint(void) 148 | /*==================*/ 149 | { 150 | ulint rnd; 151 | 152 | ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2; 153 | 154 | rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter); 155 | 156 | return(rnd); 157 | } 158 | 159 | /********************************************************//** 160 | Generates a random integer from a given interval. 
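Note that low + (rnd % (high - low)) below never evaluates to high itself; high can only come back through the low == high early return.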
161 | @return the 'random' number */ 162 | ulint 163 | ut_rnd_interval( 164 | /*============*/ 165 | ulint low, /*!< in: low limit; can generate also this value */ 166 | ulint high) /*!< in: high limit; can generate also this value */ 167 | { 168 | ulint rnd; 169 | 170 | if (low == high) { 171 | 172 | return(low); 173 | } 174 | 175 | rnd = ut_rnd_gen_ulint(); 176 | 177 | return(low + (rnd % (high - low))); 178 | } 179 | 180 | ulint 181 | ut_delay( 182 | /*=====*/ 183 | ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ 184 | { 185 | ulint i, j; 186 | 187 | j = 0; 188 | 189 | for (i = 0; i < delay * 50; i++) { 190 | j += i; 191 | UT_RELAX_CPU(); 192 | } 193 | 194 | return(j); 195 | } 196 | 197 | /** @return true if locked by some thread */ 198 | int is_locked(uint64_t *lock) 199 | { 200 | return(*lock != MUTEX_STATE_UNLOCKED); 201 | } 202 | 203 | /** Spin and wait for the mutex to become free. 204 | @param[in] max_spins max spins 205 | @param[in] max_delay max delay per spin 206 | @param[in,out] n_spins spin start index 207 | @return true if unlocked */ 208 | int is_free( 209 | uint64_t *lock, 210 | uint32_t max_spins, 211 | uint32_t max_delay, 212 | uint32_t *n_spins) 213 | { 214 | /* Spin waiting for the lock word to become zero. Note 215 | that we do not have to assume that the read access to 216 | the lock word is atomic, as the actual locking is always 217 | committed with atomic test-and-set. In reality, however, 218 | all processors probably have an atomic read of a memory word. */ 219 | 220 | do { 221 | if (!is_locked(lock)) { 222 | return(1); 223 | } 224 | 225 | ut_delay(ut_rnd_interval(0, max_delay)); 226 | 227 | ++(*n_spins); 228 | 229 | } while (*n_spins < max_spins); 230 | 231 | return(0); 232 | } 233 | 234 | void event_mutex_init(uint64_t *lock, uint64_t threads) { 235 | *lock = MUTEX_STATE_UNLOCKED; 236 | } 237 | 238 | /** Try and lock the mutex. Note: POSIX returns 0 on success. 239 | @return true on success */ 240 | int try_lock(uint64_t *lock) 241 | { 242 | return(tas_lock(lock)); 243 | } 244 | 245 | /** Release the mutex. */ 246 | void lock_exit(uint64_t *lock) 247 | { 248 | /* A problem: we assume that mutex_reset_lock word 249 | is a memory barrier, that is when we read the waiters 250 | field next, the read must be serialized in memory 251 | after the reset. A speculative processor might 252 | perform the read first, which could leave a waiting 253 | thread hanging indefinitely. 254 | 255 | Our current solution call every second 256 | sync_arr_wake_threads_if_sema_free() 257 | to wake up possible hanging threads if they are missed 258 | in mutex_signal_object. */ 259 | 260 | tas_unlock(lock); 261 | 262 | lock_signal(); 263 | } 264 | 265 | /** Spin while trying to acquire the mutex 266 | @param[in] max_spins max number of spins 267 | @param[in] max_delay max delay per spin 268 | @param[in] filename from where called 269 | @param[in] line within filename */ 270 | unsigned long spin_and_try_lock( 271 | uint64_t *lock, 272 | uint32_t max_spins, 273 | uint32_t max_delay) 274 | { 275 | uint32_t n_spins = 0; 276 | uint32_t n_waits = 0; 277 | const uint32_t step = max_spins; 278 | unsigned long wait_state; 279 | 280 | os_rmb; 281 | 282 | for (;;) { 283 | 284 | /* If the lock was free then try and acquire it. 
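Otherwise widen the spin budget (max_spins = n_spins + step) and fall back on the ev_generation counter: record its value, try the lock once more to close the race with a releaser, then spin until lock_exit() advances the generation.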
*/ 285 | 286 | if (is_free(lock, max_spins, max_delay, &n_spins)) { 287 | 288 | if (try_lock(lock)) { 289 | 290 | break; 291 | } else { 292 | 293 | continue; 294 | } 295 | 296 | } else { 297 | max_spins = n_spins + step; 298 | } 299 | 300 | ++n_waits; 301 | 302 | wait_state = *((volatile unsigned long *) &ev_generation); 303 | 304 | // Try lock one last time to avoid race with releaser 305 | if (try_lock(lock)) { 306 | break; 307 | } 308 | 309 | // Spin until generation changes 310 | while(*((volatile unsigned long *) &ev_generation) == wait_state); 311 | } 312 | 313 | return n_spins; 314 | } 315 | 316 | 317 | /** Acquire the mutex. 318 | @param[in] max_spins max number of spins 319 | @param[in] max_delay max delay per spin 320 | @param[in] filename from where called 321 | @param[in] line within filename */ 322 | unsigned long lock_enter(uint64_t *lock, 323 | uint32_t max_spins, 324 | uint32_t max_delay) 325 | { 326 | if (!try_lock(lock)) { 327 | return spin_and_try_lock(lock, max_spins, max_delay); 328 | } 329 | 330 | return 0; 331 | } 332 | 333 | 334 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) { 335 | return lock_enter(lock, 30, 200); 336 | } 337 | 338 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 339 | lock_exit(lock); 340 | } 341 | 342 | /* vim: set tabstop=8 shiftwidth=8 softtabstop=8 noexpandtab: */ 343 | -------------------------------------------------------------------------------- /ext/mysql/event_mutex.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Copyright (c) 2013, 2017, Oracle and/or its affiliates. All Rights Reserved. 4 | Copyright (c) 2017, The Linux Foundation. All rights reserved. 5 | 6 | This program is free software; you can redistribute it and/or modify it under 7 | the terms of the GNU General Public License as published by the Free Software 8 | Foundation; version 2 of the License. 9 | 10 | This program is distributed in the hope that it will be useful, but WITHOUT 11 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 12 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 13 | 14 | You should have received a copy of the GNU General Public License along with 15 | this program; if not, write to the Free Software Foundation, Inc., 16 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 17 | 18 | *****************************************************************************/ 19 | 20 | /* Based on MySQL 5.7 */ 21 | #ifdef initialize_lock 22 | #undef initialize_lock 23 | #endif 24 | 25 | #define initialize_lock(lock, pinorder, threads) event_mutex_init(lock, threads) 26 | 27 | #include "atomics.h" 28 | #include "ut_atomics.h" 29 | 30 | unsigned long ev_generation = 0; 31 | 32 | typedef unsigned long ulint; 33 | 34 | /** Mutex states. */ 35 | enum mutex_state_t { 36 | /** Mutex is free */ 37 | MUTEX_STATE_UNLOCKED = 0, 38 | 39 | /** Mutex is acquired by some thread. */ 40 | MUTEX_STATE_LOCKED = 1, 41 | 42 | /** Mutex is contended and there are threads waiting on the lock. 
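(Defined for completeness: the tas_lock()/tas_unlock() paths in this file only ever install MUTEX_STATE_LOCKED and MUTEX_STATE_UNLOCKED.)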
*/ 43 | MUTEX_STATE_WAITERS = 2 44 | }; 45 | 46 | #define UT_RND1 151117737 // 901DFA9 47 | #define UT_RND2 119785373 // 723C79D 48 | #define UT_RND3 85689495 // 51B8497 49 | #define UT_RND4 76595339 // 490C08B 50 | #define UT_SUM_RND2 98781234 // 5E34832 51 | #define UT_SUM_RND3 126792457 // 78EB309 52 | #define UT_SUM_RND4 63498502 // 3C8E906 53 | #define UT_XOR_RND1 187678878 // B2FC09E 54 | #define UT_XOR_RND2 143537923 // 88E3703 55 | 56 | /** Seed value of ut_rnd_gen_ulint() */ 57 | ulint ut_rnd_ulint_counter = 65654363; 58 | 59 | /** Wakeup any waiting thread(s). */ 60 | 61 | void lock_signal(void) 62 | { 63 | unsigned long version = *((volatile unsigned long *) &ev_generation); 64 | 65 | 66 | *((volatile unsigned long *) &ev_generation) = (version + 1); 67 | } 68 | 69 | /** Try and acquire the lock using TestAndSet. 70 | @return true if lock succeeded */ 71 | int tas_lock(uint64_t *lock) 72 | { 73 | return(swap64(lock, MUTEX_STATE_LOCKED) 74 | == MUTEX_STATE_UNLOCKED); 75 | } 76 | 77 | /** In theory __sync_lock_release should be used to release the lock. 78 | Unfortunately, it does not work properly alone. The workaround is 79 | that more conservative __sync_lock_test_and_set is used instead. */ 80 | void tas_unlock(uint64_t *lock) 81 | { 82 | swap64(lock, MUTEX_STATE_UNLOCKED); 83 | } 84 | 85 | 86 | 87 | /********************************************************//** 88 | The following function generates a series of 'random' ulint integers. 89 | @return the next 'random' number */ 90 | static inline 91 | ulint 92 | ut_rnd_gen_next_ulint( 93 | /*==================*/ 94 | ulint rnd) /*!< in: the previous random number value */ 95 | { 96 | ulint n_bits; 97 | 98 | n_bits = 8 * sizeof(ulint); 99 | 100 | rnd = UT_RND2 * rnd + UT_SUM_RND3; 101 | rnd = UT_XOR_RND1 ^ rnd; 102 | rnd = (rnd << 20) + (rnd >> (n_bits - 20)); 103 | rnd = UT_RND3 * rnd + UT_SUM_RND4; 104 | rnd = UT_XOR_RND2 ^ rnd; 105 | rnd = (rnd << 20) + (rnd >> (n_bits - 20)); 106 | rnd = UT_RND1 * rnd + UT_SUM_RND2; 107 | 108 | return(rnd); 109 | } 110 | 111 | /********************************************************//** 112 | The following function generates 'random' ulint integers which 113 | enumerate the value space of ulint integers in a pseudo random 114 | fashion. Note that the same integer is repeated always after 115 | 2 to power 32 calls to the generator (if ulint is 32-bit). 116 | @return the 'random' number */ 117 | static inline ulint 118 | ut_rnd_gen_ulint(void) 119 | /*==================*/ 120 | { 121 | ulint rnd; 122 | 123 | ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2; 124 | 125 | rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter); 126 | 127 | return(rnd); 128 | } 129 | 130 | /********************************************************//** 131 | Generates a random integer from a given interval. 
132 | @return the 'random' number */ 133 | ulint 134 | ut_rnd_interval( 135 | /*============*/ 136 | ulint low, /*!< in: low limit; can generate also this value */ 137 | ulint high) /*!< in: high limit; can generate also this value */ 138 | { 139 | ulint rnd; 140 | 141 | if (low == high) { 142 | 143 | return(low); 144 | } 145 | 146 | rnd = ut_rnd_gen_ulint(); 147 | 148 | return(low + (rnd % (high - low))); 149 | } 150 | 151 | ulint 152 | ut_delay( 153 | /*=====*/ 154 | ulint delay) /*!< in: delay in microseconds on 100 MHz Pentium */ 155 | { 156 | ulint i, j; 157 | 158 | j = 0; 159 | 160 | for (i = 0; i < delay * 50; i++) { 161 | j += i; 162 | UT_RELAX_CPU(); 163 | } 164 | 165 | return(j); 166 | } 167 | 168 | /** @return true if locked by some thread */ 169 | int is_locked(uint64_t *lock) 170 | { 171 | return(*lock != MUTEX_STATE_UNLOCKED); 172 | } 173 | 174 | /** Spin and wait for the mutex to become free. 175 | @param[in] max_spins max spins 176 | @param[in] max_delay max delay per spin 177 | @param[in,out] n_spins spin start index 178 | @return true if unlocked */ 179 | int is_free( 180 | uint64_t *lock, 181 | uint32_t max_spins, 182 | uint32_t max_delay, 183 | uint32_t *n_spins) 184 | { 185 | /* Spin waiting for the lock word to become zero. Note 186 | that we do not have to assume that the read access to 187 | the lock word is atomic, as the actual locking is always 188 | committed with atomic test-and-set. In reality, however, 189 | all processors probably have an atomic read of a memory word. */ 190 | 191 | do { 192 | if (!is_locked(lock)) { 193 | return(1); 194 | } 195 | 196 | ut_delay(ut_rnd_interval(0, max_delay)); 197 | 198 | ++(*n_spins); 199 | 200 | } while (*n_spins < max_spins); 201 | 202 | return(0); 203 | } 204 | 205 | void event_mutex_init(uint64_t *lock, uint64_t threads) { 206 | *lock = MUTEX_STATE_UNLOCKED; 207 | } 208 | 209 | /** Try and lock the mutex. Note: POSIX returns 0 on success. 210 | @return true on success */ 211 | int try_lock(uint64_t *lock) 212 | { 213 | return(tas_lock(lock)); 214 | } 215 | 216 | /** Release the mutex. */ 217 | void lock_exit(uint64_t *lock) 218 | { 219 | /* A problem: we assume that mutex_reset_lock word 220 | is a memory barrier, that is when we read the waiters 221 | field next, the read must be serialized in memory 222 | after the reset. A speculative processor might 223 | perform the read first, which could leave a waiting 224 | thread hanging indefinitely. 225 | 226 | Our current solution call every second 227 | sync_arr_wake_threads_if_sema_free() 228 | to wake up possible hanging threads if they are missed 229 | in mutex_signal_object. */ 230 | 231 | tas_unlock(lock); 232 | 233 | lock_signal(); 234 | } 235 | 236 | /** Spin while trying to acquire the mutex 237 | @param[in] max_spins max number of spins 238 | @param[in] max_delay max delay per spin 239 | @param[in] filename from where called 240 | @param[in] line within filename */ 241 | unsigned long spin_and_try_lock( 242 | uint64_t *lock, 243 | uint32_t max_spins, 244 | uint32_t max_delay) 245 | { 246 | uint32_t n_spins = 0; 247 | uint32_t n_waits = 0; 248 | const uint32_t step = max_spins; 249 | unsigned long wait_state; 250 | 251 | os_rmb; 252 | 253 | for (;;) { 254 | 255 | /* If the lock was free then try and acquire it. 
*/ 256 | 257 | if (is_free(lock, max_spins, max_delay, &n_spins)) { 258 | 259 | if (try_lock(lock)) { 260 | 261 | break; 262 | } else { 263 | 264 | continue; 265 | } 266 | 267 | } else { 268 | max_spins = n_spins + step; 269 | } 270 | 271 | ++n_waits; 272 | 273 | wait_state = *((volatile unsigned long *) &ev_generation); 274 | 275 | // Try lock one last time to avoid race with releaser 276 | if (try_lock(lock)) { 277 | break; 278 | } 279 | 280 | // Spin until generation changes 281 | while(*((volatile unsigned long *) &ev_generation) == wait_state); 282 | } 283 | 284 | return n_spins; 285 | } 286 | 287 | 288 | /** Acquire the mutex. 289 | @param[in] max_spins max number of spins 290 | @param[in] max_delay max delay per spin 291 | @param[in] filename from where called 292 | @param[in] line within filename */ 293 | unsigned long lock_enter(uint64_t *lock, 294 | uint32_t max_spins, 295 | uint32_t max_delay) 296 | { 297 | if (!try_lock(lock)) { 298 | return spin_and_try_lock(lock, max_spins, max_delay); 299 | } 300 | 301 | return 0; 302 | } 303 | 304 | 305 | static inline unsigned long lock_acquire (uint64_t *lock, unsigned long threadnum) { 306 | return lock_enter(lock, 30, 600); 307 | } 308 | 309 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) { 310 | lock_exit(lock); 311 | } 312 | -------------------------------------------------------------------------------- /ext/mysql/include/ut_atomics.h: -------------------------------------------------------------------------------- 1 | /***************************************************************************** 2 | 3 | Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 | 5 | This program is free software; you can redistribute it and/or modify it under 6 | the terms of the GNU General Public License as published by the Free Software 7 | Foundation; version 2 of the License. 8 | 9 | This program is distributed in the hope that it will be useful, but WITHOUT 10 | ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11 | FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 12 | 13 | You should have received a copy of the GNU General Public License along with 14 | this program; if not, write to the Free Software Foundation, Inc., 15 | 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 16 | 17 | *****************************************************************************/ 18 | 19 | #define os_rmb __atomic_thread_fence(__ATOMIC_ACQUIRE) 20 | #define os_wmb __atomic_thread_fence(__ATOMIC_RELEASE) 21 | 22 | #include "cpu_relax.h" 23 | 24 | #define UT_RELAX_CPU() __cpu_relax() 25 | -------------------------------------------------------------------------------- /ext/pagemap/include/pagemap.h: -------------------------------------------------------------------------------- 1 | /* 2 | * SPDX-License-Identifier: BSD-3-Clause 3 | * SPDX-FileCopyrightText: Copyright 2020 Ciro Santilli 4 | * 5 | * pagemap.h is retrieved from 6 | * https://raw.githubusercontent.com/cirosantilli/linux-kernel-module-cheat/master/lkmc/pagemap.h 7 | */ 8 | 9 | /* https://cirosantilli.com/linux-kernel-module-cheat#userland-physical-address-experiments 10 | * https://cirosantilli.com/linux-kernel-module-cheat#pagemap-dump-out 11 | * 12 | * This file is dual licensed as both 3-Clause BSD and GPLv3. 
13 | */ 14 | 15 | #ifndef LKMC_PAGEMAP_H 16 | #define LKMC_PAGEMAP_H 17 | 18 | #define _XOPEN_SOURCE 700 19 | #include <fcntl.h> /* open */ 20 | #include <stdint.h> /* uint64_t */ 21 | #include <stdio.h> /* snprintf */ 22 | #include <sys/types.h> 23 | #include <unistd.h> /* pread, sysconf */ 24 | 25 | /* Format documented at: 26 | * https://github.com/torvalds/linux/blob/v4.9/Documentation/vm/pagemap.txt 27 | */ 28 | typedef struct { 29 | uint64_t pfn : 55; 30 | unsigned int soft_dirty : 1; 31 | unsigned int file_page : 1; 32 | unsigned int swapped : 1; 33 | unsigned int present : 1; 34 | } LkmcPagemapEntry; 35 | 36 | /* Parse the pagemap entry for the given virtual address. 37 | * 38 | * @param[out] entry the parsed entry 39 | * @param[in] pagemap_fd file descriptor to an open /proc/pid/pagemap file 40 | * @param[in] vaddr virtual address to get entry for 41 | * @return 0 for success, 1 for failure 42 | */ 43 | int lkmc_pagemap_get_entry(LkmcPagemapEntry *entry, int pagemap_fd, uintptr_t vaddr) { 44 | size_t nread; 45 | ssize_t ret; 46 | uint64_t data; 47 | uintptr_t vpn; 48 | 49 | vpn = vaddr / sysconf(_SC_PAGE_SIZE); 50 | nread = 0; 51 | while (nread < sizeof(data)) { 52 | ret = pread( 53 | pagemap_fd, 54 | ((uint8_t*)&data) + nread, 55 | sizeof(data) - nread, 56 | vpn * sizeof(data) + nread 57 | ); 58 | nread += ret; 59 | if (ret <= 0) { 60 | return 1; 61 | } 62 | } 63 | entry->pfn = data & (((uint64_t)1 << 55) - 1); 64 | entry->soft_dirty = (data >> 55) & 1; 65 | entry->file_page = (data >> 61) & 1; 66 | entry->swapped = (data >> 62) & 1; 67 | entry->present = (data >> 63) & 1; 68 | return 0; 69 | } 70 | 71 | /* Convert the given virtual address to physical using /proc/PID/pagemap. 72 | * 73 | * @param[out] paddr physical address 74 | * @param[in] pid process to convert for 75 | * @param[in] vaddr virtual address to get entry for 76 | * @return 0 for success, 1 for failure 77 | */ 78 | int lkmc_pagemap_virt_to_phys_user(uintptr_t *paddr, pid_t pid, uintptr_t vaddr) { 79 | char pagemap_file[BUFSIZ]; 80 | int pagemap_fd; 81 | 82 | snprintf(pagemap_file, sizeof(pagemap_file), "/proc/%ju/pagemap", (uintmax_t)pid); 83 | pagemap_fd = open(pagemap_file, O_RDONLY); 84 | if (pagemap_fd < 0) { 85 | return 1; 86 | } 87 | LkmcPagemapEntry entry; 88 | if (lkmc_pagemap_get_entry(&entry, pagemap_fd, vaddr)) { 89 | return 1; 90 | } 91 | close(pagemap_fd); 92 | *paddr = (entry.pfn * sysconf(_SC_PAGE_SIZE)) + (vaddr % sysconf(_SC_PAGE_SIZE)); 93 | return 0; 94 | } 95 | 96 | #endif 97 | -------------------------------------------------------------------------------- /ext/sms/base/build_config.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 ARM Limited. All rights reserved. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #pragma once 5 | 6 | // Architecture detection is inferred from the toolchain. This relies on 7 | // the C compiler's system-specific macros.
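// A quick way to check which of these predefined macros a toolchain sets, assuming a GCC/Clang-compatible driver: echo | cc -dM -E - | grep -E '__aarch64__|__arm__|__x86_64__|__i386__'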
#if defined(__aarch64__) 9 | #define CONFIG_ARCH_ARM_V8 10 | #define CONFIG_ARCH_64BIT 11 | #elif defined(__arm__) 12 | #define CONFIG_ARCH_ARM_V7 13 | #define CONFIG_ARCH_32BIT 14 | #elif defined(__x86_64__) 15 | #define CONFIG_ARCH_X86_64 16 | #define CONFIG_ARCH_64BIT 17 | #elif defined(__i386__) 18 | #define CONFIG_ARCH_X86 19 | #define CONFIG_ARCH_32BIT 20 | #endif 21 | 22 | #if !defined(CONFIG_ARCH_64BIT) && !defined(CONFIG_ARCH_32BIT) 23 | #error Please add support for N-bit computing to build_config.h 24 | // If you experience this C pre-processor error, take a look at the place 25 | // in this file where CONFIG_ARCH_64/32BIT are defined. If there are no issues 26 | // there and you are needing to add support for a new N-bit processor, please 27 | // search the source code for all occurrences of CONFIG_ARCH_64BIT and 28 | // CONFIG_ARCH_32BIT to check whether further modification is necessary. 29 | // These places will not necessarily #error for unsupported N-bit computing. 30 | #endif 31 | 32 | // OS detection is also inferred from the toolchain. 33 | #if defined(__APPLE__) 34 | #define OS_MACOSX 1 35 | #elif defined(__linux__) 36 | #define OS_LINUX 1 37 | #elif defined(__FreeBSD__) 38 | #define OS_FREEBSD 1 39 | #endif 40 | 41 | #if defined(OS_MACOSX) || defined(OS_LINUX) || defined(OS_FREEBSD) 42 | #define OS_POSIX 1 43 | #endif 44 | 45 | #define MAX_THREADS 32 46 | 47 | //Use LL/SC atomic primitives instead of __atomic_compare_exchange built-ins 48 | //This seems to be the most performant option on ARM but may violate 49 | //recommendations by the ARM architecture (e.g. no memory accesses between 50 | //LL and SC) 51 | //USE_LLSC overrides the use of __atomic_compare_exchange 52 | #ifdef __ARM_ARCH 53 | #define USE_LLSC 54 | #endif 55 | 56 | //Use barrier + relaxed store (DMB;STR) instead of store-release (STRL) 57 | //This is more performant on Cortex-A57 and possibly also on Cortex-A53 58 | #if defined(__aarch64__) 59 | #define USE_DMB 60 | #endif 61 | 62 | #if defined(USE_DMB) && defined(__arm__) 63 | #error USE_DMB optimization only applies to select ARMv8 processors 64 | #endif 65 | 66 | //Use ARM wait-for-event mechanism when busy polling 67 | //This will minimise interconnect transactions and often increase system-wide 68 | //performance 69 | #if defined __ARM_ARCH 70 | #define USE_WFE 71 | #if defined(__arm__) 72 | //TODO: WFE on ARMv7 73 | #undef USE_WFE 74 | #endif 75 | #endif 76 | -------------------------------------------------------------------------------- /ext/sms/base/cpu.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 ARM Limited. All rights reserved. 2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #pragma once 5 | 6 | #ifndef CACHE_LINE 7 | // Default CPU cache line size 8 | #define CACHE_LINE 128 9 | #endif 10 | 11 | #include "cpu_relax.h" 12 | 13 | static inline void doze(void) 14 | { 15 | __cpu_relax(); 16 | } 17 | 18 | int num_cpus(void); 19 | 20 | unsigned long cpu_hz(void); 21 | -------------------------------------------------------------------------------- /ext/sms/base/llsc.h: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2017 ARM Limited. All rights reserved.
2 | // SPDX-License-Identifier: BSD-3-Clause 3 | 4 | #pragma once 5 | 6 | #include "build_config.h" 7 | #include "cpu.h" 8 | 9 | #include <stdint.h> 10 | #include <stdlib.h> 11 | 12 | /****************************************************************************** 13 | * LL/SC primitives 14 | *****************************************************************************/ 15 | 16 | #if __ARM_ARCH == 7 || (__ARM_ARCH == 8 && __ARM_64BIT_STATE == 0) 17 | 18 | static inline void dmb() 19 | { 20 | __asm volatile("dmb" : : : "memory"); 21 | } 22 | 23 | static inline uint8_t ll8(uint8_t *var, int mm) 24 | { 25 | uint8_t old; 26 | __asm volatile("ldrexb %0, [%1]" 27 | : "=&r" (old) 28 | : "r" (var) 29 | : ); 30 | if (mm == __ATOMIC_ACQUIRE) 31 | dmb(); 32 | return old; 33 | } 34 | 35 | static inline uint32_t ll(uint32_t *var, int mm) 36 | { 37 | uint32_t old; 38 | __asm volatile("ldrex %0, [%1]" 39 | : "=&r" (old) 40 | : "r" (var) 41 | : ); 42 | //Barrier after an acquiring load 43 | if (mm == __ATOMIC_ACQUIRE) 44 | dmb(); 45 | return old; 46 | } 47 | #define ll32(a, b) ll((a), (b)) 48 | 49 | //Return 0 on success, 1 on failure 50 | static inline uint32_t sc(uint32_t *var, uint32_t neu, int mm) 51 | { 52 | uint32_t ret; 53 | //Barrier before a releasing store 54 | if (mm == __ATOMIC_RELEASE) 55 | dmb(); 56 | __asm volatile("strex %0, %1, [%2]" 57 | : "=&r" (ret) 58 | : "r" (neu), "r" (var) 59 | : ); 60 | return ret; 61 | } 62 | #define sc32(a, b, c) sc((a), (b), (c)) 63 | 64 | static inline uint64_t lld(uint64_t *var, int mm) 65 | { 66 | uint64_t old; 67 | __asm volatile("ldrexd %0, %H0, [%1]" 68 | : "=&r" (old) 69 | : "r" (var) 70 | : ); 71 | //Barrier after an acquiring load 72 | if (mm == __ATOMIC_ACQUIRE) 73 | dmb(); 74 | return old; 75 | } 76 | #define ll64(a, b) lld((a), (b)) 77 | 78 | //Return 0 on success, 1 on failure 79 | static inline uint32_t scd(uint64_t *var, uint64_t neu, int mm) 80 | { 81 | uint32_t ret; 82 | //Barrier before a releasing store 83 | if (mm == __ATOMIC_RELEASE) 84 | dmb(); 85 | __asm volatile("strexd %0, %1, %H1, [%2]" 86 | : "=&r" (ret) 87 | : "r" (neu), "r" (var) 88 | : ); 89 | return ret; 90 | } 91 | #define sc64(a, b, c) scd((a), (b), (c)) 92 | 93 | #endif 94 | 95 | #if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1 96 | 97 | static inline uint8_t ll8(uint8_t *var, int mm) 98 | { 99 | uint8_t old; 100 | if (mm == __ATOMIC_ACQUIRE) 101 | __asm volatile("ldaxrb %w0, [%1]" 102 | : "=&r" (old) 103 | : "r" (var) 104 | : "memory"); 105 | else if (mm == __ATOMIC_RELAXED) 106 | __asm volatile("ldxrb %w0, [%1]" 107 | : "=&r" (old) 108 | : "r" (var) 109 | : ); 110 | else 111 | abort(); 112 | return old; 113 | } 114 | 115 | static inline uint16_t ll16(uint16_t *var, int mm) 116 | { 117 | uint16_t old; 118 | if (mm == __ATOMIC_ACQUIRE) 119 | __asm volatile("ldaxrh %w0, [%1]" 120 | : "=&r" (old) 121 | : "r" (var) 122 | : "memory"); 123 | else if (mm == __ATOMIC_RELAXED) 124 | __asm volatile("ldxrh %w0, [%1]" 125 | : "=&r" (old) 126 | : "r" (var) 127 | : ); 128 | else 129 | abort(); 130 | return old; 131 | } 132 | 133 | static inline uint32_t ll32(uint32_t *var, int mm) 134 | { 135 | uint32_t old; 136 | if (mm == __ATOMIC_ACQUIRE) 137 | __asm volatile("ldaxr %w0, [%1]" 138 | : "=&r" (old) 139 | : "r" (var) 140 | : "memory"); 141 | else if (mm == __ATOMIC_RELAXED) 142 | __asm volatile("ldxr %w0, [%1]" 143 | : "=&r" (old) 144 | : "r" (var) 145 | : ); 146 | else 147 | abort(); 148 | return old; 149 | } 150 | 151 | //Return 0 on success, 1 on failure 152 | static inline uint8_t sc8(uint8_t *var, uint8_t neu,
int mm) 153 | { 154 | uint8_t ret; 155 | if (mm == __ATOMIC_RELEASE) 156 | __asm volatile("stlxrb %w0, %w1, [%2]" 157 | : "=&r" (ret) 158 | : "r" (neu), "r" (var) 159 | : "memory"); 160 | else if (mm == __ATOMIC_RELAXED) 161 | __asm volatile("stxrb %w0, %w1, [%2]" 162 | : "=&r" (ret) 163 | : "r" (neu), "r" (var) 164 | : ); 165 | else 166 | abort(); 167 | return ret; 168 | } 169 | 170 | //Return 0 on success, 1 on failure 171 | static inline uint32_t sc32(uint32_t *var, uint32_t neu, int mm) 172 | { 173 | uint32_t ret; 174 | if (mm == __ATOMIC_RELEASE) 175 | __asm volatile("stlxr %w0, %w1, [%2]" 176 | : "=&r" (ret) 177 | : "r" (neu), "r" (var) 178 | : "memory"); 179 | else if (mm == __ATOMIC_RELAXED) 180 | __asm volatile("stxr %w0, %w1, [%2]" 181 | : "=&r" (ret) 182 | : "r" (neu), "r" (var) 183 | : ); 184 | else 185 | abort(); 186 | return ret; 187 | } 188 | 189 | static inline uint64_t ll(uint64_t *var, int mm) 190 | { 191 | uint64_t old; 192 | if (mm == __ATOMIC_ACQUIRE) 193 | __asm volatile("ldaxr %0, [%1]" 194 | : "=&r" (old) 195 | : "r" (var) 196 | : "memory"); 197 | else if (mm == __ATOMIC_RELAXED) 198 | __asm volatile("ldxr %0, [%1]" 199 | : "=&r" (old) 200 | : "r" (var) 201 | : ); 202 | else 203 | abort(); 204 | return old; 205 | } 206 | #define ll64(a, b) ll((a), (b)) 207 | 208 | //Return 0 on success, 1 on failure 209 | static inline uint32_t sc(uint64_t *var, uint64_t neu, int mm) 210 | { 211 | uint32_t ret; 212 | if (mm == __ATOMIC_RELEASE) 213 | __asm volatile("stlxr %w0, %1, [%2]" 214 | : "=&r" (ret) 215 | : "r" (neu), "r" (var) 216 | : "memory"); 217 | else if (mm == __ATOMIC_RELAXED) 218 | __asm volatile("stxr %w0, %1, [%2]" 219 | : "=&r" (ret) 220 | : "r" (neu), "r" (var) 221 | : ); 222 | else 223 | abort(); 224 | return ret; 225 | } 226 | #define sc64(a, b, c) sc((a), (b), (c)) 227 | 228 | #if defined(__clang__) 229 | union i128 230 | { 231 | __int128 i128; 232 | int64_t i64[2]; 233 | }; 234 | #endif 235 | 236 | static inline __int128 lld(__int128 *var, int mm) 237 | { 238 | #if defined(__clang__) 239 | union i128 old; 240 | if (mm == __ATOMIC_ACQUIRE) 241 | __asm volatile("ldaxp %0, %1, [%2]" 242 | : "=&r" (old.i64[0]), "=&r" (old.i64[1]) 243 | : "r" (var) 244 | : "memory"); 245 | else if (mm == __ATOMIC_RELAXED) 246 | __asm volatile("ldxp %0, %1, [%2]" 247 | : "=&r" (old.i64[0]), "=&r" (old.i64[1]) 248 | : "r" (var) 249 | : ); 250 | else 251 | abort(); 252 | return old.i128; 253 | #else 254 | __int128 old; 255 | if (mm == __ATOMIC_ACQUIRE) 256 | __asm volatile("ldaxp %0, %H0, [%1]" 257 | : "=&r" (old) 258 | : "r" (var) 259 | : "memory"); 260 | else if (mm == __ATOMIC_RELAXED) 261 | __asm volatile("ldxp %0, %H0, [%1]" 262 | : "=&r" (old) 263 | : "r" (var) 264 | : ); 265 | else 266 | abort(); 267 | return old; 268 | #endif 269 | } 270 | 271 | //Return 0 on success, 1 on failure 272 | static inline uint32_t scd(__int128 *var, __int128 neu, int mm) 273 | { 274 | #if defined(__clang__) 275 | uint32_t ret; 276 | if (mm == __ATOMIC_RELEASE) 277 | __asm volatile("stlxp %w0, %1, %2, [%3]" 278 | : "=&r" (ret) 279 | : "r" (((union i128)neu).i64[0]), 280 | "r" (((union i128)neu).i64[1]), 281 | "r" (var) 282 | : "memory"); 283 | else if (mm == __ATOMIC_RELAXED) 284 | __asm volatile("stxp %w0, %1, %2, [%3]" 285 | : "=&r" (ret) 286 | : "r" (((union i128)neu).i64[0]), 287 | "r" (((union i128)neu).i64[1]), 288 | "r" (var) 289 | : ); 290 | else 291 | abort(); 292 | return ret; 293 | #else 294 | uint32_t ret; 295 | if (mm == __ATOMIC_RELEASE) 296 | __asm volatile("stlxp %w0, %1, %H1, [%2]" 
297 | : "=&r" (ret) 298 | : "r" (neu), "r" (var) 299 | : "memory"); 300 | else if (mm == __ATOMIC_RELAXED) 301 | __asm volatile("stxp %w0, %1, %H1, [%2]" 302 | : "=&r" (ret) 303 | : "r" (neu), "r" (var) 304 | : ); 305 | else 306 | abort(); 307 | return ret; 308 | #endif 309 | } 310 | #endif 311 | 312 | static inline void sevl(void) 313 | { 314 | #if defined __ARM_ARCH 315 | __asm volatile("sevl" : : : ); 316 | #endif 317 | } 318 | 319 | static inline void sev(void) 320 | { 321 | #if defined __ARM_ARCH 322 | __asm volatile("sev" : : : "memory"); 323 | #endif 324 | } 325 | 326 | static inline int wfe(void) 327 | { 328 | #if defined __ARM_ARCH 329 | __asm volatile("wfe" : : : "memory"); 330 | #endif 331 | return 1; 332 | } 333 | 334 | #ifdef USE_WFE 335 | #define SEVL() sevl() 336 | #define WFE() wfe() 337 | #define SEV() do { __asm volatile ("dsb ish" ::: "memory"); sev(); } while(0) 338 | #if __ARM_ARCH == 8 && __ARM_64BIT_STATE == 1 339 | #define LDXR128(addr, mo) lld((addr), (mo)) 340 | #endif 341 | #define LDXR64(addr, mo) ll64((addr), (mo)) 342 | #define LDXR32(addr, mo) ll32((addr), (mo)) 343 | #define LDXR16(addr, mo) ll16((addr), (mo)) 344 | #define LDXR8(addr, mo) ll8((addr), (mo)) 345 | #define LDXR(addr, mo) ll((addr), (mo)) 346 | //When using WFE we should not stall the pipeline using other means 347 | #define DOZE() (void)0 348 | #else 349 | #define SEVL() (void)0 350 | #define WFE() 1 351 | #define SEV() (void)0 352 | #define LDXR128(addr, mo) __atomic_load_n((addr), (mo)) 353 | #define LDXR64(addr, mo) __atomic_load_n((addr), (mo)) 354 | #define LDXR32(addr, mo) __atomic_load_n((addr), (mo)) 355 | #define LDXR16(addr, mo) __atomic_load_n((addr), (mo)) 356 | #define LDXR8(addr, mo) __atomic_load_n((addr), (mo)) 357 | #define LDXR(addr, mo) __atomic_load_n((addr), (mo)) 358 | #define DOZE() doze() 359 | #endif 360 | -------------------------------------------------------------------------------- /ext/sms/clh_spinlock.h: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2017 ARM Limited. All rights reserved. 3 | * SPDX-License-Identifier: BSD-3-Clause 4 | * 5 | * Redistribution and use in source and binary forms, with or without 6 | * modification, are permitted provided that the following conditions are met: 7 | * 8 | * Redistributions of source code must retain the above copyright notice, this 9 | * list of conditions and the following disclaimer. 10 | * 11 | * Redistributions in binary form must reproduce the above copyright notice, this 12 | * list of conditions and the following disclaimer in the documentation and/or 13 | * other materials provided with the distribution. 14 | * 15 | * Neither the name of ARM Limited nor the names of its contributors may be used 16 | * to endorse or promote products derived from this software without specific 17 | * prior written permission. 18 | * 19 | * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 20 | * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 21 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22 | * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 23 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 24 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 25 | * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 26 | * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR 27 | * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 | */ 30 | 31 | /* 32 | * Arm Shared Memory Synchronization Benchmark (SMS) 33 | * commit: 85a4b2456f1c84e2235a527d8b2b69be99621e94 34 | * August 6 2018 35 | * 36 | * Description: 37 | * The CLH (Craig, Landin, and Hagersten) spinlock is a queue-based spinlock in 38 | * which each node spins on the previous node's wait status. The CLH spinlock is 39 | * starvation-free and has FCFS (first come, first served) order. Because each 40 | * thread spins on the previous node, which was created by another thread, CLH 41 | * performance may be worse than that of the MCS spinlock, which spins only on 42 | * local memory. However, this should not be a problem, because modern 43 | * architectures typically implement ccNUMA (cache coherent non-uniform memory 44 | * architecture), which coherently caches remote memory in a local cache line. 45 | * The remote memory may not be updated at all, and the changed status is 46 | * implicitly transferred to the spinning core by the interconnect's cache 47 | * coherence protocol. The CLH data structure is an implicit linked list; the 48 | * global_clh_lock only contains a cache-line aligned tail pointer and an initial 49 | * dummy clh_node. The main disadvantages of the CLH spinlock compared to the MCS 50 | * spinlock are: 1) it is slower than MCS on cacheless NUMA, and 2) wait-free back-off / time-out / abortable / hierarchical variants are hard to implement. 51 | * 52 | * Changes compared to the official CLH spinlock: 53 | * The official CLH spinlock reuses the previously released queue node. We use 54 | * per-thread pointers to indicate each thread's current local node. Therefore a 55 | * thread may spin on a queue node owned by another thread, and ccNUMA coherence 56 | * protocols will cache the remote DRAM in a local cache line. Overall 57 | * performance should be similar to the MCS spinlock. 58 | * 59 | * Internals: 60 | * The only LSE instruction is SWPAL, which exchanges the current node with the 61 | * lock tail. There is a tunable parameter -w which can be used to disable WFE. 62 | * All variables are cache-line aligned. Queue nodes are kept in a shared node 63 | * pool rather than in __thread storage (see the note above clh_nodeptr below); 64 | * clh_thread_local_init() initializes each thread's queue node. clh_lock() and 65 | * clh_unlock() strictly follow the original CLH algorithm. The global uint64_t lock pointer is unused. 66 | * 67 | * Workings: 68 | * clh_spinlock works similarly to osq_lock and queued_spinlock. 69 | * 70 | * Tuning Parameters: 71 | * 72 | * Optional without_wfe disables the wfe instruction and uses empty spin loops instead.
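 *
 * Hypothetical invocation (the binary name and thread-count flag are
 * illustrative; only the '-- -w' extension is defined by this file):
 *   $ ./lh_clh_spinlock -t 8 -- -w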
73 | * 74 | * [-- [-w]]: disable sevl and wfe 75 | * 76 | */ 77 | 78 | #pragma once 79 | 80 | #include "llsc.h" 81 | 82 | #include <stdbool.h> 83 | #include <stdlib.h> 84 | #include <errno.h> 85 | 86 | #ifdef initialize_lock 87 | #undef initialize_lock 88 | #endif 89 | 90 | #ifdef parse_test_args 91 | #undef parse_test_args 92 | #endif 93 | 94 | #ifdef thread_local_init 95 | #undef thread_local_init 96 | #endif 97 | 98 | #define initialize_lock(lock, pinorder, threads) clh_lock_init(lock, threads) 99 | #define parse_test_args(args, argc, argv) clh_parse_args(args, argc, argv) 100 | #define thread_local_init(smtid) clh_thread_local_init(smtid) 101 | 102 | 103 | struct clh_node 104 | { 105 | struct clh_node *prev; 106 | unsigned long wait; 107 | } __attribute__ ((aligned (CACHE_LINE))); 108 | 109 | struct clh_node_pointer 110 | { 111 | struct clh_node *ptr; 112 | } __attribute__ ((aligned (CACHE_LINE))); 113 | 114 | struct clh_lock 115 | { 116 | struct clh_node node; 117 | unsigned long num_cores; 118 | struct clh_node *tail __attribute__ ((aligned(CACHE_LINE))); 119 | }; 120 | 121 | static bool without_wfe; 122 | static struct clh_lock global_clh_lock; // clh lock queue 123 | /* 124 | * We cannot use __thread thread-local storage because some threads 125 | * may be joined early while their nodes are still referenced by 126 | * other threads, which would cause a memory access violation. We 127 | * therefore allocate from the main thread's heap and share common C 128 | * arrays. Two arrays are used here: one is a pointer array with a 129 | * fixed slot for each thread; the other is a node pool whose nodes 130 | * are initially assigned to threads by thread id. Then, following 131 | * the CLH algorithm, the current node reuses its previous node as 132 | * the next available node, and we update the fixed pointer array to 133 | * reflect this change. That is, each thread retrieves its next 134 | * available node from the fixed pointer array at its thread-id 135 | * offset, but the pointer value may point to any node in the 136 | * CLH node pool.
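 *
 * Illustrative round with two threads A and B (dummy is the initial
 * tail): A's clh_lock() swaps the tail, sees the dummy's wait == 0 and
 * enters; B's clh_lock() swaps the tail and spins on A's node; A's
 * clh_unlock() clears its node's wait flag, and A's pointer-array slot
 * is repointed at the dummy node, which A reuses on its next acquisition.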
137 | */ 138 | static struct clh_node_pointer *clh_nodeptr; // clh node pointer array 139 | static struct clh_node *clh_nodepool; // clh node struct array 140 | 141 | /* additional parameter to enable WFE (default) or disable WFE */ 142 | static void clh_parse_args(test_args_t * unused, int argc, char** argv) { 143 | int i = 0; 144 | #if defined(__aarch64__) 145 | without_wfe = false; 146 | #else 147 | /* only aarch64 supports WFE */ 148 | without_wfe = true; 149 | #endif 150 | 151 | /* extended options retrieved after '--' operator */ 152 | while ((i = getopt(argc, argv, "w")) != -1) 153 | { 154 | switch (i) { 155 | case 'w': 156 | without_wfe = true; 157 | break; 158 | 159 | default: 160 | fprintf(stderr, 161 | "clh_spinlock additional options after --:\n" 162 | "\t[-h print this msg]\n" 163 | "\t[-w without_wfe, aarch64 default is false, non-aarch64 default is true]\n"); 164 | exit(2); 165 | } 166 | } 167 | } 168 | 169 | static inline void clh_lock_init(uint64_t *u64_lock, unsigned long num_cores) 170 | { 171 | /* default tail node should be set to 0 */ 172 | global_clh_lock.node.prev = NULL; 173 | global_clh_lock.node.wait = 0; 174 | global_clh_lock.num_cores = num_cores; 175 | global_clh_lock.tail = &global_clh_lock.node; 176 | 177 | /* save the clh_lock pointer to the global uint64_t */ 178 | *u64_lock = (uint64_t)&global_clh_lock; 179 | 180 | /* calloc will initialize all memory to zero automatically */ 181 | if (clh_nodeptr) free(clh_nodeptr); 182 | clh_nodeptr = calloc(num_cores, sizeof(struct clh_node_pointer)); 183 | if (clh_nodeptr == NULL) exit(errno); 184 | 185 | 186 | if (clh_nodepool) free(clh_nodepool); 187 | clh_nodepool = calloc(num_cores, sizeof(struct clh_node)); 188 | if (clh_nodepool == NULL) exit(errno); 189 | 190 | #ifdef DDEBUG 191 | printf("CLH: global_clh_lock=%llx\n", (long long unsigned int) &global_clh_lock); 192 | #endif 193 | } 194 | 195 | static inline void clh_thread_local_init(unsigned long smtid) 196 | { 197 | /* initialize clh node pointer array individually */ 198 | clh_nodepool[smtid].wait = 1; 199 | clh_nodeptr[smtid].ptr = &clh_nodepool[smtid]; 200 | } 201 | 202 | static inline void clh_lock(struct clh_lock *lock, struct clh_node *node, bool use_wfe, unsigned long tid) 203 | { 204 | /* must set wait to 1 first, otherwise the next node behind the new tail will not spin */ 205 | node->wait = 1; 206 | struct clh_node *prev = node->prev = __atomic_exchange_n(&lock->tail, node, __ATOMIC_ACQ_REL); 207 | #ifdef DDEBUG 208 | printf("T%lu LOCK: prev<-node: %llx<-%llx\n", tid, (long long unsigned int)prev, (long long unsigned int)node); 209 | #endif 210 | 211 | /* CLH spinlock: spin on the previous node's wait status */ 212 | if (use_wfe) 213 | { 214 | if (__atomic_load_n(&prev->wait, __ATOMIC_ACQUIRE)) 215 | { 216 | SEVL(); 217 | while (WFE() && LDXR(&prev->wait, __ATOMIC_ACQUIRE)) 218 | { 219 | DOZE(); 220 | } 221 | } 222 | } 223 | else 224 | { 225 | while (__atomic_load_n(&prev->wait, __ATOMIC_ACQUIRE)) 226 | { 227 | ; 228 | } 229 | } 230 | } 231 | 232 | /* return the previous node as the reused node for the next clh_lock() */ 233 | static inline void clh_unlock(struct clh_node *node, unsigned long tid) 234 | { 235 | #ifdef DDEBUG 236 | printf("T%lu UNLOCK: node: %llx\n", tid, (long long unsigned int)node); 237 | #endif 238 | /* CLH spinlock: release the current node by resetting its wait status */ 239 | #ifdef USE_DMB 240 | __atomic_thread_fence(__ATOMIC_RELEASE); 241 | __atomic_store_n(&node->wait, 0, __ATOMIC_RELAXED); 242 | #else 243 | __atomic_store_n(&node->wait, 0,
__ATOMIC_RELEASE); 244 | #endif 245 | } 246 | 247 | /* standard lockhammer lock_acquire and lock_release interfaces */ 248 | static unsigned long __attribute__((noinline)) 249 | lock_acquire (uint64_t *lock, unsigned long threadnum) 250 | { 251 | clh_lock(&global_clh_lock, clh_nodeptr[threadnum].ptr, !without_wfe, threadnum); 252 | return 1; 253 | } 254 | 255 | static inline void lock_release (uint64_t *lock, unsigned long threadnum) 256 | { 257 | /* 258 | * Have to save prev first: once clh_unlock() is called, node->prev might 259 | * be overwritten by another thread, causing two threads to use the same 260 | * nodepool clh_node and thus producing a circular linked list after 261 | * another round of lock acquisition. 262 | */ 263 | struct clh_node* prev = clh_nodeptr[threadnum].ptr->prev; 264 | clh_unlock(clh_nodeptr[threadnum].ptr, threadnum); 265 | clh_nodeptr[threadnum].ptr = prev; 266 | } 267 | 268 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 269 | -------------------------------------------------------------------------------- /ext/tbb/include/tbb.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2005-2018 Intel Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | 17 | 18 | 19 | */ 20 | 21 | /* 22 | * Based on: 23 | * 24 | * project: github.com/01org/tbb, files: 25 | * tbb/include/tbb/machine/gcc_generic.h, 26 | * tbb/include/tbb/machine/linux_intel64.h 27 | * 28 | * __TBB mappings: 29 | * 30 | * Only the logic needed for spin_rw_mutex was added - only 64b wide data 31 | * (wordsize == 8) and Linux are supported 32 | * 33 | * for Aarch64: default is GCC built-ins (based on gcc version), 34 | * alternative: lockhammer local atomics via USE_LOCAL, with or w/o USE_LSE 35 | * 36 | * for x86-64: default is lockhammer local atomics (which should be same 37 | * as machine/linux_intel64.h), alternative: GCC built-ins via 38 | * USE_GCC_BUILTINS (based on gcc version) 39 | * 40 | * For both ISAs, USE_LOCAL has higher priority than USE_GCC_BUILTINS if used 41 | * together. 42 | */ 43 | 44 | 45 | #ifndef __TBB_H 46 | #define __TBB_H 47 | 48 | #define _GNU_SOURCE 49 | 50 | #include "atomics.h" 51 | #include "cpu_relax.h" 52 | 53 | /* Non default configurations */ 54 | // #define USE_LOCAL 55 | // #define USE_LSE 56 | // #define USE_GCC_BUILTINS 57 | 58 | 59 | #ifndef NDEBUG 60 | #pragma message("Using debug build!!") 61 | #define DBG(fmt,...) \ 62 | do { fprintf(stderr, "tbb>%s:%d " fmt, \ 63 | __func__, __LINE__, ##__VA_ARGS__); } while (0); 64 | 65 | #define __TBB_ASSERT(b, msg) \ 66 | do { if (!(b)) { DBG("Assert: %s\n", msg); exit (1); } } while(0); 67 | 68 | #else /* NDEBUG */ 69 | 70 | #define DBG(fmt, ...)
do {} while (0); 71 | #define __TBB_ASSERT(b, msg) do { } while(0); 72 | 73 | #endif /* NDEBUG */ 74 | 75 | /* 76 | * spin; do not yield 77 | */ 78 | static inline void machine_pause (int32_t delay) { 79 | while(delay>0) { 80 | __cpu_relax(); 81 | delay--; 82 | } 83 | } 84 | 85 | #if defined(USE_LOCAL) || (defined(__x86_64__) && !defined(USE_GCC_BUILTINS)) 86 | #ifndef NDEBUG 87 | #pragma message("Using lockhammer atomics library!!") 88 | #endif /* NDEBUG */ 89 | 90 | /* 91 | * this really needs to be fetchadd64_release; however, we want to match 92 | * how intel-tbb uses the gcc built-ins. 93 | * 94 | * atomics.h is aware of the USE_LSE configuration, 95 | * so no need to do anything here. 96 | */ 97 | #define __TBB_machine_cmpswp8(P,V,C) cas64_acquire_release((unsigned long *) P,V,C) 98 | #define __TBB_machine_fetchadd8(P,V) fetchadd64_acquire_release((unsigned long *) P,V) 99 | #define __TBB_machine_fetchadd8release(P,V) fetchadd64_acquire_release((unsigned long *) P,V) 100 | 101 | static inline void __TBB_machine_or(volatile void* operand, uint64_t addend) { 102 | #if defined(__x86_64__) 103 | asm volatile( 104 | "lock\norq %1,%0" 105 | : "=m"(*(volatile uint64_t*)operand) 106 | : "r"(addend), "m"(*(volatile uint64_t*)operand) 107 | : "memory"); 108 | #elif defined(__aarch64__) 109 | #ifndef USE_LSE 110 | unsigned long old, newval, tmp; 111 | asm volatile( 112 | "1: ldaxr %[old], %[ptr]\n" 113 | " orr %[newval], %[old], %[val]\n" 114 | " stlxr %w[tmp], %[newval], %[ptr]\n" 115 | " cbnz %w[tmp], 1b\n" 116 | : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), 117 | [ptr] "+Q" (*(unsigned long *)operand) 118 | : [val] "Lr" (addend) 119 | : ); 120 | #else /* USE_LSE */ 121 | // clobbering addend - to match gcc 122 | asm volatile( 123 | "ldsetal %[val], %[val], %[ptr]\n" 124 | : [val] "+&r" (addend), [ptr] "+Q" (*(unsigned long *)operand) 125 | : ); 126 | #endif /* USE_LSE */ 127 | #else 128 | /* Arch independent implementation */ 129 | for(;;) { 130 | uintptr_t tmp = *(volatile uintptr_t *)operand; 131 | uintptr_t result = __TBB_machine_cmpswp8(operand, tmp|addend, tmp); 132 | if( result==tmp ) break; 133 | } 134 | #endif /* ARCH */ 135 | } 136 | 137 | static inline void __TBB_machine_and(volatile void* operand, uint64_t addend) { 138 | #if defined(__x86_64__) 139 | asm volatile( 140 | "lock\nandq %1,%0" 141 | : "=m"(*(volatile uint64_t*)operand) 142 | : "r"(addend), "m"(*(volatile uint64_t*)operand) 143 | : "memory"); 144 | #elif defined(__aarch64__) 145 | #ifndef USE_LSE 146 | unsigned long old, newval, tmp; 147 | asm volatile( 148 | "1: ldaxr %[old], %[ptr]\n" 149 | " and %[newval], %[old], %[val]\n" 150 | " stlxr %w[tmp], %[newval], %[ptr]\n" 151 | " cbnz %w[tmp], 1b\n" 152 | : [tmp] "=&r" (tmp), [old] "=&r" (old), [newval] "=&r" (newval), 153 | [ptr] "+Q" (*(unsigned long *)operand) 154 | : [val] "Lr" (addend) 155 | : ); 156 | #else /* USE_LSE */ 157 | // clobbering addend - to match gcc 158 | asm volatile( 159 | "mvn %[val], %[val]\n" 160 | "ldclral %[val], %[val], %[ptr]\n" 161 | : [val] "+&r" (addend), [ptr] "+Q" (*(unsigned long *)operand) 162 | : ); 163 | #endif /* USE_LSE */ 164 | #else 165 | /* Arch independent implementation */ 166 | for(;;) { 167 | uintptr_t tmp = *(volatile uintptr_t *)operand; 168 | uintptr_t result = __TBB_machine_cmpswp8(operand, tmp&addend, tmp); 169 | if( result==tmp ) break; 170 | } 171 | #endif /* ARCH */ 172 | } 173 | 174 | #else /* GCC Built-ins */ 175 | 176 | #define __GCC_VERSION \ 177 | (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 +
__GNUC_PATCHLEVEL__) 178 | 179 | #if __GCC_VERSION < 40700 /* use __sync* built-ins */ 180 | #ifndef NDEBUG 181 | #pragma message("Using old gcc (<4.7.0) built-ins!!") 182 | #endif /* NDEBUG */ 183 | 184 | #define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \ 185 | inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ 186 | return __sync_val_compare_and_swap((volatile T *)ptr,comparand,value); \ 187 | } \ 188 | inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ 189 | return __sync_fetch_and_add((volatile T *)ptr,value); \ 190 | } 191 | 192 | static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { 193 | __sync_fetch_and_or((volatile uintptr_t *)ptr,addend); 194 | } 195 | 196 | static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { 197 | __sync_fetch_and_and((volatile uintptr_t *)ptr,addend); 198 | } 199 | 200 | #else /* __GCC_VERSION >= 40700; use __atomic* built-ins */ 201 | #ifndef NDEBUG 202 | #pragma message("Using new gcc (>=4.7.0) built-ins!!") 203 | #endif /* NDEBUG */ 204 | 205 | #define __TBB_MACHINE_DEFINE_ATOMICS(S,T) \ 206 | inline T __TBB_machine_cmpswp##S( volatile void *ptr, T value, T comparand ) { \ 207 | (void)__atomic_compare_exchange_n((volatile T *)ptr, &comparand, value, \ 208 | 0, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); \ 209 | return comparand; \ 210 | } \ 211 | inline T __TBB_machine_fetchadd##S( volatile void *ptr, T value ) { \ 212 | return __atomic_fetch_add((volatile T *)ptr, value, __ATOMIC_SEQ_CST); \ 213 | } 214 | 215 | static inline void __TBB_machine_or( volatile void *ptr, uintptr_t addend ) { 216 | __atomic_fetch_or((volatile uintptr_t *)ptr,addend,__ATOMIC_SEQ_CST); 217 | } 218 | 219 | static inline void __TBB_machine_and( volatile void *ptr, uintptr_t addend ) { 220 | __atomic_fetch_and((volatile uintptr_t *)ptr,addend,__ATOMIC_SEQ_CST); 221 | } 222 | 223 | #endif /* __GCC_VERSION */ 224 | 225 | /* only intptr_t for now */ 226 | __TBB_MACHINE_DEFINE_ATOMICS(8, intptr_t) 227 | 228 | /* 229 | * func: fetchaddNrelease 230 | * Scope for optimization on the AArch64 side: we may not need acquire semantics? 231 | */ 232 | #define __TBB_machine_fetchadd8release(P,V) __TBB_machine_fetchadd8(P,V) 233 | 234 | #endif /* USE_LOCAL, __x86_64__ && !USE_GCC_BUILTINS */ 235 | 236 | 237 | /* 238 | * Top level abstraction 239 | */ 240 | #define __TBB_machine_pause(C) machine_pause(C) 241 | #define __TBB_Yield() sched_yield() 242 | #define __TBB_Pause(C) __TBB_machine_pause(C) 243 | #define __TBB_CompareAndSwapW(P,V,C) __TBB_machine_cmpswp8(P,V,C) 244 | #define __TBB_FetchAndAddW(P,V) __TBB_machine_fetchadd8(P,V) 245 | #define __TBB_FetchAndAddWrelease(P,V) __TBB_machine_fetchadd8release(P,V) 246 | #define __TBB_AtomicOR(P,V) __TBB_machine_or(P,V) 247 | #define __TBB_AtomicAND(P,V) __TBB_machine_and(P,V) 248 | 249 | /* TBB helper routines */ 250 | 251 | /* 252 | * From: class atomic_backoff : no_copy 253 | * 254 | * //! Class that implements exponential backoff. 255 | * 16 is approximately how many x86 'pause' instructions a context switch 256 | * takes; not changing it for now - do we need to change it? 257 | */ 258 | #define LOOPS_BEFORE_YIELD 16 259 | 260 | static inline void atomic_backoff__pause(int32_t *count) { 261 | if( *count<=LOOPS_BEFORE_YIELD ) { 262 | __TBB_Pause(*count); 263 | // Pause twice as long the next time. 264 | *count*=2; 265 | } else { 266 | // Pause is so long that we might as well yield CPU to scheduler.
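        // Illustrative progression: a caller that starts at count == 1
        // pauses for 1, 2, 4, 8, then 16 iterations (LOOPS_BEFORE_YIELD),
        // after which every further backoff falls through to this yield.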
267 | __TBB_Yield(); 268 | } 269 | } 270 | 271 | /* 272 | * Generic versions of helper functions if not defined by now 273 | */ 274 | #ifndef __TBB_AtomicOR 275 | #ifndef NDEBUG 276 | #pragma message("Using backoff based AtomicOR!!") 277 | #endif /* NDEBUG */ 278 | static inline void __TBB_AtomicOR(void* operand, uintmax_t addend) { 279 | int32_t count; 280 | for(count = 1;;atomic_backoff__pause(&count)) { 281 | uintptr_t tmp = *(volatile uintptr_t *)operand; 282 | uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp); 283 | if( result==tmp ) break; 284 | } 285 | } 286 | #endif /* __TBB_AtomicOR */ 287 | 288 | #ifndef __TBB_AtomicAND 289 | #ifndef NDEBUG 290 | #pragma message("Using backoff based AtomicAND!!") 291 | #endif /* NDEBUG */ 292 | static inline void __TBB_AtomicAND(void* operand, uintptr_t addend) { 293 | int32_t count; 294 | for(count = 1;;atomic_backoff__pause(&count)) { 295 | uintptr_t tmp = *(volatile uintptr_t *)operand; 296 | uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp); 297 | if( result==tmp ) break; 298 | } 299 | } 300 | #endif /* __TBB_AtomicAND */ 301 | #endif /* __TBB_H */ 302 | 303 | 304 | /* vim: set tabstop=4 shiftwidth=4 softtabstop=4 expandtab: */ 305 | -------------------------------------------------------------------------------- /ext/tbb/tbb_spin_rw_mutex.h: -------------------------------------------------------------------------------- 1 | /* 2 | Copyright (c) 2005-2018 Intel Corporation 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License. 15 | 16 | 17 | 18 | 19 | */ 20 | 21 | /* 22 | * Based on: 23 | * 24 | * Project: github.com/01org/tbb, File: tbb/include/tbb/spin_rw_mutex.h 25 | * Tag: 2018_U3-0-g633b01a 26 | * 27 | * Description: 28 | * 29 | * This file implements a 'Fast, unfair, spinning reader-writer lock with 30 | * back-off and writer-preference'. The algorithm is based on 31 | * 'spin_rw_mutex' from the Intel TBB library. 32 | * 33 | * Internals: 34 | * 35 | * - Cutting through layers of abstraction in the original source code, I 36 | * made things not as clean as they were. However, during the porting 37 | * process, I tried to keep things as similar as possible to the setup in 38 | * the Intel TBB library. I ported only what is required for this 39 | * synchronization scheme to work. 40 | * 41 | * - The lockhammer/tbb.h file tries to provide __TBB-level abstractions 42 | * similar to tbb/include/tbb/tbb_machine.h, but it is primitive and 43 | * has only the definitions needed for this particular scheme. 44 | * 45 | * - The underlying atomic primitives come from GCC built-ins, as configured 46 | * in the gcc_generic.h file of the tbb project for Aarch64. For x86-64 they 47 | * are derived from the tbb/include/tbb/machine/linux_intel64.h file. The 48 | * expected ISA is either x86-64 (no TSX) or Aarch64, 64-bit only, and the 49 | * OS is Linux (for sched_yield). 50 | * 51 | * - For Aarch64, TBB uses the GCC generic atomic built-ins as a base. It 52 | * does not assume anything about the memory model or ISA.
So, the 53 | * implementation could be suboptimal. We inherit those traits here as 54 | * well. 55 | 56 | * - In lockhammer/tbb.h, there are several macros which allow you to 57 | * select which variant of atomics to use. For Aarch64, the default is GCC 58 | * built-ins, and for x86-64, the default is the local atomics supplied by 59 | * the file. These default choices are similar to the TBB setup. 60 | * 61 | * Changes from TBB: 62 | * 63 | * - One main change is in the definition of 'machine_pause()'. Here, it 64 | * first spins and then calls sched_yield(), unlike the default in TBB, 65 | * where it calls sched_yield() immediately (at least for Aarch64). 66 | * 67 | * - Does not implement the upgrade() or downgrade() methods 68 | * 69 | * - Not using C++ because it is difficult given this benchmark framework, 70 | * as well as the other complexities which come from pulling out a set of 71 | * classes from a class tree in tbb. 72 | * 73 | * Workings: 74 | * 75 | * This implements a classical reader-writer lock, which means the lock can 76 | * be held by a single writer or by a group of readers at the same time, 77 | * but not both. 78 | * 79 | * From tbb docs: " Mutual exclusion is necessary when at least one thread 80 | * writes to a shared variable. But it does no harm to permit multiple 81 | * readers into a protected region. The reader-writer variants of the 82 | * mutexes [...] enable multiple readers by distinguishing reader locks 83 | * from writer locks. There can be more than one reader lock on a given 84 | * mutex." 85 | * 86 | * When a writer first tries to acquire the lock, it will succeed if no 87 | * readers already hold it; otherwise, in the presence of readers, it sets 88 | * the writer-pending bit if that bit is not already set. After setting the 89 | * bit (or finding it already set), the writer starts backing off, 90 | * eventually yielding the CPU, until it obtains the lock. 91 | * 92 | * In the case of readers, more than one of them can be in the protected 93 | * section simultaneously. If no writer holds the lock and no writers are 94 | * pending, a reader can acquire the lock even in the presence of other 95 | * readers. While a writer holds the lock, a reader backs off and 96 | * eventually yields the CPU until the lock becomes available again. 97 | * 98 | * Readers/Writers ratio (-r) and Pure readers (-m): 99 | * 100 | * - The 'rw_mask' variable defines the ratio between readers and writers 101 | * per thread. It is controlled using the log2_ratio variable, cmdline arg -r. 102 | * 103 | * - Given the ratio, a thread will perform that many 'read_acquire' and 104 | * 'read_release' calls and then one 'write_acquire' and 'write_release'; 105 | * if more work remains, it repeats. 106 | * 107 | * For a thread: 108 | * 109 | * num readers 110 | * ----------- = 2^(log2_ratio) - 1; 111 | * num writers 112 | * 113 | * > log2_ratio of 0 means all writers 114 | * > log2_ratio of ~0 means all readers 115 | * > default log2_ratio is 6, i.e. 63 reads per write. 116 | * 117 | * - Pure readers are CPUs which will never perform a write acq/rel. The 118 | * cmdline arg is a bit mask, e.g. 0x8 will make the 4th cpu (cpu id: 0x3) 119 | * a pure reader. Default is 0x0, i.e. no pure readers.
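 *
 * Illustrative settings: '-r 3' gives 2^3 - 1 = 7 reads per write on
 * each thread; '-m 0x6' makes cpu ids 0x1 and 0x2 pure readers.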
120 | * 121 | */ 122 | 123 | #ifndef __TBB_spin_mutex_H 124 | #define __TBB_spin_mutex_H 125 | 126 | #ifdef initialize_lock 127 | #undef initialize_lock 128 | #endif 129 | 130 | #ifdef parse_test_args 131 | #undef parse_test_args 132 | #endif 133 | 134 | #define initialize_lock(lock, pinorder, threads) tbb_init_locks(lock, threads) 135 | #define parse_test_args(args, argc, argv) tbb_parse_args(args, argc, argv) 136 | 137 | #include "tbb.h" 138 | 139 | #define WRITER 1 140 | #define WRITER_PENDING 2 141 | #define READERS ~(WRITER | WRITER_PENDING) 142 | #define ONE_READER 4 143 | #define BUSY (WRITER | READERS) 144 | 145 | unsigned long log2_ratio = 0; 146 | unsigned long rw_mask = 0; 147 | unsigned long reader_cpu_mask = 0; 148 | 149 | typedef struct { 150 | unsigned long c; 151 | uint8_t pure_reader; 152 | } __attribute__((aligned(64))) rw_count_t; 153 | 154 | rw_count_t *rw_counts; 155 | 156 | inline uint8_t is_writer(unsigned long i, uint8_t val) { 157 | if (rw_counts[i].pure_reader) 158 | return 0; 159 | rw_counts[i].c += val; 160 | return !(rw_counts[i].c & rw_mask); 161 | } 162 | 163 | void tbb_print_usage() { 164 | fprintf(stderr, "tbb_spin_rw_mutex additional options:\n"); 165 | fprintf(stderr, "\t[-h print this msg]\n"); 166 | fprintf(stderr, "\t[-r reader/writer log ratio, default: 6 (2^(6)-1 readers per writer)]\n"); 167 | fprintf(stderr, "\t[-m pure reader cpu mask, default: 0x0 (no pure readers)]\n"); 168 | } 169 | 170 | void tbb_check_strtoul(unsigned long rval, char* endptr) { 171 | if ((errno == ERANGE && (rval == ULONG_MAX)) 172 | || (errno != 0 && rval == 0) || endptr == optarg) { 173 | fprintf(stderr, "tbb_spin_rw_mutex: value unsuitable for 'unsigned long'\n\n"); 174 | tbb_print_usage(); 175 | exit(1); 176 | } 177 | } 178 | 179 | void tbb_parse_args(test_args_t * unused, int argc, char** argv) { 180 | int i = 0; 181 | char *endptr; 182 | 183 | log2_ratio = 6; 184 | reader_cpu_mask = 0x0; 185 | 186 | while ((i = getopt(argc, argv, "hr:m:")) != -1) 187 | { 188 | switch (i) { 189 | case 'r': 190 | errno = 0; 191 | log2_ratio = strtoul(optarg, &endptr, 10); 192 | tbb_check_strtoul(log2_ratio, endptr); 193 | if (log2_ratio >= 64) { 194 | fprintf(stderr, "tbb_spin_rw_mutex: -r cannot be >= 64\n"); 195 | exit(1); 196 | } 197 | break; 198 | case 'm': 199 | errno = 0; 200 | if (!strncmp(optarg, "0x", 2)) 201 | reader_cpu_mask = strtoul(optarg, &endptr, 16); 202 | else 203 | reader_cpu_mask = strtoul(optarg, &endptr, 10); 204 | 205 | tbb_check_strtoul(reader_cpu_mask, endptr); 206 | break; 207 | case 'h': 208 | tbb_print_usage(); 209 | exit(0); 210 | case '?': 211 | default: 212 | tbb_print_usage(); 213 | exit(3); 214 | } 215 | } 216 | } 217 | 218 | void tbb_init_locks (unsigned long *lock, unsigned long cores) { 219 | unsigned i; 220 | rw_mask = ((1UL</dev/null 55 | then 56 | return 57 | fi 58 | 59 | id=`_gen_ChangeId` 60 | T="$MSG.tmp.$$" 61 | AWK=awk 62 | if [ -x /usr/xpg4/bin/awk ]; then 63 | # Solaris AWK is just too broken 64 | AWK=/usr/xpg4/bin/awk 65 | fi 66 | 67 | # Get core.commentChar from git config or use default symbol 68 | commentChar=`git config --get core.commentChar` 69 | commentChar=${commentChar:-#} 70 | 71 | # How this works: 72 | # - parse the commit message as (textLine+ blankLine*)* 73 | # - assume textLine+ to be a footer until proven otherwise 74 | # - exception: the first block is not footer (as it is the title) 75 | # - read textLine+ into a variable 76 | # - then count blankLines 77 | # - once the next textLine appears, print textLine+ blankLine* as these 78 |
# aren't footer 79 | # - in END, the last textLine+ block is available for footer parsing 80 | $AWK ' 81 | BEGIN { 82 | # while we start with the assumption that textLine+ 83 | # is a footer, the first block is not. 84 | isFooter = 0 85 | footerComment = 0 86 | blankLines = 0 87 | } 88 | 89 | # Skip lines starting with commentChar without any spaces before it. 90 | /^'"$commentChar"'/ { next } 91 | 92 | # Skip the line starting with the diff command and everything after it, 93 | # up to the end of the file, assuming it is only patch data. 94 | # If more than one line before the diff was empty, strip all but one. 95 | /^diff --git / { 96 | blankLines = 0 97 | while (getline) { } 98 | next 99 | } 100 | 101 | # Count blank lines outside footer comments 102 | /^$/ && (footerComment == 0) { 103 | blankLines++ 104 | next 105 | } 106 | 107 | # Catch footer comment 108 | /^\[[a-zA-Z0-9-]+:/ && (isFooter == 1) { 109 | footerComment = 1 110 | } 111 | 112 | /]$/ && (footerComment == 1) { 113 | footerComment = 2 114 | } 115 | 116 | # We have a non-blank line after blank lines. Handle this. 117 | (blankLines > 0) { 118 | print lines 119 | for (i = 0; i < blankLines; i++) { 120 | print "" 121 | } 122 | 123 | lines = "" 124 | blankLines = 0 125 | isFooter = 1 126 | footerComment = 0 127 | } 128 | 129 | # Detect that the current block is not the footer 130 | (footerComment == 0) && (!/^\[?[a-zA-Z0-9-]+:/ || /^[a-zA-Z0-9-]+:\/\//) { 131 | isFooter = 0 132 | } 133 | 134 | { 135 | # We need this information about the current last comment line 136 | if (footerComment == 2) { 137 | footerComment = 0 138 | } 139 | if (lines != "") { 140 | lines = lines "\n"; 141 | } 142 | lines = lines $0 143 | } 144 | 145 | # Footer handling: 146 | # If the last block is considered a footer, splice in the Change-Id at the 147 | # right place. 148 | # Look for the right place to inject Change-Id by considering 149 | # CHANGE_ID_AFTER. Keys listed in it (case insensitive) come first, 150 | # then Change-Id, then everything else (eg. Signed-off-by:). 151 | # 152 | # Otherwise just print the last block, a new line and the Change-Id as a 153 | # block of its own. 
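# Illustrative example (assuming CHANGE_ID_AFTER matches "Bug"): a footer
#   Bug: 42
#   Signed-off-by: A. Developer <a@example.com>
# becomes
#   Bug: 42
#   Change-Id: I<commit-hash>
#   Signed-off-by: A. Developer <a@example.com>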
154 | END { 155 | unprinted = 1 156 | if (isFooter == 0) { 157 | print lines "\n" 158 | lines = "" 159 | } 160 | changeIdAfter = "^(" tolower("'"$CHANGE_ID_AFTER"'") "):" 161 | numlines = split(lines, footer, "\n") 162 | for (line = 1; line <= numlines; line++) { 163 | if (unprinted && match(tolower(footer[line]), changeIdAfter) != 1) { 164 | unprinted = 0 165 | print "Change-Id: I'"$id"'" 166 | } 167 | print footer[line] 168 | } 169 | if (unprinted) { 170 | print "Change-Id: I'"$id"'" 171 | } 172 | }' "$MSG" > "$T" && mv "$T" "$MSG" || rm -f "$T" 173 | } 174 | _gen_ChangeIdInput() { 175 | echo "tree `git write-tree`" 176 | if parent=`git rev-parse "HEAD^0" 2>/dev/null` 177 | then 178 | echo "parent $parent" 179 | fi 180 | echo "author `git var GIT_AUTHOR_IDENT`" 181 | echo "committer `git var GIT_COMMITTER_IDENT`" 182 | echo 183 | printf '%s' "$clean_message" 184 | } 185 | _gen_ChangeId() { 186 | _gen_ChangeIdInput | 187 | git hash-object -t commit --stdin 188 | } 189 | 190 | 191 | add_ChangeId 192 | -------------------------------------------------------------------------------- /tools/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ARM-software/synchronization-benchmarks/9cc9fb6b5a5ddad855ead6aab88180c870d94a0d/tools/.gitignore --------------------------------------------------------------------------------