├── .gitignore ├── .travis.yml ├── CODEOWNERS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── containers ├── README.md └── wordpress │ ├── README.md │ ├── docker-compose.yml │ ├── run.sh │ ├── wp_base │ ├── Dockerfile │ └── files │ │ ├── 1s-bkm.j2 │ │ ├── 2s-bkm.j2 │ │ ├── entrypoint.sh │ │ ├── https_oss_performance.patch │ │ ├── nginx.conf.in │ │ ├── php-base.ini │ │ ├── quickrun.sh │ │ ├── ssl-params.conf │ │ └── update_nginx_workers.sh │ └── wp_opt │ ├── Dockerfile │ └── files │ ├── 0001-BOLT-PHP7.4.29-GCC-option.patch │ ├── 0001-BOLT-PHP8.0.18-GCC-option.patch │ ├── 7.4.29-perf.fdata │ ├── 8.0.18-perf.fdata │ ├── nginx.conf.in │ └── php-opt.ini ├── detect-platform.mk ├── large_data-go └── mmap_test │ └── main.go ├── large_data ├── LICENSE ├── Makefile ├── README.md └── data-large-reference.cc ├── large_page-c ├── LICENSE ├── Makefile ├── Makefile.preload ├── README.md ├── cflags.mk ├── example │ ├── Makefile │ ├── README.md │ ├── filler1.c │ ├── filler2.c │ ├── filler3.c │ ├── filler4.c │ ├── filler5.c │ ├── filler6.c │ ├── filler7.c │ ├── filler8.c │ └── large_page_example.c ├── large_page.c ├── large_page.h └── lp_preload.c ├── large_page ├── LICENSE ├── Makefile ├── README.md ├── example │ ├── Makefile │ ├── README │ └── large_page_example.cc ├── large_page.cc ├── large_page.h └── ld.implicit.script ├── security.md ├── sysbench └── README.md ├── test.sh ├── test ├── large_page-c ├── large_page-c.d │ ├── default_stdout │ └── unsupported_stdout ├── large_page.d │ ├── default_stdout │ └── unsupported_stdout └── lib │ └── utils.sh └── tools ├── README.md ├── gen-perf-map.sh ├── maps_file.py ├── measure-perf-metric.sh ├── metric.template ├── metric_dsb_cache ├── metric_icache_miss_stalls ├── metric_itlb_mpki ├── metric_itlb_stalls ├── metric_l1_code_read_MPI ├── metric_l2_demand_code_MPI └── utils.sh /.gitignore: -------------------------------------------------------------------------------- 1 | *.o 2 | *.a 3 | *.so 4 | 
-------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: cpp 2 | 3 | matrix: 4 | fast_finish: true 5 | include: 6 | - env: 7 | os: linux 8 | # - env: 9 | # os: osx 10 | 11 | script: 12 | - ./test.sh 13 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * uttam.c.pawar@intel.com suresh.srinivas@intel.com nitin.tekchandani@intel.com yuxue.piao@intel.com 2 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Thank you for your interest in contributing to this project 4 | 5 | ## License 6 | 7 | The Intel® Optimizations for Dynamic Language Runtimes is distributed under 8 | the MIT License. 9 | 10 | You may obtain a copy of the License at: 11 | 12 | https://opensource.org/licenses/MIT 13 | 14 | By contributing to this project, you agree to the MIT License and release 15 | your contribution under those terms. 16 | 17 | 18 | ## Sign your work 19 | 20 | Please use the sign-off line at the end of your contribution. Your 21 | signature certifies that you wrote the contribution or otherwise have 22 | the right to pass it on as an open-source contribution, and that you 23 | agree to provide your contribution under the terms of the licenses 24 | noted above. The rules are pretty simple: if you can certify the 25 | below (from [developercertificate.org](http://developercertificate.org)): 26 | 27 | ``` 28 | Developer Certificate of Origin 29 | Version 1.1 30 | 31 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 
32 | 660 York Street, Suite 102, 33 | San Francisco, CA 94110 USA 34 | 35 | Everyone is permitted to copy and distribute verbatim copies of this 36 | license document, but changing it is not allowed. 37 | 38 | Developer's Certificate of Origin 1.1 39 | 40 | By making a contribution to this project, I certify that: 41 | 42 | (a) The contribution was created in whole or in part by me and I 43 | have the right to submit it under the open source license 44 | indicated in the file; or 45 | 46 | (b) The contribution is based upon previous work that, to the best 47 | of my knowledge, is covered under an appropriate open source 48 | license and I have the right under that license to submit that 49 | work with modifications, whether created in whole or in part 50 | by me, under the same open source license (unless I am 51 | permitted to submit under a different license), as indicated 52 | in the file; or 53 | 54 | (c) The contribution was provided directly to me by some other 55 | person who certified (a), (b) or (c) and I have not modified 56 | it. 57 | 58 | (d) I understand and agree that this project and the contribution 59 | are public and that a record of the contribution (including all 60 | personal information I submit with it, including my sign-off) is 61 | maintained indefinitely and may be redistributed consistent with 62 | this project or the open source license(s) involved. 63 | ``` 64 | 65 | Then you just add a line to every git commit message: 66 | 67 | Signed-off-by: Joe Smith 68 | 69 | Use your real name (sorry, no pseudonyms or anonymous contributions.) 70 | 71 | If you set your `user.name` and `user.email` git configs, you can sign your commit automatically with `git commit -s`. 
72 | 73 | 74 | ## Contribution process 75 | 76 | ### Development 77 | 78 | - Create a personal fork of the project on GitHub 79 | - Make your changes 80 | - Build and run the example test 81 | 82 | ### Review and acceptance 83 | 84 | - Create a pull request for your changes following [Creating a pull request instructions](https://help.github.com/articles/creating-a-pull-request/). 85 | ### Merge 86 | 87 | Merge of pull request is done only by project maintainers. 88 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018 Intel Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 19 | SOFTWARE. 
20 | 21 | SPDX-License-Identifier: MIT 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Intel® Optimizations for Dynamic Language Runtimes 2 | 3 | 4 | ## Introduction 5 | This repository offers tools and reference implementations for 6 | optimizing dynamic language runtimes. 7 | 8 | Please refer to 9 | https://www.intel.com/content/www/us/en/developer/articles/technical/runtime-performance-optimization-blueprint-intel-architecture-optimization-with-large-code.html 10 | for a blueprint that demonstrates how to diagnose Instruction Translation Lookaside Buffer (I-TLB) stalls, and 11 | reduce them using large pages. 12 | 13 | 14 | ## Status 15 | 16 | This codebase has been built and tested successfully on Ubuntu v22.04.1 LTS version as of Feb. 2024 17 | 18 | 19 | ## License 20 | 21 | The Intel® Optimizations for Dynamic Language Runtimes is distributed under the [MIT License](LICENSE). 22 | 23 | 24 | ## Tested Linux versions 25 | 26 | iodlr is tested on the following 64 bit Linux operating systems: 27 | 28 | * Ubuntu 20.04 29 | 30 | 31 | ## How to provide feedback 32 | Please submit an issue using the native github.com interface: https://github.com/intel/iodlr/issues. 33 | 34 | ## How to contribute 35 | 36 | Please refer to the [Contributing guide](CONTRIBUTING.md) for details on how to contribute 37 | to this project. 38 | 39 | Last update: 08/2024 40 | -------------------------------------------------------------------------------- /containers/README.md: -------------------------------------------------------------------------------- 1 | # Container workloads with optimizations 2 | 3 | ## About 4 | 5 | This folder contains everything needed to build containers with optimizations for various workloads. 6 | 7 | ## License 8 | 9 | Containers in this repository are distributed under the MIT License. 
10 | 11 | You may obtain a copy of the License at: 12 | 13 | https://opensource.org/licenses/MIT 14 | 15 | 16 | ## Containers 17 | 18 | Currently one container is available: 19 | 20 | - Wordpress: Intended to be used to execute the 'wordpress' target of the oss-performance benchmark suite, 21 | with updates from Intel(R). 22 | -------------------------------------------------------------------------------- /containers/wordpress/README.md: -------------------------------------------------------------------------------- 1 | # Containerized oss-performance: WordPress workload 2 | 3 | ## About 4 | 5 | This project is intended to be used to execute, in a containerized environment, 6 | the 'WordPress' target of the oss-performance benchmark suite, with updates from Intel(R). 7 | 8 | The oss-performance benchmark suite with Intel(R) updates can be found here: 9 | [Updates for OSS Performance at github](https://github.com/intel/Updates-for-OSS-Performance) 10 | 11 | ## License 12 | 13 | The Intel(R) Container for oss-performance with Optimizations for WordPress is distributed under the MIT License. 14 | 15 | You may obtain a copy of the License at: 16 | 17 | https://opensource.org/licenses/MIT 18 | 19 | 20 | ## Containers 21 | 22 | To accomplish this goal, we built four https workload containers: wp4.2_php7.4_base_https, wp4.2_php7.4_opt_https, wp5.6_php8.0_base_https, wp5.6_php8.0_opt_https. 23 | 24 | * wp4.2_php7.4_base_https contains the bare minimum needed to execute WordPress4.2 / PHP7.4 and establish 25 | a baseline. 
The following modifications were made to wp_base in addition to containerization: 26 | * php-fpm7.4 27 | * wp4.2_php7.4_opt_https builds upon wp4.2_php7.4_base_https and has the following additions: 28 | * BOLTing of PHP 29 | * Intel QAT accelerator with SW mode for TLS1.3 (QAT requirement refer to: https://github.com/intel/QAT_Engine/blob/master/docs/hardware_requirements.md) 30 | * PHP Zend framework now uses large pages 31 | * MariaDB now uses large pages and additional tuning 32 | * NUMA optimization/multi instance (must be done via pinning, see below) 33 | * Note that for NUMA optimization/pinning you may do this with the base container if you wish to isolate this optimization. 34 | 35 | * wp5.6_php8.0_base_https contains the bare minimum needed to execute WordPress5.6 / PHP8.0. 36 | * wp5.6_php8.0_opt_https builds upon wp5.6_php8.0_base_https and has the following additions: 37 | * PHP JIT 38 | * BOLTing of PHP 39 | * Intel QAT accelerator with SW mode for TLS1.3 (QAT requirement refer to: https://github.com/intel/QAT_Engine/blob/master/docs/hardware_requirements.md) 40 | * PHP Zend framework now uses large pages 41 | * MariaDB now uses large pages and additional tuning 42 | * NUMA optimization/multi instance (must be done via pinning, see below) 43 | * Note that for NUMA optimization/pinning you may do this with the base container if you wish to isolate this optimization. 44 | 45 | Note that in order to run a baseline across multiple sockets, you will need to utilize the 1s-bkm.j2 file in the base user 46 | directory in the container you wish to run (likely base). Copy the file over the current my.cnf as shown in the dockerfile. 47 | This will disable mysql query cache for an appropriate baseline across multiple sockets. 
48 | 49 | ## How to build 50 | 51 | ### Pre-requisites 52 | 53 | To build, you must have docker and docker-compose installed: 54 | 55 | ``` 56 | sudo apt install docker.io 57 | sudo apt install docker-compose 58 | ``` 59 | 60 | To build all containers, you may use docker-compose: 61 | 62 | ``` 63 | COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose build \ 64 | --progress=plain 65 | ``` 66 | 67 | To build a single container, you may use docker-compose as in the following example: 68 | 69 | ``` 70 | COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose build \ 71 | --progress=plain \ 72 | wp4.2_php7.4_base_https 73 | ``` 74 | 75 | 76 | To see an image list after this you may run: 77 | 78 | ``` 79 | docker image ls 80 | ``` 81 | 82 | Example output: 83 | 84 | ``` 85 | wp5.6_php8.0_opt_https latest 042f6b6f5e67 About a minute ago 1.76GB 86 | wp5.6_php8.0_base_https latest ac13b8af0a6a 10 minutes ago 1.09GB 87 | wp4.2_php7.4_opt_https latest c7362eb36dac 16 minutes ago 1.74GB 88 | wp4.2_php7.4_base_https latest 2a88d5f6a925 25 minutes ago 1.08GB 89 | ``` 90 | 91 | ## How to execute the workload 92 | 93 | To execute the workload, first start a container (for NUMA gains, see section below instead): 94 | 95 | ``` 96 | docker run -it --privileged wp4.2_php7.4_opt_https 97 | ``` 98 | 99 | Now inside the container you may run: 100 | 101 | ``` 102 | ./quickrun.sh 103 | ``` 104 | 105 | ### NUMA pinning and multiple instances 106 | 107 | To take advantage of multiple instances with respect to NUMA optimizations, run 108 | ``` 109 | lscpu 110 | ``` 111 | The command will list cpu ids corresponding to cores on each NUMA node. 112 | Then you must use cpuset-cpus and cpuset-mems flags in docker run to ensure each 113 | instance is running on a single NUMA node. 114 | 115 | ### Automatic script 116 | 117 | An alternative way to execute the workload is to use the run.sh script, which will launch the workload and 118 | calculate the total TPS (transactions per second). 
119 | The example below runs 8 instances of the wp4.2_php7.4_opt_https image with NUMA pinning. 120 | ``` 121 | $ ./run.sh --image wp4.2_php7.4_opt_https --count 8 --numa-pinning 122 | ------------------------------------------------------------- 123 | Creating temporary directory /tmp/run-DHzZTK85da for logfile. 124 | 125 | ------------------------------------------------------------- 126 | Running 8 wp4.2_php7.4_opt_https instance(s) with NUMA pinning. 127 | ... 128 | ------------------------------------------------------------- 129 | All instances are completed. 130 | ------------------------------------------------------------- 131 | TPS of 8 instances: 791.3 786.65 787.78 787.33 791.65 788.88 783.91 786.39 132 | Total TPS: 6303.89 133 | ``` 134 | 135 | ## Known issues 136 | 137 | ### Docker proxy 138 | 139 | If you are building a docker image behind a corporate proxy, please see instructions here for configuration: 140 | [Docker proxy documentation](https://docs.docker.com/network/proxy/) 141 | 142 | You may also refer to this example to get up and running: 143 | ``` 144 | COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker-compose build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy 145 | ``` 146 | Note this assumes your environment variables are properly set for your network. 147 | 148 | #### Apparmor 149 | 150 | There are instances where apparmor may cause mariadb to not start, particularly when mysql is installed on the host system running apparmor. 151 | 152 | In summary, on the host system mysql is installed with an apparmor profile. Apparmor enforces rules based on binary paths. 153 | When we use `--privileged` it is essentially telling apparmor to not use a profile, however apparmor does do a search for matching paths (as I understand it) so the host mysql app armor profile blocks the read anyway. 
154 | 155 | An excerpt from the following link: 156 | 157 | https://github.com/moby/moby/issues/7512 158 | 159 | “This is because apparmor applies profiles based on the binary paths. When we run the container in privileged mode docker only tells apparmor that we are not setting the profile so leave this unconfined. However, by not specifying a profile, apparmor looks at the binary path and sees if it has any profiles matching the binary and automatically applies them. 160 | A few things that I would suggest you doing is not have the profiles installed on your host when using apparmor if you are running everything in containers. 161 | The other is you should not run a database container in privileged mode. Mysql should not need extra capabilities and you don't want to open up access to your host for a database that does not require it. Very few applications actually require privileged mode and mysql is definitely not one of them.” 162 | 163 | The workaround: 164 | 165 | To temporarily disable the app armor profile on your host run: 166 | sudo ln -s /etc/apparmor.d/usr.sbin.mysqld /etc/apparmor.d/disable/ 167 | sudo apparmor_parser -R /etc/apparmor.d/usr.sbin.mysqld 168 | 169 | Now you can run the container as usual 170 | 171 | Re-enable: 172 | 173 | sudo rm /etc/apparmor.d/disable/usr.sbin.mysqld 174 | sudo apparmor_parser -r /etc/apparmor.d/usr.sbin.mysqld 175 | sudo aa-status 176 | 177 | App armor enable/disable steps from this link: 178 | 179 | https://askubuntu.com/questions/1144497/how-to-disable-apparmor-for-mysql 180 | -------------------------------------------------------------------------------- /containers/wordpress/docker-compose.yml: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software 
without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included 12 | # in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | # OR OTHER DEALINGS IN THE SOFTWARE. 21 | # 22 | # SPDX-License-Identifier: MIT 23 | version: "3.3" 24 | services: 25 | 26 | wp4.2_php7.4_base_https: 27 | build: 28 | context: ./wp_base 29 | dockerfile: Dockerfile 30 | args: 31 | PHP_VER: 7.4.29 32 | wordpressversion: 4.2 33 | image: wp4.2_php7.4_base_https 34 | 35 | wp4.2_php7.4_opt_https: 36 | build: 37 | context: ./wp_opt 38 | dockerfile: Dockerfile 39 | args: 40 | PHP_VER: 7.4.29 41 | baseimage: wp4.2_php7.4_base_https 42 | image: wp4.2_php7.4_opt_https 43 | 44 | wp5.6_php8.0_base_https: 45 | build: 46 | context: ./wp_base 47 | dockerfile: Dockerfile 48 | args: 49 | PHP_VER: 8.0.18 50 | wordpressversion: 5.6 51 | image: wp5.6_php8.0_base_https 52 | 53 | wp5.6_php8.0_opt_https: 54 | build: 55 | context: ./wp_opt 56 | dockerfile: Dockerfile 57 | args: 58 | PHP_VER: 8.0.18 59 | baseimage: wp5.6_php8.0_base_https 60 | image: wp5.6_php8.0_opt_https -------------------------------------------------------------------------------- /containers/wordpress/run.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2021 Intel Corporation 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 20 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 
23 | # 24 | # SPDX-License-Identifier: MIT 25 | 26 | COUNT=${COUNT:-6} 27 | IMAGE_NAME=${IMAGE_NAME:-"wp_base_http"} 28 | NUMA_PINNING=${NUMA_PINNING:-0} 29 | 30 | while test $# -gt 0; do 31 | case "$1" in 32 | -h | --help) 33 | echo "run.sh - run workload and calculate total TPS" 34 | echo " " 35 | echo "run.sh [options]" 36 | echo " " 37 | echo "options:" 38 | echo "-h, --help show brief help" 39 | echo "--image specify a container image to use" 40 | echo "--count specify the number of container instances to run" 41 | echo "--numa-pinning run container with NUMA pinning" 42 | exit 0 43 | ;; 44 | --image) 45 | shift 46 | if test $# -gt 0; then 47 | export IMAGE_NAME=$1 48 | else 49 | echo "no contain image specified" 50 | exit 1 51 | fi 52 | shift 53 | ;; 54 | --count) 55 | shift 56 | if test $# -gt 0; then 57 | export COUNT=$1 58 | else 59 | echo "no running count specified" 60 | exit 1 61 | fi 62 | shift 63 | ;; 64 | --numa-pinning) 65 | export NUMA_PINNING=1 66 | shift 67 | ;; 68 | *) 69 | break 70 | ;; 71 | esac 72 | done 73 | 74 | get_cpu_pinning() { 75 | sockets_num=$(lscpu | grep "Socket(s):" | sed "s/.* //g") 76 | 77 | for cpu in $(ls -d /sys/devices/system/cpu/cpu[0-9]* | sort -t u -k 3 -n); do 78 | socket_id=$(cat $cpu/topology/physical_package_id) 79 | tmp_name=cpusets_socket${socket_id} 80 | declare ${tmp_name}+=$(cat $cpu/topology/thread_siblings_list)" " 81 | done 82 | 83 | cpu_pinning_s=() 84 | count_per_socket=$((COUNT / sockets_num)) 85 | 86 | for ((socket_id = 0; socket_id < ${sockets_num}; socket_id++)); do 87 | tmp_name=cpusets_socket${socket_id} 88 | all_cpus=($(echo ${!tmp_name} | tr ' ' '\n' | cat -n | sort -uk2 | sort -n | cut -f2- | tr '\n' ' ')) 89 | if [[ ${socket_id} -eq $((${sockets_num} - 1)) ]]; then 90 | count_per_socket=$(($count_per_socket + $COUNT % $sockets_num)) 91 | fi 92 | cpu_per_instance=$((${#all_cpus[@]} / $count_per_socket)) 93 | start=0 94 | for ((i = 1; i <= $count_per_socket; i++)); do 95 | if [[ ${i} -eq 
$count_per_socket ]]; then 96 | array=("${all_cpus[@]:${start}}") 97 | else 98 | array=("${all_cpus[@]:${start}:${cpu_per_instance}}") 99 | fi 100 | start=$((start + cpu_per_instance)) 101 | cpuset_cpus_s=$(printf ",%s" "${array[@]}") 102 | cpuset_cpus_s=${cpuset_cpus_s:1} 103 | 104 | docker_run_s="--cpuset-cpus="${cpuset_cpus_s}" --cpuset-mems="${socket_id} 105 | cpu_pinning_s+=("$docker_run_s") 106 | done 107 | done 108 | } 109 | 110 | # Calculate the worker number of each instance. Total workers number equals to 1.5X core num on this system. 111 | core_count=$(nproc) 112 | worker_num=$(echo "${core_count} * 1.5 / ${COUNT} " | bc -l) 113 | worker_num=${worker_num%.*} 114 | 115 | # Create temp directory for container output 116 | tmp_dir=$(mktemp -d -t run-XXXXXXXXXX) 117 | echo "-------------------------------------------------------------" 118 | echo "Creating temporary directory ${tmp_dir} for logfile." 119 | echo "" 120 | 121 | # Launch containers 122 | all_containers=() 123 | if [[ ${NUMA_PINNING} -eq 0 ]]; then 124 | echo "-------------------------------------------------------------" 125 | echo "Running ${COUNT} ${IMAGE_NAME} instance(s)." 126 | echo "" 127 | for ((i = 0; i < ${COUNT}; i++)); do 128 | container_id=$(docker run -d --rm -ti --privileged ${IMAGE_NAME} bash -c "./quickrun.sh \"--php-fcgi-children ${worker_num}\"") 129 | all_containers[$i]=$(docker ps -q -f id=${container_id}) 130 | done 131 | else 132 | echo "-------------------------------------------------------------" 133 | echo "Running ${COUNT} ${IMAGE_NAME} instance(s) with NUMA pinning." 
134 | echo "" 135 | get_cpu_pinning 136 | for ((i = 0; i < ${COUNT}; i++)); do 137 | container_id=$(docker run -d --rm -ti --privileged ${cpu_pinning_s[i]} ${IMAGE_NAME} bash -c "./quickrun.sh \"--php-fcgi-children ${worker_num}\"") 138 | all_containers[$i]=$(docker ps -q -f id=${container_id}) 139 | done 140 | fi 141 | 142 | # Redirect containers output to logfile in temp directory 143 | for container_id in ${all_containers[@]}; do 144 | docker logs -f ${container_id} >${tmp_dir}/${container_id}.log & 145 | done 146 | 147 | # Wait all container to be completed 148 | while true; do 149 | completed=0 150 | for container_id in ${all_containers[@]}; do 151 | if [ "$(docker ps -q -f id=${container_id})" ]; then 152 | completed=1 153 | echo "Container ${container_id} = running" 154 | else 155 | echo "Container ${container_id} = completed" 156 | fi 157 | done 158 | 159 | if [ ${completed} -eq 0 ]; then 160 | echo "-------------------------------------------------------------" 161 | echo "All instances are completed." 162 | break 163 | fi 164 | 165 | echo "" 166 | echo "-------------------------------------------------------------" 167 | echo "Sleep 5s to wait all instances to completed and rechecking..." 
168 | echo "" 169 | sleep 5 170 | done 171 | 172 | # Calculate TPS in total 173 | total_tps=0 174 | for i in $(find ${tmp_dir}/*.log); do 175 | tps=$(grep RPS $i | awk '{ print $3 }' | sed -e 's/,.*$//') 176 | tps_string="${tps_string} ${tps}" 177 | total_tps=$(echo "${total_tps} + ${tps}" | bc -l) 178 | done 179 | echo "-------------------------------------------------------------" 180 | echo "TPS of ${COUNT} instances:${tps_string}" 181 | echo "Total TPS: ${total_tps}" 182 | 183 | # Remove temp directory 184 | rm -rf ${tmp_dir} 185 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included 12 | # in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | # OR OTHER DEALINGS IN THE SOFTWARE. 
21 | # 22 | # SPDX-License-Identifier: MIT 23 | ARG PHP_VER 24 | ARG INSTALL_DIR="/pkg_install" 25 | ARG OPENSSL_TAG=OpenSSL_1_1_1f 26 | ARG SIEGE_VERSION='siege-4.1.5' 27 | ARG ASYNCH_NGINX_TAG='v0.4.7' 28 | 29 | 30 | # ----- php container ----- 31 | # used for copying built php-fpm and opcache.so 32 | # 33 | # ----- php-fpm build container ----- 34 | FROM php:${PHP_VER}-fpm AS php-fpm 35 | 36 | ENV TZ=America/Los_Angeles 37 | ENV DEBIAN_FRONTEND noninteractive 38 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime ;\ 39 | echo $TZ > /etc/timezone 40 | RUN docker-php-ext-configure pdo_mysql && \ 41 | docker-php-ext-configure mysqli && \ 42 | docker-php-ext-install -j$(nproc) mysqli pdo_mysql; \ 43 | cp /usr/local/sbin/php-fpm /php-fpm; \ 44 | cp /usr/local/lib/php/extensions/*/*.so /; 45 | 46 | # ----- The container for building the components----- 47 | # build siege, openssl, async Nginx ... 48 | # to copy the binary to worker container 49 | # ---------------------------------------------------- 50 | FROM ubuntu:20.04 AS dep-components 51 | ARG DEBIAN_FRONTEND="noninteractive" 52 | ARG TZ="America/Los_Angeles" 53 | ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1 54 | 55 | ENV USERNAME="base" 56 | ARG INSTALL_DIR 57 | 58 | RUN apt-get update && apt-get install -y \ 59 | software-properties-common \ 60 | gcc-10 \ 61 | g++-10 \ 62 | software-properties-common \ 63 | apt-transport-https \ 64 | git \ 65 | automake \ 66 | gcc \ 67 | make \ 68 | cmake \ 69 | wget \ 70 | libevent-dev \ 71 | vim \ 72 | python3-pip \ 73 | sudo \ 74 | autotools-dev \ 75 | autoconf \ 76 | build-essential \ 77 | zlib1g \ 78 | zlib1g-dev \ 79 | sysstat \ 80 | linux-tools-common \ 81 | ruby \ 82 | python3-dev \ 83 | libssl-dev \ 84 | ninja-build \ 85 | libjemalloc-dev \ 86 | pkg-config \ 87 | build-essential \ 88 | autoconf \ 89 | bison \ 90 | re2c \ 91 | libxml2-dev \ 92 | libsqlite3-dev \ 93 | php-mysql \ 94 | php \ 95 | libpcre3 \ 96 | libpcre3-dev \ 97 | libonig5 98 | 99 | RUN 
update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-10 10 && \ 100 | update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-10 10 101 | 102 | ARG OPENSSL_TAG 103 | ARG SIEGE_VERSION 104 | 105 | # Build/Install openssl 106 | RUN mkdir -p /home/${USERNAME}/openssl_build 107 | WORKDIR /home/${USERNAME}/openssl_build 108 | RUN git clone --depth 1 -b $OPENSSL_TAG https://github.com/openssl/openssl.git && \ 109 | cd openssl && \ 110 | mkdir -p /home/${USERNAME}/openssl_install/lib/engines-1.1 && \ 111 | ./config --prefix=/home/${USERNAME}/openssl_install \ 112 | -Wl,-rpath=/home/${USERNAME}/openssl && \ 113 | make update && \ 114 | make -j && \ 115 | mkdir -p $INSTALL_DIR ; \ 116 | make install DESTDIR=$INSTALL_DIR && \ 117 | rm -rf /home/${USERNAME}/openssl_build 118 | 119 | 120 | # build siege with ssl, Uninstall & Reinstall 121 | # Build and install siege 4.1.5. 122 | RUN cd /home/${USERNAME}; \ 123 | wget http://download.joedog.org/siege/${SIEGE_VERSION}.tar.gz && \ 124 | tar zxf ${SIEGE_VERSION}.tar.gz 125 | WORKDIR /home/${USERNAME}/${SIEGE_VERSION} 126 | RUN ./configure --with-ssl=/usr/bin/openssl && \ 127 | make -j && \ 128 | sudo make uninstall && \ 129 | mkdir -p $INSTALL_DIR/usr/local/etc && \ 130 | sudo make install DESTDIR=$INSTALL_DIR ; 131 | 132 | ARG ASYNCH_NGINX_TAG 133 | # Build/Install AYNCH NGINX, no QAT 134 | WORKDIR /home/${USERNAME}/nginx_build 135 | RUN git clone --depth 1 -b $ASYNCH_NGINX_TAG https://github.com/intel/asynch_mode_nginx.git nginx && \ 136 | cd nginx && \ 137 | ./configure --prefix=/usr/ \ 138 | --with-http_ssl_module \ 139 | '--with-cc-opt=-DNGX_SECURE_MEM \ 140 | -I /home/${USERNAME}/openssl_install/include \ 141 | -Wno-error=deprecated-declarations -Wimplicit-fallthrough=0' \ 142 | '--with-ld-opt=-Wl,-rpath=/home/${USERNAME}/openssl_install/lib \ 143 | -L /home/${USERNAME}/openssl_install/lib' && \ 144 | make -j && \ 145 | make install DESTDIR=$INSTALL_DIR && \ 146 | rm -rf /home/${USERNAME}/nginx_build 147 | 148 | # 
----- https base container -----
149 | #
150 | # base container for a;
151 | #   WP 4.2/5.6
152 | #   PHP 7.4/8.0
153 | # --------------------------------
154 | FROM ubuntu:20.04
155 | LABEL authors="ping.zhao@intel.com, yuhan.yang@intel.com"
156 | ENV USERNAME="base"
157 | 
158 | ARG INSTALL_DIR
159 | ARG DEBIAN_FRONTEND="noninteractive"
160 | ARG TZ="America/Los_Angeles"
161 | ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1
162 | 
163 | # Install required packages that are not included in ubuntu core image and mariadb
164 | RUN apt-get update && apt-get install -y \
165 |     software-properties-common \
166 |     apt-transport-https \
167 |     vim \
168 |     git \
169 |     wget \
170 |     python3-pip \
171 |     sudo \
172 |     linux-tools-common \
173 |     libssl-dev \
174 |     php-mysql \
175 |     php \
176 |     libonig5 \
177 |     libpcre3 \
178 |     libpcre3-dev \
179 |     mariadb-server && \
180 |     rm -rf /var/lib/apt/lists/*
181 | 
182 | # Create a new Linux account (member of sudo, passwordless sudo)
183 | RUN useradd -rm -d /home/${USERNAME} -s /bin/bash -g root -G sudo -u 1001 ${USERNAME} && \
184 |     echo "${USERNAME} ALL=(ALL) NOPASSWD:ALL" | tee -a /etc/sudoers
185 | 
186 | # Switch to ${USERNAME}
187 | USER ${USERNAME}
188 | WORKDIR /home/${USERNAME}
189 | 
190 | # Clone and install oss-performance
191 | # If wordpressversion is not 4.2 (unset/empty is treated as 4.2): download that release, point WordpressTarget.php at it, swap in the v5 URL list and database dump, and remove the bundled 4.2.0 tarball
192 | ARG wordpressversion
193 | RUN git clone --depth 1 -b v1.3 https://github.com/intel/Updates-for-OSS-Performance oss-performance && \
194 |     cd oss-performance && \
195 |     cd /home/${USERNAME}/oss-performance/targets/wordpress && \
196 |     if [ "${wordpressversion:-4.2}" != "4.2" ] ; then \
197 |         wget https://wordpress.org/wordpress-${wordpressversion}.tar.gz && \
198 |         sed -i "s/4\.2\.0/${wordpressversion}/g" WordpressTarget.php && \
199 |         mv WordpressTarget_v5.urls WordpressTarget.urls && \
200 |         mv dbdump_v5.sql.gz dbdump.sql.gz && \
201 |         rm wordpress-4.2.0.tar.gz; \
202 |     fi
203 | 
204 | WORKDIR
/home/${USERNAME}/oss-performance
205 | RUN wget https://getcomposer.org/installer -O composer-setup.php && \
206 |     php composer-setup.php && \
207 |     php composer.phar install && \
208 |     # Basic environment tuning: raise open-file limits for all users (each limits.conf entry needs a leading <domain> field)
209 |     printf '* soft nofile 1000000\n* hard nofile 1000000\n' | sudo tee -a /etc/security/limits.conf
210 | 
211 | # MariaDB Tuning to disable query cache
212 | COPY files/1s-bkm.j2 /home/${USERNAME}
213 | COPY files/2s-bkm.j2 /home/${USERNAME}
214 | RUN sudo cp /home/${USERNAME}/2s-bkm.j2 /etc/mysql/my.cnf && \
215 |     # Create new MariaDB account "wp_bench" and database "wp_bench"
216 |     sudo service mysql start && \
217 |     sleep 1 && \
218 |     sudo mysqladmin -u root password "" && \
219 |     sudo mysql -u root -e "CREATE USER 'wp_bench'@'localhost' IDENTIFIED BY 'wp_bench'" && \
220 |     sudo mysql -u root -e "GRANT ALL PRIVILEGES on *.* to 'wp_bench'@'localhost' IDENTIFIED BY 'wp_bench'" && \
221 |     sudo mysql -u root -e "CREATE DATABASE wp_bench" && \
222 |     sudo mysql -u root -e "FLUSH PRIVILEGES" && \
223 |     sudo service mysql stop
224 | 
225 | # https modules
226 | USER root
227 | 
228 | # Binary copy openssl, siege, ASYNCH NGINX no QAT,
229 | COPY --from=dep-components --chown=${USERNAME}:root $INSTALL_DIR /
230 | 
231 | # Comment out RANDFILE entry in /etc/ssl/openssl.cnf (-i and -E are separate flags: "-iE" would mean "in-place with backup suffix E")
232 | # Create required certificates for https
233 | WORKDIR /home/${USERNAME}/certificates
234 | RUN sed -i -E 's/^RANDFILE(\s+=\s+\$ENV::HOME\/\.rnd)/#RANDFILE\1/' /etc/ssl/openssl.cnf && \
235 |     mkdir -p /home/${USERNAME}/certificates/ssl && \
236 |     openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:4096 -keyout server.key -out server.crt -subj "/C=US/ST=OR/L=IN/O=IN/OU=IN/CN=$(hostname)" && \
237 |     openssl ecparam -genkey -out key.pem -name secp384r1 && \
238 |     openssl req -x509 -new -key key.pem -out cert.pem -subj "/C=US/ST=OR/L=IN/O=IN/OU=IN/CN=$(hostname)" && \
239 |     chown -R ${USERNAME} /home/${USERNAME}/certificates
240 | 
241 | 
242 | WORKDIR /home/${USERNAME}/oss-performance
243 | # Modify
WordPressTarget urls for https and temporary fix 244 | RUN sed -i 's/http/https/' /home/${USERNAME}/oss-performance/targets/wordpress/WordpressTarget.urls && \ 245 | sed -i ':currentline;N;$!bcurrentline;s/invariant.*);//g' /home/${USERNAME}/oss-performance/targets/wordpress/WordpressTarget.php 246 | 247 | # Patch https into oss-performance 248 | COPY --chown=${USERNAME}:root files/https_oss_performance.patch /home/${USERNAME}/oss-performance 249 | COPY --chown=${USERNAME}:root files/update_nginx_workers.sh /usr/local/bin/update_nginx_workers.sh 250 | RUN git apply https_oss_performance.patch 251 | 252 | COPY --chown=${USERNAME}:root files/ssl-params.conf /home/${USERNAME}/certificates/ssl 253 | COPY --chown=${USERNAME}:root files/nginx.conf.in /home/${USERNAME}/oss-performance/conf/nginx 254 | COPY --chown=${USERNAME}:root files/entrypoint.sh /usr/local/bin/entrypoint.sh 255 | RUN sed -r --expression='s/(exec "\$\@")/\/usr\/local\/bin\/update_nginx_workers\.sh\n\1/g' -i /usr/local/bin/entrypoint.sh 256 | 257 | USER ${USERNAME} 258 | COPY --from=php-fpm --chown=${USERNAME}:root /php-fpm /home/${USERNAME}/oss-performance 259 | COPY --from=php-fpm --chown=${USERNAME}:root /opcache.so /home/${USERNAME}/oss-performance 260 | COPY --from=php-fpm --chown=${USERNAME}:root /pdo_mysql.so /home/${USERNAME}/oss-performance 261 | COPY --from=php-fpm --chown=${USERNAME}:root /mysqli.so /home/${USERNAME}/oss-performance 262 | COPY --chown=${USERNAME}:root files/quickrun.sh /home/${USERNAME}/oss-performance 263 | COPY --chown=${USERNAME}:root files/php-base.ini /home/${USERNAME}/oss-performance/conf/php.ini 264 | RUN chmod +x /home/${USERNAME}/oss-performance/quickrun.sh 265 | 266 | ENTRYPOINT ["/usr/local/bin/entrypoint.sh"] 267 | CMD [ "bash" ] 268 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/1s-bkm.j2: -------------------------------------------------------------------------------- 1 | # Copyright (C) 
2021 Intel Corporation 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included 12 | # in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | # OR OTHER DEALINGS IN THE SOFTWARE. 
21 | # 22 | # SPDX-License-Identifier: MIT 23 | # this is only for the mysqld standalone daemon 24 | [mysqld] 25 | symbolic-links=0 26 | 27 | open_files_limit=4000 28 | query_cache_limit=128K 29 | query_cache_size=1M 30 | query_cache_type=1 31 | bind-address = 0.0.0.0 32 | key_buffer = 16M # TODO: This should actually be key_buffer_size 33 | max_allowed_packet = 16M 34 | thread_stack = 192K 35 | thread_cache_size = 8 36 | skip-external-locking 37 | key_buffer_size = 384M 38 | max_allowed_packet = 1M 39 | table_open_cache = 512 # TODO: Find the optimal value for this 40 | sort_buffer_size = 2M 41 | read_buffer_size = 2M 42 | read_rnd_buffer_size = 8M 43 | myisam_sort_buffer_size = 64M 44 | max_connections = 1000 45 | myisam-recover = BACKUP 46 | #skip-grant-tables 47 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/2s-bkm.j2: -------------------------------------------------------------------------------- 1 | # Copyright (C) 2021 Intel Corporation 2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files 5 | # (the "Software"), to deal in the Software without restriction, 6 | # including without limitation the rights to use, copy, modify, merge, 7 | # publish, distribute, sublicense, and/or sell copies of the Software, 8 | # and to permit persons to whom the Software is furnished to do so, 9 | # subject to the following conditions: 10 | # 11 | # The above copyright notice and this permission notice shall be included 12 | # in all copies or substantial portions of the Software. 13 | # 14 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 17 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | # OR OTHER DEALINGS IN THE SOFTWARE. 21 | # 22 | # SPDX-License-Identifier: MIT 23 | # this is only for the mysqld standalone daemon 24 | [mysqld] 25 | symbolic-links=0 26 | 27 | open_files_limit=4000 28 | query_cache_limit=0 29 | query_cache_size=0 30 | query_cache_type=0 31 | bind-address = 0.0.0.0 32 | key_buffer = 16M # TODO: This should actually be key_buffer_size 33 | max_allowed_packet = 16M 34 | thread_stack = 192K 35 | thread_cache_size = 2048 36 | skip-external-locking 37 | key_buffer_size = 384M 38 | max_allowed_packet = 1M 39 | table_open_cache = 512 # TODO: Find the optimal value for this 40 | sort_buffer_size = 2M 41 | read_buffer_size = 2M 42 | read_rnd_buffer_size = 8M 43 | myisam_sort_buffer_size = 64M 44 | 45 | innodb_buffer_pool_size = 8G 46 | innodb_buffer_pool_instances = 8 47 | aria_pagecache_buffer_size = 128M 48 | tmp_table_size = 1G 49 | max_heap_table_size = 1G 50 | max_connections = 1000 51 | myisam-recover = BACKUP 52 | #skip-grant-tables 53 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2021 Intel Corporation 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files 6 | # (the "Software"), to deal in the Software without restriction, 7 | # including without limitation the rights to use, copy, modify, merge, 8 | # publish, distribute, sublicense, and/or sell copies of the Software, 9 | # and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | # 12 
| # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 19 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 20 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 21 | # OR OTHER DEALINGS IN THE SOFTWARE. 22 | # 23 | # SPDX-License-Identifier: MIT 24 | 25 | # Disable kernel ASLR (address space layout randomization) 26 | echo -e "Disable kernel ASLR (address space layout randomization)" 27 | sudo sysctl -w kernel.randomize_va_space=0 28 | 29 | # Flush file system buffers 30 | echo -e "Flushing file system buffers" 31 | sudo sync 32 | # Free pagecache, dentries and inodes 33 | echo -e "Free pagecache, dentries and inodes" 34 | sudo sh -c 'echo 3 >/proc/sys/vm/drop_caches' 35 | # Free swap memory 36 | echo -e "Free swap memory" 37 | sudo swapoff -a 38 | sudo swapon -a 39 | 40 | # Increase nf_conntrack hashtable size to 512000 41 | echo -e "Increasing nf_conntrack hashtable size to 512000" 42 | NF_CONNTRACK_MAX=/proc/sys/net/netfilter/nf_conntrack_max 43 | if [ -e $NF_CONNTRACK_MAX ]; then 44 | echo 512000 | sudo tee $NF_CONNTRACK_MAX 45 | fi 46 | 47 | # Set CPU scaling governor to max performance 48 | echo -e "Setting CPU scaling governor to max performance" 49 | if [ -f /sys/devices/system/cpu/cpu0/cpufreq/scaling_governor ]; then 50 | for i in $(seq 0 $(($(nproc)-1))); do 51 | echo "performance" | sudo tee /sys/devices/system/cpu/cpu"$i"/cpufreq/scaling_governor 52 | done 53 | fi 54 | 55 | # Set tcp socket reuse 56 | echo -e "Setting TCP TIME WAIT" 57 | echo 1 | sudo tee /proc/sys/net/ipv4/tcp_tw_reuse 58 | 59 | # Start 
database daemon 60 | echo -e "Starting database daemon" 61 | sudo service mysql start 62 | 63 | exec "$@" 64 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/https_oss_performance.patch: -------------------------------------------------------------------------------- 1 | diff --git a/base/PerfTarget.php b/base/PerfTarget.php 2 | index 93e9b1d..7c10b00 100644 3 | --- a/base/PerfTarget.php 4 | +++ b/base/PerfTarget.php 5 | @@ -19,9 +19,9 @@ abstract class PerfTarget { 6 | } 7 | 8 | final public function sanityCheck(): void { 9 | - $ctx = stream_context_create(['http' => ['timeout' => 30]]); 10 | + $ctx = stream_context_create(['http' => ['timeout' => 30],'ssl' => ['verify_peer' => false, 'verify_peer_name' => false]]); 11 | $url = 12 | - 'http://'. 13 | + 'https://'. 14 | gethostname(). 15 | ':'. 16 | PerfSettings::HttpPort(). 17 | diff --git a/targets/wordpress/WordpressTarget.php b/targets/wordpress/WordpressTarget.php 18 | index d009dbd..55d013f 100644 19 | --- a/targets/wordpress/WordpressTarget.php 20 | +++ b/targets/wordpress/WordpressTarget.php 21 | @@ -50,7 +50,7 @@ final class WordpressTarget extends PerfTarget { 22 | $this->options->proxygen 23 | ? 
PerfSettings::BackendPort() 24 | : PerfSettings::HttpPort(); 25 | - $root = 'http://'.gethostname().':'.$visible_port; 26 | + $root = 'https://'.gethostname().':'.$visible_port; 27 | 28 | $conn = mysqli_connect($this->options->dbHost, 'wp_bench', 'wp_bench'); 29 | $db_selected = mysqli_select_db($conn, 'wp_bench'); 30 | @@ -100,9 +100,10 @@ final class WordpressTarget extends PerfTarget { 31 | } 32 | 33 | private function unfreezeRequest(PerfOptions $options): void { 34 | - $url = 'http://'.gethostname().':'.PerfSettings::HttpPort().'/'; 35 | + $url = 'https://'.gethostname().':'.PerfSettings::HttpPort().'/'; 36 | $ctx = stream_context_create( 37 | - ['http' => ['timeout' => $options->maxdelayUnfreeze]] 38 | + ['http' => ['timeout' => $options->maxdelayUnfreeze], 39 | + 'ssl' => ['verify_peer' => false, 'verify_peer_name' => false]] 40 | ); 41 | $data = file_get_contents($url, /* include path = */ false, $ctx); 42 | assert( 43 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/nginx.conf.in: -------------------------------------------------------------------------------- 1 | worker_processes 28; # Depends on #vcpus 2 | 3 | error_log __NGINX_TEMP_DIR__/nginx-error.log; 4 | pid __NGINX_PID_FILE__; 5 | 6 | worker_rlimit_nofile 1000000; 7 | 8 | 9 | events { 10 | use epoll; 11 | worker_connections 8192; 12 | multi_accept on; 13 | accept_mutex on; 14 | } 15 | 16 | http { 17 | ssl_buffer_size 65536; 18 | include __NGINX_CONFIG_ROOT__/mime.types; 19 | default_type application/octet-stream; 20 | 21 | log_format main '$status $body_bytes_sent $request_time "$request"'; 22 | 23 | types_hash_max_size 4096; 24 | types_hash_bucket_size 64; 25 | 26 | sendfile on; 27 | #tcp_nopush on; 28 | 29 | keepalive_timeout __NGINX_KEEPALIVE_TIMEOUT__; 30 | fastcgi_read_timeout __NGINX_FASTCGI_READ_TIMEOUT__; 31 | 32 | #gzip on; 33 | 34 | server { 35 | listen [::]:__HTTP_PORT__ reuseport backlog=131072 so_keepalive=off ssl; 
36 | listen __HTTP_PORT__ reuseport backlog=131072 so_keepalive=off ssl; 37 | 38 | keepalive_timeout 0s; 39 | tcp_nopush on; 40 | tcp_nodelay on; 41 | ssl_verify_client off; 42 | ssl_session_tickets off; 43 | lingering_close off; 44 | lingering_time 1; 45 | ssl_asynch off; 46 | 47 | # https certificate lines 48 | ssl_certificate /home/base/certificates/server.crt; 49 | ssl_certificate_key /home/base/certificates/server.key; 50 | ssl_certificate /home/base/certificates/cert.pem; 51 | ssl_certificate_key /home/base/certificates/key.pem; 52 | 53 | ssl_session_timeout 300s; 54 | ssl_prefer_server_ciphers on; 55 | 56 | # The following 2 lines are for TLSv1.2 settings 57 | # To use TLSv1.3, comment these out and uncomment the next line 58 | # ssl_protocols TLSv1.2; 59 | # ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256; 60 | 61 | # Uncomment the line below to use TLSv1.3 62 | # The SSL cipher TLS_AES_256_GCM_SHA384 is automatically selected by nginx 63 | # and cannot be specified in this config file 64 | ssl_protocols TLSv1.3; 65 | # ssl_ciphers TLS_AES_256_GCM_SHA384; 66 | 67 | # secp384r1 curve applies to both TLSv1.2 and TLSv1.3 68 | ssl_ecdh_curve secp384r1; 69 | 70 | access_log __NGINX_TEMP_DIR__/access.log main; 71 | client_body_temp_path __NGINX_TEMP_DIR__/client_temp; 72 | proxy_temp_path __NGINX_TEMP_DIR__/proxy_temp; 73 | fastcgi_temp_path __NGINX_TEMP_DIR__/fastcgi_temp; 74 | uwsgi_temp_path __NGINX_TEMP_DIR__/uwsgi_temp; 75 | scgi_temp_path __NGINX_TEMP_DIR__/scgi_temp; 76 | root __FRAMEWORK_ROOT__; 77 | index index.php; 78 | 79 | location / { 80 | try_files $uri $uri/ @rewrites; 81 | expires 7d; 82 | } 83 | 84 | location @rewrites { 85 | rewrite ^ /index.php last; 86 | } 87 | 88 | location ~ [^/]\.php(/|$) { 89 | fastcgi_split_path_info ^(.+?\.php)(/.*)$; 90 | if (!-f $document_root$fastcgi_script_name) { 91 | return 404; 92 | } 93 | 94 | rewrite /index.php / break; 95 | __PROXY_PASS__; 96 | fastcgi_index index.php; 97 | fastcgi_param SCRIPT_FILENAME 
$document_root$fastcgi_script_name; 98 | # Siege sets Accept-Encoding: gzip. php5 and php-ng ignore it, but 99 | # HHVM automatically compresses the output. 100 | # 101 | # This means that it's not a like-for-like comparison, and also means 102 | # that we can't compare bytes sent (nginx) or received (siege) as a 103 | # sanity check between the two; this sucks, so remove the header. 104 | # 105 | # Issue for making HHVM match PHP behavior: 106 | # https://github.com/facebook/hhvm/issues/3744 107 | fastcgi_param HTTP_ACCEPT_ENCODING ""; 108 | proxy_set_header Accept-Encoding ""; 109 | proxy_set_header Host 127.0.0.1:__BACKEND_PORT__; 110 | include __NGINX_CONFIG_ROOT__/fastcgi_params; 111 | } 112 | } 113 | 114 | server { 115 | listen [::]:__HTTP_ADMIN_PORT__ default_server; 116 | listen __HTTP_ADMIN_PORT__ default_server; 117 | access_log __NGINX_TEMP_DIR__/admin-access.log main; 118 | client_body_temp_path __NGINX_TEMP_DIR__/admin-client_temp; 119 | proxy_temp_path __NGINX_TEMP_DIR__/admin-proxy_temp; 120 | fastcgi_temp_path __NGINX_TEMP_DIR__/admin-fastcgi_temp; 121 | uwsgi_temp_path __NGINX_TEMP_DIR__/admin-uwsgi_temp; 122 | scgi_temp_path __NGINX_TEMP_DIR__/admin-scgi_temp; 123 | 124 | location / { 125 | proxy_set_header Host 127.0.0.1; 126 | __ADMIN_PROXY_PASS__; 127 | include __NGINX_CONFIG_ROOT__/fastcgi_params; 128 | } 129 | } 130 | } 131 | 132 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/php-base.ini: -------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; 3 | ; Permission is hereby granted, free of charge, to any person obtaining a 4 | ; copy of this software and associated documentation files 5 | ; (the "Software"), to deal in the Software without restriction, 6 | ; including without limitation the rights to use, copy, modify, merge, 7 | ; publish, distribute, sublicense, and/or sell copies of the Software, 8 
| ; and to permit persons to whom the Software is furnished to do so, 9 | ; subject to the following conditions: 10 | ; 11 | ; The above copyright notice and this permission notice shall be included 12 | ; in all copies or substantial portions of the Software. 13 | ; 14 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | ; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | ; OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | ; OR OTHER DEALINGS IN THE SOFTWARE. 21 | ; 22 | ; SPDX-License-Identifier: MIT 23 | ;;;;;;;;;;; 24 | ; Generic ; 25 | ;;;;;;;;;;; 26 | max_execution_time=600 27 | memory_limit=128M 28 | error_reporting=0 29 | display_errors=0 30 | log_errors=0 31 | user_ini.filename= 32 | realpath_cache_size=2M 33 | cgi.check_shebang_line=0 34 | date.timezone=America/Los_Angeles 35 | assert.active=false 36 | 37 | ;;;;;;;;;;;;; 38 | ; HHVM Only ; 39 | ;;;;;;;;;;;;; 40 | hhvm.php7.all=0 41 | ; This is a no-op on production builds, but makes it possible to get meaningful 42 | ; profiles from debug builds 43 | hhvm.hhir_generate_asserts=0 44 | ; Off by default in OSS builds, but on by default in Facebook's internal builds; 45 | ; this provides consistency for FB engineers, no-op for others. 46 | hhvm.force_hh=0 47 | ; When running, HHVM maintains /tmp/perf-$PID.map mapping memory addresses to 48 | ; jitted functions - but this gets removed when HHVM exits. This option stops 49 | ; HHVM from automatically removing it, so allows you to use 'perf report' after 50 | ; the process has exited. No impact on HHVM's actual performance. 
51 | hhvm.keep_perf_pid_map=1 52 | 53 | ;;;;;;;;;;;;;;; 54 | ; PHP5/7 Only ; 55 | ;;;;;;;;;;;;;;; 56 | extension=/home/base/oss-performance/pdo_mysql.so 57 | extension=/home/base/oss-performance/mysqli.so 58 | zend_extension=/home/base/oss-performance/opcache.so 59 | opcache.enable_cli=1 60 | opcache.fast_shutdown=1 61 | opcache.validate_timestamps=1 62 | opcache.revalidate_freq=60 63 | opcache.use_cwd=1 64 | opcache.max_accelerated_files=100000 65 | opcache.max_wasted_percentage=5 66 | opcache.memory_consumption=1024 67 | opcache.consistency_checks=0 68 | 69 | zend.assertions=-1 70 | zend.detect_unicode=0 71 | mysqlnd.collect_statistics=0 72 | opcache.huge_code_pages=0 73 | opcache.optimization_level=-1 74 | -------------------------------------------------------------------------------- /containers/wordpress/wp_base/files/quickrun.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Copyright (C) 2021 Intel Corporation 3 | # 4 | # Permission is hereby granted, free of charge, to any person obtaining a 5 | # copy of this software and associated documentation files 6 | # (the "Software"), to deal in the Software without restriction, 7 | # including without limitation the rights to use, copy, modify, merge, 8 | # publish, distribute, sublicense, and/or sell copies of the Software, 9 | # and to permit persons to whom the Software is furnished to do so, 10 | # subject to the following conditions: 11 | # 12 | # The above copyright notice and this permission notice shall be included 13 | # in all copies or substantial portions of the Software. 14 | # 15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL
18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
19 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21 | # OR OTHER DEALINGS IN THE SOFTWARE.
22 | #
23 | # SPDX-License-Identifier: MIT
24 | # Start MariaDB, then run the oss-performance WordPress target; every script argument is forwarded to perf.php unchanged.
25 | sudo service mysql start
26 | php perf.php --php=/home/base/oss-performance/php-fpm --wordpress --i-am-not-benchmarking --siege=/usr/local/bin/siege "$@"
27 | 
--------------------------------------------------------------------------------
/containers/wordpress/wp_base/files/ssl-params.conf:
--------------------------------------------------------------------------------
1 | ssl_protocols TLSv1.2;
2 | ssl_prefer_server_ciphers on;
3 | ssl_dhparam /home/base/certificates/ssl/dhparam.pem;
4 | ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256;
5 | ssl_session_timeout 10m;
6 | ssl_session_cache shared:SSL:10m;
7 | resolver 8.8.8.8 8.8.4.4 valid=300s;
8 | resolver_timeout 5s;
9 | add_header X-Frame-Options DENY;
10 | add_header X-Content-Type-Options nosniff;
11 | add_header X-XSS-Protection "1; mode=block";
12 | 
13 | 
--------------------------------------------------------------------------------
/containers/wordpress/wp_base/files/update_nginx_workers.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # Copyright (C) 2021 Intel Corporation
3 | #
4 | # Permission is hereby granted, free of charge, to any person obtaining a
5 | # copy of this software and associated documentation files
6 | # (the "Software"), to deal in the Software without restriction,
7 | # including without limitation the rights to use, copy, modify, merge,
8 | # publish, distribute, sublicense, and/or sell copies of the Software,
9 | # and to permit persons to whom the Software is furnished to do so,
10 | # subject to the following conditions:
11 | #
12 | # The above copyright notice and this permission notice shall be included
13 | # in all copies or substantial portions of the Software.
14 | #
15 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
19 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
21 | # OR OTHER DEALINGS IN THE SOFTWARE.
22 | #
23 | # SPDX-License-Identifier: MIT
24 | 
25 | # Update nginx config to match number of vcpus
26 | # The count is parsed from the "CPU(s):" line of lscpu; if nothing can be parsed,
27 | # fall back to nproc so an empty value is never written into worker_processes.
28 | workers=$(lscpu | sed -nr --expression='s/^CPU\(s\)\:\s+([0-9]+)/\1/pg')
29 | export workers=${workers:-$(nproc)}
30 | sed -r --expression='s/(worker_processes).*/\1 '"$workers"'\;/g' -i /home/${USERNAME}/oss-performance/conf/nginx/nginx.conf.in
31 | 
--------------------------------------------------------------------------------
/containers/wordpress/wp_opt/Dockerfile:
--------------------------------------------------------------------------------
1 | ARG baseimage
2 | ARG PHP_VER
3 | ARG INSTALL_DIR="/pkg_install"
4 | 
5 | ARG IPP_CRYPTO_TAG=ippcp_2021.3
6 | ARG QAT_ENGINE_TAG=v0.6.5
7 | ARG ASYNCH_NGINX_TAG=v0.4.7
8 | ARG IPSEC_MB_TAG=v0.55
9 | 
10 | 
11 | # ----- TEMP container -----
12 | # used for copying built php-fpm and opcache.so
13 | # for bolting.
14 | # ----- php-fpm build container: builds php-fpm and opcache.so with relocations kept (--emit-relocs) -----
15 | FROM php:${PHP_VER}-fpm AS php-fpm
16 | LABEL authors="anthony.s.pray@intel.com"
17 | ARG PHP_VER
18 | 
19 | ENV TZ=America/Los_Angeles
20 | ENV DEBIAN_FRONTEND=noninteractive
21 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime ;\
22 |     echo $TZ > /etc/timezone
23 | RUN apt-get update && \
24 |     apt-get install -y \
25 |         libonig-dev \
26 |         libsqlite3-dev \
27 |         zlib1g-dev \
28 |         libpng-dev \
29 |         libssl-dev \
30 |         libxml2-dev
31 | 
32 | WORKDIR /usr/src/php
33 | COPY files/0001-BOLT-PHP${PHP_VER}-GCC-option.patch ./
34 | RUN docker-php-source extract && \
35 |     patch -p1 < 0001-BOLT-PHP${PHP_VER}-GCC-option.patch && \
36 |     ./configure \
37 |         --enable-fpm \
38 |         --with-mysqli \
39 |         --with-pdo-mysql \
40 |         --enable-pcntl && \
41 |     EXTRA_CFLAGS="-g -fno-reorder-blocks-and-partition" \
42 |     LDFLAGS="-Wl,--emit-relocs,-znow" \
43 |     make -j && \
44 |     cp /usr/src/php/sapi/fpm/php-fpm /php-fpm && \
45 |     cp /usr/src/php/modules/opcache.so /opcache.so && \
46 |     docker-php-source delete
47 | 
48 | # ----- llvm-bolt build container -----
49 | FROM ubuntu:20.04 AS llvm-bolt
50 | LABEL authors="anthony.s.pray@intel.com"
51 | ENV TZ=America/Los_Angeles
52 | ENV DEBIAN_FRONTEND=noninteractive
53 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime ;\
54 |     echo $TZ > /etc/timezone
55 | RUN apt-get update && \
56 |     apt-get install -y --no-install-recommends \
57 |         ca-certificates \
58 |         git \
59 |         build-essential \
60 |         cmake \
61 |         ninja-build \
62 |         python3 \
63 |         libjemalloc-dev \
64 |         python3-psutil && \
65 |     rm -rf /var/lib/apt/lists
66 | 
67 | WORKDIR /home/bolt
68 | RUN git clone --depth 1 -b llvmorg-14.0.1 https://github.com/llvm/llvm-project
69 | 
70 | ARG LLVM_TARGETS="X86"
71 | RUN mkdir build; \
72 |     cd build; \
73 |     cmake -G Ninja ../llvm-project/llvm \
74 |         -DLLVM_ENABLE_PROJECTS="bolt" \
75 |         -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS} \
76 |         -DCMAKE_BUILD_TYPE=Release \
77 |         -DLLVM_ENABLE_ASSERTIONS=ON \
78 |         -DCMAKE_EXE_LINKER_FLAGS="-Wl,--push-state
-Wl,-whole-archive -ljemalloc_pic -Wl,--pop-state -lpthread -lstdc++ -lm -ldl" \ 79 | -DCMAKE_INSTALL_PREFIX=/home/bolt/install; \ 80 | ninja check-bolt; \ 81 | ninja install-llvm-bolt \ 82 | install-perf2bolt \ 83 | install-merge-fdata \ 84 | install-llvm-boltdiff \ 85 | install-bolt_rt 86 | 87 | 88 | # ----- bolting container ----- 89 | FROM ubuntu:20.04 as bolter 90 | WORKDIR /opt 91 | ARG PHP_VER 92 | COPY --from=llvm-bolt /home/bolt/install /usr/local 93 | COPY --from=php-fpm /php-fpm /opt/ 94 | COPY --from=php-fpm /opcache.so /opt/ 95 | COPY files/${PHP_VER}-perf.fdata /opt/perf.fdata 96 | RUN printf '\nProcessing llvm-bolting...\n perf.fdata -> php-fpm.bolt\n';\ 97 | llvm-bolt \ 98 | /opt/php-fpm \ 99 | -o /opt/php-fpm.bolt \ 100 | -data=/opt/perf.fdata \ 101 | -reorder-blocks=cache+ \ 102 | -reorder-functions=hfsort+ \ 103 | -split-functions=3 \ 104 | -split-all-cold \ 105 | -split-eh \ 106 | -dyno-stats; 107 | 108 | 109 | # --------------- opt container --------------- 110 | FROM $baseimage 111 | MAINTAINER "ping.zhao@intel.com" 112 | 113 | ARG INSTALL_DIR 114 | 115 | RUN sudo -E apt-get update && sudo -E apt-get install -y \ 116 | software-properties-common \ 117 | gcc-10 \ 118 | g++-10 \ 119 | software-properties-common \ 120 | apt-transport-https \ 121 | git \ 122 | automake \ 123 | gcc \ 124 | make \ 125 | cmake \ 126 | libevent-dev \ 127 | vim \ 128 | python3-pip \ 129 | sudo \ 130 | autotools-dev \ 131 | autoconf \ 132 | libtool \ 133 | pkg-config \ 134 | nasm 135 | 136 | 137 | 138 | WORKDIR /home/${USERNAME}/oss-performance 139 | COPY --chown=${USERNAME}:root files/nginx.conf.in /home/${USERNAME}/oss-performance/conf/nginx 140 | COPY --from=bolter --chown=${USERNAME}:root /opt/php-fpm.bolt /home/${USERNAME}/oss-performance/php-fpm 141 | COPY --from=bolter --chown=${USERNAME}:root /opt/opcache.so /home/${USERNAME}/oss-performance/ 142 | 143 | 144 | 145 | ARG IPP_CRYPTO_TAG 146 | ARG QAT_ENGINE_TAG 147 | ARG ASYNCH_NGINX_TAG 148 | ARG IPSEC_MB_TAG 149 
|
150 | ARG iodlrlp=1
151 | ARG query=1
152 | 
153 | # Optimization: Set environment variable to have zend use large pages
154 | ENV USE_ZEND_ALLOC_HUGE_PAGES=1
155 | 
156 | # Optimization: Build and install iodlr huge pages library (commit_id: 01f4985) for use with mariadb
157 | RUN if [ "$iodlrlp" = "1" ] ; then \
158 |         cd /home/${USERNAME} && \
159 |         git clone --depth 1 -b v1.0.0 https://github.com/intel/iodlr && \
160 |         cd iodlr && \
161 |         cd large_page-c && \
162 |         make -f Makefile.preload && \
163 |         sudo cp liblppreload.so /usr/lib/ && \
164 |         sudo sed -i 's/\/usr\/bin\/mysqld_safe/LD_PRELOAD=\/usr\/lib\/liblppreload.so \/usr\/bin\/mysqld_safe/' /etc/init.d/mysql && \
165 |         rm -rf /home/${USERNAME}/iodlr/ ; \
166 |     fi
167 | 
168 | ARG mariadbconf=1s
169 | # Optimization: Query cache optimization for single socket operation (unless over-ridden)
170 | RUN if [ "$query" = "1" ] ; then sudo cp /home/${USERNAME}/${mariadbconf}-bkm.j2 /etc/mysql/my.cnf ; fi
171 | 
172 | # Install the nasm 2.15.05 .deb (best effort; nasm is also installed via apt above), then build OpenSSL 1.1.1k into /opt/openssl/1.1.1k, stopping at the first failed step
173 | WORKDIR /home/${USERNAME}/
174 | RUN wget http://archive.ubuntu.com/ubuntu/pool/universe/n/nasm/nasm_2.15.05-1_amd64.deb ; \
175 |     sudo dpkg -i nasm_2.15.05-1_amd64.deb ;\
176 |     wget https://www.openssl.org/source/openssl-1.1.1k.tar.gz && \
177 |     tar xf openssl-1.1.1k.tar.gz && \
178 |     cd openssl-1.1.1k && \
179 |     ./config --prefix=/opt/openssl/1.1.1k --openssldir=/opt/openssl/1.1.1k && \
180 |     make -j && \
181 |     sudo make install
182 | 
183 | ENV PATH=/opt/openssl/1.1.1k/bin:$PATH
184 | ENV LD_LIBRARY_PATH=/opt/openssl/1.1.1k/lib:/opt/crypto_mb/lib:/opt/ipsec-mb/0.55/lib
185 | 
186 | # Build/Install IPP crypto library for the multi-buffer support
187 | WORKDIR /home/${USERNAME}/
188 | RUN git clone --depth 1 -b $IPP_CRYPTO_TAG https://github.com/intel/ipp-crypto.git && \
189 |     export OPENSSL_ROOT_DIR=/opt/openssl/1.1.1k/ && \
190 |     cd ipp-crypto && \
191 |     cd sources/ippcp/crypto_mb && \
192 |     cmake .
-Bbuild -DCMAKE_INSTALL_PREFIX=/opt/crypto_mb && \ 193 | cd build && \ 194 | make -j && \ 195 | sudo make install 196 | 197 | # Build/install IPSec_MB library 198 | RUN sudo rm -rf ipsec intel-ipsec-mb && \ 199 | mkdir ipsec && \ 200 | git clone --depth 1 -b $IPSEC_MB_TAG https://github.com/intel/intel-ipsec-mb.git && \ 201 | cd intel-ipsec-mb/ && \ 202 | make -j SAFE_DATA=y SAFE_PARAM=y SAFE_LOOKUP=y && \ 203 | sudo make install NOLDCONFIG=y PREFIX=/opt/ipsec-mb/0.55 204 | 205 | # Build/Install QAT engine 206 | WORKDIR /home/${USERNAME}/ 207 | ARG HOME="/home/base" 208 | RUN git clone --depth 1 -b $QAT_ENGINE_TAG https://github.com/intel/QAT_Engine.git qat_engine && \ 209 | cd qat_engine && \ 210 | ./autogen.sh && \ 211 | LDFLAGS="-L/opt/ipsec-mb/0.55/lib -L/opt/crypto_mb/lib" CPPFLAGS="-I/opt/ipsec-mb/0.55/include -I/opt/crypto_mb/include" \ 212 | ./configure --enable-qat_sw --prefix=/opt/openssl/1.1.1k --with-openssl_install_dir=/opt/openssl/1.1.1k --with-openssl_dir=$HOME/openssl-1.1.1k && \ 213 | PERL5LIB=$HOME/openssl-1.1.1k make -j && \ 214 | sudo PERL5LIB=$HOME/openssl-1.1.1k make install 215 | 216 | ARG ASYNCH_NGINX_TAG 217 | # Build/Install ASYNCH NGINX, with QAT 218 | RUN mkdir /home/${USERNAME}/nginx_build && \ 219 | cd /home/${USERNAME}/nginx_build && \ 220 | git clone --depth 1 -b $ASYNCH_NGINX_TAG https://github.com/intel/asynch_mode_nginx.git nginx && \ 221 | cd nginx && \ 222 | ./configure --prefix=/usr/ \ 223 | --with-http_ssl_module --add-dynamic-module=modules/nginx_qat_module \ 224 | --with-cc-opt="-DNGX_SECURE_MEM \ 225 | -I /opt/openssl/1.1.1k/include \ 226 | -Wno-error=deprecated-declarations -Wimplicit-fallthrough=0" \ 227 | --with-ld-opt="-Wl,-rpath=/opt/openssl/1.1.1k/lib \ 228 | -L /opt/openssl/1.1.1k/lib" && \ 229 | make -j && \ 230 | sudo make install && \ 231 | rm -rf /home/${USERNAME}/nginx_build 232 | 233 | RUN sudo cp /opt/ipsec-mb/0.55/lib/*.* /usr/lib/ 234 | RUN sudo cp /opt/crypto_mb/lib/libcrypto_mb.so.11.1
/usr/lib/libcrypto_mb.so.11 235 | COPY --chown=${USERNAME}:root files/nginx.conf.in /home/${USERNAME}/oss-performance/conf/nginx 236 | COPY --chown=${USERNAME}:root files/php-opt.ini /home/${USERNAME}/oss-performance/conf/php.ini 237 | 238 | WORKDIR /home/${USERNAME}/oss-performance 239 | USER ${USERNAME} 240 | -------------------------------------------------------------------------------- /containers/wordpress/wp_opt/files/0001-BOLT-PHP7.4.29-GCC-option.patch: -------------------------------------------------------------------------------- 1 | From b7ea263ad8c6fb0c40bb8d58f8721742998c21e5 Mon Sep 17 00:00:00 2001 2 | From: Ping Zhao 3 | Date: Wed, 4 May 2022 19:01:29 -0700 4 | Subject: [PATCH] Fix BOLT problem with GCC10. 5 | 6 | BOLT requires adding GCC option "-fno-reorder-blocks-and-partition". In some codes, it will be omitted because "GCC push_options". This patch is to handle this case correctly. 7 | 8 | Signed-off-by: Ping Zhao 9 | --- 10 | Zend/zend_vm_execute.h | 1 + 11 | ext/standard/crc32.c | 1 + 12 | 2 files changed, 2 insertions(+) 13 | 14 | diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h 15 | index 2875fd3740..402bb6c9ea 100644 16 | --- a/Zend/zend_vm_execute.h 17 | +++ b/Zend/zend_vm_execute.h 18 | @@ -50067,6 +50067,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_NULL_HANDLER(ZEND_OPCODE_HANDL 19 | # pragma GCC push_options 20 | # pragma GCC optimize("no-gcse") 21 | # pragma GCC optimize("no-ivopts") 22 | +# pragma GCC optimize("no-reorder-blocks-and-partition") 23 | #endif 24 | ZEND_API void execute_ex(zend_execute_data *ex) 25 | { 26 | diff --git a/ext/standard/crc32.c b/ext/standard/crc32.c 27 | index 497b133642..7657f6037b 100644 28 | --- a/ext/standard/crc32.c 29 | +++ b/ext/standard/crc32.c 30 | @@ -46,6 +46,7 @@ static inline int has_crc32_insn() { 31 | 32 | # pragma GCC push_options 33 | # pragma GCC target ("+nothing+crc") 34 | +# pragma GCC optimize("no-reorder-blocks-and-partition") 35 | static uint32_t 
crc32_aarch64(uint32_t crc, char *p, size_t nr) { 36 | while (nr >= sizeof(uint64_t)) { 37 | crc = __crc32d(crc, *(uint64_t *)p); 38 | -- 39 | 2.25.1 40 | 41 | -------------------------------------------------------------------------------- /containers/wordpress/wp_opt/files/0001-BOLT-PHP8.0.18-GCC-option.patch: -------------------------------------------------------------------------------- 1 | From abba541faa88e2ed9021427fa1e7014e93c5de89 Mon Sep 17 00:00:00 2001 2 | From: Ping Zhao 3 | Date: Sun, 1 May 2022 23:39:17 -0700 4 | Subject: [PATCH] BOLT requires adding GCC option 5 | "-fno-reorder-blocks-and-partition". In some codes, it will be omitted 6 | because "GCC push_options". This patch is to handle this case correctly. 7 | 8 | --- 9 | Zend/zend_vm_execute.h | 1 + 10 | ext/standard/crc32.c | 1 + 11 | 2 files changed, 2 insertions(+) 12 | 13 | diff --git a/Zend/zend_vm_execute.h b/Zend/zend_vm_execute.h 14 | index 922ed92e56..628da2cac3 100644 15 | --- a/Zend/zend_vm_execute.h 16 | +++ b/Zend/zend_vm_execute.h 17 | @@ -51085,6 +51085,7 @@ static ZEND_OPCODE_HANDLER_RET ZEND_FASTCALL ZEND_NULL_HANDLER(ZEND_OPCODE_HANDL 18 | # pragma GCC push_options 19 | # pragma GCC optimize("no-gcse") 20 | # pragma GCC optimize("no-ivopts") 21 | +# pragma GCC optimize("no-reorder-blocks-and-partition") 22 | #endif 23 | ZEND_API void execute_ex(zend_execute_data *ex) 24 | { 25 | diff --git a/ext/standard/crc32.c b/ext/standard/crc32.c 26 | index cca5917438..651e83fa31 100644 27 | --- a/ext/standard/crc32.c 28 | +++ b/ext/standard/crc32.c 29 | @@ -53,6 +53,7 @@ static inline int has_crc32_insn() { 30 | # if defined(__GNUC__) && !defined(__clang__) 31 | # pragma GCC push_options 32 | # pragma GCC target ("+nothing+crc") 33 | +# pragma GCC optimize("no-reorder-blocks-and-partition") 34 | # endif 35 | static uint32_t crc32_aarch64(uint32_t crc, char *p, size_t nr) { 36 | while (nr >= sizeof(uint64_t)) { 37 | -- 38 | 2.25.1 39 | 40 | 
-------------------------------------------------------------------------------- /containers/wordpress/wp_opt/files/nginx.conf.in: -------------------------------------------------------------------------------- 1 | worker_processes 28; # Depends on #vcpus 2 | 3 | load_module /usr/modules/ngx_ssl_engine_qat_module.so; 4 | 5 | error_log __NGINX_TEMP_DIR__/nginx-error.log; 6 | pid __NGINX_PID_FILE__; 7 | 8 | 9 | ssl_engine{ 10 | use_engine qatengine; 11 | default_algorithms ALL; 12 | qat_engine{ 13 | qat_notify_mode poll; 14 | qat_offload_mode async; 15 | qat_external_poll_interval 1; 16 | } 17 | } 18 | 19 | 20 | worker_rlimit_nofile 1000000; 21 | 22 | 23 | events { 24 | use epoll; 25 | worker_connections 8192; 26 | multi_accept on; 27 | accept_mutex on; 28 | } 29 | 30 | http { 31 | ssl_buffer_size 65536; 32 | include __NGINX_CONFIG_ROOT__/mime.types; 33 | default_type application/octet-stream; 34 | 35 | log_format main '$status $body_bytes_sent $request_time "$request"'; 36 | 37 | types_hash_max_size 4096; 38 | types_hash_bucket_size 64; 39 | 40 | sendfile on; 41 | #tcp_nopush on; 42 | 43 | keepalive_timeout __NGINX_KEEPALIVE_TIMEOUT__; 44 | fastcgi_read_timeout __NGINX_FASTCGI_READ_TIMEOUT__; 45 | 46 | #gzip on; 47 | 48 | #__NGINX_RESOLVER__ 49 | 50 | server { 51 | listen [::]:__HTTP_PORT__ reuseport backlog=131072 so_keepalive=off ssl; 52 | listen __HTTP_PORT__ reuseport backlog=131072 so_keepalive=off ssl; 53 | 54 | keepalive_timeout 0s; 55 | tcp_nopush on; 56 | tcp_nodelay on; 57 | ssl_verify_client off; 58 | ssl_session_tickets off; 59 | lingering_close off; 60 | lingering_time 1; 61 | ssl_asynch on; 62 | 63 | # https certificate lines 64 | ssl_certificate /home/base/certificates/server.crt; 65 | ssl_certificate_key /home/base/certificates/server.key; 66 | ssl_certificate /home/base/certificates/cert.pem; 67 | ssl_certificate_key /home/base/certificates/key.pem; 68 | 69 | ssl_session_timeout 300s; 70 | ssl_prefer_server_ciphers on; 71 | ssl_protocols TLSv1.3; 
#or TLSv1.3; 72 | 73 | ####### comment following line for TLSv1.3 74 | #ssl_ciphers ECDHE-RSA-AES128-GCM-SHA256; 75 | 76 | ssl_ecdh_curve secp384r1; 77 | 78 | access_log __NGINX_TEMP_DIR__/access.log main; 79 | client_body_temp_path __NGINX_TEMP_DIR__/client_temp; 80 | proxy_temp_path __NGINX_TEMP_DIR__/proxy_temp; 81 | fastcgi_temp_path __NGINX_TEMP_DIR__/fastcgi_temp; 82 | uwsgi_temp_path __NGINX_TEMP_DIR__/uwsgi_temp; 83 | scgi_temp_path __NGINX_TEMP_DIR__/scgi_temp; 84 | root __FRAMEWORK_ROOT__; 85 | index index.php; 86 | 87 | location / { 88 | try_files $uri $uri/ @rewrites; 89 | expires 7d; 90 | } 91 | 92 | location @rewrites { 93 | rewrite ^ /index.php last; 94 | } 95 | 96 | location ~ [^/]\.php(/|$) { 97 | fastcgi_split_path_info ^(.+?\.php)(/.*)$; 98 | if (!-f $document_root$fastcgi_script_name) { 99 | return 404; 100 | } 101 | 102 | rewrite /index.php / break; 103 | __PROXY_PASS__; 104 | fastcgi_index index.php; 105 | fastcgi_param SCRIPT_FILENAME $document_root$fastcgi_script_name; 106 | # Siege sets Accept-Encoding: gzip. php5 and php-ng ignore it, but 107 | # HHVM automatically compresses the output. 108 | # 109 | # This means that it's not a like-for-like comparison, and also means 110 | # that we can't compare bytes sent (nginx) or received (siege) as a 111 | # sanity check between the two; this sucks, so remove the header. 
112 | # 113 | # Issue for making HHVM match PHP behavior: 114 | # https://github.com/facebook/hhvm/issues/3744 115 | fastcgi_param HTTP_ACCEPT_ENCODING ""; 116 | proxy_set_header Accept-Encoding ""; 117 | proxy_set_header Host 127.0.0.1:__BACKEND_PORT__; 118 | include __NGINX_CONFIG_ROOT__/fastcgi_params; 119 | } 120 | } 121 | 122 | server { 123 | listen [::]:__HTTP_ADMIN_PORT__ default_server; 124 | listen __HTTP_ADMIN_PORT__ default_server; 125 | access_log __NGINX_TEMP_DIR__/admin-access.log main; 126 | client_body_temp_path __NGINX_TEMP_DIR__/admin-client_temp; 127 | proxy_temp_path __NGINX_TEMP_DIR__/admin-proxy_temp; 128 | fastcgi_temp_path __NGINX_TEMP_DIR__/admin-fastcgi_temp; 129 | uwsgi_temp_path __NGINX_TEMP_DIR__/admin-uwsgi_temp; 130 | scgi_temp_path __NGINX_TEMP_DIR__/admin-scgi_temp; 131 | 132 | location / { 133 | proxy_set_header Host 127.0.0.1; 134 | __ADMIN_PROXY_PASS__; 135 | include __NGINX_CONFIG_ROOT__/fastcgi_params; 136 | } 137 | } 138 | } 139 | 140 | -------------------------------------------------------------------------------- /containers/wordpress/wp_opt/files/php-opt.ini: -------------------------------------------------------------------------------- 1 | ; Copyright (C) 2021 Intel Corporation 2 | ; 3 | ; Permission is hereby granted, free of charge, to any person obtaining a 4 | ; copy of this software and associated documentation files 5 | ; (the "Software"), to deal in the Software without restriction, 6 | ; including without limitation the rights to use, copy, modify, merge, 7 | ; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ; and to permit persons to whom the Software is furnished to do so, 9 | ; subject to the following conditions: 10 | ; 11 | ; The above copyright notice and this permission notice shall be included 12 | ; in all copies or substantial portions of the Software. 
13 | ; 14 | ; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 | ; OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 | ; FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 | ; THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 18 | ; OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 19 | ; ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 20 | ; OR OTHER DEALINGS IN THE SOFTWARE. 21 | ; 22 | ; SPDX-License-Identifier: MIT 23 | ;;;;;;;;;;; 24 | ; Generic ; 25 | ;;;;;;;;;;; 26 | max_execution_time=600 27 | memory_limit=128M 28 | error_reporting=0 29 | display_errors=0 30 | log_errors=0 31 | user_ini.filename= 32 | realpath_cache_size=2M 33 | cgi.check_shebang_line=0 34 | date.timezone=America/Los_Angeles 35 | assert.active=false 36 | 37 | ;;;;;;;;;;;;; 38 | ; HHVM Only ; 39 | ;;;;;;;;;;;;; 40 | hhvm.php7.all=0 41 | ; This is a no-op on production builds, but makes it possible to get meaningful 42 | ; profiles from debug builds 43 | hhvm.hhir_generate_asserts=0 44 | ; Off by default in OSS builds, but on by default in Facebook's internal builds; 45 | ; this provides consistency for FB engineers, no-op for others. 46 | hhvm.force_hh=0 47 | ; When running, HHVM maintains /tmp/perf-$PID.map mapping memory addresses to 48 | ; jitted functions - but this gets removed when HHVM exits. This option stops 49 | ; HHVM from automatically removing it, so allows you to use 'perf report' after 50 | ; the process has exited. No impact on HHVM's actual performance. 
51 | hhvm.keep_perf_pid_map=1 52 | 53 | ;;;;;;;;;;;;;;; 54 | ; PHP5/7 Only ; 55 | ;;;;;;;;;;;;;;; 56 | ;zend_extension=opcache.so 57 | zend_extension=/home/base/oss-performance/opcache.so 58 | opcache.enable_cli=1 59 | opcache.fast_shutdown=1 60 | opcache.validate_timestamps=1 61 | opcache.revalidate_freq=60 62 | opcache.use_cwd=1 63 | opcache.max_accelerated_files=100000 64 | opcache.max_wasted_percentage=5 65 | opcache.memory_consumption=400 66 | opcache.consistency_checks=0 67 | 68 | zend.assertions=-1 69 | zend.detect_unicode=0 70 | mysqlnd.collect_statistics=0 71 | opcache.huge_code_pages=1 72 | opcache.optimization_level=-1 73 | 74 | ; TODO: comment out before release 75 | ; opcache.jit_debug=32 76 | opcache.jit_buffer_size=20M 77 | opcache.jit=tracing 78 | -------------------------------------------------------------------------------- /detect-platform.mk: -------------------------------------------------------------------------------- 1 | # The conditional definition below allows us to test how the implementation 2 | # behaves when a value appears that is not supported by giving a value for 3 | # `ENABLE_LARGE_CODE_PAGES` on the command line.
4 | ENABLE_LARGE_CODE_PAGES?="detect" 5 | 6 | ifeq ($(ENABLE_LARGE_CODE_PAGES), "detect") 7 | PLATFORM := $(shell uname -s) 8 | ENABLE_LARGE_CODE_PAGES=0 9 | ifeq ($(PLATFORM),Linux) 10 | ENABLE_LARGE_CODE_PAGES=1 11 | endif 12 | 13 | # To enable support on more platforms once it's implemented, add more ifeq 14 | # blocks here, such as the example below: 15 | # 16 | # ifeq ($(PLATFORM),FreeBSD) 17 | # ENABLE_LARGE_CODE_PAGES=1 18 | # endif 19 | 20 | endif 21 | 22 | ifneq ($(ENABLE_LARGE_CODE_PAGES), 0) 23 | CFLAGS+=-DENABLE_LARGE_CODE_PAGES=$(ENABLE_LARGE_CODE_PAGES) 24 | CPPFLAGS+=-DENABLE_LARGE_CODE_PAGES=$(ENABLE_LARGE_CODE_PAGES) 25 | endif 26 | -------------------------------------------------------------------------------- /large_data-go/mmap_test/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "log" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "syscall" 11 | 12 | "github.com/dterei/gotsc" 13 | ) 14 | 15 | const NCHUNKS_8G = (8589934592 / 4096) 16 | const NCHUNKS_4G = (4294967296 / 4096) 17 | const NCHUNKS_2G = (2147483648 / 4096) 18 | const NCHUNKS = NCHUNKS_2G 19 | const SIZE_8G = 8 << 30 20 | const SIZE_4G = 4 << 30 21 | const SIZE_2G = 2 << 30 22 | const SIZE uint64 = 10240 23 | const FLAGS_4K = syscall.MAP_ANONYMOUS | syscall.MAP_PRIVATE 24 | const FLAGS_2M = syscall.MAP_ANONYMOUS | syscall.MAP_PRIVATE | syscall.MAP_HUGETLB 25 | const FLAGS_1G = syscall.MAP_ANONYMOUS | syscall.MAP_PRIVATE | syscall.MAP_HUGETLB | (30 << 26) 26 | 27 | func iodlr_procmeminfo(key string) int { 28 | file, err := os.Open("/proc/meminfo") 29 | 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | // release the /proc/meminfo handle on every return path 34 | defer file.Close() 35 | 36 | scanner := bufio.NewScanner(file) 37 | scanner.Split(bufio.ScanLines) 38 | 39 | for scanner.Scan() { 40 | 41 | if strings.Contains(scanner.Text(), key) { 42 | var value int 43 | words := strings.Fields(scanner.Text()) 44 | 45 | //fmt.Println(words,
len(words)) 46 | //value, err2 := strconv.ParseInt(words[1], 10, 32) 47 | value, err2 := strconv.Atoi(words[1]) 48 | if err2 != nil { 49 | //fmt.Println("line 29", err2) 50 | return -1 51 | } 52 | 53 | return value 54 | } 55 | 56 | //fmt.Println(scanner.Text()) 57 | } 58 | 59 | return -1 60 | } 61 | 62 | func iodlr_hp_enabled() bool { 63 | 64 | val := iodlr_procmeminfo("HugePages_Total:") 65 | 66 | if val > 0 { 67 | // HugePages_Total is non-zero 68 | return true 69 | } else { 70 | return false 71 | } 72 | 73 | } 74 | 75 | func iodlr_allocate(s int, pgsz int) []byte { 76 | 77 | flags := FLAGS_4K 78 | 79 | if pgsz == 1048576*1024 { 80 | flags = FLAGS_1G 81 | 82 | } else if pgsz == 2048*1024 { 83 | flags = FLAGS_2M 84 | } 85 | 86 | data, err := syscall.Mmap(-1, 0, s, syscall.PROT_READ|syscall.PROT_WRITE, flags) 87 | 88 | if err != nil { 89 | panic(err) 90 | } 91 | 92 | return data 93 | 94 | } 95 | 96 | 97 | func iodlr_get_hp_size() int { 98 | val := iodlr_procmeminfo("Hugepagesize:") 99 | 100 | if val > 0 { 101 | return val * 1024 102 | } else { 103 | return -1 104 | } 105 | 106 | } 107 | 108 | func touch(data []byte, stride uint64, index int, size uint64) { 109 | 110 | if stride > size { 111 | fmt.Printf("%d : %d", stride, size) 112 | // stride exceeds the buffer size 113 | panic("error") 114 | } 115 | counter := 0 116 | cursor := 0 117 | for i := 0; i < NCHUNKS; i++ { 118 | cursor = i * int(stride) + int(index) 119 | if (cursor < int(size)) { 120 | a := &data[cursor] 121 | counter = counter + int(*a) 122 | } 123 | } 124 | 125 | } 126 | 127 | func dotest(s int) uint64 { 128 | 129 | stride := (SIZE) / NCHUNKS 130 | //fmt.Println("stride : ", stride) 131 | 132 | tsc := gotsc.TSCOverhead() 133 | 134 | start := gotsc.BenchStart() 135 | data := iodlr_allocate(int(SIZE), s) 136 | // zerofill(data, SIZE) the default generated memory is filled with zeros 137 | for i := 0; i < 4096; i++ { 138 | touch(data, stride, i, SIZE) 139 | } 140 | 141 | end :=
gotsc.BenchEnd() 142 | 143 | gap := end - start - tsc 144 | 145 | fmt.Printf("Cycles for %d = %d \n", s, gap) 146 | return gap 147 | 148 | } 149 | 150 | func defaulttest() int { 151 | d := syscall.Getpagesize() 152 | fmt.Printf("default test default pagesize %d\n", d) 153 | return int(dotest(d)) 154 | } 155 | 156 | func hptest() int { 157 | lp := iodlr_hp_enabled() 158 | if (lp == true) { 159 | l := iodlr_get_hp_size() 160 | fmt.Printf("l value is : %d\n", l) 161 | return int(dotest(l)) 162 | } else { 163 | return -1 164 | } 165 | } 166 | 167 | func main() { 168 | 169 | dt := defaulttest() 170 | fmt.Printf("default test time gap is %d\n", dt) 171 | 172 | hpt := hptest() 173 | fmt.Printf("HPT test time gap is %d\n", hpt) 174 | 175 | fmt.Printf("Huge Page Data Speedup = %f\n", (float64(dt) / float64(hpt))) 176 | } 177 | -------------------------------------------------------------------------------- /large_data/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2018 Intel Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom 8 | the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included 11 | in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | SPDX-License-Identifier: MIT 22 | -------------------------------------------------------------------------------- /large_data/Makefile: -------------------------------------------------------------------------------- 1 | OUTDIR?=. 2 | CC=g++ 3 | CFLAGS=-O3 -std=c++11 -D_FORTIFY_SOURCE=2 -fsanitize=address -z noexecstack -z relro -z now -fstack-protector -Wformat -Wformat-security -Wall 4 | RM=/bin/rm 5 | 6 | TARGET = $(OUTDIR)/data-large-reference 7 | 8 | .PHONY: all 9 | all: $(TARGET) 10 | 11 | $(TARGET): $(TARGET).cc 12 | $(CC) $(CFLAGS) -o $(TARGET) $(TARGET).cc 13 | 14 | .PHONY: clean 15 | clean: 16 | $(RM) -f *.o $(TARGET) 17 | -------------------------------------------------------------------------------- /large_data/README.md: -------------------------------------------------------------------------------- 1 | # Reference Implementation for Utilizing Large Pages for Data 2 | This directory contains a simple reference implementation for automating the process 3 | of utilizing explicit huge pages for Data. 4 | 5 | 6 | # APIs 7 | ``` 8 | * bool iodlr_hp_enabled() 9 | Check whether explicit huge pages are enabled on the system 10 | Returns true if Huge Pages are enabled on the system 11 | * size_t iodlr_get_hp_size() 12 | Obtains the size of the explicit huge pages (2MB or 1GB) in bytes 13 | * void * iodlr_allocate(size_t s, size_t pgsz) 14 | mmap s bytes using pages of size pgsz 15 | * void iodlr_deallocate(char *d, size_t s) 16 | ``` 17 | 18 | # Test 19 | ``` 20 | * int64_t hptest() 21 | Allocate a SIZE buffer with Huge Pages, zero fill it, and stride through it at 4K stride touching each byte of the 4K region. 22 | Returns the cycles (measured by rdtsc) to execute the test.
23 | 24 | * int64_t defaulttest() 25 | Allocate a SIZE buffer with 4K Pages, zero fill it, and stride through it at 4K stride touching each byte of the 4K region. 26 | Returns the cycles (measured by rdtsc) to execute the test. 27 | ``` 28 | 29 | # Infrastructure 30 | 31 | The Linux kernel in modern distros has the large pages feature enabled by default. To check whether this is true for your kernel, use the following command and look for output lines containing “huge”: 32 | ``` 33 | shell> cat /proc/meminfo | grep -i huge 34 | HugePages_Total: 0 35 | HugePages_Free: 0 36 | HugePages_Rsvd: 0 37 | HugePages_Surp: 0 38 | Hugepagesize: 1048576 kB 39 | ``` 40 | The nonempty command output indicates that large page support is present, but the zero values indicate that no pages are configured for use. The `Hugepagesize` value also shows that this kernel is configured to use 1GB pages. 41 | 42 | Set the number of pages to be used. 43 | Each page is 1GB, so a value of 8 = 8GB. 44 | This command actually allocates memory, so the requested amount of memory must be available.
45 | ``` 46 | echo 8 > /proc/sys/vm/nr_hugepages 47 | ``` 48 | 49 | # Building 50 | ``` 51 | make 52 | ``` 53 | 54 | # Testing 55 | This test needs 8G memory and for the hptest it needs 8 * 1G pages or 4096 * 2MB pages allocated as hugepages (using `nr_hugepages`) 56 | ``` 57 | ./data-large-reference 58 | hptest hpsize 2097152 59 | Cycles for 2097152 = 4918657468 60 | defaulttest default pagesize 4096 61 | Cycles for 4096 = 10988167648 62 | Huge Page Data Speedup = 2.23398 63 | ``` 64 | -------------------------------------------------------------------------------- /large_data/data-large-reference.cc: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2018 Intel Corporation 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom 8 | // the Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | // OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | // OR OTHER DEALINGS IN THE SOFTWARE. 
20 | // 21 | // SPDX-License-Identifier: MIT 22 | // 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #ifndef MAP_HUGETLB 32 | # define MAP_HUGETLB 0x40000 33 | #endif 34 | 35 | #ifndef MAP_HUGE_1GB 36 | # define MAP_HUGE_1GB (30 << 26) 37 | #endif 38 | 39 | # define FLAGS_1G (MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB | MAP_HUGE_1GB) 40 | # define FLAGS_2M (MAP_ANONYMOUS | MAP_PRIVATE | MAP_HUGETLB) 41 | # define FLAGS_4K (MAP_ANONYMOUS | MAP_PRIVATE) 42 | 43 | 44 | /* 8G memory allocation */ 45 | #define SIZE_8G (8ul << 30) 46 | /* Divide it into 4k chunks */ 47 | #define NCHUNKS_8G (8589934592/4096) 48 | 49 | /* 4G memory allocation */ 50 | #define SIZE_4G (4ul << 30) 51 | /* Divide it into 4k chunks */ 52 | #define NCHUNKS_4G (4294967296/4096) 53 | 54 | /* 2G memory allocation */ 55 | #define SIZE_2G (2ul << 30) 56 | /* Divide it into 4k chunks */ 57 | #define NCHUNKS_2G (2147483648/4096) 58 | 59 | #define SIZE SIZE_8G 60 | #define NCHUNKS NCHUNKS_8G 61 | 62 | 63 | #include 64 | #include 65 | #include 66 | #include 67 | #include 68 | 69 | 70 | #include 71 | size_t iodlr_get_default_page_size() { 72 | return (size_t)(sysconf(_SC_PAGESIZE)); 73 | } 74 | 75 | using std::string; 76 | using std::ifstream; 77 | using std::istringstream; 78 | using std::cout; 79 | using std::getline; 80 | 81 | uint64_t iodlr_procmeminfo(string key) { 82 | ifstream ifs("/proc/meminfo"); 83 | string map_line; 84 | 85 | while(getline(ifs,map_line)){ 86 | istringstream iss(map_line); 87 | string keyname; 88 | uint64_t value; 89 | string kb; 90 | iss >> keyname; 91 | iss >> value; 92 | iss >> kb; 93 | if (keyname == key) { 94 | return value; 95 | } 96 | } 97 | return -1; // nothing found 98 | 99 | } 100 | 101 | bool iodlr_hp_enabled() { 102 | uint64_t val = iodlr_procmeminfo("HugePages_Total:"); 103 | if (val != (uint64_t)-1 && val > 0 ) // (uint64_t)-1 is the "nothing found" value 104 | return true; 105 | else 106 | return false; 107 | } 108 | 109 | size_t iodlr_get_hp_size() { 110 | uint64_t val =
iodlr_procmeminfo("Hugepagesize:"); 111 | if (val != (uint64_t)-1 && val > 0) { // (uint64_t)-1 means the key was not found 112 | return val * 1024; 113 | } 114 | return -1; 115 | } 116 | 117 | 118 | void * iodlr_allocate(size_t s, size_t pgsz) { 119 | int flags=FLAGS_4K; 120 | void *data; 121 | if (pgsz == 1048576* 1024) 122 | flags = FLAGS_1G; 123 | else if (pgsz == 2048*1024) 124 | flags = FLAGS_2M; 125 | 126 | data = mmap(NULL, s, 127 | PROT_READ | PROT_WRITE, flags, 128 | -1, 0); 129 | assert(data != MAP_FAILED); 130 | return data; 131 | } 132 | 133 | void iodlr_deallocate(char *d, size_t s) { 134 | int i; 135 | i = munmap (d, s); 136 | assert (i != -1); 137 | } 138 | 139 | void zerofill(char *d, size_t s) { 140 | memset (d, 0, s); 141 | } 142 | 143 | void touch(char *d, size_t stride, size_t index, size_t size) { 144 | 145 | char a; 146 | int i; 147 | assert (stride < size); 148 | 149 | for (i=0; i < NCHUNKS; i++) { 150 | assert (i*stride + index < size); 151 | a = d[i*stride+index]; 152 | } 153 | } 154 | 155 | int64_t dotest(size_t s) { 156 | int i; 157 | size_t stride = (SIZE)/NCHUNKS; 158 | uint64_t start, end; 159 | start = __rdtsc(); 160 | char *data = (char *)iodlr_allocate(SIZE, s); 161 | zerofill(data, SIZE); 162 | for (i=0; i < 4096; i++) { 163 | touch(data, stride, i, SIZE); 164 | } 165 | iodlr_deallocate(data, SIZE); 166 | end = __rdtsc(); 167 | cout << "Cycles for " << s << " = " << (end - start) << "\n"; 168 | return (end - start); 169 | } 170 | 171 | int64_t hptest() { 172 | bool lp = iodlr_hp_enabled(); 173 | if (lp == true) { 174 | size_t l = iodlr_get_hp_size(); 175 | cout << "hptest hpsize " << l << "\n"; 176 | return dotest(l); 177 | } 178 | return -1; 179 | } 180 | 181 | int64_t defaulttest() { 182 | size_t d = iodlr_get_default_page_size(); 183 | cout << "defaulttest default pagesize " << d << "\n"; 184 | return dotest(d); 185 | } 186 | 187 | 188 | int main (int argc, char **argv) 189 | { 190 | (void)argc; 191 | (void)argv; 192 | int64_t hpt, dt; 193 | hpt = hptest(); 194 | dt = defaulttest(); 195 | cout
<< "Huge Page Data Speedup = " << (double) dt/ (double)hpt << "\n"; 196 | } 197 | 198 | 199 | 200 | -------------------------------------------------------------------------------- /large_page-c/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2018 Intel Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom 8 | the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included 11 | in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | SPDX-License-Identifier: MIT 22 | -------------------------------------------------------------------------------- /large_page-c/Makefile: -------------------------------------------------------------------------------- 1 | include cflags.mk 2 | 3 | OUTDIR?=. 4 | CC=gcc 5 | CFLAGS+=$(CFLAGS_COMMON) 6 | AR=ar 7 | RM=/bin/rm 8 | 9 | .PHONY: all 10 | all: $(OUTDIR)/liblarge_page.a 11 | 12 | # Append -DENABLE_LARGE_CODE_PAGES=1 to CFLAGS on supported platforms. 
13 | include ../detect-platform.mk 14 | 15 | %.o: %.c 16 | $(CC) $(CFLAGS) -x c -c $< -o $@ 17 | 18 | $(OUTDIR)/liblarge_page.a: large_page.o 19 | $(AR) rcs $@ $< 20 | 21 | .PHONY: clean 22 | clean: 23 | $(RM) -f *.o $(OUTDIR)/*.a 24 | -------------------------------------------------------------------------------- /large_page-c/Makefile.preload: -------------------------------------------------------------------------------- 1 | include cflags.mk 2 | OUTDIR?=. 3 | CC=gcc 4 | CFLAGS=$(CFLAGS_COMMON) -fPIC -DPIC 5 | AR=ar 6 | RM=/bin/rm 7 | 8 | .PHONY: all 9 | all: $(OUTDIR)/liblppreload.so 10 | 11 | # Append -DENABLE_LARGE_CODE_PAGES=1 to CFLAGS on supported platforms. 12 | include ../detect-platform.mk 13 | 14 | %.o: %.c 15 | $(CC) $(CFLAGS) -x c -c $< -o $@ 16 | 17 | OBJECTS=\ 18 | large_page.o \ 19 | lp_preload.o \ 20 | 21 | $(OUTDIR)/liblppreload.so: $(OBJECTS) 22 | $(CC) -shared -o $@ $(OBJECTS) 23 | 24 | .PHONY: clean 25 | clean: 26 | $(RM) -f *.o $(OUTDIR)/*.so 27 | -------------------------------------------------------------------------------- /large_page-c/README.md: -------------------------------------------------------------------------------- 1 | # Reference Implementation for Utilizing Large Pages 2 | 3 | This directory contains a reference implementation for automating the process 4 | of re-mapping code to transparent huge pages. It contains a target for building 5 | a static library that can be used along with the header files in a larger 6 | project, and a target for building a shared library that can be loaded into a 7 | process using `LD_PRELOAD`, at which point it will re-map the `.text` segment of 8 | the executable and dependent shared libraries. 9 | 10 | ## Building The Static Library 11 | 12 | The APIs provided by the reference implementation can be built into a static 13 | library by running 14 | 15 | ```bash 16 | make 17 | ``` 18 | 19 | This will create `liblarge_page.a` in the current directory. 
20 | 21 | ## Building The Shared Library 22 | 23 | The shared library can be built by running 24 | 25 | ```bash 26 | make -f Makefile.preload 27 | ``` 28 | 29 | This will create `liblppreload.so` in the current directory. This file should 30 | then be copied to `/usr/lib64`. 31 | 32 | ### Using The Shared Library 33 | 34 | `liblppreload.so` can be added to any process on the command line. The example 35 | below illustrates the use of the shared library with Node.js: 36 | 37 | ```bash 38 | LD_PRELOAD=/usr/lib64/liblppreload.so node 39 | ``` 40 | 41 | If you want to exclude some libraries from being moved to large pages use `LP_IGNORE` 42 | parameter which is a regex string to specify which libraries to ignore. An example 43 | ignoring libc and libabc: 44 | 45 | ```bash 46 | LD_PRELOAD=/usr/lib64/liblppreload.so LP_IGNORE='(libc)|(libabc)' node 47 | ``` 48 | 49 | ### Modifying A `systemd` Service 50 | 51 | `systemd` service files are responsible for running processes as daemons during 52 | startup. They are usually located in `/etc/systemd/system`. They consist of 53 | several sections, including a section named `[Service]`. In this section 54 | environment variables are specified which will be set during the execution of 55 | the processes listed in the file. An environment variable can be added to this 56 | section as follows: 57 | 58 | ``` 59 | [Service] 60 | Environment=LD_PRELOAD=/usr/lib64/liblppreload.so 61 | ``` 62 | 63 | Note that the location of `liblppreload.so` should be a system library path 64 | (`/usr/lib64` in the above example) otherwise `systemd` may refuse to load the 65 | library. 66 | 67 | After this modification is made, `systemd` must be instructed to reload its 68 | configuration and the corresponding daemon must be restarted. The `systemd` 69 | configuration can be reloaded by issuing 70 | 71 | ``` 72 | systemctl daemon-reload 73 | ``` 74 | as root. 
 75 | 76 | Afterwards the daemon whose `.service` file was modified can be restarted by 77 | issuing 78 | 79 | ``` 80 | systemctl restart <daemon> 81 | ``` 82 | 83 | as root, where `<daemon>` is the name of the daemon whose service file was 84 | modified. 85 | 86 | **NOTE:** Since `liblppreload.so` is added onto the daemon process, it is unable 87 | to use whatever logging facilities the process may have. If it fails to re-map 88 | the process' code to large pages, it will issue an error on `stderr`. The daemon 89 | should be run from the command line without forking into the background in order 90 | to determine any potential problems with the re-mapping. Taking `mysqld` as an 91 | example, running 92 | 93 | ``` 94 | LD_PRELOAD=/usr/lib64/liblppreload.so mysqld --help 95 | ``` 96 | 97 | will reveal any issues related to re-mapping the code. 98 | 99 | Examining the daemon's `/proc/<pid>/smaps` file can be used as a final check to 100 | ensure that the code was re-mapped to large pages. See the [smaps][] 101 | documentation for details about the format of `/proc/<pid>/smaps`. Taking 102 | `mysqld` as an example, its pid can be obtained by running 103 | 104 | ```bash 105 | $ ps ax | grep mysqld 106 | 32732 pts/7 S+ 0:00 grep --color=auto mysqld 107 | 44982 ? Ssl 254:44 /usr/sbin/mysqld 108 | ``` 109 | 110 | This reveals that `44982` is `mysqld`'s pid. Running 111 | 112 | ```bash 113 | less /proc/44982/smaps 114 | ``` 115 | 116 | and looking for `AnonHugePages` will reveal whether any portion of 117 | `/usr/sbin/mysqld`'s code was re-mapped to large pages, because the number 118 | appearing after `AnonHugePages` will be non-zero.
119 | 120 | ## API 121 | 122 | ## Types 123 | 124 | ### map_status 125 | 126 | ```C 127 | typedef enum { 128 | map_ok, 129 | map_failed_to_open_thp_file, 130 | map_invalid_regex, 131 | map_invalid_region_address, 132 | map_malformed_thp_file, 133 | map_null_regex, 134 | map_region_not_found, 135 | map_region_too_small, 136 | map_see_errno, 137 | map_see_errno_madvise_tmem_failed, 138 | map_see_errno_madvise_tmem_munmap_nmem_failed, 139 | map_see_errno_madvise_tmem_munmaps_failed, 140 | map_see_errno_madvise_tmem_munmap_tmem_failed, 141 | map_see_errno_mmap_tmem_failed, 142 | map_see_errno_mmap_tmem_munmap_nmem_failed, 143 | map_see_errno_mprotect_failed, 144 | map_see_errno_mprotect_munmap_nmem_failed, 145 | map_see_errno_mprotect_munmaps_failed, 146 | map_see_errno_mprotect_munmap_tmem_failed, 147 | map_see_errno_munmap_nmem_failed, 148 | map_unsupported_platform, 149 | map_open_exe_failed, 150 | map_see_errno_close_exe_failed, 151 | map_read_exe_header_failed, 152 | map_see_errno_seek_exe_sheaders_failed, 153 | map_read_exe_sheaders_failed, 154 | map_see_errno_seek_exe_string_table_failed, 155 | map_read_exe_string_table_failed, 156 | map_failed_to_open_ehp_file, 157 | map_not_enough_explicit_hugepages_are_allocated 158 | } map_status; 159 | ``` 160 | 161 | A value in this enum is returned by all APIs provided. It indicates whether the 162 | operation succeeded (`map_ok`) or the failure mode otherwise. 163 | 164 | ## Macros 165 | 166 | ### MAP_STATUS_STR 167 | 168 | ```C 169 | #define MAP_STATUS_STR(status) 170 | ``` 171 | 172 | Maps a `map_status` to its corresponding verbose textual explanation. 173 | 174 | ### MAP_STATUS_STR_SHORT 175 | 176 | ```C 177 | #define MAP_STATUS_STR_SHORT(status) 178 | ``` 179 | 180 | Maps a `map_status` to its corresponding terse textual explanation. 
181 | 182 | ## APIs 183 | 184 | ### MapStaticCodeToLargePages 185 | 186 | ```C 187 | map_status MapStaticCodeToLargePages(); 188 | ``` 189 | 190 | Attempts to map an application's `.text` region to large pages. 191 | 192 | If the region is not aligned to 2 MiB then the portion of the page that lies 193 | below the first multiple of 2 MiB remains mapped to small pages. Likewise, if 194 | the region does not end at an address that is a multiple of 2 MiB, the remainder 195 | of the region will remain mapped to small pages. The portion in-between will be 196 | mapped to large pages. 197 | 198 | ### MapDSOToLargePages 199 | 200 | ```C 201 | map_status MapDSOToLargePages(const char* lib_regex); 202 | ``` 203 | 204 | - `[in] lib_regex`: A string containing a regular expression to be used against 205 | the process' maps file. 206 | 207 | Retrieves an address range from the process' maps file associated with a DSO 208 | whose name matches `lib_regex` and attempts to map it to large pages. 209 | 210 | If the region is not aligned to 2 MiB then the portion of the page that lies 211 | below the first multiple of 2 MiB remains mapped to small pages. Likewise, if 212 | the region does not end at an address that is a multiple of 2 MiB, the remainder 213 | of the region will remain mapped to small pages. The portion in-between will be 214 | mapped to large pages. 215 | 216 | ### MapStaticCodeRangeToLargePages 217 | 218 | ```C 219 | map_status MapStaticCodeRangeToLargePages(void* from, void* to); 220 | ``` 221 | 222 | - `[in] from`: A starting address from which to attempt to map to large pages. 223 | - `[in] to`: An ending address up to which to attempt to map to large pages. 224 | 225 | Attempts to map the given address range to large pages. 226 | 227 | If the region is not aligned to 2 MiB then the portion of the page that lies 228 | below the first multiple of 2 MiB remains mapped to small pages. 
 Likewise, if 229 | the region does not end at an address that is a multiple of 2 MiB, the remainder 230 | of the region will remain mapped to small pages. The portion in-between will be 231 | mapped to large pages. 232 | 233 | ### IsLargePagesEnabled 234 | 235 | ```C 236 | map_status IsLargePagesEnabled(bool* result); 237 | ``` 238 | 239 | - `[out] result`: Whether large pages are enabled. 240 | 241 | Performs a platform-dependent check to determine whether it is possible to map to 242 | large pages and stores the result of the check in `result`. 243 | It supports both transparent and explicit hugepages. By default it will 244 | use transparent hugepages. 245 | To use explicit huge pages, set the environment variable shown below: 246 | 247 | ```bash 248 | $ export IODLR_USE_EXPLICIT_HP=1 249 | ``` 250 | `[error/warning/info]`: Reported if not enough explicit huge pages are available. The user will also be 251 | informed about how many pages a program would need (code section only). Please 252 | check and update `/proc/sys/vm/nr_hugepages` as required. 253 | 254 | 255 | ### MapStatusStr 256 | 257 | ```C 258 | const char* MapStatusStr(map_status status, bool fulltext); 259 | ``` 260 | 261 | - `[in] status`: The `map_status` for which to retrieve the textual error 262 | message. 263 | - `[in] fulltext`: Whether to retrieve the verbose message (`true`) or a terser 264 | message (`false`). 265 | - **Returns**: A string containing the textual error message. The string is owned by 266 | the implementation and must not be freed.
267 | 268 | [smaps]: https://github.com/torvalds/linux/blob/v5.6/Documentation/filesystems/proc.txt#L421 269 | -------------------------------------------------------------------------------- /large_page-c/cflags.mk: -------------------------------------------------------------------------------- 1 | CFLAGS_COMMON=-O3 -D_FORTIFY_SOURCE=2 -z noexecstack -z relro -z now -fstack-protector -Wformat -Wformat-security -Wall 2 | -------------------------------------------------------------------------------- /large_page-c/example/Makefile: -------------------------------------------------------------------------------- 1 | CC=gcc 2 | CFLAGS?=-O3 3 | OBJDIR=$(shell realpath obj) 4 | 5 | OBJFILES= \ 6 | large_page_example.o \ 7 | filler1.o \ 8 | filler2.o \ 9 | filler3.o \ 10 | filler4.o \ 11 | filler5.o \ 12 | filler6.o \ 13 | filler7.o \ 14 | filler8.o \ 15 | 16 | OBJS=$(addprefix $(OBJDIR)/,$(OBJFILES)) 17 | 18 | .PHONY: all 19 | all: large_page_example 20 | 21 | LARGE_PAGE_EXAMPLE_DEPS= \ 22 | $(OBJS) \ 23 | $(OBJDIR)/liblarge_page.a \ 24 | 25 | large_page_example: $(LARGE_PAGE_EXAMPLE_DEPS) 26 | $(CC) $(LDFLAGS) $(LARGE_PAGE_EXAMPLE_DEPS) -o $@ 27 | 28 | $(OBJDIR)/liblarge_page.a: 29 | $(MAKE) -C .. OUTDIR=$(OBJDIR) 30 | 31 | $(OBJDIR)/%.o : %.c $(OBJDIR) 32 | $(CC) $(CFLAGS) -x c -o $@ -c -I.. $< 33 | 34 | $(OBJS): | $(OBJDIR) 35 | 36 | $(OBJDIR): 37 | mkdir -p $(OBJDIR) 38 | 39 | clean: 40 | $(MAKE) -C .. OUTDIR=$(OBJDIR) clean 41 | rm -rf $(OBJDIR) large_page_example 42 | -------------------------------------------------------------------------------- /large_page-c/example/README.md: -------------------------------------------------------------------------------- 1 | # Example 2 | 3 | This directory contains a simple C source file and Makefile that illustrates how 4 | the reference implementation might be incorporated in an application. 
5 | -------------------------------------------------------------------------------- /large_page-c/example/large_page_example.c: -------------------------------------------------------------------------------- 1 | #include "large_page.h" 2 | #include 3 | 4 | int main() { 5 | map_status status; 6 | bool is_enabled; 7 | 8 | status = IsLargePagesEnabled(&is_enabled); 9 | if (status != map_ok) { 10 | fprintf(stderr, "Failed to check enablement: %s\n", 11 | MAP_STATUS_STR(status)); 12 | return status; 13 | } 14 | 15 | if (is_enabled) { 16 | printf("Transparent Huge Pages are enabled, mapping...\n"); 17 | status = MapStaticCodeToLargePages(); 18 | if (status != map_ok) { 19 | fprintf(stderr, "Failed to map: %s\n", MAP_STATUS_STR(status)); 20 | return status; 21 | } 22 | printf("Success\n"); 23 | return 0; 24 | } 25 | fprintf(stderr, "Transparent Huge Pages are not enabled\n"); 26 | return -1; 27 | } 28 | -------------------------------------------------------------------------------- /large_page-c/large_page.c: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2018 Intel Corporation 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom 8 | // the Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | // OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | // OR OTHER DEALINGS IN THE SOFTWARE. 20 | // 21 | // SPDX-License-Identifier: MIT 22 | 23 | #define _GNU_SOURCE 24 | #include "large_page.h" 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | #include 35 | 36 | #ifndef MAP_HUGETLB 37 | #define MAP_HUGETLB 0x40000 /* arch specific */ 38 | #endif 39 | 40 | typedef struct { 41 | void* from; 42 | void* to; 43 | } mem_range; 44 | 45 | typedef struct { 46 | uintptr_t start; 47 | uintptr_t end; 48 | regex_t regex; 49 | bool have_regex; 50 | map_status status; 51 | } FindParams; 52 | 53 | int iodlr_number_of_ehp_avail = 0; 54 | char *iodlr_use_ehp = NULL; 55 | #define HPS (2L * 1024 * 1024) 56 | 57 | static inline uintptr_t largepage_align_down(uintptr_t addr) { 58 | return (addr & ~(HPS - 1)); 59 | } 60 | 61 | static inline uintptr_t largepage_align_up(uintptr_t addr) { 62 | return largepage_align_down(addr + HPS - 1); 63 | } 64 | 65 | static map_status FindTextSection(const char* fname, ElfW(Shdr)* text_section) { 66 | FILE* bin = fopen(fname, "r"); 67 | if (bin == NULL) return map_open_exe_failed; 68 | 69 | #define CLEAN_EXIT(code) \ 70 | do { \ 71 | int status = 0; \ 72 | if (errno == 0) { \ 73 | status = fclose(bin); \ 74 | } \ 75 | return ((((code) == map_ok) && status != 0) \ 76 | ? map_see_errno_close_exe_failed \ 77 | : (code)); \ 78 | } while (0) 79 | 80 | // Read the header. 
81 | ElfW(Ehdr) ehdr; 82 | if (fread(&ehdr, sizeof(ehdr), 1, bin) != 1) 83 | CLEAN_EXIT(map_read_exe_header_failed); 84 | 85 | // Read the section headers. 86 | ElfW(Shdr) shdrs[ehdr.e_shnum]; 87 | if (fseek(bin, ehdr.e_shoff, SEEK_SET) != 0) 88 | CLEAN_EXIT(map_see_errno_seek_exe_sheaders_failed); 89 | if (fread(shdrs, sizeof(shdrs[0]), ehdr.e_shnum, bin) != ehdr.e_shnum) 90 | CLEAN_EXIT(map_read_exe_sheaders_failed); 91 | 92 | // Read the string table. 93 | ElfW(Shdr)* sh_strab = &shdrs[ehdr.e_shstrndx]; 94 | char section_names[sh_strab->sh_size]; 95 | if (fseek(bin, sh_strab->sh_offset, SEEK_SET) != 0) 96 | CLEAN_EXIT(map_see_errno_seek_exe_string_table_failed); 97 | if (fread(section_names, sh_strab->sh_size, 1, bin) != 1) 98 | CLEAN_EXIT(map_read_exe_string_table_failed); 99 | 100 | // Find the ".text" section. 101 | for (uint32_t idx = 0; idx < ehdr.e_shnum; idx++) { 102 | ElfW(Shdr)* sh = &shdrs[idx]; 103 | if (!memcmp(§ion_names[sh->sh_name], ".text", 5)) { 104 | *text_section = *sh; 105 | CLEAN_EXIT(map_ok); 106 | } 107 | } 108 | 109 | CLEAN_EXIT(map_region_not_found); 110 | #undef CLEAN_EXIT 111 | } 112 | 113 | static int FindMapping(struct dl_phdr_info* hdr, size_t size, void* data) { 114 | FindParams* find_params = (FindParams*)data; 115 | ElfW(Shdr) text_section; 116 | 117 | // We are only interested in the information matching the regex or, if no 118 | // regex was given, the mapping matching the main executable. This latter 119 | // mapping has the empty string for a name. 120 | if ((find_params->have_regex && 121 | regexec(&find_params->regex, hdr->dlpi_name, 0, NULL, 0) == 0) || 122 | (hdr->dlpi_name[0] == 0 && !find_params->have_regex)) { 123 | const char* fname = (hdr->dlpi_name[0] == 0 ? "/proc/self/exe" : hdr->dlpi_name); 124 | 125 | // Once we have found the info structure for the desired linked-in object, 126 | // we open it on disk to find the location of its .text section. 
We use the 127 | // base address given to calculate the .text section offset in memory. 128 | text_section.sh_size=0; 129 | find_params->status = FindTextSection(fname, &text_section); 130 | // check if there are enough number of hugepages available 131 | // i.e. bytes available in HP is more than total_bytes needed 132 | // if not set the status = not_enough_pages, otherwise okay 133 | if (find_params->status == map_ok) { 134 | if (iodlr_use_ehp) { 135 | int pages_need = text_section.sh_size / HPS; 136 | int bytes_remaining = text_section.sh_size % HPS; 137 | if (bytes_remaining > 0) { 138 | pages_need += 1; 139 | } 140 | if (iodlr_number_of_ehp_avail < pages_need) { 141 | fprintf(stderr, "INFO: Need %d explicit pages.\n", pages_need); 142 | fflush(stderr); 143 | find_params->status = map_not_enough_explicit_hugepages_are_allocated; 144 | return 0; 145 | } else { 146 | iodlr_number_of_ehp_avail -= pages_need; 147 | } 148 | } 149 | find_params->start = hdr->dlpi_addr + text_section.sh_addr; 150 | fprintf(stderr, "Base address: %lx.", hdr->dlpi_addr); 151 | find_params->end = find_params->start + text_section.sh_size; 152 | return 1; 153 | } 154 | } 155 | 156 | return 0; 157 | } 158 | 159 | // Identify and return the text region in the currently mapped memory regions. 160 | static map_status FindTextRegion(const char* lib_regex, mem_range* region) { 161 | FindParams find_params = { 0, 0, { 0 }, false, map_region_not_found }; 162 | 163 | if (lib_regex != NULL) { 164 | if (regcomp(&find_params.regex, lib_regex, 0) != 0) { 165 | return map_invalid_regex; 166 | } 167 | find_params.have_regex = true; 168 | } 169 | 170 | // We iterate over all the mappings created for the main executable and any of 171 | // its linked-in dependencies. The return value of `FindMapping` will become 172 | // the return value of `dl_iterate_phdr`. 
173 | dl_iterate_phdr(FindMapping, &find_params); 174 | if (find_params.status != map_ok) { 175 | regfree(&find_params.regex); 176 | return find_params.status; 177 | } 178 | 179 | region->from = (void*)find_params.start; 180 | region->to = (void*)find_params.end; 181 | 182 | regfree(&find_params.regex); 183 | return map_ok; 184 | } 185 | 186 | static map_status IsExplicitHugePagesEnabled(bool* result) { 187 | *result = false; 188 | FILE* ifs; 189 | ifs = fopen("/proc/sys/vm/nr_hugepages", "r"); 190 | if (!ifs) { 191 | return map_failed_to_open_ehp_file; 192 | } 193 | 194 | int matched = fscanf(ifs, "%d", &iodlr_number_of_ehp_avail); 195 | if (matched != 1) { 196 | return map_malformed_thp_file; 197 | } 198 | fclose(ifs); 199 | if (iodlr_number_of_ehp_avail <= 0) { 200 | fprintf(stderr, "WARNING: No explicit hugepages are allocated\n"); 201 | fflush(stderr); 202 | *result = true; 203 | } else { 204 | *result = true; 205 | } 206 | return map_ok; 207 | } 208 | 209 | static map_status IsTransparentHugePagesEnabled(bool* result) { 210 | #if defined(ENABLE_LARGE_CODE_PAGES) && ENABLE_LARGE_CODE_PAGES 211 | *result = false; 212 | FILE* ifs; 213 | char always[16] = {0}; 214 | char madvise[16] = {0}; 215 | char never[16] = {0}; 216 | int matched; 217 | 218 | ifs = fopen("/sys/kernel/mm/transparent_hugepage/enabled", "rt"); 219 | if (!ifs) { 220 | return map_failed_to_open_thp_file; 221 | } 222 | 223 | matched = fscanf(ifs, "%s %s %s", always, madvise, never); 224 | fclose(ifs); 225 | 226 | if (matched != 3) { 227 | return map_malformed_thp_file; 228 | } 229 | 230 | if (strcmp(always, "[always]") == 0) { 231 | *result = true; 232 | } else if (strcmp(madvise, "[madvise]") == 0) { 233 | *result = true; 234 | } else if (strcmp(never, "[never]") == 0) { 235 | *result = false; 236 | } 237 | 238 | return map_ok; 239 | #else 240 | return map_unsupported_platform; 241 | #endif // ENABLE_LARGE_CODE_PAGES 242 | } 243 | 244 | // Move specified region to large pages. 
// We need to be very careful.
// 1: This function itself should not be moved.
// We use gcc attributes
// (__section__) to put it outside the ".text" section
// (__aligned__) to align it at 2M boundary
// (__noinline__) to not inline this function
// 2: This function should not call any function(s) that might be moved.
// a. map a new area and copy the original code there
// b. mmap using the start address with MAP_FIXED so we get exactly
// the same virtual address
// c. madvise with MADV_HUGEPAGE
// d. If successful copy the code there and unmap the original region
//
// `r` must already be aligned to HPS on both ends. Returns map_ok on success
// or a map_see_errno* code naming the step (and any cleanup step) that failed.
//
// NOTE(review): memcpy, mmap, madvise, mprotect and munmap are called below;
// whether any of them can live inside the region being moved (rule 2 above)
// depends on which object the caller is re-mapping - confirm.
static map_status
__attribute__((__section__("lpstub")))
__attribute__((__aligned__(HPS)))
__attribute__((__noinline__))
MoveRegionToLargePages(const mem_range* r) {
  void* nmem = NULL;  // temporary copy of the region's contents
  void* tmem = NULL;  // new mapping placed at the region's own address
  int ret = 0;
  map_status status = map_ok;
  void* start = r->from;
  size_t size = r->to - r->from;

  // Allocate temporary region preparing for copy
  nmem = mmap(NULL, size,
              PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (nmem == MAP_FAILED) {
    return map_see_errno;
  }

  memcpy(nmem, r->from, size);

  // We already know the original page is r-xp
  // (PROT_READ, PROT_EXEC, MAP_PRIVATE)
  // We want PROT_WRITE because we are writing into it.
  // We want it at the fixed address and we use MAP_FIXED.

  // First form of CLEAN_EXIT_CHECK: bail out if creating `tmem` failed,
  // releasing the temporary copy and reporting whether that release failed too.
#define CLEAN_EXIT_CHECK(oper) \
  if (tmem == MAP_FAILED) { \
    status = oper##_failed; \
    ret = munmap(nmem, size); \
    if (ret < 0) { \
      status = oper##_munmap_nmem_failed; \
    } \
    return status; \
  }

  if (iodlr_use_ehp) {
    // map to explicit hugepages
    tmem = mmap(start, size,
                PROT_READ | PROT_WRITE | PROT_EXEC,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED | MAP_HUGETLB,
                -1, 0);
  } else {
    tmem = mmap(start, size,
                PROT_READ | PROT_WRITE | PROT_EXEC,
                MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0);
  }

  CLEAN_EXIT_CHECK(map_see_errno_mmap_tmem);

#undef CLEAN_EXIT_CHECK

  // Second form of CLEAN_EXIT_CHECK: bail out if the preceding call left
  // `ret` negative, unmapping both `tmem` and `nmem` and encoding in the
  // status which of those unmaps (none, one, or both) failed as well.
  //
  // NOTE(review): `tmem` was requested at `start` with MAP_FIXED, so
  // munmap(tmem, size) removes the mapping at the region's own address, and
  // the saved copy in `nmem` is not written back first. Confirm that callers
  // can survive returning with that range unmapped.
#define CLEAN_EXIT_CHECK(oper) \
  if (ret < 0) { \
    status = oper##_failed; \
    ret = munmap(tmem, size); \
    if (ret < 0) { \
      status = oper##_munmap_tmem_failed; \
    } \
    ret = munmap(nmem, size); \
    if (ret < 0) { \
      status = (status == oper##_munmap_tmem_failed) \
        ? oper##_munmaps_failed \
        : oper##_munmap_nmem_failed; \
    } \
    return status; \
  }

  // Explicit huge pages were requested through MAP_HUGETLB above; only the
  // other path needs the madvise hint.
  if (!iodlr_use_ehp) {
    ret = madvise(tmem, size, MADV_HUGEPAGE);
    CLEAN_EXIT_CHECK(map_see_errno_madvise_tmem);
  }

  // Restore the contents into the new mapping, then drop write permission.
  memcpy(start, nmem, size);
  ret = mprotect(start, size, PROT_READ | PROT_EXEC);
  CLEAN_EXIT_CHECK(map_see_errno_mprotect);

#undef CLEAN_EXIT_CHECK

  // Release the old/temporary mapped region
  ret = munmap(nmem, size);
  if (ret < 0) {
    status = map_see_errno_munmap_nmem_failed;
  }

  return status;
}

// Align the region to be mapped to 2MB page boundaries.
344 | static void AlignRegionToPageBoundary(mem_range* r) { 345 | r->from = (void*)(largepage_align_up((uintptr_t)r->from)); 346 | r->to = (void*)(largepage_align_down((uintptr_t)r->to)); 347 | } 348 | 349 | static map_status CheckMemRange(mem_range* r) { 350 | if (r->from == NULL || r->to == NULL) { 351 | return map_invalid_region_address; 352 | } 353 | 354 | if (r->to - r->from < HPS || r->from > r->to) { 355 | return map_region_too_small; 356 | } 357 | 358 | return map_ok; 359 | } 360 | 361 | // Align the region to to be mapped to 2MB page boundaries and then move the 362 | // region to large pages. 363 | static map_status AlignMoveRegionToLargePages(mem_range* r) { 364 | map_status status; 365 | AlignRegionToPageBoundary(r); 366 | 367 | status = CheckMemRange(r); 368 | if (status != map_ok) { 369 | return status; 370 | } 371 | 372 | return MoveRegionToLargePages(r); 373 | } 374 | 375 | // Map the .text segment of the linked application into 2MB pages. 376 | // The algorithm is simple: 377 | // 1. Find the text region of the executing binary in memory 378 | // * Examine the /proc/self/maps to determine the currently mapped text 379 | // region and obtain the start and end addresses. 380 | // * Modify the start address to point to the very beginning of .text segment 381 | // (from variable textsegment setup in ld.script). 382 | // * Align the address of start and end addresses to large page boundaries. 383 | // 384 | // 2: Move the text region to large pages 385 | // * Map a new area and copy the original code there. 386 | // * Use mmap using the start address with MAP_FIXED so we get exactly the 387 | // same virtual address. 388 | // * Use madvise with MADV_HUGE_PAGE to use anonymous 2M pages. 389 | // * If successful, copy the code to the newly mapped area and unmap the 390 | // original region. 
391 | map_status MapStaticCodeToLargePages() { 392 | mem_range r = {0}; 393 | map_status status = FindTextRegion(NULL, &r); 394 | if (status != map_ok) { 395 | return status; 396 | } 397 | return AlignMoveRegionToLargePages(&r); 398 | } 399 | 400 | map_status MapDSOToLargePages(const char* lib_regex) { 401 | mem_range r = {0}; 402 | map_status status; 403 | 404 | if (lib_regex == NULL) { 405 | return map_null_regex; 406 | } 407 | 408 | status = FindTextRegion(lib_regex, &r); 409 | if (status != map_ok) { 410 | return status; 411 | } 412 | return AlignMoveRegionToLargePages(&r); 413 | } 414 | 415 | // This function is similar to the function above. However, the region to be 416 | // mapped to 2MB pages is specified for this version as hotStart and hotEnd. 417 | map_status MapStaticCodeRangeToLargePages(void* from, void* to) { 418 | mem_range r = {from, to}; 419 | return AlignMoveRegionToLargePages(&r); 420 | } 421 | 422 | // Return true if transparent huge pages is enabled on the system. Otherwise, 423 | // return false. 
424 | map_status IsLargePagesEnabled(bool* result) { 425 | iodlr_use_ehp = getenv("IODLR_USE_EXPLICIT_HP"); 426 | if (iodlr_use_ehp) { 427 | fprintf(stderr, "- experimental: using explicit hugepages - \n"); 428 | fflush(stderr); 429 | return IsExplicitHugePagesEnabled(result); 430 | } else { 431 | return IsTransparentHugePagesEnabled(result); 432 | } 433 | } 434 | 435 | const char* MapStatusStr(map_status status, bool fulltext) { 436 | static const char* map_status_text[] = { 437 | "map_ok", 438 | "ok", 439 | "map_failed_to_open_thp_file", 440 | "failed to open hugepage enablement status file", 441 | "map_invalid_regex", 442 | "invalid regex", 443 | "map_invalid_region_address", 444 | "invalid region boundaries", 445 | "map_malformed_thp_file", 446 | "malformed thp enablement status file", 447 | "map_null_regex", 448 | "regex was NULL", 449 | "map_region_not_found", 450 | "map region not found", 451 | "map_region_too_small", 452 | "map region too small", 453 | "map_see_errno", 454 | "see errno", 455 | "map_see_errno_madvise_tmem_failed", 456 | "madvise for destination failed", 457 | "map_see_errno_madvise_tmem_munmap_nmem_failed", 458 | "madvise for destination and unmapping of temporary failed", 459 | "map_see_errno_madvise_tmem_munmaps_failed", 460 | "madvise for destination and unmappings failed", 461 | "map_see_errno_madvise_tmem_munmap_tmem_failed", 462 | "madvise for destination and unmapping of destination failed", 463 | "map_see_errno_mmap_tmem_failed", 464 | "mapping of destination failed", 465 | "map_see_errno_mmap_tmem_munmap_nmem_failed", 466 | "mapping of destination and unmapping of temporary failed", 467 | "map_see_errno_mprotect_failed", 468 | "mprotect failed", 469 | "map_see_errno_mprotect_munmap_nmem_failed", 470 | "mprotect and unmapping of temporary failed", 471 | "map_see_errno_mprotect_munmaps_failed", 472 | "mprotect and unmappings failed", 473 | "map_see_errno_mprotect_munmap_tmem_failed", 474 | "mprotect and unmapping of destination 
failed", 475 | "map_see_errno_munmap_nmem_failed", 476 | "unmapping of temporary failed", 477 | "map_unsupported_platform", 478 | "mapping to large pages is not supported on this platform", 479 | "map_open_exe_failed", 480 | "opening executable file failed", 481 | "map_see_errno_close_exe_failed", 482 | "closing executable file failed", 483 | "map_see_errno_seek_exe_sheaders_failed", 484 | "seeking to executable file section headers failed", 485 | "map_read_exe_header_failed", 486 | "reading executable file header failed", 487 | "map_read_exe_sheaders_failed", 488 | "reading executable file section headers failed", 489 | "map_see_errno_seek_exe_string_table_failed", 490 | "seeking to executable file string table failed", 491 | "map_read_exe_string_table_failed", 492 | "reading executable file string table failed", 493 | "map_failed_to_open_ehp_file", 494 | "failed to open nr_hugepages file", 495 | "map_not_enough_explicit_hugepages_are_allocated", 496 | "not enough explicit hugepages are available" 497 | }; 498 | return map_status_text[((int)status << 1) + (fulltext & 1)]; 499 | } 500 | -------------------------------------------------------------------------------- /large_page-c/large_page.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2018 Intel Corporation 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom 8 | // the Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | // OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | // OR OTHER DEALINGS IN THE SOFTWARE. 20 | // 21 | // SPDX-License-Identifier: MIT 22 | 23 | #ifndef LARGE_PAGE_H_ 24 | #define LARGE_PAGE_H_ 25 | 26 | #include 27 | 28 | typedef enum { 29 | map_ok, 30 | map_failed_to_open_thp_file, 31 | map_invalid_regex, 32 | map_invalid_region_address, 33 | map_malformed_thp_file, 34 | map_null_regex, 35 | map_region_not_found, 36 | map_region_too_small, 37 | map_see_errno, 38 | map_see_errno_madvise_tmem_failed, 39 | map_see_errno_madvise_tmem_munmap_nmem_failed, 40 | map_see_errno_madvise_tmem_munmaps_failed, 41 | map_see_errno_madvise_tmem_munmap_tmem_failed, 42 | map_see_errno_mmap_tmem_failed, 43 | map_see_errno_mmap_tmem_munmap_nmem_failed, 44 | map_see_errno_mprotect_failed, 45 | map_see_errno_mprotect_munmap_nmem_failed, 46 | map_see_errno_mprotect_munmaps_failed, 47 | map_see_errno_mprotect_munmap_tmem_failed, 48 | map_see_errno_munmap_nmem_failed, 49 | map_unsupported_platform, 50 | map_open_exe_failed, 51 | map_see_errno_close_exe_failed, 52 | map_read_exe_header_failed, 53 | map_see_errno_seek_exe_sheaders_failed, 54 | map_read_exe_sheaders_failed, 55 | map_see_errno_seek_exe_string_table_failed, 56 | map_read_exe_string_table_failed, 57 | map_failed_to_open_ehp_file, 58 | map_not_enough_explicit_hugepages_are_allocated 59 | } map_status; 60 | 61 | #define MAP_STATUS_STR(status) MapStatusStr(status, true) 62 | #define MAP_STATUS_STR_SHORT(status) MapStatusStr(status, false) 63 | 64 | map_status MapStaticCodeToLargePages(); 65 | map_status 
MapDSOToLargePages(const char* lib_regex); 66 | map_status MapStaticCodeRangeToLargePages(void* from, void* to); 67 | map_status IsLargePagesEnabled(bool* result); 68 | const char* MapStatusStr(map_status status, bool fulltext); 69 | 70 | #endif // LARGE_PAGE_H_ 71 | -------------------------------------------------------------------------------- /large_page-c/lp_preload.c: -------------------------------------------------------------------------------- 1 | #define _GNU_SOURCE 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "large_page.h" 9 | 10 | pid_t gettid(void); 11 | 12 | void printErr (map_status status, const char * lib) { 13 | fprintf(stderr, 14 | "Mapping to large pages failed for %s: %s\n", lib, 15 | MapStatusStr(status, true)); 16 | } 17 | 18 | static int tryMapAllDSOs(struct dl_phdr_info* hdr, size_t size, void* data) { 19 | regex_t * ignoreReg = (regex_t *)data; 20 | const char * lib = hdr->dlpi_name; 21 | if (lib && lib[0] != 0) { 22 | if (ignoreReg != NULL && regexec(ignoreReg, lib, 0, NULL, 0) == 0) { 23 | fprintf(stderr, "Ignoring %s\n", lib); 24 | } else { 25 | fprintf(stderr, "Enabling large code pages for %s ", lib); 26 | fflush(stderr); // flush output before a possible error 27 | map_status status = MapDSOToLargePages(lib); 28 | if (status == map_ok) { 29 | fprintf(stderr, " - success.\n"); 30 | } else { 31 | fprintf(stderr, "\n"); 32 | printErr(status, lib); 33 | } 34 | } 35 | } 36 | return 0; 37 | } 38 | 39 | 40 | void __attribute__((constructor)) map_to_large_pages() { 41 | bool is_enabled = true; 42 | fprintf(stderr, "TID: %d\n:", gettid()); 43 | map_status status = IsLargePagesEnabled(&is_enabled); 44 | if (status != map_ok) goto fail; 45 | 46 | if (!is_enabled) goto fail; 47 | 48 | status = MapStaticCodeToLargePages(); 49 | if (status != map_ok) { 50 | printErr(status, "static code"); 51 | } 52 | 53 | regex_t ignoreReg; 54 | const char * ignoreStr = secure_getenv("LP_IGNORE"); 55 | if (ignoreStr 
== NULL || regcomp(&ignoreReg, ignoreStr, REG_EXTENDED) != 0) { 56 | dl_iterate_phdr(tryMapAllDSOs, NULL); 57 | } else { 58 | dl_iterate_phdr(tryMapAllDSOs, &ignoreReg); 59 | regfree(&ignoreReg); 60 | } 61 | 62 | return; 63 | fail: 64 | if (status == map_ok) { 65 | if (!is_enabled) 66 | fprintf(stderr, 67 | "Mapping to large pages in not enabled on your system. " 68 | "Make sure /sys/kernel/mm/transparent_hugepage/enabled is set to " 69 | "'madvise' or 'enabled' " 70 | "Or explicit hugepages of supported size is enabled with " 71 | "+ve number in /proc/sys/vm/nr_hugepages. \n"); 72 | } else { 73 | fprintf(stderr, 74 | "Mapping to large pages failed: %s\n", 75 | MapStatusStr(status, true)); 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /large_page/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2018 Intel Corporation 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), 5 | to deal in the Software without restriction, including without limitation 6 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom 8 | the Software is furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included 11 | in all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL 16 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | OR OTHER DEALINGS IN THE SOFTWARE. 20 | 21 | SPDX-License-Identifier: MIT 22 | -------------------------------------------------------------------------------- /large_page/Makefile: -------------------------------------------------------------------------------- 1 | OUTDIR?=. 2 | CC=g++ 3 | CFLAGS=-O3 -std=c++11 -D_FORTIFY_SOURCE=2 -fsanitize=address -z noexecstack -z relro -z now -fstack-protector -Wformat -Wformat-security -Wall 4 | AR=ar 5 | RM=/bin/rm 6 | 7 | .PHONY: all 8 | all: $(OUTDIR)/liblarge_page.a 9 | 10 | # Append -DENABLE_LARGE_CODE_PAGES=1 to CFLAGS on supported platforms. 11 | include ../detect-platform.mk 12 | 13 | %.o: %.cc 14 | $(CC) $(CFLAGS) -c $< -o $@ 15 | 16 | $(OUTDIR)/liblarge_page.a: large_page.o 17 | $(AR) rcs $@ $< 18 | 19 | .PHONY: clean 20 | clean: 21 | $(RM) -f *.o $(OUTDIR)/*.a 22 | -------------------------------------------------------------------------------- /large_page/README.md: -------------------------------------------------------------------------------- 1 | # Reference Implementation for Utilizing Large Pages 2 | This directory contains a reference implementation for automating the process 3 | of utilizing transparent huge pages. 
4 | 5 | # APIs 6 | ``` 7 | * MapStatus IsLargePagesEnabled(bool* result) 8 | Check whether transparent huge pages are enabled on the system 9 | Sets *result to true if THP is enabled; returns map_ok on success 10 | * MapStatus MapStaticCodeToLargePages(const std::string& regexpr = "") 11 | Map entire .text segment of the executable (or of the mapped file matching regexpr) to 2MB pages 12 | Returns map_ok on success, or a MapStatus error code if mapping fails 13 | * MapStatus MapStaticCodeToLargePages(void* from, void* to) 14 | Map the region between from and to to 2MB pages 15 | Returns map_ok on success, or a MapStatus error code if mapping fails 16 | ``` 17 | 18 | # Building liblarge_page.a: 19 | ``` 20 | make 21 | ``` 22 | -------------------------------------------------------------------------------- /large_page/example/Makefile: -------------------------------------------------------------------------------- 1 | CPPFLAGS=-O3 -std=c++11 -D_FORTIFY_SOURCE=2 -fsanitize=address -z noexecstack -z relro -z now -fstack-protector -Wformat -Wformat-security -Wall 2 | OBJDIR=$(shell realpath obj) 3 | OBJS = $(addprefix $(OBJDIR)/,large_page_example.o) 4 | LDFLAGS = -Wl,-T ../ld.implicit.script -fsanitize=address 5 | 6 | .PHONY: all 7 | all: large_page_example 8 | 9 | LARGE_PAGE_EXAMPLE_DEPS= \ 10 | $(OBJS) \ 11 | $(OBJDIR)/liblarge_page.a \ 12 | 13 | large_page_example: $(LARGE_PAGE_EXAMPLE_DEPS) 14 | @g++ $(LARGE_PAGE_EXAMPLE_DEPS) $(LDFLAGS) -o $@ 15 | 16 | $(OBJDIR)/liblarge_page.a: 17 | $(MAKE) -C .. OUTDIR=$(OBJDIR) 18 | 19 | $(OBJDIR)/%.o : %.cc 20 | @g++ $(CPPFLAGS) -o $@ -c -I.. $< 21 | 22 | $(OBJS): | $(OBJDIR) 23 | 24 | $(OBJDIR): 25 | @mkdir -p $(OBJDIR) 26 | 27 | clean: 28 | $(MAKE) -C .. OUTDIR=$(OBJDIR) clean 29 | @rm -rf $(OBJDIR) large_page_example 30 | -------------------------------------------------------------------------------- /large_page/example/README: -------------------------------------------------------------------------------- 1 | This directory contains a simple C++ source file and Makefile that 2 | illustrate how the reference implementation might be incorporated in 3 | an application. 
4 | -------------------------------------------------------------------------------- /large_page/example/large_page_example.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include "large_page.h" 3 | 4 | using std::cout; 5 | using std::cerr; 6 | using std::endl; 7 | 8 | int main() { 9 | largepage::MapStatus status; 10 | bool is_enabled; 11 | 12 | status = largepage::IsLargePagesEnabled(&is_enabled); 13 | if (status != largepage::map_ok) { 14 | cerr << "Failed to check enablement: " 15 | << largepage::MapStatusStr(status) << endl; 16 | return status; 17 | } 18 | 19 | if (is_enabled) { 20 | cout << "Transparent Huge Pages are enabled, mapping ..." << endl; 21 | status = largepage::MapStaticCodeToLargePages(); 22 | if (status != largepage::map_ok) { 23 | cerr << "Failed to map: " << largepage::MapStatusStr(status) << endl; 24 | return status; 25 | } 26 | cout << "Success !" << endl; 27 | return 0; 28 | } 29 | cerr << "Transparent Huge Pages are not enabled" << endl; 30 | return -1; 31 | } 32 | -------------------------------------------------------------------------------- /large_page/large_page.cc: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2018 Intel Corporation 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom 8 | // the Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 
12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | // OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | // OR OTHER DEALINGS IN THE SOFTWARE. 20 | // 21 | // SPDX-License-Identifier: MIT 22 | 23 | #include "large_page.h" 24 | 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | #include 31 | #include 32 | #include 33 | #include 34 | 35 | extern char __attribute__((weak)) __textsegment; 36 | 37 | namespace largepage { 38 | 39 | using std::pair; 40 | using std::string; 41 | using std::ifstream; 42 | using std::istringstream; 43 | using std::cout; 44 | using std::cerr; 45 | using std::regex; 46 | using std::smatch; 47 | 48 | 49 | namespace { 50 | 51 | struct MemRange { 52 | void* from; 53 | void* to; 54 | MemRange() : from(nullptr), to(nullptr) {} 55 | MemRange(void* from, void* to) : from(from), to(to) {} 56 | void set(void* f, void* t) { from = f; to = t; } 57 | }; 58 | 59 | constexpr size_t hps = 2L * 1024 * 1024; 60 | 61 | constexpr uintptr_t LargePageAlignDown(uintptr_t addr) { 62 | return (addr & ~(hps - 1)); 63 | } 64 | 65 | constexpr uintptr_t LargePageAlignUp(uintptr_t addr) { 66 | return LargePageAlignDown(addr + hps - 1); 67 | } 68 | 69 | // Identify and return the text region in the currently mapped memory regions. 
70 | MapStatus FindTextRegion(MemRange* region, const string& regexpr = "") { 71 | string exename; 72 | string map_line; 73 | regex lib_regex(regexpr); 74 | bool result; 75 | char selfexe[PATH_MAX] = {0}; 76 | 77 | ifstream ifs("/proc/self/maps"); 78 | 79 | if (!ifs) { 80 | return map_maps_open_failed; 81 | } 82 | 83 | ssize_t count = readlink("/proc/self/exe", selfexe, PATH_MAX); 84 | if (count < 0) { 85 | return map_exe_path_read_failed; 86 | } 87 | exename.assign(selfexe, count); 88 | 89 | // The following is the format of the maps file 90 | // address perms offset dev inode pathname 91 | // 00400000-00452000 r-xp 00000000 08:02 173521 /usr/bin/dbus-daemon 92 | while (getline(ifs, map_line)) { 93 | string permission; 94 | string dev; 95 | char dash; 96 | uint64_t offset, inode; 97 | uintptr_t start, end; 98 | 99 | istringstream iss(map_line); 100 | iss >> std::hex >> start; 101 | iss >> dash; 102 | iss >> std::hex >> end; 103 | iss >> permission; 104 | iss >> offset; 105 | iss >> dev; 106 | iss >> inode; 107 | 108 | if (inode != 0 && permission == "r-xp") { 109 | string pathname; 110 | iss >> pathname; 111 | if (regexpr.size() == 0) { 112 | result = (pathname == exename && 113 | start <= (uintptr_t)(&__textsegment) && 114 | end >= (uintptr_t)(&__textsegment)); 115 | start = (uintptr_t)(&__textsegment); 116 | } else { 117 | smatch lib_match; 118 | result = regex_search(pathname, lib_match, lib_regex); 119 | } 120 | if (result) { 121 | region->set(reinterpret_cast(start), 122 | reinterpret_cast(end)); 123 | return map_ok; 124 | } 125 | } 126 | } 127 | return map_region_not_found; 128 | } 129 | 130 | MapStatus IsTransparentHugePagesEnabled(bool* result) { 131 | #if defined(ENABLE_LARGE_CODE_PAGES) && ENABLE_LARGE_CODE_PAGES 132 | *result = false; 133 | ifstream ifs("/sys/kernel/mm/transparent_hugepage/enabled"); 134 | if (!ifs) { 135 | return map_failed_to_open_thp_file; 136 | } 137 | 138 | string always; 139 | string madvise; 140 | string never; 141 | 142 | pair 
check_items[] = {{ &always, true }, 143 | { &madvise, true }, 144 | { &never, false }}; 145 | 146 | ifs >> always >> madvise >> never; 147 | 148 | for (auto check : check_items) { 149 | if (check.first->size() == 0) { 150 | return map_malformed_thp_file; 151 | } 152 | 153 | if (*(check.first->begin()) == '[' && *(check.first->rbegin()) == ']') { 154 | *result = check.second; 155 | break; 156 | } 157 | } 158 | return map_ok; 159 | #else 160 | return map_unsupported_platform; 161 | #endif // ENABLE_LARGE_CODE_PAGES 162 | } 163 | 164 | // Move specified region to large pages. We need to be very careful. 165 | // 1: This function itself should not be moved. 166 | // We use gcc attributes 167 | // (__section__) to put it outside the ".text" section 168 | // (__aligned__) to align it at 2M boundary 169 | // (__noinline__) to not inline this function 170 | // 2: This function should not call any function(s) that might be moved. 171 | // a. map a new area and copy the original code there 172 | // b. mmap using the start address with MAP_FIXED so we get exactly 173 | // the same virtual address 174 | // c. madvise with MADV_HUGEPAGE 175 | // d. 
If successful copy the code there and unmap the original region 176 | MapStatus 177 | __attribute__((__section__(".lpstub"))) 178 | __attribute__((__aligned__(hps))) 179 | __attribute__((__noinline__)) 180 | MoveRegionToLargePages(const MemRange& r) { 181 | void* nmem = nullptr; 182 | void* tmem = nullptr; 183 | int ret = 0; 184 | MapStatus status = map_ok; 185 | void* start = r.from; 186 | size_t size = reinterpret_cast(r.to) - 187 | reinterpret_cast(r.from); 188 | 189 | // Allocate temporary region preparing for copy 190 | nmem = mmap(nullptr, size, 191 | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); 192 | if (nmem == MAP_FAILED) { 193 | return map_see_errno; 194 | } 195 | 196 | memcpy(nmem, r.from, size); 197 | 198 | // We already know the original page is r-xp 199 | // (PROT_READ, PROT_EXEC, MAP_PRIVATE) 200 | // We want PROT_WRITE because we are writing into it. 201 | // We want it at the fixed address and we use MAP_FIXED. 202 | #define CLEAN_EXIT_CHECK(oper) \ 203 | if (tmem == MAP_FAILED) { \ 204 | status = oper##_failed; \ 205 | ret = munmap(nmem, size); \ 206 | if (ret < 0) { \ 207 | status = oper##_munmap_nmem_failed; \ 208 | } \ 209 | return status; \ 210 | } 211 | 212 | tmem = mmap(start, size, 213 | PROT_READ | PROT_WRITE | PROT_EXEC, 214 | MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1 , 0); 215 | CLEAN_EXIT_CHECK(map_see_errno_mmap_tmem); 216 | 217 | #undef CLEAN_EXIT_CHECK 218 | 219 | #define CLEAN_EXIT_CHECK(oper) \ 220 | if (ret < 0) { \ 221 | status = oper##_failed; \ 222 | ret = munmap(tmem, size); \ 223 | if (ret < 0) { \ 224 | status = oper##_munmap_tmem_failed; \ 225 | } \ 226 | ret = munmap(nmem, size); \ 227 | if (ret < 0) { \ 228 | status = (status == oper##_munmap_tmem_failed) \ 229 | ? 
oper##_munmaps_failed \ 230 | : oper##_munmap_nmem_failed; \ 231 | } \ 232 | return status; \ 233 | } 234 | 235 | ret = madvise(tmem, size, MADV_HUGEPAGE); 236 | CLEAN_EXIT_CHECK(map_see_errno_madvise_tmem); 237 | 238 | memcpy(start, nmem, size); 239 | ret = mprotect(start, size, PROT_READ | PROT_EXEC); 240 | CLEAN_EXIT_CHECK(map_see_errno_mprotect); 241 | 242 | #undef CLEAN_EXIT_CHECK 243 | 244 | // Release the old/temporary mapped region 245 | ret = munmap(nmem, size); 246 | if (ret < 0) { 247 | status = map_see_errno_munmap_nmem_failed; 248 | } 249 | 250 | return status; 251 | } 252 | 253 | // Align the region to be mapped to 2MB page boundaries. 254 | void AlignRegionToPageBoundary(MemRange* r) { 255 | r->from = reinterpret_cast(LargePageAlignUp( 256 | reinterpret_cast(r->from))); 257 | r->to = reinterpret_cast(LargePageAlignDown( 258 | reinterpret_cast(r->to))); 259 | } 260 | 261 | MapStatus CheckMemRange(const MemRange& r) { 262 | if (r.from == nullptr || r.to == nullptr || r.from > r.to) { 263 | return map_invalid_region_address; 264 | } 265 | 266 | if (reinterpret_cast(r.to) - 267 | reinterpret_cast(r.from) < hps) { 268 | return map_region_too_small; 269 | } 270 | 271 | return map_ok; 272 | } 273 | 274 | // Align the region to be mapped to 2MB page boundaries and then move the 275 | // region to large pages. 276 | MapStatus AlignMoveRegionToLargePages(MemRange r) { 277 | AlignRegionToPageBoundary(&r); 278 | 279 | MapStatus status = CheckMemRange(r); 280 | if (status != map_ok) { 281 | return status; 282 | } 283 | 284 | if (r.to <= (void*)MoveRegionToLargePages) { 285 | return MoveRegionToLargePages(r); 286 | } 287 | 288 | return map_mover_overlaps; 289 | } 290 | 291 | } // namespace 292 | 293 | // Map the .text segment of the linked application into 2MB pages. 294 | // The algorithm is simple: 295 | // 1. 
Find the text region of the executing binary in memory 296 | // * Examine /proc/self/maps to determine the currently mapped text 297 | // region and obtain the start and end addresses. 298 | // * Modify the start address to point to the very beginning of .text segment 299 | // (from the symbol __textsegment set up in ld.implicit.script). 300 | // * Align the start and end addresses to large page boundaries. 301 | // 302 | // 2: Move the text region to large pages 303 | // * Map a new area and copy the original code there. 304 | // * Use mmap using the start address with MAP_FIXED so we get exactly the 305 | // same virtual address. 306 | // * Use madvise with MADV_HUGEPAGE to use anonymous 2M pages. 307 | // * If successful, copy the code to the newly mapped area and unmap the 308 | // original region. 309 | MapStatus MapStaticCodeToLargePages(const std::string& regexpr) { 310 | MemRange r; 311 | MapStatus status = FindTextRegion(&r, regexpr); 312 | if (status != map_ok) { 313 | return status; 314 | } 315 | return AlignMoveRegionToLargePages(r); 316 | } 317 | 318 | // This function is similar to the function above. However, the region to be 319 | // mapped to 2MB pages is specified for this version as `from` and `to`. 
320 | MapStatus MapStaticCodeToLargePages(void* from, void* to) { 321 | return AlignMoveRegionToLargePages(MemRange(from, to)); 322 | } 323 | 324 | MapStatus IsLargePagesEnabled(bool* result) { 325 | return IsTransparentHugePagesEnabled(result); 326 | } 327 | 328 | const string& MapStatusStr(MapStatus status, bool fulltext) { 329 | static string map_status_text[] = { 330 | "map_ok", 331 | "ok", 332 | "map_exe_path_read_failed", 333 | "failed to read executable path file", 334 | "map_failed_to_open_thp_file", 335 | "failed to open thp enablement status file", 336 | "map_invalid_regex", 337 | "invalid regex", 338 | "map_invalid_region_address", 339 | "invalid region boundaries", 340 | "map_malformed_thp_file", 341 | "malformed thp enablement status file", 342 | "map_malformed_maps_file", 343 | "malformed /proc//maps file", 344 | "map_maps_open_failed", 345 | "failed to open maps file", 346 | "map_mover_overlaps", 347 | "the remapping function is part of the region", 348 | "map_null_regex", 349 | "regex was NULL", 350 | "map_region_not_found", 351 | "map region not found", 352 | "map_region_too_small", 353 | "map region too small", 354 | "map_see_errno", 355 | "see errno", 356 | "map_see_errno_madvise_tmem_failed", 357 | "madvise for destination failed", 358 | "map_see_errno_madvise_tmem_munmap_nmem_failed", 359 | "madvise for destination and unmapping of temporary failed", 360 | "map_see_errno_madvise_tmem_munmaps_failed", 361 | "madvise for destination and unmappings failed", 362 | "map_see_errno_madvise_tmem_munmap_tmem_failed", 363 | "madvise for destination and unmapping of destination failed", 364 | "map_see_errno_mmap_tmem_failed", 365 | "mapping of destination failed", 366 | "map_see_errno_mmap_tmem_munmap_nmem_failed", 367 | "mapping of destination and unmapping of temporary failed", 368 | "map_see_errno_mprotect_failed", 369 | "mprotect failed", 370 | "map_see_errno_mprotect_munmap_nmem_failed", 371 | "mprotect and unmapping of temporary failed", 372 | 
"map_see_errno_mprotect_munmaps_failed", 373 | "mprotect and unmappings failed", 374 | "map_see_errno_mprotect_munmap_tmem_failed", 375 | "mprotect and unmapping of destination failed", 376 | "map_see_errno_munmap_nmem_failed", 377 | "unmapping of temporary failed", 378 | "map_unsupported_platform", 379 | "mapping to large pages is not supported on this platform", 380 | }; 381 | return map_status_text[(static_cast(status) << 1) + (fulltext & 1)]; 382 | } 383 | 384 | } // namespace largepage 385 | -------------------------------------------------------------------------------- /large_page/large_page.h: -------------------------------------------------------------------------------- 1 | // Copyright (C) 2018 Intel Corporation 2 | // 3 | // Permission is hereby granted, free of charge, to any person obtaining a copy 4 | // of this software and associated documentation files (the "Software"), 5 | // to deal in the Software without restriction, including without limitation 6 | // the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | // and/or sell copies of the Software, and to permit persons to whom 8 | // the Software is furnished to do so, subject to the following conditions: 9 | // 10 | // The above copyright notice and this permission notice shall be included 11 | // in all copies or substantial portions of the Software. 12 | // 13 | // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 14 | // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | // THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 17 | // OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 18 | // ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 19 | // OR OTHER DEALINGS IN THE SOFTWARE. 
20 | // 21 | // SPDX-License-Identifier: MIT 22 | 23 | #ifndef LARGE_PAGE_H_ 24 | #define LARGE_PAGE_H_ 25 | 26 | #include 27 | 28 | namespace largepage { 29 | using std::string; 30 | 31 | enum MapStatus { 32 | map_ok, 33 | map_exe_path_read_failed, 34 | map_failed_to_open_thp_file, 35 | map_invalid_regex, 36 | map_invalid_region_address, 37 | map_malformed_thp_file, 38 | map_malformed_maps_file, 39 | map_maps_open_failed, 40 | map_mover_overlaps, 41 | map_null_regex, 42 | map_region_not_found, 43 | map_region_too_small, 44 | map_see_errno, 45 | map_see_errno_madvise_tmem_failed, 46 | map_see_errno_madvise_tmem_munmap_nmem_failed, 47 | map_see_errno_madvise_tmem_munmaps_failed, 48 | map_see_errno_madvise_tmem_munmap_tmem_failed, 49 | map_see_errno_mmap_tmem_failed, 50 | map_see_errno_mmap_tmem_munmap_nmem_failed, 51 | map_see_errno_mprotect_failed, 52 | map_see_errno_mprotect_munmap_nmem_failed, 53 | map_see_errno_mprotect_munmaps_failed, 54 | map_see_errno_mprotect_munmap_tmem_failed, 55 | map_see_errno_munmap_nmem_failed, 56 | map_unsupported_platform, 57 | }; 58 | 59 | MapStatus MapStaticCodeToLargePages(const std::string& regexpr = ""); 60 | MapStatus MapStaticCodeToLargePages(void* from, void* to); 61 | MapStatus IsLargePagesEnabled(bool* result); 62 | const string& MapStatusStr(MapStatus status, bool fulltext = true); 63 | }; // namespace largepage 64 | 65 | #endif // LARGE_PAGE_H_ 66 | -------------------------------------------------------------------------------- /large_page/ld.implicit.script: -------------------------------------------------------------------------------- 1 | SECTIONS { 2 | .text ALIGN(0x200000): { 3 | __textsegment = .; 4 | *(.text .text.*) 5 | } 6 | } 7 | INSERT AFTER .init; 8 | 9 | SECTIONS { 10 | .lpstub ALIGN(0x200000): { 11 | *(.lpstub) 12 | } 13 | } 14 | INSERT AFTER .text; 15 | -------------------------------------------------------------------------------- /security.md: 
-------------------------------------------------------------------------------- 1 | # Security Policy 2 | Intel is committed to rapidly addressing security vulnerabilities affecting our customers and providing clear guidance on the solution, impact, severity and mitigation. 3 | 4 | ## Reporting a Vulnerability 5 | Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.intel.com/content/www/us/en/security-center/vulnerability-handling-guidelines.html). -------------------------------------------------------------------------------- /sysbench/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Benchmarking PostgreSQL database server using sysbench client 3 | 4 | # sysbench 5 | 6 | sysbench is a scriptable multi-threaded benchmark tool based on 7 | LuaJIT. It is most frequently used for database benchmarks, but can also 8 | be used to create arbitrarily complex workloads that do not involve a 9 | database server. 10 | 11 | sysbench comes with the following bundled benchmarks: 12 | 13 | - `oltp_*.lua`: a collection of OLTP-like database benchmarks 14 | - `fileio`: a filesystem-level benchmark 15 | - `cpu`: a simple CPU benchmark 16 | - `memory`: a memory access benchmark 17 | - `threads`: a thread-based scheduler benchmark 18 | - `mutex`: a POSIX mutex benchmark 19 | 20 | # 21 | For more information on sysbench, please visit [https://github.com/akopytov/sysbench](https://github.com/akopytov/sysbench). 22 | 23 | # PostgreSQL 24 | 25 | PostgreSQL is a powerful, open source object-relational database system 26 | that uses and extends the SQL language combined with many features that 27 | safely store and scale the most complicated data workloads. 28 | 29 | # 30 | For more information on postgresql, please visit [https://www.postgresql.org/](https://www.postgresql.org/). 
31 | 32 | # start the docker container with PostgreSQL server image 33 | 34 | ``` shell 35 | $ docker run --name postgres-instance -e POSTGRES_PASSWORD=mypass -e POSTGRES_USER=sbtest -p 8001:5432 -d postgres-img -c min_dynamic_shared_memory=128 36 | ``` 37 | 38 | # If you need to use hugepages use following command 39 | 40 | ``` shell 41 | $ docker run --name postgres-instance -e POSTGRES_PASSWORD=mypass -e POSTGRES_USER=sbtest -p 8001:5432 -d postgres-img -c huge_pages=on -c huge_page-size=2MB -c min_dynamic_shared_memory=128 42 | ``` 43 | 44 | 45 | ** You can start multiple containers listening at different port number 46 | visible to the external sysbench client. 47 | 48 | # prepare data and tables 49 | 50 | ``` shell 51 | $ sysbench --db-driver=pgsql --pgsql-user=sbtest --pgsql_password=mypass --pgsql-db=sbtest --pgsql-port=8001 --tables=16 --table-size=10000 --threads=256 --time=0 --events=0 --report-interval=1 --time=300 /usr/share/sysbench/oltp_read_write.lua prepare 52 | ``` 53 | 54 | # Run benchmark 55 | 56 | ``` shell 57 | $ sysbench --db-driver=pgsql --pgsql-user=sbtest --pgsql_password=mypass --pgsql-db=sbtest --pgsql-port=8001 --tables=16 --table-size=10000 --threads=256 --time=0 --events=0 --report-interval=1 --time=300 /usr/share/sysbench/oltp_read_write.lua run 58 | ``` 59 | 60 | 61 | -------------------------------------------------------------------------------- /test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for file in test/*; do 4 | if test -f $file; then 5 | ./$file $(pwd) || exit 1 6 | fi 7 | done 8 | -------------------------------------------------------------------------------- /test/large_page-c: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | REPO_ROOT=$1 4 | if test "x${REPO_ROOT}x" = "xx"; then 5 | echo "Usage: $0 " 6 | exit 1 7 | fi 8 | 9 | echo $'\n** Testing the C implementation' 10 | 11 | . 
"${REPO_ROOT}/test/lib/utils.sh" 12 | 13 | MAKE=$(get_make) 14 | 15 | cd ${REPO_ROOT}/large_page-c/example 16 | 17 | echo "*** Testing output for default setup" 18 | ${MAKE} || die "make failed for default setup" 2 19 | test_stdout \ 20 | ./large_page_example \ 21 | ${REPO_ROOT}/test/large_page-c.d/default_stdout || die "" 3 22 | ${MAKE} clean || die "make clean failed for default setup" 4 23 | 24 | echo "*** Testing output for unsupported setup" 25 | ${MAKE} ENABLE_LARGE_CODE_PAGES=0 || die "make failed for unsupported setup" 26 | test_stdout \ 27 | ./large_page_example \ 28 | ${REPO_ROOT}/test/large_page-c.d/unsupported_stdout || die "" 5 29 | ${MAKE} clean || die "make clean failed for unsupported setup" 6 30 | -------------------------------------------------------------------------------- /test/large_page-c.d/default_stdout: -------------------------------------------------------------------------------- 1 | Transparent Huge Pages are enabled, mapping... 2 | Success 3 | -------------------------------------------------------------------------------- /test/large_page-c.d/unsupported_stdout: -------------------------------------------------------------------------------- 1 | Failed to check enablement: mapping to large pages is not supported on this platform 2 | -------------------------------------------------------------------------------- /test/large_page.d/default_stdout: -------------------------------------------------------------------------------- 1 | Transparent Huge Pages are enabled, mapping... 
2 | Success 3 | -------------------------------------------------------------------------------- /test/large_page.d/unsupported_stdout: -------------------------------------------------------------------------------- 1 | Failed to check enablement: mapping to large pages is not supported on this platform 2 | -------------------------------------------------------------------------------- /test/lib/utils.sh: -------------------------------------------------------------------------------- 1 | # $1: message 2 | # $2: exit code 3 | die() { 4 | echo "$1" 5 | exit $2 6 | } 7 | 8 | # $1: binary 9 | # $2: name of file containing expected output 10 | test_stdout() { 11 | BINARY="$1" 12 | EXPECTED="$2" 13 | 14 | OUTPUT=$(mktemp) 15 | "${BINARY}" > "${OUTPUT}" 2>&1 16 | diff -u "${OUTPUT}" "${EXPECTED}" 17 | RESULT=$? 18 | 19 | rm -f "${OUTPUT}" 20 | return ${RESULT} 21 | } 22 | 23 | get_make() { 24 | if test "x$(uname -s)x" != "xFreeBSDx"; then 25 | echo "make" 26 | else 27 | echo "gmake" 28 | fi 29 | } 30 | -------------------------------------------------------------------------------- /tools/README.md: -------------------------------------------------------------------------------- 1 | # RBB/tools 2 | 3 | This directory contains a set of convenience scripts based on "standard perf" for profile collection and for deriving high-level metrics for further analysis. "perf" in turn uses performance counters in the CPU. 4 | 5 | # Current features 6 | 7 | * measure-perf-metric.sh is a top level script. 8 | * all metric_* files contain currently supported metrics. From the name you can see the list of supported metrics such as "itlb_stalls, itlb_mpki, l1_code_read_MPI, l2_demand_code_MPI". 9 | * There is only one metric_ file for a metric. 10 | 11 | # Contributing 12 | 13 | ## How to add a new metric? 14 | * first create one file with prefix metric_. 15 | * write two (2) functions in the new file. 16 | * select the set of performance counters needed to derive the metric. 
See the whole list using the "perf list" command. 17 | * decide the formula needed to derive the metric based on collected counter data. 18 | 19 | ## Example 20 | * Let's say we want to derive a metric called branch_mispredicts. 21 | * make a copy of metric.template and rename it metric_branch_mispredicts 22 | * The counters needed to derive this metric are "br_misp_retired.all_branches" and "br_inst_retired.all_branches" 23 | * the formula to calculate "branch_mispredict" ratio is, 24 | * (br_misp_retired.all_branches/br_inst_retired.all_branches) 25 | #### NOTE: Make sure "perf" on the target system has support for these counters. 26 | 27 | * With this information, update two functions (init_ and calc_) in the new file as shown below, 28 | ### NOTE: Make sure that the function names init_ and calc_ have the correct suffix matching the new metric name. 29 | 30 | ``` 31 | function init_branch_mispredicts() { 32 | local local_pmu_array=(br_misp_retired.all_branches br_inst_retired.all_branches) 33 | local local_pmus 34 | for item in ${local_pmu_array[*]} 35 | do 36 | if [ "x${local_pmus}" == "x" ]; then 37 | local_pmus="$item" 38 | else 39 | local_pmus="$local_pmus,$item" 40 | fi 41 | done 42 | echo $local_pmus 43 | } 44 | 45 | function calc_branch_mispredicts() { 46 | local perf_data_file="$1" 47 | local metric_name="metric_branch mispredict ratio" 48 | echo 49 | echo "=================================================" 50 | echo "Final $metric_name" 51 | echo "--------------------------------------------------" 52 | echo "FORMULA: ${metric_name} = (a/b)" 53 | echo " where, a=br_misp_retired.all_branches" 54 | echo " b=br_inst_retired.all_branches" 55 | echo "=================================================" 56 | 57 | local a=`return_pmu_value "br_misp_retired.all_branches" $perf_data_file ` 58 | local b=`return_pmu_value "br_inst_retired.all_branches" $perf_data_file` 59 | 60 | if [ $a == -1 -o $b == -1 ]; then 61 | echo "ERROR: ${metric_name} can't be derived. 
Missing pmus" 62 | else 63 | local metric=`echo "scale=$bc_scale;100*(${a}/${b})"| bc -l` 64 | echo ${metric_name}=${metric} 65 | fi 66 | echo 67 | } 68 | ``` 69 | 70 | * Save and try running the top level script with the -h option. 71 | measure-perf-metric.sh -h 72 | * You should see the new metric listed as supported. 73 | * Use it for real collection/analysis. 74 | * Once it's ready, contribute it back to the project. 75 | 76 | # Generate Perf Map file 77 | 78 | When huge pages are enabled and the .text sections are mapped into them, perf is no longer able to show the symbols. A /tmp/perf-[TID].map file is needed for perf to show symbols correctly. The gen-perf-map.sh tool helps generate the perf-[TID].map file. 79 | 80 | ## Usage 81 | ``` 82 | ./gen-perf-map.sh -s|-b PATH BASEADDRESS TID 83 | ``` 84 | * -s|-b: Pass -s if generating symbols for a dynamic library or -b if passing a binary 85 | * PATH: Path to the shared library or binary 86 | * TID: thread ID 87 | * BASEADDRESS: beginning address of the .text section in the huge page. 88 | 89 | If .text sections are mapped into huge pages with large_page-c, you can find the TID and BASEADDRESS in the console output. 90 | 91 | ## Example 92 | 93 | In this example, we run node with a javascript file and use "liblppreload.so" to map .text sections to huge pages. Below is the shell script. 94 | ``` 95 | #!/usr/bin/bash 96 | 97 | LD_PRELOAD=/usr/lib64/liblppreload.so node helloworld.js 98 | ``` 99 | 100 | Then, the command "perf record [shell script]" is run to record the profiling data. The following is the console output. From it, we can tell what the TID is, which library has been mapped into huge pages, and its base address. For our example, TID is "8817", the "libnode.so.64" has been mapped into huge pages and its base address is "7f36633c3000". 101 | ``` 102 | TID: 8817 103 | :Base address: 0.Mapping to large pages failed for static code: map region too small 104 | ......
105 | Enabling large code pages for /usr/lib64/liblppreload.so Base address: 7f3664990000. 106 | Mapping to large pages failed for /usr/lib64/liblppreload.so: map region too small 107 | Enabling large code pages for /lib/x86_64-linux-gnu/libnode.so.64 Base address: 7f36633c3000. - success. 108 | Enabling large code pages for /lib/x86_64-linux-gnu/libpthread.so.0 Base address: 7f36633a0000. 109 | Mapping to large pages failed for /lib/x86_64-linux-gnu/libpthread.so.0: map region too small 110 | Enabling large code pages for /lib/x86_64-linux-gnu/libc.so.6 Base address: 7f36631ae000. 111 | ...... 112 | [ perf record: Woken up 1 times to write data ] 113 | [ perf record: Captured and wrote 0.245 MB perf.data (1881 samples) ] 114 | ``` 115 | 116 | Run "gen-perf-map.sh" to generate the perf map file under /tmp/: 117 | ``` 118 | ./gen-perf-map.sh -s /lib/x86_64-linux-gnu/libnode.so.64 7f36633c3000 8817 119 | ``` 120 | -------------------------------------------------------------------------------- /tools/gen-perf-map.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Append perf map entries for one binary or shared library ($2) to /tmp/perf-TID.map ($4 is the TID). 3 | if [ "$#" -lt 4 ]; then 4 | echo "Usage: $0 -s|-b PATH BASEADDRESS TID" 5 | exit 1 6 | fi 7 | # Pick the nm flags: -D lists dynamic symbols (shared lib), -a lists all symbols (binary). 8 | if [ "$1" = "-s" ]; then 9 | NM_ARGS="-DSC" 10 | elif [ "$1" = "-b" ]; then 11 | NM_ARGS="-aSC" 12 | else 13 | echo "Pass either -s (shared lib) or -b (binary)" 14 | exit 1 15 | fi 16 | # Keep T/t/V/W/u symbols and blank the type column; PATH is quoted so paths with spaces work. 17 | nm $NM_ARGS "$2" | grep " [TtVWu] " | awk '{$3=""; print$0}' > nm-output.txt 18 | # Rebase each address by BASEADDRESS ($3): maps_file.py reads ./nm-output.txt and writes ./tmp.map. Stop if it fails. 19 | "$( dirname -- "$0"; )/maps_file.py" "$3" || { rm -f ./nm-output.txt; exit 1; } 20 | # Append (>>) to any map file that already exists for this TID. 21 | cat ./tmp.map >> "/tmp/perf-$4.map" 22 | # Remove the intermediate files created in the current directory. 23 | rm ./nm-output.txt ./tmp.map 24 | echo "Generated /tmp/perf-$4.map" 25 | -------------------------------------------------------------------------------- /tools/maps_file.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | import sys 3 | 4 | class MapsFile: 5 | def __init__(self, name): 6 | self.name = name 7 | self.lines = [] 8 | 9
| def write_file(self, name): 10 | with open(name, "w") as file: 11 | for line in self.lines: 12 | file.write("%s %s\n" % (line[0], line[1])) 13 | 14 | def calculate_vir_adr(self, baseaddr): 15 | with open(self.name) as file: 16 | for line in file: 17 | larray = line.rstrip().split(" ", 1) 18 | larray[0] = "%016lx" % (int(larray[0], 16) + baseaddr) 19 | self.lines.append(larray) 20 | 21 | 22 | if __name__ == "__main__": 23 | src = "./nm-output.txt" 24 | dest = "./tmp.map" 25 | fd = MapsFile(src) 26 | 27 | fd.calculate_vir_adr(int(sys.argv[1], 16)) 28 | 29 | fd.write_file(dest) 30 | -------------------------------------------------------------------------------- /tools/measure-perf-metric.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2018 Intel Corporation 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 20 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 
23 | # 24 | # SPDX-License-Identifier: MIT 25 | 26 | SCRIPTS_DIR=`dirname $0` 27 | 28 | process_id=$$ 29 | # User-tunable defaults: metric to derive, counters always collected, perf mode, collection time (s). 30 | default_metric_name="itlb_stalls" 31 | default_pmu_array=(cycles instructions) 32 | default_perf_mode="stat" 33 | default_collect_time=10 34 | 35 | ##################################################################### 36 | # No change required below this line 37 | ##################################################################### 38 | collect_time=$default_collect_time 39 | perf_mode=$default_perf_mode 40 | 41 | metric_array=($default_metric_name) 42 | metric_entries=1 43 | # Copy the whole default array: a bare $default_pmu_array expands to element 0 only, which dropped "instructions". 44 | pmu_array=("${default_pmu_array[@]}") 45 | pmu_entries=${#pmu_array[@]} 46 | 47 | metric_name=$default_metric_name 48 | input_metrics=() 49 | input_metric_idx=0 50 | 51 | verbose_mode=0 52 | debug_mode=0 53 | profile_mode="one" 54 | PERF_PMUS="" 55 | 56 | ##################################################################### 57 | # 58 | ##################################################################### 59 | function print_metric_array() { 60 | if [ $debug_mode -eq 1 ]; then 61 | for item in ${metric_array[*]} 62 | do 63 | local mod_item=$item 64 | if [ "${default_metric_name}" == "$item" ]; then 65 | mod_item="$item [Default]" 66 | fi 67 | echo " - $mod_item" 68 | local help_func="help_${item}" 69 | local type=`type -t $help_func` 70 | [ "$type" == "function" ] && $help_func 71 | done 72 | fi 73 | } 74 | 75 | function usage() { 76 | local pname=`basename $0` 77 | echo "Usage:" 78 | echo "$pname [-p pid1,pid2,... | -a] [-m m1,m2,...] [-t time] [-r] [-v] [-d] [-h] [-e \"applicaton \"]" 79 | echo 80 | echo " -a : Profile whole system" 81 | echo " -p pid(s) : application process ids to profile" 82 | echo " -m metrics : Use comma separator to specify one or many of the" 83 | echo " following metrics" 84 | debug_mode=1 85 | print_metric_array 86 | debug_mode=0 87 | echo " -t time : time in seconds. Default ${default_collect_time}s" 88 | echo " -r : Perf runs in record mode.
Default is \"$default_perf_mode\" mode" 89 | echo " -v : Verbose mode" 90 | echo " -d : Debug mode" 91 | echo " -e : Specify application to run with arguments." 92 | echo " -h : Help message" 93 | echo 94 | echo " Examples:" 95 | echo " 1) Single metric with verbose output:" 96 | echo " $ measure-perf-metric.sh -p 2345 -t 30 -m itlb_stalls -v" 97 | echo 98 | echo " 2) Multiple metrics:" 99 | echo " $ measure-perf-metric.sh -p 2345 -t 30 -m itlb_stalls,itlb_mpki" 100 | echo 101 | echo " 3) Whole system collection:" 102 | echo " $ measure-perf-metric.sh -a -t 30 -m itlb_stalls" 103 | echo 104 | echo " 4) Derive metric for an application" 105 | echo " $ measure-perf-metric.sh -e \"node index.js\" -m itlb_stalls" 106 | exit 107 | } 108 | 109 | # Build supported METRIC array ---- 110 | function add_to_metric_array() { 111 | local found=0 112 | 113 | #Check if the metric is already in the array 114 | for item in ${metric_array[*]} 115 | do 116 | if [ "$item" == "$1" ]; then 117 | found=1 118 | break 119 | fi 120 | done 121 | 122 | if [ $found -eq 0 ]; then 123 | metric_array[metric_entries]=$1 124 | metric_entries=`expr $metric_entries + 1` 125 | fi 126 | } 127 | 128 | function init_metric_array() { 129 | for i in `ls $SCRIPTS_DIR/metric_*` 130 | do 131 | local metric=`basename $i|cut -d'_' -f2- | cut -d'.' -f1` 132 | source ${i} > /dev/null 2>&1 133 | if [ $? 
!= 0 ]; then 134 | echo "ERROR: couldn't source ${i} " 135 | else 136 | add_to_metric_array "$metric" 137 | fi 138 | done 139 | 140 | # Debug mode print 141 | print_metric_array 142 | } 143 | 144 | # Build array of supported metrics 145 | init_metric_array 146 | 147 | if [ $# -eq 0 ]; then 148 | usage 149 | fi 150 | 151 | command_name="" 152 | while [ "$1" != "" ]; do 153 | case $1 in 154 | -a) profile_mode="system" 155 | app_process_id="" 156 | ;; 157 | -p) shift 158 | app_process_id=$1 159 | profile_mode="one" 160 | ;; 161 | -m) shift 162 | metric_name=$1 163 | ;; 164 | -t) shift 165 | collect_time=$1 166 | ;; 167 | -r) perf_mode="record" 168 | ;; 169 | -v) verbose_mode=1 170 | ;; 171 | -d) debug_mode=1 172 | ;; 173 | -e) 174 | shift 175 | command_name=$1 176 | profile_mode="command" 177 | ;; 178 | *) usage 179 | exit 1 180 | esac 181 | shift 182 | done 183 | 184 | 185 | if [ "x${command_name}" == "x" ]; then 186 | if [ "x${app_process_id}" == "x" -a "${profile_mode}" == "one" ]; then 187 | echo "ERROR: Please specify the application process id or use -a option to profile the whole system. Exiting." 
188 | exit 189 | fi 190 | fi 191 | 192 | if [ "x${metric_name}" == "x" ]; then 193 | echo "WARNING: ITLB_STALLS as a default metric will be derived" 194 | metric_name="itlb_stalls" 195 | fi 196 | 197 | if [ "x${collect_time}" == "x" ]; then 198 | echo "WARNING: Default perf data collection time is 10 seconds" 199 | collect_time=10 200 | fi 201 | 202 | PERF_DATA_COLLECTION_TIME=$collect_time #perf collection time in seconds 203 | PERF_DATA_FILE="/tmp/measure_${process_id}_perf_stat.txt" 204 | 205 | # Build input metric array 206 | function print_input_metric_array() { 207 | if [ $debug_mode -eq 1 ]; then 208 | for item in ${input_metrics[*]} 209 | do 210 | printf " %s\n" $item 211 | done 212 | fi 213 | } 214 | 215 | function add_to_input_metric_array() { 216 | local found=0 217 | 218 | #Check if the metric is already in the array 219 | for item in ${input_metrics[*]} 220 | do 221 | if [ "$item" == "$1" ]; then 222 | found=1 223 | break 224 | fi 225 | done 226 | 227 | if [ $found -eq 0 ]; then 228 | input_metrics[input_metric_idx]=$1 229 | input_metric_idx=`expr $input_metric_idx + 1` 230 | fi 231 | } 232 | 233 | function add_if_valid_metric() { 234 | local found=0 235 | local input_metric="$1" 236 | for item in ${metric_array[*]} 237 | do 238 | if [ "$item" == "$input_metric" ]; then 239 | found=1 240 | add_to_input_metric_array "$input_metric" 241 | break 242 | fi 243 | done 244 | if [ $found -eq 0 ]; then 245 | echo 246 | echo " Warning: Ignoring invalid metric: $input_metric." 
247 | fi 248 | } 249 | 250 | function build_input_metric_array() { 251 | 252 | if [ $debug_mode -eq 1 ]; then 253 | echo "Before build input metric array" 254 | echo "Array length: ${#input_metrics[@]}" 255 | print_input_metric_array 256 | fi 257 | 258 | local OLDIFS=$IFS 259 | IFS=","; read -ra local_metric <<< "${metric_name}" 260 | 261 | for i in "${local_metric[@]}" 262 | do 263 | add_if_valid_metric "$i" 264 | done 265 | IFS=$OLDIFS 266 | 267 | if [ $debug_mode -eq 1 ]; then 268 | echo "After build input metric array" 269 | echo "Array length: ${#input_metrics[@]}" 270 | print_input_metric_array 271 | fi 272 | } 273 | 274 | # PMU array ---- 275 | function print_perf_pmu_array() { 276 | if [ $debug_mode -eq 1 ]; then 277 | for item in ${pmu_array[*]} 278 | do 279 | printf " %s\n" $item 280 | done 281 | fi 282 | } 283 | 284 | function add_to_pmu_array() { 285 | local found=0 286 | 287 | #Check if the metric is already in the array 288 | for item in ${pmu_array[*]} 289 | do 290 | if [ "$item" == "$1" ]; then 291 | found=1 292 | break 293 | fi 294 | done 295 | 296 | if [ $found -eq 0 ]; then 297 | pmu_array[pmu_entries]=$1 298 | pmu_entries=`expr $pmu_entries + 1` 299 | fi 300 | } 301 | 302 | function rebuild_perf_pmu_args() { 303 | for item in ${pmu_array[*]} 304 | do 305 | if [ "x${PERF_PMUS}" == "x" ]; then 306 | PERF_PMUS="$item" 307 | else 308 | PERF_PMUS="$PERF_PMUS,$item" 309 | fi 310 | done 311 | } 312 | 313 | function init_perf_pmus() { 314 | for item in ${input_metrics[*]} 315 | do 316 | echo "Initializing for metric: $item" 317 | 318 | init_func="init_${item}" 319 | 320 | local specific_pmus=`${init_func}` 321 | 322 | local OLDIFS=$IFS 323 | IFS=","; read -ra local_pmus <<< "${specific_pmus}" 324 | 325 | for i in "${local_pmus[@]}" 326 | do 327 | add_to_pmu_array "$i" 328 | done 329 | IFS=$OLDIFS 330 | done 331 | 332 | # Debug mode print 333 | print_perf_pmu_array 334 | 335 | # Rebuild PMU list to pass to perf command 336 | rebuild_perf_pmu_args 337 | } 
338 | # Reads the comma-separated PID list in $app_process_id; exits with status 1 if any PID is not running. 339 | # Check if process with pid is still running 340 | function check_pids() { 341 | local OLDIFS=$IFS 342 | local realpid=$app_process_id 343 | IFS=","; read -ra local_pids <<< "${app_process_id}" 344 | # Ask ps for exactly this PID: grepping "ps ax" matched any line containing the digits and hid bash targets. 345 | for i in "${local_pids[@]}" 346 | do 347 | echo "Checking if process with pid ${i} exists..." 348 | realpid=`/bin/ps -p "${i}" -o pid= 2>/dev/null` 349 | if [ "x${realpid}" == "x" ]; then 350 | echo "ERROR: Process with PID ${i} is not found. Exiting." 351 | exit 1 352 | fi 353 | done 354 | IFS=$OLDIFS 355 | } 356 | 357 | function print_perf_header() { 358 | if [ "${profile_mode}" == "command" ]; then 359 | echo "perf ${perf_mode} -e ${PERF_PMUS} ${command_name}" 360 | else 361 | echo "Collect perf data for ${PERF_DATA_COLLECTION_TIME} seconds" 362 | echo "perf ${perf_mode} -e ${PERF_PMUS}" 363 | fi 364 | } 365 | 366 | function collect_perf_data() { 367 | echo 368 | 369 | local perf_ret=0 370 | if [ "${profile_mode}" == "system" ]; then 371 | echo "--------------------------------------------------" 372 | echo "Profiling whole system" 373 | echo "--------------------------------------------------" 374 | 375 | print_perf_header 376 | 377 | perf ${perf_mode} -o ${PERF_DATA_FILE} -e ${PERF_PMUS} -a sleep ${PERF_DATA_COLLECTION_TIME} 378 | if [ $? != 0 ]; then 379 | echo "Perf -a failed. Exiting" 380 | exit 381 | fi 382 | elif [ "${profile_mode}" == "one" ]; then 383 | # Check if process is still running" 384 | check_pids 385 | 386 | echo "--------------------------------------------------" 387 | echo "Profile application with process id: $app_process_id" 388 | echo "--------------------------------------------------" 389 | 390 | print_perf_header 391 | 392 | perf ${perf_mode} -o ${PERF_DATA_FILE} -e ${PERF_PMUS} -p $app_process_id sleep ${PERF_DATA_COLLECTION_TIME} 393 | if [ $? != 0 ]; then 394 | echo "Perf -p $app_process_id failed.
Exiting" 395 | exit 396 | fi 397 | elif [ "${profile_mode}" == "command" ]; then 398 | # Check if command_name is given to profile 399 | echo "--------------------------------------------------" 400 | if [ -n "${app_process_id}" ]; then 401 | echo "Profiling \"${command_name}\" against PID $app_process_id" 402 | else 403 | echo "Profiling \"${command_name}\"" 404 | fi 405 | echo "--------------------------------------------------" 406 | 407 | print_perf_header 408 | echo "--------------------------------------------------" 409 | 410 | if [ -n "${app_process_id}" ]; then 411 | perf ${perf_mode} -o ${PERF_DATA_FILE} -e ${PERF_PMUS} -p $app_process_id ${command_name} 412 | else 413 | perf ${perf_mode} -o ${PERF_DATA_FILE} -e ${PERF_PMUS} ${command_name} 414 | fi 415 | if [ $? != 0 ]; then 416 | echo "Perf for $command_name failed. Exiting" 417 | exit 418 | fi 419 | fi 420 | echo 421 | } 422 | 423 | function display_perf_data() 424 | { 425 | if [ $verbose_mode -eq 1 ]; then 426 | echo "=================================================" 427 | echo "Here is the perf output data" 428 | echo "=================================================" 429 | if [ "${perf_mode}" == "stat" ]; then 430 | cat $PERF_DATA_FILE 431 | fi 432 | fi 433 | echo 434 | } 435 | 436 | # Build valid input metric array 437 | build_input_metric_array 438 | if [ ${#input_metrics[@]} -le 0 ]; then 439 | echo "Error: No valid input metric provided. Exiting." 
440 | exit 441 | fi 442 | 443 | # Check if metric asked is supported 444 | #check_if_metric_supported 445 | 446 | # Initialize the PMUS 447 | init_perf_pmus 448 | 449 | # Get perf data 450 | collect_perf_data 451 | 452 | if [ "${perf_mode}" == "stat" ]; then 453 | #Display perf stat data 454 | display_perf_data 455 | 456 | for item in ${input_metrics[*]} 457 | do 458 | echo "Calculating metric for: $item" 459 | 460 | calc_func="calc_${item}" 461 | ${calc_func} "${PERF_DATA_FILE}" 462 | 463 | echo 464 | done 465 | else 466 | echo "Perf report: perf report --sort=dso,comm -i $PERF_DATA_FILE" 467 | perf report --sort=dso,comm -i $PERF_DATA_FILE 468 | fi 469 | 470 | exit 471 | 472 | -------------------------------------------------------------------------------- /tools/metric.template: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2018 Intel Corporation 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  
IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 20 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # SPDX-License-Identifier: MIT 25 | 26 | SCRIPTS_DIR=`dirname $0` 27 | source ${SCRIPTS_DIR}/utils.sh 28 | 29 | function init_somename() { 30 | local local_pmu_array=(pmu1 pmu2) 31 | local local_pmus 32 | for item in ${local_pmu_array[*]} 33 | do 34 | if [ "x${local_pmus}" == "x" ]; then 35 | local_pmus="$item" 36 | else 37 | local_pmus="$local_pmus,$item" 38 | fi 39 | done 40 | echo $local_pmus 41 | } 42 | 43 | function calc_somename() { 44 | local perf_data_file="$1" 45 | local metric_name="metric_" 46 | echo 47 | echo "=================================================" 48 | echo "Final ${metric_name}" 49 | echo "--------------------------------------------------" 50 | echo "FORMULA: ${metric_name} = 100*(a/b)" 51 | echo " where, a=pmu1 name" 52 | echo " b=pmu2 name" 53 | echo "=================================================" 54 | 55 | local a=`return_pmu_value "" $perf_data_file ` 56 | local b=`return_pmu_value "" $perf_data_file` 57 | 58 | if [ $a == -1 -o $b == -1 ]; then 59 | echo "ERROR: ${metric_name} can't be derived. 
Missing pmus" 60 | else 61 | local metric=`echo "scale=$bc_scale;100*(${a}/${b})"| bc -l` 62 | echo "${metric_name}=${metric}" 63 | fi 64 | echo 65 | } 66 | -------------------------------------------------------------------------------- /tools/metric_dsb_cache: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Copyright (C) 2018 Intel Corporation 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 20 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # SPDX-License-Identifier: MIT 25 | 26 | SCRIPTS_DIR=`dirname $0` 27 | source ${SCRIPTS_DIR}/utils.sh 28 | echo "ERR: $err_pmus" 29 | 30 | function help_dsb_cache() { 31 | echo " Measure front end related data, including MITE, DSB and IFU." 
32 | } 33 | 34 | function init_dsb_cache() { 35 | local local_pmu_array=(idq.dsb_uops idq.ms_uops idq.mite_uops lsd.uops dsb2mite_switches.penalty_cycles cycles) 36 | local local_pmus 37 | for item in ${local_pmu_array[*]} 38 | do 39 | if [ "x${local_pmus}" == "x" ]; then 40 | local_pmus="$item" 41 | else 42 | local_pmus="$local_pmus,$item" 43 | fi 44 | done 45 | echo $local_pmus 46 | } 47 | 48 | function calc_dsb_use() { 49 | local perf_data_file="$1" 50 | local metric_name="metric_dsb_use" 51 | echo 52 | echo "=================================================" 53 | echo "Final ${metric_name}" 54 | echo "--------------------------------------------------" 55 | echo "FORMULA: ${metric_name} = 100*(a/(a + b + c + d))" 56 | echo " where, a=idq.dsb_uops" 57 | echo " b=idq.ms_uops" 58 | echo " c=idq.mite_uops" 59 | echo " d=lsd.uops" 60 | echo "=================================================" 61 | 62 | local a=`return_pmu_value "idq.dsb_uops" $perf_data_file ` 63 | local b=`return_pmu_value "idq.ms_uops" $perf_data_file` 64 | local c=`return_pmu_value "idq.mite_uops" $perf_data_file` 65 | local d=`return_pmu_value "lsd.uops" $perf_data_file` 66 | 67 | if [ $a == -1 -o $b == -1 -o $c == -1 ]; then 68 | echo "ERROR: ${metric_name} can't be derived. 
Missing pmus" 69 | else 70 | local metric=`echo "scale=$bc_scale;100*(${a}/(${a}+${b}+${c}+${d}))"| bc -l` 71 | echo "${metric_name}%=${metric}" 72 | fi 73 | echo 74 | } 75 | 76 | function calc_mite_use() { 77 | local perf_data_file="$1" 78 | local metric_name="metric_mite_use" 79 | echo 80 | echo "=================================================" 81 | echo "Final ${metric_name}" 82 | echo "--------------------------------------------------" 83 | echo "FORMULA: ${metric_name} = 100*(c/(a + b + c + d))" 84 | echo " where, a=idq.dsb_uops" 85 | echo " b=idq.ms_uops" 86 | echo " c=idq.mite_uops" 87 | echo " d=lsd.uops" 88 | echo "=================================================" 89 | 90 | local a=`return_pmu_value "idq.dsb_uops" $perf_data_file ` 91 | local b=`return_pmu_value "idq.ms_uops" $perf_data_file` 92 | local c=`return_pmu_value "idq.mite_uops" $perf_data_file` 93 | local d=`return_pmu_value "lsd.uops" $perf_data_file` 94 | 95 | if [ $a == -1 -o $b == -1 -o $c == -1 ]; then 96 | echo "ERROR: ${metric_name} can't be derived. Missing pmus" 97 | else 98 | local metric=`echo "scale=$bc_scale;100*(${c}/(${a}+${b}+${c}+${d}))"| bc -l` 99 | echo "${metric_name}%=${metric}" 100 | fi 101 | echo 102 | } 103 | 104 | function calc_ifu_switch_penalty() { 105 | local perf_data_file="$1" 106 | local metric_name="metric_ifu_switch_penalty" 107 | echo 108 | echo "=================================================" 109 | echo "Final ${metric_name}" 110 | echo "--------------------------------------------------" 111 | echo "FORMULA: ${metric_name} = 100*(a/b)" 112 | echo " where, a=dsb2mite_switches.penalty_cycles" 113 | echo " b=cycles" 114 | echo "=================================================" 115 | 116 | local a=`return_pmu_value "dsb2mite_switches.penalty_cycles" $perf_data_file ` 117 | local b=`return_pmu_value "cycles" $perf_data_file` 118 | 119 | if [ $a == -1 -o $b == -1 ]; then 120 | echo "ERROR: ${metric_name} can't be derived. 
Missing pmus" 121 | else 122 | local metric=`echo "scale=$bc_scale;100*(${a}/${b})"| bc -l` 123 | echo "${metric_name}%=${metric}" 124 | fi 125 | echo 126 | } 127 | 128 | function calc_dsb_cache() { 129 | local perf_data_file="$1" 130 | calc_mite_use $perf_data_file 131 | calc_dsb_use $perf_data_file 132 | calc_ifu_switch_penalty $perf_data_file 133 | } 134 | -------------------------------------------------------------------------------- /tools/metric_icache_miss_stalls: -------------------------------------------------------------------------------- 1 | #!/bin/bash -x 2 | 3 | # Copyright (C) 2018 Intel Corporation 4 | # 5 | # Permission is hereby granted, free of charge, to any person obtaining a 6 | # copy of this software and associated documentation files 7 | # (the "Software"), to deal in the Software without restriction, 8 | # including without limitation the rights to use, copy, modify, merge, 9 | # publish, distribute, sublicense, and/or sell copies of the Software, 10 | # and to permit persons to whom the Software is furnished to do so, 11 | # subject to the following conditions: 12 | # 13 | # The above copyright notice and this permission notice shall be included 14 | # in all copies or substantial portions of the Software. 15 | # 16 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 | # OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL 19 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES 20 | # OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | # ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 22 | # OR OTHER DEALINGS IN THE SOFTWARE. 23 | # 24 | # SPDX-License-Identifier: MIT 25 | 26 | SCRIPTS_DIR=`dirname $0` 27 | source ${SCRIPTS_DIR}/utils.sh 28 | 29 | function help_icache_miss_stalls() { 30 | echo " Measure icache stall per instruction metrics." 
31 | } 32 | 33 | function init_icache_miss_stalls() { 34 | #Comma seperated perf supported counter names. See example below" 35 | local local_pmu_array=(instructions icache_16b.ifdata_stall 36 | "cpu/event=0x80,umask=0x4,cmask=1,edge=1,name=iicache_16b.ifdata_stall:c1:e1/") 37 | local local_pmus 38 | for item in ${local_pmu_array[*]} 39 | do 40 | if [ "x${local_pmus}" == "x" ]; then 41 | local_pmus="$item" 42 | else 43 | local_pmus="$local_pmus,$item" 44 | fi 45 | done 46 | echo $local_pmus 47 | } 48 | 49 | function calc_icache_miss_stalls() { 50 | local perf_data_file="$1" 51 | echo 52 | echo "=================================================" 53 | echo "Final icache_miss_stall metric" 54 | echo "--------------------------------------------------" 55 | echo "FORMULA: metric_ICache_Misses(%) = 100*((a+2*b)/c)" 56 | echo " where, a=icache_16b.ifdata_stall" 57 | echo " b=icache_16b.ifdata_stall:c1:e1)" 58 | echo " c=cycles" 59 | echo "=================================================" 60 | 61 | local a=`return_pmu_value "icache_16b.ifdata_stall" ${perf_data_file}` 62 | local b=`return_pmu_value "iicache_16b.ifdata_stall:c1:e1" ${perf_data_file}` 63 | local c=`return_pmu_value "cycles" ${perf_data_file}` 64 | 65 | if [ $a == -1 -o $b == -1 -o $c == -1 ]; then 66 | echo "ERROR: metric_ICache_Misses can't be derived. 
Missing pmus"
    else
        local metric=`echo "scale=$bc_scale;100*((${a}+2*${b})/${c})"| bc -l`
        echo "metric_ICache_Misses(%)=${metric}"
    fi

}

--------------------------------------------------------------------------------
/tools/metric_itlb_mpki:
--------------------------------------------------------------------------------
#!/bin/bash -x

# Copyright (C) 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# SPDX-License-Identifier: MIT

# Quoted so that an install path containing spaces still resolves utils.sh.
SCRIPTS_DIR=$(dirname "$0")
source "${SCRIPTS_DIR}/utils.sh"

# Print the one-line description of the itlb_mpki metric.
function help_itlb_mpki() {
    echo " Measure ITLB misses per Kilo-instruction, for 4k and large (2M/1G) pages."
}

# Print the comma-separated list of perf event names that the
# calc_itlb_*_mpki functions below look up.
function init_itlb_mpki() {
    # Comma separated perf supported counter names.
    local local_pmu_array=(instructions
                           itlb_misses.walk_completed
                           itlb_misses.walk_completed_4k
                           itlb_misses.walk_completed_2m_4m
                           itlb_misses.walk_completed_1g)
    # "${array[*]}" joins on the first character of IFS; the override is
    # local, and the quoted echo keeps the result from being re-split.
    local IFS=','
    echo "${local_pmu_array[*]}"
}

# Shared body of the four calc_itlb_*_mpki reports: print the banner, then
# either 1000*(a/b) or an error line when a counter could not be read.
#   $1 - label used in the banner title ("Final <label> metric")
#   $2 - metric name used in the FORMULA, result and ERROR lines
#   $3 - perf event used as the numerator (a); the denominator (b) is
#        always "instructions"
#   $4 - file holding the perf output to read the counters from
function _itlb_mpki_report() {
    local title="$1"
    local metric_name="$2"
    local miss_event="$3"
    local perf_data_file="$4"

    echo
    echo "================================================="
    echo "Final ${title} metric"
    echo "--------------------------------------------------"
    echo "FORMULA: ${metric_name}(%) = 1000*(a/b)"
    echo " where, a=${miss_event}"
    echo " b=instructions"
    echo "================================================="

    local a b
    a=$(return_pmu_value "${miss_event}" "${perf_data_file}")
    b=$(return_pmu_value "instructions" "${perf_data_file}")

    # return_pmu_value prints -1 when it cannot produce a numeric value.
    if [[ "${a}" == "-1" || "${b}" == "-1" ]]; then
        echo "ERROR: ${metric_name} can't be derived. Missing pmus"
    else
        local metric
        metric=$(echo "scale=$bc_scale;1000*(${a}/${b})" | bc -l)
        echo "${metric_name}(%)=${metric}"
    fi
}

# Report ITLB MPKI based on itlb_misses.walk_completed.
#   $1 - file holding the perf output
function calc_itlb_default_mpki() {
    _itlb_mpki_report "itlb_mpki" "metric_ITLB_MPKI" \
        "itlb_misses.walk_completed" "$1"
}

# Report ITLB MPKI based on itlb_misses.walk_completed_4k, followed by a
# blank line.
#   $1 - file holding the perf output
function calc_itlb_4k_mpki() {
    _itlb_mpki_report "itlb_4k_mpki" "metric_ITLB_4K_MPKI" \
        "itlb_misses.walk_completed_4k" "$1"
    echo
}

# Report ITLB MPKI based on itlb_misses.walk_completed_2m_4m, followed by a
# blank line.
#   $1 - file holding the perf output
function calc_itlb_2m_4m_mpki() {
    _itlb_mpki_report "itlb_2M_4M_mpki" "metric_ITLB_2M_4M_MPKI" \
        "itlb_misses.walk_completed_2m_4m" "$1"
    echo
}

# Report ITLB MPKI based on itlb_misses.walk_completed_1g, followed by a
# blank line.
#   $1 - file holding the perf output
function calc_itlb_1g_mpki() {
    _itlb_mpki_report "itlb_1G_mpki" "metric_ITLB_1G_MPKI" \
        "itlb_misses.walk_completed_1g" "$1"
    echo
}

# Print all four ITLB MPKI reports (default, 4k, 2M/4M, 1G) in that order.
#   $1 - file holding the perf output
function calc_itlb_mpki() {
    local perf_data_file="$1"
    calc_itlb_default_mpki "${perf_data_file}"
    calc_itlb_4k_mpki "${perf_data_file}"
    calc_itlb_2m_4m_mpki "${perf_data_file}"
    calc_itlb_1g_mpki "${perf_data_file}"
}

--------------------------------------------------------------------------------
/tools/metric_itlb_stalls:
--------------------------------------------------------------------------------
#!/bin/bash -x

# Copyright (C) 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# SPDX-License-Identifier: MIT

# Quoted so that an install path containing spaces still resolves utils.sh.
SCRIPTS_DIR=$(dirname "$0")
source "${SCRIPTS_DIR}/utils.sh"

# Print the one-line description of the itlb_stalls metric.
function help_itlb_stalls() {
    echo " Measure percentage of cycles where a code fetch is stalled due to L1 instruction cache tag miss."
}

# Print the comma-separated list of perf event names that calc_itlb_stalls
# looks up.
function init_itlb_stalls() {
    # Comma separated perf supported counter names.
    # calc_itlb_stalls reads "cycles" as its denominator, so it has to be
    # requested here as well; "instructions" is kept for compatibility.
    # NOTE(review): assumes the caller collects exactly the events printed
    # by this function - confirm against measure-perf-metric.sh.
    local local_pmu_array=(instructions cycles icache_64b.iftag_stall)
    # "${array[*]}" joins on the first character of IFS (local override).
    local IFS=','
    echo "${local_pmu_array[*]}"
}

# Print the itlb_stalls banner and 100*(icache_64b.iftag_stall/cycles), or
# an error line when either counter could not be read.
#   $1 - file holding the perf output
function calc_itlb_stalls() {
    local perf_data_file="$1"
    echo
    echo "================================================="
    echo "Final itlb_stalls metric"
    echo "--------------------------------------------------"
    echo "FORMULA: metric_ITLB_Misses(%) = 100*(a/b)"
    echo " where, a=icache_64b.iftag_stall"
    echo " b=cycles"
    echo "================================================="

    local a b
    a=$(return_pmu_value "icache_64b.iftag_stall" "${perf_data_file}")
    b=$(return_pmu_value "cycles" "${perf_data_file}")

    # return_pmu_value prints -1 when it cannot produce a numeric value.
    if [[ "${a}" == "-1" || "${b}" == "-1" ]]; then
        echo "ERROR: metric_ITLB_Misses can't be derived. Missing pmus"
    else
        local metric
        metric=$(echo "scale=$bc_scale;100*(${a}/${b})" | bc -l)
        echo "metric_ITLB_Misses(%)=${metric}"
    fi

}

--------------------------------------------------------------------------------
/tools/metric_l1_code_read_MPI:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright (C) 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# SPDX-License-Identifier: MIT

# Quoted so that an install path containing spaces still resolves utils.sh.
SCRIPTS_DIR=$(dirname "$0")
source "${SCRIPTS_DIR}/utils.sh"

# Print the one-line description of the l1_code_read_MPI metric.
function help_l1_code_read_MPI() {
    echo " Measure cache code read (fetch) per-instruction ratio."
}

# Print the comma-separated list of perf event names that
# calc_l1_code_read_MPI looks up.
function init_l1_code_read_MPI() {
    # Comma separated perf supported counter names.
    local local_pmu_array=(instructions l2_rqsts.all_code_rd)
    # "${array[*]}" joins on the first character of IFS (local override).
    local IFS=','
    echo "${local_pmu_array[*]}"
}

# Print the L1_code_read_MPI banner and 100*(l2_rqsts.all_code_rd/instructions),
# or an error line when either counter could not be read; ends with a blank
# line.
#   $1 - file holding the perf output
function calc_l1_code_read_MPI() {
    local perf_data_file="$1"
    echo
    echo "================================================="
    echo "Final L1_code_read_MPI metric"
    echo "--------------------------------------------------"
    # The banner states the formula actually evaluated below (it used to
    # carry the placeholder text from the metric template).
    echo "FORMULA: metric_L1_code_read_MPI = 100*(a/b)"
    echo " where, a=l2_rqsts.all_code_rd"
    echo " b=instructions"
    echo "================================================="

    local a b
    a=$(return_pmu_value "l2_rqsts.all_code_rd" "${perf_data_file}")
    b=$(return_pmu_value "instructions" "${perf_data_file}")

    # return_pmu_value prints -1 when it cannot produce a numeric value.
    if [[ "${a}" == "-1" || "${b}" == "-1" ]]; then
        echo "ERROR: metric_L1_code_read_MPI can't be derived. Missing pmus"
    else
        local metric
        metric=$(echo "scale=$bc_scale;100*(${a}/${b})" | bc -l)
        echo "metric_L1_code_read_MPI=${metric}"
    fi
    echo
}


--------------------------------------------------------------------------------
/tools/metric_l2_demand_code_MPI:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright (C) 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# SPDX-License-Identifier: MIT

# Quoted so that an install path containing spaces still resolves utils.sh.
SCRIPTS_DIR=$(dirname "$0")
source "${SCRIPTS_DIR}/utils.sh"

# Print the one-line description of the l2_demand_code_MPI metric.
function help_l2_demand_code_MPI() {
    echo " Measure cache code read (fetch) misses per-instruction."
}

# Print the comma-separated list of perf event names that
# calc_l2_demand_code_MPI looks up.
function init_l2_demand_code_MPI() {
    local local_pmu_array=(instructions l2_rqsts.code_rd_miss)
    # "${array[*]}" joins on the first character of IFS (local override).
    local IFS=','
    echo "${local_pmu_array[*]}"
}

# Print the L2_demand_code MPI banner and l2_rqsts.code_rd_miss/instructions,
# or an error line when either counter could not be read; ends with a blank
# line.
#   $1 - file holding the perf output
function calc_l2_demand_code_MPI() {
    local perf_data_file="$1"
    echo
    echo "================================================="
    echo "Final L2_demand_code MPI metric"
    echo "--------------------------------------------------"
    echo "FORMULA: metric_L2_demand_code_MPI = (a/b)"
    echo " where, a=l2_rqsts.code_rd_miss"
    echo " b=instructions"
    echo "================================================="

    local a b
    a=$(return_pmu_value "l2_rqsts.code_rd_miss" "${perf_data_file}")
    b=$(return_pmu_value "instructions" "${perf_data_file}")

    # return_pmu_value prints -1 when it cannot produce a numeric value.
    if [[ "${a}" == "-1" || "${b}" == "-1" ]]; then
        echo "ERROR: metric_L2_demand_code_MPI can't be derived. Missing pmus"
    else
        local metric
        metric=$(echo "scale=$bc_scale;(${a}/${b})" | bc -l)
        echo "metric_L2_demand_code_MPI=${metric}"
    fi

    echo
}

--------------------------------------------------------------------------------
/tools/utils.sh:
--------------------------------------------------------------------------------
#!/bin/bash

# Copyright (C) 2018 Intel Corporation
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files
# (the "Software"), to deal in the Software without restriction,
# including without limitation the rights to use, copy, modify, merge,
# publish, distribute, sublicense, and/or sell copies of the Software,
# and to permit persons to whom the Software is furnished to do so,
# subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES
# OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
# ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
# OR OTHER DEALINGS IN THE SOFTWARE.
#
# SPDX-License-Identifier: MIT

# Number of fractional digits passed to bc ("scale=") by the calc_* functions.
bc_scale=10

# Look up the value of one perf event in a perf output file.
#   $1 - event name; matched literally and as a whole word
#   $2 - file holding the perf output
# Prints the first field of the first matching line with whitespace and
# thousands separators removed, or -1 when the event name is empty, no line
# matches, or that field is not an unsigned integer.
function return_pmu_value() {
    local pmu="$1"
    local perf_data_file="$2"
    local re='^[0-9]+$'
    local myresult

    if [[ -z "${pmu}" ]]; then
        # The warning goes to stderr so that callers capturing stdout
        # receive just "-1".
        echo "WARNING: No pmu string specified" >&2
        echo -1
        return
    fi

    # -F: dots in event names are literal, not regex wildcards.
    # -w: "walk_completed" must not match "walk_completed_4k".
    # -m 1: use the first matching line only, so several matching lines
    #       are never glued together into one bogus number.
    myresult=$(grep -w -F -m 1 -- "${pmu}" "${perf_data_file}" \
                   | awk '{print $1}' | tr -d '[:space:][,]')

    if [[ "${myresult}" =~ ${re} ]]; then
        echo "$myresult"
    else
        echo -1
    fi
}

# Append every element of the global array metric_array to the global
# comma-separated string SUPPORTED_METRICS.
function rebuild_metric_args() {
    local item
    for item in ${metric_array[*]}
    do
        # Emit the separating comma only when something is already there.
        SUPPORTED_METRICS="${SUPPORTED_METRICS:+${SUPPORTED_METRICS},}${item}"
    done
}


# Report, for each event of a comma-separated list, whether a numeric value
# can be read for it from a perf output file.
#   $1 - comma-separated perf event names
#   $2 - file holding the perf output
# Prints one "PMU_VALUE: ..." or "PMU (...) not found with valid value" line
# per event; when at least one event is missing it additionally prints an
# error, the content of the perf output file and a final "-1" line.
function check_if_pmu_exists() {
    local perf_pmus="$1"
    local perf_data_file="$2"
    local pmu pmu_value
    local pmu_not_found=0

    # IFS is overridden for this single read only, so the caller's IFS is
    # left exactly as it was.
    # NOTE(review): PMUS stays a global as before, in case a caller reads it.
    IFS=',' read -ra PMUS <<< "${perf_pmus}"

    for pmu in "${PMUS[@]}"; do
        # Same lookup the calc_* functions use, so "found" here means the
        # metric calculation will be able to read the value too.
        pmu_value=$(return_pmu_value "${pmu}" "${perf_data_file}" 2>/dev/null)
        if [[ "${pmu_value}" == "-1" ]]; then
            echo "PMU ($pmu) not found with valid value"
            pmu_not_found=1
        else
            echo "PMU_VALUE: $pmu_value"
        fi
    done

    if [[ ${pmu_not_found} == 1 ]]; then
        echo "Error: Missing required one or more PMUs ($perf_pmus)"
        echo "Here is the perf output data"
        echo
        cat "${perf_data_file}"
        echo
        echo "Ignoring ...."
        echo -1
    fi
}
--------------------------------------------------------------------------------