├── .github ├── dependabot.yml └── workflows │ ├── platformcheck.yml │ └── standard.yml ├── .gitignore ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── README.md ├── build.rs ├── ci ├── build.bash ├── common.bash ├── set_rust_version.bash └── test.bash ├── rust-toolchain ├── src ├── apic │ ├── ioapic.rs │ ├── mod.rs │ ├── x2apic.rs │ └── xapic.rs ├── bits16 │ ├── mod.rs │ └── segmentation.rs ├── bits32 │ ├── eflags.rs │ ├── mod.rs │ ├── paging.rs │ ├── segmentation.rs │ └── task.rs ├── bits64 │ ├── mod.rs │ ├── paging.rs │ ├── registers.rs │ ├── rflags.rs │ ├── segmentation.rs │ ├── sgx.rs │ ├── syscall.rs │ ├── task.rs │ └── vmx.rs ├── controlregs.rs ├── debugregs.rs ├── dtables.rs ├── fence.rs ├── io.rs ├── irq.rs ├── lib.rs ├── msr.rs ├── perfcnt │ ├── intel │ │ ├── description.rs │ │ ├── events.rs │ │ └── mod.rs │ └── mod.rs ├── random.rs ├── segmentation.rs ├── task.rs ├── time.rs ├── tlb.rs └── vmx │ ├── mod.rs │ └── vmcs.rs ├── tests ├── kvm │ └── bin.rs └── no_std_build.rs ├── update_perfmon_db.sh ├── x86data └── perfmon_data │ ├── BDW-DE │ ├── broadwellde_core_v7.json │ ├── broadwellde_core_v7.tsv │ ├── broadwellde_matrix_bit_definitions_v7.json │ ├── broadwellde_matrix_bit_definitions_v7.tsv │ ├── broadwellde_uncore_v7.json │ └── broadwellde_uncore_v7.tsv │ ├── BDW │ ├── broadwell_core_v23.json │ ├── broadwell_core_v23.tsv │ ├── broadwell_fp_arith_inst_v23.json │ ├── broadwell_fp_arith_inst_v23.tsv │ ├── broadwell_matrix_bit_definitions_v23.json │ ├── broadwell_matrix_bit_definitions_v23.tsv │ ├── broadwell_matrix_v23.json │ ├── broadwell_matrix_v23.tsv │ ├── broadwell_offcore_v23.tsv │ ├── broadwell_uncore_v23.json │ └── broadwell_uncore_v23.tsv │ ├── BDX │ ├── broadwellx_core_v14.json │ ├── broadwellx_core_v14.tsv │ ├── broadwellx_matrix_bit_definitions_v14.json │ ├── broadwellx_matrix_bit_definitions_v14.tsv │ ├── broadwellx_matrix_v14.json │ ├── broadwellx_matrix_v14.tsv │ ├── broadwellx_offcore_v14.tsv │ ├── broadwellx_uncore_v14.json │ └── 
broadwellx_uncore_v14.tsv │ ├── BNL │ ├── Bonnell_core_V4.json │ └── Bonnell_core_V4.tsv │ ├── CLX │ ├── cascadelakex_core_v1.04.json │ ├── cascadelakex_core_v1.04.tsv │ ├── cascadelakex_fp_arith_inst_v1.04.json │ ├── cascadelakex_fp_arith_inst_v1.04.tsv │ ├── cascadelakex_uncore_v1.04.json │ ├── cascadelakex_uncore_v1.04.tsv │ ├── cascadelakex_uncore_v1.04_experimental.json │ └── cascadelakex_uncore_v1.04_experimental.tsv │ ├── GLM │ ├── goldmont_core_v13.json │ ├── goldmont_core_v13.tsv │ ├── goldmont_fp_arith_inst_v13.json │ ├── goldmont_fp_arith_inst_v13.tsv │ ├── goldmont_matrix_bit_definitions_v13.json │ ├── goldmont_matrix_bit_definitions_v13.tsv │ ├── goldmont_matrix_v13.json │ ├── goldmont_matrix_v13.tsv │ └── goldmont_offcore_v13.tsv │ ├── GLP │ ├── goldmontplus_core_v1.01.json │ ├── goldmontplus_core_v1.01.tsv │ ├── goldmontplus_fp_arith_inst_v1.01.json │ ├── goldmontplus_fp_arith_inst_v1.01.tsv │ ├── goldmontplus_matrix_bit_definitions_v1.01.json │ ├── goldmontplus_matrix_bit_definitions_v1.01.tsv │ ├── goldmontplus_matrix_v1.01.json │ ├── goldmontplus_matrix_v1.01.tsv │ └── goldmontplus_offcore_v1.01.tsv │ ├── HSW │ ├── haswell_core_v28.json │ ├── haswell_core_v28.tsv │ ├── haswell_fp_arith_inst_v28.json │ ├── haswell_fp_arith_inst_v28.tsv │ ├── haswell_matrix_bit_definitions_v28.json │ ├── haswell_matrix_bit_definitions_v28.tsv │ ├── haswell_matrix_v28.json │ ├── haswell_matrix_v28.tsv │ ├── haswell_offcore_v28.tsv │ ├── haswell_uncore_v28.json │ └── haswell_uncore_v28.tsv │ ├── HSX │ ├── haswellx_core_v20.json │ ├── haswellx_core_v20.tsv │ ├── haswellx_matrix_bit_definitions_v20.json │ ├── haswellx_matrix_bit_definitions_v20.tsv │ ├── haswellx_matrix_v20.json │ ├── haswellx_matrix_v20.tsv │ ├── haswellx_offcore_v20.tsv │ ├── haswellx_uncore_v20.json │ └── haswellx_uncore_v20.tsv │ ├── ICL │ ├── icelake_core_v1.00.json │ └── icelake_core_v1.00.tsv │ ├── IVB │ ├── ivybridge_core_v21.json │ ├── ivybridge_core_v21.tsv │ ├── 
ivybridge_fp_arith_inst_v21.json │ ├── ivybridge_fp_arith_inst_v21.tsv │ ├── ivybridge_matrix_bit_definitions_v21.json │ ├── ivybridge_matrix_bit_definitions_v21.tsv │ ├── ivybridge_matrix_v21.json │ ├── ivybridge_matrix_v21.tsv │ ├── ivybridge_offcore_v21.tsv │ ├── ivybridge_uncore_v21.json │ └── ivybridge_uncore_v21.tsv │ ├── IVT │ ├── ivytown_core_v20.json │ ├── ivytown_core_v20.tsv │ ├── ivytown_matrix_bit_definitions_v20.json │ ├── ivytown_matrix_bit_definitions_v20.tsv │ ├── ivytown_matrix_v20.json │ ├── ivytown_matrix_v20.tsv │ ├── ivytown_offcore_v20.tsv │ ├── ivytown_uncore_v20.json │ └── ivytown_uncore_v20.tsv │ ├── JKT │ ├── Jaketown_core_V20.json │ ├── Jaketown_core_V20.tsv │ ├── Jaketown_matrix_V20.json │ ├── Jaketown_matrix_V20.tsv │ ├── Jaketown_matrix_bit_definitions_V20.json │ ├── Jaketown_matrix_bit_definitions_V20.tsv │ ├── Jaketown_offcore_V20.tsv │ ├── Jaketown_uncore_V20.json │ └── Jaketown_uncore_V20.tsv │ ├── KNL │ ├── KnightsLanding_core_V9.json │ ├── KnightsLanding_core_V9.tsv │ ├── KnightsLanding_matrix_V9.json │ ├── KnightsLanding_matrix_V9.tsv │ ├── KnightsLanding_matrix_bit_definitions_V9.json │ ├── KnightsLanding_matrix_bit_definitions_V9.tsv │ ├── KnightsLanding_offcore_V9.tsv │ ├── KnightsLanding_uncore_V9.json │ └── KnightsLanding_uncore_V9.tsv │ ├── KNM │ ├── KnightsLanding_core_V9.json │ ├── KnightsLanding_matrix_V9.json │ ├── KnightsLanding_uncore_V9.json │ └── readme.txt │ ├── NHM-EP │ ├── NehalemEP_core_V2.json │ ├── NehalemEP_core_V2.tsv │ └── NehalemEP_offcore_V2.tsv │ ├── NHM-EX │ ├── NehalemEX_core_V2.json │ ├── NehalemEX_core_V2.tsv │ └── NehalemEX_offcore_V2.tsv │ ├── SKL │ ├── skylake_core_v42.json │ ├── skylake_core_v42.tsv │ ├── skylake_fp_arith_inst_v42.json │ ├── skylake_fp_arith_inst_v42.tsv │ ├── skylake_matrix_bit_definitions_v42.json │ ├── skylake_matrix_bit_definitions_v42.tsv │ ├── skylake_matrix_v42.json │ ├── skylake_matrix_v42.tsv │ ├── skylake_offcore_v42.tsv │ ├── skylake_uncore_v42.json │ └── 
skylake_uncore_v42.tsv │ ├── SKX │ ├── skylakex_core_v1.12.json │ ├── skylakex_core_v1.12.tsv │ ├── skylakex_fp_arith_inst_v1.12.json │ ├── skylakex_fp_arith_inst_v1.12.tsv │ ├── skylakex_matrix_bit_definitions_v1.12.json │ ├── skylakex_matrix_bit_definitions_v1.12.tsv │ ├── skylakex_matrix_v1.12.json │ ├── skylakex_matrix_v1.12.tsv │ ├── skylakex_offcore_v1.12.tsv │ ├── skylakex_uncore_v1.12.json │ ├── skylakex_uncore_v1.12.tsv │ ├── skylakex_uncore_v1.12_experimental.json │ └── skylakex_uncore_v1.12_experimental.tsv │ ├── SLM │ ├── Silvermont_core_V14.json │ ├── Silvermont_core_V14.tsv │ ├── Silvermont_matrix_V14.json │ ├── Silvermont_matrix_V14.tsv │ └── Silvermont_offcore_V14.tsv │ ├── SNB │ ├── sandybridge_core_v16.json │ ├── sandybridge_core_v16.tsv │ ├── sandybridge_matrix_bit_definitions_v16.json │ ├── sandybridge_matrix_bit_definitions_v16.tsv │ ├── sandybridge_matrix_v16.json │ ├── sandybridge_matrix_v16.tsv │ ├── sandybridge_offcore_v16.tsv │ ├── sandybridge_uncore_v16.json │ └── sandybridge_uncore_v16.tsv │ ├── SNR │ ├── snowridgex_core_v1.00.json │ ├── snowridgex_core_v1.00.tsv │ ├── snowridgex_offcore_v1.00.tsv │ ├── snowridgex_uncore_v1.00.json │ └── snowridgex_uncore_v1.00.tsv │ ├── TMA_Metrics.csv │ ├── TMA_Metrics.xlsx │ ├── WSM-EP-DP │ ├── WestmereEP-DP_core_V2.json │ ├── WestmereEP-DP_core_V2.tsv │ └── WestmereEP-DP_offcore_V2.tsv │ ├── WSM-EP-SP │ ├── WestmereEP-SP_core_V2.json │ ├── WestmereEP-SP_core_V2.tsv │ └── WestmereEP-SP_offcore_V2.tsv │ ├── WSM-EX │ ├── WestmereEX_core_V2.json │ ├── WestmereEX_core_V2.tsv │ └── WestmereEX_offcore_V2.tsv │ ├── mapfile.csv │ ├── readme.txt │ └── secure-pmu-access-1.01.csv └── x86test ├── Cargo.toml ├── README.md ├── src ├── hypervisor │ ├── mod.rs │ └── vspace.rs ├── lib.rs └── runner.rs ├── x86test_macro ├── Cargo.toml └── src │ └── lib.rs └── x86test_types ├── Cargo.toml └── src └── lib.rs /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # 
See 2 | # https://docs.github.com/en/free-pro-team@latest/github/administering-a-repository/enabling-and-disabling-version-updates 3 | # for details 4 | 5 | version: 2 6 | updates: 7 | # Enable crate version updates for the main crate 8 | - package-ecosystem: "cargo" 9 | # Look for `Cargo.toml` in the repository root 10 | directory: "/" 11 | # Check for updates every day (weekdays) 12 | schedule: 13 | interval: "daily" 14 | # Enable crate version updates for the x86test directory 15 | - package-ecosystem: "cargo" 16 | # Look for `Cargo.toml` in the `x86test` directory 17 | directory: "/x86test" 18 | # Check for updates every day (weekdays) 19 | schedule: 20 | interval: "daily" 21 | # Enable version updates for GitHub Actions 22 | - package-ecosystem: "github-actions" 23 | # Set to `/` to check the Actions used in `.github/workflows` 24 | directory: "/" 25 | # Check for updates every day (weekdays) 26 | schedule: 27 | interval: "daily" 28 | -------------------------------------------------------------------------------- /.github/workflows/platformcheck.yml: -------------------------------------------------------------------------------- 1 | name: Multiplatform CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | # This job downloads and stores `cross` as an artifact, so that it can be 7 | # redownloaded across all of the jobs. Currently this is copy-pasted between 8 | # `ci.yml` and `deploy.yml`. Make sure to update both places when making 9 | # changes. 
10 | install-cross: 11 | runs-on: ubuntu-latest 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | fetch-depth: 50 16 | - uses: XAMPPRocky/get-github-release@v1 17 | id: cross 18 | with: 19 | owner: rust-embedded 20 | repo: cross 21 | matches: ${{ matrix.platform }} 22 | token: ${{ secrets.GITHUB_TOKEN }} 23 | - uses: actions/upload-artifact@v3 24 | with: 25 | name: cross-${{ matrix.platform }} 26 | path: ${{ steps.cross.outputs.install_path }} 27 | strategy: 28 | matrix: 29 | platform: [linux-musl, apple-darwin] 30 | 31 | windows: 32 | runs-on: windows-latest 33 | # Windows technically doesn't need this, but if we don't block windows on it 34 | # some of the windows jobs could fill up the concurrent job queue before 35 | # one of the install-cross jobs has started, so this makes sure all 36 | # artifacts are downloaded first. 37 | needs: install-cross 38 | steps: 39 | - uses: actions/checkout@v3 40 | with: 41 | fetch-depth: 50 42 | - run: ci/set_rust_version.bash ${{ matrix.channel }} ${{ matrix.target }} 43 | shell: bash 44 | - run: ci/build.bash cargo ${{ matrix.target }} 45 | shell: bash 46 | 47 | strategy: 48 | fail-fast: true 49 | matrix: 50 | channel: [nightly] 51 | target: 52 | - i686-pc-windows-msvc 53 | - x86_64-pc-windows-msvc 54 | 55 | macos: 56 | runs-on: macos-latest 57 | needs: install-cross 58 | steps: 59 | - uses: actions/checkout@v3 60 | with: 61 | fetch-depth: 50 62 | 63 | - uses: actions/download-artifact@v3 64 | with: 65 | name: cross-apple-darwin 66 | path: /usr/local/bin/ 67 | 68 | - run: chmod +x /usr/local/bin/cross 69 | - run: ci/set_rust_version.bash ${{ matrix.channel }} ${{ matrix.target }} 70 | - run: ci/build.bash cross ${{ matrix.target }} 71 | 72 | strategy: 73 | fail-fast: true 74 | matrix: 75 | channel: [nightly] 76 | target: 77 | - x86_64-apple-darwin 78 | 79 | linux: 80 | runs-on: ubuntu-latest 81 | needs: install-cross 82 | steps: 83 | - uses: actions/checkout@v3 84 | with: 85 | fetch-depth: 50 86 | 87 | - name: 
Download Cross 88 | uses: actions/download-artifact@v3 89 | with: 90 | name: cross-linux-musl 91 | path: /tmp/ 92 | - run: chmod +x /tmp/cross 93 | - run: ci/set_rust_version.bash ${{ matrix.channel }} ${{ matrix.target }} 94 | - run: ci/build.bash /tmp/cross ${{ matrix.target }} 95 | - run: ci/test.bash /tmp/cross ${{ matrix.target }} 96 | if: | 97 | !contains(matrix.target, 'android') && 98 | !contains(matrix.target, 'bsd') && 99 | !contains(matrix.target, 'solaris') && 100 | !contains(matrix.target, 'netbsd') && 101 | contains(matrix.target, 'x86_64') 102 | 103 | strategy: 104 | fail-fast: true 105 | matrix: 106 | channel: [nightly] 107 | target: 108 | - i686-unknown-linux-gnu 109 | - i686-unknown-linux-musl 110 | - x86_64-unknown-linux-gnu 111 | - x86_64-unknown-linux-musl 112 | - i686-linux-android 113 | - x86_64-linux-android 114 | - x86_64-unknown-netbsd 115 | -------------------------------------------------------------------------------- /.github/workflows/standard.yml: -------------------------------------------------------------------------------- 1 | name: Standard checks 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | ci: 11 | runs-on: ubuntu-latest 12 | strategy: 13 | matrix: 14 | rust: 15 | # - stable 16 | # - beta 17 | - nightly 18 | # - 1.31.0 # MSRV 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | 23 | - uses: actions-rs/toolchain@v1.0.7 24 | with: 25 | profile: minimal 26 | toolchain: ${{ matrix.rust }} 27 | override: true 28 | components: rustfmt 29 | 30 | - uses: actions-rs/cargo@v1.0.3 31 | with: 32 | command: build 33 | 34 | - uses: actions-rs/cargo@v1.0.3 35 | with: 36 | command: build 37 | args: --all-features 38 | 39 | - uses: actions-rs/cargo@v1.0.3 40 | with: 41 | command: test 42 | 43 | - uses: actions-rs/cargo@v1.0.3 44 | with: 45 | command: test 46 | args: --features performance-counter # --all-features will currently fail 47 | 48 | - uses: actions-rs/cargo@v1.0.3 49 | 
with: 50 | command: fmt 51 | args: --all -- --check 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled files 2 | *.o 3 | *.so 4 | *.rlib 5 | *.dll 6 | 7 | # Executables 8 | *.exe 9 | 10 | # Generated by Cargo 11 | /target/ 12 | Cargo.lock 13 | .vscode 14 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), 6 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). 7 | 8 | ## [unreleased] 9 | 10 | - Fix error in page-table documentation. 11 | 12 | ## [0.52.0] - 2022-10-18 13 | 14 | - Add user-defined, hardware ignored bits to page-table flags. 15 | 16 | ## [0.51.0] - 2022-07-15 17 | 18 | - Implement `core::iter::Step` for PAddr, VAddr, IOAddr types. This currently 19 | requires nightly, so we added an `unstable` Cargo feature to enable it 20 | conditionally. 21 | 22 | ## [0.50.0] - 2022-06-29 23 | 24 | - `rdtscp` now returns a tuple in the form of `(cycles: u64, aux: u32)`, where 25 | `cycles` is the cycle count (as returned by this function in previous 26 | versions) and `aux` is the value of `IA32_TSC_AUX` -- which also gets read-out 27 | by `rdtscp`. If one prefers to use the old signature, the recommendation is to 28 | replace calls to `x86::time::rdtscp` with `core::arch::x86_64::__rdtscp`. 29 | Fixes #124. 30 | 31 | ## [0.49.0] - 2022-06-03 32 | 33 | - Removed `x86::bits64::segmentation::fs_deref()`: Users should replace calls to 34 | `fs_deref` with the more general `x86::bits64::segmentation::fs_deref!` macro. 35 | `fs_deref!(0)` is equivalent to `fs_deref()`. 
36 | - Removed `x86::bits64::segmentation::gs_deref()`: Users should replace calls to 37 | `gs_deref` with the more general `x86::bits64::segmentation::gs_deref!` macro. 38 | `gs_deref!(0)` is equivalent to `gs_deref()`. 39 | 40 | ## [0.48.0] - 2022-05-23 41 | 42 | - Added `const new` constructor for X2APIC struct 43 | - Use fully qualified `asm!` import for `int!` macro so clients do no longer 44 | need to import `asm!` themselves. 45 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "x86" 3 | version = "0.52.0" 4 | authors = [ 5 | "Gerd Zellweger ", 6 | "Eric Kidd ", 7 | "Philipp Oppermann ", 8 | "Dan Schatzberg ", 9 | "John Ericson ", 10 | "Rex Lunae ", 11 | "Brian Martin ", 12 | "Caleb Boylan ", 13 | "Nikolay Edigaryev ", 14 | "Stefan Lankes ", 15 | "Jonathan Klimt ", 16 | "Jens Breitbart ", 17 | "Reto Achermann ", 18 | "lilasta ", 19 | "Vikram Narayanan ", 20 | "Dan Cross ", 21 | "Yuekai Jia ", 22 | "Lucas Kent " 23 | ] 24 | 25 | description = "Library to program x86 (amd64) hardware. Contains x86 specific data structure descriptions, data-tables, as well as convenience function to call assembly instructions typically not exposed in higher level languages." 26 | 27 | homepage = "https://github.com/gz/rust-x86" 28 | repository = "https://github.com/gz/rust-x86" 29 | documentation = "https://docs.rs/x86" 30 | readme = "README.md" 31 | keywords = ["ia32", "os", "amd64", "x86", "x86-64"] 32 | license = "MIT" 33 | build = "build.rs" 34 | edition = '2018' 35 | 36 | exclude = ["./.github/*"] 37 | 38 | [features] 39 | performance-counter = ["phf", "phf_codegen", "csv", "serde_json"] 40 | # Note we have to choose between regular tests and x86test at the moment, so we use features 41 | # (limitation in https://github.com/rust-lang/rust/issues/50297) 42 | # Run user-space tests, i.e. 
regular #[test] 43 | utest = [] 44 | # Run VM tests, i.e., the #[x86test] ones 45 | vmtest = [] 46 | unstable = [] 47 | 48 | [[test]] 49 | name = "kvm" 50 | path = "tests/kvm/bin.rs" 51 | 52 | [build-dependencies] 53 | phf_codegen = { version = "0.9.0", optional = true } 54 | csv = { version = "1.1.5", optional = true } 55 | serde_json = { version = "1.0.61", optional = true } 56 | 57 | [dependencies] 58 | bitflags = "1.*" 59 | bit_field = "0.10.1" 60 | raw-cpuid = "10.2.0" 61 | 62 | [dependencies.phf] 63 | version = "0.9.0" 64 | default-features = false # So phf uses libcore instead of libstd 65 | optional = true 66 | 67 | [target.'cfg(target_family = "unix")'.dev-dependencies] 68 | klogger = { version = "0.0.14", features = ["use_ioports"] } 69 | x86test = { path = "x86test" } 70 | libc = "0.2.*" 71 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Gerd Zellweger 4 | Copyright (c) 2015 The libcpu Developers 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in 14 | all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 22 | THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # x86 / amd64 library [![Crates.io](https://img.shields.io/crates/v/x86.svg)](https://crates.io/crates/x86) [![docs.rs/x86](https://docs.rs/x86/badge.svg)](https://docs.rs/crate/x86/) ![Standard checks](https://github.com/gz/rust-x86/actions/workflows/standard.yml/badge.svg) 2 | 3 | Library to program x86 (amd64) hardware. Contains x86 specific data structure descriptions, data-tables, as well as convenience function to call assembly instructions typically not exposed in higher level languages. 4 | 5 | Currently supports: 6 | 7 | * I/O registers 8 | * Control registers 9 | * Debug registers 10 | * MSR registers 11 | * Segmentation 12 | * Descriptor-tables (GDT, LDT, IDT) 13 | * IA32-e page table layout 14 | * Interrupts (with xAPIC and x2APIC, I/O APIC drivers) 15 | * Task state 16 | * Performance counter information 17 | * Intel SGX: Software Guard Extensions 18 | * Random numbers (rdrand, rdseed) 19 | * Time (rdtsc, rdtscp) 20 | * Querying CPUID (uses [raw_cpuid](https://github.com/gz/rust-cpuid) library) 21 | * Transactional memory (Intel RTM and HLE) 22 | * Virtualization (Intel VMX) 23 | 24 | This library depends on libcore so it can be used in kernel level code. 25 | 26 | ## Testing 27 | 28 | We use two forms of tests for the crate. Regular tests with `#[test]` that run in a ring 3 process 29 | and `#[x86test]` tests that run in a VM (and therefore grant a privileged execution environment, see [x86test](https://github.com/gz/rust-x86/tree/master/x86test)). 
#!/usr/bin/env bash
# Build the crate for one target triple using either `cargo` or `cross`.
#
# Usage: ci/build.bash <cross-or-cargo> <target-triple> [release]
#
# Without a third argument the crate is built twice in debug mode (default
# features, then --all-features). With any non-empty third argument a single
# --all-features --release build is done instead.
set -e

source ci/common.bash

# $1 {path} = Path to cross/cargo executable
CROSS=$1
# $2 {string} = Target triple, e.g. x86_64-pc-windows-msvc
TARGET_TRIPLE=$2
# $3 {boolean} = Are we building for deployment? (any non-empty value)
RELEASE_BUILD=$3

# The values must be quoted: an unquoted empty variable expands to no word at
# all, which would shift the name into $1 and make the emptiness check in
# required_arg pass even though the argument is missing.
required_arg "$CROSS" 'CROSS'
required_arg "$TARGET_TRIPLE" 'TARGET_TRIPLE'

# $CROSS is intentionally left unquoted when invoked so that a caller passing
# a command with extra words keeps working as before.
if [ -z "$RELEASE_BUILD" ]; then
    $CROSS build --target "$TARGET_TRIPLE"
    $CROSS build --target "$TARGET_TRIPLE" --all-features
else
    $CROSS build --target "$TARGET_TRIPLE" --all-features --release
fi
Figuring out which (bus,dev,fun,vector) maps to which I/O APIC 6 | //! entry can be a pain. 7 | 8 | use bit_field::BitField; 9 | use bitflags::bitflags; 10 | 11 | bitflags! { 12 | /// The redirection table starts at REG_TABLE and uses 13 | /// two registers to configure each interrupt. 14 | /// The first (low) register in a pair contains configuration bits. 15 | /// The second (high) register contains a bitmask telling which 16 | /// CPUs can serve that interrupt. 17 | struct RedirectionEntry: u32 { 18 | /// Interrupt disabled 19 | const DISABLED = 0x00010000; 20 | /// Level-triggered (vs edge) 21 | const LEVEL = 0x00008000; 22 | /// Active low (vs high) 23 | const ACTIVELOW = 0x00002000; 24 | /// Destination is CPU id (vs APIC ID) 25 | const LOGICAL = 0x00000800; 26 | /// None 27 | const NONE = 0x00000000; 28 | } 29 | } 30 | 31 | pub struct IoApic { 32 | reg: *mut u32, 33 | data: *mut u32, 34 | } 35 | 36 | impl IoApic { 37 | /// Instantiate a new IoApic. 38 | /// 39 | /// # Safety 40 | /// `addr` must point to the base of the IoApic. 41 | pub unsafe fn new(addr: usize) -> Self { 42 | IoApic { 43 | reg: addr as *mut u32, 44 | data: (addr + 0x10) as *mut u32, 45 | } 46 | } 47 | pub fn disable_all(&mut self) { 48 | // Mark all interrupts edge-triggered, active high, disabled, 49 | // and not routed to any CPUs. 
50 | for i in 0..self.supported_interrupts() { 51 | self.write_irq(i, RedirectionEntry::DISABLED, 0); 52 | } 53 | } 54 | 55 | unsafe fn read(&mut self, reg: u8) -> u32 { 56 | self.reg.write_volatile(reg as u32); 57 | self.data.read_volatile() 58 | } 59 | 60 | unsafe fn write(&mut self, reg: u8, data: u32) { 61 | self.reg.write_volatile(reg as u32); 62 | self.data.write_volatile(data); 63 | } 64 | 65 | fn write_irq(&mut self, irq: u8, flags: RedirectionEntry, dest: u8) { 66 | unsafe { 67 | self.write(REG_TABLE + 2 * irq, (T_IRQ0 + irq) as u32 | flags.bits()); 68 | self.write(REG_TABLE + 2 * irq + 1, (dest as u32) << 24); 69 | } 70 | } 71 | 72 | pub fn enable(&mut self, irq: u8, cpunum: u8) { 73 | // Mark interrupt edge-triggered, active high, 74 | // enabled, and routed to the given cpunum, 75 | // which happens to be that cpu's APIC ID. 76 | self.write_irq(irq, RedirectionEntry::NONE, cpunum); 77 | } 78 | 79 | pub fn id(&mut self) -> u8 { 80 | unsafe { self.read(REG_ID).get_bits(24..28) as u8 } 81 | } 82 | 83 | pub fn version(&mut self) -> u8 { 84 | unsafe { self.read(REG_VER).get_bits(0..8) as u8 } 85 | } 86 | 87 | /// Number of supported interrupts by this IO APIC. 88 | /// 89 | /// Max Redirection Entry = "how many IRQs can this I/O APIC handle - 1" 90 | /// The -1 is silly so we add one back to it. 91 | pub fn supported_interrupts(&mut self) -> u8 { 92 | unsafe { (self.read(REG_VER).get_bits(16..24) + 1) as u8 } 93 | } 94 | } 95 | 96 | /// Register index: ID 97 | const REG_ID: u8 = 0x00; 98 | 99 | /// Register index: version 100 | const REG_VER: u8 = 0x01; 101 | 102 | /// Redirection table base 103 | const REG_TABLE: u8 = 0x10; 104 | 105 | const T_IRQ0: u8 = 32; 106 | -------------------------------------------------------------------------------- /src/bits16/mod.rs: -------------------------------------------------------------------------------- 1 | //! Data structures and functions used by 16-bit mode. 
2 | 3 | pub mod segmentation; 4 | -------------------------------------------------------------------------------- /src/bits16/segmentation.rs: -------------------------------------------------------------------------------- 1 | use crate::segmentation::{ 2 | DescriptorBuilder, DescriptorType, GateDescriptorBuilder, SegmentSelector, 3 | SystemDescriptorTypes32, 4 | }; 5 | 6 | impl GateDescriptorBuilder for DescriptorBuilder { 7 | fn tss_descriptor(base: u64, limit: u64, available: bool) -> DescriptorBuilder { 8 | let typ = match available { 9 | true => DescriptorType::System32(SystemDescriptorTypes32::TSSAvailable16), 10 | false => DescriptorType::System32(SystemDescriptorTypes32::TSSBusy16), 11 | }; 12 | 13 | DescriptorBuilder::with_base_limit(base, limit).set_type(typ) 14 | } 15 | 16 | fn call_gate_descriptor(selector: SegmentSelector, offset: u16) -> DescriptorBuilder { 17 | DescriptorBuilder::with_selector_offset(selector, offset.into()).set_type( 18 | DescriptorType::System32(SystemDescriptorTypes32::CallGate16), 19 | ) 20 | } 21 | 22 | fn interrupt_descriptor(selector: SegmentSelector, offset: u16) -> DescriptorBuilder { 23 | DescriptorBuilder::with_selector_offset(selector, offset.into()).set_type( 24 | DescriptorType::System32(SystemDescriptorTypes32::InterruptGate16), 25 | ) 26 | } 27 | 28 | fn trap_gate_descriptor(selector: SegmentSelector, offset: u16) -> DescriptorBuilder { 29 | DescriptorBuilder::with_selector_offset(selector, offset.into()).set_type( 30 | DescriptorType::System32(SystemDescriptorTypes32::TrapGate16), 31 | ) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/bits32/eflags.rs: -------------------------------------------------------------------------------- 1 | //! Processor state stored in the EFLAGS register. 2 | 3 | use bitflags::*; 4 | 5 | use crate::Ring; 6 | use core::arch::asm; 7 | 8 | bitflags! { 9 | /// The EFLAGS register. 
10 | pub struct EFlags: u32 { 11 | /// ID Flag (ID) 12 | const FLAGS_ID = 1 << 21; 13 | /// Virtual Interrupt Pending (VIP) 14 | const FLAGS_VIP = 1 << 20; 15 | /// Virtual Interrupt Flag (VIF) 16 | const FLAGS_VIF = 1 << 19; 17 | /// Alignment Check (AC) 18 | const FLAGS_AC = 1 << 18; 19 | /// Virtual-8086 Mode (VM) 20 | const FLAGS_VM = 1 << 17; 21 | /// Resume Flag (RF) 22 | const FLAGS_RF = 1 << 16; 23 | /// Nested Task (NT) 24 | const FLAGS_NT = 1 << 14; 25 | /// I/O Privilege Level (IOPL) 0 26 | const FLAGS_IOPL0 = 0b00 << 12; 27 | /// I/O Privilege Level (IOPL) 1 28 | const FLAGS_IOPL1 = 0b01 << 12; 29 | /// I/O Privilege Level (IOPL) 2 30 | const FLAGS_IOPL2 = 0b10 << 12; 31 | /// I/O Privilege Level (IOPL) 3 32 | const FLAGS_IOPL3 = 0b11 << 12; 33 | /// Overflow Flag (OF) 34 | const FLAGS_OF = 1 << 11; 35 | /// Direction Flag (DF) 36 | const FLAGS_DF = 1 << 10; 37 | /// Interrupt Enable Flag (IF) 38 | const FLAGS_IF = 1 << 9; 39 | /// Trap Flag (TF) 40 | const FLAGS_TF = 1 << 8; 41 | /// Sign Flag (SF) 42 | const FLAGS_SF = 1 << 7; 43 | /// Zero Flag (ZF) 44 | const FLAGS_ZF = 1 << 6; 45 | /// Auxiliary Carry Flag (AF) 46 | const FLAGS_AF = 1 << 4; 47 | /// Parity Flag (PF) 48 | const FLAGS_PF = 1 << 2; 49 | /// Bit 1 is always 1. 50 | const FLAGS_A1 = 1 << 1; 51 | /// Carry Flag (CF) 52 | const FLAGS_CF = 1 << 0; 53 | } 54 | } 55 | 56 | impl EFlags { 57 | /// Creates a new Flags entry. Ensures bit 1 is set. 58 | pub const fn new() -> EFlags { 59 | EFlags::FLAGS_A1 60 | } 61 | 62 | /// Creates a new Flags with the given I/O privilege level. 
63 | pub const fn from_priv(iopl: Ring) -> EFlags { 64 | EFlags { 65 | bits: (iopl as u32) << 12, 66 | } 67 | } 68 | } 69 | 70 | #[cfg(target_arch = "x86")] 71 | #[inline(always)] 72 | pub unsafe fn read() -> EFlags { 73 | let r: u32; 74 | asm!("pushfl; popl {0}", out(reg) r, options(att_syntax)); 75 | EFlags::from_bits_truncate(r) 76 | } 77 | 78 | #[cfg(target_arch = "x86")] 79 | #[inline(always)] 80 | pub unsafe fn set(val: EFlags) { 81 | asm!("pushl {0}; popfl", in(reg) val.bits(), options(att_syntax)); 82 | } 83 | 84 | /// Clears the AC flag bit in EFLAGS register. 85 | /// 86 | /// This disables any alignment checking of user-mode data accesses. 87 | /// If the SMAP bit is set in the CR4 register, this disallows 88 | /// explicit supervisor-mode data accesses to user-mode pages. 89 | /// 90 | /// # Safety 91 | /// 92 | /// This instruction is only valid in Ring 0 and requires 93 | /// that the CPU supports the instruction (check CPUID). 94 | #[inline(always)] 95 | pub unsafe fn clac() { 96 | asm!("clac"); 97 | } 98 | 99 | /// Sets the AC flag bit in EFLAGS register. 100 | /// 101 | /// This may enable alignment checking of user-mode data accesses. 102 | /// This allows explicit supervisor-mode data accesses to user-mode 103 | /// pages even if the SMAP bit is set in the CR4 register. 104 | /// 105 | /// # Safety 106 | /// 107 | /// This instruction is only valid in Ring 0 and requires 108 | /// that the CPU supports the instruction (check CPUID). 109 | #[inline(always)] 110 | pub unsafe fn stac() { 111 | asm!("stac"); 112 | } 113 | -------------------------------------------------------------------------------- /src/bits32/mod.rs: -------------------------------------------------------------------------------- 1 | //! Data structures and functions used by 32-bit mode. 
2 | 3 | pub mod eflags; 4 | pub mod paging; 5 | pub mod segmentation; 6 | pub mod task; 7 | 8 | #[cfg(target_arch = "x86")] 9 | use core::arch::asm; 10 | 11 | #[cfg(target_arch = "x86")] 12 | #[inline(always)] 13 | pub unsafe fn stack_jmp(stack: *mut (), ip: *const ()) -> ! { 14 | asm!("movl {0}, %esp; jmp {1}", in(reg) stack, in(reg) ip, options(att_syntax)); 15 | loop {} 16 | } 17 | -------------------------------------------------------------------------------- /src/bits32/segmentation.rs: -------------------------------------------------------------------------------- 1 | #[allow(unused_imports)] 2 | use crate::segmentation::SegmentSelector; 3 | 4 | #[cfg(target_arch = "x86")] 5 | use core::arch::asm; 6 | 7 | /// Reload code segment register. 8 | /// Note this is special since we can not directly move 9 | /// to %cs. Instead we push the new segment selector 10 | /// and return value on the stack and use lretl 11 | /// to reload cs and continue at 1:. 12 | #[cfg(target_arch = "x86")] 13 | pub unsafe fn load_cs(sel: SegmentSelector) { 14 | asm!("pushl {0}; \ 15 | pushl $1f; \ 16 | lretl; \ 17 | 1:", in(reg) sel.bits() as u32, options(att_syntax)); 18 | } 19 | -------------------------------------------------------------------------------- /src/bits32/task.rs: -------------------------------------------------------------------------------- 1 | //! Helpers to program the task state segment. 2 | //! 
See Intel 3a, Chapter 7 3 | 4 | use core::mem::size_of; 5 | 6 | #[derive(Copy, Clone, Debug)] 7 | #[repr(C, packed)] 8 | pub struct TaskStateSegment { 9 | pub link: u16, 10 | reserved0: u16, 11 | pub esp0: u32, 12 | pub ss0: u16, 13 | reserved1: u16, 14 | pub esp1: u32, 15 | pub ss1: u16, 16 | reserved2: u16, 17 | pub esp2: u32, 18 | pub ss2: u16, 19 | reserved3: u16, 20 | 21 | pub cr3: u32, 22 | pub eip: u32, 23 | pub eflags: u32, 24 | 25 | pub eax: u32, 26 | pub ecx: u32, 27 | pub edx: u32, 28 | pub ebx: u32, 29 | pub esp: u32, 30 | pub ebp: u32, 31 | pub esi: u32, 32 | pub edi: u32, 33 | 34 | pub es: u16, 35 | reserved4: u16, 36 | pub cs: u16, 37 | reserved5: u16, 38 | pub ss: u16, 39 | reserved6: u16, 40 | pub ds: u16, 41 | reserved7: u16, 42 | pub fs: u16, 43 | reserved8: u16, 44 | pub gs: u16, 45 | reserved9: u16, 46 | pub ldtr: u16, 47 | reserved10: u32, 48 | pub iobp_offset: u16, 49 | } 50 | 51 | impl TaskStateSegment { 52 | pub const fn new() -> TaskStateSegment { 53 | TaskStateSegment { 54 | link: 0, 55 | reserved0: 0, 56 | esp0: 0, 57 | ss0: 0, 58 | reserved1: 0, 59 | esp1: 0, 60 | ss1: 0, 61 | reserved2: 0, 62 | esp2: 0, 63 | ss2: 0, 64 | reserved3: 0, 65 | cr3: 0, 66 | eip: 0, 67 | eflags: 0, 68 | eax: 0, 69 | ecx: 0, 70 | edx: 0, 71 | ebx: 0, 72 | esp: 0, 73 | ebp: 0, 74 | esi: 0, 75 | edi: 0, 76 | es: 0, 77 | reserved4: 0, 78 | cs: 0, 79 | reserved5: 0, 80 | ss: 0, 81 | reserved6: 0, 82 | ds: 0, 83 | reserved7: 0, 84 | fs: 0, 85 | reserved8: 0, 86 | gs: 0, 87 | reserved9: 0, 88 | ldtr: 0, 89 | reserved10: 0, 90 | iobp_offset: size_of::() as u16, 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/bits64/mod.rs: -------------------------------------------------------------------------------- 1 | //! Data structures and functions used by IA-32e but not Protected Mode. 
2 | 3 | pub mod paging; 4 | #[cfg(target_arch = "x86_64")] 5 | pub mod registers; 6 | pub mod rflags; 7 | pub mod segmentation; 8 | #[cfg(target_arch = "x86_64")] 9 | pub mod sgx; 10 | pub mod syscall; 11 | pub mod task; 12 | #[cfg(target_arch = "x86_64")] 13 | pub mod vmx; 14 | -------------------------------------------------------------------------------- /src/bits64/registers.rs: -------------------------------------------------------------------------------- 1 | use core::arch::asm; 2 | 3 | /// Read the RIP register (instruction pointer). 4 | #[inline(always)] 5 | pub fn rip() -> u64 { 6 | let rip: u64; 7 | unsafe { 8 | asm!("leaq 0(%rip), {0}", out(reg) rip, options(att_syntax)); 9 | } 10 | rip 11 | } 12 | 13 | /// Read the RSP register (stack pointer register). 14 | #[inline(always)] 15 | pub fn rsp() -> u64 { 16 | let rsp: u64; 17 | unsafe { 18 | asm!("mov %rsp, {0}", out(reg) rsp, options(att_syntax)); 19 | } 20 | rsp 21 | } 22 | 23 | /// Read the RBP register (base pointer register). 24 | #[inline(always)] 25 | pub fn rbp() -> u64 { 26 | let rbp: u64; 27 | unsafe { 28 | asm!("mov %rbp, {0}", out(reg) rbp, options(att_syntax)); 29 | } 30 | rbp 31 | } 32 | -------------------------------------------------------------------------------- /src/bits64/rflags.rs: -------------------------------------------------------------------------------- 1 | //! Processor state stored in the RFLAGS register. 2 | //! 3 | //! In 64-bit mode, EFLAGS is extended to 64 bits and called RFLAGS. 4 | //! The upper 32 bits of RFLAGS register is reserved. 5 | //! The lower 32 bits of RFLAGS is the same as EFLAGS. 6 | 7 | use bitflags::*; 8 | 9 | use crate::Ring; 10 | 11 | #[cfg(target_arch = "x86_64")] 12 | use core::arch::asm; 13 | 14 | bitflags! { 15 | /// The RFLAGS register. 16 | /// This is duplicated code from bits32 eflags.rs. 
17 | pub struct RFlags: u64 { 18 | /// ID Flag (ID) 19 | const FLAGS_ID = 1 << 21; 20 | /// Virtual Interrupt Pending (VIP) 21 | const FLAGS_VIP = 1 << 20; 22 | /// Virtual Interrupt Flag (VIF) 23 | const FLAGS_VIF = 1 << 19; 24 | /// Alignment Check (AC) 25 | const FLAGS_AC = 1 << 18; 26 | /// Virtual-8086 Mode (VM) 27 | const FLAGS_VM = 1 << 17; 28 | /// Resume Flag (RF) 29 | const FLAGS_RF = 1 << 16; 30 | /// Nested Task (NT) 31 | const FLAGS_NT = 1 << 14; 32 | /// I/O Privilege Level (IOPL) 0 33 | const FLAGS_IOPL0 = 0b00 << 12; 34 | /// I/O Privilege Level (IOPL) 1 35 | const FLAGS_IOPL1 = 0b01 << 12; 36 | /// I/O Privilege Level (IOPL) 2 37 | const FLAGS_IOPL2 = 0b10 << 12; 38 | /// I/O Privilege Level (IOPL) 3 39 | const FLAGS_IOPL3 = 0b11 << 12; 40 | /// Overflow Flag (OF) 41 | const FLAGS_OF = 1 << 11; 42 | /// Direction Flag (DF) 43 | const FLAGS_DF = 1 << 10; 44 | /// Interrupt Enable Flag (IF) 45 | const FLAGS_IF = 1 << 9; 46 | /// Trap Flag (TF) 47 | const FLAGS_TF = 1 << 8; 48 | /// Sign Flag (SF) 49 | const FLAGS_SF = 1 << 7; 50 | /// Zero Flag (ZF) 51 | const FLAGS_ZF = 1 << 6; 52 | /// Auxiliary Carry Flag (AF) 53 | const FLAGS_AF = 1 << 4; 54 | /// Parity Flag (PF) 55 | const FLAGS_PF = 1 << 2; 56 | /// Bit 1 is always 1. 57 | const FLAGS_A1 = 1 << 1; 58 | /// Carry Flag (CF) 59 | const FLAGS_CF = 1 << 0; 60 | } 61 | } 62 | 63 | impl RFlags { 64 | /// Creates a new Flags entry. Ensures bit 1 is set. 65 | pub const fn new() -> RFlags { 66 | RFlags::FLAGS_A1 67 | } 68 | 69 | /// Creates a new Flags with the given I/O privilege level. 
70 | pub const fn from_priv(iopl: Ring) -> RFlags { 71 | RFlags { 72 | bits: (iopl as u64) << 12, 73 | } 74 | } 75 | 76 | pub const fn from_raw(bits: u64) -> RFlags { 77 | RFlags { bits } 78 | } 79 | } 80 | 81 | #[cfg(target_arch = "x86_64")] 82 | #[inline(always)] 83 | pub fn read() -> RFlags { 84 | let r: u64; 85 | unsafe { asm!("pushfq; popq {0}", out(reg) r, options(att_syntax)) }; 86 | RFlags::from_bits_truncate(r) 87 | } 88 | 89 | #[cfg(target_arch = "x86_64")] 90 | #[inline(always)] 91 | pub fn set(val: RFlags) { 92 | unsafe { 93 | asm!("pushq {0}; popfq", in(reg) val.bits(), options(att_syntax)); 94 | } 95 | } 96 | 97 | // clac and stac are also usable in 64-bit mode 98 | pub use crate::bits32::eflags::{clac, stac}; 99 | -------------------------------------------------------------------------------- /src/bits64/task.rs: -------------------------------------------------------------------------------- 1 | //! Helpers to program the task state segment. 2 | //! See Intel 3a, Chapter 7, Section 7 3 | 4 | use crate::Ring; 5 | 6 | /// Although hardware task-switching is not supported in 64-bit mode, 7 | /// a 64-bit task state segment (TSS) must exist. 8 | /// 9 | /// The TSS holds information important to 64-bit mode and that is not 10 | /// directly related to the task-switch mechanism. This information includes: 11 | /// 12 | /// # RSPn 13 | /// The full 64-bit canonical forms of the stack pointers (RSP) for privilege levels 0-2. 14 | /// RSPx is loaded in whenever an interrupt causes the CPU to change RPL to x. 15 | /// Note on a syscall entry this field is not used to load a stack, setting the stack there 16 | /// is the handler's responsibility (however when using the int instruction in user-space, 17 | /// we load the stack from RSPn). 18 | /// 19 | /// # ISTn 20 | /// The full 64-bit canonical forms of the interrupt stack table (IST) pointers. 
21 | /// You can set an interrupt vector to use an IST entry in the Interrupt Descriptor 22 | /// Table by giving it a number from 0 - 7. If 0 is selected, then the IST mechanism 23 | /// is not used. If any other number is selected then when that interrupt vector is 24 | /// called the CPU will load RSP from the corresponding IST entry. This is useful for 25 | /// handling things like double faults, since you don't have to worry about switching 26 | /// stacks; the CPU will do it for you. 27 | /// 28 | /// # I/O map base address 29 | /// The 16-bit offset to the I/O permission bit map from the 64-bit TSS base. 30 | /// 31 | /// The operating system must create at least one 64-bit TSS after activating IA-32e mode. 32 | /// It must execute the LTR instruction (in 64-bit mode) to load the TR register with a 33 | /// pointer to the 64-bit TSS responsible for both 64-bitmode programs and 34 | /// compatibility-mode programs ([load_tr](crate::task::load_tr)). 35 | #[derive(Clone, Copy, Debug, Default)] 36 | #[repr(C, packed)] 37 | pub struct TaskStateSegment { 38 | pub reserved: u32, 39 | /// The full 64-bit canonical forms of the stack pointers (RSP) for privilege levels 0-2. 40 | pub rsp: [u64; 3], 41 | pub reserved2: u64, 42 | /// The full 64-bit canonical forms of the interrupt stack table (IST) pointers. 43 | pub ist: [u64; 7], 44 | pub reserved3: u64, 45 | pub reserved4: u16, 46 | /// The 16-bit offset to the I/O permission bit map from the 64-bit TSS base. 47 | pub iomap_base: u16, 48 | } 49 | 50 | impl TaskStateSegment { 51 | /// Creates a new empty TSS. 52 | pub const fn new() -> TaskStateSegment { 53 | TaskStateSegment { 54 | reserved: 0, 55 | rsp: [0; 3], 56 | reserved2: 0, 57 | ist: [0; 7], 58 | reserved3: 0, 59 | reserved4: 0, 60 | iomap_base: 0, 61 | } 62 | } 63 | 64 | /// Sets the stack pointer (`stack_ptr`) to be used for when 65 | /// an interrupt causes the CPU to change RPL to `pl`. 
66 | pub fn set_rsp(&mut self, pl: Ring, stack_ptr: u64) { 67 | match pl { 68 | Ring::Ring0 => self.rsp[0] = stack_ptr, 69 | Ring::Ring1 => self.rsp[1] = stack_ptr, 70 | Ring::Ring2 => self.rsp[2] = stack_ptr, 71 | Ring::Ring3 => unreachable!("Can't set stack for PL3"), 72 | } 73 | } 74 | 75 | /// Sets the stack pointer (`stack_ptr`) to be used when 76 | /// an interrupt with a corresponding IST entry in the Interrupt 77 | /// Descriptor table pointing to the given `index` is raised. 78 | pub fn set_ist(&mut self, index: usize, stack_ptr: u64) { 79 | match index { 80 | 0 => self.ist[0] = stack_ptr, 81 | 1 => self.ist[1] = stack_ptr, 82 | 2 => self.ist[2] = stack_ptr, 83 | 3 => self.ist[3] = stack_ptr, 84 | 4 => self.ist[4] = stack_ptr, 85 | 5 => self.ist[5] = stack_ptr, 86 | 6 => self.ist[6] = stack_ptr, 87 | _ => unreachable!("Can't set IST for this index (out of bounds)."), 88 | } 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /src/bits64/vmx.rs: -------------------------------------------------------------------------------- 1 | //! Virtualize processor hardware for multiple software environments using Virtual Machine Extensions. 2 | 3 | use crate::bits64::rflags::{self, RFlags}; 4 | use crate::vmx::{Result, VmFail}; 5 | use core::arch::asm; 6 | 7 | /// Helper used to extract VMX-specific Result in accordance with 8 | /// conventions described in Intel SDM, Volume 3C, Section 30.2. 9 | // We inline this to provide an obstruction-free path from this function's 10 | // call site to the moment where `rflags::read()` reads RFLAGS. Otherwise it's 11 | // possible for RFLAGS register to be clobbered by a function prologue, 12 | // see https://github.com/gz/rust-x86/pull/50. 
13 | #[inline(always)] 14 | fn vmx_capture_status() -> Result<()> { 15 | let flags = rflags::read(); 16 | 17 | if flags.contains(RFlags::FLAGS_ZF) { 18 | Err(VmFail::VmFailValid) 19 | } else if flags.contains(RFlags::FLAGS_CF) { 20 | Err(VmFail::VmFailInvalid) 21 | } else { 22 | Ok(()) 23 | } 24 | } 25 | 26 | /// Enable VMX operation. 27 | /// 28 | /// `addr` specifies a 4KB-aligned physical address of VMXON region initialized 29 | /// in accordance with Intel SDM, Volume 3C, Section 24.11.5. 30 | /// 31 | /// # Safety 32 | /// Needs CPL 0. 33 | pub unsafe fn vmxon(addr: u64) -> Result<()> { 34 | asm!("vmxon ({0})", in(reg) &addr, options(att_syntax)); 35 | vmx_capture_status() 36 | } 37 | 38 | /// Disable VMX operation. 39 | /// 40 | /// # Safety 41 | /// Needs CPL 0. 42 | pub unsafe fn vmxoff() -> Result<()> { 43 | asm!("vmxoff"); 44 | vmx_capture_status() 45 | } 46 | 47 | /// Clear VMCS. 48 | /// 49 | /// Ensures that VMCS data maintained on the processor is copied to the VMCS region 50 | /// located at 4KB-aligned physical address `addr` and initializes some parts of it. 51 | /// 52 | /// # Safety 53 | /// Needs CPL 0. 54 | pub unsafe fn vmclear(addr: u64) -> Result<()> { 55 | asm!("vmclear ({0})", in(reg) &addr, options(att_syntax)); 56 | vmx_capture_status() 57 | } 58 | 59 | /// Load current VMCS pointer. 60 | /// 61 | /// Marks the current-VMCS pointer valid and loads it with the physical address `addr`. 62 | /// 63 | /// # Safety 64 | /// Needs CPL 0. 65 | pub unsafe fn vmptrld(addr: u64) -> Result<()> { 66 | asm!("vmptrld ({0})", in(reg) &addr, options(att_syntax)); 67 | vmx_capture_status() 68 | } 69 | 70 | /// Return current VMCS pointer. 71 | /// 72 | /// # Safety 73 | /// Needs CPL 0. 74 | pub unsafe fn vmptrst() -> Result { 75 | let value: u64 = 0; 76 | asm!("vmptrst ({0})", in(reg) &value, options(att_syntax)); 77 | vmx_capture_status().and(Ok(value)) 78 | } 79 | 80 | /// Read a specified field from a VMCS. 
81 | /// 82 | /// # Safety 83 | /// Needs CPL 0. 84 | pub unsafe fn vmread(field: u32) -> Result { 85 | let field: u64 = field.into(); 86 | let value: u64; 87 | asm!("vmread {0}, {1}", in(reg) field, out(reg) value, options(att_syntax)); 88 | vmx_capture_status().and(Ok(value)) 89 | } 90 | 91 | /// Write to a specified field in a VMCS. 92 | /// 93 | /// # Safety 94 | /// Needs CPL 0. 95 | pub unsafe fn vmwrite(field: u32, value: u64) -> Result<()> { 96 | let field: u64 = field.into(); 97 | asm!("vmwrite {1}, {0}", in(reg) field, in(reg) value, options(att_syntax)); 98 | vmx_capture_status() 99 | } 100 | 101 | /// Launch virtual machine. 102 | /// 103 | /// # Safety 104 | /// Needs CPL 0. 105 | #[inline(always)] 106 | pub unsafe fn vmlaunch() -> Result<()> { 107 | asm!("vmlaunch"); 108 | vmx_capture_status() 109 | } 110 | 111 | /// Resume virtual machine. 112 | /// 113 | /// # Safety 114 | /// Needs CPL 0. 115 | #[inline(always)] 116 | pub unsafe fn vmresume() -> Result<()> { 117 | asm!("vmresume"); 118 | vmx_capture_status() 119 | } 120 | -------------------------------------------------------------------------------- /src/dtables.rs: -------------------------------------------------------------------------------- 1 | //! Functions and data-structures for working with descriptor tables. 2 | use crate::segmentation::SegmentSelector; 3 | use core::arch::asm; 4 | use core::fmt; 5 | use core::mem::size_of; 6 | 7 | /// A struct describing a pointer to a descriptor table (GDT / IDT). 8 | /// This is in a format suitable for giving to 'lgdt' or 'lidt'. 9 | #[repr(C, packed)] 10 | pub struct DescriptorTablePointer { 11 | /// Size of the DT. 12 | pub limit: u16, 13 | /// Pointer to the memory region containing the DT. 
14 | pub base: *const Entry, 15 | } 16 | 17 | impl Default for DescriptorTablePointer { 18 | fn default() -> DescriptorTablePointer { 19 | DescriptorTablePointer { 20 | limit: 0, 21 | base: core::ptr::null(), 22 | } 23 | } 24 | } 25 | 26 | impl DescriptorTablePointer { 27 | pub fn new(tbl: &T) -> Self { 28 | // GDT, LDT, and IDT all expect the limit to be set to "one less". 29 | // See Intel 3a, Section 3.5.1 "Segment Descriptor Tables" and 30 | // Section 6.10 "Interrupt Descriptor Table (IDT)". 31 | let len = size_of::() - 1; 32 | assert!(len < 0x10000); 33 | DescriptorTablePointer { 34 | base: tbl as *const T, 35 | limit: len as u16, 36 | } 37 | } 38 | 39 | pub fn new_from_slice(slice: &[T]) -> Self { 40 | // GDT, LDT, and IDT all expect the limit to be set to "one less". 41 | // See Intel 3a, Section 3.5.1 "Segment Descriptor Tables" and 42 | // Section 6.10 "Interrupt Descriptor Table (IDT)". 43 | let len = slice.len() * size_of::() - 1; 44 | assert!(len < 0x10000); 45 | DescriptorTablePointer { 46 | base: slice.as_ptr(), 47 | limit: len as u16, 48 | } 49 | } 50 | } 51 | 52 | impl fmt::Debug for DescriptorTablePointer { 53 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 54 | write!(f, "DescriptorTablePointer ({} {:?})", { self.limit }, { 55 | self.base 56 | }) 57 | } 58 | } 59 | 60 | /// Load the GDTR register with the specified base and limit. 61 | /// 62 | /// # Safety 63 | /// Needs CPL 0. 64 | pub unsafe fn lgdt(gdt: &DescriptorTablePointer) { 65 | asm!("lgdt ({0})", in(reg) gdt, options(att_syntax)); 66 | } 67 | 68 | /// Retrieve base and limit from the GDTR register. 69 | /// 70 | /// # Safety 71 | /// Needs CPL 0. 72 | pub unsafe fn sgdt(idt: &mut DescriptorTablePointer) { 73 | asm!("sgdt ({0})", in(reg) idt as *mut DescriptorTablePointer, options(att_syntax)); 74 | } 75 | 76 | /// Loads the segment selector into the selector field of the local 77 | /// descriptor table register (LDTR). 
78 | /// 79 | /// After the segment selector is loaded in the LDTR, 80 | /// the processor uses the segment selector to locate 81 | /// the segment descriptor for the LDT in the global 82 | /// descriptor table (GDT). 83 | /// 84 | /// # Safety 85 | /// Needs CPL 0. 86 | pub unsafe fn load_ldtr(selector: SegmentSelector) { 87 | asm!("lldt {0:x}", in(reg) selector.bits(), options(att_syntax)); 88 | } 89 | 90 | /// Returns the segment selector from the local descriptor table register (LDTR). 91 | /// 92 | /// The returned segment selector points to the segment descriptor 93 | /// (located in the GDT) for the current LDT. 94 | /// 95 | /// # Safety 96 | /// Needs CPL 0. 97 | pub unsafe fn ldtr() -> SegmentSelector { 98 | let selector: u16; 99 | asm!("sldt {0:x}", out(reg) selector, options(att_syntax)); 100 | SegmentSelector::from_raw(selector) 101 | } 102 | 103 | /// Load the IDTR register with the specified base and limit. 104 | /// 105 | /// # Safety 106 | /// Needs CPL 0. 107 | pub unsafe fn lidt(idt: &DescriptorTablePointer) { 108 | asm!("lidt ({0})", in(reg) idt, options(att_syntax)); 109 | } 110 | 111 | /// Retrieve base and limit from the IDTR register. 112 | /// 113 | /// # Safety 114 | /// Needs CPL 0. 
115 | pub unsafe fn sidt(idt: &mut DescriptorTablePointer) { 116 | asm!("sidt ({0})", in(reg) idt as *mut DescriptorTablePointer, options(att_syntax)); 117 | } 118 | 119 | #[cfg(all(test, feature = "utest"))] 120 | mod test { 121 | use super::*; 122 | 123 | #[test] 124 | fn check_sgdt() { 125 | let mut gdtr: super::DescriptorTablePointer = Default::default(); 126 | gdtr.limit = 0xdead; 127 | gdtr.base = 0xbadc0de as *mut u64; 128 | unsafe { 129 | sgdt(&mut gdtr); 130 | } 131 | let base = gdtr.base; 132 | let limit = gdtr.limit; 133 | assert_ne!(base, core::ptr::null_mut()); 134 | assert_ne!(limit, 0xdead); 135 | assert_ne!(base as u64, 0xbadc0de); 136 | } 137 | 138 | #[test] 139 | fn check_sidt() { 140 | let mut gdtr: super::DescriptorTablePointer = Default::default(); 141 | gdtr.limit = 0xdead; 142 | gdtr.base = 0xbadc0de as *mut u64; 143 | unsafe { 144 | sidt(&mut gdtr); 145 | } 146 | let base = gdtr.base; 147 | let limit = gdtr.limit; 148 | assert_ne!(base, core::ptr::null_mut()); 149 | assert_ne!(limit, 0xdead); 150 | assert_ne!(base as u64, 0xbadc0de); 151 | } 152 | } 153 | -------------------------------------------------------------------------------- /src/fence.rs: -------------------------------------------------------------------------------- 1 | //! Intel fence instructions 2 | 3 | use core::arch::asm; 4 | 5 | /// mfence -- Memory Fence 6 | /// 7 | /// Performs a serializing operation on all load-from-memory and store-to-memory 8 | /// instructions that were issued prior the MFENCE instruction. 9 | pub fn mfence() { 10 | unsafe { asm!("mfence") }; 11 | } 12 | 13 | /// sfence -- Store Fence 14 | /// 15 | /// Orders processor execution relative to all memory stores prior to the SFENCE 16 | /// instruction. The processor ensures that every store prior to SFENCE is 17 | /// globally visible before any store after SFENCE becomes globally visible. 
18 | pub fn sfence() { 19 | unsafe { asm!("sfence") }; 20 | } 21 | 22 | /// lfence -- Load Fence 23 | /// 24 | /// Performs a serializing operation on all load-from-memory instructions that 25 | /// were issued prior the LFENCE instruction. Specifically, LFENCE does not 26 | /// execute until all prior instructions have completed locally, and no later 27 | /// instruction begins execution until LFENCE completes. 28 | pub fn lfence() { 29 | unsafe { asm!("lfence") }; 30 | } 31 | -------------------------------------------------------------------------------- /src/io.rs: -------------------------------------------------------------------------------- 1 | //! I/O port functionality. 2 | 3 | use core::arch::asm; 4 | 5 | /// Write 8 bits to port 6 | /// 7 | /// # Safety 8 | /// Needs IO privileges. 9 | #[inline] 10 | pub unsafe fn outb(port: u16, val: u8) { 11 | asm!("outb %al, %dx", in("al") val, in("dx") port, options(att_syntax)); 12 | } 13 | 14 | /// Read 8 bits from port 15 | /// 16 | /// # Safety 17 | /// Needs IO privileges. 18 | #[inline] 19 | pub unsafe fn inb(port: u16) -> u8 { 20 | let ret: u8; 21 | asm!("inb %dx, %al", in("dx") port, out("al") ret, options(att_syntax)); 22 | ret 23 | } 24 | 25 | /// Write 16 bits to port 26 | /// 27 | /// # Safety 28 | /// Needs IO privileges. 29 | #[inline] 30 | pub unsafe fn outw(port: u16, val: u16) { 31 | asm!("outw %ax, %dx", in("ax") val, in("dx") port, options(att_syntax)); 32 | } 33 | 34 | /// Read 16 bits from port 35 | /// 36 | /// # Safety 37 | /// Needs IO privileges. 38 | #[inline] 39 | pub unsafe fn inw(port: u16) -> u16 { 40 | let ret: u16; 41 | asm!("inw %dx, %ax", in("dx") port, out("ax") ret, options(att_syntax)); 42 | ret 43 | } 44 | 45 | /// Write 32 bits to port 46 | /// 47 | /// # Safety 48 | /// Needs IO privileges. 
49 | #[inline] 50 | pub unsafe fn outl(port: u16, val: u32) { 51 | asm!("outl %eax, %dx", in("eax") val, in("dx") port, options(att_syntax)); 52 | } 53 | 54 | /// Read 32 bits from port 55 | /// 56 | /// # Safety 57 | /// Needs IO privileges. 58 | #[inline] 59 | pub unsafe fn inl(port: u16) -> u32 { 60 | let ret: u32; 61 | asm!("inl %dx, %eax", out("eax") ret, in("dx") port, options(att_syntax)); 62 | ret 63 | } 64 | 65 | #[cfg(all(test, feature = "vmtest"))] 66 | mod x86testing { 67 | use super::*; 68 | use x86test::*; 69 | 70 | #[x86test(ioport(0x0, 0xaf))] 71 | fn check_outb() { 72 | unsafe { 73 | outb(0x0, 0xaf); 74 | // hypervisor will fail here if port 0x0 doesn't see 0xaf 75 | } 76 | } 77 | 78 | #[x86test(ioport(0x0, 0xaf))] 79 | #[should_panic] 80 | fn check_outb_wrong_value() { 81 | unsafe { 82 | outb(0x0, 0xff); 83 | } 84 | } 85 | 86 | #[x86test(ioport(0x1, 0xad))] 87 | fn check_inb() { 88 | unsafe { 89 | kassert!( 90 | inb(0x1) == 0xad, 91 | "`inb` instruction didn't read the correct value" 92 | ); 93 | } 94 | } 95 | 96 | #[x86test(ioport(0x2, 0xad))] 97 | #[should_panic] 98 | fn check_inb_wrong_port() { 99 | unsafe { 100 | kassert!( 101 | inb(0x1) == 0xad, 102 | "`inb` instruction didn't read the correct value" 103 | ); 104 | } 105 | } 106 | 107 | #[x86test(ioport(0x2, 0x99))] 108 | fn check_outw() { 109 | unsafe { 110 | super::outw(0x2, 0x99); 111 | // hypervisor will fail here if port 0x2 doesn't see 0x99 112 | } 113 | } 114 | 115 | #[x86test(ioport(0x3, 0xfefe))] 116 | fn check_inw() { 117 | unsafe { 118 | kassert!( 119 | inw(0x3) == 0xfefe, 120 | "`inw` instruction didn't read the correct value" 121 | ); 122 | } 123 | } 124 | 125 | #[x86test(ioport(0x5, 0xbeefaaaa))] 126 | fn check_outl() { 127 | unsafe { 128 | outl(0x5, 0xbeefaaaa); 129 | // hypervisor will fail here if port 0x5 doesn't see 0xbeefaaaa 130 | } 131 | } 132 | 133 | #[x86test(ioport(0x4, 0xdeadbeef))] 134 | fn check_inl() { 135 | unsafe { 136 | kassert!( 137 | inl(0x4) == 0xdeadbeef, 
138 | "`inl` instruction didn't read the correct value" 139 | ); 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg(any(target_arch = "x86", target_arch = "x86_64"))] 2 | #![no_std] 3 | #![cfg_attr(test, allow(unused_features))] 4 | #![cfg_attr(all(test, feature = "vmtest"), feature(custom_test_frameworks))] 5 | #![cfg_attr(all(test, feature = "vmtest"), test_runner(x86test::runner::runner))] 6 | #![cfg_attr(feature = "unstable", feature(step_trait))] 7 | 8 | use core::arch::asm; 9 | #[cfg(target_arch = "x86")] 10 | pub(crate) use core::arch::x86 as arch; 11 | #[cfg(target_arch = "x86_64")] 12 | pub(crate) use core::arch::x86_64 as arch; 13 | 14 | macro_rules! bit { 15 | ($x:expr) => { 16 | 1 << $x 17 | }; 18 | } 19 | 20 | pub mod bits16; 21 | pub mod bits32; 22 | pub mod bits64; 23 | 24 | pub mod apic; 25 | pub mod controlregs; 26 | pub mod debugregs; 27 | pub mod dtables; 28 | pub mod fence; 29 | pub mod io; 30 | pub mod irq; 31 | pub mod msr; 32 | pub mod random; 33 | pub mod segmentation; 34 | pub mod task; 35 | pub mod time; 36 | pub mod tlb; 37 | pub mod vmx; 38 | 39 | #[cfg(feature = "performance-counter")] 40 | pub mod perfcnt; 41 | 42 | /// A short-cut to the architecture (bits32 or bits64) this crate was compiled for. 43 | pub mod current { 44 | #[cfg(target_arch = "x86")] 45 | pub use crate::bits32::*; 46 | #[cfg(target_arch = "x86_64")] 47 | pub use crate::bits64::*; 48 | } 49 | 50 | /// Support for the CPUID instructions. 
51 | pub mod cpuid { 52 | pub use raw_cpuid::*; 53 | } 54 | 55 | #[cfg(not(test))] 56 | mod std { 57 | pub use core::fmt; 58 | pub use core::ops; 59 | pub use core::option; 60 | } 61 | 62 | #[cfg(all(test, feature = "vmtest"))] 63 | extern crate klogger; 64 | #[cfg(all(test, feature = "vmtest"))] 65 | extern crate x86test; 66 | 67 | #[derive(Copy, Clone, Debug, Eq, PartialEq)] 68 | #[repr(u8)] 69 | /// x86 Protection levels 70 | /// 71 | /// # Note 72 | /// This should not contain values larger than 2 bits, otherwise 73 | /// segment descriptor code needs to be adjusted accordingly. 74 | pub enum Ring { 75 | Ring0 = 0b00, 76 | Ring1 = 0b01, 77 | Ring2 = 0b10, 78 | Ring3 = 0b11, 79 | } 80 | 81 | /// Stops instruction execution and places the processor in a HALT state. 82 | /// 83 | /// An enabled interrupt (including NMI and SMI), a debug exception, the BINIT# 84 | /// signal, the INIT# signal, or the RESET# signal will resume execution. If an 85 | /// interrupt (including NMI) is used to resume execution after a HLT instruction, 86 | /// the saved instruction pointer (CS:EIP) points to the instruction following 87 | /// the HLT instruction. 88 | /// 89 | /// # Safety 90 | /// Will cause a general protection fault if used outside of ring 0. 91 | #[inline(always)] 92 | pub unsafe fn halt() { 93 | asm!("hlt", options(att_syntax, nomem, nostack)); // check if preserves_flags 94 | } 95 | 96 | #[cfg(all(test, feature = "vmtest"))] 97 | mod x86testing { 98 | use super::*; 99 | use x86test::*; 100 | 101 | #[x86test(should_halt)] 102 | fn should_halt() { 103 | unsafe { halt() } 104 | } 105 | 106 | #[x86test] 107 | fn should_not_halt() {} 108 | } 109 | 110 | /// Read Processor ID 111 | /// 112 | /// Reads the value of the IA32_TSC_AUX MSR (address C0000103H) into the 113 | /// destination register. 114 | /// 115 | /// # See also 116 | /// `IA32_TSC_AUX` can also be read calling [`crate::time::rdtscp`]. 
117 | /// 118 | /// # Safety 119 | /// May fail with #UD if rdpid is not supported (check CPUID). 120 | #[inline(always)] 121 | pub unsafe fn rdpid() -> u64 { 122 | #[cfg(target_pointer_width = "64")] 123 | let mut pid: u64; 124 | #[cfg(target_pointer_width = "32")] 125 | let mut pid: u32; 126 | asm!("rdpid {pid}", pid = out(reg) pid, options(att_syntax)); 127 | pid.into() 128 | } 129 | 130 | #[cfg(all(test, feature = "utest"))] 131 | mod test { 132 | use super::*; 133 | 134 | #[test] 135 | fn test_rdpid() { 136 | let rdpid_support = cpuid::CpuId::new() 137 | .get_extended_feature_info() 138 | .map_or(false, |finfo| finfo.has_rdpid()); 139 | unsafe { 140 | if rdpid_support { 141 | let pid1 = rdpid(); 142 | let pid2 = rdpid(); 143 | // Let's hope we didn't migrate 144 | assert!(pid1 == pid2, "RDPID not consistent values?"); 145 | } 146 | } 147 | } 148 | } 149 | -------------------------------------------------------------------------------- /src/perfcnt/intel/events.rs: -------------------------------------------------------------------------------- 1 | //! Performance counter for all Intel architectures. 2 | use super::description::{Counter, EventDescription, MSRIndex, PebsType, Tuple}; 3 | /// The content of this file is automatically generated by `build.rs` 4 | /// from the data in `x86data/perfmon_data`. 5 | use phf; 6 | 7 | include!(concat!(env!("OUT_DIR"), "/counters.rs")); 8 | -------------------------------------------------------------------------------- /src/perfcnt/intel/mod.rs: -------------------------------------------------------------------------------- 1 | //! Information about Intel's performance events. 
2 | pub mod events; 3 | // The types need to be in a spearate file so we don't get circular 4 | // dependencies with build.rs include: 5 | mod description; 6 | pub use self::description::{Counter, EventDescription, MSRIndex, PebsType, Tuple}; 7 | 8 | use crate::cpuid; 9 | use core::fmt::{Error, Result, Write}; 10 | use core::str; 11 | use phf; 12 | 13 | const MODEL_LEN: usize = 30; 14 | 15 | #[derive(Default)] 16 | struct ModelWriter { 17 | buffer: [u8; MODEL_LEN], 18 | index: usize, 19 | } 20 | 21 | impl ModelWriter { 22 | fn as_str(&self) -> &str { 23 | str::from_utf8(&self.buffer[..self.index]).unwrap() 24 | } 25 | } 26 | 27 | impl Write for ModelWriter { 28 | fn write_str(&mut self, s: &str) -> Result { 29 | // TODO: There exists probably a more efficient way of doing this: 30 | for c in s.chars() { 31 | if self.index >= self.buffer.len() { 32 | return Err(Error); 33 | } 34 | self.buffer[self.index] = c as u8; 35 | self.index += 1; 36 | } 37 | Ok(()) 38 | } 39 | } 40 | 41 | // Format must be a string literal 42 | macro_rules! get_events { 43 | ($format:expr) => {{ 44 | let cpuid = cpuid::CpuId::new(); 45 | 46 | cpuid.get_vendor_info().map_or(None, |vf| { 47 | cpuid.get_feature_info().map_or(None, |fi| { 48 | let vendor = vf.as_str(); 49 | let (family, extended_model, model) = ( 50 | fi.base_family_id(), 51 | fi.extended_model_id(), 52 | fi.base_model_id(), 53 | ); 54 | 55 | let mut writer: ModelWriter = Default::default(); 56 | // Should work as long as it fits in MODEL_LEN bytes: 57 | write!(writer, $format, vendor, family, extended_model, model).unwrap(); 58 | let key = writer.as_str(); 59 | 60 | events::COUNTER_MAP.get(key) 61 | }) 62 | }) 63 | }}; 64 | } 65 | 66 | /// Return all core performance events for the running micro-architecture. 
67 | pub fn events() -> Option<&'static phf::Map<&'static str, EventDescription<'static>>> { 68 | // Should be something like: GenuineIntel-6-2C 69 | get_events!("{}-{}-{:X}{:X}") 70 | } 71 | 72 | #[test] 73 | fn events_test() { 74 | // Note: This will silently fail in case the counter is not available. 75 | events().map(|cc| { 76 | cc.get("INST_RETIRED.ANY").map(|p| { 77 | assert!(p.event_name == "INST_RETIRED.ANY"); 78 | }); 79 | }); 80 | } 81 | -------------------------------------------------------------------------------- /src/perfcnt/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod intel; 2 | -------------------------------------------------------------------------------- /src/task.rs: -------------------------------------------------------------------------------- 1 | //! Helpers to program the task state segment. 2 | //! See Intel 3a, Chapter 7 3 | 4 | pub use crate::segmentation; 5 | use core::arch::asm; 6 | 7 | /// Returns the current value of the task register. 8 | /// 9 | /// # Safety 10 | /// Needs CPL 0. 11 | pub unsafe fn tr() -> segmentation::SegmentSelector { 12 | let segment: u16; 13 | asm!("str {0:x}", 14 | out(reg) segment, 15 | options(att_syntax, nostack, nomem, preserves_flags)); 16 | segmentation::SegmentSelector::from_raw(segment) 17 | } 18 | 19 | /// Loads the task register. 20 | /// 21 | /// # Safety 22 | /// Needs CPL 0. 23 | pub unsafe fn load_tr(sel: segmentation::SegmentSelector) { 24 | asm!("ltr {0:x}", 25 | in(reg) sel.bits(), 26 | options(att_syntax, nostack, nomem, preserves_flags)); 27 | } 28 | -------------------------------------------------------------------------------- /src/time.rs: -------------------------------------------------------------------------------- 1 | //! Functions to read time stamp counters on x86. 2 | use core::arch::asm; 3 | 4 | use crate::arch::_rdtsc; 5 | 6 | /// Read the time stamp counter. 
7 | /// 8 | /// The RDTSC instruction is not a serializing instruction. 9 | /// It does not necessarily wait until all previous instructions 10 | /// have been executed before reading the counter. Similarly, 11 | /// subsequent instructions may begin execution before the 12 | /// read operation is performed. If software requires RDTSC to be 13 | /// executed only after all previous instructions have completed locally, 14 | /// it can either use RDTSCP or execute the sequence LFENCE;RDTSC. 15 | /// 16 | /// # Safety 17 | /// * Causes a GP fault if the TSD flag in register CR4 is set and the CPL 18 | /// is greater than 0. 19 | pub unsafe fn rdtsc() -> u64 { 20 | _rdtsc() as u64 21 | } 22 | 23 | /// Read the time stamp counter. 24 | /// 25 | /// The RDTSCP instruction waits until all previous instructions have been 26 | /// executed before reading the counter. However, subsequent instructions may 27 | /// begin execution before the read operation is performed. 28 | /// 29 | /// Volatile is used here because the function may be used to act as an 30 | /// instruction barrier. 31 | /// 32 | /// # Returns 33 | /// - The current time stamp counter value of the CPU as a `u64`. 34 | /// - The contents of `IA32_TSC_AUX` on that particular core. This is an OS 35 | /// defined value. For example, Linux writes `numa_id << 12 | core_id` into 36 | /// it. See also [`crate::rdpid`]. 37 | /// 38 | /// # Note 39 | /// One can use `core::arch::x86_64::__rdtscp` from the Rust core library as 40 | /// well. We don't rely on it because it only returns the time-stamp counter of 41 | /// rdtscp and not the contents of `IA32_TSC_AUX`. 42 | /// 43 | /// # Safety 44 | /// * Causes a GP fault if the TSD flag in register CR4 is set and the CPL is 45 | /// greater than 0. 
46 | pub unsafe fn rdtscp() -> (u64, u32) { 47 | let eax: u32; 48 | let ecx: u32; 49 | let edx: u32; 50 | asm!( 51 | "rdtscp", 52 | lateout("eax") eax, 53 | lateout("ecx") ecx, 54 | lateout("edx") edx, 55 | options(nomem, nostack) 56 | ); 57 | 58 | let counter: u64 = (edx as u64) << 32 | eax as u64; 59 | (counter, ecx) 60 | } 61 | 62 | #[cfg(all(test, feature = "utest"))] 63 | mod test { 64 | use super::*; 65 | 66 | #[test] 67 | fn check_rdtsc() { 68 | let cpuid = crate::cpuid::CpuId::new(); 69 | let has_tsc = cpuid 70 | .get_feature_info() 71 | .map_or(false, |finfo| finfo.has_tsc()); 72 | 73 | if has_tsc { 74 | unsafe { 75 | assert!(rdtsc() > 0, "rdtsc returned 0, unlikely!"); 76 | } 77 | } 78 | } 79 | 80 | #[test] 81 | fn check_rdtscp() { 82 | let cpuid = crate::cpuid::CpuId::new(); 83 | let has_rdtscp = cpuid 84 | .get_extended_processor_and_feature_identifiers() 85 | .map_or(false, |einfo| einfo.has_rdtscp()); 86 | 87 | if has_rdtscp { 88 | unsafe { 89 | // Check cycle counter: 90 | assert!(rdtscp().0 > 0, "rdtscp returned 0, unlikely!"); 91 | 92 | // Check TSC AUX is correct (currently when using Linux only): 93 | // See also: https://elixir.bootlin.com/linux/v5.18.8/source/arch/x86/include/asm/segment.h#L241 94 | if cfg!(target_os = "linux") { 95 | let mut cpu: u32 = 0; 96 | let mut node: u32 = 0; 97 | libc::syscall(libc::SYS_getcpu, &mut cpu, &mut node, 0); 98 | assert_eq!( 99 | rdtscp().1, 100 | node << 12 | cpu, 101 | "rdtscp AUX didn't match getcpu call!" 102 | ); 103 | } 104 | } 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/tlb.rs: -------------------------------------------------------------------------------- 1 | //! Functions to flush the translation lookaside buffer (TLB). 2 | 3 | use core::arch::asm; 4 | 5 | /// Invalidate the given address in the TLB using the `invlpg` instruction. 
6 | /// 7 | /// # Safety 8 | /// This function is unsafe as it causes a general protection fault (GP) if the current privilege 9 | /// level is not 0. 10 | pub unsafe fn flush(addr: usize) { 11 | asm!("invlpg ({})", in(reg) addr, options(att_syntax, nostack, preserves_flags)); 12 | } 13 | 14 | /// Invalidate the TLB completely by reloading the CR3 register. 15 | /// 16 | /// # Safety 17 | /// This function is unsafe as it causes a general protection fault (GP) if the current privilege 18 | /// level is not 0. 19 | pub unsafe fn flush_all() { 20 | use crate::controlregs::{cr3, cr3_write}; 21 | cr3_write(cr3()) 22 | } 23 | 24 | #[cfg(all(test, feature = "vmtest"))] 25 | mod x86testing { 26 | use super::*; 27 | use x86test::*; 28 | 29 | #[x86test] 30 | fn check_flush_all() { 31 | unsafe { 32 | flush_all(); 33 | } 34 | } 35 | 36 | #[x86test] 37 | fn check_flush() { 38 | // A better test would be: 39 | // map page, read page, unmap page, read page, flush, read page -> pfault 40 | unsafe { 41 | flush(0xdeadbeef); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/vmx/mod.rs: -------------------------------------------------------------------------------- 1 | //! Data structures and definitions used by Virtual Machine Extensions. 2 | 3 | pub mod vmcs; 4 | 5 | /// A specialized [`Result`](core::result::Result) type for VMX operations. 6 | /// 7 | /// This type closely replicates VMX instruction conventions described in 8 | /// Intel SDM, Volume 3C, Section 30.2. 9 | pub type Result = core::result::Result; 10 | 11 | /// Possible outcomes of VMfail pseudo-function used to convey VMX operation errors. 12 | /// 13 | /// Definitions of all these pseudo-functions can be found in Intel SDM, Volume 3C, Section 30.2. 14 | #[derive(Debug)] 15 | pub enum VmFail { 16 | /// VMCS pointer is valid, but some other error was encountered. Read 17 | /// VM-instruction error field of VMCS for more details. 
VmFailValid,
    /// VMCS pointer is not valid.
    VmFailInvalid,
}

--------------------------------------------------------------------------------
/tests/kvm/bin.rs:
--------------------------------------------------------------------------------
#![feature(custom_test_frameworks)]
#![test_runner(x86test::runner::runner)]

// Run with:
// RUSTFLAGS="-C relocation-model=dynamic-no-pic -C code-model=kernel" RUST_BACKTRACE=1 cargo test --verbose --test kvm -- --nocapture

extern crate core;
extern crate klogger;
extern crate x86;

extern crate x86test;

#[cfg(all(test, feature = "vmtest"))]
use self::x86test::*;

#[cfg(all(test, feature = "vmtest"))]
#[x86test(ioport(0x1, 0xfe))]
fn use_the_port() {
    unsafe {
        kassert!(
            x86::io::inw(0x1) == 0xfe,
            "`inw` instruction didn't read the correct value"
        );
    }
}

#[cfg(all(test, feature = "vmtest"))]
#[x86test(ram(0x30000000, 0x31000000))]
fn print_works() {
    sprint!("sprint!, ");
    sprintln!("sprintln! works");
}

#[cfg(all(test, feature = "vmtest"))]
#[x86test]
#[should_panic]
fn panic_test() {
    kpanic!("failed");
}

--------------------------------------------------------------------------------
/tests/no_std_build.rs:
--------------------------------------------------------------------------------
#![feature(start, libc)]
#![no_std]

extern crate libc;
extern crate x86;

#[start]
fn start(_argc: isize, _argv: *const *const u8) -> isize {
    0
}

--------------------------------------------------------------------------------
/update_perfmon_db.sh:
--------------------------------------------------------------------------------
#!/usr/bin/env bash
# Replaces x86data/perfmon_data with a fresh recursive download of
# https://download.01.org/perfmon/. Run from the repository root.
set -euo pipefail

# A recursive wget can exit non-zero because of individual failed links, so
# only warn here; the directory check below decides whether to continue.
wget --no-parent -r https://download.01.org/perfmon/ \
    || echo "warning: wget reported errors (exit code $?)" >&2

# Never delete the existing database unless a replacement was downloaded.
if [ ! -d download.01.org/perfmon ]; then
    echo "error: download.01.org/perfmon is missing; keeping existing data" >&2
    exit 1
fi

rm -rf x86data/perfmon_data
mv download.01.org/perfmon x86data/perfmon_data
# Drop the directory-listing pages saved by the recursive download.
# Assumes they are all named index.html* -- the previous `grep index`
# matched any path containing "index", including data files.
find x86data/perfmon_data -type f -name 'index.html*' -delete
rm -rf download.01.org

--------------------------------------------------------------------------------
/x86data/perfmon_data/BDW-DE/broadwellde_matrix_bit_definitions_v7.tsv:
--------------------------------------------------------------------------------
# Performance Monitoring Events for the Fifth Generation Intel Core Processors Based on the Broadwell-DE Microarchitecture - V7
# 1/18/2018 9:56:36 AM
# Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved.
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 0,1 null null 6 | DEMAND_RFO 1 1 0,1 null null 7 | DEMAND_CODE_RD 2 1 0,1 null null 8 | COREWB 3 1 0,1 null null 9 | PF_L2_DATA_RD 4 1 0,1 null null 10 | PF_L2_RFO 5 1 0,1 null null 11 | PF_L2_CODE_RD 6 1 0,1 null null 12 | PF_L3_DATA_RD 7 1 0,1 null null 13 | PF_L3_RFO 8 1 0,1 null null 14 | PF_L3_CODE_RD 9 1 0,1 null null 15 | SPLIT_LOCK_UC_LOCK 10 1 0,1 null null 16 | STREAMING_STORES 11 1 0,1 null null 17 | OTHER 15 1 0,1 null null 18 | ALL_PF_DATA_RD 4,7 1 0,1 null null 19 | ALL_PF_RFO 5,8 1 0,1 null null 20 | ALL_PF_CODE_RD 6,9 1 0,1 null null 21 | ALL_DATA_RD 0,4,7 1 0,1 null null 22 | ALL_RFO 1,5,8 1 0,1 null null 23 | ALL_CODE_RD 2,6,9 1 0,1 null null 24 | ALL_READS 0,1,2,4,5,6,7,8,9 1 0,1 null null 25 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 0,1 null null 26 | ANY_RESPONSE 16 2 have any response type. 0,1 null null 27 | SUPPLIER_NONE 17 3 0,1 null null 28 | L3_HIT_M 18 3 0,1 null null 29 | L3_HIT_E 19 3 0,1 null null 30 | L3_HIT_S 20 3 0,1 null null 31 | L3_HIT_F 21 3 0,1 null null 32 | L3_HIT 18,19,20,21 3 0,1 null null 33 | L3_MISS_LOCAL_DRAM 26 3 0,1 null null 34 | SPL_HIT 30 4 0,1 null null 35 | SNOOP_NONE 31 4 0,1 null null 36 | SNOOP_NOT_NEEDED 32 4 0,1 null null 37 | SNOOP_MISS 33 4 0,1 null null 38 | SNOOP_HIT_NO_FWD 34 4 0,1 null null 39 | SNOOP_HIT_WITH_FWD 35 4 0,1 null null 40 | SNOOP_HITM 36 4 0,1 null null 41 | SNOOP_NON_DRAM 37 4 0,1 null null 42 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 null null 43 | L3_MISS 26 3 0,1 null null 44 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDW/broadwell_fp_arith_inst_v23.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "BitName": "SCALAR_DOUBLE", 4 | "BitIndex": "0", 5 | "FlopsMultiplier": "1", 6 | "Description": "Number of SSE/AVX computational scalar double precision floating-point 
instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 7 | }, 8 | { 9 | "BitName": "SCALAR_SINGLE", 10 | "BitIndex": "1", 11 | "FlopsMultiplier": "1", 12 | "Description": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 13 | }, 14 | { 15 | "BitName": "128BIT_PACKED_DOUBLE", 16 | "BitIndex": "2", 17 | "FlopsMultiplier": "2", 18 | "Description": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 19 | }, 20 | { 21 | "BitName": "128BIT_PACKED_SINGLE", 22 | "BitIndex": "3", 23 | "FlopsMultiplier": "4", 24 | "Description": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 25 | }, 26 | { 27 | "BitName": "256BIT_PACKED_DOUBLE", 28 | "BitIndex": "4", 29 | "FlopsMultiplier": "4", 30 | "Description": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. 
Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 31 | }, 32 | { 33 | "BitName": "256BIT_PACKED_SINGLE", 34 | "BitIndex": "5", 35 | "FlopsMultiplier": "8", 36 | "Description": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 37 | } 38 | ] -------------------------------------------------------------------------------- /x86data/perfmon_data/BDW/broadwell_fp_arith_inst_v23.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fifth Generation Intel Core Processors Based on the Broadwell Microarchitecture - V23 2 | # 8/7/2018 8:47:47 AM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 7 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 8 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 9 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 10 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 11 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
12 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDW/broadwell_matrix_bit_definitions_v23.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fifth Generation Intel Core Processors Based on the Broadwell Microarchitecture - V23 2 | # 8/7/2018 8:47:47 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 BDM115 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 Counts writebacks (modified to exclusive) 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 BDM115 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_L3_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na 13 | PF_L3_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na 14 | PF_L3_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 na 15 | OTHER 15 1 Counts any other requests 0,1 na 16 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 na 17 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 na 18 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 na 19 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data reads 0,1 na 20 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 BDM115 21 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 na 22 | ALL_READS 0,1,2,4,5,6,7,8,9 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 na 23 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 na 24 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 25 | SUPPLIER_NONE 17 3 0,1 BDM115 26 | L3_HIT_M 18 3 0,1 163,164,165,166,167,168,169,170,171 na 27 | L3_HIT_E 19 3 0,1 163,164,165,166,167,168,169,170,171 na 28 | L3_HIT_S 20 3 0,1 163,164,165,166,167,168,169,170,171 na 29 | L3_HIT_F 21 3 0,1 163,164,165,166,167,168,169,170,171 na 30 | L3_HIT 18,19,20,21 3 0,1 na 31 | L3_MISS_LOCAL_DRAM 26 3 0,1 BDM115 32 | L3_MISS 26,27,28,29 3 0,1 169,170,171 na 33 | SNOOP_NONE 31 4 0,1 na 34 | SNOOP_NOT_NEEDED 32 4 0,1 na 35 | SNOOP_MISS 33 4 0,1 na 36 | SNOOP_HIT_NO_FWD 34 4 0,1 na 37 | SNOOP_HIT_WITH_FWD 35 4 0,1 na na 38 | SNOOP_HITM 36 4 0,1 na 39 | SNOOP_NON_DRAM 37 4 0,1 na 40 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 na 41 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDW/broadwell_matrix_v23.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fifth Generation Intel Core Processors Based on the Broadwell Microarchitecture - V23 2 | # 8/7/2018 8:47:47 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts writebacks (modified to exclusive) 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_L3_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_L3_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_L3_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER Null 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 19 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 21 | Null ANY_RESPONSE 0x000001 0,1 have any response type. 22 | Null SUPPLIER_NONE.SNOOP_NONE 0x008002 0,1 tbd 23 | Null SUPPLIER_NONE.SNOOP_NOT_NEEDED 0x010002 0,1 tbd 24 | Null SUPPLIER_NONE.SNOOP_MISS 0x020002 0,1 tbd 25 | Null SUPPLIER_NONE.SNOOP_HIT_NO_FWD 0x040002 0,1 tbd 26 | Null SUPPLIER_NONE.SNOOP_HITM 0x100002 0,1 tbd 27 | Null SUPPLIER_NONE.SNOOP_NON_DRAM 0x200002 0,1 tbd 28 | Null SUPPLIER_NONE.ANY_SNOOP 0x3f8002 0,1 tbd 29 | Null L3_HIT.SNOOP_NONE 0x00803c 0,1 hit in the L3 with no details on snoop-related information. 30 | Null L3_HIT.SNOOP_NOT_NEEDED 0x01003c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
31 | Null L3_HIT.SNOOP_MISS 0x02003c 0,1 hit in the L3 with a snoop miss response. 32 | Null L3_HIT.SNOOP_HIT_NO_FWD 0x04003c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 33 | Null L3_HIT.SNOOP_HITM 0x10003c 0,1 tbd 34 | Null L3_HIT.SNOOP_NON_DRAM 0x20003c 0,1 hit in the L3 and the target was non-DRAM system address. 35 | Null L3_HIT.ANY_SNOOP 0x3f803c 0,1 hit in the L3. 36 | Null L3_MISS_LOCAL_DRAM.SNOOP_NONE 0x008400 0,1 tbd 37 | Null L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED 0x010400 0,1 tbd 38 | Null L3_MISS_LOCAL_DRAM.SNOOP_MISS 0x020400 0,1 tbd 39 | Null L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD 0x040400 0,1 tbd 40 | Null L3_MISS_LOCAL_DRAM.SNOOP_HITM 0x100400 0,1 tbd 41 | Null L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM 0x200400 0,1 tbd 42 | Null L3_MISS_LOCAL_DRAM.ANY_SNOOP 0x3f8400 0,1 tbd 43 | Null L3_MISS.SNOOP_NONE 0x00bc00 0,1 miss the L3 with no details on snoop-related information. 44 | Null L3_MISS.SNOOP_NOT_NEEDED 0x013c00 0,1 tbd 45 | Null L3_MISS.SNOOP_MISS 0x023c00 0,1 miss the L3 with a snoop miss response. 46 | Null L3_MISS.SNOOP_HIT_NO_FWD 0x043c00 0,1 tbd 47 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDW/broadwell_uncore_v23.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fifth Generation Intel Core Processors Based on the Broadwell Microarchitecture - V23 2 | # 8/7/2018 8:47:47 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask EventName Description Counter CounterMask Invert EdgeDetect 5 | CBO 0x22 0x41 UNC_CBO_XSNP_RESPONSE.MISS_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core. 0,1 0 0 0 6 | CBO 0x22 0x81 UNC_CBO_XSNP_RESPONSE.MISS_EVICTION A cross-core snoop resulted from L3 Eviction which misses in some processor core. 
0,1 0 0 0 7 | CBO 0x22 0x44 UNC_CBO_XSNP_RESPONSE.HIT_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core. 0,1 0 0 0 8 | CBO 0x22 0x48 UNC_CBO_XSNP_RESPONSE.HITM_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core. 0,1 0 0 0 9 | CBO 0x34 0x11 UNC_CBO_CACHE_LOOKUP.READ_M L3 Lookup read request that access cache and found line in M-state 0,1 0 0 0 10 | CBO 0x34 0x21 UNC_CBO_CACHE_LOOKUP.WRITE_M L3 Lookup write request that access cache and found line in M-state 0,1 0 0 0 11 | CBO 0x34 0x81 UNC_CBO_CACHE_LOOKUP.ANY_M L3 Lookup any request that access cache and found line in M-state 0,1 0 0 0 12 | CBO 0x34 0x18 UNC_CBO_CACHE_LOOKUP.READ_I L3 Lookup read request that access cache and found line in I-state 0,1 0 0 0 13 | CBO 0x34 0x88 UNC_CBO_CACHE_LOOKUP.ANY_I L3 Lookup any request that access cache and found line in I-state 0,1 0 0 0 14 | CBO 0x34 0x1f UNC_CBO_CACHE_LOOKUP.READ_MESI L3 Lookup read request that access cache and found line in any MESI-state 0,1 0 0 0 15 | CBO 0x34 0x2f UNC_CBO_CACHE_LOOKUP.WRITE_MESI L3 Lookup write request that access cache and found line in MESI-state 0,1 0 0 0 16 | CBO 0x34 0x8f UNC_CBO_CACHE_LOOKUP.ANY_MESI L3 Lookup any request that access cache and found line in MESI-state 0,1 0 0 0 17 | CBO 0x34 0x86 UNC_CBO_CACHE_LOOKUP.ANY_ES L3 Lookup any request that access cache and found line in E or S-state 0,1 0 0 0 18 | CBO 0x34 0x16 UNC_CBO_CACHE_LOOKUP.READ_ES L3 Lookup read request that access cache and found line in E or S-state 0,1 0 0 0 19 | CBO 0x34 0x26 UNC_CBO_CACHE_LOOKUP.WRITE_ES L3 Lookup write request that access cache and found line in E or S-state 0,1 0 0 0 20 | NCU 0x0 0x01 UNC_CLOCK.SOCKET This 48-bit fixed counter counts the UCLK cycles FIXED 0 0 0 21 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.ALL Each cycle count number of all Core outgoing valid entries. 
Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic. 0, 0 0 0 22 | iMPH-U 0x80 0x02 UNC_ARB_TRK_OCCUPANCY.DRD_DIRECT Each cycle count number of 'valid' coherent Data Read entries that are in DirectData mode. Such entry is defined as valid when it is allocated till data sent to Core (first chunk, IDI0). Applicable for IA Cores' requests in normal case. 0, 0 0 0 23 | iMPH-U 0x81 0x01 UNC_ARB_TRK_REQUESTS.ALL Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic. 0,1 0 0 0 24 | iMPH-U 0x81 0x02 UNC_ARB_TRK_REQUESTS.DRD_DIRECT Number of Core coherent Data Read entries allocated in DirectData mode 0,1 0 0 0 25 | iMPH-U 0x81 0x20 UNC_ARB_TRK_REQUESTS.WRITES Number of Writes allocated - any write transactions: full/partials writes and evictions. 0,1 0 0 0 26 | iMPH-U 0x84 0x01 UNC_ARB_COH_TRK_REQUESTS.ALL Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc. 0,1 0 0 0 27 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC.; 0, 1 0 0 28 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDX/broadwellx_matrix_bit_definitions_v14.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) Processor E5 v4 Family Based on the the Broadwell Microarchitecture - V14 2 | # 8/16/2018 4:41:37 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 Counts writebacks (modified to exclusive) 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_L3_DATA_RD 7 1 0,1 na 13 | PF_L3_RFO 8 1 0,1 na 14 | PF_L3_CODE_RD 9 1 0,1 na 15 | SPLIT_LOCK_UC_LOCK 10 1 Counts all locks that are either split across cache line boundaries or to uncacheable addresses 0,1 na 16 | STREAMING_STORES 11 1 Counts all non-temporal stores 0,1 na 17 | OTHER 15 1 Counts any other requests 0,1 na 18 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 na 19 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 na 20 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 na 21 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data reads 0,1 na 22 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 na 23 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 na 24 | ALL_READS 0,1,2,4,5,6,7,8,9,10 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 na 25 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 na 26 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 27 | SUPPLIER_NONE 17 3 0,1 na 28 | L3_HIT_M 18 3 0,1 na 29 | L3_HIT_E 19 3 0,1 na 30 | L3_HIT_S 20 3 0,1 na 31 | L3_HIT_F 21 3 0,1 na 32 | L3_HIT 18,19,20,21 3 0,1 na 33 | L3_MISS_LOCAL_DRAM 26 3 0,1 na 34 | L3_MISS_REMOTE_HOP0_DRAM 27 3 0,1 na 35 | L3_MISS_REMOTE_HOP1_DRAM 28 3 0,1 na 36 | L3_MISS_REMOTE_HOP2P_DRAM 29 3 0,1 na 37 | L3_MISS 26,27,28,29 3 0,1 na 38 | SNOOP_NONE 31 4 0,1 na 39 | SNOOP_NOT_NEEDED 32 4 0,1 na 40 | SNOOP_MISS 33 4 0,1 na 41 | SNOOP_HIT_NO_FWD 34 4 0,1 na 42 | SNOOP_HIT_WITH_FWD 35 4 0,1 18,19,20,21 na 43 | SNOOP_HITM 36 4 0,1 na 44 | SNOOP_NON_DRAM 37 4 0,1 na 45 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 na 46 | LLC_HIT.ANY_RESPONSE 18,19,20,21,31,32,33,34,35,36,37 2 hit in the L3 0,1 na na 47 | LLC_HIT.NO_SNOOP_NEEDED 18,19,20,21,32 2 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 0,1 na na 48 | LLC_HIT.SNOOP_MISS 18,19,20,21,33 2 hit in the L3 and the snoops sent to sibling cores return clean response 0,1 na na 49 | LLC_HIT.HIT_OTHER_CORE_NO_FWD 18,19,20,21,34 2 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 0,1 na na 50 | LLC_HIT.HITM_OTHER_CORE 18,19,20,21,36 2 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 0,1 na na 51 | LLC_MISS.ANY_RESPONSE 22,23,24,25,26,27,28,29,31,32,33,34,35,36,37 2 miss in the L3 0,1 na na 52 | LLC_MISS.LOCAL_DRAM 26,33,34 2 miss the L3 and the data is returned from local dram 0,1 na na 53 | LLC_MISS.REMOTE_DRAM 22,23,24,25,27,28,29,33,34 2 miss the L3 and the data is returned from remote dram 0,1 na na 54 | LLC_MISS.ANY_DRAM 22,23,24,25,26,27,28,29,33,34 2 miss the L3 and the data is returned from local or remote dram 0,1 na na 55 | LLC_MISS.REMOTE_HITM 22,23,24,25,26,27,28,29,36 2 miss the L3 and the modified data is transferred from remote cache 0,1 na na 56 | LLC_MISS.REMOTE_HIT_FORWARD 
22,23,24,25,26,27,28,29,30,35 2 miss the L3 and clean or shared data is transferred from remote cache 0,1 na na 57 | PF_LLC_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na na 58 | PF_LLC_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na na 59 | PF_LLC_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 na na 60 | -------------------------------------------------------------------------------- /x86data/perfmon_data/BDX/broadwellx_matrix_v14.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) Processor E5 v4 Family Based on the the Broadwell Microarchitecture - V14 2 | # 8/16/2018 4:41:37 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts writebacks (modified to exclusive) 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | SPLIT_LOCK_UC_LOCK Null 0x0400 0,1 Counts all locks that are either split across cache line boundaries or to uncacheable addresses 16 | STREAMING_STORES Null 0x0800 0,1 Counts all non-temporal stores 17 | OTHER Null 0x8000 0,1 Counts any other requests 18 | ALL_PF_DATA_RD Null 0x0090 
0,1 Counts all prefetch data reads 19 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 20 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 21 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 22 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 23 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 24 | ALL_READS Null 0x07f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 25 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 26 | Null LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the L3 27 | Null LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 28 | Null LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the L3 and the snoops sent to sibling cores return clean response 29 | Null LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 30 | Null LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 31 | Null LLC_MISS.ANY_RESPONSE 0x3fbfc0 0,1 miss in the L3 32 | Null LLC_MISS.LOCAL_DRAM 0x060400 0,1 miss the L3 and the data is returned from local dram 33 | Null LLC_MISS.REMOTE_DRAM 0x063bc0 0,1 miss the L3 and the data is returned from remote dram 34 | Null LLC_MISS.ANY_DRAM 0x063fc0 0,1 miss the L3 and the data is returned from local or remote dram 35 | Null LLC_MISS.REMOTE_HITM 0x103fc0 0,1 miss the L3 and the modified data is transferred from remote cache 36 | Null LLC_MISS.REMOTE_HIT_FORWARD 0x087fc0 0,1 miss the L3 and clean or shared data is transferred from remote cache 37 | -------------------------------------------------------------------------------- /x86data/perfmon_data/CLX/cascadelakex_fp_arith_inst_v1.04.tsv: -------------------------------------------------------------------------------- 1 | # 
Performance Monitoring Events for Performance Monitoring Events for Intel Xeon Processors Based on the Cascadelake Microarchitecture with Intel Optane DC persistent memory - V1.04 2 | # 4/1/2019 11:14:48 AM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 7 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 8 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 9 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
10 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 11 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 12 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 13 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 14 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
15 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 16 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 17 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 18 | 512BIT_PACKED_DOUBLE 6 8 19 | 512BIT_PACKED_SINGLE 7 16 20 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLM/goldmont_fp_arith_inst_v13.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /x86data/perfmon_data/GLM/goldmont_fp_arith_inst_v13.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Goldmont Microarchitecture - V13 2 | # 3/2/2018 3:20:06 PM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
5 | BitName BitIndex FlopsMultiplier Description 6 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLM/goldmont_matrix_bit_definitions_v13.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Goldmont Microarchitecture - V13 2 | # 3/2/2018 3:20:06 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand cacheable data reads of full cache lines 0,1 na 6 | DEMAND_RFO 1 1 Counts demand reads for ownership (RFO) requests generated by a write to full data cache line 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache 0,1 na 8 | COREWB 3 1 Counts the number of writeback transactions caused by L1 or L2 cache evictions 0 na 9 | PF_L2_DATA_RD 4 1 Counts data cacheline reads generated by hardware L2 cache prefetcher 0,1 na 10 | PF_L2_RFO 5 1 Counts reads for ownership (RFO) requests generated by L2 prefetcher 0,1 na 11 | PARTIAL_READS 7 1 Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types 0,1 na 12 | PARTIAL_WRITES 8 1 Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory 0,1 na 13 | BUS_LOCKS 10 1 Counts bus lock and split lock requests 0,1 na 14 | FULL_STREAMING_STORES 11 1 Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes 0,1 na 15 | SW_PREFETCH 12 1 Counts data cache lines requests by software prefetch instructions 0,1 na 16 | PF_L1_DATA_RD 13 1 Counts data cache line reads generated by hardware L1 data cache 
prefetcher 0,1 na 17 | PARTIAL_STREAMING_STORES 14 1 Counts partial cache line data writes to uncacheable write combining (USWC) memory region 0,1 na 18 | STREAMING_STORES 11,14 1 Counts any data writes to uncacheable write combining (USWC) memory region 0,1 na 19 | ANY_REQUEST 15 1 Counts requests to the uncore subsystem 0,1 na 20 | ANY_PF_DATA_RD 4,12,13 1 Counts data reads generated by L1 or L2 prefetchers 0,1 na 21 | ANY_DATA_RD 0,4,7,12,13 1 Counts data reads (demand & prefetch) 0,1 na 22 | ANY_RFO 1,5 1 Counts reads for ownership (RFO) requests (demand & prefetch) 0,1 na 23 | ANY_READ 0,1,2,4,5,7,9,12,13 1 Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) 0,1 na 24 | ANY_RESPONSE 16 2 have any transaction responses from the uncore subsystem. 0,1 na 25 | L2_HIT 18 2 hit the L2 cache. 0,1 na 26 | L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED 33 2 true miss for the L2 cache with a snoop miss in the other processor module. 0,1 na 27 | L2_MISS.HIT_OTHER_CORE_NO_FWD 34 2 miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 0,1 na 28 | L2_MISS.HITM_OTHER_CORE 36 2 miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. 0,1 na 29 | L2_MISS.NON_DRAM 37 2 miss the L2 cache and targets non-DRAM system address. 0,1 na 30 | L2_MISS.ANY 33,34,36,37 2 miss the L2 cache. 0,1 na 31 | OUTSTANDING 38 2 outstanding, per cycle, from the time of the L2 miss to when any response is received. 0 na 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLM/goldmont_matrix_v13.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Goldmont Microarchitecture - V13 2 | # 3/2/2018 3:20:06 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand cacheable data reads of full cache lines 6 | DEMAND_RFO Null 0x0002 0,1 Counts demand reads for ownership (RFO) requests generated by a write to full data cache line 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache 8 | COREWB Null 0x0008 0 Counts the number of writeback transactions caused by L1 or L2 cache evictions 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts data cacheline reads generated by hardware L2 cache prefetcher 10 | PF_L2_RFO Null 0x0020 0,1 Counts reads for ownership (RFO) requests generated by L2 prefetcher 11 | PARTIAL_READS Null 0x0080 0,1 Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types 12 | PARTIAL_WRITES Null 0x0100 0,1 Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory 13 | UC_CODE_RD Null 0x0200 0,1 Counts code reads in uncacheable (UC) memory region 14 | BUS_LOCKS Null 0x0400 0,1 Counts bus lock and split lock requests 15 | FULL_STREAMING_STORES Null 0x0800 0,1 Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes 16 | SW_PREFETCH Null 0x1000 0,1 Counts data cache lines requests by software prefetch instructions 17 | PF_L1_DATA_RD Null 0x2000 0,1 Counts data cache line reads generated by hardware L1 data cache prefetcher 18 | PARTIAL_STREAMING_STORES Null 0x4000 0,1 Counts partial cache line data writes to uncacheable write combining (USWC) memory region 19 | STREAMING_STORES Null 0x4800 0,1 Counts any data writes to uncacheable write combining (USWC) memory region 20 | ANY_REQUEST Null 0x8000 0,1 Counts requests to the uncore subsystem 21 | 
ANY_PF_DATA_RD Null 0x3010 0,1 Counts data reads generated by L1 or L2 prefetchers 22 | ANY_DATA_RD Null 0x3091 0,1 Counts data reads (demand & prefetch) 23 | ANY_RFO Null 0x0022 0,1 Counts reads for ownership (RFO) requests (demand & prefetch) 24 | ANY_READ Null 0x32b7 0,1 Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) 25 | Null ANY_RESPONSE 0x000001 0,1 have any transaction responses from the uncore subsystem. 26 | Null L2_HIT 0x000004 0,1 hit the L2 cache. 27 | Null L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED 0x020000 0,1 true miss for the L2 cache with a snoop miss in the other processor module. 28 | Null L2_MISS.HIT_OTHER_CORE_NO_FWD 0x040000 0,1 miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 29 | Null L2_MISS.HITM_OTHER_CORE 0x100000 0,1 miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. 30 | Null L2_MISS.NON_DRAM 0x200000 0,1 miss the L2 cache and targets non-DRAM system address. 31 | Null L2_MISS.ANY 0x360000 0,1 miss the L2 cache. 32 | Null OUTSTANDING 0x400000 0 outstanding, per cycle, from the time of the L2 miss to when any response is received. 33 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLP/goldmontplus_fp_arith_inst_v1.01.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /x86data/perfmon_data/GLP/goldmontplus_fp_arith_inst_v1.01.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the GoldmontPlus Microarchitecture - V1.01 2 | # 3/2/2018 3:19:18 PM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
5 | BitName BitIndex FlopsMultiplier Description 6 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLP/goldmontplus_matrix_bit_definitions_v1.01.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the GoldmontPlus Microarchitecture - V1.01 2 | # 3/2/2018 3:19:18 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand cacheable data reads of full cache lines 0,1 na 6 | DEMAND_RFO 1 1 Counts demand reads for ownership (RFO) requests generated by a write to full data cache line 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache 0,1 na 8 | COREWB 3 1 Counts the number of writeback transactions caused by L1 or L2 cache evictions 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts data cacheline reads generated by hardware L2 cache prefetcher 0,1 na 10 | PF_L2_RFO 5 1 Counts reads for ownership (RFO) requests generated by L2 cache prefetcher 0,1 na 11 | PARTIAL_READS 7 1 Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types 0,1 na 12 | PARTIAL_WRITES 8 1 Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory 0,1 na 13 | BUS_LOCKS 10 1 Counts bus lock and split lock requests 0,1 na 14 | FULL_STREAMING_STORES 11 1 Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes 0,1 na 15 | SW_PREFETCH 12 1 Counts data cache lines requests by software prefetch instructions 0,1 na 16 | PF_L1_DATA_RD 13 1 Counts data cache line reads generated by hardware 
L1 data cache prefetcher 0,1 na 17 | PARTIAL_STREAMING_STORES 14 1 Counts partial cache line data writes to uncacheable write combining (USWC) memory region 0,1 na 18 | STREAMING_STORES 11,14 1 Counts any data writes to uncacheable write combining (USWC) memory region 0,1 na 19 | ANY_REQUEST 15 1 Counts requests to the uncore subsystem 0,1 na 20 | ANY_PF_DATA_RD 4,12,13 1 Counts data reads generated by L1 or L2 prefetchers 0,1 na 21 | ANY_DATA_RD 0,4,7,12,13 1 Counts data reads (demand & prefetch) 0,1 na 22 | ANY_RFO 1,5 1 Counts reads for ownership (RFO) requests (demand & prefetch) 0,1 na 23 | ANY_READ 0,1,2,4,5,7,9,12,13 1 Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) 0,1 na 24 | ANY_RESPONSE 16 2 have any transaction responses from the uncore subsystem. 0,1 na 25 | L2_HIT 18 2 hit the L2 cache. 0,1 na 26 | L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED 33 2 true miss for the L2 cache with a snoop miss in the other processor module. 0,1 na 27 | L2_MISS.HIT_OTHER_CORE_NO_FWD 34 2 miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 0,1 na 28 | L2_MISS.HITM_OTHER_CORE 36 2 miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. 0,1 na 29 | L2_MISS.NON_DRAM 37 2 miss the L2 cache and targets non-DRAM system address. 0,1 na 30 | L2_MISS.ANY 33,34,36,37 2 miss the L2 cache. 0,1 na 31 | OUTSTANDING 38 2 outstanding, per cycle, from the time of the L2 miss to when any response is received. 
0 na 32 | UC_CODE_RD 9 1 Counts code reads in uncacheable (UC) memory region 0,1 na na 33 | PF_L2_CODE 6 1 Counts code(instruction) requests generated by L2 cache prefetcher 0,1 na na 34 | -------------------------------------------------------------------------------- /x86data/perfmon_data/GLP/goldmontplus_matrix_v1.01.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the GoldmontPlus Microarchitecture - V1.01 2 | # 3/2/2018 3:19:18 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand cacheable data reads of full cache lines 6 | DEMAND_RFO Null 0x0002 0,1 Counts demand reads for ownership (RFO) requests generated by a write to full data cache line 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache 8 | COREWB Null 0x0008 0,1 Counts the number of writeback transactions caused by L1 or L2 cache evictions 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts data cacheline reads generated by hardware L2 cache prefetcher 10 | PF_L2_RFO Null 0x0020 0,1 Counts reads for ownership (RFO) requests generated by L2 prefetcher 11 | PARTIAL_READS Null 0x0080 0,1 Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types 12 | PARTIAL_WRITES Null 0x0100 0,1 Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory 13 | UC_CODE_RD Null 0x0200 0,1 Counts code reads in uncacheable (UC) memory region 14 | BUS_LOCKS Null 0x0400 0,1 Counts bus lock and split lock requests 15 | FULL_STREAMING_STORES Null 0x0800 0,1 Counts full cache line data writes to uncacheable 
write combining (USWC) memory region and full cache-line non-temporal writes 16 | SW_PREFETCH Null 0x1000 0,1 Counts data cache lines requests by software prefetch instructions 17 | PF_L1_DATA_RD Null 0x2000 0,1 Counts data cache line reads generated by hardware L1 data cache prefetcher 18 | PARTIAL_STREAMING_STORES Null 0x4000 0,1 Counts partial cache line data writes to uncacheable write combining (USWC) memory region 19 | STREAMING_STORES Null 0x4800 0,1 Counts any data writes to uncacheable write combining (USWC) memory region 20 | ANY_REQUEST Null 0x8000 0,1 Counts requests to the uncore subsystem 21 | ANY_PF_DATA_RD Null 0x3010 0,1 Counts data reads generated by L1 or L2 prefetchers 22 | ANY_DATA_RD Null 0x3091 0,1 Counts data reads (demand & prefetch) 23 | ANY_RFO Null 0x0022 0,1 Counts reads for ownership (RFO) requests (demand & prefetch) 24 | ANY_READ Null 0x32b7 0,1 Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) 25 | Null ANY_RESPONSE 0x000001 0,1 have any transaction responses from the uncore subsystem. 26 | Null L2_HIT 0x000004 0,1 hit the L2 cache. 27 | Null L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED 0x020000 0,1 true miss for the L2 cache with a snoop miss in the other processor module. 28 | Null L2_MISS.HIT_OTHER_CORE_NO_FWD 0x040000 0,1 miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. 29 | Null L2_MISS.HITM_OTHER_CORE 0x100000 0,1 miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. 30 | Null L2_MISS.NON_DRAM 0x200000 0,1 miss the L2 cache and targets non-DRAM system address. 31 | Null L2_MISS.ANY 0x360000 0,1 miss the L2 cache. 32 | Null OUTSTANDING 0x400000 0 outstanding, per cycle, from the time of the L2 miss to when any response is received. 
33 | Null PF_L2_CODE 0x0040 0,1 Counts code(instruction) requests generated by L2 prefetcher 34 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSW/haswell_fp_arith_inst_v28.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /x86data/perfmon_data/HSW/haswell_fp_arith_inst_v28.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fourth Generation Intel Core Processors Based on the Haswell Microarchitecture - V28 2 | # 8/7/2018 1:02:46 AM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSW/haswell_matrix_bit_definitions_v28.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fourth Generation Intel Core Processors Based on the Haswell Microarchitecture - V28 2 | # 8/7/2018 1:02:46 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 0,1 HSD150 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_L3_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 HSD150 13 | PF_L3_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 HSD150 14 | PF_L3_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 HSD150 15 | SPLIT_LOCK_UC_LOCK 10 1 0,1 HSD150 16 | STREAMING_STORES 11 1 0,1 HSD150 17 | OTHER 15 1 Counts any other requests 0,1 na 18 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 HSD150 19 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 HSD150 20 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 HSD150 21 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data reads 0,1 HSD150 22 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 HSD150 23 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 HSD150 24 | ALL_READS 0,1,2,4,5,6,7,8,9,10 1 0,1 HSD150 25 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 HSD150 26 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 27 | SUPPLIER_NONE 17 3 0,1 116,117,118,119,120,121,122,123,124 na 28 | L3_HIT_M 18 3 0,1 116,117,118,119,120,121,122,123,124 na 29 | L3_HIT_E 19 3 0,1 116,117,118,119,120,121,122,123,124 na 30 | L3_HIT_S 20 3 0,1 116,117,118,119,120,121,122,123,124 na 31 | L3_HIT 18,19,20 3 0,1 116,117,118,119,120,121,122,123,124 na 32 | L3_MISS_LOCAL_DRAM 22 3 0,1 116,117,118,119,120,121,122,123,124 na 33 | SNOOP_NONE 31 4 0,1 na na 34 | SNOOP_NOT_NEEDED 32 4 0,1 na na 35 | SNOOP_MISS 33 4 0,1 na na 36 | SNOOP_HIT_NO_FWD 34 4 0,1 na na 37 | SNOOP_HIT_WITH_FWD 35 4 0,1 na na 38 | SNOOP_HITM 36 4 0,1 na na 39 | SNOOP_NON_DRAM 37 4 0,1 na na 40 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 na na 41 | L3_HIT.NO_SNOOP_NEEDED 18,19,20,21,32 2 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 0,1 na na 42 | L3_HIT.HIT_OTHER_CORE_NO_FWD 18,19,20,21,34 2 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 0,1 na na 43 | L3_HIT.HITM_OTHER_CORE 18,19,20,21,36 2 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 0,1 na na 44 | L3_MISS.LOCAL_DRAM 22,32 2 miss the L3 and the data is returned from local dram 0,1 na na 45 | L3_MISS.ANY_DRAM 22,23,24,25,26,27,28,29,30,33,34 2 miss the L3 and the data is returned from local or remote dram 0,1 na na 46 | L3_HIT.ANY_RESPONSE 18,19,20,21,31,32,33,34,35,36,37 2 hit in the L3 0,1 na na 47 | L3_MISS.ANY_RESPONSE 22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37 2 miss in the L3 0,1 na na 48 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSW/haswell_matrix_v28.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fourth Generation Intel Core Processors Based on the Haswell Microarchitecture - V28 2 | # 8/7/2018 1:02:46 AM 3 | # 
Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 9 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 10 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 11 | PF_L3_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 12 | PF_L3_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 13 | PF_L3_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 14 | OTHER Null 0x8000 0,1 Counts any other requests 15 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 16 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 17 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 18 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 19 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 20 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 21 | ALL_READS Null 0x07f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 22 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 23 | Null L3_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the L3 24 | Null L3_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 25 | Null L3_HIT.SNOOP_MISS 0x02003c 0,1 hit in the L3 and the snoops sent to sibling cores return clean response 26 | Null L3_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 27 | Null L3_HIT.HITM_OTHER_CORE 
0x10003c 0,1 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 28 | Null L3_MISS.ANY_RESPONSE 0x3fffc0 0,1 miss in the L3 29 | Null L3_MISS.LOCAL_DRAM 0x010040 0,1 miss the L3 and the data is returned from local dram 30 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSW/haswell_uncore_v28.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Fourth Generation Intel Core Processors Based on the Haswell Microarchitecture - V28 2 | # 8/7/2018 1:02:46 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask EventName Description Counter CounterMask Invert EdgeDetect 5 | CBO 0x22 0x21 UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL An external snoop misses in some processor core. 0,1 0 0 0 6 | CBO 0x22 0x41 UNC_CBO_XSNP_RESPONSE.MISS_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core. 0,1 0 0 0 7 | CBO 0x22 0x81 UNC_CBO_XSNP_RESPONSE.MISS_EVICTION A cross-core snoop resulted from L3 Eviction which misses in some processor core. 0,1 0 0 0 8 | CBO 0x22 0x24 UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL An external snoop hits a non-modified line in some processor core. 0,1 0 0 0 9 | CBO 0x22 0x44 UNC_CBO_XSNP_RESPONSE.HIT_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core. 0,1 0 0 0 10 | CBO 0x22 0x84 UNC_CBO_XSNP_RESPONSE.HIT_EVICTION A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core. 0,1 0 0 0 11 | CBO 0x22 0x28 UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL An external snoop hits a modified line in some processor core. 
0,1 0 0 0 12 | CBO 0x22 0x48 UNC_CBO_XSNP_RESPONSE.HITM_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core. 0,1 0 0 0 13 | CBO 0x22 0x88 UNC_CBO_XSNP_RESPONSE.HITM_EVICTION A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core. 0,1 0 0 0 14 | CBO 0x34 0x11 UNC_CBO_CACHE_LOOKUP.READ_M L3 Lookup read request that access cache and found line in M-state. 0,1 0 0 0 15 | CBO 0x34 0x21 UNC_CBO_CACHE_LOOKUP.WRITE_M L3 Lookup write request that access cache and found line in M-state. 0,1 0 0 0 16 | CBO 0x34 0x41 UNC_CBO_CACHE_LOOKUP.EXTSNP_M L3 Lookup external snoop request that access cache and found line in M-state. 0,1 0 0 0 17 | CBO 0x34 0x81 UNC_CBO_CACHE_LOOKUP.ANY_M L3 Lookup any request that access cache and found line in M-state. 0,1 0 0 0 18 | CBO 0x34 0x18 UNC_CBO_CACHE_LOOKUP.READ_I L3 Lookup read request that access cache and found line in I-state. 0,1 0 0 0 19 | CBO 0x34 0x28 UNC_CBO_CACHE_LOOKUP.WRITE_I L3 Lookup write request that access cache and found line in I-state. 0,1 0 0 0 20 | CBO 0x34 0x48 UNC_CBO_CACHE_LOOKUP.EXTSNP_I L3 Lookup external snoop request that access cache and found line in I-state. 0,1 0 0 0 21 | CBO 0x34 0x88 UNC_CBO_CACHE_LOOKUP.ANY_I L3 Lookup any request that access cache and found line in I-state. 0,1 0 0 0 22 | CBO 0x34 0x1f UNC_CBO_CACHE_LOOKUP.READ_MESI L3 Lookup read request that access cache and found line in any MESI-state. 0,1 0 0 0 23 | CBO 0x34 0x2f UNC_CBO_CACHE_LOOKUP.WRITE_MESI L3 Lookup write request that access cache and found line in MESI-state. 0,1 0 0 0 24 | CBO 0x34 0x4f UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI L3 Lookup external snoop request that access cache and found line in MESI-state. 0,1 0 0 0 25 | CBO 0x34 0x8f UNC_CBO_CACHE_LOOKUP.ANY_MESI L3 Lookup any request that access cache and found line in MESI-state. 
0,1 0 0 0 26 | CBO 0x34 0x86 UNC_CBO_CACHE_LOOKUP.ANY_ES L3 Lookup any request that access cache and found line in E or S-state. 0,1 0 0 0 27 | CBO 0x34 0x46 UNC_CBO_CACHE_LOOKUP.EXTSNP_ES L3 Lookup external snoop request that access cache and found line in E or S-state. 0,1 0 0 0 28 | CBO 0x34 0x16 UNC_CBO_CACHE_LOOKUP.READ_ES L3 Lookup read request that access cache and found line in E or S-state. 0,1 0 0 0 29 | CBO 0x34 0x26 UNC_CBO_CACHE_LOOKUP.WRITE_ES L3 Lookup write request that access cache and found line in E or S-state. 0,1 0 0 0 30 | NCU 0x0 0x01 UNC_CLOCK.SOCKET This 48-bit fixed counter counts the UCLK cycles. FIXED 0 0 0 31 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.ALL Each cycle count number of all Core outgoing valid entries. Such entry is defined as valid from it's allocation till first of IDI0 or DRS0 messages is sent out. Accounts for Coherent and non-coherent traffic. 0 0 0 0 32 | iMPH-U 0x81 0x01 UNC_ARB_TRK_REQUESTS.ALL Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic. 0,1 0 0 0 33 | iMPH-U 0x81 0x20 UNC_ARB_TRK_REQUESTS.WRITES Number of Writes allocated - any write transactions: full/partials writes and evictions. 0,1 0 0 0 34 | iMPH-U 0x83 0x01 UNC_ARB_COH_TRK_OCCUPANCY.All Each cycle count number of valid entries in Coherency Tracker queue from allocation till deallocation. Aperture requests (snoops) appear as NC decoded internally and become coherent (snoop L3, access memory) 0 0 0 0 35 | iMPH-U 0x84 0x01 UNC_ARB_COH_TRK_REQUESTS.ALL Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc. 0,1 0 0 0 36 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 
0, 1 0 0 37 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSX/haswellx_matrix_bit_definitions_v20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 v3 family based on the Haswell-E microarchitecture - V20 2 | # 8/16/2018 4:23:38 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 Counts writebacks (modified to exclusive) 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_L3_DATA_RD 7 1 0,1 na 13 | PF_L3_RFO 8 1 0,1 na 14 | PF_L3_CODE_RD 9 1 0,1 na 15 | SPLIT_LOCK_UC_LOCK 10 1 Counts all locks that are either split across cache line boundaries or to uncacheable addresses 0,1 na 16 | STREAMING_STORES 11 1 Counts all non-temporal stores 0,1 na 17 | OTHER 15 1 Counts any other requests 0,1 na 18 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 na 19 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 na 20 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 na 21 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data reads 0,1 na 22 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 na 23 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 na 24 | ALL_READS 0,1,2,4,5,6,7,8,9,10 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 na 25 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 na 26 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 27 | SUPPLIER_NONE 17 3 0,1 317,334,335,336,337,338,339,340,341,342 na 28 | L3_HIT_M 18 3 0,1 317,334,335,336,337,338,339,340,341,342 na 29 | L3_HIT_E 19 3 0,1 317,334,335,336,337,338,339,340,341,342 na 30 | L3_HIT_S 20 3 0,1 317,334,335,336,337,338,339,340,341,342 na 31 | L3_HIT_F 21 3 0,1 317,334,335,336,337,338,339,340,341,342 na 32 | L3_HIT 18,19,20,21 3 0,1 317,334,335,336,337,338,339,340,341,342 na 33 | L3_MISS_LOCAL_DRAM 22 3 0,1 317,334,335,336,337,338,339,340,341,342 na 34 | L3_MISS_REMOTE_HOP0 27 3 0,1 317,334,335,336,337,338,339,340,341,342 na 35 | L3_MISS_REMOTE_HOP1 28 3 0,1 317,334,335,336,337,338,339,340,341,342 na 36 | L3_MISS_REMOTE_HOP2P 29 3 0,1 317,334,335,336,337,338,339,340,341,342 na 37 | L3_MISS 22,27,28,29 3 0,1 317,334,335,336,337,338,339,340,341,342 na 38 | SNOOP_NONE 31 4 0,1 na 39 | SNOOP_NOT_NEEDED 32 4 0,1 na 40 | SNOOP_MISS 33 4 0,1 na 41 | SNOOP_HIT_NO_FWD 34 4 0,1 na 42 | SNOOP_HIT_WITH_FWD 35 4 0,1 na na 43 | SNOOP_HITM 36 4 0,1 na 44 | SNOOP_NON_DRAM 37 4 0,1 na 45 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 na 46 | LLC_HIT.ANY_RESPONSE 18,19,20,21,31,32,33,34,35,36,37 2 hit in the L3 0,1 na na 47 | LLC_HIT.NO_SNOOP_NEEDED 18,19,20,21,32 2 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 0,1 na na 48 | LLC_HIT.SNOOP_MISS 18,19,20,21,33 2 hit in the L3 and the snoops sent to sibling cores return clean response 0,1 na na 49 | LLC_HIT.HIT_OTHER_CORE_NO_FWD 18,19,20,21,34 2 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 0,1 na na 50 | LLC_HIT.HITM_OTHER_CORE 18,19,20,21,36 2 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 0,1 na na 51 | LLC_MISS.ANY_RESPONSE 22,23,24,25,26,27,28,29,31,32,33,34,35,36,37 2 miss in the L3 0,1 na na 52 | LLC_MISS.LOCAL_DRAM 22,33,34 2 miss the L3 and the data is returned from local dram 0,1 na na 53 | 
LLC_MISS.REMOTE_DRAM 23,24,25,26,27,28,29,33,34 2 miss the L3 and the data is returned from remote dram 0,1 na na 54 | LLC_MISS.ANY_DRAM 22,23,24,25,26,27,28,29,33,34 2 miss the L3 and the data is returned from local or remote dram 0,1 na na 55 | LLC_MISS.REMOTE_HITM 22,23,24,25,26,27,28,29,36 2 miss the L3 and the modified data is transferred from remote cache 0,1 na na 56 | LLC_MISS.REMOTE_HIT_FORWARD 22,23,24,25,26,27,28,29,35 2 miss the L3 and clean or shared data is transferred from remote cache 0,1 na na 57 | PF_LLC_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na na 58 | PF_LLC_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na na 59 | PF_LLC_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 na na 60 | -------------------------------------------------------------------------------- /x86data/perfmon_data/HSX/haswellx_matrix_v20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 v3 family based on the Haswell-E microarchitecture - V20 2 | # 8/16/2018 4:23:38 AM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts writebacks (modified to exclusive) 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER Null 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 19 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 21 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 22 | ALL_READS Null 0x07f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 23 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 24 | Null LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the L3 25 | Null LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 26 | Null LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the L3 and the snoops sent to sibling cores return clean response 27 | Null LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded 28 | Null 
LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 29 | Null LLC_MISS.ANY_RESPONSE 0x3fbfc0 0,1 miss in the L3 30 | Null LLC_MISS.LOCAL_DRAM 0x060040 0,1 miss the L3 and the data is returned from local dram 31 | Null LLC_MISS.REMOTE_DRAM 0x063f80 0,1 miss the L3 and the data is returned from remote dram 32 | Null LLC_MISS.ANY_DRAM 0x063fc0 0,1 miss the L3 and the data is returned from local or remote dram 33 | Null LLC_MISS.REMOTE_HITM 0x103fc0 0,1 miss the L3 and the modified data is transferred from remote cache 34 | Null LLC_MISS.REMOTE_HIT_FORWARD 0x083fc0 0,1 miss the L3 and clean or shared data is transferred from remote cache 35 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVB/ivybridge_fp_arith_inst_v21.json: -------------------------------------------------------------------------------- 1 | [] -------------------------------------------------------------------------------- /x86data/perfmon_data/IVB/ivybridge_fp_arith_inst_v21.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Third Generation Intel Core Processors Based on the Ivy Bridge Microarchitecture - V21 2 | # 2/28/2018 4:20:34 PM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVB/ivybridge_matrix_bit_definitions_v21.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Third Generation Intel Core Processors Based on the Ivy Bridge Microarchitecture - V21 2 | # 2/28/2018 4:20:34 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 Counts core writebacks due to L2 evictions or L1 writeback requests 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_LLC_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na 13 | PF_LLC_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na 14 | PF_LLC_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 na 15 | OTHER 15 1 Counts any other requests 0,1 na 16 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 na 17 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 na 18 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 na 19 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data reads 0,1 na 20 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 na 21 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 na 22 | ALL_READS 0,1,2,4,5,6,7,8,9 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 na 23 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 na 24 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 25 | SUPPLIER_NONE 17 3 0,1 na 26 | LLC_HIT_M 18 3 0,1 na 27 | LLC_HIT_E 19 3 0,1 na 28 | LLC_HIT_S 20 3 0,1 na 29 | LLC_HIT_F 21 3 0,1 na 30 | LLC_HIT 18,19,20,21 3 0,1 na 31 | L3_MISS_LOCAL_DRAM 22 3 0,1 na 32 | L3_MISS_REMOTE_DRAM 22,23,24,25,26,27,28,29,30 3 0,1 na 33 | SNOOP_NONE 31 4 0,1 na 34 | SNOOP_NOT_NEEDED 32 4 0,1 na 35 | SNOOP_MISS 33 4 0,1 na 36 | SNOOP_HIT_NO_FWD 34 4 0,1 na 37 | SNOOP_HIT_WITH_FWD 35 4 0,1 18,19,20,21 na 38 | SNOOP_HITM 36 4 0,1 na 39 | SNOOP_NON_DRAM 37 4 0,1 na 40 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 na 41 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVB/ivybridge_matrix_v21.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Third Generation Intel Core Processors Based on the Ivy Bridge Microarchitecture - V21 2 | # 2/28/2018 4:20:34 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts core writebacks due to L2 evictions or L1 writeback requests 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER Null 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 19 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 21 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 22 | ALL_READS Null 0x03f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 23 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 24 | Null LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the LLC 25 | Null LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 26 | Null LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the LLC and the snoops sent to sibling cores return clean response 27 | Null LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not 
forwarded 28 | Null LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 29 | Null LLC_MISS.ANY_RESPONSE 0x3fffc0 0,1 miss in the LLC 30 | Null LLC_MISS.LOCAL_DRAM 0x060040 0,1 miss the LLC and the data returned from local dram 31 | Null LLC_MISS.ANY_DRAM 0x067fc0 0,1 miss the LLC and the data returned from local or remote dram 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVB/ivybridge_uncore_v21.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Third Generation Intel Core Processors Based on the Ivy Bridge Microarchitecture - V21 2 | # 2/28/2018 4:20:34 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask EventName Description Counter CounterMask Invert EdgeDetect 5 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.ALL Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC. 0 0 0 0 6 | ARB 0x81 0x01 UNC_ARB_TRK_REQUESTS.ALL Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC. 0,1 0 0 0 7 | ARB 0x81 0x20 UNC_ARB_TRK_REQUESTS.WRITES Counts the number of allocated write entries, include full, partial, and LLC evictions. 0,1 0 0 0 8 | ARB 0x81 0x80 UNC_ARB_TRK_REQUESTS.EVICTIONS Counts the number of LLC evictions allocated. 0,1 0 0 0 9 | ARB 0x83 0x01 UNC_ARB_COH_TRK_OCCUPANCY.ALL Cycles weighted by number of requests pending in Coherency Tracker. 0 0 0 0 10 | ARB 0x84 0x01 UNC_ARB_COH_TRK_REQUESTS.ALL Number of requests allocated in Coherency Tracker. 
0,1 0 0 0 11 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 0,1 1 0 0 12 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 0,1 10 0 0 13 | ARB 0x0 0x01 UNC_CLOCK.SOCKET This 48-bit fixed counter counts the UCLK cycles. Fixed 0 0 0 14 | CBO 0x34 0x11 UNC_CBO_CACHE_LOOKUP.READ_M L3 Lookup read request that access cache and found line in M-state. 0,1 0 0 0 15 | CBO 0x34 0x21 UNC_CBO_CACHE_LOOKUP.WRITE_M L3 Lookup write request that access cache and found line in M-state. 0,1 0 0 0 16 | CBO 0x34 0x41 UNC_CBO_CACHE_LOOKUP.EXTSNP_M L3 Lookup external snoop request that access cache and found line in M-state. 0,1 0 0 0 17 | CBO 0x34 0x81 UNC_CBO_CACHE_LOOKUP.ANY_M L3 Lookup any request that access cache and found line in M-state. 0,1 0 0 0 18 | CBO 0x34 0x18 UNC_CBO_CACHE_LOOKUP.READ_I L3 Lookup read request that access cache and found line in I-state. 0,1 0 0 0 19 | CBO 0x34 0x28 UNC_CBO_CACHE_LOOKUP.WRITE_I L3 Lookup write request that access cache and found line in I-state. 0,1 0 0 0 20 | CBO 0x34 0x48 UNC_CBO_CACHE_LOOKUP.EXTSNP_I L3 Lookup external snoop request that access cache and found line in I-state. 0,1 0 0 0 21 | CBO 0x34 0x88 UNC_CBO_CACHE_LOOKUP.ANY_I L3 Lookup any request that access cache and found line in I-state. 0,1 0 0 0 22 | CBO 0x34 0x1f UNC_CBO_CACHE_LOOKUP.READ_MESI L3 Lookup read request that access cache and found line in any MESI-state. 0,1 0 0 0 23 | CBO 0x34 0x2f UNC_CBO_CACHE_LOOKUP.WRITE_MESI L3 Lookup write request that access cache and found line in MESI-state. 
0,1 0 0 0 24 | CBO 0x34 0x4f UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI L3 Lookup external snoop request that access cache and found line in MESI-state. 0,1 0 0 0 25 | CBO 0x34 0x8f UNC_CBO_CACHE_LOOKUP.ANY_MESI L3 Lookup any request that access cache and found line in MESI-state. 0,1 0 0 0 26 | CBO 0x34 0x86 UNC_CBO_CACHE_LOOKUP.ANY_ES L3 Lookup any request that access cache and found line in E or S-state. 0,1 0 0 0 27 | CBO 0x34 0x46 UNC_CBO_CACHE_LOOKUP.EXTSNP_ES L3 Lookup external snoop request that access cache and found line in E or S-state. 0,1 0 0 0 28 | CBO 0x34 0x16 UNC_CBO_CACHE_LOOKUP.READ_ES L3 Lookup read request that access cache and found line in E or S-state. 0,1 0 0 0 29 | CBO 0x34 0x26 UNC_CBO_CACHE_LOOKUP.WRITE_ES L3 Lookup write request that access cache and found line in E or S-state. 0,1 0 0 0 30 | CBO 0x22 0x21 UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL An external snoop misses in some processor core. 0,1 0 0 0 31 | CBO 0x22 0x41 UNC_CBO_XSNP_RESPONSE.MISS_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core. 0,1 0 0 0 32 | CBO 0x22 0x81 UNC_CBO_XSNP_RESPONSE.MISS_EVICTION A cross-core snoop resulted from L3 Eviction which misses in some processor core. 0,1 0 0 0 33 | CBO 0x22 0x24 UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL An external snoop hits a non-modified line in some processor core. 0,1 0 0 0 34 | CBO 0x22 0x44 UNC_CBO_XSNP_RESPONSE.HIT_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core. 0,1 0 0 0 35 | CBO 0x22 0x84 UNC_CBO_XSNP_RESPONSE.HIT_EVICTION A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core. 0,1 0 0 0 36 | CBO 0x22 0x28 UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL An external snoop hits a modified line in some processor core. 
0,1 0 0 0 37 | CBO 0x22 0x48 UNC_CBO_XSNP_RESPONSE.HITM_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core. 0,1 0 0 0 38 | CBO 0x22 0x88 UNC_CBO_XSNP_RESPONSE.HITM_EVICTION A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core. 0,1 0 0 0 39 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVT/ivytown_matrix_bit_definitions_v20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 family and Intel(R) Xeon(R) processor E7 family Based on the Ivy Bridge-EP Microarchitecture - V20 2 | # 1/17/2018 5:48:43 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 null 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 null 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 null 8 | COREWB 3 1 Counts core writebacks due to L2 evictions or L1 writeback requests 0,1 null 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 null 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 null 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 null 12 | PF_LLC_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 null 13 | PF_LLC_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 null 14 | PF_LLC_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 null 15 | OTHER 15 1 Counts any other requests 0,1 null 16 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 null 17 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 null 18 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 null 19 | ALL_DATA_RD 0,4,7 1 
Counts all demand & prefetch data reads 0,1 null 20 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 null 21 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 null 22 | ALL_READS 0,1,2,4,5,6,7,8,9 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 null 23 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 null 24 | ANY_RESPONSE 16 2 have any response type. 0,1 null 25 | SUPPLIER_NONE 17 3 0,1 null 26 | LLC_HIT_M 18 3 0,1 null 27 | LLC_HIT_E 19 3 0,1 null 28 | LLC_HIT_S 20 3 0,1 null 29 | LLC_HIT_F 21 3 0,1 null 30 | LLC_HIT 18,19,20,21 3 0,1 null 31 | L3_MISS_LOCAL_DRAM 22 3 0,1 null 32 | L3_MISS_REMOTE_DRAM 22,23,24,25,26,27,28,29 3 0,1 null 33 | SNOOP_NONE 31 4 0,1 null 34 | SNOOP_NOT_NEEDED 32 4 0,1 null 35 | SNOOP_MISS 33 4 0,1 null 36 | SNOOP_HIT_NO_FWD 34 4 0,1 null 37 | SNOOP_HIT_WITH_FWD 35 4 0,1 18,19,20,21 null 38 | SNOOP_HITM 36 4 0,1 null 39 | SNOOP_NON_DRAM 37 4 0,1 null 40 | ANY_SNOOP 31,32,33,34,35,36,37 4 0,1 null 41 | -------------------------------------------------------------------------------- /x86data/perfmon_data/IVT/ivytown_matrix_v20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 family and Intel(R) Xeon(R) processor E7 family Based on the Ivy Bridge-EP Microarchitecture - V20 2 | # 1/17/2018 5:48:43 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD NULL 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO NULL 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD NULL 0x0004 0,1 Counts all demand code reads 8 | COREWB NULL 0x0008 0,1 Counts core writebacks due to L2 evictions or L1 writeback requests 9 | PF_L2_DATA_RD NULL 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO NULL 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD NULL 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD NULL 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO NULL 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD NULL 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER NULL 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD NULL 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO NULL 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD NULL 0x0240 0,1 Counts all prefetch code reads 19 | ALL_DATA_RD NULL 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO NULL 0x0122 0,1 Counts all demand & prefetch RFOs 21 | ALL_CODE_RD NULL 0x0244 0,1 Counts all demand & prefetch code reads 22 | ALL_READS NULL 0x03f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 23 | ALL_REQUESTS NULL 0x8fff 0,1 Counts all requests 24 | NULL LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the LLC 25 | NULL LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 26 | NULL LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the LLC and the snoops sent to sibling cores return clean response 27 | NULL LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not 
forwarded 28 | NULL LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 29 | NULL LLC_MISS.ANY_RESPONSE 0x3fffc0 0,1 miss in the LLC 30 | NULL LLC_MISS.LOCAL_DRAM 0x060040 0,1 miss the LLC and the data returned from local dram 31 | NULL LLC_MISS.ANY_DRAM 0x067fc0 0,1 miss the LLC and the data returned from local or remote dram 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/JKT/Jaketown_matrix_V20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 family Based on the Sandy Bridge-EP Microarchitecture - V20 2 | # 9/16/2016 11:35:14 AM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts core writebacks due to L2 evictions or L1 writeback requests 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER Null 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch 
code reads 19 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 21 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 22 | ALL_READS Null 0x03f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 23 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 24 | Null LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the LLC 25 | Null LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 26 | Null LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the LLC and the snoops sent to sibling cores return clean response 27 | Null LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded 28 | Null LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 29 | Null LLC_MISS.ANY_RESPONSE 0x3fffc0 0,1 miss in the LLC 30 | Null LLC_MISS.LOCAL_DRAM 0x060040 0,1 miss the LLC and the data returned from local dram 31 | Null LLC_MISS.ANY_DRAM 0x067fc0 0,1 miss the LLC and the data returned from local or remote dram 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/JKT/Jaketown_matrix_bit_definitions_V20.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon(R) processor E5 family Based on the Sandy Bridge-EP Microarchitecture - V20 2 | # 9/16/2016 11:35:14 AM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 TBD 0,1 Null Null 6 | DEMAND_RFO 1 1 TBD 0,1 Null Null 7 | DEMAND_CODE_RD 2 1 TBD 0,1 Null Null 8 | COREWB 3 1 TBD 0,1 Null Null 9 | PF_L2_DATA_RD 4 1 TBD 0,1 Null Null 10 | PF_L2_RFO 5 1 TBD 0,1 Null Null 11 | PF_L2_CODE_RD 6 1 TBD 0,1 Null Null 12 | PF_LLC_DATA_RD 7 1 TBD 0,1 Null Null 13 | PF_LLC_RFO 8 1 TBD 0,1 Null Null 14 | PF_LLC_CODE_RD 9 1 TBD 0,1 Null Null 15 | SPLIT_LOCK_UC_LOCK 10 1 TBD 0,1 Null Null 16 | STREAMING_STORES 11 1 TBD 0,1 Null Null 17 | OTHER 15 1 TBD 0,1 Null Null 18 | ALL_PF_DATA_RD 4,7 1 TBD 0,1 Null Null 19 | ALL_PF_RFO 5,8 1 TBD 0,1 Null Null 20 | ALL_PF_CODE_RD 6,9 1 TBD 0,1 Null Null 21 | ALL_DATA_RD 0,4,7 1 TBD 0,1 Null Null 22 | ALL_RFO 1,5,8 1 TBD 0,1 Null Null 23 | ALL_CODE_RD 2,6,9 1 TBD 0,1 Null Null 24 | ALL_READS 0,1,2,4,5,6,7,8,9 1 TBD 0,1 Null Null 25 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 TBD 0,1 Null Null 26 | ANY_RESPONSE 16 2 TBD 0,1 Null Null 27 | SUPPLIER_NONE 17 3 TBD 0,1 Null Null 28 | LLC_HIT_M 18 3 TBD 0,1 Null Null 29 | LLC_HIT_E 19 3 TBD 0,1 Null Null 30 | LLC_HIT_S 20 3 TBD 0,1 Null Null 31 | LLC_HIT_F 21 3 TBD 0,1 Null Null 32 | LLC_HIT 18,19,20,21 3 TBD 0,1 Null Null 33 | L3_MISS_LOCAL_DRAM 22 3 TBD 0,1 Null Null 34 | L3_MISS_REMOTE_DRAM 22,23,24,25,26,27,28,29,30 3 TBD 0,1 Null Null 35 | SNOOP_NONE 31 4 TBD 0,1 Null Null 36 | SNOOP_NOT_NEEDED 32 4 TBD 0,1 Null Null 37 | SNOOP_MISS 33 4 TBD 0,1 Null Null 38 | SNOOP_HIT_NO_FWD 34 4 TBD 0,1 Null Null 39 | SNOOP_HIT_WITH_FWD 35 4 TBD 0,1 18,19,20,21 Null 40 | SNOOP_HITM 36 4 TBD 0,1 Null Null 41 | SNOOP_NON_DRAM 37 4 TBD 0,1 Null Null 42 | ANY_SNOOP 31,32,33,34,35,36,37 4 TBD 0,1 Null Null 43 | -------------------------------------------------------------------------------- /x86data/perfmon_data/KNL/KnightsLanding_matrix_V9.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) 
Xeon Phi Processor Family based on the Intel(R) Many Integrated Core Architecture - V9 2 | # 10/4/2016 7:56:29 AM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand cacheable data and L1 prefetch data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts Demand cacheable data writes 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts demand code reads and prefetch code reads 8 | PF_L2_RFO Null 0x0020 0,1 Counts L2 data RFO prefetches (includes PREFETCHW instruction) 9 | PF_L2_CODE_RD Null 0x0040 0,1 Counts L2 code HW prefetches 10 | PARTIAL_READS Null 0x0080 0,1 Counts Partial reads (UC or WC and is valid only for Outstanding response type). 11 | PARTIAL_WRITES Null 0x0100 1 Counts Partial writes (UC or WT or WP and should be programmed on PMC1) 12 | UC_CODE_READS Null 0x0200 0,1 Counts UC code reads (valid only for Outstanding response type) 13 | BUS_LOCKS Null 0x0400 0,1 Counts Bus locks and split lock requests 14 | FULL_STREAMING_STORES Null 0x0800 0,1 Counts Full streaming stores (WC and should be programmed on PMC1) 15 | PF_SOFTWARE Null 0x1000 0,1 Counts Software Prefetches 16 | PF_L1_DATA_RD Null 0x2000 0,1 Counts L1 data HW prefetches 17 | PARTIAL_STREAMING_STORES Null 0x4000 1 Counts Partial streaming stores (WC and should be programmed on PMC1) 18 | STREAMING_STORES Null 0x4800 1 Counts all streaming stores (WC and should be programmed on PMC1) 19 | ANY_REQUEST Null 0x8000 0,1 Counts any request 20 | ANY_DATA_RD Null 0x3081 0,1 Counts Demand cacheable data and L1 prefetch data read requests 21 | ANY_RFO Null 0x0022 0,1 Counts Demand cacheable data write requests 22 | ANY_CODE_RD Null 0x0044 0,1 Counts Demand code reads and prefetch code read requests 23 | ANY_READ Null 0x32e7 0,1 Counts any Read request 24 | ANY_PF_L2 Null 0x0060 0,1 Counts any Prefetch requests 25 | Null ANY_RESPONSE 0x000001 0,1 accounts for any response 26 | 
Null DDR_NEAR 0x008080 0,1 accounts for data responses from DRAM Local. 27 | Null DDR_FAR 0x010100 0,1 accounts for data responses from DRAM Far. 28 | Null MCDRAM_NEAR 0x008020 0,1 accounts for data responses from MCDRAM Local. 29 | Null MCDRAM_FAR 0x010040 0,1 accounts for data responses from MCDRAM Far or Other tile L2 hit far. 30 | Null L2_HIT_NEAR_TILE_E_F 0x080008 0,1 accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in E/F state. 31 | Null L2_HIT_NEAR_TILE_M 0x100008 0,1 accounts for responses from a snoop request hit with data forwarded from its Near-other tile's L2 in M state. 32 | Null L2_HIT_FAR_TILE_E_F 0x080040 0,1 accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in E/F state. Valid only for SNC4 cluster mode. 33 | Null L2_HIT_FAR_TILE_M 0x100040 0,1 accounts for responses from a snoop request hit with data forwarded from its Far(not in the same quadrant as the request)-other tile's L2 in M state. 34 | Null NON_DRAM 0x200002 0,1 accounts for responses from any NON_DRAM system address. This includes MMIO transactions 35 | Null OUTSTANDING 0x400000 0 outstanding, per weighted cycle, from the time of the request to when any response is received. The oustanding response should be programmed only on PMC0. 
36 | Null MCDRAM 0x018060 0,1 accounts for responses from MCDRAM (local and far) 37 | Null DDR 0x018180 0,1 accounts for responses from DDR (local and far) 38 | Null L2_HIT_THIS_TILE_M 0x000200 0,1 accounts for responses which hit its own tile's L2 with data in M state 39 | Null L2_HIT_THIS_TILE_E 0x000400 0,1 accounts for responses which hit its own tile's L2 with data in E state 40 | Null L2_HIT_THIS_TILE_S 0x000800 0,1 accounts for responses which hit its own tile's L2 with data in S state 41 | Null L2_HIT_THIS_TILE_F 0x001000 0,1 accounts for responses which hit its own tile's L2 with data in F state 42 | Null L2_HIT_NEAR_TILE 0x180018 0,1 accounts for reponses from snoop request hit with data forwarded from its Near-other tile L2 in E/F/M state 43 | Null L2_HIT_FAR_TILE 0x180040 0,1 accounts for reponses from snoop request hit with data forwarded from it Far(not in the same quadrant as the request)-other tile L2 in E/F/M state. Valid only in SNC4 Cluster mode. 44 | -------------------------------------------------------------------------------- /x86data/perfmon_data/KNL/KnightsLanding_matrix_bit_definitions_V9.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon Phi Processor Family based on the Intel(R) Many Integrated Core Architecture - V9 2 | # 10/4/2016 7:56:30 AM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 TBD 0,1 Null Null 6 | DEMAND_RFO 1 1 TBD 0,1 Null Null 7 | DEMAND_CODE_RD 2 1 TBD 0,1 Null Null 8 | PF_L2_RFO 5 1 TBD 0,1 Null Null 9 | PF_L2_CODE_RD 6 1 TBD 0,1 Null Null 10 | PARTIAL_READS 7 1 TBD 0,1 Null Null 11 | PARTIAL_WRITES 8 1 TBD 1 Null Null 12 | UC_CODE_READS 9 1 TBD 0,1 Null Null 13 | BUS_LOCKS 10 1 TBD 0,1 Null Null 14 | FULL_STREAMING_STORES 11 1 TBD 0,1 Null Null 15 | PF_SOFTWARE 12 1 TBD 0,1 Null Null 16 | PF_L1_DATA_RD 13 1 TBD 0,1 Null Null 17 | PARTIAL_STREAMING_STORES 14 1 TBD 1 Null Null 18 | STREAMING_STORES 11,14 1 TBD 1 Null Null 19 | ANY_REQUEST 15 1 TBD 0,1 Null Null 20 | ANY_DATA_RD 0,7,12,13 1 TBD 0,1 Null Null 21 | ANY_RFO 1,5 1 TBD 0,1 Null Null 22 | ANY_CODE_RD 2,6 1 TBD 0,1 Null Null 23 | ANY_READ 0,1,2,5,6,7,9,12,13 1 TBD 0,1 Null Null 24 | ANY_PF_L2 5,6 1 TBD 0,1 Null Null 25 | ANY_RESPONSE 16 2 TBD 0,1 Null Null 26 | L2_HIT_OTHER_TILE_NEAR_E_F 19 3 TBD 0,1 Null Null 27 | L2_HIT_OTHER_TILE_MCDRAM_LOCAL 21 3 TBD 0,1 Null Null 28 | L2_HIT_OTHER_TILE_MCDRAM_FAR 22 3 TBD 0,1 Null Null 29 | DDR4_LOCAL_CLUSTER 23 3 TBD 0,1 Null Null 30 | DDR4_FAR_CLUSTER 24 3 TBD 0,1 Null Null 31 | L2_HIT_THIS_TILE_M 25 2 TBD 0,1 Null Null 32 | L2_HIT_THIS_TILE_E 26 2 TBD 0,1 Null Null 33 | L2_HIT_THIS_TILE_S 27 2 TBD 0,1 Null Null 34 | L2_HIT_THIS_TILE_F 28 2 TBD 0,1 Null Null 35 | L2_MISS_SNOOP_NONE 31 4 TBD 0,1 Null Null 36 | L2_MISS_SNOOP_NOT_NEEDED 32 4 TBD 0,1 Null Null 37 | L2_MISS_SNOOP_HIT_WITH_FWD 35 4 TBD 0,1 Null Null 38 | L2_MISS_SNOOP_HITM 36 4 TBD 0,1 Null Null 39 | L2_MISS_SNOOP_NON_DRAM 37 4 TBD 0,1 Null Null 40 | DDR_NEAR 23,31 2 TBD 0,1 Null Null 41 | DDR_FAR 24,32 2 TBD 0,1 Null Null 42 | MCDRAM_NEAR 21,31 2 TBD 0,1 Null Null 43 | MCDRAM_FAR 22,32 2 TBD 0,1 Null Null 44 | L2_HIT_NEAR_TILE_E 19,35 2 TBD 0,1 Null Null 45 | L2_HIT_NEAR_TILE_F 20,35 2 TBD 0,1 Null Null 46 | L2_HIT_NEAR_TILE_M 19,36 2 TBD 0,1 Null Null 47 | 
L2_HIT_FAR_TILE_E_F 22,35 2 TBD 0,1 Null Null 48 | L2_HIT_FAR_TILE_M 22,36 2 TBD 0,1 Null Null 49 | NON_DRAM 17,37 2 TBD 0,1 Null Null 50 | NON_CACHEABLE_TO_LOCAL 21,37 2 TBD 0,1 Null Null 51 | OUTSTANDING 38 2 TBD 0 Null Null 52 | L2_HIT_NEAR_TILE 19,20,35,36 2 TBD 0,1 Null Null 53 | L2_HIT_FAR_TILE 22,35,36 2 TBD 0,1 Null Null 54 | -------------------------------------------------------------------------------- /x86data/perfmon_data/KNM/readme.txt: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel(R) Xeon Phi Processor Family based on the Intel(R) Many Integrated Core Architecture 2 | # 03/06/2018 7:56:29 AM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 4 | 5 | ****************Note************************** 6 | 7 | Please use KNL perfmon events for KNM. 8 | 9 | ********************************************* 10 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_fp_arith_inst_v42.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "BitName": "SCALAR_DOUBLE", 4 | "BitIndex": "0", 5 | "FlopsMultiplier": "1", 6 | "Description": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 7 | }, 8 | { 9 | "BitName": "SCALAR_SINGLE", 10 | "BitIndex": "1", 11 | "FlopsMultiplier": "1", 12 | "Description": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. 
FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 13 | }, 14 | { 15 | "BitName": "128BIT_PACKED_DOUBLE", 16 | "BitIndex": "2", 17 | "FlopsMultiplier": "2", 18 | "Description": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 19 | }, 20 | { 21 | "BitName": "128BIT_PACKED_SINGLE", 22 | "BitIndex": "3", 23 | "FlopsMultiplier": "4", 24 | "Description": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 25 | }, 26 | { 27 | "BitName": "256BIT_PACKED_DOUBLE", 28 | "BitIndex": "4", 29 | "FlopsMultiplier": "4", 30 | "Description": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 31 | }, 32 | { 33 | "BitName": "256BIT_PACKED_SINGLE", 34 | "BitIndex": "5", 35 | "FlopsMultiplier": "8", 36 | "Description": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element." 37 | } 38 | ] -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_fp_arith_inst_v42.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Sixth Generation Intel Core Processors Based on the Skylake Microarchitecture - V42 2 | # 8/6/2018 3:30:52 PM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 7 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 8 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 9 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. 
Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 10 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 11 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 
12 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_matrix_bit_definitions_v42.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "BitName": "DEMAND_DATA_RD", 4 | "BitIndex": "0", 5 | "Type": "1", 6 | "Description": "Counts demand data reads", 7 | "MATRIX_REG": "0,1", 8 | "BitsNotCombinedWith": "", 9 | "Errata": "na" 10 | }, 11 | { 12 | "BitName": "DEMAND_RFO", 13 | "BitIndex": "1", 14 | "Type": "1", 15 | "Description": "Counts all demand data writes (RFOs)", 16 | "MATRIX_REG": "0,1", 17 | "BitsNotCombinedWith": "", 18 | "Errata": "na" 19 | }, 20 | { 21 | "BitName": "DEMAND_CODE_RD", 22 | "BitIndex": "2", 23 | "Type": "1", 24 | "Description": "Counts all demand code reads", 25 | "MATRIX_REG": "0,1", 26 | "BitsNotCombinedWith": "", 27 | "Errata": "na" 28 | }, 29 | { 30 | "BitName": "OTHER", 31 | "BitIndex": "15", 32 | "Type": "1", 33 | "Description": "Counts any other requests", 34 | "MATRIX_REG": "0,1", 35 | "BitsNotCombinedWith": "", 36 | "Errata": "na" 37 | }, 38 | { 39 | "BitName": "ANY_RESPONSE", 40 | "BitIndex": "16", 41 | "Type": "2", 42 | "Description": "have any response type.", 43 | "MATRIX_REG": "0,1", 44 | "BitsNotCombinedWith": "", 45 | "Errata": "na" 46 | }, 47 | { 48 | "BitName": "SUPPLIER_NONE", 49 | "BitIndex": "17", 50 | "Type": "3", 51 | "Description": null, 52 | "MATRIX_REG": "0,1", 53 | "BitsNotCombinedWith": "", 54 | "Errata": "na" 55 | }, 56 | { 57 | "BitName": "L3_HIT_M", 58 | "BitIndex": "18", 59 | "Type": "3", 60 | "Description": null, 61 | "MATRIX_REG": "0,1", 62 | "BitsNotCombinedWith": "", 63 | "Errata": "na" 64 | }, 65 | { 66 | "BitName": "L3_HIT_E", 67 | "BitIndex": "19", 68 | "Type": "3", 69 | "Description": null, 70 | "MATRIX_REG": "0,1", 71 | "BitsNotCombinedWith": "", 72 | "Errata": "na" 73 | }, 74 | { 75 | "BitName": "L3_HIT_S", 76 | "BitIndex": "20", 77 | "Type": "3", 78 | "Description": null, 79 | "MATRIX_REG": 
"0,1", 80 | "BitsNotCombinedWith": "", 81 | "Errata": "na" 82 | }, 83 | { 84 | "BitName": "L3_HIT", 85 | "BitIndex": "18,19,20", 86 | "Type": "3", 87 | "Description": null, 88 | "MATRIX_REG": "0,1", 89 | "BitsNotCombinedWith": "", 90 | "Errata": "na" 91 | }, 92 | { 93 | "BitName": "L4_HIT_LOCAL_L4", 94 | "BitIndex": "22", 95 | "Type": "3", 96 | "Description": null, 97 | "MATRIX_REG": "0,1", 98 | "BitsNotCombinedWith": "", 99 | "Errata": "na" 100 | }, 101 | { 102 | "BitName": "L3_MISS_LOCAL_DRAM", 103 | "BitIndex": "26", 104 | "Type": "3", 105 | "Description": null, 106 | "MATRIX_REG": "0,1", 107 | "BitsNotCombinedWith": "", 108 | "Errata": "na" 109 | }, 110 | { 111 | "BitName": "L3_MISS", 112 | "BitIndex": "26,27,28,29", 113 | "Type": "3", 114 | "Description": null, 115 | "MATRIX_REG": "0,1", 116 | "BitsNotCombinedWith": "", 117 | "Errata": "na" 118 | }, 119 | { 120 | "BitName": "SPL_HIT", 121 | "BitIndex": "30", 122 | "Type": "4", 123 | "Description": null, 124 | "MATRIX_REG": "0,1", 125 | "BitsNotCombinedWith": "", 126 | "Errata": "na" 127 | }, 128 | { 129 | "BitName": "SNOOP_NONE", 130 | "BitIndex": "31", 131 | "Type": "4", 132 | "Description": null, 133 | "MATRIX_REG": "0,1", 134 | "BitsNotCombinedWith": "", 135 | "Errata": "na" 136 | }, 137 | { 138 | "BitName": "SNOOP_NOT_NEEDED", 139 | "BitIndex": "32", 140 | "Type": "4", 141 | "Description": null, 142 | "MATRIX_REG": "0,1", 143 | "BitsNotCombinedWith": "", 144 | "Errata": "na" 145 | }, 146 | { 147 | "BitName": "SNOOP_MISS", 148 | "BitIndex": "33", 149 | "Type": "4", 150 | "Description": null, 151 | "MATRIX_REG": "0,1", 152 | "BitsNotCombinedWith": "", 153 | "Errata": "na" 154 | }, 155 | { 156 | "BitName": "SNOOP_HIT_NO_FWD", 157 | "BitIndex": "34", 158 | "Type": "4", 159 | "Description": null, 160 | "MATRIX_REG": "0,1", 161 | "BitsNotCombinedWith": "", 162 | "Errata": "na" 163 | }, 164 | { 165 | "BitName": "SNOOP_HIT_WITH_FWD", 166 | "BitIndex": "35", 167 | "Type": "4", 168 | "Description": null, 169 | 
"MATRIX_REG": "0,1", 170 | "BitsNotCombinedWith": "18,19,20,21", 171 | "Errata": "na" 172 | }, 173 | { 174 | "BitName": "SNOOP_HITM", 175 | "BitIndex": "36", 176 | "Type": "4", 177 | "Description": null, 178 | "MATRIX_REG": "0,1", 179 | "BitsNotCombinedWith": "", 180 | "Errata": "na" 181 | }, 182 | { 183 | "BitName": "SNOOP_NON_DRAM", 184 | "BitIndex": "37", 185 | "Type": "4", 186 | "Description": null, 187 | "MATRIX_REG": "0,1", 188 | "BitsNotCombinedWith": "", 189 | "Errata": "na" 190 | }, 191 | { 192 | "BitName": "ANY_SNOOP", 193 | "BitIndex": "30,31,32,33,34,35,36,37", 194 | "Type": "4", 195 | "Description": null, 196 | "MATRIX_REG": "0,1", 197 | "BitsNotCombinedWith": "", 198 | "Errata": "na" 199 | } 200 | ] -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_matrix_bit_definitions_v42.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Sixth Generation Intel Core Processors Based on the Skylake Microarchitecture - V42 2 | # 8/6/2018 3:30:52 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | OTHER 15 1 Counts any other requests 0,1 na 9 | ANY_RESPONSE 16 2 have any response type. 
0,1 na 10 | SUPPLIER_NONE 17 3 0,1 na 11 | L3_HIT_M 18 3 0,1 na 12 | L3_HIT_E 19 3 0,1 na 13 | L3_HIT_S 20 3 0,1 na 14 | L3_HIT 18,19,20 3 0,1 na 15 | L4_HIT_LOCAL_L4 22 3 0,1 na 16 | L3_MISS_LOCAL_DRAM 26 3 0,1 na 17 | L3_MISS 26,27,28,29 3 0,1 na 18 | SPL_HIT 30 4 0,1 na 19 | SNOOP_NONE 31 4 0,1 na 20 | SNOOP_NOT_NEEDED 32 4 0,1 na 21 | SNOOP_MISS 33 4 0,1 na 22 | SNOOP_HIT_NO_FWD 34 4 0,1 na 23 | SNOOP_HIT_WITH_FWD 35 4 0,1 18,19,20,21 na 24 | SNOOP_HITM 36 4 0,1 na 25 | SNOOP_NON_DRAM 37 4 0,1 na 26 | ANY_SNOOP 30,31,32,33,34,35,36,37 4 0,1 na 27 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_matrix_v42.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Sixth Generation Intel Core Processors Based on the Skylake Microarchitecture - V42 2 | # 8/6/2018 3:30:52 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | OTHER Null 0x8000 0,1 Counts any other requests 9 | Null ANY_RESPONSE 0x000001 0,1 have any response type. 
10 | Null SUPPLIER_NONE.SPL_HIT 0x004002 0,1 tbd 11 | Null SUPPLIER_NONE.SNOOP_NONE 0x008002 0,1 tbd 12 | Null SUPPLIER_NONE.SNOOP_NOT_NEEDED 0x010002 0,1 tbd 13 | Null SUPPLIER_NONE.SNOOP_MISS 0x020002 0,1 tbd 14 | Null SUPPLIER_NONE.SNOOP_HIT_NO_FWD 0x040002 0,1 tbd 15 | Null SUPPLIER_NONE.SNOOP_HITM 0x100002 0,1 tbd 16 | Null SUPPLIER_NONE.SNOOP_NON_DRAM 0x200002 0,1 tbd 17 | Null SUPPLIER_NONE.ANY_SNOOP 0x3fc002 0,1 tbd 18 | Null L3_HIT_M.SPL_HIT 0x004004 0,1 tbd 19 | Null L3_HIT_M.SNOOP_NONE 0x008004 0,1 tbd 20 | Null L3_HIT_M.SNOOP_NOT_NEEDED 0x010004 0,1 tbd 21 | Null L3_HIT_M.SNOOP_MISS 0x020004 0,1 tbd 22 | Null L3_HIT_M.SNOOP_HIT_NO_FWD 0x040004 0,1 tbd 23 | Null L3_HIT_M.SNOOP_HITM 0x100004 0,1 tbd 24 | Null L3_HIT_M.SNOOP_NON_DRAM 0x200004 0,1 tbd 25 | Null L3_HIT_M.ANY_SNOOP 0x3fc004 0,1 tbd 26 | Null L3_HIT_E.SPL_HIT 0x004008 0,1 tbd 27 | Null L3_HIT_E.SNOOP_NONE 0x008008 0,1 tbd 28 | Null L3_HIT_E.SNOOP_NOT_NEEDED 0x010008 0,1 tbd 29 | Null L3_HIT_E.SNOOP_MISS 0x020008 0,1 tbd 30 | Null L3_HIT_E.SNOOP_HIT_NO_FWD 0x040008 0,1 tbd 31 | Null L3_HIT_E.SNOOP_HITM 0x100008 0,1 tbd 32 | Null L3_HIT_E.SNOOP_NON_DRAM 0x200008 0,1 tbd 33 | Null L3_HIT_E.ANY_SNOOP 0x3fc008 0,1 tbd 34 | Null L3_HIT_S.SPL_HIT 0x004010 0,1 tbd 35 | Null L3_HIT_S.SNOOP_NONE 0x008010 0,1 tbd 36 | Null L3_HIT_S.SNOOP_NOT_NEEDED 0x010010 0,1 tbd 37 | Null L3_HIT_S.SNOOP_MISS 0x020010 0,1 tbd 38 | Null L3_HIT_S.SNOOP_HIT_NO_FWD 0x040010 0,1 tbd 39 | Null L3_HIT_S.SNOOP_HITM 0x100010 0,1 tbd 40 | Null L3_HIT_S.SNOOP_NON_DRAM 0x200010 0,1 tbd 41 | Null L3_HIT_S.ANY_SNOOP 0x3fc010 0,1 tbd 42 | Null L3_HIT.SPL_HIT 0x00401c 0,1 tbd 43 | Null L3_HIT.SNOOP_NONE 0x00801c 0,1 tbd 44 | Null L3_HIT.SNOOP_NOT_NEEDED 0x01001c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 45 | Null L3_HIT.SNOOP_MISS 0x02001c 0,1 hit in the L3 and the snoops sent to sibling cores return clean response. 
46 | Null L3_HIT.SNOOP_HIT_NO_FWD 0x04001c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 47 | Null L3_HIT.SNOOP_HITM 0x10001c 0,1 tbd 48 | Null L3_HIT.SNOOP_NON_DRAM 0x20001c 0,1 tbd 49 | Null L3_HIT.ANY_SNOOP 0x3fc01c 0,1 tbd 50 | Null L4_HIT_LOCAL_L4.SPL_HIT 0x004040 0,1 tbd 51 | Null L4_HIT_LOCAL_L4.SNOOP_NONE 0x008040 0,1 tbd 52 | Null L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED 0x010040 0,1 tbd 53 | Null L4_HIT_LOCAL_L4.SNOOP_MISS 0x020040 0,1 tbd 54 | Null L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD 0x040040 0,1 tbd 55 | Null L4_HIT_LOCAL_L4.SNOOP_HIT_WITH_FWD 0x080040 0,1 tbd 56 | Null L4_HIT_LOCAL_L4.SNOOP_HITM 0x100040 0,1 tbd 57 | Null L4_HIT_LOCAL_L4.SNOOP_NON_DRAM 0x200040 0,1 tbd 58 | Null L4_HIT_LOCAL_L4.ANY_SNOOP 0x3fc040 0,1 tbd 59 | Null L3_MISS_LOCAL_DRAM.SPL_HIT 0x004400 0,1 tbd 60 | Null L3_MISS_LOCAL_DRAM.SNOOP_NONE 0x008400 0,1 tbd 61 | Null L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED 0x010400 0,1 tbd 62 | Null L3_MISS_LOCAL_DRAM.SNOOP_MISS 0x020400 0,1 tbd 63 | Null L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD 0x040400 0,1 tbd 64 | Null L3_MISS_LOCAL_DRAM.SNOOP_HITM 0x100400 0,1 tbd 65 | Null L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM 0x200400 0,1 tbd 66 | Null L3_MISS_LOCAL_DRAM.ANY_SNOOP 0x3fc400 0,1 tbd 67 | Null L3_MISS.SPL_HIT 0x007c40 0,1 tbd 68 | Null L3_MISS.SNOOP_NONE 0x00bc40 0,1 tbd 69 | Null L3_MISS.SNOOP_NOT_NEEDED 0x013c40 0,1 tbd 70 | Null L3_MISS.SNOOP_MISS 0x023c40 0,1 tbd 71 | Null L3_MISS.SNOOP_HIT_NO_FWD 0x043c40 0,1 tbd 72 | Null L3_MISS.SNOOP_HITM 0x103c40 0,1 tbd 73 | Null L3_MISS.SNOOP_NON_DRAM 0x203c40 0,1 tbd 74 | Null L3_MISS.ANY_SNOOP 0x3ffc40 0,1 tbd 75 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKL/skylake_uncore_v42.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Sixth Generation Intel Core Processors Based on the Skylake Microarchitecture - V42 2 | # 8/6/2018 
3:30:52 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask EventName Description Counter CounterMask Invert EdgeDetect 5 | CBO 0x22 0x41 UNC_CBO_XSNP_RESPONSE.MISS_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core. 0,1 0 0 0 6 | CBO 0x22 0x81 UNC_CBO_XSNP_RESPONSE.MISS_EVICTION A cross-core snoop resulted from L3 Eviction which misses in some processor core. 0,1 0 0 0 7 | CBO 0x22 0x44 UNC_CBO_XSNP_RESPONSE.HIT_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core. 0,1 0 0 0 8 | CBO 0x22 0x48 UNC_CBO_XSNP_RESPONSE.HITM_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core. 0,1 0 0 0 9 | CBO 0x34 0x21 UNC_CBO_CACHE_LOOKUP.WRITE_M L3 Lookup write request that access cache and found line in M-state 0,1 0 0 0 10 | CBO 0x34 0x81 UNC_CBO_CACHE_LOOKUP.ANY_M L3 Lookup any request that access cache and found line in M-state 0,1 0 0 0 11 | CBO 0x34 0x18 UNC_CBO_CACHE_LOOKUP.READ_I L3 Lookup read request that access cache and found line in I-state 0,1 0 0 0 12 | CBO 0x34 0x88 UNC_CBO_CACHE_LOOKUP.ANY_I L3 Lookup any request that access cache and found line in I-state 0,1 0 0 0 13 | CBO 0x34 0x1f UNC_CBO_CACHE_LOOKUP.READ_MESI L3 Lookup read request that access cache and found line in any MESI-state 0,1 0 0 0 14 | CBO 0x34 0x2f UNC_CBO_CACHE_LOOKUP.WRITE_MESI L3 Lookup write request that access cache and found line in MESI-state 0,1 0 0 0 15 | CBO 0x34 0x8f UNC_CBO_CACHE_LOOKUP.ANY_MESI L3 Lookup any request that access cache and found line in MESI-state 0,1 0 0 0 16 | CBO 0x34 0x86 UNC_CBO_CACHE_LOOKUP.ANY_ES L3 Lookup any request that access cache and found line in E or S-state 0,1 0 0 0 17 | CBO 0x34 0x16 UNC_CBO_CACHE_LOOKUP.READ_ES L3 Lookup read request that access cache and found line in E 
or S-state 0,1 0 0 0 18 | CBO 0x34 0x26 UNC_CBO_CACHE_LOOKUP.WRITE_ES L3 Lookup write request that access cache and found line in E or S-state 0,1 0 0 0 19 | NCU 0x0 0x01 UNC_CLOCK.SOCKET This 48-bit fixed counter counts the UCLK cycles FIXED 0 0 0 20 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.ALL Number of all Core entries outstanding for the memory controller. The outstanding interval starts after LLC miss till return of first data chunk. Accounts for Coherent and non-coherent traffic. 0 0 0 0 21 | iMPH-U 0x81 0x01 UNC_ARB_TRK_REQUESTS.ALL Total number of Core outgoing entries allocated. Accounts for Coherent and non-coherent traffic. 0,1 0 0 0 22 | iMPH-U 0x81 0x02 UNC_ARB_TRK_REQUESTS.DRD_DIRECT Number of Core coherent Data Read requests sent to memory controller whose data is returned directly to requesting agent. 0,1 0 0 0 23 | iMPH-U 0x81 0x20 UNC_ARB_TRK_REQUESTS.WRITES Number of Writes allocated - any write transactions: full/partials writes and evictions. 0,1 0 0 0 24 | iMPH-U 0x84 0x01 UNC_ARB_COH_TRK_REQUESTS.ALL Number of entries allocated. Account for Any type: e.g. Snoop, Core aperture, etc. 0,1 0 0 0 25 | iMPH-U 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 0 1 0 0 26 | iMPH-U 0x80 0x02 UNC_ARB_TRK_OCCUPANCY.DATA_READ Number of Core Data Read entries outstanding for the memory controller. The outstanding interval starts after LLC miss till return of first data chunk. 0 0 0 0 27 | iMPH-U 0x81 0x02 UNC_ARB_TRK_REQUESTS.DATA_READ Number of Core coherent Data Read requests sent to memory controller whose data is returned directly to requesting agent. 
0,1 0 0 0 28 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKX/skylakex_fp_arith_inst_v1.12.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Xeon Processors Based on the Skylake Microarchitecture - V1.12 2 | # 8/6/2018 10:15:59 PM 3 | # Intel Confidential. Do not distribute. 4 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 5 | BitName BitIndex FlopsMultiplier Description 6 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 7 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 8 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 9 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 10 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 11 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 12 | SCALAR_DOUBLE 0 1 Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 13 | SCALAR_SINGLE 1 1 Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 14 | 128BIT_PACKED_DOUBLE 2 2 Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. 
DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 15 | 128BIT_PACKED_SINGLE 3 4 Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 16 | 256BIT_PACKED_DOUBLE 4 4 Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 17 | 256BIT_PACKED_SINGLE 5 8 Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. 18 | 512BIT_PACKED_DOUBLE 6 8 19 | 512BIT_PACKED_SINGLE 7 16 20 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKX/skylakex_matrix_bit_definitions_v1.12.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Xeon Processors Based on the Skylake Microarchitecture - V1.12 2 | # 8/6/2018 10:15:59 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 
4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na na 7 | DEMAND_CODE_RD 2 1 Counts demand instruction fetches and L1 instruction cache prefetches that 0,1 na na 8 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na na 9 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na na 10 | PF_L3_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na na 11 | PF_L3_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na na 12 | PF_L1D_AND_SW 10 1 Counts L1 data cache hardware prefetch requests and software prefetch requests 0,1 na na 13 | OTHER 15 1 Counts any other requests 0,1 na na 14 | ALL_PF_DATA_RD 4,7,10 1 TBD 0,1 na na 15 | ALL_PF_RFO 5,8 1 TBD 0,1 na na 16 | ALL_DATA_RD 0,4,7,10 1 TBD 0,1 na na 17 | ALL_RFO 1,5,8 1 TBD 0,1 na na 18 | ALL_READS 0,1,2,4,5,6,7,8,9,10 1 TBD 0,1 na na 19 | ANY_RESPONSE 16 2 have any response type. 
0,1 na na 20 | SUPPLIER_NONE 17 3 0,1 na na 21 | L3_HIT_M 18 3 0,1 na na 22 | L3_HIT_E 19 3 0,1 na na 23 | L3_HIT_S 20 3 0,1 na na 24 | L3_HIT_F 21 3 0,1 na na 25 | L3_HIT 18,19,20,21 3 TBD 0,1 na na 26 | L3_MISS_LOCAL_DRAM 26 3 0,1 na na 27 | L3_MISS_REMOTE_HOP1_DRAM 28 3 0,1 na na 28 | L3_MISS 26,27,28,29 3 TBD 0,1 na na 29 | SNOOP_NONE 31 4 0,1 na na 30 | NO_SNOOP_NEEDED 32 4 TBD 0,1 na na 31 | SNOOP_MISS 33 4 0,1 na na 32 | HIT_OTHER_CORE_NO_FWD 34 4 TBD 0,1 na na 33 | HIT_OTHER_CORE_FWD 35 4 TBD 0,1 18,19,20,21 na 34 | HITM_OTHER_CORE 36 4 TBD 0,1 na na 35 | ANY_SNOOP 31,32,33,34,35,36,37 4 TBD 0,1 na na 36 | L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD 23,24,25,27,28,29,33,34 2 TBD 0,1 na na 37 | L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD 26,33,34 2 TBD 0,1 na na 38 | L3_HIT.NO_SNOOP_NEEDED 18,19,20,21,32 2 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. tbd na na 39 | L3_HIT.HIT_OTHER_CORE_NO_FWD 18,19,20,21,34 2 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. tbd na na 40 | L3_HIT.HIT_OTHER_CORE_FWD 18,19,20,21,35 2 hit in the L3 and the snoop to one of the sibling cores hits the line in E/S/F state and the line is forwarded. tbd na na 41 | L3_HIT.HITM_OTHER_CORE 18,19,20,21,36 2 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. tbd na na 42 | L3_HIT.ANY_SNOOP 18,19,20,21,31,32,33,34,35,36,37 2 hit in the L3. tbd na na 43 | L3_MISS.ANY_SNOOP 26,27,28,29,31,32,33,34,35,36,37 2 miss in the L3. 
tbd na na 44 | L3_HIT.SNOOP_HIT_WITH_FWD 18, 19, 20, 21, 22, 35 2 0,1 18,19,20,21 na 45 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SKX/skylakex_matrix_v1.12.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Xeon Processors Based on the Skylake Microarchitecture - V1.12 2 | # 8/6/2018 10:15:59 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 9 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 10 | PF_L3_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 11 | PF_L3_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 12 | PF_L1D_AND_SW Null 0x0400 0,1 Counts L1 data cache hardware prefetch requests and software prefetch requests 13 | OTHER Null 0x8000 0,1 Counts any other requests 14 | ALL_PF_DATA_RD Null 0x0490 0,1 Counts all prefetch data reads 15 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 16 | ALL_DATA_RD Null 0x0491 0,1 Counts all demand & prefetch data reads 17 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 18 | ALL_READS Null 0x07f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 19 | Null ANY_RESPONSE 0x000001 0,1 have any response type. 20 | Null L3_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. 
21 | Null L3_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 22 | Null L3_HIT.HIT_OTHER_CORE_FWD 0x08003c 0,1 hit in the L3 and the snoop to one of the sibling cores hits the line in E/S/F state and the line is forwarded. 23 | Null L3_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. 24 | Null L3_HIT.ANY_SNOOP 0x3f803c 0,1 hit in the L3. 25 | Null L3_MISS.ANY_SNOOP 0x3fbc00 0,1 miss in the L3. 26 | Null L3_MISS.REMOTE_HIT_FORWARD 0x083fc0 0,1 miss the L3 and clean or shared data is transferred from remote cache. 27 | Null L3_MISS.REMOTE_HITM 0x103fc0 0,1 miss the L3 and the modified data is transferred from remote cache. 28 | Null L3_MISS.SNOOP_MISS_OR_NO_FWD 0x063fc0 0,1 miss the L3 and the data is returned from local or remote dram. 29 | Null L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD 0x063b80 0,1 miss the L3 and the data is returned from remote dram. 30 | Null L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD 0x060400 0,1 miss the L3 and the data is returned from local dram. 31 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SLM/Silvermont_matrix_V14.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Silvermont Microarchitecture - V14 2 | # 3/14/2017 1:13:51 PM 3 | # Copyright (c) 2007 - 2016 Intel Corporation. All rights reserved. 
4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand and DCU prefetch data read 6 | DEMAND_RFO Null 0x0002 0,1 Counts demand and DCU prefetch RFOs 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts demand and DCU prefetch instruction cacheline 8 | COREWB Null 0x0008 0,1 Counts writeback (modified to exclusive) 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts data cacheline reads generated by L2 prefetchers 10 | PF_L2_RFO Null 0x0020 0,1 Counts RFO requests generated by L2 prefetchers 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts code reads generated by L2 prefetchers 12 | PARTIAL_READS Null 0x0080 0,1 Counts demand reads of partial cache lines (including UC and WC) 13 | PARTIAL_WRITES Null 0x0100 0,1 Countsof demand RFO requests to write to partial cache lines 14 | UC_CODE_READS Null 0x0200 0,1 Counts UC instruction fetch 15 | BUS_LOCKS Null 0x0400 0,1 Bus lock and split lock 16 | PF_L1_DATA_RD Null 0x2000 0,1 Counts DCU hardware prefetcher data read 17 | ANY_REQUEST Null 0x8008 0,1 Counts any request 18 | STREAMING_STORES Null 0x4800 0,1 Counts streaming store 19 | ANY_DATA_RD Null 0x3091 0,1 Counts any data read (demand & prefetch) 20 | ANY_RFO Null 0x0022 0,1 Counts any rfo reads (demand & prefetch) 21 | ANY_CODE_RD Null 0x0044 0,1 Counts any code reads (demand & prefetch) 22 | ANY_READS Null 0x32f7 0,1 Counts any data/code/rfo reads (demand & prefetch) 23 | ANY_PF_L2 Null 0x0070 0,1 Counts any prefetch read 24 | Null ANY_RESPONSE 0x000001 0,1 have any response type. 25 | Null L2_MISS.NO_SNOOP_NEEDED 0x008000 0,1 miss L2 with no details on snoop-related information. 26 | Null L2_MISS.SNOOP_MISS 0x020000 0,1 miss L2 with a snoop miss response. 27 | Null L2_MISS.HIT_OTHER_CORE_NO_FWD 0x040000 0,1 miss L2 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. 
28 | Null L2_MISS.HITM_OTHER_CORE 0x100000 0,1 hit in the other module where modified copies were found in other core's L1 cache. 29 | Null L2_MISS.NON_DRAM 0x200000 0,1 miss L2 and the target was non-DRAM system address. 30 | Null L2_MISS.ANY 0x168000 0,1 miss L2. 31 | Null OUTSTANDING 0x400000 0 are outstanding, per cycle, from the time of the L2 miss to when any response is received. 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SNB/sandybridge_matrix_bit_definitions_v16.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Second Generation Intel Core Processors Based on the Sandy Bridge Microarchitecture - V16 2 | # 3/5/2018 10:22:58 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | BitName BitIndex Type Description MATRIX_REG BitsNotCombinedWith Errata 5 | DEMAND_DATA_RD 0 1 Counts demand data reads 0,1 na 6 | DEMAND_RFO 1 1 Counts all demand data writes (RFOs) 0,1 na 7 | DEMAND_CODE_RD 2 1 Counts all demand code reads 0,1 na 8 | COREWB 3 1 Counts core writebacks due to L2 evictions or L1 writeback requests 0,1 na 9 | PF_L2_DATA_RD 4 1 Counts prefetch (that bring data to L2) data reads 0,1 na 10 | PF_L2_RFO 5 1 Counts all prefetch (that bring data to L2) RFOs 0,1 na 11 | PF_L2_CODE_RD 6 1 Counts all prefetch (that bring data to LLC only) code reads 0,1 na 12 | PF_LLC_DATA_RD 7 1 Counts all prefetch (that bring data to LLC only) data reads 0,1 na 13 | PF_LLC_RFO 8 1 Counts all prefetch (that bring data to LLC only) RFOs 0,1 na 14 | PF_LLC_CODE_RD 9 1 Counts prefetch (that bring data to LLC only) code reads 0,1 na 15 | OTHER 15 1 Counts any other requests 0,1 na 16 | ALL_PF_DATA_RD 4,7 1 Counts all prefetch data reads 0,1 na 17 | ALL_PF_RFO 5,8 1 Counts prefetch RFOs 0,1 na 18 | ALL_PF_CODE_RD 6,9 1 Counts all prefetch code reads 0,1 na 19 | ALL_DATA_RD 0,4,7 1 Counts all demand & prefetch data 
reads 0,1 na 20 | ALL_RFO 1,5,8 1 Counts all demand & prefetch RFOs 0,1 na 21 | ALL_CODE_RD 2,6,9 1 Counts all demand & prefetch code reads 0,1 na 22 | ALL_READS 0,1,2,4,5,6,7,8,9 1 Counts all data/code/rfo reads (demand & prefetch) 0,1 na 23 | ALL_REQUESTS 0,1,2,3,4,5,6,7,8,9,10,11,15 1 Counts all requests 0,1 na 24 | ANY_RESPONSE 16 2 have any response type. 0,1 na 25 | SUPPLIER_NONE 17 3 tbd 0,1 na 26 | LLC_HIT_M 18 3 tbd 0,1 na 27 | LLC_HIT_E 19 3 tbd 0,1 na 28 | LLC_HIT_S 20 3 tbd 0,1 na 29 | LLC_HIT_F 21 3 tbd 0,1 na 30 | LLC_HIT 18,19,20,21 3 tbd 0,1 na 31 | L3_MISS_LOCAL_DRAM 22 3 tbd 0,1 na 32 | L3_MISS_REMOTE_DRAM 22,23,24,25,26,27,28,29,30 3 tbd 0,1 na 33 | SNOOP_NONE 31 4 tbd 0,1 na 34 | SNOOP_NOT_NEEDED 32 4 tbd 0,1 na 35 | SNOOP_MISS 33 4 tbd 0,1 na 36 | SNOOP_HIT_NO_FWD 34 4 tbd 0,1 na 37 | SNOOP_HIT_WITH_FWD 35 4 tbd 0,1 18,19,20,21 na 38 | SNOOP_HITM 36 4 tbd 0,1 na 39 | SNOOP_NON_DRAM 37 4 tbd 0,1 na 40 | ANY_SNOOP 31,32,33,34,35,36,37 4 tbd 0,1 na 41 | LLC_HIT.NO_SNOOP_NEEDED 18,19,20,21,32 2 hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 0,1 na na 42 | LLC_HIT.HIT_OTHER_CORE_NO_FWD 18,19,20,21,34 2 hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded 0,1 na na 43 | LLC_HIT.HITM_OTHER_CORE 18,19,20,21,36 2 hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 0,1 na na 44 | LLC_MISS.ANY_RESPONSE 22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37 2 miss in the LLC 0,1 na na 45 | LLC_MISS.LOCAL_DRAM 22,33,34 2 miss the LLC and the data returned from local dram 0,1 na na 46 | LLC_MISS.ANY_DRAM 22,23,24,25,26,27,28,29,30,33,34 2 miss the LLC and the data returned from local or remote dram 0,1 na na 47 | LLC_HIT.ANY_RESPONSE 18,19,20,21,31,32,33,34,35,36,37 2 hit in the LLC 0,1 na na 48 | 
-------------------------------------------------------------------------------- /x86data/perfmon_data/SNB/sandybridge_matrix_v16.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Second Generation Intel Core Processors Based on the Sandy Bridge Microarchitecture - V16 2 | # 2/28/2018 4:14:33 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | MATRIX_REQUEST MATRIX_RESPONSE MATRIX_VALUE MATRIX_REGISTER DESCRIPTION 5 | DEMAND_DATA_RD Null 0x0001 0,1 Counts demand data reads 6 | DEMAND_RFO Null 0x0002 0,1 Counts all demand data writes (RFOs) 7 | DEMAND_CODE_RD Null 0x0004 0,1 Counts all demand code reads 8 | COREWB Null 0x0008 0,1 Counts core writebacks due to L2 evictions or L1 writeback requests 9 | PF_L2_DATA_RD Null 0x0010 0,1 Counts prefetch (that bring data to L2) data reads 10 | PF_L2_RFO Null 0x0020 0,1 Counts all prefetch (that bring data to L2) RFOs 11 | PF_L2_CODE_RD Null 0x0040 0,1 Counts all prefetch (that bring data to LLC only) code reads 12 | PF_LLC_DATA_RD Null 0x0080 0,1 Counts all prefetch (that bring data to LLC only) data reads 13 | PF_LLC_RFO Null 0x0100 0,1 Counts all prefetch (that bring data to LLC only) RFOs 14 | PF_LLC_CODE_RD Null 0x0200 0,1 Counts prefetch (that bring data to LLC only) code reads 15 | OTHER Null 0x8000 0,1 Counts any other requests 16 | ALL_PF_DATA_RD Null 0x0090 0,1 Counts all prefetch data reads 17 | ALL_PF_RFO Null 0x0120 0,1 Counts prefetch RFOs 18 | ALL_PF_CODE_RD Null 0x0240 0,1 Counts all prefetch code reads 19 | ALL_DATA_RD Null 0x0091 0,1 Counts all demand & prefetch data reads 20 | ALL_RFO Null 0x0122 0,1 Counts all demand & prefetch RFOs 21 | ALL_CODE_RD Null 0x0244 0,1 Counts all demand & prefetch code reads 22 | ALL_READS Null 0x03f7 0,1 Counts all data/code/rfo reads (demand & prefetch) 23 | ALL_REQUESTS Null 0x8fff 0,1 Counts all requests 24 | Null LLC_HIT.ANY_RESPONSE 0x3f803c 0,1 hit in the LLC 25 | 
Null LLC_HIT.NO_SNOOP_NEEDED 0x01003c 0,1 hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores 26 | Null LLC_HIT.SNOOP_MISS 0x02003c 0,1 hit in the LLC and the snoops sent to sibling cores return clean response 27 | Null LLC_HIT.HIT_OTHER_CORE_NO_FWD 0x04003c 0,1 hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded 28 | Null LLC_HIT.HITM_OTHER_CORE 0x10003c 0,1 hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded 29 | Null LLC_MISS.ANY_RESPONSE 0x3fffc0 0,1 miss in the LLC 30 | Null LLC_MISS.LOCAL_DRAM 0x060040 0,1 miss the LLC and the data returned from local dram 31 | Null LLC_MISS.ANY_DRAM 0x067fc0 0,1 miss the LLC and the data returned from local or remote dram 32 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SNB/sandybridge_uncore_v16.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for the Second Generation Intel Core Processors Based on the Sandy Bridge Microarchitecture - V16 2 | # 2/28/2018 4:14:33 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask EventName Description Counter CounterMask Invert EdgeDetect 5 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.ALL Counts cycles weighted by the number of requests waiting for data returning from the memory controller. Accounts for coherent and non-coherent requests initiated by IA cores, processor graphic units, or LLC. 0 0 0 0 6 | ARB 0x81 0x01 UNC_ARB_TRK_REQUESTS.ALL Counts the number of coherent and in-coherent requests initiated by IA cores, processor graphic units, or LLC. 0,1 0 0 0 7 | ARB 0x81 0x20 UNC_ARB_TRK_REQUESTS.WRITES Counts the number of allocated write entries, include full, partial, and LLC evictions. 
0,1 0 0 0 8 | ARB 0x81 0x80 UNC_ARB_TRK_REQUESTS.EVICTIONS Counts the number of LLC evictions allocated. 0,1 0 0 0 9 | ARB 0x83 0x01 UNC_ARB_COH_TRK_OCCUPANCY.ALL Cycles weighted by number of requests pending in Coherency Tracker. 0 0 0 0 10 | ARB 0x84 0x01 UNC_ARB_COH_TRK_REQUESTS.ALL Number of requests allocated in Coherency Tracker. 0,1 0 0 0 11 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_WITH_ANY_REQUEST Cycles with at least one request outstanding is waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 0,1 1 0 0 12 | ARB 0x80 0x01 UNC_ARB_TRK_OCCUPANCY.CYCLES_OVER_HALF_FULL Cycles with at least half of the requests outstanding are waiting for data return from memory controller. Account for coherent and non-coherent requests initiated by IA Cores, Processor Graphics Unit, or LLC. 0,1 10 0 0 13 | ARB 0x0 0x01 UNC_CLOCK.SOCKET This 48-bit fixed counter counts the UCLK cycles. Fixed 0 0 0 14 | CBO 0x22 0x21 UNC_CBO_XSNP_RESPONSE.MISS_EXTERNAL An external snoop misses in some processor core. 0,1 0 0 0 15 | CBO 0x22 0x41 UNC_CBO_XSNP_RESPONSE.MISS_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which misses in some processor core. 0,1 0 0 0 16 | CBO 0x22 0x81 UNC_CBO_XSNP_RESPONSE.MISS_EVICTION A cross-core snoop resulted from L3 Eviction which misses in some processor core. 0,1 0 0 0 17 | CBO 0x22 0x24 UNC_CBO_XSNP_RESPONSE.HIT_EXTERNAL An external snoop hits a non-modified line in some processor core. 0,1 0 0 0 18 | CBO 0x22 0x44 UNC_CBO_XSNP_RESPONSE.HIT_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a non-modified line in some processor core. 0,1 0 0 0 19 | CBO 0x22 0x84 UNC_CBO_XSNP_RESPONSE.HIT_EVICTION A cross-core snoop resulted from L3 Eviction which hits a non-modified line in some processor core. 
0,1 0 0 0 20 | CBO 0x22 0x28 UNC_CBO_XSNP_RESPONSE.HITM_EXTERNAL An external snoop hits a modified line in some processor core. 0,1 0 0 0 21 | CBO 0x22 0x48 UNC_CBO_XSNP_RESPONSE.HITM_XCORE A cross-core snoop initiated by this Cbox due to processor core memory request which hits a modified line in some processor core. 0,1 0 0 0 22 | CBO 0x22 0x88 UNC_CBO_XSNP_RESPONSE.HITM_EVICTION A cross-core snoop resulted from L3 Eviction which hits a modified line in some processor core. 0,1 0 0 0 23 | CBO 0x34 0x11 UNC_CBO_CACHE_LOOKUP.READ_M L3 Lookup read request that access cache and found line in M-state. 0,1 0 0 0 24 | CBO 0x34 0x21 UNC_CBO_CACHE_LOOKUP.WRITE_M L3 Lookup write request that access cache and found line in M-state. 0,1 0 0 0 25 | CBO 0x34 0x41 UNC_CBO_CACHE_LOOKUP.EXTSNP_M L3 Lookup external snoop request that access cache and found line in M-state. 0,1 0 0 0 26 | CBO 0x34 0x81 UNC_CBO_CACHE_LOOKUP.ANY_M L3 Lookup any request that access cache and found line in M-state. 0,1 0 0 0 27 | CBO 0x34 0x18 UNC_CBO_CACHE_LOOKUP.READ_I L3 Lookup read request that access cache and found line in I-state. 0,1 0 0 0 28 | CBO 0x34 0x28 UNC_CBO_CACHE_LOOKUP.WRITE_I L3 Lookup write request that access cache and found line in I-state. 0,1 0 0 0 29 | CBO 0x34 0x48 UNC_CBO_CACHE_LOOKUP.EXTSNP_I L3 Lookup external snoop request that access cache and found line in I-state. 0,1 0 0 0 30 | CBO 0x34 0x88 UNC_CBO_CACHE_LOOKUP.ANY_I L3 Lookup any request that access cache and found line in I-state. 0,1 0 0 0 31 | CBO 0x34 0x1f UNC_CBO_CACHE_LOOKUP.READ_MESI L3 Lookup read request that access cache and found line in any MESI-state. 0,1 0 0 0 32 | CBO 0x34 0x2f UNC_CBO_CACHE_LOOKUP.WRITE_MESI L3 Lookup write request that access cache and found line in MESI-state. 0,1 0 0 0 33 | CBO 0x34 0x4f UNC_CBO_CACHE_LOOKUP.EXTSNP_MESI L3 Lookup external snoop request that access cache and found line in MESI-state. 
0,1 0 0 0 34 | CBO 0x34 0x8f UNC_CBO_CACHE_LOOKUP.ANY_MESI L3 Lookup any request that access cache and found line in MESI-state. 0,1 0 0 0 35 | CBO 0x34 0x86 UNC_CBO_CACHE_LOOKUP.ANY_ES L3 Lookup any request that access cache and found line in E or S-state. 0,1 0 0 0 36 | CBO 0x34 0x46 UNC_CBO_CACHE_LOOKUP.EXTSNP_ES L3 Lookup external snoop request that access cache and found line in E or S-state. 0,1 0 0 0 37 | CBO 0x34 0x16 UNC_CBO_CACHE_LOOKUP.READ_ES L3 Lookup read request that access cache and found line in E or S-state. 0,1 0 0 0 38 | CBO 0x34 0x26 UNC_CBO_CACHE_LOOKUP.WRITE_ES L3 Lookup write request that access cache and found line in E or S-state. 0,1 0 0 0 39 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SNR/snowridgex_core_v1.00.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Tremont Microarchitecture - V1.00 2 | # 4/5/2019 5:20:56 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | EventCode UMask EventName BriefDescription Counter PEBScounters SampleAfterValue MSRIndex MSRValue CollectPEBSRecord CounterMask Invert AnyThread EdgeDetect PEBS Data_LA Errata PDIR_COUNTER 5 | 0x00 0x01 INST_RETIRED.ANY Counts the number of instructions retired. (Fixed event) 32 32 2000003 0x00 0x00 2 0 0 0 0 1 0 0 0 6 | 0x00 0x02 CPU_CLK_UNHALTED.CORE Counts the number of unhalted core clock cycles. (Fixed event) 33 33 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 7 | 0x00 0x03 CPU_CLK_UNHALTED.REF_TSC Counts the number of unhalted reference clock cycles at TSC frequency. (Fixed event) 34 34 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 8 | 0x08 0x02 DTLB_LOAD_MISSES.WALK_COMPLETED_4K Page walk completed due to a demand load to a 4K page. 
0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 0 0 0 na 9 | 0x08 0x04 DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M Page walk completed due to a demand load to a 2M or 4M page. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 0 0 0 na 10 | 0x2e 0x41 LONGEST_LAT_CACHE.MISS Counts memory requests originating from the core that miss in the last level cache. If the platform has an L3 cache, last level cache is the L3, otherwise it is the L2. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 0 0 0 na 11 | 0x2e 0x4f LONGEST_LAT_CACHE.REFERENCE Counts memory requests originating from the core that reference a cache line in the last level cache. If the platform has an L3 cache, last level cache is the L3, otherwise it is the L2. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 0 0 0 na 12 | 0x3c 0x00 CPU_CLK_UNHALTED.CORE_P Counts the number of unhalted core clock cycles. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 13 | 0x3c 0x01 CPU_CLK_UNHALTED.REF Counts the number of unhalted reference clock cycles at TSC frequency. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 14 | 0x49 0x02 DTLB_STORE_MISSES.WALK_COMPLETED_4K Page walk completed due to a demand data store to a 4K page. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 15 | 0x49 0x04 DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M Page walk completed due to a demand data store to a 2M or 4M page. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 16 | 0x81 0x04 ITLB.FILLS Counts the number of times there was an ITLB miss and a new translation was filled into the ITLB. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 0 0 0 na 17 | 0x85 0x02 ITLB_MISSES.WALK_COMPLETED_4K Page walk completed due to an instruction fetch in a 4K page. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 18 | 0x85 0x04 ITLB_MISSES.WALK_COMPLETED_2M_4M Page walk completed due to an instruction fetch in a 2M or 4M page. 0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 0 0 0 na 19 | 0xc0 0x00 INST_RETIRED.ANY_P Counts the number of instructions retired. 
0,1,2,3 0,1,2,3 2000003 0x00 0x00 2 0 0 0 0 1 0 0 0 20 | 0xc4 0x00 BR_INST_RETIRED.ALL_BRANCHES Counts the number of branch instructions retired for all branch types. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 1 0 0 0 21 | 0xc5 0x00 BR_MISP_RETIRED.ALL_BRANCHES Counts the number of mispredicted branch instructions retired. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 1 0 0 0 22 | 0xd0 0x81 MEM_UOPS_RETIRED.ALL_LOADS Counts the number of load uops retired. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 1 1 0 0 23 | 0xd0 0x82 MEM_UOPS_RETIRED.ALL_STORES Counts the number of store uops retired. 0,1,2,3 0,1,2,3 200003 0x00 0x00 2 0 0 0 0 1 1 0 0 24 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SNR/snowridgex_offcore_v1.00.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Tremont Microarchitecture - V1.00 2 | # 4/5/2019 5:20:57 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask PortMask FCMask UMaskExt EventName Description Counter MSRValue ELLC Filter Internal Deprecated FILTER_VALUE 5 | MLC MLC 0XB7 0x01,0x02 0 0 0 OCR.DEMAND_DATA_RD.ANY_RESPONSE Counts demand data reads that have any response type. 0,1,2,3 0x000000000000010001 0 0 0 0 0 6 | MLC MLC 0XB7 0x01,0x02 0 0 0 OCR.DEMAND_DATA_RD.L3_MISS Counts demand data reads that was not supplied by the L3 cache. 0,1,2,3 0x000000003F04000001 0 0 0 0 0 7 | MLC MLC 0XB7 0x01,0x02 0 0 0 OCR.DEMAND_RFO.ANY_RESPONSE Counts all demand reads for ownership (RFO) requests and software based prefetches for exclusive ownership (PREFETCHW) that have any response type. 0,1,2,3 0x000000000000010002 0 0 0 0 0 8 | MLC MLC 0XB7 0x01,0x02 0 0 0 OCR.DEMAND_RFO.L3_MISS Counts all demand reads for ownership (RFO) requests and software based prefetches for exclusive ownership (PREFETCHW) that was not supplied by the L3 cache. 
0,1,2,3 0x000000003F04000002 0 0 0 0 0 9 | -------------------------------------------------------------------------------- /x86data/perfmon_data/SNR/snowridgex_uncore_v1.00.tsv: -------------------------------------------------------------------------------- 1 | # Performance Monitoring Events for Intel Atom Processors Based on the Tremont Microarchitecture - V1.00 2 | # 4/5/2019 5:20:57 PM 3 | # Copyright (c) 2007 - 2017 Intel Corporation. All rights reserved. 4 | Unit EventCode UMask PortMask FCMask UMaskExt EventName Description Counter MSRValue ELLC Filter Internal Deprecated FILTER_VALUE 5 | CHA 0x00 0x00 0x00 0x00 0x00 UNC_CHA_CLOCKTICKS Clockticks of the uncore caching and home agent (CHA) 0,1,2,3 0x00 0 na 0 0 0 6 | IIO 0x01 0x00 0x00 0x00 0x00 UNC_IIO_CLOCKTICKS Clockticks of the integrated IO (IIO) traffic controller 0,1,2,3 0x00 0 na 0 0 0 7 | IRP 0x01 0x00 0x00 0x00 0x00 UNC_I_CLOCKTICKS Clockticks of the IO coherency tracker (IRP) 0,1 0x00 0 na 0 0 0 8 | iMC 0x04 0x0f 0x00 0x00 0x00 UNC_M_CAS_COUNT.RD All DRAM read CAS commands issued (including underfills) 0,1,2,3 0x00 0 na 0 0 0 9 | iMC 0x04 0x30 0x00 0x00 0x00 UNC_M_CAS_COUNT.WR All DRAM write CAS commands issued 0,1,2,3 0x00 0 na 0 0 0 10 | iMC 0x00 0x00 0x00 0x00 0x00 UNC_M_CLOCKTICKS Clockticks of the integrated memory controller (IMC) 0,1,2,3 0x00 0 na 0 0 0 11 | M2M 0x00 0x00 0x00 0x00 0x00 UNC_M2M_CLOCKTICKS Clockticks of the mesh to memory (M2M) 0,1,2,3 0x00 0 na 0 0 0 12 | M2PCIe 0x01 0x00 0x00 0x00 0x00 UNC_M2P_CLOCKTICKS Clockticks of the mesh to PCI (M2P) 0,1,2,3 0x00 0 na 0 0 0 13 | UBOX 0x00 0x01 0x00 0x00 0x00 UNC_U_CLOCKTICKS Clockticks in the UBOX using a dedicated 48-bit Fixed Counter FIXED 0x00 0 na 0 0 0 14 | PCU 0x00 0x00 0x00 0x00 0x00 UNC_P_CLOCKTICKS Clockticks of the power control unit (PCU) 0,1,2,3 0x00 0 na 0 0 0 15 | -------------------------------------------------------------------------------- /x86data/perfmon_data/TMA_Metrics.xlsx: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/rust-x86/ae3306a372c82a92b2e0f7ca81c6664455625c7f/x86data/perfmon_data/TMA_Metrics.xlsx -------------------------------------------------------------------------------- /x86data/perfmon_data/readme.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gz/rust-x86/ae3306a372c82a92b2e0f7ca81c6664455625c7f/x86data/perfmon_data/readme.txt -------------------------------------------------------------------------------- /x86test/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "x86test" 3 | version = "0.0.5" 4 | authors = ["Gerd Zellweger "] 5 | 6 | repository = "https://github.com/gz/rust-x86" 7 | documentation = "https://docs.rs/x86test" 8 | 9 | readme = "README.md" 10 | keywords = ["vm", "os", "amd64", "kvm", "x86-64"] 11 | license = "MIT OR Apache-2.0" 12 | edition = '2018' 13 | 14 | description = """ 15 | Custom test runner for bare-metal x86 tests. 16 | """ 17 | 18 | [dependencies] 19 | x86test-macro = { path = "x86test_macro", version = "0.0.5" } 20 | x86test-types = { path = "x86test_types", version = "0.0.5" } 21 | kvm-sys = "0.3.0" 22 | x86 = { version = "0.52" } 23 | mmap = "0.1.1" 24 | log = "0.4" 25 | klogger = { version = "0.0.12", features = ["use_ioports"] } 26 | -------------------------------------------------------------------------------- /x86test/README.md: -------------------------------------------------------------------------------- 1 | # x86test custom test runner 2 | 3 | x86test is a custom test runner that allows you to write unit tests which use 4 | privileged (x86) instructions. 5 | 6 | It achieves that as follows: for every unit test it creates a tiny VM (using 7 | kvm) which mirrors the address space of the current test process inside the 8 | guest VM. 
Next the VM is initialized and jumps to the unit test function which 9 | is now executed in guest ring 0 (and here you can use all your fancy 10 | instructions). Finally, once the test returns (or panics), control is 11 | transferred back from the VM to our test runner. 12 | 13 | Funky? Yes. 14 | 15 | Is it hard to use? No! It integrates neatly with rust thanks to the rust custom 16 | test framework and procedural macros. See the example below. 17 | 18 | Does it work? It has limitations (this is expected, since you're running on bare-metal 19 | x86), so don't expect much infrastructure. For panic and assert you have to use 20 | special versions; also, you can't use anything that does system calls (like 21 | println!, but a custom sprintln! macro is provided). 22 | 23 | ## An example 24 | 25 | This is particularly helpful to test the [x86 crate](https://github.com/gz/rust-x86). 26 | For example, say we have a function like this: 27 | 28 | ```rust 29 | /// Read 16 bits from port 30 | #[inline] 31 | pub unsafe fn inw(port: u16) -> u16 { 32 | let ret: u16; 33 | asm!("inw %dx, %ax", in("dx") port, out("ax") ret, options(att_syntax)); 34 | ret 35 | } 36 | ``` 37 | 38 | The problem with `inw` is that it needs IO privilege level in E/RFlags to not 39 | cause an exception (and as a result crash the process). A regular Linux process 40 | will not run with this privilege level; however, we can now write an x86test: 41 | 42 | ```rust 43 | #[x86test(ioport(0x1, 0xfe))] 44 | fn check_inw_port_read() { 45 | unsafe { 46 | kassert!( 47 | x86::io::inw(0x1) == 0xfe, 48 | "`inw` instruction didn't read the correct value" 49 | ); 50 | } 51 | } 52 | ``` 53 | 54 | A few things are happening here that warrant some explaining: 55 | 56 | First, instead of `#[test]` we used `#[x86test]` to tell the system we don't 57 | want to use regular unit tests.
`x86test` supports a few arguments (more on 58 | that later); here we just tell the "hypervisor" of the test runner to install 59 | an ioport with port number 1 that shall always return 0xfe when being read. 60 | Next comes our function declaration -- nothing special here -- followed by 61 | `unsafe`, just because `inw` is unsafe. Finally, we use `kassert!`, a custom assert 62 | macro that works in guest ring 0 for our hypervisor, to check that `inw` does 63 | the right thing. 64 | 65 | You'll find more example tests among the [x86 tests](../tests/kvm/bin.rs). 66 | Note that running an x86test currently works only on Linux and requires some linking magic. 67 | Setting `RUSTFLAGS="-C relocation-model=dynamic-no-pic -C code-model=kernel"` should do. 68 | I expect the custom `RUSTFLAGS` to not be necessary in the future. 69 | 70 | ## x86test reference 71 | 72 | The x86test attribute currently supports the following parameters: 73 | 74 | * `ioport(port, val)`: Reads from `port` will return `val`; writes to `port` of any value other than `val` will fail the test. 75 | * `ram(from, to)`: Adds physical memory in address range `from` -- `to`. 76 | * `should_halt`: Tells the hypervisor that the test will halt (note: use like this `#[x86test(should_halt)]`). 77 | * `#[should_panic]`: Can be added if a test is expected to panic. 78 | 79 | ## Code Organization 80 | 81 | * [x86test_macro](x86test_macro): contains a procedural macro implementation of `x86test`. 82 | * [x86test_types](x86test_types): contains implementations of kassert, kpanic and the X86TestFn struct. 83 | * [src](src): contains the custom test runner implementation.
84 | 85 | ## Updating 86 | 87 | Releases should be done in the following order: 88 | 89 | * Release new version of `x86test-types` 90 | * Release new version of `x86test-macro` (adjust version dependency of x86test-types) 91 | * Release new version of `x86test` (adjust version dependency of x86test-types and x86test-macro) 92 | * Tag with `git tag x86test-0.0.x` 93 | -------------------------------------------------------------------------------- /x86test/src/lib.rs: -------------------------------------------------------------------------------- 1 | //! x86test infrastructure to run Rust unit tests in guest-ring 0. 2 | #![feature(lang_items)] 3 | 4 | extern crate klogger; 5 | extern crate kvm_sys as kvm; 6 | extern crate mmap; 7 | extern crate x86; 8 | 9 | #[macro_use] 10 | extern crate log; 11 | 12 | extern crate x86test_macro; 13 | extern crate x86test_types; 14 | 15 | mod hypervisor; 16 | pub mod runner; 17 | 18 | pub use x86test_macro::x86test; 19 | pub use x86test_types::*; 20 | 21 | pub use klogger::{sprint, sprintln}; 22 | pub use x86::io::outw; 23 | -------------------------------------------------------------------------------- /x86test/src/runner.rs: -------------------------------------------------------------------------------- 1 | use crate::hypervisor::{ 2 | handle_ioexit, IoHandleStatus, PhysicalMemory, SerialPrinter, TestEnvironment, 3 | }; 4 | use kvm::{Exit, System}; 5 | use x86::bits64::paging::VAddr; 6 | 7 | use crate::X86TestFn; 8 | 9 | /// Start the test harness. 10 | pub fn test_start(ntests: usize) { 11 | println!("running {} tests (using x86test runner)", ntests) 12 | } 13 | 14 | /// Signals that the given test is ignored. 15 | pub fn test_ignored(name: &str) { 16 | println!("test {} ... ignored", name); 17 | } 18 | 19 | /// Output before a new test is run. 20 | pub fn test_before_run(name: &str) { 21 | print!("test {} ... ", name); 22 | } 23 | 24 | /// Output when a test fails.
25 | pub fn test_failed(_name: &str) { 26 | println!("FAILED"); 27 | } 28 | 29 | /// Output when a test succeeds. 30 | pub fn test_success(_name: &str) { 31 | println!("OK"); 32 | } 33 | 34 | /// Summary display at the end of the test run. 35 | pub fn test_summary(passed: usize, failed: usize, ignored: usize) { 36 | println!( 37 | "\ntest result: {} {} passed; {} failed; {} ignored", 38 | if failed == 0 { "OK" } else { "FAILED" }, 39 | passed, 40 | failed, 41 | ignored 42 | ); 43 | 44 | if failed != 0 { 45 | std::process::exit(101); 46 | } 47 | } 48 | 49 | /// Actual logic to run a list of KVM tests. 50 | pub fn runner(tests: &[&X86TestFn]) { 51 | test_start(tests.len()); 52 | 53 | let mut failed = 0; 54 | let mut ignored = 0; 55 | let mut passed = 0; 56 | for test in tests { 57 | if test.ignore { 58 | ignored += 1; 59 | test_ignored(test.name); 60 | } else { 61 | test_before_run(test.name); 62 | 63 | let sys = System::initialize().unwrap(); 64 | let mut stack = PhysicalMemory::new(0x3000000); 65 | let mut heap = PhysicalMemory::new(0x6000000); 66 | let mut ptables = PhysicalMemory::new(0x9000000); 67 | 68 | let mut test_environment = 69 | TestEnvironment::new(&sys, &mut stack, &mut heap, &mut ptables); 70 | let mut printer: SerialPrinter = SerialPrinter::new(); 71 | 72 | let test_fn_vaddr = VAddr::from_usize(test.testfn.0 as *const () as usize); 73 | let mut vcpu = test_environment.create_vcpu(test_fn_vaddr); 74 | 75 | let mut vm_is_done = false; 76 | let mut test_panicked = false; 77 | 78 | while !vm_is_done { 79 | let run = unsafe { vcpu.run() }.unwrap(); 80 | match run.exit_reason { 81 | Exit::Io => { 82 | match handle_ioexit(test, &mut vcpu, &run, &mut printer) { 83 | Result::Ok(IoHandleStatus::Handled) => { /* Continue */ } 84 | Result::Ok(IoHandleStatus::TestSuccessful) => vm_is_done = true, 85 | Result::Ok(IoHandleStatus::TestPanic(code)) => { 86 | if !test.should_panic { 87 | debug!( 88 | "IoHandleStatus::TestPanic {} should_panic is {}", 89 | code, 
test.should_panic 90 | ); 91 | } 92 | vm_is_done = true; 93 | test_panicked = true; 94 | } 95 | Result::Err(err) => { 96 | if !test.should_panic { 97 | println!("Test failed due to unexpected IO: {:?}", err); 98 | } 99 | vm_is_done = true; 100 | test_panicked = true; 101 | } 102 | } 103 | } 104 | Exit::Shutdown => { 105 | println!( 106 | "Exit::Shutdown cpu.get_regs() {:#x}", 107 | vcpu.get_regs().unwrap().rip 108 | ); 109 | println!("Exit::Shutdown cpu.get_sregs() {:#?}", vcpu.get_sregs()); 110 | vm_is_done = true; 111 | test_panicked = true; 112 | } 113 | Exit::Hlt => { 114 | vm_is_done = true; 115 | test_panicked = if test.should_halt { false } else { true }; 116 | } 117 | _ => { 118 | test_panicked = true; 119 | println!("Unknown exit reason: {:?}", run.exit_reason); 120 | break; 121 | } 122 | } 123 | } 124 | 125 | if test_panicked == test.should_panic { 126 | passed += 1; 127 | test_success(test.name); 128 | } else { 129 | failed += 1; 130 | test_failed(test.name); 131 | } 132 | } 133 | } 134 | 135 | test_summary(passed, failed, ignored); 136 | } 137 | -------------------------------------------------------------------------------- /x86test/x86test_macro/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "x86test-macro" 3 | version = "0.0.5" 4 | authors = ["Gerd Zellweger "] 5 | 6 | repository = "https://github.com/gz/rust-x86" 7 | documentation = "https://docs.rs/x86test-macro" 8 | 9 | keywords = ["vm", "os", "amd64", "kvm", "x86-64"] 10 | license = "MIT OR Apache-2.0" 11 | edition = '2018' 12 | 13 | description = """ 14 | Procedural macro plugin for x86test. 
/// Assertion for code running inside the x86test guest.
///
/// If `$test` evaluates to `false`, prints a message with `sprintln!` and
/// writes `0x01` to I/O port `0xf4` to signal a failed test.
///
/// * `kassert!(cond)` reports the stringified condition.
/// * `kassert!(cond, "fmt", args...)` reports the formatted message.
///
/// The expansion refers to `sprintln!` and `x86test::outw`, so both must be
/// resolvable at the call site.
#[macro_export]
macro_rules! kassert {
    ($test:expr) => ({
        if !$test {
            sprintln!("kassertion failed: {}, {}:{}:{}", stringify!($test), file!(), line!(), column!());
            // The caller may already be inside an `unsafe` block.
            #[allow(unused_unsafe)]
            unsafe { x86test::outw(0xf4, 0x01); } // exit failure
        }
    });
    ($test:expr, $($arg:tt)+) => ({
        if !$test {
            sprintln!("kassertion failed: {}, {}:{}:{}", format_args!($($arg)+), file!(), line!(), column!());
            #[allow(unused_unsafe)]
            unsafe { x86test::outw(0xf4, 0x01); } // exit failure
        }
    });
}

/// Panic for code running inside the x86test guest.
///
/// Prints a message with `sprintln!` and writes `0x02` to I/O port `0xf4`.
///
/// * `kpanic!(expr)` reports the stringified argument.
/// * `kpanic!("fmt", args...)` reports the formatted message. The format
///   string must be a string literal.
/// * `kpanic!(cond, "fmt", args...)` is the legacy conditional form, kept for
///   backward compatibility: it only fires when `cond` is `false`.
///
/// The expansion refers to `sprintln!` and `x86test::outw`, so both must be
/// resolvable at the call site.
#[macro_export]
macro_rules! kpanic {
    // Internal rule: shared expansion of the legacy conditional form.
    (@conditional $test:expr, $($arg:tt)+) => ({
        if !$test {
            sprintln!("kpanic: {}, {}:{}:{}", format_args!($($arg)+), file!(), line!(), column!());
            #[allow(unused_unsafe)]
            unsafe { x86test::outw(0xf4, 0x02); } // exit failure
        }
    });
    ($test:expr) => ({
        sprintln!("kpanic: {}, {}:{}:{}", stringify!($test), file!(), line!(), column!());
        // The caller may already be inside an `unsafe` block.
        #[allow(unused_unsafe)]
        unsafe { x86test::outw(0xf4, 0x02); } // exit failure
    });
    // Boolean literals would also match `$fmt:literal` below, so route them to
    // the legacy conditional form first.
    (true, $($arg:tt)+) => ({
        $crate::kpanic!(@conditional true, $($arg)+)
    });
    (false, $($arg:tt)+) => ({
        $crate::kpanic!(@conditional false, $($arg)+)
    });
    // Unconditional panic with a formatted message. Previously a leading format
    // string was treated as a condition (`if !$test`) and failed to compile.
    ($fmt:literal, $($arg:tt)+) => ({
        sprintln!("kpanic: {}, {}:{}:{}", format_args!($fmt, $($arg)+), file!(), line!(), column!());
        #[allow(unused_unsafe)]
        unsafe { x86test::outw(0xf4, 0x02); } // exit failure
    });
    // Legacy conditional form with an arbitrary condition expression.
    ($test:expr, $($arg:tt)+) => ({
        $crate::kpanic!(@conditional $test, $($arg)+)
    });
}

/// Wrapper around the plain function pointer of a test.
pub struct StaticTestFn(pub fn());

/// Description of a single x86test: its metadata and the function to run.
pub struct X86TestFn {
    /// Name of test.
    pub name: &'static str,
    /// Ignore this test?
    pub ignore: bool,
    /// Create an identity map of the process inside the VM?
    pub identity_map: bool,
    /// Add guest physical memory in this range.
    pub physical_memory: (u64, u64),
    /// When read on ioport_enable.0 return ioport_enable.1 as value.
    /// When write on ioport_enable.0 abort if value was not ioport_enable.1.
    pub ioport_enable: (u16, u32),
    /// Test has a `#[should_panic]` attribute.
    pub should_panic: bool,
    /// Test was declared with `#[x86test(should_halt)]`.
    pub should_halt: bool,
    /// Test function we need to execute (in a VM).
    pub testfn: StaticTestFn,
}