├── .github └── workflows │ ├── build.yml │ ├── debug.yml │ ├── update.yml │ └── watch.yml ├── .gitignore ├── .gitmodules ├── README.md ├── crack.bundle ├── debian ├── changelog ├── compat ├── config │ └── config.pve ├── copyright ├── patches │ ├── pve │ │ ├── 0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch │ │ ├── 0002-bridge-keep-MAC-of-first-assigned-port.patch │ │ ├── 0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch │ │ ├── 0004-kvm-disable-default-dynamic-halt-polling-growth.patch │ │ ├── 0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch │ │ ├── 0006-Revert-PCI-Coalesce-host-bridge-contiguous-apertures.patch │ │ ├── 0006-do-not-generate-split-BTF-type-info-per-default.patch │ │ ├── 0007-PCI-Reinstate-PCI-Coalesce-host-bridge-contiguous-ap.patch │ │ ├── 0007-Ubuntu-remove-leftover-reference-to-ubuntu-hio-drive.patch │ │ ├── 0008-KVM-x86-start-moving-SMM-related-functions-to-new-fi.patch │ │ ├── 0008-do-not-generate-split-BTF-type-info-per-default.patch │ │ ├── 0009-KVM-x86-move-SMM-entry-to-a-new-file.patch │ │ ├── 0010-KVM-x86-move-SMM-exit-to-a-new-file.patch │ │ ├── 0011-KVM-x86-do-not-go-through-ctxt-ops-when-emulating-rs.patch │ │ ├── 0012-KVM-allow-compiling-out-SMM-support.patch │ │ ├── 0013-KVM-x86-compile-out-vendor-specific-code-if-SMM-is-d.patch │ │ ├── 0014-KVM-x86-remove-SMRAM-address-space-if-SMM-is-not-sup.patch │ │ ├── 0015-KVM-x86-do-not-define-KVM_REQ_SMI-if-SMM-disabled.patch │ │ ├── 0016-bug-introduce-ASSERT_STRUCT_OFFSET.patch │ │ ├── 0017-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch │ │ ├── 0018-KVM-x86-emulator-introduce-emulator_recalc_and_set_m.patch │ │ ├── 0019-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch │ │ ├── 0020-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch │ │ ├── 0021-KVM-x86-smm-number-of-GPRs-in-the-SMRAM-image-depend.patch │ │ ├── 0022-KVM-x86-smm-check-for-failures-on-smm-entry.patch │ │ ├── 
0023-KVM-x86-smm-add-structs-for-KVM-s-smram-layout.patch │ │ ├── 0024-KVM-x86-smm-use-smram-structs-in-the-common-code.patch │ │ ├── 0025-KVM-x86-smm-use-smram-struct-for-32-bit-smram-load-r.patch │ │ ├── 0026-KVM-x86-smm-use-smram-struct-for-64-bit-smram-load-r.patch │ │ ├── 0027-KVM-svm-drop-explicit-return-value-of-kvm_vcpu_map.patch │ │ ├── 0028-KVM-x86-SVM-use-smram-structs.patch │ │ ├── 0029-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch │ │ ├── 0030-KVM-x86-smm-preserve-interrupt-shadow-in-SMRAM.patch │ │ ├── 650-netfilter-add-xt_FLOWOFFLOAD-target.patch │ │ ├── 952-add-net-conntrack-events-support-multiple-registrant.patch │ │ ├── 953-bnx2x_warpcore_8727_2_5g_sgmii_txfault.patch │ │ ├── 954-add-netfilter-full-cone-nat.patch │ │ └── 999-ixgbe-add-disabling-NBASE-T-suppression-hack.patch │ ├── series.linux │ ├── series.zfs │ ├── ubuntu │ │ ├── 0001-apparmor-compatibility-v2.x-net-rules.patch │ │ ├── 0002-apparmor-af_unix-mediation.patch │ │ ├── 0003-apparmor-fix-apparmor-mediating-locking-non-fs-unix-sockets.patch │ │ └── 0004-apparmor-fix-use-after-free-in-sk_peer_label.patch │ └── zfs │ │ ├── 0001-Check-for-META-and-DCH-consistency-in-autoconf.patch │ │ ├── 0002-always-load-ZFS-module-on-boot.patch │ │ ├── 0003-Fix-the-path-to-the-zed-binary-on-the-systemd-unit.patch │ │ ├── 0004-import-with-d-dev-disk-by-id-in-scan-service.patch │ │ ├── 0005-Enable-zed-emails.patch │ │ ├── 0006-dont-symlink-zed-scripts.patch │ │ ├── 0007-Use-installed-python3.patch │ │ ├── 0008-Add-systemd-unit-for-importing-specific-pools.patch │ │ ├── 0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch │ │ ├── 0010-arcstat-Fix-integer-division-with-python3.patch │ │ └── 0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch ├── rules ├── rules.d │ ├── arch │ │ └── amd64.mk │ └── common.mk ├── source │ └── format └── templates │ ├── control.in │ ├── pve-headers.postinst.in │ ├── pve-kernel.postinst.in │ ├── pve-kernel.postrm.in │ └── pve-kernel.prerm.in └── scripts 
├── ar.sh ├── check.sh ├── update.sh └── version.sh /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: Kernel Build 2 | 3 | on: 4 | pull_request: 5 | types: [closed] 6 | branches: 7 | - v* 8 | - flavor/*/v* 9 | workflow_dispatch: 10 | 11 | env: 12 | SSH_ACTIONS: false 13 | UPLOAD_BIN_DIR: true 14 | TZ: Asia/Shanghai 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | if: ${{ github.event_name == 'workflow_dispatch' || github.event.pull_request.merged == true }} 20 | steps: 21 | - name: Checkout Sources 22 | uses: actions/checkout@v3 23 | with: 24 | submodules: recursive 25 | path: pve-edge-kernel 26 | 27 | - name: Initialization environment 28 | env: 29 | DEBIAN_FRONTEND: noninteractive 30 | run: | 31 | sudo rm -rf /swapfile /usr/share/dotnet /usr/local/lib/android /opt/ghc 32 | sudo apt-get update 33 | sudo apt-get install devscripts debhelper equivs git dwarves python3-dev 34 | sudo apt-get autoremove --purge 35 | sudo apt-get clean 36 | 37 | - name: Compile the PVE kernel 38 | id: compile 39 | run: | 40 | cd pve-edge-kernel 41 | debian/rules debian/control 42 | sudo mk-build-deps -i 43 | debuild -ePVE* --jobs=auto -b -uc -us 44 | echo "::set-output name=status::success" 45 | 46 | - name: Start SSH via Ngrok 47 | env: 48 | NGROK_TOKEN: ${{ secrets.NGROK_TOKEN }} 49 | USER_PASS: ${{ secrets.USER_PASS }} 50 | if: ${{ failure() }} 51 | run: curl -sL https://gist.githubusercontent.com/retyui/7115bb6acf151351a143ec8f96a7c561/raw/7099b9db76729dc5761da72aa8525f632d8875c9/debug-github-actions.sh | bash 52 | 53 | - name: Don't kill instace 54 | if: ${{ failure() }} 55 | run: sleep 4h 56 | 57 | - name: Upload Artifacts 58 | uses: actions/upload-artifact@v3 59 | if: steps.compile.outputs.status == 'success' && env.UPLOAD_BIN_DIR == 'true' 60 | with: 61 | name: alldebs 62 | path: "*.deb" 63 | 64 | - name: Delete Debug Symbols 65 | if: steps.compile.outputs.status == 'success' && env.UPLOAD_BIN_DIR 
== 'true' 66 | run: | 67 | #tar czf Dbgsym.tar.gz *dbgsym*.deb 68 | #rm -f *dbgsym*.deb 69 | tar czf Alldebs.tar.gz *.deb 70 | 71 | - name: Format Release Name 72 | id: format_release 73 | if: steps.compile.outputs.status == 'success' && env.UPLOAD_BIN_DIR == 'true' 74 | run: | 75 | cd pve-edge-kernel 76 | release=$(scripts/version.sh) 77 | echo "::set-output name=release::$release" 78 | changelog=$(dpkg-parsechangelog -c 1 -l debian/changelog) 79 | changelog="${changelog//'%'/'%25'}" 80 | changelog="${changelog//$'\n'/'%0A'}" 81 | changelog="${changelog//$'\r'/'%0D'}" 82 | echo "::set-output name=changelog::$changelog" 83 | 84 | - name: Create Release 85 | uses: ncipollo/release-action@v1 86 | if: steps.compile.outputs.status == 'success' && env.UPLOAD_BIN_DIR == 'true' 87 | with: 88 | commit: ${{ github.sha }} 89 | tag: v${{ steps.format_release.outputs.release }} 90 | body: ${{ steps.format_release.outputs.changelog }} 91 | token: ${{ secrets.RElEASE_TOKEN }} 92 | artifacts: "Alldebs.tar.gz" 93 | -------------------------------------------------------------------------------- /.github/workflows/debug.yml: -------------------------------------------------------------------------------- 1 | name: Kernel Debug 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | if: github.event.repository.owner.id == github.event.sender.id 10 | steps: 11 | - name: Checkout 12 | uses: actions/checkout@master 13 | 14 | - name: Try Build 15 | run: ./not-exist-file.sh it bloke build 16 | 17 | - name: Start SSH via Ngrok 18 | env: 19 | NGROK_TOKEN: ${{ secrets.NGROK_TOKEN }} 20 | USER_PASS: ${{ secrets.USER_PASS }} 21 | if: ${{ failure() }} 22 | run: curl -sL https://gist.githubusercontent.com/retyui/7115bb6acf151351a143ec8f96a7c561/raw/7099b9db76729dc5761da72aa8525f632d8875c9/debug-github-actions.sh | bash 23 | 24 | - name: Don't kill instace 25 | if: ${{ failure() }} 26 | run: sleep 6h 27 | 
-------------------------------------------------------------------------------- /.github/workflows/update.yml: -------------------------------------------------------------------------------- 1 | name: Kernel Update 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | tag: 7 | description: 'Tag of the Linux Kernel to update to' 8 | required: true 9 | version: 10 | description: 'Custom version number' 11 | required: false 12 | release: 13 | description: 'Custom release number' 14 | required: false 15 | 16 | jobs: 17 | update: 18 | name: Update 19 | runs-on: [ubuntu-latest] 20 | steps: 21 | - name: Checkout Sources 22 | uses: actions/checkout@v3 23 | with: 24 | submodules: recursive 25 | - name: Setup System Dependencies 26 | run: | 27 | sudo sed -i 's/azure.archive.ubuntu.com/archive.ubuntu.com/g' /etc/apt/sources.list 28 | sudo apt update 29 | sudo apt install devscripts 30 | - name: Update Kernel 31 | id: update 32 | run: | 33 | TAG=${{ github.event.inputs.tag }} 34 | VERSION=${{ github.event.inputs.version }} 35 | RELEASE=${{ github.event.inputs.release }} 36 | VERSION_OPT=${VERSION:+-v ${VERSION}} 37 | RELEASE_OPT=${RELEASE:+-r ${RELEASE}} 38 | ./scripts/update.sh -t ${{ github.event.inputs.tag }} $VERSION_OPT $RELEASE_OPT 39 | echo "::set-output name=version::$(scripts/version.sh -L)" 40 | echo "::set-output name=full::$(scripts/version.sh)" 41 | changelog=$(dpkg-parsechangelog -c 1 -l debian/changelog) 42 | changelog="${changelog//'%'/'%25'}" 43 | changelog="${changelog//$'\n'/'%0A'}" 44 | changelog="${changelog//$'\r'/'%0D'}" 45 | echo "::set-output name=changelog::$changelog" 46 | echo "::set-output name=branch::${GITHUB_REF##*/}" 47 | - name: Create Pull Request 48 | uses: peter-evans/create-pull-request@v4 49 | with: 50 | token: ${{ secrets.RElEASE_TOKEN }} # Custom token needed to recursively trigger workflows 51 | author: fw867 52 | commit-message: | 53 | Add Linux ${{ steps.update.outputs.version }} 54 | 55 | This change updates the kernel to Linux ${{ 
steps.update.outputs.version }}. 56 | branch: staging/v${{ steps.update.outputs.full }} 57 | branch-suffix: short-commit-hash 58 | delete-branch: true 59 | title: "Add Linux ${{ steps.update.outputs.version }}" 60 | body: | 61 | Automated pull request to update the kernel to Linux ${{ steps.update.outputs.version }}. 62 | 63 | **Changelog:** 64 | ``` 65 | ${{ steps.update.outputs.changelog }} 66 | ``` 67 | labels: | 68 | release 69 | -------------------------------------------------------------------------------- /.github/workflows/watch.yml: -------------------------------------------------------------------------------- 1 | name: Kernel Watch 2 | 3 | on: 4 | # workflow_dispatch: 5 | # schedule: 6 | # - cron: '0 11 * * *' # Every day 7 | 8 | jobs: 9 | check: 10 | name: Check for new Release 11 | runs-on: [ubuntu-latest] 12 | strategy: 13 | matrix: 14 | branch: ['v6.0.x'] 15 | steps: 16 | - name: Checkout Sources 17 | uses: actions/checkout@v3 18 | with: 19 | ref: ${{ matrix.branch }} 20 | submodules: recursive 21 | - name: Setup System Dependencies 22 | run: sudo apt install jq curl 23 | - name: Check for Release 24 | id: check 25 | run: | 26 | NEW=$(scripts/check.sh ${{ matrix.branch }}) 27 | if [[ -z $NEW ]]; then 28 | echo "No new release found" 29 | exit 0 30 | fi 31 | echo "Found new Linux kernel version $NEW" 32 | STAGING=$(git ls-remote --heads origin "staging/v$NEW*") 33 | if [[ $STAGING ]]; then 34 | echo "Existing staging update found" 35 | exit 0 36 | fi 37 | echo "No staging update found: triggering update" 38 | echo "::set-output name=version::$NEW" 39 | - name: Trigger Update 40 | if: ${{ steps.check.outputs.version }} 41 | uses: benc-uk/workflow-dispatch@v1 42 | with: 43 | workflow: Kernel Update 44 | token: ${{ secrets.RElEASE_TOKEN }} # Custom token needed to recursively trigger workflows 45 | inputs: '{ "tag": "v${{ steps.check.outputs.version }}" }' 46 | ref: ${{ matrix.branch }} 47 | 48 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Build artifacts 2 | build 3 | *.prepared 4 | config*.org 5 | *.deb 6 | *.ddeb 7 | pve-headers-* 8 | pve-kernel-* 9 | linux-* 10 | debian/autoreconf.after 11 | debian/autoreconf.before 12 | debian/control 13 | .headers_install_mark 14 | .install_mark 15 | .tools_compile_mark 16 | .tools_install_mark 17 | .usr_headers_install_mark 18 | debian/SOURCE 19 | debian/debhelper-build-stamp 20 | debian/files 21 | debian/watchdog-blacklist.tmp 22 | debian/.debhelper/ 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "zfs"] 2 | path = zfs 3 | url = https://github.com/openzfs/zfs.git 4 | 5 | [submodule "linux"] 6 | path = linux 7 | url = git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git 8 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### 主要特性 2 | 3 | - linux kernel version :V6.0.x 感谢:[@ubuntu-jammy](https://code.launchpad.net/~ubuntu-kernel/ubuntu/+source/linux/+git/jammy) [@pve-kernel](https://github.com/proxmox/pve-kernel) [@pve-edge-kernel](https://github.com/fabianishere/pve-edge-kernel) 4 | - 添加FULLCONE-NAT 原项目:[@Chion82](https://github.com/Chion82/netfilter-full-cone-nat) 5 | - 添加nft-fullcone 原项目:[@fullcone-nat-nftables](https://github.com/fullcone-nat-nftables/nft-fullcone) 6 | - 添加ppp拨号组件 7 | - 添加BCM578XX 2.5G支持 8 | - 开启了FLOW_OFFLOAD 支持 9 | 10 | ## 安装 11 | > 如有使用DKMS外挂安装fullcone,先暂停自动更新 12 | ``` 13 | touch /etc/dkms/no-autoinstall 14 | ``` 15 | 16 | 在[[Releases]](https://github.com/fw867/pve-edge-kernel/releases "[Releases]")下载Alldebs.tar.gz后上传到PVE 17 | ``` 18 | tar xzf Alldebs.tar.gz 19 | dpkg -i 
*.deb 20 | ``` 21 | 22 | [Openwrt rootfs下载](http://fw.koolcenter.com/LEDE_X64_fw867/openwrt-koolshare-router-v3.2-r19470-2f7d60f0e5-x86-64-generic-rootfs.tar.gz) 23 | 24 | ## 配置和模版 25 | [一些Shell](https://github.com/fw867/pve-edge-kernel/releases/download/V1.0/shell.tar.gz)脚本介绍 26 | > PVE下 27 | 28 | | 脚本名称 | 功能介绍 | 放置目录 | 开启命令| 29 | | :------------: | :------------: | :------------: | :------------: | 30 | | rps | 增加网卡的并发能力 | /etc/init.d |update-rc.d rps defaults 31 | | net-sriov | 开机自动设置sriov网卡 |/etc/init.d |systemctl enable net-sriov 32 | | device_hook.sh | lxc op启动后设置ppp tun | /var/lib/lxc/{ct id} |修改199.conf里的目录 33 | | hookscript.pl | lxc op启动后设置ppp tun | {存储目录}/snippets | 修改199.conf 里的目录 34 | | openwrt.conf | 开机自启openwrt需要的模块 | /etc/modules-load.d/ |无需 35 | | 199.conf | lxc op的配置模版 | /etc/pve/lxc/{ct id}/ |根据实际修改硬盘目录和网卡 | 36 | 37 | > 在/etc/modules-load.d/openwrt.conf内加入 38 | 39 | ``` 40 | nf_nat_fullcone 41 | nft_ext_fullcone 42 | ``` 43 | 44 | > 直通网卡配置 45 | 46 | ``` 47 | lxc.net.0.type: phys 48 | lxc.net.0.link: eno1v0 49 | lxc.net.0.flags: up 50 | lxc.net.0.name: eth0 51 | lxc.net.0.hwaddr: 00:11:22:33:44:1A 52 | ``` 53 | 54 | > *如直通sriov创建的网卡,无法为openwrt创建的br-lan所互通 55 | 56 | ------------ 57 | 58 | > LXC openwrt下 59 | 60 | | 脚本名称 | 功能介绍 | 放置目录 | 开启命令| 61 | | :------------: | :------------: | :------------: | :------------: | 62 | | 99_firewall_lxc_workaround | 解决lxc下op偶发开机时卡防火墙问题 | /lib/preinit |无需 63 | 64 | 65 | ## 一些网络调优参数 66 | > nano /etc/sysctl.conf 67 | ```shell 68 | net.netfilter.nf_conntrack_icmp_timeout=10 69 | net.netfilter.nf_conntrack_tcp_timeout_syn_recv=5 70 | net.netfilter.nf_conntrack_tcp_timeout_syn_sent=5 71 | net.netfilter.nf_conntrack_tcp_timeout_established=600 72 | net.netfilter.nf_conntrack_tcp_timeout_fin_wait=10 73 | net.netfilter.nf_conntrack_tcp_timeout_time_wait=10 74 | net.netfilter.nf_conntrack_tcp_timeout_close_wait=10 75 | net.netfilter.nf_conntrack_tcp_timeout_last_ack=10 76 | net.core.somaxconn=65535 77 | ``` 78 | ## 
x550开启NBASE-T协商2.5G 5G 79 | > 1.使用ethtool配置 80 | ```shell 81 | ethtool -s ens2f0 advertise 0x1800000001028 82 | ``` 83 | > 2.配置自动开启 84 | ```shell 85 | echo "options ixgbe enable_nbase_t_suppression_hack=0" > /etc/modprobe.d/ixgbe.conf 86 | 87 | update-initramfs -u 88 | ``` 89 | ## 自编译 90 | > 最少10G空闲硬盘空间 91 | ```shell 92 | sudo apt install devscripts debhelper equivs git 93 | git clone https://github.com/fw867/pve-edge-kernel.git 94 | cd pve-edge-kernel 95 | git submodule update --init --depth=1 --recursive linux 96 | git submodule update --init --recursive 97 | debian/rules debian/control 98 | sudo mk-build-deps -i 99 | debuild -ePVE* --jobs=auto -b -uc -us 100 | ``` 101 | 102 | 103 | ### End 104 | -------------------------------------------------------------------------------- /crack.bundle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fw867/pve-edge-kernel/12cc43784c94a3441cd80025ad2967eb9bb31c16/crack.bundle -------------------------------------------------------------------------------- /debian/changelog: -------------------------------------------------------------------------------- 1 | pve-kernel (6.0.5-1) pve; urgency=medium 2 | 3 | * Update to Linux 6.0.5. 4 | 5 | -- fw867 Thu, 27 Oct 2022 12:15:18 +0000 6 | 7 | pve-kernel (5.19.1-1) pve; urgency=medium 8 | 9 | * Update to Linux 5.19.1. 10 | 11 | -- fw867 Sun, 14 Aug 2022 13:17:27 +0000 12 | 13 | pve-kernel (5.18.7-6) pve; urgency=medium 14 | 15 | * Update to Linux 5.18.7. 16 | 17 | -- fw867 Sat, 25 Jun 2022 16:33:10 +0000 18 | 19 | pve-kernel (5.18.7-5) pve; urgency=medium 20 | 21 | * Update to Linux 5.18.7. 22 | 23 | -- fw867 Sat, 25 Jun 2022 16:27:32 +0000 24 | 25 | pve-kernel (5.18.6-4) pve; urgency=medium 26 | 27 | * Update to Linux 5.18.6. 28 | 29 | -- fw867 Sat, 25 Jun 2022 12:27:18 +0000 30 | 31 | pve-kernel (5.17.8-3) pve; urgency=medium 32 | 33 | * Update to Linux 5.17.8. 
34 | 35 | -- fw867 Mon, 16 May 2022 13:28:30 +0000 36 | 37 | pve-kernel (5.17.8-2) pve; urgency=medium 38 | 39 | * Update to Linux 5.17.8. 40 | 41 | -- fw867 Mon, 16 May 2022 13:27:09 +0000 42 | 43 | pve-kernel (5.17.5-1) pve; urgency=medium 44 | 45 | * Update to Linux 5.17.5. 46 | 47 | -- fw867 Thu, 28 Apr 2022 12:35:20 +0000 48 | 49 | pve-kernel (5.17.5-1) pve; urgency=medium 50 | 51 | * Update to Linux 5.17.5. 52 | 53 | -- fw867 Thu, 28 Apr 2022 12:25:44 +0000 54 | 55 | pve-kernel (5.17.3-1) pve; urgency=medium 56 | 57 | * Update to Linux 5.17.3. 58 | * Update to ZFS 2.1.4 59 | 60 | -- fw867 Sat, 16 Apr 2022 16:11:25 +0000 61 | 62 | pve-kernel (5.17.2-1) pve; urgency=medium 63 | 64 | * Update to Linux 5.17.2. 65 | 66 | -- Fabian Mastenbroek Sat, 09 Apr 2022 12:12:19 +0000 67 | 68 | pve-kernel (5.17.1-1) pve; urgency=medium 69 | 70 | * Update to Linux 5.17.1. 71 | 72 | -- Fabian Mastenbroek Mon, 28 Mar 2022 12:10:44 +0000 73 | 74 | pve-kernel (5.17.0-2) pve; urgency=high 75 | 76 | * Update to ZFS 2.1.4 77 | 78 | -- Fabian Mastenbroek Sat, 26 Mar 2022 20:00:00 +0000 79 | 80 | pve-kernel (5.17.0-1) pve; urgency=medium 81 | 82 | * Update to Linux 5.17 83 | * Update to ZFS 2.1.3 84 | 85 | -- Fabian Mastenbroek Mon, 21 Mar 2022 13:00:00 +0000 86 | 87 | pve-kernel (5.16.0-1) pve; urgency=medium 88 | 89 | * Disable UBSAN (see issue #164 and #200). 90 | * Update to ZFS 2.1.2 91 | * Fix AppArmor incompatibilities 92 | 93 | -- Fabian Mastenbroek Mon, 10 Jan 2022 11:00:00 +0000 94 | 95 | pve-kernel (5.15.0-1) pve; urgency=medium 96 | 97 | * Update to Linux 5.15. 98 | * Update to OpenZFS 2.1.1 99 | * Drop kernel release from version 100 | 101 | -- Fabian Mastenbroek Tue, 2 Nov 2021 10:00:00 +0000 102 | 103 | pve-kernel (5.14.0-1) pve; urgency=medium 104 | 105 | * Update to Linux 5.14. 106 | * Update to OpenZFS 2.1.0. 107 | 108 | -- Fabian Mastenbroek Mon, 30 Aug 2021 11:00:00 +0000 109 | 110 | pve-kernel (5.13.0-1) pve; urgency=medium 111 | 112 | * Update to Linux 5.13. 
113 | * Update to OpenZFS 2.1.0-rc7. 114 | 115 | -- Fabian Mastenbroek Mon, 28 Jun 2021 17:00:00 +0000 116 | 117 | pve-kernel (5.12.0-1) pve; urgency=medium 118 | 119 | * Update to Linux 5.12. 120 | * Update to OpenZFS 2.1.0-rc5. 121 | 122 | -- Fabian Mastenbroek Thu, 18 Feb 2021 10:49:30 +0000 123 | 124 | pve-kernel (5.11.0-1) pve; urgency=medium 125 | 126 | * Update to Linux 5.11. 127 | * Properly add module.lds to headers package. 128 | 129 | -- Fabian Mastenbroek Thu, 18 Feb 2021 10:49:30 +0000 130 | 131 | pve-kernel (5.10.0-1) pve; urgency=medium 132 | 133 | * Update to Linux 5.10. 134 | 135 | -- Fabian Mastenbroek Sun, 24 Jan 2021 12:53:48 +0000 136 | 137 | pve-kernel (5.9.0-1) pve; urgency=medium 138 | 139 | * Update to Linux 5.9. 140 | 141 | -- Fabian Mastenbroek Thu, 12 Nov 2020 16:49:51 +0000 142 | 143 | pve-kernel (5.8.0-1) pve; urgency=medium 144 | 145 | * update to Linux 5.8 based on Ubuntu 5.8.0-12.13 146 | 147 | -- Fabian Mastenbroek Sun, 9 Aug 2020 14:58:00 +0200 148 | 149 | pve-kernel (5.7.0-1) pve; urgency=medium 150 | 151 | * update to Linux 5.7 based on Ubuntu 5.7.0-5.6 152 | 153 | -- Fabian Mastenbroek Mon, 1 Jun 2020 19:57:12 +0200 154 | 155 | pve-kernel (5.6.0-1) pve; urgency=medium 156 | 157 | * update to Linux 5.6. 158 | 159 | -- Fabian Mastenbroek Sat, 2 May 2020 21:07:10 +0200 160 | 161 | pve-kernel (5.5.0-1) pve; urgency=medium 162 | 163 | * update to Linux 5.5. 
164 | 165 | -- Fabian Mastenbroek Thu, 30 Apr 2020 18:30:30 +0200 166 | 167 | pve-kernel (5.4.30-1) pve; urgency=medium 168 | 169 | * update to Ubuntu-5.4.0-24.28 170 | 171 | * bump ABI to 5.4.30-1 172 | 173 | -- Fabian Mastenbroek Tue, 28 Apr 2020 14:11:30 +0200 174 | -------------------------------------------------------------------------------- /debian/compat: -------------------------------------------------------------------------------- 1 | 11 2 | -------------------------------------------------------------------------------- /debian/config/config.pve: -------------------------------------------------------------------------------- 1 | CONFIG_DEBUG_INFO_NONE=y 2 | CONFIG_INTEL_MEI_WDT=m 3 | CONFIG_SND_PCM_OSS=n 4 | CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y 5 | CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=n 6 | CONFIG_CEPH_FS=m 7 | CONFIG_BLK_DEV_NBD=m 8 | CONFIG_BLK_DEV_RBD=m 9 | CONFIG_SND_PCSP=n 10 | CONFIG_BCACHE=m 11 | CONFIG_JFS_FS=m 12 | CONFIG_HFS_FS=m 13 | CONFIG_HFSPLUS_FS=m 14 | CONFIG_CIFS_SMB_DIRECT=y 15 | CONFIG_SQUASHFS_DECOMP_MULTI_PERCPU=y 16 | CONFIG_BRIDGE=y 17 | CONFIG_BRIDGE_NETFILTER=y 18 | CONFIG_BLK_DEV_SD=y 19 | CONFIG_BLK_DEV_SR=y 20 | CONFIG_BLK_DEV_DM=y 21 | CONFIG_BLK_DEV_NVME=y 22 | CONFIG_NLS_ISO8859_1=y 23 | CONFIG_INPUT_EVBUG=n 24 | CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=n 25 | CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL=n 26 | CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y 27 | CONFIG_SYSFB_SIMPLEFB=y 28 | CONFIG_DRM_SIMPLEDRM=y 29 | CONFIG_MODULE_SIG=n 30 | CONFIG_MODULE_SIG_KEY="" 31 | CONFIG_MODULE_SIG_ALL=n 32 | CONFIG_MEMCG_DISABLED=n 33 | CONFIG_MEMCG_SWAP_ENABLED=y 34 | CONFIG_HYPERV=y 35 | CONFIG_VFIO_IOMMU_TYPE1=m 36 | CONFIG_VFIO_VIRQFD=m 37 | CONFIG_VFIO=m 38 | CONFIG_VFIO_PCI=m 39 | CONFIG_USB_XHCI_HCD=m 40 | CONFIG_USB_XHCI_PCI=m 41 | CONFIG_USB_EHCI_HCD=m 42 | CONFIG_USB_EHCI_PCI=m 43 | CONFIG_USB_EHCI_HCD_PLATFORM=m 44 | CONFIG_USB_OHCI_HCD=m 45 | CONFIG_USB_OHCI_HCD_PCI=m 46 | CONFIG_USB_OHCI_HCD_PLATFORM=m 47 | 
CONFIG_USB_OHCI_HCD_SSB=n 48 | CONFIG_USB_UHCI_HCD=m 49 | CONFIG_USB_SL811_HCD_ISO=n 50 | CONFIG_MEMCG_KMEM=y 51 | CONFIG_DEFAULT_CFQ=n 52 | CONFIG_DEFAULT_DEADLINE=y 53 | CONFIG_MODVERSIONS=y 54 | CONFIG_DEFAULT_SECURITY_DAC=n 55 | CONFIG_DEFAULT_SECURITY_APPARMOR=y 56 | CONFIG_DEFAULT_SECURITY=apparmor 57 | CONFIG_UNWINDER_ORC=n 58 | CONFIG_UNWINDER_GUESS=n 59 | CONFIG_UNWINDER_FRAME_POINTER=y 60 | CONFIG_SYSTEM_TRUSTED_KEYS="" 61 | CONFIG_SYSTEM_REVOCATION_KEYS="" 62 | CONFIG_SECURITY_LOCKDOWN_LSM=n 63 | CONFIG_SECURITY_LOCKDOWN_LSM_EARLY=n 64 | CONFIG_LSM="yama,integrity,apparmor" 65 | CONFIG_PAGE_TABLE_ISOLATION=y 66 | CONFIG_UBSAN=n 67 | CONFIG_NFT_FULLCONE=m 68 | CONFIG_IP_NF_TARGET_FULLCONENAT=m 69 | CONFIG_NETFILTER_XT_TARGET_FULLCONENAT=m 70 | CONFIG_NETFILTER_XT_TARGET_FLOWOFFLOAD=m 71 | CONFIG_NF_CONNTRACK_CHAIN_EVENTS=y 72 | CONFIG_PPP=m 73 | -------------------------------------------------------------------------------- /debian/patches/pve/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Ben Hutchings 3 | Date: Tue, 12 May 2015 19:29:22 +0100 4 | Subject: [PATCH] Make mkcompile_h accept an alternate timestamp string 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | We want to include the Debian version in the utsname::version string 10 | instead of a full timestamp string. However, we still need to provide 11 | a standard timestamp string for gen_initramfs_list.sh to make the 12 | kernel image reproducible. 13 | 14 | Make mkcompile_h use $KBUILD_BUILD_VERSION_TIMESTAMP in preference to 15 | $KBUILD_BUILD_TIMESTAMP. 
16 | 17 | Signed-off-by: Fabian Grünbichler 18 | Signed-off-by: Thomas Lamprecht 19 | --- 20 | scripts/mkcompile_h | 10 +++++++--- 21 | 1 file changed, 7 insertions(+), 3 deletions(-) 22 | 23 | diff --git a/scripts/mkcompile_h b/scripts/mkcompile_h 24 | index ca40a5258c87..6ae930a732f0 100755 25 | --- a/scripts/mkcompile_h 26 | +++ b/scripts/mkcompile_h 27 | @@ -23,10 +23,14 @@ else 28 | VERSION=$KBUILD_BUILD_VERSION 29 | fi 30 | 31 | -if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then 32 | - TIMESTAMP=`date` 33 | +if [ -z "$KBUILD_BUILD_VERSION_TIMESTAMP" ]; then 34 | + if [ -z "$KBUILD_BUILD_TIMESTAMP" ]; then 35 | + TIMESTAMP=`date` 36 | + else 37 | + TIMESTAMP=$KBUILD_BUILD_TIMESTAMP 38 | + fi 39 | else 40 | - TIMESTAMP=$KBUILD_BUILD_TIMESTAMP 41 | + TIMESTAMP=$KBUILD_BUILD_VERSION_TIMESTAMP 42 | fi 43 | if test -z "$KBUILD_BUILD_USER"; then 44 | LINUX_COMPILE_BY=$(whoami | sed 's/\\/\\\\/') 45 | -------------------------------------------------------------------------------- /debian/patches/pve/0002-bridge-keep-MAC-of-first-assigned-port.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Thu, 14 Sep 2017 11:02:18 +0200 4 | Subject: [PATCH] bridge: keep MAC of first assigned port 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | original commit message: 10 | 11 | Default bridge changes MAC dynamically using smallest MAC of all 12 | connected ports (for no real reason). To avoid problems with ARP 13 | we simply use the MAC of the first connected port. 
14 | 15 | Signed-off-by: Fabian Grünbichler 16 | Signed-off-by: Thomas Lamprecht 17 | --- 18 | net/bridge/br_stp_if.c | 5 +---- 19 | 1 file changed, 1 insertion(+), 4 deletions(-) 20 | 21 | diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c 22 | index 75204d36d7f9..1fb5ff73ec1e 100644 23 | --- a/net/bridge/br_stp_if.c 24 | +++ b/net/bridge/br_stp_if.c 25 | @@ -265,10 +265,7 @@ bool br_stp_recalculate_bridge_id(struct net_bridge *br) 26 | return false; 27 | 28 | list_for_each_entry(p, &br->port_list, list) { 29 | - if (addr == br_mac_zero || 30 | - memcmp(p->dev->dev_addr, addr, ETH_ALEN) < 0) 31 | - addr = p->dev->dev_addr; 32 | - 33 | + addr = p->dev->dev_addr; 34 | } 35 | 36 | if (ether_addr_equal(br->bridge_id.addr, addr)) 37 | -------------------------------------------------------------------------------- /debian/patches/pve/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch: -------------------------------------------------------------------------------- 1 | From 1e30ab2525d8a9ad1fe9a76bef64ff54c2d95e0d Mon Sep 17 00:00:00 2001 2 | From: Mark Weiman 3 | Date: Sun, 12 Aug 2018 11:36:21 -0400 4 | Subject: [PATCH 09/14] pci: Enable overrides for missing ACS capabilities 5 | 6 | This is an updated version of Alex Williamson's patch from: 7 | https://lkml.org/lkml/2013/5/30/513 8 | 9 | Original commit message follows: 10 | 11 | PCIe ACS (Access Control Services) is the PCIe 2.0+ feature that 12 | allows us to control whether transactions are allowed to be redirected 13 | in various subnodes of a PCIe topology. For instance, if two 14 | endpoints are below a root port or downstream switch port, the 15 | downstream port may optionally redirect transactions between the 16 | devices, bypassing upstream devices. The same can happen internally 17 | on multifunction devices. The transaction may never be visible to the 18 | upstream devices. 19 | 20 | One upstream device that we particularly care about is the IOMMU. 
If 21 | a redirection occurs in the topology below the IOMMU, then the IOMMU 22 | cannot provide isolation between devices. This is why the PCIe spec 23 | encourages topologies to include ACS support. Without it, we have to 24 | assume peer-to-peer DMA within a hierarchy can bypass IOMMU isolation. 25 | 26 | Unfortunately, far too many topologies do not support ACS to make this 27 | a steadfast requirement. Even the latest chipsets from Intel are only 28 | sporadically supporting ACS. We have trouble getting interconnect 29 | vendors to include the PCIe spec required PCIe capability, let alone 30 | suggested features. 31 | 32 | Therefore, we need to add some flexibility. The pcie_acs_override= 33 | boot option lets users opt-in specific devices or sets of devices to 34 | assume ACS support. The "downstream" option assumes full ACS support 35 | on root ports and downstream switch ports. The "multifunction" 36 | option assumes the subset of ACS features available on multifunction 37 | endpoints and upstream switch ports are supported. The "id:nnnn:nnnn" 38 | option enables ACS support on devices matching the provided vendor 39 | and device IDs, allowing more strategic ACS overrides. These options 40 | may be combined in any order. A maximum of 16 id specific overrides 41 | are available. It's suggested to use the most limited set of options 42 | necessary to avoid completely disabling ACS across the topology. 43 | Note to hardware vendors, we have facilities to permanently quirk 44 | specific devices which enforce isolation but do not provide an ACS 45 | capability. Please contact me to have your devices added and save 46 | your customers the hassle of this boot option. 
47 | 48 | Signed-off-by: Mark Weiman 49 | Signed-off-by: Alexandre Frade 50 | --- 51 | .../admin-guide/kernel-parameters.txt | 9 ++ 52 | drivers/pci/quirks.c | 101 ++++++++++++++++++ 53 | 2 files changed, 110 insertions(+) 54 | 55 | diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt 56 | index 8dee8f68fe15..8aa1f5c6c042 100644 57 | --- a/Documentation/admin-guide/kernel-parameters.txt 58 | +++ b/Documentation/admin-guide/kernel-parameters.txt 59 | @@ -3402,6 +3402,15 @@ 60 | nomsi [MSI] If the PCI_MSI kernel config parameter is 61 | enabled, this kernel boot option can be used to 62 | disable the use of MSI interrupts system-wide. 63 | + pcie_acs_override = 64 | + [PCIE] Override missing PCIe ACS support for: 65 | + downstream 66 | + All downstream ports - full ACS capabilities 67 | + multifunction 68 | + All multifunction devices - multifunction ACS subset 69 | + id:nnnn:nnnn 70 | + Specific device - full ACS capabilities 71 | + Specified as vid:did (vendor/device ID) in hex 72 | noioapicquirk [APIC] Disable all boot interrupt quirks. 73 | Safety option to keep boot IRQs enabled. This 74 | should never be necessary. 
75 | diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c 76 | index 320255e5e8f8..8d5808de9071 100644 77 | --- a/drivers/pci/quirks.c 78 | +++ b/drivers/pci/quirks.c 79 | @@ -3483,6 +3483,106 @@ static void quirk_no_bus_reset(struct pci_dev *dev) 80 | dev->dev_flags |= PCI_DEV_FLAGS_NO_BUS_RESET; 81 | } 82 | 83 | +static bool acs_on_downstream; 84 | +static bool acs_on_multifunction; 85 | + 86 | +#define NUM_ACS_IDS 16 87 | +struct acs_on_id { 88 | + unsigned short vendor; 89 | + unsigned short device; 90 | +}; 91 | +static struct acs_on_id acs_on_ids[NUM_ACS_IDS]; 92 | +static u8 max_acs_id; 93 | + 94 | +static __init int pcie_acs_override_setup(char *p) 95 | +{ 96 | + if (!p) 97 | + return -EINVAL; 98 | + 99 | + while (*p) { 100 | + if (!strncmp(p, "downstream", 10)) 101 | + acs_on_downstream = true; 102 | + if (!strncmp(p, "multifunction", 13)) 103 | + acs_on_multifunction = true; 104 | + if (!strncmp(p, "id:", 3)) { 105 | + char opt[5]; 106 | + int ret; 107 | + long val; 108 | + 109 | + if (max_acs_id >= NUM_ACS_IDS - 1) { 110 | + pr_warn("Out of PCIe ACS override slots (%d)\n", 111 | + NUM_ACS_IDS); 112 | + goto next; 113 | + } 114 | + 115 | + p += 3; 116 | + snprintf(opt, 5, "%s", p); 117 | + ret = kstrtol(opt, 16, &val); 118 | + if (ret) { 119 | + pr_warn("PCIe ACS ID parse error %d\n", ret); 120 | + goto next; 121 | + } 122 | + acs_on_ids[max_acs_id].vendor = val; 123 | + 124 | + p += strcspn(p, ":"); 125 | + if (*p != ':') { 126 | + pr_warn("PCIe ACS invalid ID\n"); 127 | + goto next; 128 | + } 129 | + 130 | + p++; 131 | + snprintf(opt, 5, "%s", p); 132 | + ret = kstrtol(opt, 16, &val); 133 | + if (ret) { 134 | + pr_warn("PCIe ACS ID parse error %d\n", ret); 135 | + goto next; 136 | + } 137 | + acs_on_ids[max_acs_id].device = val; 138 | + max_acs_id++; 139 | + } 140 | +next: 141 | + p += strcspn(p, ","); 142 | + if (*p == ',') 143 | + p++; 144 | + } 145 | + 146 | + if (acs_on_downstream || acs_on_multifunction || max_acs_id) 147 | + pr_warn("Warning: 
PCIe ACS overrides enabled; This may allow non-IOMMU protected peer-to-peer DMA\n"); 148 | + 149 | + return 0; 150 | +} 151 | +early_param("pcie_acs_override", pcie_acs_override_setup); 152 | + 153 | +static int pcie_acs_overrides(struct pci_dev *dev, u16 acs_flags) 154 | +{ 155 | + int i; 156 | + 157 | + /* Never override ACS for legacy devices or devices with ACS caps */ 158 | + if (!pci_is_pcie(dev) || 159 | + pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ACS)) 160 | + return -ENOTTY; 161 | + 162 | + for (i = 0; i < max_acs_id; i++) 163 | + if (acs_on_ids[i].vendor == dev->vendor && 164 | + acs_on_ids[i].device == dev->device) 165 | + return 1; 166 | + 167 | + switch (pci_pcie_type(dev)) { 168 | + case PCI_EXP_TYPE_DOWNSTREAM: 169 | + case PCI_EXP_TYPE_ROOT_PORT: 170 | + if (acs_on_downstream) 171 | + return 1; 172 | + break; 173 | + case PCI_EXP_TYPE_ENDPOINT: 174 | + case PCI_EXP_TYPE_UPSTREAM: 175 | + case PCI_EXP_TYPE_LEG_END: 176 | + case PCI_EXP_TYPE_RC_END: 177 | + if (acs_on_multifunction && dev->multifunction) 178 | + return 1; 179 | + } 180 | + 181 | + return -ENOTTY; 182 | +} 183 | /* 184 | * Some Atheros AR9xxx and QCA988x chips do not behave after a bus reset. 
185 | * The device will throw a Link Down error on AER-capable systems and 186 | @@ -4796,6 +4896,8 @@ static const struct pci_dev_acs_enabled { 187 | { PCI_VENDOR_ID_ZHAOXIN, 0x9083, pci_quirk_mf_endpoint_acs }, 188 | /* Zhaoxin Root/Downstream Ports */ 189 | { PCI_VENDOR_ID_ZHAOXIN, PCI_ANY_ID, pci_quirk_zhaoxin_pcie_ports_acs }, 190 | + /* PCIe ACS overrides */ 191 | + { PCI_ANY_ID, PCI_ANY_ID, pcie_acs_overrides }, 192 | { 0 } 193 | }; 194 | 195 | -- 196 | 2.17.1 197 | -------------------------------------------------------------------------------- /debian/patches/pve/0004-kvm-disable-default-dynamic-halt-polling-growth.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Thu, 14 Sep 2017 11:09:58 +0200 4 | Subject: [PATCH] kvm: disable default dynamic halt polling growth 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | Signed-off-by: Fabian Grünbichler 10 | Signed-off-by: Thomas Lamprecht 11 | --- 12 | virt/kvm/kvm_main.c | 2 +- 13 | 1 file changed, 1 insertion(+), 1 deletion(-) 14 | 15 | diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c 16 | index c56861ed0e38..3ba11c5e7acd 100644 17 | --- a/virt/kvm/kvm_main.c 18 | +++ b/virt/kvm/kvm_main.c 19 | @@ -79,7 +79,7 @@ module_param(halt_poll_ns, uint, 0644); 20 | EXPORT_SYMBOL_GPL(halt_poll_ns); 21 | 22 | /* Default doubles per-vcpu halt_poll_ns. 
*/ 23 | -unsigned int halt_poll_ns_grow = 2; 24 | +unsigned int halt_poll_ns_grow = 0; 25 | module_param(halt_poll_ns_grow, uint, 0644); 26 | EXPORT_SYMBOL_GPL(halt_poll_ns_grow); 27 | 28 | -------------------------------------------------------------------------------- /debian/patches/pve/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Thomas Lamprecht 3 | Date: Wed, 7 Oct 2020 17:18:28 +0200 4 | Subject: [PATCH] net: core: downgrade unregister_netdevice refcount leak from 5 | emergency to error 6 | 7 | Signed-off-by: Thomas Lamprecht 8 | --- 9 | net/core/dev.c | 2 +- 10 | 1 file changed, 1 insertion(+), 1 deletion(-) 11 | 12 | diff --git a/net/core/dev.c b/net/core/dev.c 13 | index ecaeb3ef8e5c..81f117f43bbe 100644 14 | --- a/net/core/dev.c 15 | +++ b/net/core/dev.c 16 | @@ -10268,7 +10268,7 @@ static struct net_device *netdev_wait_allrefs_any(struct list_head *list) 17 | if (time_after(jiffies, warning_time + 18 | READ_ONCE(netdev_unregister_timeout_secs) * HZ)) { 19 | list_for_each_entry(dev, list, todo_list) { 20 | - pr_emerg("unregister_netdevice: waiting for %s to become free. Usage count = %d\n", 21 | + pr_err("unregister_netdevice: waiting for %s to become free. 
Usage count = %d\n", 22 | dev->name, netdev_refcnt_read(dev)); 23 | ref_tracker_dir_print(&dev->refcnt_tracker, 10); 24 | } 25 | -------------------------------------------------------------------------------- /debian/patches/pve/0006-Revert-PCI-Coalesce-host-bridge-contiguous-apertures.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Thomas Lamprecht 3 | Date: Mon, 27 Sep 2021 11:28:39 +0200 4 | Subject: [PATCH] Revert "PCI: Coalesce host bridge contiguous apertures" 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | This reverts commit ab20e43b20b60f5cc8e2ea3763ffa388158469ac. 10 | 11 | was reverted upstream because of reports similar to 12 | 13 | Link: https://bugzilla.proxmox.com/show_bug.cgi?id=3552 14 | Link: https://lore.kernel.org/r/20210709231529.GA3270116@roeck-us.net 15 | Signed-off-by: Fabian Grünbichler 16 | Signed-off-by: Thomas Lamprecht 17 | --- 18 | drivers/pci/probe.c | 50 ++++----------------------------------------- 19 | 1 file changed, 4 insertions(+), 46 deletions(-) 20 | 21 | diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c 22 | index cb70d2605e97..258350f80f6c 100644 23 | --- a/drivers/pci/probe.c 24 | +++ b/drivers/pci/probe.c 25 | @@ -20,7 +20,6 @@ 26 | #include 27 | #include 28 | #include 29 | -#include 30 | #include "pci.h" 31 | 32 | #define CARDBUS_LATENCY_TIMER 176 /* secondary latency timer */ 33 | @@ -881,31 +880,14 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus) 34 | dev_set_msi_domain(&bus->dev, d); 35 | } 36 | 37 | -static int res_cmp(void *priv, const struct list_head *a, 38 | - const struct list_head *b) 39 | -{ 40 | - struct resource_entry *entry1, *entry2; 41 | - 42 | - entry1 = container_of(a, struct resource_entry, node); 43 | - entry2 = container_of(b, struct resource_entry, node); 44 | - 45 | - if (entry1->res->flags != 
entry2->res->flags) 46 | - return entry1->res->flags > entry2->res->flags; 47 | - 48 | - if (entry1->offset != entry2->offset) 49 | - return entry1->offset > entry2->offset; 50 | - 51 | - return entry1->res->start > entry2->res->start; 52 | -} 53 | - 54 | static int pci_register_host_bridge(struct pci_host_bridge *bridge) 55 | { 56 | struct device *parent = bridge->dev.parent; 57 | - struct resource_entry *window, *next, *n; 58 | + struct resource_entry *window, *n; 59 | struct pci_bus *bus, *b; 60 | - resource_size_t offset, next_offset; 61 | + resource_size_t offset; 62 | LIST_HEAD(resources); 63 | - struct resource *res, *next_res; 64 | + struct resource *res; 65 | char addr[64], *fmt; 66 | const char *name; 67 | int err; 68 | @@ -988,35 +970,11 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) 69 | if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE) 70 | dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n"); 71 | 72 | - /* Sort and coalesce contiguous windows */ 73 | - list_sort(NULL, &resources, res_cmp); 74 | - resource_list_for_each_entry_safe(window, n, &resources) { 75 | - if (list_is_last(&window->node, &resources)) 76 | - break; 77 | - 78 | - next = list_next_entry(window, node); 79 | - offset = window->offset; 80 | - res = window->res; 81 | - next_offset = next->offset; 82 | - next_res = next->res; 83 | - 84 | - if (res->flags != next_res->flags || offset != next_offset) 85 | - continue; 86 | - 87 | - if (res->end + 1 == next_res->start) { 88 | - next_res->start = res->start; 89 | - res->flags = res->start = res->end = 0; 90 | - } 91 | - } 92 | - 93 | /* Add initial resources to the bus */ 94 | resource_list_for_each_entry_safe(window, n, &resources) { 95 | + list_move_tail(&window->node, &bridge->windows); 96 | offset = window->offset; 97 | res = window->res; 98 | - if (!res->end) 99 | - continue; 100 | - 101 | - list_move_tail(&window->node, &bridge->windows); 102 | 103 | if (res->flags & IORESOURCE_BUS) 
104 | pci_bus_insert_busn_res(bus, bus->number, res->end); -------------------------------------------------------------------------------- /debian/patches/pve/0006-do-not-generate-split-BTF-type-info-per-default.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Thu, 14 Sep 2017 11:09:58 +0200 4 | Subject: [PATCH] do not generate split BTF type info per default 5 | 6 | This reverts commit a8ed1a0607cfa5478ff6009539f44790c4d0956d. 7 | 8 | It breaks ZFS sometimes: 9 | https://github.com/openzfs/zfs/issues/12301#issuecomment-873303739 10 | 11 | Signed-off-by: Thomas Lamprecht 12 | --- 13 | lib/Kconfig.debug | 2 +- 14 | 1 file changed, 1 insertion(+), 1 deletion(-) 15 | 16 | diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug 17 | index 2e24db4bff19..7766a24d0f19 100644 18 | --- a/lib/Kconfig.debug 19 | +++ b/lib/Kconfig.debug 20 | @@ -357,7 +357,7 @@ config PAHOLE_HAS_BTF_TAG 21 | these attributes, so make the config depend on CC_IS_CLANG. 22 | 23 | config DEBUG_INFO_BTF_MODULES 24 | - def_bool y 25 | + def_bool n 26 | depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF 27 | help 28 | Generate compact split BTF type information for kernel modules. 
29 | -------------------------------------------------------------------------------- /debian/patches/pve/0007-PCI-Reinstate-PCI-Coalesce-host-bridge-contiguous-ap.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Kai-Heng Feng 3 | Date: Tue, 13 Jul 2021 20:50:07 +0800 4 | Subject: [PATCH] PCI: Reinstate "PCI: Coalesce host bridge contiguous 5 | apertures" 6 | MIME-Version: 1.0 7 | Content-Type: text/plain; charset=UTF-8 8 | Content-Transfer-Encoding: 8bit 9 | 10 | Built-in graphics on HP EliteDesk 805 G6 doesn't work because graphics 11 | can't get the BAR it needs: 12 | pci_bus 0000:00: root bus resource [mem 0x10020200000-0x100303fffff window] 13 | pci_bus 0000:00: root bus resource [mem 0x10030400000-0x100401fffff window] 14 | 15 | pci 0000:00:08.1: bridge window [mem 0xd2000000-0xd23fffff] 16 | pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100401fffff 64bit pref] 17 | pci 0000:00:08.1: can't claim BAR 15 [mem 0x10030000000-0x100401fffff 64bit pref]: no compatible bridge window 18 | pci 0000:00:08.1: [mem 0x10030000000-0x100401fffff 64bit pref] clipped to [mem 0x10030000000-0x100303fffff 64bit pref] 19 | pci 0000:00:08.1: bridge window [mem 0x10030000000-0x100303fffff 64bit pref] 20 | pci 0000:07:00.0: can't claim BAR 0 [mem 0x10030000000-0x1003fffffff 64bit pref]: no compatible bridge window 21 | pci 0000:07:00.0: can't claim BAR 2 [mem 0x10040000000-0x100401fffff 64bit pref]: no compatible bridge window 22 | 23 | However, the root bus has two contiguous apertures that can contain the 24 | child resource requested. 25 | 26 | Coalesce contiguous apertures so we can allocate from the entire contiguous 27 | region. 28 | 29 | This is the second take of commit 65db04053efe ("PCI: Coalesce host 30 | bridge contiguous apertures"). 
The original approach sorts the apertures 31 | by address, but that makes NVMe stop working on QEMU ppc:sam460ex: 32 | PCI host bridge to bus 0002:00 33 | pci_bus 0002:00: root bus resource [io 0x0000-0xffff] 34 | pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff]) 35 | pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff]) 36 | 37 | After the offending commit: 38 | PCI host bridge to bus 0002:00 39 | pci_bus 0002:00: root bus resource [io 0x0000-0xffff] 40 | pci_bus 0002:00: root bus resource [mem 0xc0ee00000-0xc0eefffff] (bus address [0x00000000-0x000fffff]) 41 | pci_bus 0002:00: root bus resource [mem 0xd80000000-0xdffffffff] (bus address [0x80000000-0xffffffff]) 42 | 43 | Since the apertures on HP EliteDesk 805 G6 are already in ascending 44 | order, doing a precautious sorting is not necessary. 45 | 46 | Remove the sorting part to avoid the regression on ppc:sam460ex. 47 | 48 | Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=212013 49 | Cc: Guenter Roeck 50 | Suggested-by: Bjorn Helgaas 51 | Signed-off-by: Kai-Heng Feng 52 | Signed-off-by: Fabian Grünbichler 53 | Signed-off-by: Thomas Lamprecht 54 | --- 55 | drivers/pci/probe.c | 31 +++++++++++++++++++++++++++---- 56 | 1 file changed, 27 insertions(+), 4 deletions(-) 57 | 58 | diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c 59 | index 258350f80f6c..7ff9fcec365b 100644 60 | --- a/drivers/pci/probe.c 61 | +++ b/drivers/pci/probe.c 62 | @@ -883,11 +883,11 @@ static void pci_set_bus_msi_domain(struct pci_bus *bus) 63 | static int pci_register_host_bridge(struct pci_host_bridge *bridge) 64 | { 65 | struct device *parent = bridge->dev.parent; 66 | - struct resource_entry *window, *n; 67 | + struct resource_entry *window, *next, *n; 68 | struct pci_bus *bus, *b; 69 | - resource_size_t offset; 70 | + resource_size_t offset, next_offset; 71 | LIST_HEAD(resources); 72 | - struct resource *res; 73 | + struct 
resource *res, *next_res; 74 | char addr[64], *fmt; 75 | const char *name; 76 | int err; 77 | @@ -970,11 +970,34 @@ static int pci_register_host_bridge(struct pci_host_bridge *bridge) 78 | if (nr_node_ids > 1 && pcibus_to_node(bus) == NUMA_NO_NODE) 79 | dev_warn(&bus->dev, "Unknown NUMA node; performance will be reduced\n"); 80 | 81 | + /* Coalesce contiguous windows */ 82 | + resource_list_for_each_entry_safe(window, n, &resources) { 83 | + if (list_is_last(&window->node, &resources)) 84 | + break; 85 | + 86 | + next = list_next_entry(window, node); 87 | + offset = window->offset; 88 | + res = window->res; 89 | + next_offset = next->offset; 90 | + next_res = next->res; 91 | + 92 | + if (res->flags != next_res->flags || offset != next_offset) 93 | + continue; 94 | + 95 | + if (res->end + 1 == next_res->start) { 96 | + next_res->start = res->start; 97 | + res->flags = res->start = res->end = 0; 98 | + } 99 | + } 100 | + 101 | /* Add initial resources to the bus */ 102 | resource_list_for_each_entry_safe(window, n, &resources) { 103 | - list_move_tail(&window->node, &bridge->windows); 104 | offset = window->offset; 105 | res = window->res; 106 | + if (!res->end) 107 | + continue; 108 | + 109 | + list_move_tail(&window->node, &bridge->windows); 110 | 111 | if (res->flags & IORESOURCE_BUS) 112 | pci_bus_insert_busn_res(bus, bus->number, res->end); -------------------------------------------------------------------------------- /debian/patches/pve/0007-Ubuntu-remove-leftover-reference-to-ubuntu-hio-drive.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Thomas Lamprecht 3 | Date: Sat, 12 Mar 2022 16:19:10 +0100 4 | Subject: [PATCH] Ubuntu: remove leftover reference to ubuntu/hio driver 5 | 6 | A single reference to the hio driver was forgotten when it was removed 7 | recently. 
While this reference is not a problem for the build itself, it 8 | breaks the __clean target from 'scripts/Makefile.clean' here, as make 9 | cannot enter the "ubuntu/hio" folder for cleaning due to ENOENT. 10 | 11 | Fixes: 4ea6dd9afa0a0d ("UBUNTU: Remove ubuntu/hio driver") 12 | Signed-off-by: Thomas Lamprecht 13 | --- 14 | ubuntu/Makefile | 4 ---- 15 | 1 file changed, 4 deletions(-) 16 | 17 | diff --git a/ubuntu/Makefile b/ubuntu/Makefile 18 | index 27fa95ba242a..3bfc4494c069 100644 19 | --- a/ubuntu/Makefile 20 | +++ b/ubuntu/Makefile 21 | @@ -15,10 +15,6 @@ 22 | ## 23 | ## 24 | ## 25 | -obj-$(CONFIG_HIO) += hio/ 26 | -## 27 | -## 28 | -## 29 | obj-$(CONFIG_UBUNTU_HOST) += ubuntu-host/ 30 | ## 31 | ## 32 | -------------------------------------------------------------------------------- /debian/patches/pve/0008-do-not-generate-split-BTF-type-info-per-default.patch: -------------------------------------------------------------------------------- 1 | diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug 2 | index 25dc20175bba..7ff51a3f65e6 100644 3 | --- a/lib/Kconfig.debug 4 | +++ b/lib/Kconfig.debug 5 | @@ -314,7 +314,7 @@ config PAHOLE_HAS_SPLIT_BTF 6 | def_bool $(success, test `$(PAHOLE) --version | sed -E 's/v([0-9]+)\.([0-9]+)/\1\2/'` -ge "119") 7 | 8 | config DEBUG_INFO_BTF_MODULES 9 | - def_bool y 10 | + def_bool n 11 | depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF 12 | help 13 | Generate compact split BTF type information for kernel modules. 
-------------------------------------------------------------------------------- /debian/patches/pve/0012-KVM-allow-compiling-out-SMM-support.patch: -------------------------------------------------------------------------------- 1 | From 14fe6763b2618afb73a1109d7fda337cb06af0a2 Mon Sep 17 00:00:00 2001 2 | From: Paolo Bonzini 3 | Date: Tue, 25 Oct 2022 15:47:23 +0300 4 | Subject: [PATCH] KVM: allow compiling out SMM support 5 | 6 | Some users of KVM implement the UEFI variable store through a paravirtual device 7 | that does not require the "SMM lockbox" component of edk2; allow them to 8 | compile out system management mode, which is not a full implementation 9 | especially in how it interacts with nested virtualization. 10 | 11 | Suggested-by: Sean Christopherson 12 | Signed-off-by: Paolo Bonzini 13 | --- 14 | arch/x86/kvm/Kconfig | 11 ++++++++++ 15 | arch/x86/kvm/Makefile | 2 +- 16 | arch/x86/kvm/smm.h | 13 ++++++++++++ 17 | arch/x86/kvm/svm/svm.c | 2 ++ 18 | arch/x86/kvm/vmx/vmx.c | 2 ++ 19 | arch/x86/kvm/x86.c | 21 +++++++++++++++++-- 20 | tools/testing/selftests/kvm/x86_64/smm_test.c | 2 ++ 21 | 7 files changed, 50 insertions(+), 3 deletions(-) 22 | 23 | diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig 24 | index e3cbd7706136..20d5aea868a4 100644 25 | --- a/arch/x86/kvm/Kconfig 26 | +++ b/arch/x86/kvm/Kconfig 27 | @@ -86,6 +86,17 @@ config KVM_INTEL 28 | To compile this as a module, choose M here: the module 29 | will be called kvm-intel. 30 | 31 | +config KVM_SMM 32 | + bool "System Management Mode emulation" 33 | + default y 34 | + depends on KVM 35 | + help 36 | + Provides support for KVM to emulate System Management Mode (SMM) 37 | + in virtual machines. This can be used by the virtual machine 38 | + firmware to implement UEFI secure boot. 39 | + 40 | + If unsure, say Y. 
41 | + 42 | config X86_SGX_KVM 43 | bool "Software Guard eXtensions (SGX) Virtualization" 44 | depends on X86_SGX && KVM_INTEL 45 | diff --git a/arch/x86/kvm/Makefile b/arch/x86/kvm/Makefile 46 | index ec6f7656254b..6cf40f668277 100644 47 | --- a/arch/x86/kvm/Makefile 48 | +++ b/arch/x86/kvm/Makefile 49 | @@ -20,7 +20,7 @@ endif 50 | 51 | kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o 52 | kvm-$(CONFIG_KVM_XEN) += xen.o 53 | -kvm-y += smm.o 54 | +kvm-$(CONFIG_KVM_SMM) += smm.o 55 | 56 | kvm-intel-y += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \ 57 | vmx/evmcs.o vmx/nested.o vmx/posted_intr.o 58 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 59 | index b0602a92e511..4c699fee4492 100644 60 | --- a/arch/x86/kvm/smm.h 61 | +++ b/arch/x86/kvm/smm.h 62 | @@ -8,6 +8,7 @@ 63 | #define PUT_SMSTATE(type, buf, offset, val) \ 64 | *(type *)((buf) + (offset) - 0x7e00) = val 65 | 66 | +#ifdef CONFIG_KVM_SMM 67 | static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) 68 | { 69 | kvm_make_request(KVM_REQ_SMI, vcpu); 70 | @@ -23,5 +24,17 @@ void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm); 71 | void enter_smm(struct kvm_vcpu *vcpu); 72 | int emulator_leave_smm(struct x86_emulate_ctxt *ctxt); 73 | void process_smi(struct kvm_vcpu *vcpu); 74 | +#else 75 | +static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } 76 | +static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } 77 | +static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); } 78 | +static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } 79 | +static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } 80 | + 81 | +/* 82 | + * emulator_leave_smm is used as a function pointer, so the 83 | + * stub is defined in x86.c. 
84 | + */ 85 | +#endif 86 | 87 | #endif 88 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 89 | index f4ed4a02b109..a6807492bfae 100644 90 | --- a/arch/x86/kvm/svm/svm.c 91 | +++ b/arch/x86/kvm/svm/svm.c 92 | @@ -4151,6 +4151,8 @@ static bool svm_has_emulated_msr(struct kvm *kvm, u32 index) 93 | case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC: 94 | return false; 95 | case MSR_IA32_SMBASE: 96 | + if (!IS_ENABLED(CONFIG_KVM_SMM)) 97 | + return false; 98 | /* SEV-ES guests do not support SMM, so report false */ 99 | if (kvm && sev_es_guest(kvm)) 100 | return false; 101 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c 102 | index dc75de78ceb6..ce22860156c5 100644 103 | --- a/arch/x86/kvm/vmx/vmx.c 104 | +++ b/arch/x86/kvm/vmx/vmx.c 105 | @@ -6849,6 +6849,8 @@ static bool vmx_has_emulated_msr(struct kvm *kvm, u32 index) 106 | { 107 | switch (index) { 108 | case MSR_IA32_SMBASE: 109 | + if (!IS_ENABLED(CONFIG_KVM_SMM)) 110 | + return false; 111 | /* 112 | * We cannot do SMM unless we can run the guest in big 113 | * real mode. 
114 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c 115 | index 77e0ca43ee27..14ef42c6efbd 100644 116 | --- a/arch/x86/kvm/x86.c 117 | +++ b/arch/x86/kvm/x86.c 118 | @@ -3631,7 +3631,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 119 | break; 120 | } 121 | case MSR_IA32_SMBASE: 122 | - if (!msr_info->host_initiated) 123 | + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) 124 | return 1; 125 | vcpu->arch.smbase = data; 126 | break; 127 | @@ -4047,7 +4047,7 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) 128 | msr_info->data = vcpu->arch.ia32_misc_enable_msr; 129 | break; 130 | case MSR_IA32_SMBASE: 131 | - if (!msr_info->host_initiated) 132 | + if (!IS_ENABLED(CONFIG_KVM_SMM) || !msr_info->host_initiated) 133 | return 1; 134 | msr_info->data = vcpu->arch.smbase; 135 | break; 136 | @@ -4421,6 +4421,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) 137 | r |= KVM_X86_DISABLE_EXITS_MWAIT; 138 | break; 139 | case KVM_CAP_X86_SMM: 140 | + if (!IS_ENABLED(CONFIG_KVM_SMM)) 141 | + break; 142 | + 143 | /* SMBASE is usually relocated above 1M on modern chipsets, 144 | * and SMM handlers might indeed rely on 4G segment limits, 145 | * so do not report SMM to be available if real mode is 146 | @@ -5146,6 +5149,12 @@ static int kvm_vcpu_ioctl_x86_set_vcpu_events(struct kvm_vcpu *vcpu, 147 | vcpu->arch.apic->sipi_vector = events->sipi_vector; 148 | 149 | if (events->flags & KVM_VCPUEVENT_VALID_SMM) { 150 | + if (!IS_ENABLED(CONFIG_KVM_SMM) && 151 | + (events->smi.smm || 152 | + events->smi.pending || 153 | + events->smi.smm_inside_nmi)) 154 | + return -EINVAL; 155 | + 156 | if (!!(vcpu->arch.hflags & HF_SMM_MASK) != events->smi.smm) { 157 | kvm_x86_ops.nested_ops->leave_nested(vcpu); 158 | kvm_smm_changed(vcpu, events->smi.smm); 159 | @@ -8021,6 +8030,14 @@ static unsigned emulator_get_hflags(struct x86_emulate_ctxt *ctxt) 160 | return emul_to_vcpu(ctxt)->arch.hflags; 161 | } 162 | 163 
| +#ifndef CONFIG_KVM_SMM 164 | +static int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) 165 | +{ 166 | + WARN_ON_ONCE(1); 167 | + return X86EMUL_UNHANDLEABLE; 168 | +} 169 | +#endif 170 | + 171 | static void emulator_triple_fault(struct x86_emulate_ctxt *ctxt) 172 | { 173 | kvm_make_request(KVM_REQ_TRIPLE_FAULT, emul_to_vcpu(ctxt)); 174 | diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c 175 | index 1f136a81858e..cb38a478e1f6 100644 176 | --- a/tools/testing/selftests/kvm/x86_64/smm_test.c 177 | +++ b/tools/testing/selftests/kvm/x86_64/smm_test.c 178 | @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) 179 | struct kvm_x86_state *state; 180 | int stage, stage_reported; 181 | 182 | + TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM)); 183 | + 184 | /* Create VM */ 185 | vm = vm_create_with_one_vcpu(&vcpu, guest_code); 186 | 187 | -- 188 | 2.38.1 189 | 190 | -------------------------------------------------------------------------------- /debian/patches/pve/0013-KVM-x86-compile-out-vendor-specific-code-if-SMM-is-d.patch: -------------------------------------------------------------------------------- 1 | From 3b69dd23b153e6f4a512a9495612a2664d236872 Mon Sep 17 00:00:00 2001 2 | From: Paolo Bonzini 3 | Date: Tue, 25 Oct 2022 15:47:24 +0300 4 | Subject: [PATCH] KVM: x86: compile out vendor-specific code if SMM is disabled 5 | 6 | Vendor-specific code that deals with SMI injection and saving/restoring 7 | SMM state is not needed if CONFIG_KVM_SMM is disabled, so remove the 8 | four callbacks smi_allowed, enter_smm, leave_smm and enable_smi_window. 9 | The users in svm/nested.c and x86.c also have to be compiled out; the 10 | amount of #ifdef'ed code is small and it's not worth moving it to 11 | smm.c. 12 | 13 | enter_smm is now used only within #ifdef CONFIG_KVM_SMM, and the stub 14 | can therefore be removed. 
15 | 16 | Signed-off-by: Paolo Bonzini 17 | --- 18 | arch/x86/include/asm/kvm-x86-ops.h | 2 ++ 19 | arch/x86/include/asm/kvm_host.h | 2 ++ 20 | arch/x86/kvm/smm.h | 1 - 21 | arch/x86/kvm/svm/nested.c | 2 ++ 22 | arch/x86/kvm/svm/svm.c | 4 ++++ 23 | arch/x86/kvm/vmx/vmx.c | 4 ++++ 24 | arch/x86/kvm/x86.c | 4 ++++ 25 | 7 files changed, 18 insertions(+), 1 deletion(-) 26 | 27 | diff --git a/arch/x86/include/asm/kvm-x86-ops.h b/arch/x86/include/asm/kvm-x86-ops.h 28 | index 51f777071584..d92ec198db2a 100644 29 | --- a/arch/x86/include/asm/kvm-x86-ops.h 30 | +++ b/arch/x86/include/asm/kvm-x86-ops.h 31 | @@ -110,10 +110,12 @@ KVM_X86_OP_OPTIONAL_RET0(dy_apicv_has_pending_interrupt) 32 | KVM_X86_OP_OPTIONAL(set_hv_timer) 33 | KVM_X86_OP_OPTIONAL(cancel_hv_timer) 34 | KVM_X86_OP(setup_mce) 35 | +#ifdef CONFIG_KVM_SMM 36 | KVM_X86_OP(smi_allowed) 37 | KVM_X86_OP(enter_smm) 38 | KVM_X86_OP(leave_smm) 39 | KVM_X86_OP(enable_smi_window) 40 | +#endif 41 | KVM_X86_OP_OPTIONAL(mem_enc_ioctl) 42 | KVM_X86_OP_OPTIONAL(mem_enc_register_region) 43 | KVM_X86_OP_OPTIONAL(mem_enc_unregister_region) 44 | diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 45 | index 5b466eb0feca..3f6a31175db1 100644 46 | --- a/arch/x86/include/asm/kvm_host.h 47 | +++ b/arch/x86/include/asm/kvm_host.h 48 | @@ -1600,10 +1600,12 @@ struct kvm_x86_ops { 49 | 50 | void (*setup_mce)(struct kvm_vcpu *vcpu); 51 | 52 | +#ifdef CONFIG_KVM_SMM 53 | int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); 54 | int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); 55 | int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); 56 | void (*enable_smi_window)(struct kvm_vcpu *vcpu); 57 | +#endif 58 | 59 | int (*mem_enc_ioctl)(struct kvm *kvm, void __user *argp); 60 | int (*mem_enc_register_region)(struct kvm *kvm, struct kvm_enc_region *argp); 61 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 62 | index 4c699fee4492..7ccce6b655ca 100644 63 | --- a/arch/x86/kvm/smm.h 64 | +++ 
b/arch/x86/kvm/smm.h 65 | @@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu); 66 | static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } 67 | static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } 68 | static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); } 69 | -static inline void enter_smm(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } 70 | static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } 71 | 72 | /* 73 | diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c 74 | index d6cc9963b04a..ec4d6be70639 100644 75 | --- a/arch/x86/kvm/svm/nested.c 76 | +++ b/arch/x86/kvm/svm/nested.c 77 | @@ -1384,6 +1384,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) 78 | return 0; 79 | } 80 | 81 | +#ifdef CONFIG_KVM_SMM 82 | if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { 83 | if (block_nested_events) 84 | return -EBUSY; 85 | @@ -1392,6 +1393,7 @@ static int svm_check_nested_events(struct kvm_vcpu *vcpu) 86 | nested_svm_simple_vmexit(svm, SVM_EXIT_SMI); 87 | return 0; 88 | } 89 | +#endif 90 | 91 | if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { 92 | if (block_nested_events) 93 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 94 | index a6807492bfae..e69390909d08 100644 95 | --- a/arch/x86/kvm/svm/svm.c 96 | +++ b/arch/x86/kvm/svm/svm.c 97 | @@ -4409,6 +4409,7 @@ static void svm_setup_mce(struct kvm_vcpu *vcpu) 98 | vcpu->arch.mcg_cap &= 0x1ff; 99 | } 100 | 101 | +#ifdef CONFIG_KVM_SMM 102 | bool svm_smi_blocked(struct kvm_vcpu *vcpu) 103 | { 104 | struct vcpu_svm *svm = to_svm(vcpu); 105 | @@ -4558,6 +4559,7 @@ static void svm_enable_smi_window(struct kvm_vcpu *vcpu) 106 | /* We must be in SMM; RSM will cause a vmexit anyway. 
*/ 107 | } 108 | } 109 | +#endif 110 | 111 | static bool svm_can_emulate_instruction(struct kvm_vcpu *vcpu, int emul_type, 112 | void *insn, int insn_len) 113 | @@ -4841,10 +4843,12 @@ static struct kvm_x86_ops svm_x86_ops __initdata = { 114 | .pi_update_irte = avic_pi_update_irte, 115 | .setup_mce = svm_setup_mce, 116 | 117 | +#ifdef CONFIG_KVM_SMM 118 | .smi_allowed = svm_smi_allowed, 119 | .enter_smm = svm_enter_smm, 120 | .leave_smm = svm_leave_smm, 121 | .enable_smi_window = svm_enable_smi_window, 122 | +#endif 123 | 124 | .mem_enc_ioctl = sev_mem_enc_ioctl, 125 | .mem_enc_register_region = sev_mem_enc_register_region, 126 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c 127 | index ce22860156c5..8cfb40cfad10 100644 128 | --- a/arch/x86/kvm/vmx/vmx.c 129 | +++ b/arch/x86/kvm/vmx/vmx.c 130 | @@ -7913,6 +7913,7 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu) 131 | ~FEAT_CTL_LMCE_ENABLED; 132 | } 133 | 134 | +#ifdef CONFIG_KVM_SMM 135 | static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) 136 | { 137 | /* we need a nested vmexit to enter SMM, postpone if run is pending */ 138 | @@ -7967,6 +7968,7 @@ static void vmx_enable_smi_window(struct kvm_vcpu *vcpu) 139 | { 140 | /* RSM will cause a vmexit anyway. 
*/ 141 | } 142 | +#endif 143 | 144 | static bool vmx_apic_init_signal_blocked(struct kvm_vcpu *vcpu) 145 | { 146 | @@ -8134,10 +8136,12 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = { 147 | 148 | .setup_mce = vmx_setup_mce, 149 | 150 | +#ifdef CONFIG_KVM_SMM 151 | .smi_allowed = vmx_smi_allowed, 152 | .enter_smm = vmx_enter_smm, 153 | .leave_smm = vmx_leave_smm, 154 | .enable_smi_window = vmx_enable_smi_window, 155 | +#endif 156 | 157 | .can_emulate_instruction = vmx_can_emulate_instruction, 158 | .apic_init_signal_blocked = vmx_apic_init_signal_blocked, 159 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c 160 | index 14ef42c6efbd..33c8fb8f4c61 100644 161 | --- a/arch/x86/kvm/x86.c 162 | +++ b/arch/x86/kvm/x86.c 163 | @@ -9696,6 +9696,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) 164 | * in order to make progress and get back here for another iteration. 165 | * The kvm_x86_ops hooks communicate this by returning -EBUSY. 166 | */ 167 | +#ifdef CONFIG_KVM_SMM 168 | if (vcpu->arch.smi_pending) { 169 | r = can_inject ? static_call(kvm_x86_smi_allowed)(vcpu, true) : -EBUSY; 170 | if (r < 0) 171 | @@ -9708,6 +9709,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit) 172 | } else 173 | static_call(kvm_x86_enable_smi_window)(vcpu); 174 | } 175 | +#endif 176 | 177 | if (vcpu->arch.nmi_pending) { 178 | r = can_inject ? 
static_call(kvm_x86_nmi_allowed)(vcpu, true) : -EBUSY; 179 | @@ -12300,10 +12302,12 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) 180 | static_call(kvm_x86_nmi_allowed)(vcpu, false))) 181 | return true; 182 | 183 | +#ifdef CONFIG_KVM_SMM 184 | if (kvm_test_request(KVM_REQ_SMI, vcpu) || 185 | (vcpu->arch.smi_pending && 186 | static_call(kvm_x86_smi_allowed)(vcpu, false))) 187 | return true; 188 | +#endif 189 | 190 | if (kvm_arch_interrupt_allowed(vcpu) && 191 | (kvm_cpu_has_interrupt(vcpu) || 192 | -- 193 | 2.38.1 194 | 195 | -------------------------------------------------------------------------------- /debian/patches/pve/0014-KVM-x86-remove-SMRAM-address-space-if-SMM-is-not-sup.patch: -------------------------------------------------------------------------------- 1 | From 8f5e12c8768ecff91ccf335e2242ab64482c01fb Mon Sep 17 00:00:00 2001 2 | From: Paolo Bonzini 3 | Date: Tue, 25 Oct 2022 15:47:25 +0300 4 | Subject: [PATCH] KVM: x86: remove SMRAM address space if SMM is not supported 5 | 6 | If CONFIG_KVM_SMM is not defined HF_SMM_MASK will always be zero, and 7 | we can spare userspace the hassle of setting up the SMRAM address space 8 | simply by reporting that only one address space is supported. 9 | 10 | Signed-off-by: Paolo Bonzini 11 | --- 12 | arch/x86/include/asm/kvm_host.h | 13 ++++++++----- 13 | 1 file changed, 8 insertions(+), 5 deletions(-) 14 | 15 | diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 16 | index 3f6a31175db1..dcaa0b43baef 100644 17 | --- a/arch/x86/include/asm/kvm_host.h 18 | +++ b/arch/x86/include/asm/kvm_host.h 19 | @@ -1988,11 +1988,14 @@ enum { 20 | #define HF_SMM_MASK (1 << 6) 21 | #define HF_SMM_INSIDE_NMI_MASK (1 << 7) 22 | 23 | -#define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE 24 | -#define KVM_ADDRESS_SPACE_NUM 2 25 | - 26 | -#define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 
1 : 0) 27 | -#define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) 28 | +#ifdef CONFIG_KVM_SMM 29 | +# define __KVM_VCPU_MULTIPLE_ADDRESS_SPACE 30 | +# define KVM_ADDRESS_SPACE_NUM 2 31 | +# define kvm_arch_vcpu_memslots_id(vcpu) ((vcpu)->arch.hflags & HF_SMM_MASK ? 1 : 0) 32 | +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, (role).smm) 33 | +#else 34 | +# define kvm_memslots_for_spte_role(kvm, role) __kvm_memslots(kvm, 0) 35 | +#endif 36 | 37 | #define KVM_ARCH_WANT_MMU_NOTIFIER 38 | 39 | -- 40 | 2.38.1 41 | 42 | -------------------------------------------------------------------------------- /debian/patches/pve/0015-KVM-x86-do-not-define-KVM_REQ_SMI-if-SMM-disabled.patch: -------------------------------------------------------------------------------- 1 | From 20e250dd42abe942ed3498c5b5be02699e38cc4a Mon Sep 17 00:00:00 2001 2 | From: Paolo Bonzini 3 | Date: Tue, 25 Oct 2022 15:47:26 +0300 4 | Subject: [PATCH] KVM: x86: do not define KVM_REQ_SMI if SMM disabled 5 | 6 | This ensures that all the relevant code is compiled out, in fact 7 | the process_smi stub can be removed too. 
8 | 9 | Signed-off-by: Paolo Bonzini 10 | --- 11 | arch/x86/include/asm/kvm_host.h | 2 ++ 12 | arch/x86/kvm/smm.h | 1 - 13 | arch/x86/kvm/x86.c | 6 ++++++ 14 | 3 files changed, 8 insertions(+), 1 deletion(-) 15 | 16 | diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 17 | index dcaa0b43baef..87ee187b3f26 100644 18 | --- a/arch/x86/include/asm/kvm_host.h 19 | +++ b/arch/x86/include/asm/kvm_host.h 20 | @@ -81,7 +81,9 @@ 21 | #define KVM_REQ_NMI KVM_ARCH_REQ(9) 22 | #define KVM_REQ_PMU KVM_ARCH_REQ(10) 23 | #define KVM_REQ_PMI KVM_ARCH_REQ(11) 24 | +#ifdef CONFIG_KVM_SMM 25 | #define KVM_REQ_SMI KVM_ARCH_REQ(12) 26 | +#endif 27 | #define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) 28 | #define KVM_REQ_MCLOCK_INPROGRESS \ 29 | KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) 30 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 31 | index 7ccce6b655ca..a6795b93ba30 100644 32 | --- a/arch/x86/kvm/smm.h 33 | +++ b/arch/x86/kvm/smm.h 34 | @@ -28,7 +28,6 @@ void process_smi(struct kvm_vcpu *vcpu); 35 | static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) { return -ENOTTY; } 36 | static inline bool is_smm(struct kvm_vcpu *vcpu) { return false; } 37 | static inline void kvm_smm_changed(struct kvm_vcpu *vcpu, bool in_smm) { WARN_ON_ONCE(1); } 38 | -static inline void process_smi(struct kvm_vcpu *vcpu) { WARN_ON_ONCE(1); } 39 | 40 | /* 41 | * emulator_leave_smm is used as a function pointer, so the 42 | diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c 43 | index 33c8fb8f4c61..7b109120f47d 100644 44 | --- a/arch/x86/kvm/x86.c 45 | +++ b/arch/x86/kvm/x86.c 46 | @@ -5013,8 +5013,10 @@ static void kvm_vcpu_ioctl_x86_get_vcpu_events(struct kvm_vcpu *vcpu, 47 | { 48 | process_nmi(vcpu); 49 | 50 | +#ifdef CONFIG_KVM_SMM 51 | if (kvm_check_request(KVM_REQ_SMI, vcpu)) 52 | process_smi(vcpu); 53 | +#endif 54 | 55 | /* 56 | * In guest mode, payload delivery should be deferred, 57 | @@ -10027,8 +10029,10 @@ static int 
vcpu_enter_guest(struct kvm_vcpu *vcpu) 58 | } 59 | if (kvm_check_request(KVM_REQ_STEAL_UPDATE, vcpu)) 60 | record_steal_time(vcpu); 61 | +#ifdef CONFIG_KVM_SMM 62 | if (kvm_check_request(KVM_REQ_SMI, vcpu)) 63 | process_smi(vcpu); 64 | +#endif 65 | if (kvm_check_request(KVM_REQ_NMI, vcpu)) 66 | process_nmi(vcpu); 67 | if (kvm_check_request(KVM_REQ_PMU, vcpu)) 68 | @@ -12351,7 +12355,9 @@ bool kvm_arch_dy_runnable(struct kvm_vcpu *vcpu) 69 | return true; 70 | 71 | if (kvm_test_request(KVM_REQ_NMI, vcpu) || 72 | +#ifdef CONFIG_KVM_SMM 73 | kvm_test_request(KVM_REQ_SMI, vcpu) || 74 | +#endif 75 | kvm_test_request(KVM_REQ_EVENT, vcpu)) 76 | return true; 77 | 78 | -- 79 | 2.38.1 80 | 81 | -------------------------------------------------------------------------------- /debian/patches/pve/0016-bug-introduce-ASSERT_STRUCT_OFFSET.patch: -------------------------------------------------------------------------------- 1 | From cd22e3c62bdf90babba3bdf1bc2b48e4e2e664d5 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:27 +0300 4 | Subject: [PATCH] bug: introduce ASSERT_STRUCT_OFFSET 5 | 6 | ASSERT_STRUCT_OFFSET allows to assert during the build of 7 | the kernel that a field in a struct have an expected offset. 8 | 9 | KVM used to have such macro, but there is almost nothing KVM specific 10 | in it so move it to build_bug.h, so that it can be used in other 11 | places in KVM. 12 | 13 | Signed-off-by: Maxim Levitsky 14 | --- 15 | arch/x86/kvm/vmx/vmcs12.h | 5 ++--- 16 | include/linux/build_bug.h | 9 +++++++++ 17 | 2 files changed, 11 insertions(+), 3 deletions(-) 18 | 19 | diff --git a/arch/x86/kvm/vmx/vmcs12.h b/arch/x86/kvm/vmx/vmcs12.h 20 | index 746129ddd5ae..01936013428b 100644 21 | --- a/arch/x86/kvm/vmx/vmcs12.h 22 | +++ b/arch/x86/kvm/vmx/vmcs12.h 23 | @@ -208,9 +208,8 @@ struct __packed vmcs12 { 24 | /* 25 | * For save/restore compatibility, the vmcs12 field offsets must not change. 
26 | */ 27 | -#define CHECK_OFFSET(field, loc) \ 28 | - BUILD_BUG_ON_MSG(offsetof(struct vmcs12, field) != (loc), \ 29 | - "Offset of " #field " in struct vmcs12 has changed.") 30 | +#define CHECK_OFFSET(field, loc) \ 31 | + ASSERT_STRUCT_OFFSET(struct vmcs12, field, loc) 32 | 33 | static inline void vmx_check_vmcs12_offsets(void) 34 | { 35 | diff --git a/include/linux/build_bug.h b/include/linux/build_bug.h 36 | index e3a0be2c90ad..3aa3640f8c18 100644 37 | --- a/include/linux/build_bug.h 38 | +++ b/include/linux/build_bug.h 39 | @@ -77,4 +77,13 @@ 40 | #define static_assert(expr, ...) __static_assert(expr, ##__VA_ARGS__, #expr) 41 | #define __static_assert(expr, msg, ...) _Static_assert(expr, msg) 42 | 43 | + 44 | +/* 45 | + * Compile time check that field has an expected offset 46 | + */ 47 | +#define ASSERT_STRUCT_OFFSET(type, field, expected_offset) \ 48 | + BUILD_BUG_ON_MSG(offsetof(type, field) != (expected_offset), \ 49 | + "Offset of " #field " in " #type " has changed.") 50 | + 51 | + 52 | #endif /* _LINUX_BUILD_BUG_H */ 53 | -- 54 | 2.38.1 55 | 56 | -------------------------------------------------------------------------------- /debian/patches/pve/0017-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch: -------------------------------------------------------------------------------- 1 | From 8e1a89b66d8a8b80d135a072a0ec4147f79ac2f3 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:28 +0300 4 | Subject: [PATCH] KVM: x86: emulator: em_sysexit should update ctxt->mode 5 | 6 | SYSEXIT is one of the instructions that can change the 7 | processor mode, thus ctxt->mode should be updated after it. 8 | 9 | Note that this is likely a benign bug, because the only problematic 10 | mode change is from 32 bit to 64 bit which can lead to truncation of RIP, 11 | and it is not possible to do with sysexit, 12 | since sysexit running in 32 bit mode will be limited to 32 bit version. 
13 | 14 | Signed-off-by: Maxim Levitsky 15 | --- 16 | arch/x86/kvm/emulate.c | 1 + 17 | 1 file changed, 1 insertion(+) 18 | 19 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c 20 | index 853262d4fb6c..5ee1998dd38e 100644 21 | --- a/arch/x86/kvm/emulate.c 22 | +++ b/arch/x86/kvm/emulate.c 23 | @@ -2523,6 +2523,7 @@ static int em_sysexit(struct x86_emulate_ctxt *ctxt) 24 | ops->set_segment(ctxt, ss_sel, &ss, 0, VCPU_SREG_SS); 25 | 26 | ctxt->_eip = rdx; 27 | + ctxt->mode = usermode; 28 | *reg_write(ctxt, VCPU_REGS_RSP) = rcx; 29 | 30 | return X86EMUL_CONTINUE; 31 | -- 32 | 2.38.1 33 | 34 | -------------------------------------------------------------------------------- /debian/patches/pve/0018-KVM-x86-emulator-introduce-emulator_recalc_and_set_m.patch: -------------------------------------------------------------------------------- 1 | From d2c4fc069a073d621d2cbc97ffe9547754784639 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:29 +0300 4 | Subject: [PATCH] KVM: x86: emulator: introduce emulator_recalc_and_set_mode 5 | 6 | Some instructions update the cpu execution mode, which needs to update the 7 | emulation mode. 8 | 9 | Extract this code, and make assign_eip_far use it. 10 | 11 | assign_eip_far now reads CS, instead of getting it via a parameter, 12 | which is ok, because callers always assign CS to the same value 13 | before calling this function. 14 | 15 | No functional change is intended. 
16 | 17 | Signed-off-by: Maxim Levitsky 18 | --- 19 | arch/x86/kvm/emulate.c | 85 ++++++++++++++++++++++++++++-------------- 20 | 1 file changed, 57 insertions(+), 28 deletions(-) 21 | 22 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c 23 | index 5ee1998dd38e..d75d33d60cb8 100644 24 | --- a/arch/x86/kvm/emulate.c 25 | +++ b/arch/x86/kvm/emulate.c 26 | @@ -760,8 +760,7 @@ static int linearize(struct x86_emulate_ctxt *ctxt, 27 | ctxt->mode, linear); 28 | } 29 | 30 | -static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, 31 | - enum x86emul_mode mode) 32 | +static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst) 33 | { 34 | ulong linear; 35 | int rc; 36 | @@ -771,41 +770,71 @@ static inline int assign_eip(struct x86_emulate_ctxt *ctxt, ulong dst, 37 | 38 | if (ctxt->op_bytes != sizeof(unsigned long)) 39 | addr.ea = dst & ((1UL << (ctxt->op_bytes << 3)) - 1); 40 | - rc = __linearize(ctxt, addr, &max_size, 1, false, true, mode, &linear); 41 | + rc = __linearize(ctxt, addr, &max_size, 1, false, true, ctxt->mode, &linear); 42 | if (rc == X86EMUL_CONTINUE) 43 | ctxt->_eip = addr.ea; 44 | return rc; 45 | } 46 | 47 | +static inline int emulator_recalc_and_set_mode(struct x86_emulate_ctxt *ctxt) 48 | +{ 49 | + u64 efer; 50 | + struct desc_struct cs; 51 | + u16 selector; 52 | + u32 base3; 53 | + 54 | + ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 55 | + 56 | + if (!(ctxt->ops->get_cr(ctxt, 0) & X86_CR0_PE)) { 57 | + /* Real mode. cpu must not have long mode active */ 58 | + if (efer & EFER_LMA) 59 | + return X86EMUL_UNHANDLEABLE; 60 | + ctxt->mode = X86EMUL_MODE_REAL; 61 | + return X86EMUL_CONTINUE; 62 | + } 63 | + 64 | + if (ctxt->eflags & X86_EFLAGS_VM) { 65 | + /* Protected/VM86 mode. 
cpu must not have long mode active */ 66 | + if (efer & EFER_LMA) 67 | + return X86EMUL_UNHANDLEABLE; 68 | + ctxt->mode = X86EMUL_MODE_VM86; 69 | + return X86EMUL_CONTINUE; 70 | + } 71 | + 72 | + if (!ctxt->ops->get_segment(ctxt, &selector, &cs, &base3, VCPU_SREG_CS)) 73 | + return X86EMUL_UNHANDLEABLE; 74 | + 75 | + if (efer & EFER_LMA) { 76 | + if (cs.l) { 77 | + /* Proper long mode */ 78 | + ctxt->mode = X86EMUL_MODE_PROT64; 79 | + } else if (cs.d) { 80 | + /* 32 bit compatibility mode*/ 81 | + ctxt->mode = X86EMUL_MODE_PROT32; 82 | + } else { 83 | + ctxt->mode = X86EMUL_MODE_PROT16; 84 | + } 85 | + } else { 86 | + /* Legacy 32 bit / 16 bit mode */ 87 | + ctxt->mode = cs.d ? X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 88 | + } 89 | + 90 | + return X86EMUL_CONTINUE; 91 | +} 92 | + 93 | static inline int assign_eip_near(struct x86_emulate_ctxt *ctxt, ulong dst) 94 | { 95 | - return assign_eip(ctxt, dst, ctxt->mode); 96 | + return assign_eip(ctxt, dst); 97 | } 98 | 99 | -static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst, 100 | - const struct desc_struct *cs_desc) 101 | +static int assign_eip_far(struct x86_emulate_ctxt *ctxt, ulong dst) 102 | { 103 | - enum x86emul_mode mode = ctxt->mode; 104 | - int rc; 105 | + int rc = emulator_recalc_and_set_mode(ctxt); 106 | 107 | -#ifdef CONFIG_X86_64 108 | - if (ctxt->mode >= X86EMUL_MODE_PROT16) { 109 | - if (cs_desc->l) { 110 | - u64 efer = 0; 111 | + if (rc != X86EMUL_CONTINUE) 112 | + return rc; 113 | 114 | - ctxt->ops->get_msr(ctxt, MSR_EFER, &efer); 115 | - if (efer & EFER_LMA) 116 | - mode = X86EMUL_MODE_PROT64; 117 | - } else 118 | - mode = X86EMUL_MODE_PROT32; /* temporary value */ 119 | - } 120 | -#endif 121 | - if (mode == X86EMUL_MODE_PROT16 || mode == X86EMUL_MODE_PROT32) 122 | - mode = cs_desc->d ? 
X86EMUL_MODE_PROT32 : X86EMUL_MODE_PROT16; 123 | - rc = assign_eip(ctxt, dst, mode); 124 | - if (rc == X86EMUL_CONTINUE) 125 | - ctxt->mode = mode; 126 | - return rc; 127 | + return assign_eip(ctxt, dst); 128 | } 129 | 130 | static inline int jmp_rel(struct x86_emulate_ctxt *ctxt, int rel) 131 | @@ -2139,7 +2168,7 @@ static int em_jmp_far(struct x86_emulate_ctxt *ctxt) 132 | if (rc != X86EMUL_CONTINUE) 133 | return rc; 134 | 135 | - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); 136 | + rc = assign_eip_far(ctxt, ctxt->src.val); 137 | /* Error handling is not implemented. */ 138 | if (rc != X86EMUL_CONTINUE) 139 | return X86EMUL_UNHANDLEABLE; 140 | @@ -2217,7 +2246,7 @@ static int em_ret_far(struct x86_emulate_ctxt *ctxt) 141 | &new_desc); 142 | if (rc != X86EMUL_CONTINUE) 143 | return rc; 144 | - rc = assign_eip_far(ctxt, eip, &new_desc); 145 | + rc = assign_eip_far(ctxt, eip); 146 | /* Error handling is not implemented. */ 147 | if (rc != X86EMUL_CONTINUE) 148 | return X86EMUL_UNHANDLEABLE; 149 | @@ -3117,7 +3146,7 @@ static int em_call_far(struct x86_emulate_ctxt *ctxt) 150 | if (rc != X86EMUL_CONTINUE) 151 | return rc; 152 | 153 | - rc = assign_eip_far(ctxt, ctxt->src.val, &new_desc); 154 | + rc = assign_eip_far(ctxt, ctxt->src.val); 155 | if (rc != X86EMUL_CONTINUE) 156 | goto fail; 157 | 158 | -- 159 | 2.38.1 160 | 161 | -------------------------------------------------------------------------------- /debian/patches/pve/0019-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch: -------------------------------------------------------------------------------- 1 | From 6d83f3690f8f3026df587db2264f0917ba747de9 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:30 +0300 4 | Subject: [PATCH] KVM: x86: emulator: update the emulation mode after rsm 5 | 6 | Update the emulation mode after RSM so that RIP will be correctly 7 | written back, because the RSM instruction can switch the CPU mode from 8 | 32 bit (or less) to 64 
bit. 9 | 10 | This fixes a guest crash in case the #SMI is received while the guest 11 | runs a code from an address > 32 bit. 12 | 13 | Signed-off-by: Maxim Levitsky 14 | --- 15 | arch/x86/kvm/emulate.c | 2 +- 16 | 1 file changed, 1 insertion(+), 1 deletion(-) 17 | 18 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c 19 | index d75d33d60cb8..4365137d823b 100644 20 | --- a/arch/x86/kvm/emulate.c 21 | +++ b/arch/x86/kvm/emulate.c 22 | @@ -2313,7 +2313,7 @@ static int em_rsm(struct x86_emulate_ctxt *ctxt) 23 | if (ctxt->ops->leave_smm(ctxt)) 24 | ctxt->ops->triple_fault(ctxt); 25 | 26 | - return X86EMUL_CONTINUE; 27 | + return emulator_recalc_and_set_mode(ctxt); 28 | } 29 | 30 | static void 31 | -- 32 | 2.38.1 33 | 34 | -------------------------------------------------------------------------------- /debian/patches/pve/0020-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch: -------------------------------------------------------------------------------- 1 | From 3a99d9781d2d3ccf58d70b1dc7edfda886f5d271 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:31 +0300 4 | Subject: [PATCH] KVM: x86: emulator: update the emulation mode after CR0 write 5 | 6 | Update the emulation mode when handling writes to CR0, because 7 | toggling CR0.PE switches between Real and Protected Mode, and toggling 8 | CR0.PG when EFER.LME=1 switches between Long and Protected Mode. 9 | 10 | This is likely a benign bug because there is no writeback of state, 11 | other than the RIP increment, and when toggling CR0.PE, the CPU has 12 | to execute code from a very low memory address. 
13 | 14 | Signed-off-by: Maxim Levitsky 15 | --- 16 | arch/x86/kvm/emulate.c | 16 +++++++++++++++- 17 | 1 file changed, 15 insertions(+), 1 deletion(-) 18 | 19 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c 20 | index 4365137d823b..5d7d4c1be843 100644 21 | --- a/arch/x86/kvm/emulate.c 22 | +++ b/arch/x86/kvm/emulate.c 23 | @@ -3288,11 +3288,25 @@ static int em_movbe(struct x86_emulate_ctxt *ctxt) 24 | 25 | static int em_cr_write(struct x86_emulate_ctxt *ctxt) 26 | { 27 | - if (ctxt->ops->set_cr(ctxt, ctxt->modrm_reg, ctxt->src.val)) 28 | + int cr_num = ctxt->modrm_reg; 29 | + int r; 30 | + 31 | + if (ctxt->ops->set_cr(ctxt, cr_num, ctxt->src.val)) 32 | return emulate_gp(ctxt, 0); 33 | 34 | /* Disable writeback. */ 35 | ctxt->dst.type = OP_NONE; 36 | + 37 | + if (cr_num == 0) { 38 | + /* 39 | + * CR0 write might have updated CR0.PE and/or CR0.PG 40 | + * which can affect the cpu's execution mode. 41 | + */ 42 | + r = emulator_recalc_and_set_mode(ctxt); 43 | + if (r != X86EMUL_CONTINUE) 44 | + return r; 45 | + } 46 | + 47 | return X86EMUL_CONTINUE; 48 | } 49 | 50 | -- 51 | 2.38.1 52 | 53 | -------------------------------------------------------------------------------- /debian/patches/pve/0021-KVM-x86-smm-number-of-GPRs-in-the-SMRAM-image-depend.patch: -------------------------------------------------------------------------------- 1 | From 80377e1fac45a547a43511ee5c8d783a7ec37be5 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:32 +0300 4 | Subject: [PATCH] KVM: x86: smm: number of GPRs in the SMRAM image depends on 5 | the image format 6 | 7 | On 64 bit host, if the guest doesn't have X86_FEATURE_LM, KVM will 8 | access 16 gprs to 32-bit smram image, causing out-of-bounds ram 9 | access. 10 | 11 | On 32 bit host, the rsm_load_state_64/enter_smm_save_state_64 12 | is compiled out, thus access overflow can't happen.
13 | 14 | Fixes: b443183a25ab61 ("KVM: x86: Reduce the number of emulator GPRs to '8' for 32-bit KVM") 15 | 16 | Signed-off-by: Maxim Levitsky 17 | Reviewed-by: Sean Christopherson 18 | --- 19 | arch/x86/kvm/emulate.c | 1 + 20 | arch/x86/kvm/smm.c | 4 ++-- 21 | 2 files changed, 3 insertions(+), 2 deletions(-) 22 | 23 | diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c 24 | index 5d7d4c1be843..02a01c65471b 100644 25 | --- a/arch/x86/kvm/emulate.c 26 | +++ b/arch/x86/kvm/emulate.c 27 | @@ -2305,6 +2305,7 @@ static int em_lseg(struct x86_emulate_ctxt *ctxt) 28 | return rc; 29 | } 30 | 31 | + 32 | static int em_rsm(struct x86_emulate_ctxt *ctxt) 33 | { 34 | if ((ctxt->ops->get_hflags(ctxt) & X86EMUL_SMM_MASK) == 0) 35 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 36 | index 41ca128478fc..b290ad14070f 100644 37 | --- a/arch/x86/kvm/smm.c 38 | +++ b/arch/x86/kvm/smm.c 39 | @@ -382,7 +382,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 40 | ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; 41 | ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); 42 | 43 | - for (i = 0; i < NR_EMULATOR_GPRS; i++) 44 | + for (i = 0; i < 8; i++) 45 | *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); 46 | 47 | val = GET_SMSTATE(u32, smstate, 0x7fcc); 48 | @@ -438,7 +438,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, 49 | u64 val, cr0, cr3, cr4; 50 | int i, r; 51 | 52 | - for (i = 0; i < NR_EMULATOR_GPRS; i++) 53 | + for (i = 0; i < 16; i++) 54 | *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); 55 | 56 | ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); 57 | -- 58 | 2.38.1 59 | 60 | -------------------------------------------------------------------------------- /debian/patches/pve/0022-KVM-x86-smm-check-for-failures-on-smm-entry.patch: -------------------------------------------------------------------------------- 1 | From a01916088ebb15b357d5c110270b797295d02f78 Mon Sep 17 00:00:00 2001 2 | 
From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:33 +0300 4 | Subject: [PATCH] KVM: x86: smm: check for failures on smm entry 5 | 6 | In the rare case of the failure on SMM entry, the KVM should at 7 | least terminate the VM instead of going south. 8 | 9 | Suggested-by: Sean Christopherson 10 | Signed-off-by: Maxim Levitsky 11 | --- 12 | arch/x86/kvm/smm.c | 19 +++++++++++++++---- 13 | 1 file changed, 15 insertions(+), 4 deletions(-) 14 | 15 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 16 | index b290ad14070f..1191a79cf027 100644 17 | --- a/arch/x86/kvm/smm.c 18 | +++ b/arch/x86/kvm/smm.c 19 | @@ -211,11 +211,17 @@ void enter_smm(struct kvm_vcpu *vcpu) 20 | * Give enter_smm() a chance to make ISA-specific changes to the vCPU 21 | * state (e.g. leave guest mode) after we've saved the state into the 22 | * SMM state-save area. 23 | + * 24 | + * Kill the VM in the unlikely case of failure, because the VM 25 | + * can be in undefined state in this case. 26 | */ 27 | - static_call(kvm_x86_enter_smm)(vcpu, buf); 28 | + if (static_call(kvm_x86_enter_smm)(vcpu, buf)) 29 | + goto error; 30 | 31 | kvm_smm_changed(vcpu, true); 32 | - kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf)); 33 | + 34 | + if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf))) 35 | + goto error; 36 | 37 | if (static_call(kvm_x86_get_nmi_mask)(vcpu)) 38 | vcpu->arch.hflags |= HF_SMM_INSIDE_NMI_MASK; 39 | @@ -235,7 +241,8 @@ void enter_smm(struct kvm_vcpu *vcpu) 40 | dt.address = dt.size = 0; 41 | static_call(kvm_x86_set_idt)(vcpu, &dt); 42 | 43 | - kvm_set_dr(vcpu, 7, DR7_FIXED_1); 44 | + if (WARN_ON_ONCE(kvm_set_dr(vcpu, 7, DR7_FIXED_1))) 45 | + goto error; 46 | 47 | cs.selector = (vcpu->arch.smbase >> 4) & 0xffff; 48 | cs.base = vcpu->arch.smbase; 49 | @@ -264,11 +271,15 @@ void enter_smm(struct kvm_vcpu *vcpu) 50 | 51 | #ifdef CONFIG_X86_64 52 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 53 | - static_call(kvm_x86_set_efer)(vcpu, 0); 54 | 
+ if (static_call(kvm_x86_set_efer)(vcpu, 0)) 55 | + goto error; 56 | #endif 57 | 58 | kvm_update_cpuid_runtime(vcpu); 59 | kvm_mmu_reset_context(vcpu); 60 | + return; 61 | +error: 62 | + kvm_vm_dead(vcpu->kvm); 63 | } 64 | 65 | static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) 66 | -- 67 | 2.38.1 68 | 69 | -------------------------------------------------------------------------------- /debian/patches/pve/0023-KVM-x86-smm-add-structs-for-KVM-s-smram-layout.patch: -------------------------------------------------------------------------------- 1 | From dae1d13d62cdc44a137c51fbc92c5037a8f104c5 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:34 +0300 4 | Subject: [PATCH] KVM: x86: smm: add structs for KVM's smram layout 5 | 6 | Add structs that will be used to define and read/write the KVM's 7 | SMRAM layout, instead of reading/writing to raw offsets. 8 | 9 | Also document the differences between KVM's SMRAM layout and SMRAM 10 | layout that is used by real Intel/AMD cpus. 
11 | 12 | Signed-off-by: Maxim Levitsky 13 | --- 14 | arch/x86/kvm/smm.c | 94 +++++++++++++++++++++++++++++++++ 15 | arch/x86/kvm/smm.h | 127 +++++++++++++++++++++++++++++++++++++++++++++ 16 | 2 files changed, 221 insertions(+) 17 | 18 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 19 | index 1191a79cf027..01dab9fc3ab4 100644 20 | --- a/arch/x86/kvm/smm.c 21 | +++ b/arch/x86/kvm/smm.c 22 | @@ -8,6 +8,97 @@ 23 | #include "cpuid.h" 24 | #include "trace.h" 25 | 26 | +#define CHECK_SMRAM32_OFFSET(field, offset) \ 27 | + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_32, field, offset - 0xFE00) 28 | + 29 | +#define CHECK_SMRAM64_OFFSET(field, offset) \ 30 | + ASSERT_STRUCT_OFFSET(struct kvm_smram_state_64, field, offset - 0xFE00) 31 | + 32 | +static void check_smram_offsets(void) 33 | +{ 34 | + /* 32 bit SMRAM image */ 35 | + CHECK_SMRAM32_OFFSET(reserved1, 0xFE00); 36 | + CHECK_SMRAM32_OFFSET(smbase, 0xFEF8); 37 | + CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC); 38 | + CHECK_SMRAM32_OFFSET(reserved2, 0xFF00); 39 | + CHECK_SMRAM32_OFFSET(cr4, 0xFF14); 40 | + CHECK_SMRAM32_OFFSET(reserved3, 0xFF18); 41 | + CHECK_SMRAM32_OFFSET(ds, 0xFF2C); 42 | + CHECK_SMRAM32_OFFSET(fs, 0xFF38); 43 | + CHECK_SMRAM32_OFFSET(gs, 0xFF44); 44 | + CHECK_SMRAM32_OFFSET(idtr, 0xFF50); 45 | + CHECK_SMRAM32_OFFSET(tr, 0xFF5C); 46 | + CHECK_SMRAM32_OFFSET(gdtr, 0xFF6C); 47 | + CHECK_SMRAM32_OFFSET(ldtr, 0xFF78); 48 | + CHECK_SMRAM32_OFFSET(es, 0xFF84); 49 | + CHECK_SMRAM32_OFFSET(cs, 0xFF90); 50 | + CHECK_SMRAM32_OFFSET(ss, 0xFF9C); 51 | + CHECK_SMRAM32_OFFSET(es_sel, 0xFFA8); 52 | + CHECK_SMRAM32_OFFSET(cs_sel, 0xFFAC); 53 | + CHECK_SMRAM32_OFFSET(ss_sel, 0xFFB0); 54 | + CHECK_SMRAM32_OFFSET(ds_sel, 0xFFB4); 55 | + CHECK_SMRAM32_OFFSET(fs_sel, 0xFFB8); 56 | + CHECK_SMRAM32_OFFSET(gs_sel, 0xFFBC); 57 | + CHECK_SMRAM32_OFFSET(ldtr_sel, 0xFFC0); 58 | + CHECK_SMRAM32_OFFSET(tr_sel, 0xFFC4); 59 | + CHECK_SMRAM32_OFFSET(dr7, 0xFFC8); 60 | + CHECK_SMRAM32_OFFSET(dr6, 0xFFCC); 61 | + 
CHECK_SMRAM32_OFFSET(gprs, 0xFFD0); 62 | + CHECK_SMRAM32_OFFSET(eip, 0xFFF0); 63 | + CHECK_SMRAM32_OFFSET(eflags, 0xFFF4); 64 | + CHECK_SMRAM32_OFFSET(cr3, 0xFFF8); 65 | + CHECK_SMRAM32_OFFSET(cr0, 0xFFFC); 66 | + 67 | + /* 64 bit SMRAM image */ 68 | + CHECK_SMRAM64_OFFSET(es, 0xFE00); 69 | + CHECK_SMRAM64_OFFSET(cs, 0xFE10); 70 | + CHECK_SMRAM64_OFFSET(ss, 0xFE20); 71 | + CHECK_SMRAM64_OFFSET(ds, 0xFE30); 72 | + CHECK_SMRAM64_OFFSET(fs, 0xFE40); 73 | + CHECK_SMRAM64_OFFSET(gs, 0xFE50); 74 | + CHECK_SMRAM64_OFFSET(gdtr, 0xFE60); 75 | + CHECK_SMRAM64_OFFSET(ldtr, 0xFE70); 76 | + CHECK_SMRAM64_OFFSET(idtr, 0xFE80); 77 | + CHECK_SMRAM64_OFFSET(tr, 0xFE90); 78 | + CHECK_SMRAM64_OFFSET(io_restart_rip, 0xFEA0); 79 | + CHECK_SMRAM64_OFFSET(io_restart_rcx, 0xFEA8); 80 | + CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0); 81 | + CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8); 82 | + CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0); 83 | + CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4); 84 | + CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8); 85 | + CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9); 86 | + CHECK_SMRAM64_OFFSET(reserved2, 0xFECA); 87 | + CHECK_SMRAM64_OFFSET(efer, 0xFED0); 88 | + CHECK_SMRAM64_OFFSET(svm_guest_flag, 0xFED8); 89 | + CHECK_SMRAM64_OFFSET(svm_guest_vmcb_gpa, 0xFEE0); 90 | + CHECK_SMRAM64_OFFSET(svm_guest_virtual_int, 0xFEE8); 91 | + CHECK_SMRAM64_OFFSET(reserved3, 0xFEF0); 92 | + CHECK_SMRAM64_OFFSET(smm_revison, 0xFEFC); 93 | + CHECK_SMRAM64_OFFSET(smbase, 0xFF00); 94 | + CHECK_SMRAM64_OFFSET(reserved4, 0xFF04); 95 | + CHECK_SMRAM64_OFFSET(ssp, 0xFF18); 96 | + CHECK_SMRAM64_OFFSET(svm_guest_pat, 0xFF20); 97 | + CHECK_SMRAM64_OFFSET(svm_host_efer, 0xFF28); 98 | + CHECK_SMRAM64_OFFSET(svm_host_cr4, 0xFF30); 99 | + CHECK_SMRAM64_OFFSET(svm_host_cr3, 0xFF38); 100 | + CHECK_SMRAM64_OFFSET(svm_host_cr0, 0xFF40); 101 | + CHECK_SMRAM64_OFFSET(cr4, 0xFF48); 102 | + CHECK_SMRAM64_OFFSET(cr3, 0xFF50); 103 | + CHECK_SMRAM64_OFFSET(cr0, 0xFF58); 104 | + 
CHECK_SMRAM64_OFFSET(dr7, 0xFF60); 105 | + CHECK_SMRAM64_OFFSET(dr6, 0xFF68); 106 | + CHECK_SMRAM64_OFFSET(rflags, 0xFF70); 107 | + CHECK_SMRAM64_OFFSET(rip, 0xFF78); 108 | + CHECK_SMRAM64_OFFSET(gprs, 0xFF80); 109 | + 110 | + BUILD_BUG_ON(sizeof(union kvm_smram) != 512); 111 | +} 112 | + 113 | +#undef CHECK_SMRAM64_OFFSET 114 | +#undef CHECK_SMRAM32_OFFSET 115 | + 116 | + 117 | void kvm_smm_changed(struct kvm_vcpu *vcpu, bool entering_smm) 118 | { 119 | trace_kvm_smm_transition(vcpu->vcpu_id, vcpu->arch.smbase, entering_smm); 120 | @@ -199,6 +290,8 @@ void enter_smm(struct kvm_vcpu *vcpu) 121 | unsigned long cr0; 122 | char buf[512]; 123 | 124 | + check_smram_offsets(); 125 | + 126 | memset(buf, 0, 512); 127 | #ifdef CONFIG_X86_64 128 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 129 | @@ -449,6 +542,7 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, 130 | u64 val, cr0, cr3, cr4; 131 | int i, r; 132 | 133 | + 134 | for (i = 0; i < 16; i++) 135 | *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); 136 | 137 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 138 | index a6795b93ba30..bf5c7ffeb11e 100644 139 | --- a/arch/x86/kvm/smm.h 140 | +++ b/arch/x86/kvm/smm.h 141 | @@ -2,6 +2,8 @@ 142 | #ifndef ASM_KVM_SMM_H 143 | #define ASM_KVM_SMM_H 144 | 145 | +#include 146 | + 147 | #define GET_SMSTATE(type, buf, offset) \ 148 | (*(type *)((buf) + (offset) - 0x7e00)) 149 | 150 | @@ -9,6 +11,131 @@ 151 | *(type *)((buf) + (offset) - 0x7e00) = val 152 | 153 | #ifdef CONFIG_KVM_SMM 154 | + 155 | + 156 | +/* 32 bit KVM's emulated SMM layout. 
Loosely based on Intel's layout */ 157 | + 158 | +struct kvm_smm_seg_state_32 { 159 | + u32 flags; 160 | + u32 limit; 161 | + u32 base; 162 | +} __packed; 163 | + 164 | +struct kvm_smram_state_32 { 165 | + u32 reserved1[62]; 166 | + u32 smbase; 167 | + u32 smm_revision; 168 | + u32 reserved2[5]; 169 | + u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */ 170 | + u32 reserved3[5]; 171 | + 172 | + /* 173 | + * Segment state is not present/documented in the Intel/AMD SMRAM image 174 | + * Instead this area on Intel/AMD contains IO/HLT restart flags. 175 | + */ 176 | + struct kvm_smm_seg_state_32 ds; 177 | + struct kvm_smm_seg_state_32 fs; 178 | + struct kvm_smm_seg_state_32 gs; 179 | + struct kvm_smm_seg_state_32 idtr; /* IDTR has only base and limit */ 180 | + struct kvm_smm_seg_state_32 tr; 181 | + u32 reserved; 182 | + struct kvm_smm_seg_state_32 gdtr; /* GDTR has only base and limit */ 183 | + struct kvm_smm_seg_state_32 ldtr; 184 | + struct kvm_smm_seg_state_32 es; 185 | + struct kvm_smm_seg_state_32 cs; 186 | + struct kvm_smm_seg_state_32 ss; 187 | + 188 | + u32 es_sel; 189 | + u32 cs_sel; 190 | + u32 ss_sel; 191 | + u32 ds_sel; 192 | + u32 fs_sel; 193 | + u32 gs_sel; 194 | + u32 ldtr_sel; 195 | + u32 tr_sel; 196 | + 197 | + u32 dr7; 198 | + u32 dr6; 199 | + u32 gprs[8]; /* GPRS in the "natural" X86 order (EAX/ECX/EDX.../EDI) */ 200 | + u32 eip; 201 | + u32 eflags; 202 | + u32 cr3; 203 | + u32 cr0; 204 | +} __packed; 205 | + 206 | + 207 | +/* 64 bit KVM's emulated SMM layout. 
Based on AMD64 layout */ 208 | + 209 | +struct kvm_smm_seg_state_64 { 210 | + u16 selector; 211 | + u16 attributes; 212 | + u32 limit; 213 | + u64 base; 214 | +}; 215 | + 216 | +struct kvm_smram_state_64 { 217 | + 218 | + struct kvm_smm_seg_state_64 es; 219 | + struct kvm_smm_seg_state_64 cs; 220 | + struct kvm_smm_seg_state_64 ss; 221 | + struct kvm_smm_seg_state_64 ds; 222 | + struct kvm_smm_seg_state_64 fs; 223 | + struct kvm_smm_seg_state_64 gs; 224 | + struct kvm_smm_seg_state_64 gdtr; /* GDTR has only base and limit*/ 225 | + struct kvm_smm_seg_state_64 ldtr; 226 | + struct kvm_smm_seg_state_64 idtr; /* IDTR has only base and limit*/ 227 | + struct kvm_smm_seg_state_64 tr; 228 | + 229 | + /* I/O restart and auto halt restart are not implemented by KVM */ 230 | + u64 io_restart_rip; 231 | + u64 io_restart_rcx; 232 | + u64 io_restart_rsi; 233 | + u64 io_restart_rdi; 234 | + u32 io_restart_dword; 235 | + u32 reserved1; 236 | + u8 io_inst_restart; 237 | + u8 auto_hlt_restart; 238 | + u8 reserved2[6]; 239 | + 240 | + u64 efer; 241 | + 242 | + /* 243 | + * Two fields below are implemented on AMD only, to store 244 | + * SVM guest vmcb address if the #SMI was received while in the guest mode. 245 | + */ 246 | + u64 svm_guest_flag; 247 | + u64 svm_guest_vmcb_gpa; 248 | + u64 svm_guest_virtual_int; /* unknown purpose, not implemented */ 249 | + 250 | + u32 reserved3[3]; 251 | + u32 smm_revison; 252 | + u32 smbase; 253 | + u32 reserved4[5]; 254 | + 255 | + /* ssp and svm_* fields below are not implemented by KVM */ 256 | + u64 ssp; 257 | + u64 svm_guest_pat; 258 | + u64 svm_host_efer; 259 | + u64 svm_host_cr4; 260 | + u64 svm_host_cr3; 261 | + u64 svm_host_cr0; 262 | + 263 | + u64 cr4; 264 | + u64 cr3; 265 | + u64 cr0; 266 | + u64 dr7; 267 | + u64 dr6; 268 | + u64 rflags; 269 | + u64 rip; 270 | + u64 gprs[16]; /* GPRS in a reversed "natural" X86 order (R15/R14/../RCX/RAX.) 
*/ 271 | +}; 272 | + 273 | +union kvm_smram { 274 | + struct kvm_smram_state_64 smram64; 275 | + struct kvm_smram_state_32 smram32; 276 | + u8 bytes[512]; 277 | +}; 278 | + 279 | static inline int kvm_inject_smi(struct kvm_vcpu *vcpu) 280 | { 281 | kvm_make_request(KVM_REQ_SMI, vcpu); 282 | -- 283 | 2.38.1 284 | 285 | -------------------------------------------------------------------------------- /debian/patches/pve/0024-KVM-x86-smm-use-smram-structs-in-the-common-code.patch: -------------------------------------------------------------------------------- 1 | From e13349f01bc9b4b94dd995d60fad196d3074a868 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:35 +0300 4 | Subject: [PATCH] KVM: x86: smm: use smram structs in the common code 5 | 6 | Use kvm_smram union instead of raw arrays in the common smm code. 7 | 8 | Signed-off-by: Maxim Levitsky 9 | --- 10 | arch/x86/include/asm/kvm_host.h | 5 +++-- 11 | arch/x86/kvm/smm.c | 27 ++++++++++++++------------- 12 | arch/x86/kvm/svm/svm.c | 8 ++++++-- 13 | arch/x86/kvm/vmx/vmx.c | 4 ++-- 14 | 4 files changed, 25 insertions(+), 19 deletions(-) 15 | 16 | diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h 17 | index 87ee187b3f26..84c0b441a336 100644 18 | --- a/arch/x86/include/asm/kvm_host.h 19 | +++ b/arch/x86/include/asm/kvm_host.h 20 | @@ -206,6 +206,7 @@ typedef enum exit_fastpath_completion fastpath_t; 21 | 22 | struct x86_emulate_ctxt; 23 | struct x86_exception; 24 | +union kvm_smram; 25 | enum x86_intercept; 26 | enum x86_intercept_stage; 27 | 28 | @@ -1604,8 +1605,8 @@ struct kvm_x86_ops { 29 | 30 | #ifdef CONFIG_KVM_SMM 31 | int (*smi_allowed)(struct kvm_vcpu *vcpu, bool for_injection); 32 | - int (*enter_smm)(struct kvm_vcpu *vcpu, char *smstate); 33 | - int (*leave_smm)(struct kvm_vcpu *vcpu, const char *smstate); 34 | + int (*enter_smm)(struct kvm_vcpu *vcpu, union kvm_smram *smram); 35 | + int (*leave_smm)(struct kvm_vcpu *vcpu, const union kvm_smram
*smram); 36 | void (*enable_smi_window)(struct kvm_vcpu *vcpu); 37 | #endif 38 | 39 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 40 | index 01dab9fc3ab4..e714d43b746c 100644 41 | --- a/arch/x86/kvm/smm.c 42 | +++ b/arch/x86/kvm/smm.c 43 | @@ -288,17 +288,18 @@ void enter_smm(struct kvm_vcpu *vcpu) 44 | struct kvm_segment cs, ds; 45 | struct desc_ptr dt; 46 | unsigned long cr0; 47 | - char buf[512]; 48 | + union kvm_smram smram; 49 | 50 | check_smram_offsets(); 51 | 52 | - memset(buf, 0, 512); 53 | + memset(smram.bytes, 0, sizeof(smram.bytes)); 54 | + 55 | #ifdef CONFIG_X86_64 56 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 57 | - enter_smm_save_state_64(vcpu, buf); 58 | + enter_smm_save_state_64(vcpu, smram.bytes); 59 | else 60 | #endif 61 | - enter_smm_save_state_32(vcpu, buf); 62 | + enter_smm_save_state_32(vcpu, smram.bytes); 63 | 64 | /* 65 | * Give enter_smm() a chance to make ISA-specific changes to the vCPU 66 | @@ -308,12 +309,12 @@ void enter_smm(struct kvm_vcpu *vcpu) 67 | * Kill the VM in the unlikely case of failure, because the VM 68 | * can be in undefined state in this case. 
69 | */ 70 | - if (static_call(kvm_x86_enter_smm)(vcpu, buf)) 71 | + if (static_call(kvm_x86_enter_smm)(vcpu, &smram)) 72 | goto error; 73 | 74 | kvm_smm_changed(vcpu, true); 75 | 76 | - if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, buf, sizeof(buf))) 77 | + if (kvm_vcpu_write_guest(vcpu, vcpu->arch.smbase + 0xfe00, &smram, sizeof(smram))) 78 | goto error; 79 | 80 | if (static_call(kvm_x86_get_nmi_mask)(vcpu)) 81 | @@ -473,7 +474,7 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, 82 | } 83 | 84 | static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 85 | - const char *smstate) 86 | + u8 *smstate) 87 | { 88 | struct kvm_vcpu *vcpu = ctxt->vcpu; 89 | struct kvm_segment desc; 90 | @@ -534,7 +535,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 91 | 92 | #ifdef CONFIG_X86_64 93 | static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, 94 | - const char *smstate) 95 | + u8 *smstate) 96 | { 97 | struct kvm_vcpu *vcpu = ctxt->vcpu; 98 | struct kvm_segment desc; 99 | @@ -606,13 +607,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) 100 | { 101 | struct kvm_vcpu *vcpu = ctxt->vcpu; 102 | unsigned long cr0, cr4, efer; 103 | - char buf[512]; 104 | + union kvm_smram smram; 105 | u64 smbase; 106 | int ret; 107 | 108 | smbase = vcpu->arch.smbase; 109 | 110 | - ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, buf, sizeof(buf)); 111 | + ret = kvm_vcpu_read_guest(vcpu, smbase + 0xfe00, smram.bytes, sizeof(smram)); 112 | if (ret < 0) 113 | return X86EMUL_UNHANDLEABLE; 114 | 115 | @@ -666,13 +667,13 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) 116 | * state (e.g. enter guest mode) before loading state from the SMM 117 | * state-save area. 
118 | */ 119 | - if (static_call(kvm_x86_leave_smm)(vcpu, buf)) 120 | + if (static_call(kvm_x86_leave_smm)(vcpu, &smram)) 121 | return X86EMUL_UNHANDLEABLE; 122 | 123 | #ifdef CONFIG_X86_64 124 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 125 | - return rsm_load_state_64(ctxt, buf); 126 | + return rsm_load_state_64(ctxt, smram.bytes); 127 | else 128 | #endif 129 | - return rsm_load_state_32(ctxt, buf); 130 | + return rsm_load_state_32(ctxt, smram.bytes); 131 | } 132 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 133 | index e69390909d08..2a61b8c50ab4 100644 134 | --- a/arch/x86/kvm/svm/svm.c 135 | +++ b/arch/x86/kvm/svm/svm.c 136 | @@ -4437,12 +4437,14 @@ static int svm_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) 137 | return 1; 138 | } 139 | 140 | -static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) 141 | +static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) 142 | { 143 | struct vcpu_svm *svm = to_svm(vcpu); 144 | struct kvm_host_map map_save; 145 | int ret; 146 | 147 | + char *smstate = (char *)smram; 148 | + 149 | if (!is_guest_mode(vcpu)) 150 | return 0; 151 | 152 | @@ -4484,7 +4486,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, char *smstate) 153 | return 0; 154 | } 155 | 156 | -static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) 157 | +static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) 158 | { 159 | struct vcpu_svm *svm = to_svm(vcpu); 160 | struct kvm_host_map map, map_save; 161 | @@ -4492,6 +4494,8 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) 162 | struct vmcb *vmcb12; 163 | int ret; 164 | 165 | + const char *smstate = (const char *)smram; 166 | + 167 | if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) 168 | return 0; 169 | 170 | diff --git a/arch/x86/kvm/vmx/vmx.c b/arch/x86/kvm/vmx/vmx.c 171 | index 8cfb40cfad10..480ff79071c6 100644 172 | --- a/arch/x86/kvm/vmx/vmx.c 173 | +++ b/arch/x86/kvm/vmx/vmx.c 174 | @@ -7922,7 +7922,7 @@ 
static int vmx_smi_allowed(struct kvm_vcpu *vcpu, bool for_injection) 175 | return !is_smm(vcpu); 176 | } 177 | 178 | -static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate) 179 | +static int vmx_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) 180 | { 181 | struct vcpu_vmx *vmx = to_vmx(vcpu); 182 | 183 | @@ -7943,7 +7943,7 @@ static int vmx_enter_smm(struct kvm_vcpu *vcpu, char *smstate) 184 | return 0; 185 | } 186 | 187 | -static int vmx_leave_smm(struct kvm_vcpu *vcpu, const char *smstate) 188 | +static int vmx_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) 189 | { 190 | struct vcpu_vmx *vmx = to_vmx(vcpu); 191 | int ret; 192 | -- 193 | 2.38.1 194 | 195 | -------------------------------------------------------------------------------- /debian/patches/pve/0025-KVM-x86-smm-use-smram-struct-for-32-bit-smram-load-r.patch: -------------------------------------------------------------------------------- 1 | From 9d654dc3270bcf5d876115c009097974d994a324 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:36 +0300 4 | Subject: [PATCH] KVM: x86: smm: use smram struct for 32 bit smram load/restore 5 | 6 | Use kvm_smram_state_32 struct to save/restore 32 bit SMM state 7 | (used when X86_FEATURE_LM is not present in the guest CPUID). 
8 | 9 | Signed-off-by: Maxim Levitsky 10 | --- 11 | arch/x86/kvm/smm.c | 155 ++++++++++++++++++--------------------------- 12 | 1 file changed, 61 insertions(+), 94 deletions(-) 13 | 14 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 15 | index e714d43b746c..2635f6b1d81a 100644 16 | --- a/arch/x86/kvm/smm.c 17 | +++ b/arch/x86/kvm/smm.c 18 | @@ -142,22 +142,17 @@ static u32 enter_smm_get_segment_flags(struct kvm_segment *seg) 19 | return flags; 20 | } 21 | 22 | -static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, char *buf, int n) 23 | +static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, 24 | + struct kvm_smm_seg_state_32 *state, 25 | + u32 *selector, int n) 26 | { 27 | struct kvm_segment seg; 28 | - int offset; 29 | 30 | kvm_get_segment(vcpu, &seg, n); 31 | - PUT_SMSTATE(u32, buf, 0x7fa8 + n * 4, seg.selector); 32 | - 33 | - if (n < 3) 34 | - offset = 0x7f84 + n * 12; 35 | - else 36 | - offset = 0x7f2c + (n - 3) * 12; 37 | - 38 | - PUT_SMSTATE(u32, buf, offset + 8, seg.base); 39 | - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); 40 | - PUT_SMSTATE(u32, buf, offset, enter_smm_get_segment_flags(&seg)); 41 | + *selector = seg.selector; 42 | + state->base = seg.base; 43 | + state->limit = seg.limit; 44 | + state->flags = enter_smm_get_segment_flags(&seg); 45 | } 46 | 47 | #ifdef CONFIG_X86_64 48 | @@ -178,54 +173,48 @@ static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) 49 | } 50 | #endif 51 | 52 | -static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, char *buf) 53 | +static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, 54 | + struct kvm_smram_state_32 *smram) 55 | { 56 | struct desc_ptr dt; 57 | - struct kvm_segment seg; 58 | unsigned long val; 59 | int i; 60 | 61 | - PUT_SMSTATE(u32, buf, 0x7ffc, kvm_read_cr0(vcpu)); 62 | - PUT_SMSTATE(u32, buf, 0x7ff8, kvm_read_cr3(vcpu)); 63 | - PUT_SMSTATE(u32, buf, 0x7ff4, kvm_get_rflags(vcpu)); 64 | - PUT_SMSTATE(u32, buf, 0x7ff0, kvm_rip_read(vcpu)); 65 | + smram->cr0 = 
kvm_read_cr0(vcpu); 66 | + smram->cr3 = kvm_read_cr3(vcpu); 67 | + smram->eflags = kvm_get_rflags(vcpu); 68 | + smram->eip = kvm_rip_read(vcpu); 69 | 70 | for (i = 0; i < 8; i++) 71 | - PUT_SMSTATE(u32, buf, 0x7fd0 + i * 4, kvm_register_read_raw(vcpu, i)); 72 | + smram->gprs[i] = kvm_register_read_raw(vcpu, i); 73 | 74 | kvm_get_dr(vcpu, 6, &val); 75 | - PUT_SMSTATE(u32, buf, 0x7fcc, (u32)val); 76 | + smram->dr6 = (u32)val; 77 | kvm_get_dr(vcpu, 7, &val); 78 | - PUT_SMSTATE(u32, buf, 0x7fc8, (u32)val); 79 | + smram->dr7 = (u32)val; 80 | 81 | - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); 82 | - PUT_SMSTATE(u32, buf, 0x7fc4, seg.selector); 83 | - PUT_SMSTATE(u32, buf, 0x7f64, seg.base); 84 | - PUT_SMSTATE(u32, buf, 0x7f60, seg.limit); 85 | - PUT_SMSTATE(u32, buf, 0x7f5c, enter_smm_get_segment_flags(&seg)); 86 | - 87 | - kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); 88 | - PUT_SMSTATE(u32, buf, 0x7fc0, seg.selector); 89 | - PUT_SMSTATE(u32, buf, 0x7f80, seg.base); 90 | - PUT_SMSTATE(u32, buf, 0x7f7c, seg.limit); 91 | - PUT_SMSTATE(u32, buf, 0x7f78, enter_smm_get_segment_flags(&seg)); 92 | + enter_smm_save_seg_32(vcpu, &smram->tr, &smram->tr_sel, VCPU_SREG_TR); 93 | + enter_smm_save_seg_32(vcpu, &smram->ldtr, &smram->ldtr_sel, VCPU_SREG_LDTR); 94 | 95 | static_call(kvm_x86_get_gdt)(vcpu, &dt); 96 | - PUT_SMSTATE(u32, buf, 0x7f74, dt.address); 97 | - PUT_SMSTATE(u32, buf, 0x7f70, dt.size); 98 | + smram->gdtr.base = dt.address; 99 | + smram->gdtr.limit = dt.size; 100 | 101 | static_call(kvm_x86_get_idt)(vcpu, &dt); 102 | - PUT_SMSTATE(u32, buf, 0x7f58, dt.address); 103 | - PUT_SMSTATE(u32, buf, 0x7f54, dt.size); 104 | + smram->idtr.base = dt.address; 105 | + smram->idtr.limit = dt.size; 106 | 107 | - for (i = 0; i < 6; i++) 108 | - enter_smm_save_seg_32(vcpu, buf, i); 109 | + enter_smm_save_seg_32(vcpu, &smram->es, &smram->es_sel, VCPU_SREG_ES); 110 | + enter_smm_save_seg_32(vcpu, &smram->cs, &smram->cs_sel, VCPU_SREG_CS); 111 | + enter_smm_save_seg_32(vcpu, &smram->ss, 
&smram->ss_sel, VCPU_SREG_SS); 112 | 113 | - PUT_SMSTATE(u32, buf, 0x7f14, kvm_read_cr4(vcpu)); 114 | + enter_smm_save_seg_32(vcpu, &smram->ds, &smram->ds_sel, VCPU_SREG_DS); 115 | + enter_smm_save_seg_32(vcpu, &smram->fs, &smram->fs_sel, VCPU_SREG_FS); 116 | + enter_smm_save_seg_32(vcpu, &smram->gs, &smram->gs_sel, VCPU_SREG_GS); 117 | 118 | - /* revision id */ 119 | - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020000); 120 | - PUT_SMSTATE(u32, buf, 0x7ef8, vcpu->arch.smbase); 121 | + smram->cr4 = kvm_read_cr4(vcpu); 122 | + smram->smm_revision = 0x00020000; 123 | + smram->smbase = vcpu->arch.smbase; 124 | } 125 | 126 | #ifdef CONFIG_X86_64 127 | @@ -299,7 +288,7 @@ void enter_smm(struct kvm_vcpu *vcpu) 128 | enter_smm_save_state_64(vcpu, smram.bytes); 129 | else 130 | #endif 131 | - enter_smm_save_state_32(vcpu, smram.bytes); 132 | + enter_smm_save_state_32(vcpu, &smram.smram32); 133 | 134 | /* 135 | * Give enter_smm() a chance to make ISA-specific changes to the vCPU 136 | @@ -391,21 +380,16 @@ static void rsm_set_desc_flags(struct kvm_segment *desc, u32 flags) 137 | desc->padding = 0; 138 | } 139 | 140 | -static int rsm_load_seg_32(struct kvm_vcpu *vcpu, const char *smstate, 141 | - int n) 142 | +static int rsm_load_seg_32(struct kvm_vcpu *vcpu, 143 | + const struct kvm_smm_seg_state_32 *state, 144 | + u16 selector, int n) 145 | { 146 | struct kvm_segment desc; 147 | - int offset; 148 | - 149 | - if (n < 3) 150 | - offset = 0x7f84 + n * 12; 151 | - else 152 | - offset = 0x7f2c + (n - 3) * 12; 153 | 154 | - desc.selector = GET_SMSTATE(u32, smstate, 0x7fa8 + n * 4); 155 | - desc.base = GET_SMSTATE(u32, smstate, offset + 8); 156 | - desc.limit = GET_SMSTATE(u32, smstate, offset + 4); 157 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, offset)); 158 | + desc.selector = selector; 159 | + desc.base = state->base; 160 | + desc.limit = state->limit; 161 | + rsm_set_desc_flags(&desc, state->flags); 162 | kvm_set_segment(vcpu, &desc, n); 163 | return X86EMUL_CONTINUE; 164 
| } 165 | @@ -474,63 +458,46 @@ static int rsm_enter_protected_mode(struct kvm_vcpu *vcpu, 166 | } 167 | 168 | static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 169 | - u8 *smstate) 170 | + const struct kvm_smram_state_32 *smstate) 171 | { 172 | struct kvm_vcpu *vcpu = ctxt->vcpu; 173 | - struct kvm_segment desc; 174 | struct desc_ptr dt; 175 | - u32 val, cr0, cr3, cr4; 176 | int i; 177 | 178 | - cr0 = GET_SMSTATE(u32, smstate, 0x7ffc); 179 | - cr3 = GET_SMSTATE(u32, smstate, 0x7ff8); 180 | - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7ff4) | X86_EFLAGS_FIXED; 181 | - ctxt->_eip = GET_SMSTATE(u32, smstate, 0x7ff0); 182 | + ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; 183 | + ctxt->_eip = smstate->eip; 184 | 185 | for (i = 0; i < 8; i++) 186 | - *reg_write(ctxt, i) = GET_SMSTATE(u32, smstate, 0x7fd0 + i * 4); 187 | - 188 | - val = GET_SMSTATE(u32, smstate, 0x7fcc); 189 | + *reg_write(ctxt, i) = smstate->gprs[i]; 190 | 191 | - if (kvm_set_dr(vcpu, 6, val)) 192 | + if (kvm_set_dr(vcpu, 6, smstate->dr6)) 193 | return X86EMUL_UNHANDLEABLE; 194 | - 195 | - val = GET_SMSTATE(u32, smstate, 0x7fc8); 196 | - 197 | - if (kvm_set_dr(vcpu, 7, val)) 198 | + if (kvm_set_dr(vcpu, 7, smstate->dr7)) 199 | return X86EMUL_UNHANDLEABLE; 200 | 201 | - desc.selector = GET_SMSTATE(u32, smstate, 0x7fc4); 202 | - desc.base = GET_SMSTATE(u32, smstate, 0x7f64); 203 | - desc.limit = GET_SMSTATE(u32, smstate, 0x7f60); 204 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f5c)); 205 | - kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); 206 | - 207 | - desc.selector = GET_SMSTATE(u32, smstate, 0x7fc0); 208 | - desc.base = GET_SMSTATE(u32, smstate, 0x7f80); 209 | - desc.limit = GET_SMSTATE(u32, smstate, 0x7f7c); 210 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7f78)); 211 | - kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); 212 | + rsm_load_seg_32(vcpu, &smstate->tr, smstate->tr_sel, VCPU_SREG_TR); 213 | + rsm_load_seg_32(vcpu, &smstate->ldtr, smstate->ldtr_sel, 
VCPU_SREG_LDTR); 214 | 215 | - dt.address = GET_SMSTATE(u32, smstate, 0x7f74); 216 | - dt.size = GET_SMSTATE(u32, smstate, 0x7f70); 217 | + dt.address = smstate->gdtr.base; 218 | + dt.size = smstate->gdtr.limit; 219 | static_call(kvm_x86_set_gdt)(vcpu, &dt); 220 | 221 | - dt.address = GET_SMSTATE(u32, smstate, 0x7f58); 222 | - dt.size = GET_SMSTATE(u32, smstate, 0x7f54); 223 | + dt.address = smstate->idtr.base; 224 | + dt.size = smstate->idtr.limit; 225 | static_call(kvm_x86_set_idt)(vcpu, &dt); 226 | 227 | - for (i = 0; i < 6; i++) { 228 | - int r = rsm_load_seg_32(vcpu, smstate, i); 229 | - if (r != X86EMUL_CONTINUE) 230 | - return r; 231 | - } 232 | + rsm_load_seg_32(vcpu, &smstate->es, smstate->es_sel, VCPU_SREG_ES); 233 | + rsm_load_seg_32(vcpu, &smstate->cs, smstate->cs_sel, VCPU_SREG_CS); 234 | + rsm_load_seg_32(vcpu, &smstate->ss, smstate->ss_sel, VCPU_SREG_SS); 235 | 236 | - cr4 = GET_SMSTATE(u32, smstate, 0x7f14); 237 | + rsm_load_seg_32(vcpu, &smstate->ds, smstate->ds_sel, VCPU_SREG_DS); 238 | + rsm_load_seg_32(vcpu, &smstate->fs, smstate->fs_sel, VCPU_SREG_FS); 239 | + rsm_load_seg_32(vcpu, &smstate->gs, smstate->gs_sel, VCPU_SREG_GS); 240 | 241 | - vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7ef8); 242 | + vcpu->arch.smbase = smstate->smbase; 243 | 244 | - return rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); 245 | + return rsm_enter_protected_mode(vcpu, smstate->cr0, 246 | + smstate->cr3, smstate->cr4); 247 | } 248 | 249 | #ifdef CONFIG_X86_64 250 | @@ -675,5 +642,5 @@ int emulator_leave_smm(struct x86_emulate_ctxt *ctxt) 251 | return rsm_load_state_64(ctxt, smram.bytes); 252 | else 253 | #endif 254 | - return rsm_load_state_32(ctxt, smram.bytes); 255 | + return rsm_load_state_32(ctxt, &smram.smram32); 256 | } 257 | -- 258 | 2.38.1 259 | 260 | -------------------------------------------------------------------------------- /debian/patches/pve/0026-KVM-x86-smm-use-smram-struct-for-64-bit-smram-load-r.patch: 
-------------------------------------------------------------------------------- 1 | From b7913065928c913fb8569a8a71f6eec4a32779c7 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:37 +0300 4 | Subject: [PATCH] KVM: x86: smm: use smram struct for 64 bit smram load/restore 5 | 6 | Use kvm_smram_state_64 struct to save/restore the 64 bit SMM state 7 | (used when X86_FEATURE_LM is present in the guest CPUID, 8 | regardless of 32-bitness of the guest). 9 | 10 | Signed-off-by: Maxim Levitsky 11 | --- 12 | arch/x86/kvm/smm.c | 153 +++++++++++++++++++-------------------------- 13 | 1 file changed, 63 insertions(+), 90 deletions(-) 14 | 15 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 16 | index 2635f6b1d81a..82761384a866 100644 17 | --- a/arch/x86/kvm/smm.c 18 | +++ b/arch/x86/kvm/smm.c 19 | @@ -156,20 +156,17 @@ static void enter_smm_save_seg_32(struct kvm_vcpu *vcpu, 20 | } 21 | 22 | #ifdef CONFIG_X86_64 23 | -static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, char *buf, int n) 24 | +static void enter_smm_save_seg_64(struct kvm_vcpu *vcpu, 25 | + struct kvm_smm_seg_state_64 *state, 26 | + int n) 27 | { 28 | struct kvm_segment seg; 29 | - int offset; 30 | - u16 flags; 31 | 32 | kvm_get_segment(vcpu, &seg, n); 33 | - offset = 0x7e00 + n * 16; 34 | - 35 | - flags = enter_smm_get_segment_flags(&seg) >> 8; 36 | - PUT_SMSTATE(u16, buf, offset, seg.selector); 37 | - PUT_SMSTATE(u16, buf, offset + 2, flags); 38 | - PUT_SMSTATE(u32, buf, offset + 4, seg.limit); 39 | - PUT_SMSTATE(u64, buf, offset + 8, seg.base); 40 | + state->selector = seg.selector; 41 | + state->attributes = enter_smm_get_segment_flags(&seg) >> 8; 42 | + state->limit = seg.limit; 43 | + state->base = seg.base; 44 | } 45 | #endif 46 | 47 | @@ -218,57 +215,52 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, 48 | } 49 | 50 | #ifdef CONFIG_X86_64 51 | -static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, char *buf) 52 | +static void 
enter_smm_save_state_64(struct kvm_vcpu *vcpu, 53 | + struct kvm_smram_state_64 *smram) 54 | { 55 | struct desc_ptr dt; 56 | - struct kvm_segment seg; 57 | unsigned long val; 58 | int i; 59 | 60 | for (i = 0; i < 16; i++) 61 | - PUT_SMSTATE(u64, buf, 0x7ff8 - i * 8, kvm_register_read_raw(vcpu, i)); 62 | + smram->gprs[15 - i] = kvm_register_read_raw(vcpu, i); 63 | + 64 | + smram->rip = kvm_rip_read(vcpu); 65 | + smram->rflags = kvm_get_rflags(vcpu); 66 | 67 | - PUT_SMSTATE(u64, buf, 0x7f78, kvm_rip_read(vcpu)); 68 | - PUT_SMSTATE(u32, buf, 0x7f70, kvm_get_rflags(vcpu)); 69 | 70 | kvm_get_dr(vcpu, 6, &val); 71 | - PUT_SMSTATE(u64, buf, 0x7f68, val); 72 | + smram->dr6 = val; 73 | kvm_get_dr(vcpu, 7, &val); 74 | - PUT_SMSTATE(u64, buf, 0x7f60, val); 75 | - 76 | - PUT_SMSTATE(u64, buf, 0x7f58, kvm_read_cr0(vcpu)); 77 | - PUT_SMSTATE(u64, buf, 0x7f50, kvm_read_cr3(vcpu)); 78 | - PUT_SMSTATE(u64, buf, 0x7f48, kvm_read_cr4(vcpu)); 79 | + smram->dr7 = val; 80 | 81 | - PUT_SMSTATE(u32, buf, 0x7f00, vcpu->arch.smbase); 82 | + smram->cr0 = kvm_read_cr0(vcpu); 83 | + smram->cr3 = kvm_read_cr3(vcpu); 84 | + smram->cr4 = kvm_read_cr4(vcpu); 85 | 86 | - /* revision id */ 87 | - PUT_SMSTATE(u32, buf, 0x7efc, 0x00020064); 88 | + smram->smbase = vcpu->arch.smbase; 89 | + smram->smm_revison = 0x00020064; 90 | 91 | - PUT_SMSTATE(u64, buf, 0x7ed0, vcpu->arch.efer); 92 | + smram->efer = vcpu->arch.efer; 93 | 94 | - kvm_get_segment(vcpu, &seg, VCPU_SREG_TR); 95 | - PUT_SMSTATE(u16, buf, 0x7e90, seg.selector); 96 | - PUT_SMSTATE(u16, buf, 0x7e92, enter_smm_get_segment_flags(&seg) >> 8); 97 | - PUT_SMSTATE(u32, buf, 0x7e94, seg.limit); 98 | - PUT_SMSTATE(u64, buf, 0x7e98, seg.base); 99 | + enter_smm_save_seg_64(vcpu, &smram->tr, VCPU_SREG_TR); 100 | 101 | static_call(kvm_x86_get_idt)(vcpu, &dt); 102 | - PUT_SMSTATE(u32, buf, 0x7e84, dt.size); 103 | - PUT_SMSTATE(u64, buf, 0x7e88, dt.address); 104 | + smram->idtr.limit = dt.size; 105 | + smram->idtr.base = dt.address; 106 | 107 | - 
kvm_get_segment(vcpu, &seg, VCPU_SREG_LDTR); 108 | - PUT_SMSTATE(u16, buf, 0x7e70, seg.selector); 109 | - PUT_SMSTATE(u16, buf, 0x7e72, enter_smm_get_segment_flags(&seg) >> 8); 110 | - PUT_SMSTATE(u32, buf, 0x7e74, seg.limit); 111 | - PUT_SMSTATE(u64, buf, 0x7e78, seg.base); 112 | + enter_smm_save_seg_64(vcpu, &smram->ldtr, VCPU_SREG_LDTR); 113 | 114 | static_call(kvm_x86_get_gdt)(vcpu, &dt); 115 | - PUT_SMSTATE(u32, buf, 0x7e64, dt.size); 116 | - PUT_SMSTATE(u64, buf, 0x7e68, dt.address); 117 | + smram->gdtr.limit = dt.size; 118 | + smram->gdtr.base = dt.address; 119 | 120 | - for (i = 0; i < 6; i++) 121 | - enter_smm_save_seg_64(vcpu, buf, i); 122 | + enter_smm_save_seg_64(vcpu, &smram->es, VCPU_SREG_ES); 123 | + enter_smm_save_seg_64(vcpu, &smram->cs, VCPU_SREG_CS); 124 | + enter_smm_save_seg_64(vcpu, &smram->ss, VCPU_SREG_SS); 125 | + enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); 126 | + enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); 127 | + enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); 128 | } 129 | #endif 130 | 131 | @@ -285,7 +277,7 @@ void enter_smm(struct kvm_vcpu *vcpu) 132 | 133 | #ifdef CONFIG_X86_64 134 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 135 | - enter_smm_save_state_64(vcpu, smram.bytes); 136 | + enter_smm_save_state_64(vcpu, &smram.smram64); 137 | else 138 | #endif 139 | enter_smm_save_state_32(vcpu, &smram.smram32); 140 | @@ -395,18 +387,17 @@ static int rsm_load_seg_32(struct kvm_vcpu *vcpu, 141 | } 142 | 143 | #ifdef CONFIG_X86_64 144 | -static int rsm_load_seg_64(struct kvm_vcpu *vcpu, const char *smstate, 145 | + 146 | +static int rsm_load_seg_64(struct kvm_vcpu *vcpu, 147 | + const struct kvm_smm_seg_state_64 *state, 148 | int n) 149 | { 150 | struct kvm_segment desc; 151 | - int offset; 152 | - 153 | - offset = 0x7e00 + n * 16; 154 | 155 | - desc.selector = GET_SMSTATE(u16, smstate, offset); 156 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u16, smstate, offset + 2) << 8); 157 | - desc.limit = GET_SMSTATE(u32, 
smstate, offset + 4); 158 | - desc.base = GET_SMSTATE(u64, smstate, offset + 8); 159 | + desc.selector = state->selector; 160 | + rsm_set_desc_flags(&desc, state->attributes << 8); 161 | + desc.limit = state->limit; 162 | + desc.base = state->base; 163 | kvm_set_segment(vcpu, &desc, n); 164 | return X86EMUL_CONTINUE; 165 | } 166 | @@ -502,69 +493,51 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 167 | 168 | #ifdef CONFIG_X86_64 169 | static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, 170 | - u8 *smstate) 171 | + const struct kvm_smram_state_64 *smstate) 172 | { 173 | struct kvm_vcpu *vcpu = ctxt->vcpu; 174 | - struct kvm_segment desc; 175 | struct desc_ptr dt; 176 | - u64 val, cr0, cr3, cr4; 177 | int i, r; 178 | 179 | 180 | for (i = 0; i < 16; i++) 181 | - *reg_write(ctxt, i) = GET_SMSTATE(u64, smstate, 0x7ff8 - i * 8); 182 | - 183 | - ctxt->_eip = GET_SMSTATE(u64, smstate, 0x7f78); 184 | - ctxt->eflags = GET_SMSTATE(u32, smstate, 0x7f70) | X86_EFLAGS_FIXED; 185 | + *reg_write(ctxt, i) = smstate->gprs[15 - i]; 186 | 187 | - val = GET_SMSTATE(u64, smstate, 0x7f68); 188 | + ctxt->_eip = smstate->rip; 189 | + ctxt->eflags = smstate->rflags | X86_EFLAGS_FIXED; 190 | 191 | - if (kvm_set_dr(vcpu, 6, val)) 192 | + if (kvm_set_dr(vcpu, 6, smstate->dr6)) 193 | return X86EMUL_UNHANDLEABLE; 194 | - 195 | - val = GET_SMSTATE(u64, smstate, 0x7f60); 196 | - 197 | - if (kvm_set_dr(vcpu, 7, val)) 198 | + if (kvm_set_dr(vcpu, 7, smstate->dr7)) 199 | return X86EMUL_UNHANDLEABLE; 200 | 201 | - cr0 = GET_SMSTATE(u64, smstate, 0x7f58); 202 | - cr3 = GET_SMSTATE(u64, smstate, 0x7f50); 203 | - cr4 = GET_SMSTATE(u64, smstate, 0x7f48); 204 | - vcpu->arch.smbase = GET_SMSTATE(u32, smstate, 0x7f00); 205 | - val = GET_SMSTATE(u64, smstate, 0x7ed0); 206 | + vcpu->arch.smbase = smstate->smbase; 207 | 208 | - if (kvm_set_msr(vcpu, MSR_EFER, val & ~EFER_LMA)) 209 | + if (kvm_set_msr(vcpu, MSR_EFER, smstate->efer & ~EFER_LMA)) 210 | return X86EMUL_UNHANDLEABLE; 211 | 212 
| - desc.selector = GET_SMSTATE(u32, smstate, 0x7e90); 213 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e92) << 8); 214 | - desc.limit = GET_SMSTATE(u32, smstate, 0x7e94); 215 | - desc.base = GET_SMSTATE(u64, smstate, 0x7e98); 216 | - kvm_set_segment(vcpu, &desc, VCPU_SREG_TR); 217 | + rsm_load_seg_64(vcpu, &smstate->tr, VCPU_SREG_TR); 218 | 219 | - dt.size = GET_SMSTATE(u32, smstate, 0x7e84); 220 | - dt.address = GET_SMSTATE(u64, smstate, 0x7e88); 221 | + dt.size = smstate->idtr.limit; 222 | + dt.address = smstate->idtr.base; 223 | static_call(kvm_x86_set_idt)(vcpu, &dt); 224 | 225 | - desc.selector = GET_SMSTATE(u32, smstate, 0x7e70); 226 | - rsm_set_desc_flags(&desc, GET_SMSTATE(u32, smstate, 0x7e72) << 8); 227 | - desc.limit = GET_SMSTATE(u32, smstate, 0x7e74); 228 | - desc.base = GET_SMSTATE(u64, smstate, 0x7e78); 229 | - kvm_set_segment(vcpu, &desc, VCPU_SREG_LDTR); 230 | + rsm_load_seg_64(vcpu, &smstate->ldtr, VCPU_SREG_LDTR); 231 | 232 | - dt.size = GET_SMSTATE(u32, smstate, 0x7e64); 233 | - dt.address = GET_SMSTATE(u64, smstate, 0x7e68); 234 | + dt.size = smstate->gdtr.limit; 235 | + dt.address = smstate->gdtr.base; 236 | static_call(kvm_x86_set_gdt)(vcpu, &dt); 237 | 238 | - r = rsm_enter_protected_mode(vcpu, cr0, cr3, cr4); 239 | + r = rsm_enter_protected_mode(vcpu, smstate->cr0, smstate->cr3, smstate->cr4); 240 | if (r != X86EMUL_CONTINUE) 241 | return r; 242 | 243 | - for (i = 0; i < 6; i++) { 244 | - r = rsm_load_seg_64(vcpu, smstate, i); 245 | - if (r != X86EMUL_CONTINUE) 246 | - return r; 247 | - } 248 | + rsm_load_seg_64(vcpu, &smstate->es, VCPU_SREG_ES); 249 | + rsm_load_seg_64(vcpu, &smstate->cs, VCPU_SREG_CS); 250 | + rsm_load_seg_64(vcpu, &smstate->ss, VCPU_SREG_SS); 251 | + rsm_load_seg_64(vcpu, &smstate->ds, VCPU_SREG_DS); 252 | + rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS); 253 | + rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS); 254 | 255 | return X86EMUL_CONTINUE; 256 | } 257 | @@ -639,7 +612,7 @@ int 
emulator_leave_smm(struct x86_emulate_ctxt *ctxt) 258 | 259 | #ifdef CONFIG_X86_64 260 | if (guest_cpuid_has(vcpu, X86_FEATURE_LM)) 261 | - return rsm_load_state_64(ctxt, smram.bytes); 262 | + return rsm_load_state_64(ctxt, &smram.smram64); 263 | else 264 | #endif 265 | return rsm_load_state_32(ctxt, &smram.smram32); 266 | -- 267 | 2.38.1 268 | 269 | -------------------------------------------------------------------------------- /debian/patches/pve/0027-KVM-svm-drop-explicit-return-value-of-kvm_vcpu_map.patch: -------------------------------------------------------------------------------- 1 | From 736f391168886fa50eb3888007a34db101f8de9e Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:38 +0300 4 | Subject: [PATCH] KVM: svm: drop explicit return value of kvm_vcpu_map 5 | 6 | If kvm_vcpu_map() returns a non-zero value, the error path should be triggered 7 | regardless of the exact error value returned. 8 | 9 | Suggested-by: Sean Christopherson 10 | Signed-off-by: Maxim Levitsky 11 | --- 12 | arch/x86/kvm/svm/svm.c | 7 +++---- 13 | 1 file changed, 3 insertions(+), 4 deletions(-) 14 | 15 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 16 | index 2a61b8c50ab4..74f390077a1e 100644 17 | --- a/arch/x86/kvm/svm/svm.c 18 | +++ b/arch/x86/kvm/svm/svm.c 19 | @@ -4473,8 +4473,7 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) 20 | * that, see svm_prepare_switch_to_guest()) which must be 21 | * preserved. 
22 | */ 23 | - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), 24 | - &map_save) == -EINVAL) 25 | + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) 26 | return 1; 27 | 28 | BUILD_BUG_ON(offsetof(struct vmcb, save) != 0x400); 29 | @@ -4511,11 +4510,11 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) 30 | return 1; 31 | 32 | vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); 33 | - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map) == -EINVAL) 34 | + if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) 35 | return 1; 36 | 37 | ret = 1; 38 | - if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save) == -EINVAL) 39 | + if (kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.hsave_msr), &map_save)) 40 | goto unmap_map; 41 | 42 | if (svm_allocate_nested(svm)) 43 | -- 44 | 2.38.1 45 | 46 | -------------------------------------------------------------------------------- /debian/patches/pve/0028-KVM-x86-SVM-use-smram-structs.patch: -------------------------------------------------------------------------------- 1 | From 0e0e4df8502c00f60955187c3e98e50653b15008 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:39 +0300 4 | Subject: [PATCH] KVM: x86: SVM: use smram structs 5 | 6 | Use SMM structs in the SVM code as well, which removes the last user of 7 | PUT_SMSTATE/GET_SMSTATE, so remove these macros as well. 
8 | 9 | Signed-off-by: Maxim Levitsky 10 | --- 11 | arch/x86/kvm/smm.h | 6 ------ 12 | arch/x86/kvm/svm/svm.c | 21 +++++++-------------- 13 | 2 files changed, 7 insertions(+), 20 deletions(-) 14 | 15 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 16 | index bf5c7ffeb11e..8d96bff3f4d5 100644 17 | --- a/arch/x86/kvm/smm.h 18 | +++ b/arch/x86/kvm/smm.h 19 | @@ -4,12 +4,6 @@ 20 | 21 | #include 22 | 23 | -#define GET_SMSTATE(type, buf, offset) \ 24 | - (*(type *)((buf) + (offset) - 0x7e00)) 25 | - 26 | -#define PUT_SMSTATE(type, buf, offset, val) \ 27 | - *(type *)((buf) + (offset) - 0x7e00) = val 28 | - 29 | #ifdef CONFIG_KVM_SMM 30 | 31 | 32 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 33 | index 74f390077a1e..44d6a2240e9e 100644 34 | --- a/arch/x86/kvm/svm/svm.c 35 | +++ b/arch/x86/kvm/svm/svm.c 36 | @@ -4443,15 +4443,11 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) 37 | struct kvm_host_map map_save; 38 | int ret; 39 | 40 | - char *smstate = (char *)smram; 41 | - 42 | if (!is_guest_mode(vcpu)) 43 | return 0; 44 | 45 | - /* FED8h - SVM Guest */ 46 | - PUT_SMSTATE(u64, smstate, 0x7ed8, 1); 47 | - /* FEE0h - SVM Guest VMCB Physical Address */ 48 | - PUT_SMSTATE(u64, smstate, 0x7ee0, svm->nested.vmcb12_gpa); 49 | + smram->smram64.svm_guest_flag = 1; 50 | + smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; 51 | 52 | svm->vmcb->save.rax = vcpu->arch.regs[VCPU_REGS_RAX]; 53 | svm->vmcb->save.rsp = vcpu->arch.regs[VCPU_REGS_RSP]; 54 | @@ -4489,28 +4485,25 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) 55 | { 56 | struct vcpu_svm *svm = to_svm(vcpu); 57 | struct kvm_host_map map, map_save; 58 | - u64 saved_efer, vmcb12_gpa; 59 | struct vmcb *vmcb12; 60 | int ret; 61 | 62 | - const char *smstate = (const char *)smram; 63 | + const struct kvm_smram_state_64 *smram64 = &smram->smram64; 64 | 65 | if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) 66 | return 0; 67 | 68 | /* Non-zero if SMI 
arrived while vCPU was in guest mode. */ 69 | - if (!GET_SMSTATE(u64, smstate, 0x7ed8)) 70 | + if (!smram64->svm_guest_flag) 71 | return 0; 72 | 73 | if (!guest_cpuid_has(vcpu, X86_FEATURE_SVM)) 74 | return 1; 75 | 76 | - saved_efer = GET_SMSTATE(u64, smstate, 0x7ed0); 77 | - if (!(saved_efer & EFER_SVME)) 78 | + if (!(smram64->efer & EFER_SVME)) 79 | return 1; 80 | 81 | - vmcb12_gpa = GET_SMSTATE(u64, smstate, 0x7ee0); 82 | - if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map)) 83 | + if (kvm_vcpu_map(vcpu, gpa_to_gfn(smram64->svm_guest_vmcb_gpa), &map)) 84 | return 1; 85 | 86 | ret = 1; 87 | @@ -4536,7 +4529,7 @@ static int svm_leave_smm(struct kvm_vcpu *vcpu, const union kvm_smram *smram) 88 | vmcb12 = map.hva; 89 | nested_copy_vmcb_control_to_cache(svm, &vmcb12->control); 90 | nested_copy_vmcb_save_to_cache(svm, &vmcb12->save); 91 | - ret = enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, false); 92 | + ret = enter_svm_guest_mode(vcpu, smram64->svm_guest_vmcb_gpa, vmcb12, false); 93 | 94 | if (ret) 95 | goto unmap_save; 96 | -- 97 | 2.38.1 98 | 99 | -------------------------------------------------------------------------------- /debian/patches/pve/0029-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch: -------------------------------------------------------------------------------- 1 | From 61fe0ac44f8d9714accad28bab0179d85f969b23 Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:40 +0300 4 | Subject: [PATCH] KVM: x86: SVM: don't save SVM state to SMRAM when VM is not 5 | long mode capable 6 | 7 | When the guest CPUID doesn't have support for long mode, 32 bit SMRAM 8 | layout is used and it has no support for preserving EFER and/or SVM 9 | state. 10 | 11 | Note that this isn't relevant to running 32 bit guests on VM which is 12 | long mode capable - such VM can still run 32 bit guests in compatibility 13 | mode. 
14 | 15 | Signed-off-by: Maxim Levitsky 16 | --- 17 | arch/x86/kvm/svm/svm.c | 8 ++++++++ 18 | 1 file changed, 8 insertions(+) 19 | 20 | diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c 21 | index 44d6a2240e9e..9f2e3fe3dcd7 100644 22 | --- a/arch/x86/kvm/svm/svm.c 23 | +++ b/arch/x86/kvm/svm/svm.c 24 | @@ -4446,6 +4446,14 @@ static int svm_enter_smm(struct kvm_vcpu *vcpu, union kvm_smram *smram) 25 | if (!is_guest_mode(vcpu)) 26 | return 0; 27 | 28 | + /* 29 | + * 32-bit SMRAM format doesn't preserve EFER and SVM state. Userspace is 30 | + * responsible for ensuring nested SVM and SMIs are mutually exclusive. 31 | + */ 32 | + 33 | + if (!guest_cpuid_has(vcpu, X86_FEATURE_LM)) 34 | + return 1; 35 | + 36 | smram->smram64.svm_guest_flag = 1; 37 | smram->smram64.svm_guest_vmcb_gpa = svm->nested.vmcb12_gpa; 38 | 39 | -- 40 | 2.38.1 41 | 42 | -------------------------------------------------------------------------------- /debian/patches/pve/0030-KVM-x86-smm-preserve-interrupt-shadow-in-SMRAM.patch: -------------------------------------------------------------------------------- 1 | From 810253988e9e317d6e576ebe608a5454f274b8fc Mon Sep 17 00:00:00 2001 2 | From: Maxim Levitsky 3 | Date: Tue, 25 Oct 2022 15:47:41 +0300 4 | Subject: [PATCH] KVM: x86: smm: preserve interrupt shadow in SMRAM 5 | 6 | When #SMI is asserted, the CPU can be in interrupt shadow due to sti or 7 | mov ss. 8 | 9 | It is not mandatory in Intel/AMD prm to have the #SMI blocked during the 10 | shadow, and on top of that, since neither SVM nor VMX has true support 11 | for SMI window, waiting for one instruction would mean single stepping 12 | the guest. 13 | 14 | Instead, allow #SMI in this case, but both reset the interrupt window and 15 | stash its value in SMRAM to restore it on exit from SMM. 
16 | 17 | This fixes rare failures seen mostly on windows guests on VMX, when #SMI 18 | falls on the sti instruction which manifest in VM entry failure due 19 | to EFLAGS.IF not being set, but STI interrupt window still being set 20 | in the VMCS. 21 | 22 | Signed-off-by: Maxim Levitsky 23 | --- 24 | arch/x86/kvm/smm.c | 24 +++++++++++++++++++++--- 25 | arch/x86/kvm/smm.h | 5 +++-- 26 | 2 files changed, 24 insertions(+), 5 deletions(-) 27 | 28 | diff --git a/arch/x86/kvm/smm.c b/arch/x86/kvm/smm.c 29 | index 82761384a866..46d2656937a7 100644 30 | --- a/arch/x86/kvm/smm.c 31 | +++ b/arch/x86/kvm/smm.c 32 | @@ -21,6 +21,7 @@ static void check_smram_offsets(void) 33 | CHECK_SMRAM32_OFFSET(smbase, 0xFEF8); 34 | CHECK_SMRAM32_OFFSET(smm_revision, 0xFEFC); 35 | CHECK_SMRAM32_OFFSET(reserved2, 0xFF00); 36 | + CHECK_SMRAM32_OFFSET(int_shadow, 0xFF10); 37 | CHECK_SMRAM32_OFFSET(cr4, 0xFF14); 38 | CHECK_SMRAM32_OFFSET(reserved3, 0xFF18); 39 | CHECK_SMRAM32_OFFSET(ds, 0xFF2C); 40 | @@ -65,7 +66,7 @@ static void check_smram_offsets(void) 41 | CHECK_SMRAM64_OFFSET(io_restart_rsi, 0xFEB0); 42 | CHECK_SMRAM64_OFFSET(io_restart_rdi, 0xFEB8); 43 | CHECK_SMRAM64_OFFSET(io_restart_dword, 0xFEC0); 44 | - CHECK_SMRAM64_OFFSET(reserved1, 0xFEC4); 45 | + CHECK_SMRAM64_OFFSET(int_shadow, 0xFEC4); 46 | CHECK_SMRAM64_OFFSET(io_inst_restart, 0xFEC8); 47 | CHECK_SMRAM64_OFFSET(auto_hlt_restart, 0xFEC9); 48 | CHECK_SMRAM64_OFFSET(reserved2, 0xFECA); 49 | @@ -212,6 +213,8 @@ static void enter_smm_save_state_32(struct kvm_vcpu *vcpu, 50 | smram->cr4 = kvm_read_cr4(vcpu); 51 | smram->smm_revision = 0x00020000; 52 | smram->smbase = vcpu->arch.smbase; 53 | + 54 | + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); 55 | } 56 | 57 | #ifdef CONFIG_X86_64 58 | @@ -261,6 +264,8 @@ static void enter_smm_save_state_64(struct kvm_vcpu *vcpu, 59 | enter_smm_save_seg_64(vcpu, &smram->ds, VCPU_SREG_DS); 60 | enter_smm_save_seg_64(vcpu, &smram->fs, VCPU_SREG_FS); 61 | 
enter_smm_save_seg_64(vcpu, &smram->gs, VCPU_SREG_GS); 62 | + 63 | + smram->int_shadow = static_call(kvm_x86_get_interrupt_shadow)(vcpu); 64 | } 65 | #endif 66 | 67 | @@ -306,6 +311,8 @@ void enter_smm(struct kvm_vcpu *vcpu) 68 | kvm_set_rflags(vcpu, X86_EFLAGS_FIXED); 69 | kvm_rip_write(vcpu, 0x8000); 70 | 71 | + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); 72 | + 73 | cr0 = vcpu->arch.cr0 & ~(X86_CR0_PE | X86_CR0_EM | X86_CR0_TS | X86_CR0_PG); 74 | static_call(kvm_x86_set_cr0)(vcpu, cr0); 75 | vcpu->arch.cr0 = cr0; 76 | @@ -453,7 +460,7 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 77 | { 78 | struct kvm_vcpu *vcpu = ctxt->vcpu; 79 | struct desc_ptr dt; 80 | - int i; 81 | + int i, r; 82 | 83 | ctxt->eflags = smstate->eflags | X86_EFLAGS_FIXED; 84 | ctxt->_eip = smstate->eip; 85 | @@ -487,8 +494,16 @@ static int rsm_load_state_32(struct x86_emulate_ctxt *ctxt, 86 | 87 | vcpu->arch.smbase = smstate->smbase; 88 | 89 | - return rsm_enter_protected_mode(vcpu, smstate->cr0, 90 | + r = rsm_enter_protected_mode(vcpu, smstate->cr0, 91 | smstate->cr3, smstate->cr4); 92 | + 93 | + if (r != X86EMUL_CONTINUE) 94 | + return r; 95 | + 96 | + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); 97 | + ctxt->interruptibility = (u8)smstate->int_shadow; 98 | + 99 | + return r; 100 | } 101 | 102 | #ifdef CONFIG_X86_64 103 | @@ -539,6 +554,9 @@ static int rsm_load_state_64(struct x86_emulate_ctxt *ctxt, 104 | rsm_load_seg_64(vcpu, &smstate->fs, VCPU_SREG_FS); 105 | rsm_load_seg_64(vcpu, &smstate->gs, VCPU_SREG_GS); 106 | 107 | + static_call(kvm_x86_set_interrupt_shadow)(vcpu, 0); 108 | + ctxt->interruptibility = (u8)smstate->int_shadow; 109 | + 110 | return X86EMUL_CONTINUE; 111 | } 112 | #endif 113 | diff --git a/arch/x86/kvm/smm.h b/arch/x86/kvm/smm.h 114 | index 8d96bff3f4d5..2eaec53bcc95 100644 115 | --- a/arch/x86/kvm/smm.h 116 | +++ b/arch/x86/kvm/smm.h 117 | @@ -19,7 +19,8 @@ struct kvm_smram_state_32 { 118 | u32 reserved1[62]; 119 | u32 smbase; 120 | 
u32 smm_revision; 121 | - u32 reserved2[5]; 122 | + u32 reserved2[4]; 123 | + u32 int_shadow; /* KVM extension */ 124 | u32 cr4; /* CR4 is not present in Intel/AMD SMRAM image */ 125 | u32 reserved3[5]; 126 | 127 | @@ -86,7 +87,7 @@ struct kvm_smram_state_64 { 128 | u64 io_restart_rsi; 129 | u64 io_restart_rdi; 130 | u32 io_restart_dword; 131 | - u32 reserved1; 132 | + u32 int_shadow; 133 | u8 io_inst_restart; 134 | u8 auto_hlt_restart; 135 | u8 reserved2[6]; 136 | -- 137 | 2.38.1 138 | 139 | -------------------------------------------------------------------------------- /debian/patches/pve/952-add-net-conntrack-events-support-multiple-registrant.patch: -------------------------------------------------------------------------------- 1 | From 42824d4b753f84ccf885eca602c5037338b546c8 Mon Sep 17 00:00:00 2001 2 | From: Zhi Chen 3 | Date: Tue, 13 Jan 2015 14:28:18 -0800 4 | Subject: [PATCH 3/3] net: conntrack events, support multiple registrant 5 | 6 | Merging this patch from kernel 3.4: 7 | This was supported by old (.28) kernel versions but removed 8 | because of its overhead. 9 | But we need this feature for NA connection manager. Both ipv4 10 | and ipv6 modules need to register themselves to ct events.
11 | 12 | Change-Id: Iebfb254590fb594f5baf232f849d1b7ae45ef757 13 | Signed-off-by: Zhi Chen 14 | --- 15 | include/net/netfilter/nf_conntrack_ecache.h | 15 ++- 16 | include/net/netns/conntrack.h | 3 + 17 | net/netfilter/Kconfig | 8 ++ 18 | net/netfilter/nf_conntrack_core.c | 4 + 19 | net/netfilter/nf_conntrack_ecache.c | 103 +++++++++++++++++++- 20 | net/netfilter/nf_conntrack_netlink.c | 17 ++++ 21 | 6 files changed, 146 insertions(+), 4 deletions(-) 22 | 23 | --- a/include/net/netfilter/nf_conntrack_ecache.h 24 | +++ b/include/net/netfilter/nf_conntrack_ecache.h 25 | @@ -65,9 +65,14 @@ struct nf_ct_event_notifier { 26 | int (*exp_event)(unsigned int events, const struct nf_exp_event *item); 27 | }; 28 | 29 | -void nf_conntrack_register_notifier(struct net *net, 30 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 31 | +extern int nf_conntrack_register_notifier(struct net *net, struct notifier_block *nb); 32 | +extern int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb); 33 | +#else 34 | +int nf_conntrack_register_notifier(struct net *net, 35 | const struct nf_ct_event_notifier *nb); 36 | void nf_conntrack_unregister_notifier(struct net *net); 37 | +#endif 38 | 39 | void nf_ct_deliver_cached_events(struct nf_conn *ct); 40 | int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 41 | @@ -98,11 +103,13 @@ static inline void 42 | nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) 43 | { 44 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 45 | - struct net *net = nf_ct_net(ct); 46 | struct nf_conntrack_ecache *e; 47 | +#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 48 | + struct net *net = nf_ct_net(ct); 49 | 50 | if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) 51 | return; 52 | +#endif 53 | 54 | e = nf_ct_ecache_find(ct); 55 | if (e == NULL) 56 | @@ -117,20 +124,34 @@ nf_conntrack_event_report(enum ip_conntr 57 | u32 portid, int report) 58 | { 59 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 60 | - if 
(nf_ct_ecache_exist(ct)) 61 | - return nf_conntrack_eventmask_report(1 << event, ct, portid, report); 62 | +#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 63 | + const struct net *net = nf_ct_net(ct); 64 | + 65 | + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) 66 | + return 0; 67 | #endif 68 | + 69 | + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); 70 | +#else 71 | return 0; 72 | +#endif 73 | } 74 | 75 | static inline int 76 | nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) 77 | { 78 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 79 | - if (nf_ct_ecache_exist(ct)) 80 | - return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); 81 | +#ifndef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 82 | + const struct net *net = nf_ct_net(ct); 83 | + 84 | + if (!rcu_access_pointer(net->ct.nf_conntrack_event_cb)) 85 | + return 0; 86 | #endif 87 | + 88 | + return nf_conntrack_eventmask_report(1 << event, ct, 0, 0); 89 | +#else 90 | return 0; 91 | +#endif 92 | } 93 | 94 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 95 | --- a/include/net/netns/conntrack.h 96 | +++ b/include/net/netns/conntrack.h 97 | @@ -106,6 +106,9 @@ struct netns_ct { 98 | u8 sysctl_checksum; 99 | 100 | struct ip_conntrack_stat __percpu *stat; 101 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 102 | + struct atomic_notifier_head nf_conntrack_chain; 103 | +#endif 104 | struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; 105 | struct nf_ip_net nf_ct_proto; 106 | #if defined(CONFIG_NF_CONNTRACK_LABELS) 107 | --- a/net/netfilter/Kconfig 108 | +++ b/net/netfilter/Kconfig 109 | @@ -161,6 +161,14 @@ config NF_CONNTRACK_EVENTS 110 | 111 | If unsure, say `N'. 112 | 113 | +config NF_CONNTRACK_CHAIN_EVENTS 114 | + bool "Register multiple callbacks to ct events" 115 | + depends on NF_CONNTRACK_EVENTS 116 | + help 117 | + Support multiple registrations. 118 | + 119 | + If unsure, say `N'. 
120 | + 121 | config NF_CONNTRACK_TIMEOUT 122 | bool 'Connection tracking timeout' 123 | depends on NETFILTER_ADVANCED 124 | --- a/net/netfilter/nf_conntrack_core.c 125 | +++ b/net/netfilter/nf_conntrack_core.c 126 | @@ -2803,6 +2803,10 @@ int nf_conntrack_init_net(struct net *ne 127 | nf_conntrack_ecache_pernet_init(net); 128 | nf_conntrack_proto_pernet_init(net); 129 | 130 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 131 | + ATOMIC_INIT_NOTIFIER_HEAD(&net->ct.nf_conntrack_chain); 132 | +#endif 133 | + 134 | return 0; 135 | 136 | err_expect: 137 | --- a/net/netfilter/nf_conntrack_ecache.c 138 | +++ b/net/netfilter/nf_conntrack_ecache.c 139 | @@ -17,6 +17,9 @@ 140 | #include 141 | #include 142 | #include 143 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 144 | +#include 145 | +#endif 146 | #include 147 | #include 148 | #include 149 | @@ -162,6 +165,35 @@ static int __nf_conntrack_eventmask_repo 150 | return ret; 151 | } 152 | 153 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 154 | +int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, 155 | + u32 portid, int report) 156 | +{ 157 | + struct nf_conntrack_ecache *e; 158 | + struct net *net = nf_ct_net(ct); 159 | + 160 | + e = nf_ct_ecache_find(ct); 161 | + if (e == NULL) 162 | + return 0; 163 | + 164 | + if (nf_ct_is_confirmed(ct)) { 165 | + struct nf_ct_event item = { 166 | + .ct = ct, 167 | + .portid = e->portid ? e->portid : portid, 168 | + .report = report 169 | + }; 170 | + /* This is a resent of a destroy event? If so, skip missed */ 171 | + unsigned long missed = e->portid ? 
0 : e->missed; 172 | + 173 | + if (!((eventmask | missed) & e->ctmask)) 174 | + return 0; 175 | + 176 | + atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, eventmask | missed, &item); 177 | + } 178 | + 179 | + return 0; 180 | +} 181 | +#else 182 | int nf_conntrack_eventmask_report(unsigned int events, struct nf_conn *ct, 183 | u32 portid, int report) 184 | { 185 | @@ -197,10 +229,52 @@ int nf_conntrack_eventmask_report(unsign 186 | 187 | return ret; 188 | } 189 | +#endif 190 | EXPORT_SYMBOL_GPL(nf_conntrack_eventmask_report); 191 | 192 | /* deliver cached events and clear cache entry - must be called with locally 193 | * disabled softirqs */ 194 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 195 | +void nf_ct_deliver_cached_events(struct nf_conn *ct) 196 | +{ 197 | + unsigned long events, missed; 198 | + struct nf_conntrack_ecache *e; 199 | + struct nf_ct_event item; 200 | + struct net *net = nf_ct_net(ct); 201 | + 202 | + e = nf_ct_ecache_find(ct); 203 | + if (e == NULL) 204 | + return; 205 | + 206 | + events = xchg(&e->cache, 0); 207 | + 208 | + if (!nf_ct_is_confirmed(ct) || nf_ct_is_dying(ct) || !events) 209 | + return; 210 | + 211 | + /* We make a copy of the missed event cache without taking 212 | + * the lock, thus we may send missed events twice. However, 213 | + * this does not harm and it happens very rarely. 
*/ 214 | + missed = e->missed; 215 | + 216 | + if (!((events | missed) & e->ctmask)) 217 | + return; 218 | + 219 | + item.ct = ct; 220 | + item.portid = 0; 221 | + item.report = 0; 222 | + 223 | + atomic_notifier_call_chain(&net->ct.nf_conntrack_chain, 224 | + events | missed, 225 | + &item); 226 | + 227 | + if (likely(!missed)) 228 | + return; 229 | + 230 | + spin_lock_bh(&ct->lock); 231 | + e->missed &= ~missed; 232 | + spin_unlock_bh(&ct->lock); 233 | +} 234 | +#else 235 | void nf_ct_deliver_cached_events(struct nf_conn *ct) 236 | { 237 | struct nf_conntrack_ecache *e; 238 | @@ -226,6 +300,7 @@ void nf_ct_deliver_cached_events(struct 239 | */ 240 | __nf_conntrack_eventmask_report(e, events, e->missed, &item); 241 | } 242 | +#endif 243 | EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events); 244 | 245 | void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, 246 | @@ -258,20 +333,43 @@ out_unlock: 247 | rcu_read_unlock(); 248 | } 249 | 250 | -void nf_conntrack_register_notifier(struct net *net, 251 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 252 | +int nf_conntrack_register_notifier(struct net *net, 253 | + struct notifier_block *nb) 254 | +{ 255 | + return atomic_notifier_chain_register(&net->ct.nf_conntrack_chain, nb); 256 | +} 257 | +#else 258 | +int nf_conntrack_register_notifier(struct net *net, 259 | const struct nf_ct_event_notifier *new) 260 | { 261 | + int ret; 262 | struct nf_ct_event_notifier *notify; 263 | 264 | mutex_lock(&nf_ct_ecache_mutex); 265 | notify = rcu_dereference_protected(net->ct.nf_conntrack_event_cb, 266 | lockdep_is_held(&nf_ct_ecache_mutex)); 267 | WARN_ON_ONCE(notify); 268 | + if (notify != NULL) { 269 | + ret = -EBUSY; 270 | + goto out_unlock; 271 | + } 272 | + 273 | rcu_assign_pointer(net->ct.nf_conntrack_event_cb, new); 274 | - mutex_unlock(&nf_ct_ecache_mutex); 275 | + ret = 0; 276 | +out_unlock: 277 | + mutex_unlock(&nf_ct_ecache_mutex); 278 | + return ret; 279 | } 280 | +#endif 281 | 
EXPORT_SYMBOL_GPL(nf_conntrack_register_notifier); 282 | 283 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 284 | +int nf_conntrack_unregister_notifier(struct net *net, struct notifier_block *nb) 285 | +{ 286 | + return atomic_notifier_chain_unregister(&net->ct.nf_conntrack_chain, nb); 287 | +} 288 | +#else 289 | void nf_conntrack_unregister_notifier(struct net *net) 290 | { 291 | mutex_lock(&nf_ct_ecache_mutex); 292 | @@ -279,6 +377,7 @@ void nf_conntrack_unregister_notifier(st 293 | mutex_unlock(&nf_ct_ecache_mutex); 294 | /* synchronize_rcu() is called after netns pre_exit */ 295 | } 296 | +#endif 297 | EXPORT_SYMBOL_GPL(nf_conntrack_unregister_notifier); 298 | 299 | void nf_conntrack_ecache_work(struct net *net, enum nf_ct_ecache_state state) 300 | --- a/net/netfilter/nf_conntrack_netlink.c 301 | +++ b/net/netfilter/nf_conntrack_netlink.c 302 | @@ -712,12 +712,19 @@ static size_t ctnetlink_nlmsg_size(const 303 | } 304 | 305 | static int 306 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 307 | +ctnetlink_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr) 308 | +#else 309 | ctnetlink_conntrack_event(unsigned int events, const struct nf_ct_event *item) 310 | +#endif 311 | { 312 | const struct nf_conntrack_zone *zone; 313 | struct net *net; 314 | struct nlmsghdr *nlh; 315 | struct nlattr *nest_parms; 316 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 317 | + struct nf_ct_event *item = (struct nf_ct_event *)ptr; 318 | +#endif 319 | struct nf_conn *ct = item->ct; 320 | struct sk_buff *skb; 321 | unsigned int type; 322 | @@ -3745,11 +3752,17 @@ static int ctnetlink_stat_exp_cpu(struct 323 | } 324 | 325 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 326 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 327 | +static struct notifier_block ctnl_notifier = { 328 | + .notifier_call = ctnetlink_conntrack_event 329 | +}; 330 | +#else 331 | static struct nf_ct_event_notifier ctnl_notifier = { 332 | .ct_event = ctnetlink_conntrack_event, 333 | .exp_event = ctnetlink_expect_event, 
334 | }; 335 | #endif 336 | +#endif 337 | 338 | static const struct nfnl_callback ctnl_cb[IPCTNL_MSG_MAX] = { 339 | [IPCTNL_MSG_CT_NEW] = { 340 | @@ -3848,8 +3861,12 @@ static int __net_init ctnetlink_net_init 341 | static void ctnetlink_net_pre_exit(struct net *net) 342 | { 343 | #ifdef CONFIG_NF_CONNTRACK_EVENTS 344 | +#ifdef CONFIG_NF_CONNTRACK_CHAIN_EVENTS 345 | + nf_conntrack_unregister_notifier(net,&ctnl_notifier); 346 | +#else 347 | nf_conntrack_unregister_notifier(net); 348 | #endif 349 | +#endif 350 | } 351 | 352 | static struct pernet_operations ctnetlink_net_ops = { 353 | -------------------------------------------------------------------------------- /debian/patches/pve/953-bnx2x_warpcore_8727_2_5g_sgmii_txfault.patch: -------------------------------------------------------------------------------- 1 | diff -Naur a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h 2 | --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h 2020-09-03 23:21:36.695693427 -0400 3 | +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h 2020-09-03 23:29:22.457340229 -0400 4 | @@ -39,7 +39,7 @@ 5 | * 6 | * DO NOT UPDATE DRV_MODULE_VERSION below. 
7 | */ 8 | -#define DRV_MODULE_VERSION "1.713.36-0" 9 | +#define DRV_MODULE_VERSION "99.1.713.36-0" 10 | #define BNX2X_BC_VER 0x040200 11 | 12 | #if defined(CONFIG_DCB) 13 | @@ -1592,6 +1592,7 @@ 14 | uint num_ethernet_queues; 15 | uint num_cnic_queues; 16 | int disable_tpa; 17 | + int mask_tx_fault; 18 | 19 | u32 rx_mode; 20 | #define BNX2X_RX_MODE_NONE 0 21 | diff -Naur a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c 22 | --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c 2020-09-03 23:21:36.699693373 -0400 23 | +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c 2020-09-03 23:29:22.457340229 -0400 24 | @@ -151,6 +151,7 @@ 25 | 26 | #define SFP_EEPROM_CON_TYPE_ADDR 0x2 27 | #define SFP_EEPROM_CON_TYPE_VAL_UNKNOWN 0x0 28 | + #define SFP_EEPROM_CON_TYPE_VAL_SC 0x1 29 | #define SFP_EEPROM_CON_TYPE_VAL_LC 0x7 30 | #define SFP_EEPROM_CON_TYPE_VAL_COPPER 0x21 31 | #define SFP_EEPROM_CON_TYPE_VAL_RJ45 0x22 32 | @@ -4210,6 +4211,16 @@ 33 | 0x1000); 34 | DP(NETIF_MSG_LINK, "set SGMII AUTONEG\n"); 35 | } else { 36 | + /* Note that 2.5G works only when used with 1G advertisment */ 37 | + if (fiber_mode && (phy->req_line_speed == SPEED_2500) && 38 | + (phy->speed_cap_mask & 39 | + (PORT_HW_CFG_SPEED_CAPABILITY_D0_1G | 40 | + PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G))) { 41 | + bnx2x_cl45_write(bp, phy, MDIO_WC_DEVAD, 42 | + MDIO_WC_REG_SERDESDIGITAL_MISC1, 43 | + 0x6010); 44 | + } 45 | + 46 | bnx2x_cl45_read(bp, phy, MDIO_WC_DEVAD, 47 | MDIO_WC_REG_COMBO_IEEE0_MIICTRL, &val16); 48 | val16 &= 0xcebf; 49 | @@ -4220,6 +4231,7 @@ 50 | val16 |= 0x2000; 51 | break; 52 | case SPEED_1000: 53 | + case SPEED_2500: 54 | val16 |= 0x0040; 55 | break; 56 | default: 57 | @@ -8172,6 +8184,7 @@ 58 | break; 59 | } 60 | case SFP_EEPROM_CON_TYPE_VAL_UNKNOWN: 61 | + case SFP_EEPROM_CON_TYPE_VAL_SC: 62 | case SFP_EEPROM_CON_TYPE_VAL_LC: 63 | case SFP_EEPROM_CON_TYPE_VAL_RJ45: 64 | check_limiting_mode = 1; 65 | @@ -8182,7 +8195,8 @@ 66 | 
(val[SFP_EEPROM_1G_COMP_CODE_ADDR] != 0)) { 67 | DP(NETIF_MSG_LINK, "1G SFP module detected\n"); 68 | phy->media_type = ETH_PHY_SFP_1G_FIBER; 69 | - if (phy->req_line_speed != SPEED_1000) { 70 | + if ((phy->req_line_speed != SPEED_1000) && 71 | + (phy->req_line_speed != SPEED_2500)) { 72 | u8 gport = params->port; 73 | phy->req_line_speed = SPEED_1000; 74 | if (!CHIP_IS_E1x(bp)) { 75 | @@ -9236,6 +9250,7 @@ 76 | u16 tmp1, val; 77 | /* Set option 1G speed */ 78 | if ((phy->req_line_speed == SPEED_1000) || 79 | + (phy->req_line_speed == SPEED_2500) || 80 | (phy->media_type == ETH_PHY_SFP_1G_FIBER)) { 81 | DP(NETIF_MSG_LINK, "Setting 1G force\n"); 82 | bnx2x_cl45_write(bp, phy, 83 | @@ -9245,6 +9260,22 @@ 84 | bnx2x_cl45_read(bp, phy, 85 | MDIO_PMA_DEVAD, MDIO_PMA_REG_10G_CTRL2, &tmp1); 86 | DP(NETIF_MSG_LINK, "1.7 = 0x%x\n", tmp1); 87 | + if ((phy->req_line_speed == SPEED_2500) && 88 | + (phy->speed_cap_mask & 89 | + (PORT_HW_CFG_SPEED_CAPABILITY_D0_1G | 90 | + PORT_HW_CFG_SPEED_CAPABILITY_D0_2_5G))) { 91 | + bnx2x_cl45_read_and_write(bp, phy, 92 | + MDIO_AN_DEVAD, 93 | + MDIO_AN_REG_8727_MISC_CTRL2, 94 | + ~(1<<5)); 95 | + bnx2x_cl45_write(bp, phy, 96 | + MDIO_AN_DEVAD, 97 | + MDIO_AN_REG_8727_MISC_CTRL1, 0x0010); 98 | + } else { 99 | + bnx2x_cl45_write(bp, phy, 100 | + MDIO_AN_DEVAD, 101 | + MDIO_AN_REG_8727_MISC_CTRL1, 0x001C); 102 | + } 103 | /* Power down the XAUI until link is up in case of dual-media 104 | * and 1G 105 | */ 106 | @@ -9266,7 +9297,7 @@ 107 | 108 | DP(NETIF_MSG_LINK, "Setting 1G clause37\n"); 109 | bnx2x_cl45_write(bp, phy, 110 | - MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL, 0); 111 | + MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL2, 0); 112 | bnx2x_cl45_write(bp, phy, 113 | MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x1300); 114 | } else { 115 | @@ -9274,9 +9305,12 @@ 116 | * registers although it is default 117 | */ 118 | bnx2x_cl45_write(bp, phy, 119 | - MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL, 120 | + MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL2, 121 | 
0x0020); 122 | bnx2x_cl45_write(bp, phy, 123 | + MDIO_AN_DEVAD, MDIO_AN_REG_8727_MISC_CTRL1, 124 | + 0x001C); 125 | + bnx2x_cl45_write(bp, phy, 126 | MDIO_AN_DEVAD, MDIO_AN_REG_CL37_AN, 0x0100); 127 | bnx2x_cl45_write(bp, phy, 128 | MDIO_PMA_DEVAD, MDIO_PMA_REG_CTRL, 0x2040); 129 | @@ -9565,6 +9599,11 @@ 130 | vars->line_speed = SPEED_10000; 131 | DP(NETIF_MSG_LINK, "port %x: External link up in 10G\n", 132 | params->port); 133 | + } else if ((link_status & (1<<1)) && (!(link_status & (1<<14)))) { 134 | + link_up = 1; 135 | + vars->line_speed = SPEED_2500; 136 | + DP(NETIF_MSG_LINK, "port %x: External link up in 2.5G\n", 137 | + params->port); 138 | } else if ((link_status & (1<<0)) && (!(link_status & (1<<13)))) { 139 | link_up = 1; 140 | vars->line_speed = SPEED_1000; 141 | @@ -9596,7 +9635,8 @@ 142 | } 143 | 144 | if ((DUAL_MEDIA(params)) && 145 | - (phy->req_line_speed == SPEED_1000)) { 146 | + ((phy->req_line_speed == SPEED_1000) || 147 | + (phy->req_line_speed == SPEED_2500))) { 148 | bnx2x_cl45_read(bp, phy, 149 | MDIO_PMA_DEVAD, 150 | MDIO_PMA_REG_8727_PCS_GP, &val1); 151 | @@ -11720,6 +11760,7 @@ 152 | SUPPORTED_100baseT_Full | 153 | SUPPORTED_1000baseT_Full | 154 | SUPPORTED_1000baseKX_Full | 155 | + SUPPORTED_2500baseX_Full | 156 | SUPPORTED_10000baseT_Full | 157 | SUPPORTED_10000baseKR_Full | 158 | SUPPORTED_20000baseKR2_Full | 159 | @@ -11906,6 +11947,7 @@ 160 | .tx_preemphasis = {0xffff, 0xffff, 0xffff, 0xffff}, 161 | .mdio_ctrl = 0, 162 | .supported = (SUPPORTED_10000baseT_Full | 163 | + SUPPORTED_2500baseX_Full | 164 | SUPPORTED_1000baseT_Full | 165 | SUPPORTED_FIBRE | 166 | SUPPORTED_Pause | 167 | @@ -12253,6 +12295,7 @@ 168 | break; 169 | case PORT_HW_CFG_NET_SERDES_IF_SFI: 170 | phy->supported &= (SUPPORTED_1000baseT_Full | 171 | + SUPPORTED_2500baseX_Full | 172 | SUPPORTED_10000baseT_Full | 173 | SUPPORTED_FIBRE | 174 | SUPPORTED_Pause | 175 | @@ -13937,7 +13980,8 @@ 176 | & PORT_HW_CFG_NET_SERDES_IF_MASK) == 177 | PORT_HW_CFG_NET_SERDES_IF_SFI) 
{ 178 | if (bnx2x_is_sfp_module_plugged(phy, params)) { 179 | - bnx2x_sfp_tx_fault_detection(phy, params, vars); 180 | + if(!((params->port + 1) & bp->mask_tx_fault)) 181 | + bnx2x_sfp_tx_fault_detection(phy, params, vars); 182 | } else if (vars->link_status & 183 | LINK_STATUS_SFP_TX_FAULT) { 184 | /* Clean trail, interrupt corrects the leds */ 185 | diff -Naur a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 186 | --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 2020-09-03 23:21:36.699693373 -0400 187 | +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_main.c 2020-09-03 23:29:22.461340175 -0400 188 | @@ -113,6 +113,10 @@ 189 | module_param(disable_tpa, int, 0444); 190 | MODULE_PARM_DESC(disable_tpa, " Disable the TPA (LRO) feature"); 191 | 192 | +static int mask_tx_fault; 193 | +module_param(mask_tx_fault, int, 0444); 194 | +MODULE_PARM_DESC(mask_tx_fault, " Mask SFP TX fault detection"); 195 | + 196 | static int int_mode; 197 | module_param(int_mode, int, 0444); 198 | MODULE_PARM_DESC(int_mode, " Force interrupt mode other than MSI-X " 199 | @@ -12346,6 +12350,8 @@ 200 | if (BP_NOMCP(bp) && (func == 0)) 201 | dev_err(&bp->pdev->dev, "MCP disabled, must load devices in order!\n"); 202 | 203 | + bp->mask_tx_fault = mask_tx_fault; 204 | + 205 | bp->disable_tpa = disable_tpa; 206 | bp->disable_tpa |= !!IS_MF_STORAGE_ONLY(bp); 207 | /* Reduce memory usage in kdump environment by disabling TPA */ 208 | diff -Naur a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h 209 | --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h 2020-09-03 23:21:36.699693373 -0400 210 | +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_reg.h 2020-09-03 23:22:22.975062197 -0400 211 | @@ -7169,7 +7169,8 @@ 212 | #define MDIO_PMA_REG_8727_PCS_GP 0xc842 213 | #define MDIO_PMA_REG_8727_OPT_CFG_REG 0xc8e4 214 | 215 | -#define MDIO_AN_REG_8727_MISC_CTRL 0x8309 216 | +#define 
MDIO_AN_REG_8727_MISC_CTRL1 0x8308 217 | +#define MDIO_AN_REG_8727_MISC_CTRL2 0x8309 218 | 219 | #define MDIO_PMA_REG_8073_CHIP_REV 0xc801 220 | #define MDIO_PMA_REG_8073_SPEED_LINK_STATUS 0xc820 221 | -------------------------------------------------------------------------------- /debian/patches/pve/999-ixgbe-add-disabling-NBASE-T-suppression-hack.patch: -------------------------------------------------------------------------------- 1 | diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 2 | index 13c4782b920a..8073d375bde2 100644 3 | --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 4 | +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c 5 | @@ -156,6 +156,11 @@ 6 | MODULE_PARM_DESC(allow_unsupported_sfp, 7 | "Allow unsupported and untested SFP+ modules on 82599-based adapters"); 8 | 9 | +static unsigned int enable_nbase_t_suppression_hack = 0; 10 | +module_param(enable_nbase_t_suppression_hack, uint, 0); 11 | +MODULE_PARM_DESC(enable_nbase_t_suppression_hack, 12 | + "Enable hack which suppresses the advertisement of NBASE-T speeds to accommodate broken network switches"); 13 | + 14 | #define DEFAULT_MSG_ENABLE (NETIF_MSG_DRV|NETIF_MSG_PROBE|NETIF_MSG_LINK) 15 | static int debug = -1; 16 | module_param(debug, int, 0); 17 | @@ -5535,8 +5540,10 @@ 18 | * to accommodate broken network switches in the field 19 | * which cannot cope with advertised NBASE-T speeds 20 | */ 21 | - speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | 22 | - IXGBE_LINK_SPEED_2_5GB_FULL); 23 | + if (enable_nbase_t_suppression_hack) { 24 | + speed &= ~(IXGBE_LINK_SPEED_5GB_FULL | 25 | + IXGBE_LINK_SPEED_2_5GB_FULL); 26 | + } 27 | } 28 | 29 | if (ret) 30 | -------------------------------------------------------------------------------- /debian/patches/series.linux: -------------------------------------------------------------------------------- 1 | ubuntu/0001-apparmor-compatibility-v2.x-net-rules.patch 2 | 
ubuntu/0002-apparmor-af_unix-mediation.patch 3 | ubuntu/0003-apparmor-fix-apparmor-mediating-locking-non-fs-unix-sockets.patch 4 | ubuntu/0004-apparmor-fix-use-after-free-in-sk_peer_label.patch 5 | 6 | pve/0001-Make-mkcompile_h-accept-an-alternate-timestamp-strin.patch 7 | pve/0002-bridge-keep-MAC-of-first-assigned-port.patch 8 | pve/0003-pci-Enable-overrides-for-missing-ACS-capabilities-4..patch 9 | pve/0004-kvm-disable-default-dynamic-halt-polling-growth.patch 10 | pve/0005-net-core-downgrade-unregister_netdevice-refcount-lea.patch 11 | # pve/0007-Ubuntu-remove-leftover-reference-to-ubuntu-hio-drive.patch # Not in Ubuntu Mainline 12 | pve/0008-KVM-x86-start-moving-SMM-related-functions-to-new-fi.patch 13 | pve/0009-KVM-x86-move-SMM-entry-to-a-new-file.patch 14 | pve/0010-KVM-x86-move-SMM-exit-to-a-new-file.patch 15 | pve/0011-KVM-x86-do-not-go-through-ctxt-ops-when-emulating-rs.patch 16 | pve/0012-KVM-allow-compiling-out-SMM-support.patch 17 | pve/0013-KVM-x86-compile-out-vendor-specific-code-if-SMM-is-d.patch 18 | pve/0014-KVM-x86-remove-SMRAM-address-space-if-SMM-is-not-sup.patch 19 | pve/0015-KVM-x86-do-not-define-KVM_REQ_SMI-if-SMM-disabled.patch 20 | pve/0016-bug-introduce-ASSERT_STRUCT_OFFSET.patch 21 | pve/0017-KVM-x86-emulator-em_sysexit-should-update-ctxt-mode.patch 22 | pve/0018-KVM-x86-emulator-introduce-emulator_recalc_and_set_m.patch 23 | pve/0019-KVM-x86-emulator-update-the-emulation-mode-after-rsm.patch 24 | pve/0020-KVM-x86-emulator-update-the-emulation-mode-after-CR0.patch 25 | pve/0021-KVM-x86-smm-number-of-GPRs-in-the-SMRAM-image-depend.patch 26 | pve/0022-KVM-x86-smm-check-for-failures-on-smm-entry.patch 27 | pve/0023-KVM-x86-smm-add-structs-for-KVM-s-smram-layout.patch 28 | pve/0024-KVM-x86-smm-use-smram-structs-in-the-common-code.patch 29 | pve/0025-KVM-x86-smm-use-smram-struct-for-32-bit-smram-load-r.patch 30 | pve/0026-KVM-x86-smm-use-smram-struct-for-64-bit-smram-load-r.patch 31 | 
pve/0027-KVM-svm-drop-explicit-return-value-of-kvm_vcpu_map.patch 32 | pve/0028-KVM-x86-SVM-use-smram-structs.patch 33 | pve/0029-KVM-x86-SVM-don-t-save-SVM-state-to-SMRAM-when-VM-is.patch 34 | pve/0030-KVM-x86-smm-preserve-interrupt-shadow-in-SMRAM.patch 35 | pve/650-netfilter-add-xt_FLOWOFFLOAD-target.patch 36 | pve/952-add-net-conntrack-events-support-multiple-registrant.patch 37 | pve/953-bnx2x_warpcore_8727_2_5g_sgmii_txfault.patch 38 | pve/954-add-netfilter-full-cone-nat.patch 39 | pve/999-ixgbe-add-disabling-NBASE-T-suppression-hack.patch 40 | -------------------------------------------------------------------------------- /debian/patches/series.zfs: -------------------------------------------------------------------------------- 1 | zfs/0001-Check-for-META-and-DCH-consistency-in-autoconf.patch 2 | zfs/0002-always-load-ZFS-module-on-boot.patch 3 | zfs/0003-Fix-the-path-to-the-zed-binary-on-the-systemd-unit.patch 4 | zfs/0004-import-with-d-dev-disk-by-id-in-scan-service.patch 5 | zfs/0005-Enable-zed-emails.patch 6 | zfs/0006-dont-symlink-zed-scripts.patch 7 | zfs/0007-Use-installed-python3.patch 8 | zfs/0008-Add-systemd-unit-for-importing-specific-pools.patch 9 | zfs/0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch 10 | zfs/0010-arcstat-Fix-integer-division-with-python3.patch 11 | zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch 12 | -------------------------------------------------------------------------------- /debian/patches/ubuntu/0001-apparmor-compatibility-v2.x-net-rules.patch: -------------------------------------------------------------------------------- 1 | From f153f512ed7a81e9b92a04d49869cffebf714f52 Mon Sep 17 00:00:00 2001 2 | From: John Johansen 3 | Date: Sun, 17 Jun 2018 03:56:25 -0700 4 | Subject: UBUNTU: SAUCE: apparmor: patch to provide compatibility with v2.x net 5 | rules 6 | 7 | The networking rules upstreamed in 4.17 have a deliberate abi break 8 | with the older 2.x network rules. 
9 | 10 | This patch provides compatibility with the older rules for those 11 | still using an apparmor 2.x userspace and still want network rules 12 | to work on a newer kernel. 13 | 14 | Signed-off-by: John Johansen 15 | [ saf: resolve conflicts when rebasing to 4.20 ] 16 | Signed-off-by: Seth Forshee 17 | --- 18 | security/apparmor/apparmorfs.c | 1 + 19 | security/apparmor/include/apparmor.h | 2 +- 20 | security/apparmor/include/net.h | 11 ++++++++ 21 | security/apparmor/include/policy.h | 2 ++ 22 | security/apparmor/net.c | 31 ++++++++++++++++----- 23 | security/apparmor/policy.c | 1 + 24 | security/apparmor/policy_unpack.c | 54 ++++++++++++++++++++++++++++++++++-- 25 | 7 files changed, 92 insertions(+), 10 deletions(-) 26 | 27 | (limited to 'security/apparmor') 28 | 29 | diff --git a/security/apparmor/apparmorfs.c b/security/apparmor/apparmorfs.c 30 | index 2ee3b3d..0aef8e3 100644 31 | --- a/security/apparmor/apparmorfs.c 32 | +++ b/security/apparmor/apparmorfs.c 33 | @@ -2362,6 +2362,7 @@ static struct aa_sfs_entry aa_sfs_entry_features[] = { 34 | AA_SFS_DIR("domain", aa_sfs_entry_domain), 35 | AA_SFS_DIR("file", aa_sfs_entry_file), 36 | AA_SFS_DIR("network_v8", aa_sfs_entry_network), 37 | + AA_SFS_DIR("network", aa_sfs_entry_network_compat), 38 | AA_SFS_DIR("mount", aa_sfs_entry_mount), 39 | AA_SFS_DIR("namespaces", aa_sfs_entry_ns), 40 | AA_SFS_FILE_U64("capability", VFS_CAP_FLAGS_MASK), 41 | diff --git a/security/apparmor/include/apparmor.h b/security/apparmor/include/apparmor.h 42 | index 1fbabdb..5870de2 100644 43 | --- a/security/apparmor/include/apparmor.h 44 | +++ b/security/apparmor/include/apparmor.h 45 | @@ -20,7 +20,7 @@ 46 | #define AA_CLASS_UNKNOWN 1 47 | #define AA_CLASS_FILE 2 48 | #define AA_CLASS_CAP 3 49 | -#define AA_CLASS_DEPRECATED 4 50 | +#define AA_CLASS_NET_COMPAT 4 51 | #define AA_CLASS_RLIMITS 5 52 | #define AA_CLASS_DOMAIN 6 53 | #define AA_CLASS_MOUNT 7 54 | diff --git a/security/apparmor/include/net.h 
b/security/apparmor/include/net.h 55 | index aadb4b2..98a42ef 100644 56 | --- a/security/apparmor/include/net.h 57 | +++ b/security/apparmor/include/net.h 58 | @@ -68,6 +68,16 @@ struct aa_sk_ctx { 59 | DEFINE_AUDIT_NET(NAME, OP, SK, (SK)->sk_family, (SK)->sk_type, \ 60 | (SK)->sk_protocol) 61 | 62 | +/* struct aa_net - network confinement data 63 | + * @allow: basic network families permissions 64 | + * @audit: which network permissions to force audit 65 | + * @quiet: which network permissions to quiet rejects 66 | + */ 67 | +struct aa_net_compat { 68 | + u16 allow[AF_MAX]; 69 | + u16 audit[AF_MAX]; 70 | + u16 quiet[AF_MAX]; 71 | +}; 72 | 73 | #define af_select(FAMILY, FN, DEF_FN) \ 74 | ({ \ 75 | @@ -87,6 +97,7 @@ struct aa_secmark { 76 | }; 77 | 78 | extern struct aa_sfs_entry aa_sfs_entry_network[]; 79 | +extern struct aa_sfs_entry aa_sfs_entry_network_compat[]; 80 | 81 | void audit_net_cb(struct audit_buffer *ab, void *va); 82 | int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, 83 | diff --git a/security/apparmor/include/policy.h b/security/apparmor/include/policy.h 84 | index b5b4b81..f904105 100644 85 | --- a/security/apparmor/include/policy.h 86 | +++ b/security/apparmor/include/policy.h 87 | @@ -108,6 +108,7 @@ struct aa_data { 88 | * @policy: general match rules governing policy 89 | * @file: The set of rules governing basic file access and domain transitions 90 | * @caps: capabilities for the profile 91 | + * @net_compat: v2 compat network controls for the profile 92 | * @rlimits: rlimits for the profile 93 | * 94 | * @dents: dentries for the profiles file entries in apparmorfs 95 | @@ -145,6 +146,7 @@ struct aa_profile { 96 | struct aa_policydb policy; 97 | struct aa_file_rules file; 98 | struct aa_caps caps; 99 | + struct aa_net_compat *net_compat; 100 | 101 | int xattr_count; 102 | char **xattrs; 103 | diff --git a/security/apparmor/net.c b/security/apparmor/net.c 104 | index e0c1b50..e693df8 100644 105 | --- 
a/security/apparmor/net.c 106 | +++ b/security/apparmor/net.c 107 | @@ -24,6 +24,11 @@ struct aa_sfs_entry aa_sfs_entry_network[] = { 108 | { } 109 | }; 110 | 111 | +struct aa_sfs_entry aa_sfs_entry_network_compat[] = { 112 | + AA_SFS_FILE_STRING("af_mask", AA_SFS_AF_MASK), 113 | + { } 114 | +}; 115 | + 116 | static const char * const net_mask_names[] = { 117 | "unknown", 118 | "send", 119 | @@ -118,14 +123,26 @@ int aa_profile_af_perm(struct aa_profile *profile, struct common_audit_data *sa, 120 | if (profile_unconfined(profile)) 121 | return 0; 122 | state = PROFILE_MEDIATES(profile, AA_CLASS_NET); 123 | - if (!state) 124 | + if (state) { 125 | + if (!state) 126 | + return 0; 127 | + buffer[0] = cpu_to_be16(family); 128 | + buffer[1] = cpu_to_be16((u16) type); 129 | + state = aa_dfa_match_len(profile->policy.dfa, state, 130 | + (char *) &buffer, 4); 131 | + aa_compute_perms(profile->policy.dfa, state, &perms); 132 | + } else if (profile->net_compat) { 133 | + /* 2.x socket mediation compat */ 134 | + perms.allow = (profile->net_compat->allow[family] & (1 << type)) ? 135 | + ALL_PERMS_MASK : 0; 136 | + perms.audit = (profile->net_compat->audit[family] & (1 << type)) ? 137 | + ALL_PERMS_MASK : 0; 138 | + perms.quiet = (profile->net_compat->quiet[family] & (1 << type)) ? 
139 | + ALL_PERMS_MASK : 0; 140 | + 141 | + } else { 142 | return 0; 143 | - 144 | - buffer[0] = cpu_to_be16(family); 145 | - buffer[1] = cpu_to_be16((u16) type); 146 | - state = aa_dfa_match_len(profile->policy.dfa, state, (char *) &buffer, 147 | - 4); 148 | - aa_compute_perms(profile->policy.dfa, state, &perms); 149 | + } 150 | aa_apply_modes_to_perms(profile, &perms); 151 | 152 | return aa_check_perms(profile, &perms, request, sa, audit_net_cb); 153 | diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c 154 | index 4c010c9..a00e39b 100644 155 | --- a/security/apparmor/policy.c 156 | +++ b/security/apparmor/policy.c 157 | @@ -222,6 +222,7 @@ void aa_free_profile(struct aa_profile *profile) 158 | aa_free_file_rules(&profile->file); 159 | aa_free_cap_rules(&profile->caps); 160 | aa_free_rlimit_rules(&profile->rlimits); 161 | + kfree_sensitive(profile->net_compat); 162 | 163 | for (i = 0; i < profile->xattr_count; i++) 164 | kfree_sensitive(profile->xattrs[i]); 165 | diff --git a/security/apparmor/policy_unpack.c b/security/apparmor/policy_unpack.c 166 | index 4e1f96b..aedfecc 100644 167 | --- a/security/apparmor/policy_unpack.c 168 | +++ b/security/apparmor/policy_unpack.c 169 | @@ -34,7 +34,7 @@ 170 | 171 | #define v5 5 /* base version */ 172 | #define v6 6 /* per entry policydb mediation check */ 173 | -#define v7 7 174 | +#define v7 7 /* v2 compat networking */ 175 | #define v8 8 /* full network masking */ 176 | 177 | /* 178 | @@ -314,6 +314,19 @@ fail: 179 | return false; 180 | } 181 | 182 | +static bool unpack_u16(struct aa_ext *e, u16 *data, const char *name) 183 | +{ 184 | + if (unpack_nameX(e, AA_U16, name)) { 185 | + if (!inbounds(e, sizeof(u16))) 186 | + return 0; 187 | + if (data) 188 | + *data = le16_to_cpu(get_unaligned((__le16 *) e->pos)); 189 | + e->pos += sizeof(u16); 190 | + return 1; 191 | + } 192 | + return 0; 193 | +} 194 | + 195 | static bool unpack_u32(struct aa_ext *e, u32 *data, const char *name) 196 | { 197 | void *pos = 
e->pos; 198 | @@ -676,7 +689,7 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) 199 | struct aa_profile *profile = NULL; 200 | const char *tmpname, *tmpns = NULL, *name = NULL; 201 | const char *info = "failed to unpack profile"; 202 | - size_t ns_len; 203 | + size_t size = 0, ns_len; 204 | struct rhashtable_params params = { 0 }; 205 | char *key = NULL; 206 | struct aa_data *data; 207 | @@ -823,6 +836,43 @@ static struct aa_profile *unpack_profile(struct aa_ext *e, char **ns_name) 208 | goto fail; 209 | } 210 | 211 | + size = unpack_array(e, "net_allowed_af"); 212 | + if (size || VERSION_LT(e->version, v8)) { 213 | + profile->net_compat = kzalloc(sizeof(struct aa_net_compat), GFP_KERNEL); 214 | + if (!profile->net_compat) { 215 | + info = "out of memory"; 216 | + goto fail; 217 | + } 218 | + for (i = 0; i < size; i++) { 219 | + /* discard extraneous rules that this kernel will 220 | + * never request 221 | + */ 222 | + if (i >= AF_MAX) { 223 | + u16 tmp; 224 | + 225 | + if (!unpack_u16(e, &tmp, NULL) || 226 | + !unpack_u16(e, &tmp, NULL) || 227 | + !unpack_u16(e, &tmp, NULL)) 228 | + goto fail; 229 | + continue; 230 | + } 231 | + if (!unpack_u16(e, &profile->net_compat->allow[i], NULL)) 232 | + goto fail; 233 | + if (!unpack_u16(e, &profile->net_compat->audit[i], NULL)) 234 | + goto fail; 235 | + if (!unpack_u16(e, &profile->net_compat->quiet[i], NULL)) 236 | + goto fail; 237 | + } 238 | + if (size && !unpack_nameX(e, AA_ARRAYEND, NULL)) 239 | + goto fail; 240 | + if (VERSION_LT(e->version, v7)) { 241 | + /* pre v7 policy always allowed these */ 242 | + profile->net_compat->allow[AF_UNIX] = 0xffff; 243 | + profile->net_compat->allow[AF_NETLINK] = 0xffff; 244 | + } 245 | + } 246 | + 247 | + 248 | if (unpack_nameX(e, AA_STRUCT, "policydb")) { 249 | /* generic policy dfa - optional and may be NULL */ 250 | info = "failed to unpack policydb"; 251 | -------------------------------------------------------------------------------- 
/debian/patches/ubuntu/0003-apparmor-fix-apparmor-mediating-locking-non-fs-unix-sockets.patch: -------------------------------------------------------------------------------- 1 | From d7410054784d8aa0e313f9eeb6110a791420f3d3 Mon Sep 17 00:00:00 2001 2 | From: John Johansen 3 | Date: Mon, 30 Jul 2018 13:55:30 -0700 4 | Subject: UBUNTU SAUCE: apparmor: fix apparmor mediating locking non-fs, unix 5 | sockets 6 | 7 | The apparmor policy language currently does not allow expressing the 8 | locking permission for non-fs unix sockets. However, the kernel is 9 | enforcing mediation. 10 | 11 | Add the AA_MAY_LOCK perm to the computed perm mask which will grant 12 | permission for all current abi profiles, but still allow specifying 13 | auditing of the operation if needed. 14 | 15 | BugLink: http://bugs.launchpad.net/bugs/1780227 16 | Signed-off-by: John Johansen 17 | Acked-by: Stefan Bader 18 | Acked-by: Kleber Sacilotto de Souza 19 | Signed-off-by: Seth Forshee 20 | --- 21 | security/apparmor/lib.c | 2 +- 22 | 1 file changed, 1 insertion(+), 1 deletion(-) 23 | 24 | (limited to 'security/apparmor') 25 | 26 | diff --git a/security/apparmor/lib.c b/security/apparmor/lib.c 27 | index fa49b81..bf72843 100644 28 | --- a/security/apparmor/lib.c 29 | +++ b/security/apparmor/lib.c 30 | @@ -334,7 +334,7 @@ void aa_compute_perms(struct aa_dfa *dfa, unsigned int state, 31 | /* for v5 perm mapping in the policydb, the other set is used 32 | * to extend the general perm set 33 | */ 34 | - perms->allow |= map_other(dfa_other_allow(dfa, state)); 35 | + perms->allow |= map_other(dfa_other_allow(dfa, state)) | AA_MAY_LOCK; 36 | perms->audit |= map_other(dfa_other_audit(dfa, state)); 37 | perms->quiet |= map_other(dfa_other_quiet(dfa, state)); 38 | // perms->xindex = dfa_user_xindex(dfa, state); 39 | -------------------------------------------------------------------------------- /debian/patches/ubuntu/0004-apparmor-fix-use-after-free-in-sk_peer_label.patch:
-------------------------------------------------------------------------------- 1 | From e9243f6a285589f49161faf0f96f4cf15c1dafae Mon Sep 17 00:00:00 2001 2 | From: John Johansen 3 | Date: Tue, 26 Jun 2018 20:19:19 -0700 4 | Subject: UBUNTU: SAUCE: apparmor: fix use after free in sk_peer_label 5 | 6 | BugLink: http://bugs.launchpad.net/bugs/1778646 7 | Signed-off-by: John Johansen 8 | Signed-off-by: Seth Forshee 9 | --- 10 | security/apparmor/lsm.c | 11 +++++++---- 11 | 1 file changed, 7 insertions(+), 4 deletions(-) 12 | 13 | (limited to 'security/apparmor') 14 | 15 | diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c 16 | index 59a8ddd..b1216ee 100644 17 | --- a/security/apparmor/lsm.c 18 | +++ b/security/apparmor/lsm.c 19 | @@ -1162,9 +1162,10 @@ static struct aa_label *sk_peer_label(struct sock *sk) 20 | { 21 | struct sock *peer_sk; 22 | struct aa_sk_ctx *ctx = SK_CTX(sk); 23 | + struct aa_label *label = ERR_PTR(-ENOPROTOOPT); 24 | 25 | if (ctx->peer) 26 | - return ctx->peer; 27 | + return aa_get_label(ctx->peer); 28 | 29 | if (sk->sk_family != PF_UNIX) 30 | return ERR_PTR(-ENOPROTOOPT); 31 | @@ -1172,14 +1173,15 @@ static struct aa_label *sk_peer_label(struct sock *sk) 32 | /* check for sockpair peering which does not go through 33 | * security_unix_stream_connect 34 | */ 35 | - peer_sk = unix_peer(sk); 36 | + peer_sk = unix_peer_get(sk); 37 | if (peer_sk) { 38 | ctx = SK_CTX(peer_sk); 39 | if (ctx->label) 40 | - return ctx->label; 41 | + label = aa_get_label(ctx->label); 42 | + sock_put(peer_sk); 43 | } 44 | 45 | - return ERR_PTR(-ENOPROTOOPT); 46 | + return label; 47 | } 48 | 49 | /** 50 | @@ -1223,6 +1225,7 @@ out: 51 | 52 | } 53 | 54 | + aa_put_label(peer); 55 | done: 56 | end_current_label_crit_section(label); 57 | -------------------------------------------------------------------------------- /debian/patches/zfs/0001-Check-for-META-and-DCH-consistency-in-autoconf.patch: 
-------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Debian ZFS on Linux maintainers 3 | 4 | Date: Wed, 30 Jan 2019 15:12:04 +0100 5 | Subject: [PATCH] Check-for-META-and-DCH-consistency-in-autoconf 6 | 7 | Signed-off-by: Thomas Lamprecht 8 | --- 9 | config/zfs-meta.m4 | 34 +++++++++++++++++++++++++++++----- 10 | 1 file changed, 29 insertions(+), 5 deletions(-) 11 | 12 | diff --git a/config/zfs-meta.m4 b/config/zfs-meta.m4 13 | index 20064a0fb..4d5f545ad 100644 14 | --- a/config/zfs-meta.m4 15 | +++ b/config/zfs-meta.m4 16 | @@ -1,9 +1,10 @@ 17 | dnl # 18 | dnl # DESCRIPTION: 19 | -dnl # Read meta data from the META file. When building from a git repository 20 | -dnl # the ZFS_META_RELEASE field will be overwritten if there is an annotated 21 | -dnl # tag matching the form ZFS_META_NAME-ZFS_META_VERSION-*. This allows 22 | -dnl # for working builds to be uniquely identified using the git commit hash. 23 | +dnl # Read meta data from the META file or the debian/changelog file if it 24 | +dnl # exists. When building from a git repository the ZFS_META_RELEASE field 25 | +dnl # will be overwritten if there is an annotated tag matching the form 26 | +dnl # ZFS_META_NAME-ZFS_META_VERSION-*. This allows for working builds to be 27 | +dnl # uniquely identified using the git commit hash. 
28 | dnl # 29 | dnl # The META file format is as follows: 30 | dnl # ^[ ]*KEY:[ \t]+VALUE$ 31 | @@ -49,6 +50,7 @@ AC_DEFUN([ZFS_AC_META], [ 32 | _zfs_ac_meta_type="none" 33 | if test -f "$META"; then 34 | _zfs_ac_meta_type="META file" 35 | + _dpkg_parsechangelog=$(dpkg-parsechangelog 2>/dev/null) 36 | 37 | ZFS_META_NAME=_ZFS_AC_META_GETVAL([(Name|Project|Package)]); 38 | if test -n "$ZFS_META_NAME"; then 39 | @@ -69,8 +71,30 @@ AC_DEFUN([ZFS_AC_META], [ 40 | AC_SUBST([ZFS_META_VERSION]) 41 | fi 42 | 43 | + if test -n "${_dpkg_parsechangelog}"; then 44 | + _dpkg_version=$(echo "${_dpkg_parsechangelog}" \ 45 | + | $AWK '$[]1 == "Version:" { print $[]2; }' \ 46 | + | cut -d- -f1) 47 | + if test "${_dpkg_version}" != "$ZFS_META_VERSION"; then 48 | + AC_MSG_ERROR([ 49 | + *** Version $ZFS_META_VERSION in the META file is different than 50 | + *** version $_dpkg_version in the debian/changelog file. DKMS and DEB 51 | + *** packaging require that these files have the same version. 52 | + ]) 53 | + fi 54 | + fi 55 | + 56 | ZFS_META_RELEASE=_ZFS_AC_META_GETVAL([Release]); 57 | - if test ! -f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then 58 | + 59 | + if test -n "${_dpkg_parsechangelog}"; then 60 | + _dpkg_release=$(echo "${_dpkg_parsechangelog}" \ 61 | + | $AWK '$[]1 == "Version:" { print $[]2; }' \ 62 | + | cut -d- -f2-) 63 | + if test -n "${_dpkg_release}"; then 64 | + ZFS_META_RELEASE=${_dpkg_release} 65 | + _zfs_ac_meta_type="dpkg-parsechangelog" 66 | + fi 67 | + elif test ! 
-f ".nogitrelease" && git rev-parse --git-dir > /dev/null 2>&1; then 68 | _match="${ZFS_META_NAME}-${ZFS_META_VERSION}" 69 | _alias=$(git describe --match=${_match} 2>/dev/null) 70 | _release=$(echo ${_alias}|sed "s/${ZFS_META_NAME}//"|cut -f3- -d'-'|tr - _) -------------------------------------------------------------------------------- /debian/patches/zfs/0002-always-load-ZFS-module-on-boot.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Fri, 19 Jan 2018 12:13:46 +0100 4 | Subject: [PATCH] always load ZFS module on boot 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | since zfs-import-scan.service is disabled by default, and 10 | zfs-import-cache.service only gets started if a cache file exists, this 11 | is needed for zfs-mount, zfs-share and zfs-zed services in case ZFS is 12 | not actually used. 13 | 14 | Signed-off-by: Fabian Grünbichler 15 | Signed-off-by: Stoiko Ivanov 16 | Signed-off-by: Thomas Lamprecht 17 | --- 18 | etc/modules-load.d/zfs.conf | 2 +- 19 | 1 file changed, 1 insertion(+), 1 deletion(-) 20 | 21 | diff --git a/etc/modules-load.d/zfs.conf b/etc/modules-load.d/zfs.conf 22 | index 44e1bb3ed..7509b03cb 100644 23 | --- a/etc/modules-load.d/zfs.conf 24 | +++ b/etc/modules-load.d/zfs.conf 25 | @@ -1,3 +1,3 @@ 26 | # The default behavior is to allow udev to load the kernel modules on demand. 27 | # Uncomment the following line to unconditionally load them at boot. 
28 | -#zfs 29 | +zfs 30 | -------------------------------------------------------------------------------- /debian/patches/zfs/0003-Fix-the-path-to-the-zed-binary-on-the-systemd-unit.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Mon, 6 Feb 2017 12:04:35 +0100 4 | Subject: [PATCH] Fix the path to the zed binary on the systemd unit. 5 | 6 | We install zed into /usr/sbin manually meanwhile the upstream default is 7 | installing it into /sbin. Ubuntu packages also install zed to /usr/sbin, but 8 | they ship their own zfs-zed unit. 9 | 10 | Signed-off-by: Thomas Lamprecht 11 | --- 12 | etc/systemd/system/zfs-zed.service.in | 2 +- 13 | 1 file changed, 1 insertion(+), 1 deletion(-) 14 | 15 | diff --git a/etc/systemd/system/zfs-zed.service.in b/etc/systemd/system/zfs-zed.service.in 16 | index 008075138..570e27707 100644 17 | --- a/etc/systemd/system/zfs-zed.service.in 18 | +++ b/etc/systemd/system/zfs-zed.service.in 19 | @@ -4,7 +4,7 @@ Documentation=man:zed(8) 20 | ConditionPathIsDirectory=/sys/module/zfs 21 | 22 | [Service] 23 | -ExecStart=@sbindir@/zed -F 24 | +ExecStart=/usr/sbin/zed -F 25 | Restart=on-abort 26 | 27 | [Install] 28 | -------------------------------------------------------------------------------- /debian/patches/zfs/0004-import-with-d-dev-disk-by-id-in-scan-service.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Fabian=20Gr=C3=BCnbichler?= 3 | Date: Mon, 24 Oct 2016 13:47:06 +0200 4 | Subject: [PATCH] import with -d /dev/disk/by-id in scan service 5 | MIME-Version: 1.0 6 | Content-Type: text/plain; charset=UTF-8 7 | Content-Transfer-Encoding: 8bit 8 | 9 | Signed-off-by: Fabian Grünbichler 10 | Signed-off-by: Stoiko Ivanov 11 | Signed-off-by: Thomas 
Lamprecht 12 | --- 13 | etc/systemd/system/zfs-import-scan.service.in | 2 +- 14 | 1 file changed, 1 insertion(+), 1 deletion(-) 15 | 16 | diff --git a/etc/systemd/system/zfs-import-scan.service.in b/etc/systemd/system/zfs-import-scan.service.in 17 | index c1111c73a..c5e12c2c6 100644 18 | --- a/etc/systemd/system/zfs-import-scan.service.in 19 | +++ b/etc/systemd/system/zfs-import-scan.service.in 20 | @@ -13,7 +13,7 @@ ConditionPathIsDirectory=/sys/module/zfs 21 | [Service] 22 | Type=oneshot 23 | RemainAfterExit=yes 24 | -ExecStart=@sbindir@/zpool import -aN -o cachefile=none $ZPOOL_IMPORT_OPTS 25 | +ExecStart=@sbindir@/zpool import -aN -d /dev/disk/by-id -o cachefile=none $ZPOOL_IMPORT_OPTS 26 | 27 | [Install] 28 | WantedBy=zfs-import.target 29 | -------------------------------------------------------------------------------- /debian/patches/zfs/0005-Enable-zed-emails.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Richard Laager 3 | Date: Wed, 30 Jan 2019 15:12:04 +0100 4 | Subject: [PATCH] Enable zed emails 5 | 6 | The OpenZFS event daemon monitors pools. This patch enables the email sending 7 | function by default (if zed is installed). This is consistent with the default 8 | behavior of mdadm. 9 | 10 | Signed-off-by: Thomas Lamprecht 11 | --- 12 | cmd/zed/zed.d/zed.rc | 2 +- 13 | 1 file changed, 1 insertion(+), 1 deletion(-) 14 | 15 | diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc 16 | index 9ac77f929..672617f54 100644 17 | --- a/cmd/zed/zed.d/zed.rc 18 | +++ b/cmd/zed/zed.d/zed.rc 19 | @@ -41,7 +41,7 @@ ZED_EMAIL_ADDR="root" 20 | ## 21 | # Minimum number of seconds between notifications for a similar event. 22 | # 23 | -#ZED_NOTIFY_INTERVAL_SECS=3600 24 | +ZED_NOTIFY_INTERVAL_SECS=3600 25 | 26 | ## 27 | # Notification verbosity. 
-------------------------------------------------------------------------------- /debian/patches/zfs/0006-dont-symlink-zed-scripts.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Antonio Russo 3 | Date: Fri, 20 Mar 2020 17:28:43 +0100 4 | Subject: [PATCH] dont symlink zed scripts 5 | 6 | (cherry picked and adapted from 5cee380324d74e640d5dd7a360faba3994c8007f [0]) 7 | 8 | [0] https://salsa.debian.org/zfsonlinux-team/zfs.git 9 | 10 | Signed-off-by: Stoiko Ivanov 11 | 12 | Description: track default symlinks, instead of symlinking 13 | Forwarded: no need 14 | --- 15 | cmd/zed/zed.d/Makefile.am | 2 +- 16 | 1 file changed, 1 insertion(+), 1 deletion(-) 17 | 18 | diff --git a/cmd/zed/zed.d/Makefile.am b/cmd/zed/zed.d/Makefile.am 19 | index 8b2d0c200..118c96547 100644 20 | --- a/cmd/zed/zed.d/Makefile.am 21 | +++ b/cmd/zed/zed.d/Makefile.am 22 | @@ -48,6 +48,6 @@ install-data-hook: 23 | for f in $(zedconfdefaults); do \ 24 | test -f "$(DESTDIR)$(zedconfdir)/$${f}" -o \ 25 | -L "$(DESTDIR)$(zedconfdir)/$${f}" || \ 26 | - ln -s "$(zedexecdir)/$${f}" "$(DESTDIR)$(zedconfdir)"; \ 27 | + echo "$${f}" >> "$(DESTDIR)$(zedexecdir)/DEFAULT-ENABLED" ; \ 28 | done 29 | chmod 0600 "$(DESTDIR)$(zedconfdir)/zed.rc" 30 | -------------------------------------------------------------------------------- /debian/patches/zfs/0007-Use-installed-python3.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Antonio Russo 3 | Date: Tue, 5 May 2020 22:15:16 -0600 4 | Subject: [PATCH] Use installed python3 5 | 6 | --- 7 | .../functional/cli_root/zfs_program/zfs_program_json.ksh | 6 +++--- 8 | .../tests/functional/rsend/send_encrypted_files.ksh | 2 +- 9 | .../tests/functional/rsend/send_realloc_dnode_size.ksh | 2 +- 10 | 3 files changed, 5 insertions(+), 5 
deletions(-) 11 | 12 | diff --git a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh 13 | index 3788543b0..c7ee4ae9a 100755 14 | --- a/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh 15 | +++ b/tests/zfs-tests/tests/functional/cli_root/zfs_program/zfs_program_json.ksh 16 | @@ -100,10 +100,10 @@ typeset -a pos_cmds_out=( 17 | # the same as the input and the --sort-keys option was added. Detect when 18 | # --sort-keys is supported and apply the option to ensure the expected order. 19 | # 20 | -if python -m json.tool --sort-keys <<< "{}"; then 21 | - JSON_TOOL_CMD="python -m json.tool --sort-keys" 22 | +if python3 -m json.tool --sort-keys <<< "{}"; then 23 | + JSON_TOOL_CMD="python3 -m json.tool --sort-keys" 24 | else 25 | - JSON_TOOL_CMD="python -m json.tool" 26 | + JSON_TOOL_CMD="python3 -m json.tool" 27 | fi 28 | 29 | typeset -i cnt=0 30 | diff --git a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh 31 | index f89cb3b31..375d483f7 100755 32 | --- a/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh 33 | +++ b/tests/zfs-tests/tests/functional/rsend/send_encrypted_files.ksh 34 | @@ -87,7 +87,7 @@ log_must xattrtest -f 10 -x 3 -s 32768 -r -k -p /$TESTPOOL/$TESTFS2/xattrsadir 35 | # ZoL issue #7432 36 | log_must zfs set compression=on xattr=sa $TESTPOOL/$TESTFS2 37 | log_must touch /$TESTPOOL/$TESTFS2/attrs 38 | -log_must eval "python -c 'print \"a\" * 4096' | \ 39 | +log_must eval "python3 -c 'print(\"a\" * 4096)' | \ 40 | set_xattr_stdin bigval /$TESTPOOL/$TESTFS2/attrs" 41 | log_must zfs set compression=off xattr=on $TESTPOOL/$TESTFS2 42 | 43 | diff --git a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh 44 | index 394fe95bb..43560aac5 100755 45 | ---
a/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh 46 | +++ b/tests/zfs-tests/tests/functional/rsend/send_realloc_dnode_size.ksh 47 | @@ -93,7 +93,7 @@ log_must zfs snapshot $POOL/fs@c 48 | # 4. Create an empty file and add xattrs to it to exercise reclaiming a 49 | # dnode that requires more than 1 slot for its bonus buffer (Zol #7433) 50 | log_must zfs set compression=on xattr=sa $POOL/fs 51 | -log_must eval "python -c 'print \"a\" * 512' | 52 | +log_must eval "python3 -c 'print(\"a\" * 512)' | 53 | set_xattr_stdin bigval /$POOL/fs/attrs" 54 | log_must zfs snapshot $POOL/fs@d 55 | 56 | -------------------------------------------------------------------------------- /debian/patches/zfs/0008-Add-systemd-unit-for-importing-specific-pools.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Stoiko Ivanov 3 | Date: Tue, 15 Sep 2020 19:07:24 +0200 4 | Subject: [PATCH] Add systemd-unit for importing specific pools 5 | 6 | The unit can be instantiated with a specific poolname, which will get imported 7 | by scanning /dev/disk/by-id, irrespective of the existence and content of 8 | /etc/zfs/zpool.cache. 9 | 10 | The instance name is used unescaped (see systemd.unit(5)), since zpool names 11 | can contain characters which will be escaped by systemd.
12 | 13 | Signed-off-by: Stoiko Ivanov 14 | --- 15 | etc/systemd/system/50-zfs.preset.in | 1 + 16 | etc/systemd/system/Makefile.am | 1 + 17 | etc/systemd/system/zfs-import@.service.in | 16 ++++++++++++++++ 18 | 3 files changed, 18 insertions(+) 19 | create mode 100644 etc/systemd/system/zfs-import@.service.in 20 | 21 | diff --git a/etc/systemd/system/50-zfs.preset.in b/etc/systemd/system/50-zfs.preset.in 22 | index e4056a92c..030611419 100644 23 | --- a/etc/systemd/system/50-zfs.preset.in 24 | +++ b/etc/systemd/system/50-zfs.preset.in 25 | @@ -1,6 +1,7 @@ 26 | # ZFS is enabled by default 27 | enable zfs-import-cache.service 28 | disable zfs-import-scan.service 29 | +enable zfs-import@.service 30 | enable zfs-import.target 31 | enable zfs-mount.service 32 | enable zfs-share.service 33 | diff --git a/etc/systemd/system/Makefile.am b/etc/systemd/system/Makefile.am 34 | index 5e65e1db4..8e6baeb68 100644 35 | --- a/etc/systemd/system/Makefile.am 36 | +++ b/etc/systemd/system/Makefile.am 37 | @@ -7,6 +7,7 @@ systemdunit_DATA = \ 38 | zfs-zed.service \ 39 | zfs-import-cache.service \ 40 | zfs-import-scan.service \ 41 | + zfs-import@.service \ 42 | zfs-mount.service \ 43 | zfs-share.service \ 44 | zfs-volume-wait.service \ 45 | diff --git a/etc/systemd/system/zfs-import@.service.in b/etc/systemd/system/zfs-import@.service.in 46 | new file mode 100644 47 | index 000000000..9b4ee9371 48 | --- /dev/null 49 | +++ b/etc/systemd/system/zfs-import@.service.in 50 | @@ -0,0 +1,16 @@ 51 | +[Unit] 52 | +Description=Import ZFS pool %i 53 | +Documentation=man:zpool(8) 54 | +DefaultDependencies=no 55 | +After=systemd-udev-settle.service 56 | +After=cryptsetup.target 57 | +After=multipathd.target 58 | +Before=zfs-import.target 59 | + 60 | +[Service] 61 | +Type=oneshot 62 | +RemainAfterExit=yes 63 | +ExecStart=@sbindir@/zpool import -N -d /dev/disk/by-id -o cachefile=none %I 64 | + 65 | +[Install] 66 | +WantedBy=zfs-import.target 67 | 
-------------------------------------------------------------------------------- /debian/patches/zfs/0009-Patch-move-manpage-arcstat-1-to-arcstat-8.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Stoiko Ivanov 3 | Date: Thu, 4 Feb 2021 19:01:12 +0100 4 | Subject: [PATCH] Patch: move manpage arcstat(1) to arcstat(8). 5 | 6 | Originally-By: Mo Zhou 7 | Originally-By: Antonio Russo 8 | 9 | Signed-off-by: Stoiko Ivanov 10 | --- 11 | diff --git a/man/Makefile.am b/man/Makefile.am 12 | index 8ab1b7572..5485076f9 100644 13 | --- a/man/Makefile.am 14 | +++ b/man/Makefile.am 15 | @@ -8,7 +8,7 @@ dist_man_MANS = \ 16 | man1/ztest.1 \ 17 | man1/raidz_test.1 \ 18 | man1/zvol_wait.1 \ 19 | - man1/arcstat.1 \ 20 | + man8/arcstat.8 \ 21 | \ 22 | man5/vdev_id.conf.5 \ 23 | \ 24 | diff --git a/man/man1/arcstat.1 b/man/man1/arcstat.1 25 | index a69cd8937..dfe9c971b 100644 26 | --- a/man/man1/arcstat.1 27 | +++ b/man/man1/arcstat.1 28 | @@ -13,7 +13,7 @@ 29 | .\" Copyright (c) 2020 by AJ Jordan. All rights reserved. 30 | .\" 31 | .Dd May 26, 2021 32 | -.Dt ARCSTAT 1 33 | +.Dt ARCSTAT 8 34 | .Os 35 | . 36 | .Sh NAME 37 | -------------------------------------------------------------------------------- /debian/patches/zfs/0010-arcstat-Fix-integer-division-with-python3.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Valmiky Arquissandas 3 | Date: Fri, 8 Oct 2021 16:32:27 +0100 4 | Subject: [PATCH] arcstat: Fix integer division with python3 5 | 6 | The arcstat script requests compatibility with python2 and python3, but 7 | PEP 238 modified the / operator and results in erroneous output when 8 | run under python3. 9 | 10 | This commit replaces instances of / with //, yielding the expected 11 | result in both versions of Python. 
12 | 13 | Reviewed-by: Brian Behlendorf 14 | Reviewed-by: John Kennedy 15 | Reviewed-by: Ryan Moeller 16 | Signed-off-by: Valmiky Arquissandas 17 | Closes #12603 18 | (cherry picked from commit 2d02bba23d83ae8fede8d281edc255f01ccd28e9) 19 | Signed-off-by: Thomas Lamprecht 20 | --- 21 | cmd/arcstat/arcstat.in | 66 +++++++++++++++++++++--------------------- 22 | 1 file changed, 33 insertions(+), 33 deletions(-) 23 | 24 | diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in 25 | index 9e7c52a6c..cd9a803a2 100755 26 | --- a/cmd/arcstat/arcstat.in 27 | +++ b/cmd/arcstat/arcstat.in 28 | @@ -441,73 +441,73 @@ def calculate(): 29 | 30 | v = dict() 31 | v["time"] = time.strftime("%H:%M:%S", time.localtime()) 32 | - v["hits"] = d["hits"] / sint 33 | - v["miss"] = d["misses"] / sint 34 | + v["hits"] = d["hits"] // sint 35 | + v["miss"] = d["misses"] // sint 36 | v["read"] = v["hits"] + v["miss"] 37 | - v["hit%"] = 100 * v["hits"] / v["read"] if v["read"] > 0 else 0 38 | + v["hit%"] = 100 * v["hits"] // v["read"] if v["read"] > 0 else 0 39 | v["miss%"] = 100 - v["hit%"] if v["read"] > 0 else 0 40 | 41 | - v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) / sint 42 | - v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) / sint 43 | + v["dhit"] = (d["demand_data_hits"] + d["demand_metadata_hits"]) // sint 44 | + v["dmis"] = (d["demand_data_misses"] + d["demand_metadata_misses"]) // sint 45 | 46 | v["dread"] = v["dhit"] + v["dmis"] 47 | - v["dh%"] = 100 * v["dhit"] / v["dread"] if v["dread"] > 0 else 0 48 | + v["dh%"] = 100 * v["dhit"] // v["dread"] if v["dread"] > 0 else 0 49 | v["dm%"] = 100 - v["dh%"] if v["dread"] > 0 else 0 50 | 51 | - v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) / sint 52 | + v["phit"] = (d["prefetch_data_hits"] + d["prefetch_metadata_hits"]) // sint 53 | v["pmis"] = (d["prefetch_data_misses"] + 54 | - d["prefetch_metadata_misses"]) / sint 55 | + d["prefetch_metadata_misses"]) // sint 56 | 57 | 
v["pread"] = v["phit"] + v["pmis"] 58 | - v["ph%"] = 100 * v["phit"] / v["pread"] if v["pread"] > 0 else 0 59 | + v["ph%"] = 100 * v["phit"] // v["pread"] if v["pread"] > 0 else 0 60 | v["pm%"] = 100 - v["ph%"] if v["pread"] > 0 else 0 61 | 62 | v["mhit"] = (d["prefetch_metadata_hits"] + 63 | - d["demand_metadata_hits"]) / sint 64 | + d["demand_metadata_hits"]) // sint 65 | v["mmis"] = (d["prefetch_metadata_misses"] + 66 | - d["demand_metadata_misses"]) / sint 67 | + d["demand_metadata_misses"]) // sint 68 | 69 | v["mread"] = v["mhit"] + v["mmis"] 70 | - v["mh%"] = 100 * v["mhit"] / v["mread"] if v["mread"] > 0 else 0 71 | + v["mh%"] = 100 * v["mhit"] // v["mread"] if v["mread"] > 0 else 0 72 | v["mm%"] = 100 - v["mh%"] if v["mread"] > 0 else 0 73 | 74 | v["arcsz"] = cur["size"] 75 | v["size"] = cur["size"] 76 | v["c"] = cur["c"] 77 | - v["mfu"] = d["mfu_hits"] / sint 78 | - v["mru"] = d["mru_hits"] / sint 79 | - v["mrug"] = d["mru_ghost_hits"] / sint 80 | - v["mfug"] = d["mfu_ghost_hits"] / sint 81 | - v["eskip"] = d["evict_skip"] / sint 82 | - v["el2skip"] = d["evict_l2_skip"] / sint 83 | - v["el2cach"] = d["evict_l2_cached"] / sint 84 | - v["el2el"] = d["evict_l2_eligible"] / sint 85 | - v["el2mfu"] = d["evict_l2_eligible_mfu"] / sint 86 | - v["el2mru"] = d["evict_l2_eligible_mru"] / sint 87 | - v["el2inel"] = d["evict_l2_ineligible"] / sint 88 | - v["mtxmis"] = d["mutex_miss"] / sint 89 | + v["mfu"] = d["mfu_hits"] // sint 90 | + v["mru"] = d["mru_hits"] // sint 91 | + v["mrug"] = d["mru_ghost_hits"] // sint 92 | + v["mfug"] = d["mfu_ghost_hits"] // sint 93 | + v["eskip"] = d["evict_skip"] // sint 94 | + v["el2skip"] = d["evict_l2_skip"] // sint 95 | + v["el2cach"] = d["evict_l2_cached"] // sint 96 | + v["el2el"] = d["evict_l2_eligible"] // sint 97 | + v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint 98 | + v["el2mru"] = d["evict_l2_eligible_mru"] // sint 99 | + v["el2inel"] = d["evict_l2_ineligible"] // sint 100 | + v["mtxmis"] = d["mutex_miss"] // sint 101 | 
102 | if l2exist: 103 | - v["l2hits"] = d["l2_hits"] / sint 104 | - v["l2miss"] = d["l2_misses"] / sint 105 | + v["l2hits"] = d["l2_hits"] // sint 106 | + v["l2miss"] = d["l2_misses"] // sint 107 | v["l2read"] = v["l2hits"] + v["l2miss"] 108 | - v["l2hit%"] = 100 * v["l2hits"] / v["l2read"] if v["l2read"] > 0 else 0 109 | + v["l2hit%"] = 100 * v["l2hits"] // v["l2read"] if v["l2read"] > 0 else 0 110 | 111 | v["l2miss%"] = 100 - v["l2hit%"] if v["l2read"] > 0 else 0 112 | v["l2asize"] = cur["l2_asize"] 113 | v["l2size"] = cur["l2_size"] 114 | - v["l2bytes"] = d["l2_read_bytes"] / sint 115 | + v["l2bytes"] = d["l2_read_bytes"] // sint 116 | 117 | v["l2pref"] = cur["l2_prefetch_asize"] 118 | v["l2mfu"] = cur["l2_mfu_asize"] 119 | v["l2mru"] = cur["l2_mru_asize"] 120 | v["l2data"] = cur["l2_bufc_data_asize"] 121 | v["l2meta"] = cur["l2_bufc_metadata_asize"] 122 | - v["l2pref%"] = 100 * v["l2pref"] / v["l2asize"] 123 | - v["l2mfu%"] = 100 * v["l2mfu"] / v["l2asize"] 124 | - v["l2mru%"] = 100 * v["l2mru"] / v["l2asize"] 125 | - v["l2data%"] = 100 * v["l2data"] / v["l2asize"] 126 | - v["l2meta%"] = 100 * v["l2meta"] / v["l2asize"] 127 | + v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] 128 | + v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] 129 | + v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] 130 | + v["l2data%"] = 100 * v["l2data"] // v["l2asize"] 131 | + v["l2meta%"] = 100 * v["l2meta"] // v["l2asize"] 132 | 133 | v["grow"] = 0 if cur["arc_no_grow"] else 1 134 | v["need"] = cur["arc_need_free"] 135 | -------------------------------------------------------------------------------- /debian/patches/zfs/0011-arc-stat-summary-guard-access-to-l2arc-MFU-MRU-stats.patch: -------------------------------------------------------------------------------- 1 | From 0000000000000000000000000000000000000000 Mon Sep 17 00:00:00 2001 2 | From: Thomas Lamprecht 3 | Date: Wed, 10 Nov 2021 09:29:47 +0100 4 | Subject: [PATCH] arc stat/summary: guard access to l2arc MFU/MRU stats 5 | 6 | 
commit 085321621e79a75bea41c2b6511da6ebfbf2ba0a added printing MFU 7 | and MRU stats for 2.1 user space tools, but those keys are not 8 | available in the 2.0 module. That means it may break the arcstat and 9 | arc_summary tools after upgrade to 2.1 (user space), before a reboot 10 | to the new 2.1 ZFS kernel-module happened, due to python raising a 11 | KeyError on the dict access then. 12 | 13 | Move those two keys to a .get accessor with `0` as fallback, as it 14 | should be better to show some possible wrong data for new stat-keys 15 | than throwing an exception. 16 | 17 | Signed-off-by: Thomas Lamprecht 18 | 19 | also move l2_mfu_asize l2_mru_asize l2_prefetch_asize 20 | l2_bufc_data_asize l2_bufc_metadata_asize to .get accessor 21 | (these are only present with a cache device in the pool) 22 | Signed-off-by: Stoiko Ivanov 23 | --- 24 | cmd/arc_summary/arc_summary3 | 28 ++++++++++++++-------------- 25 | cmd/arcstat/arcstat.in | 14 +++++++------- 26 | 2 files changed, 21 insertions(+), 21 deletions(-) 27 | 28 | diff --git a/cmd/arc_summary/arc_summary3 b/cmd/arc_summary/arc_summary3 29 | index 7b28012ed..fe6a6d9e2 100755 30 | --- a/cmd/arc_summary/arc_summary3 31 | +++ b/cmd/arc_summary/arc_summary3 32 | @@ -617,13 +617,13 @@ def section_arc(kstats_dict): 33 | prt_i1('L2 cached evictions:', f_bytes(arc_stats['evict_l2_cached'])) 34 | prt_i1('L2 eligible evictions:', f_bytes(arc_stats['evict_l2_eligible'])) 35 | prt_i2('L2 eligible MFU evictions:', 36 | - f_perc(arc_stats['evict_l2_eligible_mfu'], 37 | + f_perc(arc_stats.get('evict_l2_eligible_mfu', 0), # 2.0 module compat 38 | arc_stats['evict_l2_eligible']), 39 | - f_bytes(arc_stats['evict_l2_eligible_mfu'])) 40 | + f_bytes(arc_stats.get('evict_l2_eligible_mfu', 0))) 41 | prt_i2('L2 eligible MRU evictions:', 42 | - f_perc(arc_stats['evict_l2_eligible_mru'], 43 | + f_perc(arc_stats.get('evict_l2_eligible_mru', 0), # 2.0 module compat 44 | arc_stats['evict_l2_eligible']), 45 | - 
f_bytes(arc_stats['evict_l2_eligible_mru'])) 46 | + f_bytes(arc_stats.get('evict_l2_eligible_mru', 0))) 47 | prt_i1('L2 ineligible evictions:', 48 | f_bytes(arc_stats['evict_l2_ineligible'])) 49 | print() 50 | @@ -765,20 +765,20 @@ def section_l2arc(kstats_dict): 51 | f_perc(arc_stats['l2_hdr_size'], arc_stats['l2_size']), 52 | f_bytes(arc_stats['l2_hdr_size'])) 53 | prt_i2('MFU allocated size:', 54 | - f_perc(arc_stats['l2_mfu_asize'], arc_stats['l2_asize']), 55 | - f_bytes(arc_stats['l2_mfu_asize'])) 56 | + f_perc(arc_stats.get('l2_mfu_asize', 0), arc_stats['l2_asize']), 57 | + f_bytes(arc_stats.get('l2_mfu_asize', 0))) # 2.0 module compat 58 | prt_i2('MRU allocated size:', 59 | - f_perc(arc_stats['l2_mru_asize'], arc_stats['l2_asize']), 60 | - f_bytes(arc_stats['l2_mru_asize'])) 61 | + f_perc(arc_stats.get('l2_mru_asize', 0), arc_stats['l2_asize']), 62 | + f_bytes(arc_stats.get('l2_mru_asize', 0))) # 2.0 module compat 63 | prt_i2('Prefetch allocated size:', 64 | - f_perc(arc_stats['l2_prefetch_asize'], arc_stats['l2_asize']), 65 | - f_bytes(arc_stats['l2_prefetch_asize'])) 66 | + f_perc(arc_stats.get('l2_prefetch_asize', 0), arc_stats['l2_asize']), 67 | + f_bytes(arc_stats.get('l2_prefetch_asize',0))) # 2.0 module compat 68 | prt_i2('Data (buffer content) allocated size:', 69 | - f_perc(arc_stats['l2_bufc_data_asize'], arc_stats['l2_asize']), 70 | - f_bytes(arc_stats['l2_bufc_data_asize'])) 71 | + f_perc(arc_stats.get('l2_bufc_data_asize', 0), arc_stats['l2_asize']), 72 | + f_bytes(arc_stats.get('l2_bufc_data_asize', 0))) # 2.0 module compat 73 | prt_i2('Metadata (buffer content) allocated size:', 74 | - f_perc(arc_stats['l2_bufc_metadata_asize'], arc_stats['l2_asize']), 75 | - f_bytes(arc_stats['l2_bufc_metadata_asize'])) 76 | + f_perc(arc_stats.get('l2_bufc_metadata_asize', 0), arc_stats['l2_asize']), 77 | + f_bytes(arc_stats.get('l2_bufc_metadata_asize', 0))) # 2.0 module compat 78 | 79 | print() 80 | prt_1('L2ARC breakdown:', f_hits(l2_access_total)) 81 | 
diff --git a/cmd/arcstat/arcstat.in b/cmd/arcstat/arcstat.in 82 | index cd9a803a2..ea45dc602 100755 83 | --- a/cmd/arcstat/arcstat.in 84 | +++ b/cmd/arcstat/arcstat.in 85 | @@ -482,8 +482,8 @@ def calculate(): 86 | v["el2skip"] = d["evict_l2_skip"] // sint 87 | v["el2cach"] = d["evict_l2_cached"] // sint 88 | v["el2el"] = d["evict_l2_eligible"] // sint 89 | - v["el2mfu"] = d["evict_l2_eligible_mfu"] // sint 90 | - v["el2mru"] = d["evict_l2_eligible_mru"] // sint 91 | + v["el2mfu"] = d.get("evict_l2_eligible_mfu", 0) // sint 92 | + v["el2mru"] = d.get("evict_l2_eligible_mru", 0) // sint 93 | v["el2inel"] = d["evict_l2_ineligible"] // sint 94 | v["mtxmis"] = d["mutex_miss"] // sint 95 | 96 | @@ -498,11 +498,11 @@ def calculate(): 97 | v["l2size"] = cur["l2_size"] 98 | v["l2bytes"] = d["l2_read_bytes"] // sint 99 | 100 | - v["l2pref"] = cur["l2_prefetch_asize"] 101 | - v["l2mfu"] = cur["l2_mfu_asize"] 102 | - v["l2mru"] = cur["l2_mru_asize"] 103 | - v["l2data"] = cur["l2_bufc_data_asize"] 104 | - v["l2meta"] = cur["l2_bufc_metadata_asize"] 105 | + v["l2pref"] = cur.get("l2_prefetch_asize", 0) 106 | + v["l2mfu"] = cur.get("l2_mfu_asize", 0) 107 | + v["l2mru"] = cur.get("l2_mru_asize", 0) 108 | + v["l2data"] = cur.get("l2_bufc_data_asize", 0) 109 | + v["l2meta"] = cur.get("l2_bufc_metadata_asize", 0) 110 | v["l2pref%"] = 100 * v["l2pref"] // v["l2asize"] 111 | v["l2mfu%"] = 100 * v["l2mfu"] // v["l2asize"] 112 | v["l2mru%"] = 100 * v["l2mru"] // v["l2asize"] -------------------------------------------------------------------------------- /debian/rules: -------------------------------------------------------------------------------- 1 | #!/usr/bin/make -f 2 | # -*- makefile -*- 3 | include /usr/share/dpkg/default.mk 4 | 5 | # Common variables for all architectures 6 | include debian/rules.d/common.mk 7 | 8 | # Pull in some arch specific stuff 9 | include debian/rules.d/arch/${DEB_BUILD_ARCH}.mk 10 | 11 | BUILD_DIR ?= ${CURDIR} 12 | KERNEL_SRC := linux 13 | ZFS_SRC := zfs 
14 | 15 | %: 16 | dh $@ --with quilt 17 | 18 | ## Debian package metadata 19 | debian/control: debian/templates/control.in 20 | sed -e 's/@KVNAME@/${KVNAME}/g' -e 's/@KVMAJMIN@/${KERNEL_MAJMIN}/g' < debian/templates/control.in > debian/control 21 | 22 | debian/SOURCE: 23 | printf '%s\n' "git clone git@github.com:fw867/pve-edge-kernel.git" \ 24 | "git checkout ${PKG_GIT_VERSION}" \ 25 | "git submodule update --init --depth=1" > $@ 26 | 27 | ## Debhelper phases 28 | override_dh_quilt_patch: 29 | # Apply crack bundle 30 | git -C ${KERNEL_SRC} fetch ../crack.bundle $$(git -C ${KERNEL_SRC} ls-remote ../crack.bundle | cut -f1) 31 | git -C ${KERNEL_SRC} checkout -f FETCH_HEAD 32 | # Apply patches 33 | cd ${KERNEL_SRC}; \ 34 | QUILT_PATCHES=../debian/patches \ 35 | QUILT_SERIES=series.linux \ 36 | quilt --quiltrc /dev/null --color=always push -a || test $$? = 2 37 | cd ${ZFS_SRC}; \ 38 | QUILT_PATCHES=../debian/patches \ 39 | QUILT_SERIES=series.zfs \ 40 | quilt --quiltrc /dev/null --color=always push -a || test $$? = 2 41 | 42 | override_dh_quilt_unpatch: 43 | cd ${KERNEL_SRC}; \ 44 | QUILT_PATCHES=../debian/patches \ 45 | QUILT_SERIES=series.linux \ 46 | quilt --quiltrc /dev/null pop -a || test $$? = 2 47 | cd ${ZFS_SRC}; \ 48 | QUILT_PATCHES=../debian/patches \ 49 | QUILT_SERIES=series.zfs \ 50 | quilt --quiltrc /dev/null pop -a || test $$?
= 2 51 | rm -rf ${KERNEL_SRC}/.pc ${ZFS_SRC}/.pc 52 | 53 | override_dh_autoreconf: 54 | dh_autoreconf -D ${ZFS_SRC} 55 | 56 | override_dh_auto_configure: ${KERNEL_SRC}/.config 57 | 58 | override_dh_auto_build: 59 | # Kernel 60 | dh_auto_build -D ${KERNEL_SRC} -- \ 61 | CC="${PVE_KERNEL_CC}" \ 62 | KCFLAGS="${PVE_KERNEL_CFLAGS}" \ 63 | EXTRAVERSION="${EXTRAVERSION}" \ 64 | LOCALVERSION="${LOCALVERSION}" \ 65 | KBUILD_BUILD_VERSION_TIMESTAMP="${PKG_DISTRIBUTOR} ${DEB_VERSION} (${PKG_DATE_UTC_ISO})" 66 | # ZFS 67 | dh_auto_configure -D ${ZFS_SRC} -- CC="${PVE_ZFS_CC}" --with-config=kernel --with-linux=$(realpath ${KERNEL_SRC}) --with-linux-obj=$(realpath ${KERNEL_SRC}) 68 | dh_auto_build -D ${ZFS_SRC} 69 | 70 | override_dh_auto_install: debian/SOURCE .install_mark .tools_install_mark .headers_install_mark .usr_headers_install_mark 71 | dh_installdocs -A debian/copyright debian/SOURCE 72 | dh_installchangelogs 73 | dh_installman 74 | dh_strip_nondeterminism 75 | dh_compress 76 | dh_fixperms 77 | 78 | override_dh_auto_clean: debian/control 79 | dh_clean 80 | dh_auto_clean -D ${KERNEL_SRC} 81 | dh_auto_clean -D ${ZFS_SRC} 82 | rm -f ${PVE_KERNEL_TEMPLATES} ${PVE_HEADER_TEMPLATES} 83 | rm -f debian/SOURCE debian/*.tmp .*_mark 84 | 85 | ## Kernel 86 | PVE_KERNEL_TEMPLATES := $(patsubst debian/templates/pve-kernel.%.in, debian/${PVE_KERNEL_PKG}.%, $(wildcard debian/templates/pve-kernel.*.in)) 87 | 88 | ${KERNEL_SRC}/.config: 89 | ${KERNEL_SRC}/scripts/kconfig/merge_config.sh -m \ 90 | -O ${KERNEL_SRC} \ 91 | ${KERNEL_SRC}/debian.master/config/config.common.ubuntu \ 92 | ${KERNEL_SRC}/debian.master/config/${DEB_BUILD_ARCH}/config.common.${DEB_BUILD_ARCH} \ 93 | ${KERNEL_SRC}/debian.master/config/${DEB_BUILD_ARCH}/config.flavour.generic \ 94 | debian/config/config.pve 95 | ${MAKE} -C ${KERNEL_SRC} CC="${PVE_KERNEL_CC}" olddefconfig 96 | 97 | debian/${PVE_KERNEL_PKG}.%: debian/templates/pve-kernel.%.in 98 | sed -e 's/@@KVNAME@@/${KVNAME}/g' < debian/templates/pve-kernel.$*.in >
debian/${PVE_KERNEL_PKG}.$* 99 | chmod +x debian/${PVE_KERNEL_PKG}.$* 100 | 101 | .install_mark: ${PVE_KERNEL_TEMPLATES} 102 | rm -rf debian/${PVE_KERNEL_PKG} 103 | mkdir -p debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME} 104 | mkdir debian/${PVE_KERNEL_PKG}/boot 105 | install -m 644 ${KERNEL_SRC}/.config debian/${PVE_KERNEL_PKG}/boot/config-${KVNAME} 106 | install -m 644 ${KERNEL_SRC}/System.map debian/${PVE_KERNEL_PKG}/boot/System.map-${KVNAME} 107 | install -m 644 ${KERNEL_SRC}/${KERNEL_IMAGE_PATH} debian/${PVE_KERNEL_PKG}/boot/${KERNEL_INSTALL_FILE}-${KVNAME} 108 | ${MAKE} -C ${KERNEL_SRC} INSTALL_MOD_PATH=${BUILD_DIR}/debian/${PVE_KERNEL_PKG}/ modules_install 109 | # install zfs drivers 110 | install -d -m 0755 debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 111 | install -m 644 ${ZFS_SRC}/module/avl/zavl.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 112 | install -m 644 ${ZFS_SRC}/module/nvpair/znvpair.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 113 | install -m 644 ${ZFS_SRC}/module/unicode/zunicode.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 114 | install -m 644 ${ZFS_SRC}/module/zcommon/zcommon.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 115 | install -m 644 ${ZFS_SRC}/module/icp/icp.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 116 | install -m 644 ${ZFS_SRC}/module/zfs/zfs.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 117 | install -m 644 ${ZFS_SRC}/module/lua/zlua.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 118 | install -m 644 ${ZFS_SRC}/module/spl/spl.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 119 | install -m 644 ${ZFS_SRC}/module/zstd/zzstd.ko debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/zfs 120 | # remove firmware 121 | rm -rf debian/${PVE_KERNEL_PKG}/lib/firmware 122 | # strip debug info 123 | find debian/${PVE_KERNEL_PKG}/lib/modules -name \*.ko -print | while read f ; do strip --strip-debug "$$f"; done 124 | # finalize 125 | /sbin/depmod -b debian/${PVE_KERNEL_PKG}/ 
${KVNAME} 126 | # Autogenerate blacklist for watchdog devices (see README) 127 | install -m 0755 -d debian/${PVE_KERNEL_PKG}/lib/modprobe.d 128 | ls debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/kernel/drivers/watchdog/ > debian/watchdog-blacklist.tmp 129 | echo ipmi_watchdog.ko >> debian/watchdog-blacklist.tmp 130 | cat debian/watchdog-blacklist.tmp | sed -e 's/^/blacklist /' -e 's/.ko$$//' | sort -u > debian/${PVE_KERNEL_PKG}/lib/modprobe.d/blacklist_${PVE_KERNEL_PKG}.conf 131 | rm -f debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/source 132 | rm -f debian/${PVE_KERNEL_PKG}/lib/modules/${KVNAME}/build 133 | touch $@ 134 | 135 | ## Kernel tools 136 | .tools_compile_mark: 137 | dh_auto_build -D ${KERNEL_SRC}/tools/perf -- prefix=/usr \ 138 | HAVE_NO_LIBBFD=1 \ 139 | HAVE_CPLUS_DEMANGLE_SUPPORT=1 \ 140 | NO_LIBPYTHON=1 \ 141 | NO_LIBPERL=1 \ 142 | NO_LIBCRYPTO=1 \ 143 | PYTHON=python3 144 | echo "checking GPL-2 only perf binary for library linkage with incompatible licenses.." 145 | ! ldd ${KERNEL_SRC}/tools/perf/perf | grep -q -E '\blibbfd' 146 | ! 
ldd ${KERNEL_SRC}/tools/perf/perf | grep -q -E '\blibcrypto' 147 | ${MAKE} -C ${KERNEL_SRC}/tools/perf man 148 | touch $@ 149 | 150 | .tools_install_mark: .tools_compile_mark 151 | rm -rf debian/${LINUX_TOOLS_PKG} 152 | mkdir -p debian/${LINUX_TOOLS_PKG}/usr/bin 153 | mkdir -p debian/${LINUX_TOOLS_PKG}/usr/share/man/man1 154 | install -m 755 ${BUILD_DIR}/${KERNEL_SRC}/tools/perf/perf debian/${LINUX_TOOLS_PKG}/usr/bin/perf_$(KERNEL_MAJMIN) 155 | for i in ${BUILD_DIR}/${KERNEL_SRC}/tools/perf/Documentation/*.1; do \ 156 | fname="$${i##*/}"; manname="$${fname%.1}"; \ 157 | install -m644 "$$i" "debian/${LINUX_TOOLS_PKG}/usr/share/man/man1/$${manname}_$(KERNEL_MAJMIN).1"; \ 158 | done 159 | touch $@ 160 | 161 | ## Headers 162 | PVE_HEADER_TEMPLATES := $(patsubst debian/templates/pve-headers.%.in, debian/${PVE_HEADER_PKG}.%, $(wildcard debian/templates/pve-headers.*.in)) 163 | 164 | debian/${PVE_HEADER_PKG}.%: debian/templates/pve-headers.%.in 165 | sed -e 's/@@KVNAME@@/${KVNAME}/g' < debian/templates/pve-headers.$*.in > debian/${PVE_HEADER_PKG}.$* 166 | chmod +x debian/${PVE_HEADER_PKG}.$* 167 | 168 | .headers_install_mark: ${PVE_HEADER_TEMPLATES} 169 | rm -rf debian/${PVE_HEADER_PKG} 170 | mkdir -p debian/${PVE_HEADER_PKG}/usr/src/linux-headers-${KVNAME} 171 | install -m 0644 ${KERNEL_SRC}/.config debian/${PVE_HEADER_PKG}/usr/src/linux-headers-${KVNAME} 172 | ( \ 173 | cd ${KERNEL_SRC}; \ 174 | find . 
-path './debian/*' -prune \ 175 | -o -path './include/*' -prune \ 176 | -o -path './scripts' -prune \ 177 | -o -type f \ 178 | \( \ 179 | -name 'Makefile*' \ 180 | -o -name 'Kconfig*' \ 181 | -o -name 'Kbuild*' \ 182 | -o -name '*.sh' \ 183 | -o -name '*.pl' \ 184 | \) \ 185 | -print; \ 186 | find include scripts -type f -o -type l; \ 187 | find arch/${KERNEL_BUILD_ARCH} -maxdepth 1 -name Makefile\*; \ 188 | find arch/${KERNEL_BUILD_ARCH} -name module.lds -o -name Kbuild.platforms -o -name Platform; \ 189 | find $$(find arch/${KERNEL_BUILD_ARCH} -name include -o -name scripts -type d) -type f; \ 190 | find arch/${KERNEL_BUILD_ARCH}/include Module.symvers include scripts -type f; \ 191 | find tools/ -name 'objtool' -type f \ 192 | ) | rsync -avq --files-from=- ${KERNEL_SRC} debian/${PVE_HEADER_PKG}/usr/src/linux-headers-${KVNAME} 193 | mkdir -p debian/${PVE_HEADER_PKG}/lib/modules/${KVNAME} 194 | ln -sf /usr/src/linux-headers-${KVNAME} debian/${PVE_HEADER_PKG}/lib/modules/${KVNAME}/build 195 | touch $@ 196 | 197 | ## User-space headers 198 | .usr_headers_install_mark: PKG_DIR = debian/${PVE_USR_HEADER_PKG} 199 | .usr_headers_install_mark: OUT_DIR = ${PKG_DIR}/usr 200 | .usr_headers_install_mark: 201 | rm -rf '${PKG_DIR}' 202 | mkdir -p '${PKG_DIR}' 203 | $(MAKE) -C ${KERNEL_SRC} headers_install ARCH=$(KERNEL_HEADER_ARCH) INSTALL_HDR_PATH='$(CURDIR)'/$(OUT_DIR) 204 | rm -rf $(OUT_DIR)/include/drm $(OUT_DIR)/include/scsi 205 | find $(OUT_DIR)/include \( -name .install -o -name ..install.cmd \) -execdir rm {} + 206 | # Move include/asm to arch-specific directory 207 | mkdir -p $(OUT_DIR)/include/$(DEB_HOST_MULTIARCH) 208 | mv $(OUT_DIR)/include/asm $(OUT_DIR)/include/$(DEB_HOST_MULTIARCH)/ 209 | test ! 
-d $(OUT_DIR)/include/arch || \ 210 | mv $(OUT_DIR)/include/arch $(OUT_DIR)/include/$(DEB_HOST_MULTIARCH)/ 211 | touch $@ 212 | -------------------------------------------------------------------------------- /debian/rules.d/arch/amd64.mk: -------------------------------------------------------------------------------- 1 | KERNEL_BUILD_ARCH = x86 2 | KERNEL_HEADER_ARCH = $(KERNEL_BUILD_ARCH) 3 | KERNEL_BUILD_IMAGE = bzImage 4 | KERNEL_IMAGE_PATH = arch/$(KERNEL_BUILD_ARCH)/boot/${KERNEL_BUILD_IMAGE} 5 | KERNEL_INSTALL_FILE = vmlinuz 6 | -------------------------------------------------------------------------------- /debian/rules.d/common.mk: -------------------------------------------------------------------------------- 1 | ## Kernel information 2 | KERNEL_MAJMIN=$(shell ./scripts/version.sh -n) 3 | KERNEL_VER=$(shell ./scripts/version.sh -L) 4 | 5 | ## Debian package information 6 | PKG_DISTRIBUTOR ?= PVE fw867 7 | PKG_RELEASE = $(shell ./scripts/version.sh -r) 8 | PKG_DATE := $(shell dpkg-parsechangelog -SDate) 9 | PKG_DATE_UTC_ISO := $(shell date -u -d '$(PKG_DATE)' +%Y-%m-%d) 10 | PKG_GIT_VERSION := $(shell git rev-parse HEAD) 11 | 12 | # Build settings 13 | PVE_KERNEL_CC ?= ${CC} 14 | PVE_ZFS_CC ?= ${CC} 15 | 16 | ### Debian package names 17 | EXTRAVERSION=-pve 18 | KVNAME=${KERNEL_VER}${EXTRAVERSION} 19 | 20 | PVE_KERNEL_PKG=pve-kernel-${KVNAME} 21 | PVE_HEADER_PKG=pve-headers-${KVNAME} 22 | PVE_USR_HEADER_PKG=pve-kernel-libc-dev 23 | LINUX_TOOLS_PKG=linux-tools-${KERNEL_MAJMIN} 24 | -------------------------------------------------------------------------------- /debian/source/format: -------------------------------------------------------------------------------- 1 | 3.0 (native) 2 | -------------------------------------------------------------------------------- /debian/templates/control.in: -------------------------------------------------------------------------------- 1 | Source: pve-kernel 2 | Section: devel 3 | Priority: optional 4 | Maintainer: 
fw867 5 | Build-Depends: asciidoc, 6 | automake, 7 | bc, 8 | bison, 9 | cpio, 10 | debhelper (>= 10~), 11 | dwarves (>= 1.16), 12 | flex, 13 | gcc (>= 8.3.0-6), 14 | git, 15 | kmod, 16 | libdw-dev, 17 | libelf-dev, 18 | libiberty-dev, 19 | libnuma-dev, 20 | libslang2-dev, 21 | libssl-dev, 22 | lz4, 23 | python3-minimal, 24 | python3-dev, 25 | quilt, 26 | rsync, 27 | xmlto, 28 | zlib1g-dev, 29 | zstd 30 | Build-Conflicts: pve-headers-@KVNAME@, 31 | Vcs-Git: https://github.com/fw867/pve-edge-kernel.git 32 | Vcs-Browser: https://github.com/fw867/pve-edge-kernel 33 | 34 | Package: linux-tools-@KVMAJMIN@ 35 | Architecture: any 36 | Section: devel 37 | Priority: optional 38 | Depends: linux-base, 39 | ${misc:Depends}, 40 | ${shlibs:Depends}, 41 | Description: Linux kernel version specific tools for version @KVMAJMIN@ 42 | This package provides the architecture dependent parts for kernel 43 | version locked tools (such as perf and x86_energy_perf_policy) 44 | 45 | Package: pve-headers-@KVMAJMIN@-pve 46 | Architecture: all 47 | Section: admin 48 | Priority: optional 49 | Depends: pve-headers-@KVNAME@, 50 | Description: Latest Proxmox Edge Kernel Headers 51 | This is a metapackage which will install the kernel headers 52 | for the latest available Proxmox Edge kernel from the @KVMAJMIN@ 53 | series. 54 | 55 | Package: pve-kernel-@KVMAJMIN@-pve 56 | Architecture: all 57 | Section: admin 58 | Priority: optional 59 | Depends: pve-firmware, 60 | pve-kernel-@KVNAME@, 61 | Description: Latest Proxmox Edge Kernel Image 62 | This is a metapackage which will install the latest available 63 | Proxmox Edge kernel from the @KVMAJMIN@ series.
64 | 65 | Package: pve-headers-@KVNAME@ 66 | Section: devel 67 | Priority: optional 68 | Architecture: any 69 | Provides: linux-headers, 70 | linux-headers-2.6, 71 | Depends: 72 | Description: The Proxmox Edge Kernel Headers 73 | This package contains the Proxmox Edge Linux kernel headers 74 | 75 | Package: pve-kernel-@KVNAME@ 76 | Section: admin 77 | Priority: optional 78 | Architecture: any 79 | Provides: linux-image, 80 | linux-image-2.6, 81 | Suggests: pve-firmware, 82 | Depends: busybox, 83 | initramfs-tools, 84 | Recommends: grub-pc | grub-efi-amd64 | grub-efi-ia32 | grub-efi-arm64, 85 | Description: The Proxmox PVE Kernel Image 86 | This package contains the Linux kernel and initial ramdisk used for booting 87 | 88 | Package: pve-kernel-libc-dev 89 | Section: devel 90 | Priority: optional 91 | Architecture: any 92 | Provides: linux-libc-dev, 93 | Conflicts: linux-libc-dev, 94 | Replaces: linux-libc-dev, 95 | Depends: ${misc:Depends} 96 | Description: Linux support headers for userspace development 97 | This package provides userspace headers from the Linux kernel. These headers 98 | are used by the installed headers for GNU libc and other system libraries. 99 | -------------------------------------------------------------------------------- /debian/templates/pve-headers.postinst.in: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | 3 | # Abort if any command returns an error value 4 | set -e 5 | 6 | case "$1" in 7 | configure) 8 | 9 | # There are three sub-cases: 10 | if test "${2+set}" != set; then 11 | # We're being installed by an ancient dpkg which doesn't remember 12 | # which version was most recently configured, or even whether 13 | # there is a most recently configured version. 14 | : 15 | 16 | elif test -z "$2" -o "$2" = ""; then 17 | # The package has not ever been configured on this system, or was 18 | # purged since it was last configured.
19 | : 20 | 21 | else 22 | # Version $2 is the most recently configured version of this 23 | # package. 24 | : 25 | 26 | fi ;; 27 | abort-upgrade) 28 | # Back out of an attempt to upgrade this package FROM THIS VERSION 29 | # to version $2. Undo the effects of "prerm upgrade $2". 30 | : 31 | 32 | ;; 33 | abort-remove) 34 | if test "$2" != in-favour; then 35 | echo "$0: undocumented call to \`postinst $*'" 1>&2 36 | exit 0 37 | fi 38 | # Back out of an attempt to remove this package, which was due to 39 | # a conflict with package $3 (version $4). Undo the effects of 40 | # "prerm remove in-favour $3 $4". 41 | : 42 | 43 | ;; 44 | abort-deconfigure) 45 | if test "$2" != in-favour -o "$5" != removing; then 46 | echo "$0: undocumented call to \`postinst $*'" 1>&2 47 | exit 0 48 | fi 49 | # Back out of an attempt to deconfigure this package, which was 50 | # due to package $6 (version $7) which we depend on being removed 51 | # to make way for package $3 (version $4). Undo the effects of 52 | # "prerm deconfigure in-favour $3 $4 removing $6 $7". 53 | : 54 | 55 | ;; 56 | *) echo "$0: didn't understand being called with \`$1'" 1>&2 57 | exit 0;; 58 | esac 59 | 60 | exit 0 61 | -------------------------------------------------------------------------------- /debian/templates/pve-kernel.postinst.in: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | # Ignore all invocations except when called on to configure. 6 | exit 0 unless $ARGV[0] =~ /configure/; 7 | 8 | # do nothing if run from proxmox installer 9 | exit 0 if -e "/proxmox_install_mode"; 10 | 11 | my $imagedir = "/boot"; 12 | 13 | my $version = "@@KVNAME@@"; 14 | 15 | system("depmod $version"); 16 | 17 | if (-d "/etc/kernel/postinst.d") { 18 | print STDERR "Examining /etc/kernel/postinst.d.\n"; 19 | system ("run-parts --verbose --exit-on-error --arg=$version " . 20 | "--arg=$imagedir/vmlinuz-$version " . 
21 | "/etc/kernel/postinst.d") && 22 | die "Failed to process /etc/kernel/postinst.d"; 23 | } 24 | 25 | exit 0 26 | -------------------------------------------------------------------------------- /debian/templates/pve-kernel.postrm.in: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | # Ignore all 'upgrade' invocations . 6 | exit 0 if $ARGV[0] =~ /upgrade/; 7 | 8 | my $imagedir = "/boot"; 9 | 10 | my $version = "@@KVNAME@@"; 11 | 12 | if (-d "/etc/kernel/postrm.d") { 13 | print STDERR "Examining /etc/kernel/postrm.d.\n"; 14 | system ("run-parts --verbose --exit-on-error --arg=$version " . 15 | "--arg=$imagedir/vmlinuz-$version " . 16 | "/etc/kernel/postrm.d") && 17 | die "Failed to process /etc/kernel/postrm.d"; 18 | } 19 | 20 | unlink "$imagedir/initrd.img-$version"; 21 | unlink "$imagedir/initrd.img-$version.bak"; 22 | unlink "/var/lib/initramfs-tools/$version"; 23 | 24 | # Ignore all invocations except when called on to purge. 
25 | exit 0 unless $ARGV[0] =~ /purge/; 26 | 27 | my @files_to_remove = qw{ 28 | modules.dep modules.isapnpmap modules.pcimap 29 | modules.usbmap modules.parportmap 30 | modules.generic_string modules.ieee1394map 31 | modules.ieee1394map modules.pnpbiosmap 32 | modules.alias modules.ccwmap modules.inputmap 33 | modules.symbols modules.ofmap 34 | modules.seriomap modules.*.bin 35 | modules.softdep modules.devname 36 | }; 37 | 38 | foreach my $extra_file (@files_to_remove) { 39 | for (glob("/lib/modules/$version/$extra_file")) { 40 | unlink; 41 | } 42 | } 43 | 44 | system ("rmdir", "/lib/modules/$version") if -d "/lib/modules/$version"; 45 | 46 | exit 0 47 | -------------------------------------------------------------------------------- /debian/templates/pve-kernel.prerm.in: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | use strict; 4 | 5 | # Ignore all invocations except when called on to remove 6 | exit 0 unless ($ARGV[0] && $ARGV[0] =~ /remove/) ; 7 | 8 | # do nothing if run from proxmox installer 9 | exit 0 if -e "/proxmox_install_mode"; 10 | 11 | my $imagedir = "/boot"; 12 | 13 | my $version = "@@KVNAME@@"; 14 | 15 | if (-d "/etc/kernel/prerm.d") { 16 | print STDERR "Examining /etc/kernel/prerm.d.\n"; 17 | system ("run-parts --verbose --exit-on-error --arg=$version " . 18 | "--arg=$imagedir/vmlinuz-$version " .
19 | "/etc/kernel/prerm.d") && 20 | die "Failed to process /etc/kernel/prerm.d"; 21 | } 22 | 23 | exit 0 24 | -------------------------------------------------------------------------------- /scripts/ar.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | for file in *.deb; 4 | do 5 | ar x "$file" 6 | unzstd control.tar.zst 7 | unzstd data.tar.zst 8 | xz control.tar 9 | xz data.tar 10 | rm "$file" 11 | ar cr "$file" debian-binary control.tar.xz data.tar.xz 12 | rm control.tar.zst 13 | rm data.tar.zst 14 | rm control.tar.xz 15 | rm data.tar.xz 16 | rm debian-binary 17 | done 18 | -------------------------------------------------------------------------------- /scripts/check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to check for new kernel release 3 | set -e 4 | set -o pipefail 5 | 6 | MAJOR=$(echo "$1" | sed -e "s/^v//" -e "s/\.[^.]*$//") 7 | CURRENT=$(scripts/version.sh -L) 8 | NEW=$(curl -s https://www.kernel.org/releases.json | jq -r ".releases|sort_by(-.released.timestamp)|map(select(.version | startswith(\"$MAJOR\")))|first|.version") 9 | 10 | if [[ "$(scripts/version.sh -p)" = "0" ]]; then 11 | [[ "$(scripts/version.sh -n)" = "$NEW" ]] || echo "$NEW" 12 | else 13 | [[ "$CURRENT" = "$NEW" ]] || echo "$NEW" 14 | fi 15 | -------------------------------------------------------------------------------- /scripts/update.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script to prepare update for new kernel release 3 | set -e 4 | set -o pipefail 5 | 6 | LINUX_REPOSITORY=linux 7 | LINUX_VERSION_PREVIOUS=$(scripts/version.sh -L) 8 | 9 | while getopts "R:t:v:r:h" OPTION; do 10 | case $OPTION in 11 | R) 12 | LINUX_REPOSITORY=$OPTARG 13 | ;; 14 | t) 15 | LINUX_TAG=$OPTARG 16 | ;; 17 | v) 18 | LINUX_VERSION=$OPTARG 19 | ;; 20 | r) 21 | LINUX_PACKAGE_RELEASE=$OPTARG 22 | ;; 23 | h) 24 | echo "update.sh
-Rrtvh" 25 | echo " -R path to Linux Git repository" 26 | echo " -t tag in Linux Git repository to pick" 27 | echo " -v manual version for this kernel" 28 | echo " -r manual release version for this kernel" 29 | echo " -h this help message" 30 | exit 1 31 | ;; 32 | *) 33 | echo "Incorrect options provided" 34 | exit 1 35 | ;; 36 | esac 37 | done 38 | 39 | # Fetch from Git repository 40 | echo "Fetching $LINUX_TAG from Linux Git repository..." 41 | 42 | git --git-dir $LINUX_REPOSITORY/.git fetch origin --depth 1 $LINUX_TAG 43 | git --git-dir $LINUX_REPOSITORY/.git checkout FETCH_HEAD 44 | 45 | if [[ -z "$LINUX_VERSION" ]]; then 46 | # Parse the Linux version from the Linux repository if it is not provided by the user 47 | LINUX_VERSION=$(scripts/version.sh -L) 48 | fi 49 | 50 | echo "Using Linux $LINUX_VERSION." 51 | 52 | LINUX_VERSION_MAJOR=$(echo "$LINUX_VERSION" | cut -d. -f1) 53 | LINUX_VERSION_MINOR=$(echo "$LINUX_VERSION" | cut -d. -f2) 54 | LINUX_VERSION_PATCH=$(echo "$LINUX_VERSION" | cut -d. -f3) 55 | LINUX_VERSION_PATCH=${LINUX_VERSION_PATCH:-0} # Default to 0 56 | 57 | # Generate a temporary debian/control from the template (uses the major/minor components computed above) 58 | sed -e "s/@KVNAME@/$LINUX_VERSION/g" -e "s/@KVMAJMIN@/$LINUX_VERSION_MAJOR.$LINUX_VERSION_MINOR/g" < debian/templates/control.in > debian/control 59 | 60 | LINUX_PACKAGE_RELEASE_PREVIOUS=$(scripts/version.sh -r) 61 | 62 | # Check whether we need to increment the package release 63 | if [[ -n $LINUX_PACKAGE_RELEASE ]]; then 64 | echo "Using custom package release $LINUX_PACKAGE_RELEASE" 65 | elif [[ $LINUX_VERSION == "$LINUX_VERSION_PREVIOUS" ]]; then 66 | LINUX_PACKAGE_RELEASE=$((LINUX_PACKAGE_RELEASE_PREVIOUS + 1)) 67 | echo "Incrementing package release to $LINUX_PACKAGE_RELEASE" 68 | else 69 | LINUX_PACKAGE_RELEASE=1 70 | echo "New package release" 71 | fi 72 | 73 | echo "Updating crack.bundle..." 74 | wget https://kernel.ubuntu.com/~kernel-ppa/mainline/v$LINUX_VERSION/crack.bundle -O crack.bundle 75 | 76 | echo "Generating entry for change log..."
77 | # Generate a changelog entry 78 | debchange -v "$LINUX_VERSION-$LINUX_PACKAGE_RELEASE" -D pve --force-distribution -U -M "Update to Linux $LINUX_VERSION." 79 | 80 | echo "Cleaning up" 81 | rm -f debian/control 82 | 83 | -------------------------------------------------------------------------------- /scripts/version.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Script for parsing version information in the repository 3 | set -e 4 | set -o pipefail 5 | 6 | LINUX_REPOSITORY=linux # directory whose Makefile carries VERSION/PATCHLEVEL/SUBLEVEL 7 | LINUX_VERSION=$(sed -n "s/^VERSION = \([0-9]*$\)/\1/p" < "$LINUX_REPOSITORY/Makefile" | xargs) 8 | LINUX_PATCHLEVEL=$(sed -n "s/^PATCHLEVEL = \([0-9]*$\)/\1/p" < "$LINUX_REPOSITORY/Makefile" | xargs) 9 | LINUX_SUBLEVEL=$(sed -n "s/^SUBLEVEL = \([0-9]*$\)/\1/p" < "$LINUX_REPOSITORY/Makefile" | xargs) 10 | LINUX_VERSION_FULL=$LINUX_VERSION.$LINUX_PATCHLEVEL.$LINUX_SUBLEVEL 11 | PACKAGE_VERSION=$(dpkg-parsechangelog -SVersion) 12 | PACKAGE_RELEASE=$(echo $PACKAGE_VERSION | sed -n 's/^.*-\([0-9]*\).*$/\1/p' | xargs) 13 | 14 | while getopts "MmnprdLh" OPTION; do 15 | case $OPTION in 16 | M) 17 | echo $LINUX_VERSION 18 | exit 0 19 | ;; 20 | m) 21 | echo $LINUX_PATCHLEVEL 22 | exit 0 23 | ;; 24 | n) 25 | echo $LINUX_VERSION.$LINUX_PATCHLEVEL 26 | exit 0 27 | ;; 28 | p) 29 | echo $LINUX_SUBLEVEL 30 | exit 0 31 | ;; 32 | r) 33 | echo $PACKAGE_RELEASE 34 | exit 0 35 | ;; 36 | L) 37 | echo $LINUX_VERSION_FULL 38 | exit 0 39 | ;; 40 | h) 41 | echo "version.sh [-MmnprLh]" 42 | echo " -M major version" 43 | echo " -m minor version" 44 | echo " -n major minor version" 45 | echo " -p patch version" 46 | echo " -r package release" 47 | echo " -L Linux version" 48 | echo " -h this help message" 49 | exit 1 50 | ;; 51 | *) 52 | echo "Incorrect options provided" 53 | exit 1 54 | ;; 55 | esac 56 | done 57 | 58 | echo "$PACKAGE_VERSION" 59 | --------------------------------------------------------------------------------