├── .gitignore ├── Makefile ├── README.md ├── VERSION ├── old-helpers ├── Makefile ├── README.md ├── VERSION ├── root │ ├── lib │ │ └── systemd │ │ │ └── system-sleep │ │ │ └── suspend-resume-all-vms │ └── usr │ │ ├── lib │ │ └── pve-helpers │ │ │ ├── qemu-server-hooks.sh │ │ │ ├── resume-all-vms.sh │ │ │ └── suspend-all-vms.sh │ │ └── sbin │ │ └── pin-vcpus.sh └── scripts │ └── pve-qemu-hooks.service └── root ├── etc └── systemd │ └── system │ └── pve-guests.service.d │ └── manual-start.conf ├── lib └── systemd │ └── system-sleep │ └── restart-vms └── var └── lib └── vz └── snippets └── exec-cmds /.gitignore: -------------------------------------------------------------------------------- 1 | *.deb 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | export RELEASE_START_SHA ?= $(shell git rev-list -1 HEAD VERSION) 2 | export RELEASE ?= $(shell git rev-list $(RELEASE_START_SHA).. --count) 3 | export RELEASE_NAME ?= $(shell cat VERSION)-$(RELEASE) 4 | export RELEASE_VERSION ?= $(RELEASE_NAME)-g$(shell git rev-parse --short HEAD) 5 | 6 | PACKAGE_FILE ?= pve-helpers-$(RELEASE_VERSION)_all.deb 7 | TARGET_HOST ?= fill-me.home 8 | 9 | all: pve-helpers 10 | 11 | .PHONY: pve-helpers 12 | pve-helpers: $(PACKAGE_FILE) 13 | 14 | $(PACKAGE_FILE): 15 | fpm \ 16 | --input-type dir \ 17 | --output-type deb \ 18 | --name pve-helpers \ 19 | --version $(RELEASE_VERSION) \ 20 | --package $@ \ 21 | --architecture all \ 22 | --category admin \ 23 | --url https://gitlab.com/ayufan/pve-helpers-build \ 24 | --description "Proxmox VE Helpers" \ 25 | --vendor "Kamil Trzciński" \ 26 | --maintainer "Kamil Trzciński " \ 27 | --license "MIT" \ 28 | --deb-priority optional \ 29 | --depends inotify-tools \ 30 | --depends qemu-server \ 31 | --depends expect \ 32 | --depends util-linux \ 33 | --deb-compression gz \ 34 | root/=/ 35 | 36 | install: pve-helpers 37 | dpkg -i $(PACKAGE_FILE) 38 | 39 | deploy: pve-helpers 40 | scp $(PACKAGE_FILE) $(TARGET_HOST): 41 | ssh $(TARGET_HOST) dpkg -i $(PACKAGE_FILE) 42 | 43 | clean: 44 | rm -f $(PACKAGE_FILE) 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Proxmox VE Helpers 2 | 3 | This repository is a set of scripts to better handle some of the Proxmox functions: 4 | 5 | - automatically restart VMs on host suspend, 6 | - allow to use CPU pinning, 7 | - allow to set fifo scheduler 8 | - allow to set affinity mask for vfio devices 9 | 10 | Why to do CPU pinning? 11 | 12 | - Usually, it is not needed as long as you don't use SMT 13 | - If you use SMT, each vCPU is not equal, CPU pinning allows to ensure that VMs receive a real threads 14 | - For having a good and predictable performance it is not needed to pin to exact cores, Linux can balance it very well 15 | - In general the less we configure the better it works. These settings are hints to define affinity masks for resources. 16 | 17 | ## Installation 18 | 19 | Clone and compile the repository: 20 | 21 | ```bash 22 | # install dependencies 23 | sudo apt-get install -f ruby ruby-dev rubygems build-essential 24 | sudo gem install fpm 25 | ``` 26 | 27 | ```bash 28 | # compile pve-helpers 29 | git clone https://github.com/ayufan/pve-helpers 30 | cd pve-helpers 31 | sudo make install 32 | ``` 33 | 34 | ## Usage 35 | 36 | ### 1. 
Enable snippet

You need to configure each machine to enable the hookscript.

The snippet is installed by default in `/var/lib/vz`,
which Proxmox exposes as the `local` storage.

```bash
qm set 204 --hookscript=local:snippets/exec-cmds
```

### 2. Configure VM

Edit the VM description and add a new line with one or more of the commands described below.

### 2.1. `cpu_taskset`

For the best performance you want to assign the VM to physical cores,
not a mix of physical and virtual cores.

For example, on an `i7-8700` each core has two threads (sibling pairs 0-6, 1-7, 2-8, and so on).
You can easily check that with `lscpu -e`, looking for core numbers that are
assigned twice.

```bash
CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ    MINMHZ
0   0    0      0    0:0:0:0       yes    4600.0000 800.0000
1   0    0      1    1:1:1:0       yes    4600.0000 800.0000
2   0    0      2    2:2:2:0       yes    4600.0000 800.0000
3   0    0      3    3:3:3:0       yes    4600.0000 800.0000
4   0    0      4    4:4:4:0       yes    4600.0000 800.0000
5   0    0      5    5:5:5:0       yes    4600.0000 800.0000
6   0    0      0    0:0:0:0       yes    4600.0000 800.0000
7   0    0      1    1:1:1:0       yes    4600.0000 800.0000
8   0    0      2    2:2:2:0       yes    4600.0000 800.0000
9   0    0      3    3:3:3:0       yes    4600.0000 800.0000
10  0    0      4    4:4:4:0       yes    4600.0000 800.0000
11  0    0      5    5:5:5:0       yes    4600.0000 800.0000
```

It is advisable to assign one CPU less than the number of
physical cores. For the `i7-8700` that means 5 cores.

Then, you can assign those 5 cores (with CPU pinning, but without pinning specific
threads) to the VM:

```text
cpu_taskset 7-11
```

This assigns the VM to the second thread of physical cores 1-5. We deliberately
choose not to assign `CORE 0`.

If you have two VMs running concurrently, you can assign one to the first thread
and the other to the second thread of each core, like this:

```text
VM 1:
cpu_taskset 1-5

VM 2:
cpu_taskset 7-11
```

### 2.2. Use `vendor-reset` for fixing the AMD Radeon reset bug

Instead of `pci_unbind` and `pci_rescan`, install the DKMS module from https://github.com/gnif/vendor-reset:

```bash
apt install dkms
git clone https://github.com/gnif/vendor-reset.git /usr/src/vendor-reset-0.1.1
dkms build vendor-reset/0.1.1
dkms install vendor-reset/0.1.1
echo vendor-reset >> /etc/modules
modprobe vendor-reset
```

### 2.3. `set_halt_poll`

This setting changes the value of the kvm parameter `halt_poll_ns` in `/sys/module/kvm/parameters/halt_poll_ns`.
Different configurations benefit from different settings. The default value is `20000`. In theory, a larger value would be beneficial for the performance/latency of a VM.
In practice, most Ryzen systems work best with `halt_poll_ns` set to `0`.

Usage example:
```yaml
cat /etc/pve/qemu-server/110.conf

##Set halt_poll_ns
#set_halt_poll 0
...
```

### 2.4. `assign_interrupts`

`assign_interrupts [--sleep=10s] [cpu cores] [--all] [interrupt name] [interrupt name...]`

This setting aims to simplify the process of assigning interrupts to the correct CPU cores in order to get the best performance
while doing a GPU/USB controller/audio controller passthrough. The goal is to have the same cores that are assigned to the VM with `cpu_taskset`
also be responsible for the interrupts generated by the devices that are fully passed through to the VM.
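If you want to see or reproduce by hand what the hook does, a minimal sketch looks like this (the `vfio` match, the IRQ number `140`, and the `2-7,10-15` mask are only examples; the hook derives the real device IDs from your `hostpci*` entries):

```bash
# List the interrupts that belong to passed-through (vfio) devices
grep vfio /proc/interrupts

# Pin one of them to the VM cores (the same mask you gave to cpu_taskset)
echo "2-7,10-15" > /proc/irq/140/smp_affinity_list

# Verify which CPUs the IRQ is now allowed to run on
cat /proc/irq/140/smp_affinity_list
```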
Getting this right is very important for achieving the lowest possible latency and eliminating random latency spikes inside the VM.
Ideally, you would also use something like irqbalance to move all other interrupts away from the VM-assigned CPU cores and onto your other hypervisor-reserved cores. The same CPU mask can be used with irqbalance to ban the VM CPU cores from receiving any other interrupts.

Note: Isolating CPU cores with `isolcpus`, while it has its own small benefits, is not required to get these latency improvements.

An optional `--sleep=10s` can be passed to modify
the default `30s` wait duration.

The `--all` flag can be used to automatically assign the interrupts of all configured `hostpci` devices.

Usage example:
```yaml
cat /etc/pve/qemu-server/110.conf
##CPU pinning
#cpu_taskset 1-5
#assign_interrupts --sleep=10s 1-5 --all
...
```

As another example, all interrupts with `vfio` in their name could be assigned to cores `4,12,5,13,6,14,7,15,2,10,3,11`, which in turn correspond to cores `2-7` and their SMT equivalents `10-15`.
In other words, cores `2,3,4,5,6,7` of an 8-core 3700X are assigned to the VM and to all of the interrupts from the GPU, the onboard USB controller, and the onboard audio controller.

### 2.5. `qm_conflict` and `qm_depends`

Sometimes VMs conflict with each other because they depend on the same resources,
like disks or VGA.

There are helper commands to shut down (`qm_conflict`) or start (`qm_depends`)
other VMs when the main machine is being started.

```yaml
cat /etc/pve/qemu-server/204.conf

# qm_conflict 204
# qm_depends 207
...
```

The first command, `qm_conflict`, will shut down the VM with VMID 204 before starting the current one,
and `qm_depends` will also start VMID 207, which might be a sibling VM.

I use `qm_conflict` or `qm_depends` to run a Linux VM sometimes with VGA passthrough,
sometimes as a sibling VM without any graphics card passed through, running in console mode only.

Be careful if you use `pci_unbind` and `pci_rebind`: they should come after the `qm_*` commands.

### 2.6. `pci_unbind` and `pci_rebind`

It might be desirable to bind the VGA to the VM, but as soon as the VM finishes,
unbind it and make it available to the host again.

The `--all` flag can be used to unbind all devices.

The simplest approach is to ensure that the VGA can render output on the host before
starting, then instruct Proxmox VE to unbind and rebind the devices:

```yaml
cat /etc/pve/qemu-server/204.conf

## Rebind VGA to host
#pci_unbind 02 00 0
#pci_unbind 02 00 1
#pci_unbind --all
#pci_rebind
```

### 3. Legacy features

These are features that are no longer really needed to achieve good latency in a VM.

### 3.1. `cpu_chrt` **no longer needed, outdated**

Running a virtualized environment always results in fairly random latency
due to the amount of other work being done. This is also because the Linux
hypervisor balances all threads, which has bad effects on `DPC`
and `ISR` execution times. Latency in a Windows VM can be measured with https://www.resplendence.com/latencymon. Ideally, we want latency of `< 300us`.

To improve the latency you can switch to the `FIFO` scheduler.
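For reference, `cpu_chrt fifo 1` boils down to a single `chrt` call on the VM's QEMU process, roughly like this sketch (VMID `204` is a placeholder):

```bash
# Give all threads of the VM's QEMU process the SCHED_FIFO policy with priority 1
chrt -v --fifo -a -p 1 "$(cat /var/run/qemu-server/204.pid)"
```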
Be aware that FIFO scheduling has catastrophic effects on everything else that is not your VM,
but this is likely acceptable for gaming / daily use of passthrough VMs.

Configure the VM description with:

```text
cpu_chrt fifo 1
```

> Note:
> It seems that when Hyper-V enlightenments are enabled (they are enabled for `ostype: win10`), this is no longer needed.
> I now have amazing performance without using `cpu_chrt`.

### 3.2. `pci_unbind` and `pci_rescan` **no longer needed, outdated**

Just use `vendor-reset`.

There are multiple approaches to handling Radeon graphics cards. I found that
to make it stable:

1. the VGA BIOS needs to be exported, put in `/usr/share/kvm`, and passed as the `romfile` of `hostpci*`,
2. a PCIe unbind/rescan needs to happen.

Exporting the BIOS should ideally happen when running "natively", so with the graphics card available,
ideally on Windows, with `GPU-Z`. Once the BIOS is exported, you should ensure that it
contains a UEFI section: https://pve.proxmox.com/wiki/Pci_passthrough#How_to_known_if_card_is_UEFI_.28ovmf.29_compatible.
Sometimes the BIOS can be found on https://www.techpowerup.com/vgabios/.
Ensure that you find the exact one for the `vid:pid` of your graphics card.

This is how my config looks once the BIOS is put in the correct place:

```yaml
cat /etc/pve/qemu-server/204.conf

## Fix VGA
#pci_rescan
#pci_unbind 02 00 0
#pci_unbind 02 00 1
...
hookscript: local:snippets/exec-cmds
...
hostpci0: 02:00,pcie=1,romfile=215895.rom,x-vga=1
...
machine: q35
...
```

The comment defines the commands to execute to unbind and rebind the graphics card for the VM.

In cases where there are bugs in getting the VM up, the `suspend/resume` cycle of Proxmox
helps: `systemctl suspend`.

### 4. Suspend/resume

There's a set of scripts that try to restart machines
when the Proxmox VE machine goes to sleep.

First, you might be interested in doing `suspend` on the power button.
Edit `/etc/systemd/logind.conf` to set:

```text
HandlePowerKey=suspend
```

Then `systemctl restart systemd-logind.service` or reboot Proxmox VE.

After that, every one of your machines should restart alongside a Proxmox VE
suspend, and thus support restarting with PCI passthrough devices,
like a GPU.

**Ensure that each of your machines supports the Qemu Guest Agent**.
This function will not work if you don't have the Qemu Guest Agent installed
and running.

### 5. My setup

Here's a quick rundown of the environment that I currently use
with the above quirks.

#### 5.1. Hardware

- i7-8700
- 48GB DDR4
- Intel iGPU used by Proxmox VE
- AMD RX560 2GB used by the Linux VM
- GeForce RTX 2080 Super used by the Windows VM
- Audio is output by both VMs to shared speakers connected to the motherboard audio card
- Each VM has its own dedicated USB controller
- Each VM has a dedicated amount of memory using 1G hugepages
- Each VM does not use SMT; rather, it is assigned to thread 0 (Linux) or thread 1 (Windows) of each core, having only 5 vCPUs available to the VM

#### 5.2. 
Kernel config 305 | 306 | ```text 307 | GRUB_CMDLINE_LINUX="" 308 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX pci_stub.ids=10de:1e81,10de:10f8,10de:1ad8,10de:1ad9,10de:13c2,10de:0fbb,1002:67ef,1002:aae0" 309 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX intel_iommu=on kvm_intel.ept=Y kvm_intel.nested=Y i915.enable_hd_vgaarb=1 pcie_acs_override=downstream vfio-pci.disable_idle_d3=1" 310 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX cgroup_enable=memory swapaccount=1" 311 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX intel_pstate=disable" 312 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX hugepagesz=1G hugepages=42" 313 | ``` 314 | 315 | #### 5.3. Linux VM 316 | 317 | I use Linux for regular daily development work. 318 | 319 | My Proxmox VE config looks like this: 320 | 321 | ```text 322 | ## CPU PIN 323 | #cpu_taskset 0-5 324 | #assign_interrupts 0-5 --all 325 | # 326 | ## Conflict (207 shares disks, 208 shares VGA) 327 | #qm_conflict 207 328 | #qm_conflict 208 329 | agent: 1 330 | args: -audiodev id=alsa,driver=alsa,out.period-length=100000,out.frequency=48000,out.channels=2,out.try-poll=off,out.dev=swapped -soundhw hda 331 | balloon: 0 332 | bios: ovmf 333 | boot: dcn 334 | bootdisk: scsi0 335 | cores: 5 336 | cpu: host 337 | hookscript: local:snippets/exec-cmds 338 | hostpci0: 02:00,romfile=215895.rom,x-vga=1 339 | hostpci1: 04:00 340 | hugepages: 1024 341 | ide2: none,media=cdrom 342 | memory: 32768 343 | name: ubuntu19-vga 344 | net0: virtio=32:13:40:C7:31:4C,bridge=vmbr0 345 | numa: 1 346 | onboot: 1 347 | ostype: l26 348 | scsi0: nvme-thin:vm-206-disk-1,discard=on,iothread=1,size=200G,ssd=1 349 | scsi1: ssd:vm-206-disk-0,discard=on,iothread=1,size=100G,ssd=1 350 | scsi10: ssd:vm-206-disk-1,iothread=1,replicate=0,size=32G,ssd=1 351 | scsihw: virtio-scsi-pci 352 | serial0: socket 353 | sockets: 1 354 | usb0: host=1050:0406 355 | vga: none 356 | ``` 357 | 358 | #### 5.4. Windows VM 359 | 360 | I use Windows for Gaming. It has dedicated RTX 2080 Super. 361 | 362 | ```text 363 | ## CPU PIN 364 | #cpu_taskset 6-11 365 | #assign_interrupts 6-11 --all 366 | agent: 1 367 | args: -audiodev id=alsa,driver=alsa,out.period-length=100000,out.frequency=48000,out.channels=2,out.try-poll=off,out.dev=swapped -soundhw hda 368 | balloon: 0 369 | bios: ovmf 370 | boot: dc 371 | bootdisk: scsi0 372 | cores: 5 373 | cpu: host 374 | cpuunits: 10000 375 | efidisk0: nvme-thin:vm-204-disk-1,size=4M 376 | hookscript: local:snippets/exec-cmds 377 | hostpci0: 01:00,pcie=1,x-vga=1,romfile=Gigabyte.RTX2080Super.8192.190820.rom 378 | hugepages: 1024 379 | ide2: none,media=cdrom 380 | machine: pc-q35-3.1 381 | memory: 10240 382 | name: win10-vga 383 | net0: e1000=3E:41:0E:4D:3D:14,bridge=vmbr0 384 | numa: 1 385 | onboot: 1 386 | ostype: win10 387 | runningmachine: pc-q35-3.1 388 | scsi0: ssd:vm-204-disk-2,discard=on,iothread=1,size=64G,ssd=1 389 | scsi1: ssd:vm-204-disk-0,backup=0,discard=on,iothread=1,replicate=0,size=921604M 390 | scsi3: nvme-thin:vm-204-disk-0,backup=0,discard=on,iothread=1,replicate=0,size=100G 391 | scsihw: virtio-scsi-pci 392 | sockets: 1 393 | vga: none 394 | ``` 395 | 396 | #### 5.5. Switching between VMs 397 | 398 | To switch between VMs: 399 | 400 | 1. Both VMs always run concurrently. 401 | 1. I do change the monitor input. 402 | 1. Audio is by default being output by both VMs, no need to switch it. 403 | 1. I use Barrier (previously Synergy) for most of time. 404 | 1. In other cases I have Logitech multi-device keyboard and mouse, 405 | so I switch it on keyboard. 406 | 1. 
I also have a physical switch that I use 407 | to change lighting and monitor inputs. 408 | 1. I have the monitor with PBP and PIP, so I can watch how Windows 409 | is updating while doing development work on Linux. 410 | 411 | ## Author, License 412 | 413 | Kamil Trzciński, 2019-2021, MIT 414 | -------------------------------------------------------------------------------- /VERSION: -------------------------------------------------------------------------------- 1 | 0.6.0 2 | -------------------------------------------------------------------------------- /old-helpers/Makefile: -------------------------------------------------------------------------------- 1 | export RELEASE_START_SHA ?= $(shell git rev-list -1 HEAD VERSION) 2 | export RELEASE ?= $(shell git rev-list $(RELEASE_START_SHA).. --count) 3 | export RELEASE_NAME ?= $(shell cat VERSION)-$(RELEASE) 4 | export RELEASE_VERSION ?= $(RELEASE_NAME)-g$(shell git rev-parse --short HEAD) 5 | 6 | PACKAGE_FILE ?= pve-helpers-$(RELEASE_VERSION)_all.deb 7 | TARGET_HOST ?= fill-me.home 8 | 9 | all: pve-helpers 10 | 11 | .PHONY: pve-helpers 12 | pve-helpers: $(PACKAGE_FILE) 13 | 14 | $(PACKAGE_FILE): 15 | fpm \ 16 | --input-type dir \ 17 | --output-type deb \ 18 | --name pve-helpers \ 19 | --version $(RELEASE_VERSION) \ 20 | --package $@ \ 21 | --architecture all \ 22 | --category admin \ 23 | --url https://gitlab.com/ayufan/pve-helpers-build \ 24 | --description "Proxmox VE Helpers" \ 25 | --vendor "Kamil Trzciński" \ 26 | --maintainer "Kamil Trzciński " \ 27 | --license "MIT" \ 28 | --deb-priority optional \ 29 | --depends inotify-tools \ 30 | --depends qemu-server \ 31 | --depends expect \ 32 | --depends util-linux \ 33 | --deb-compression bzip2 \ 34 | --deb-systemd scripts/pve-qemu-hooks.service \ 35 | root/=/ 36 | 37 | install: pve-helpers 38 | dpkg -i $(PACKAGE_FILE) 39 | 40 | deploy: pve-helpers 41 | scp $(PACKAGE_FILE) $(TARGET_HOST): 42 | ssh $(TARGET_HOST) dpkg -i $(PACKAGE_FILE) 43 | 44 | clean: 45 | rm -f $(PACKAGE_FILE) 46 | -------------------------------------------------------------------------------- /old-helpers/README.md: -------------------------------------------------------------------------------- 1 | # Proxmox VE Qemu Helpers 2 | 3 | This repository is a set of scripts to better handle some of the Proxmox functions: 4 | 5 | - automatically suspend/resume on host suspend, 6 | - allow to use CPU pinning, 7 | - allow to run actions on VM bootup 8 | 9 | ## Installation 10 | 11 | Clone and compile the repository: 12 | 13 | ```bash 14 | git clone https://github.com/ayufan/pve-helpers 15 | cd pve-helpers 16 | sudo make install 17 | ``` 18 | 19 | ## Usage 20 | 21 | ### 1. Enable CPU pinning (`/usr/sbin/pin-vcpus.sh`) 22 | 23 | The CPU pinning is enabled only when you add in notes the `CPUPIN` keyword. 24 | It will pin each CPU thread to one physical thread. 25 | The pinning will omit the CORE0 as it assumes that you use it 26 | for the purpose of the host machine. 27 | 28 | For the best performance you should configure cores specification 29 | exactly the way as they are on your host machine: matching number of threads per-core. 30 | 31 | Currently, Proxmox VE does not allow you to configure `threads`, so you have to do it manually: 32 | 33 | ```bash 34 | qm set VMID -args -smp 10,cores=5,threads=2 35 | ``` 36 | 37 | The above assume that you use CPU with SMT, which has two threads per-each core. 
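To double-check the result once the VM is running and `pin-vcpus.sh` has done its work, you can list the affinity of every QEMU thread; a quick sketch along these lines (the VMID `204` is a placeholder):

```bash
# Show which host CPUs each thread of the VM's QEMU process may run on
for tid in /proc/"$(cat /var/run/qemu-server/204.pid)"/task/*; do
  taskset -cp "$(basename "$tid")"
done
```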
38 | The CPU pinning method will properly assign each virtual thread to physical thread taking 39 | into account CPUs affinity mask as produced by `lscpu -e`. 40 | 41 | To ensure that CPU pinning does work, 42 | you can try it from command line as `root` user: 43 | 44 | ```bash 45 | pin-vcpus.sh VMID 46 | ``` 47 | 48 | #### 1.1. Using `isolcpus` 49 | 50 | The above option should be used with conjuction to `isolcpus` of kernel. 51 | This is a way to disable CPU cores from being used by hypervisor, 52 | making it possible to assign cores exclusively to the VMs only. 53 | 54 | For doing that edit `/etc/default/grub` and add: 55 | 56 | ```bash 57 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX isolcpus=1-5,7-11" 58 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX nohz_full=1-5,7-11" 59 | GRUB_CMDLINE_LINUX="$GRUB_CMDLINE_LINUX rcu_nocbs=1-5,7-11" 60 | ``` 61 | 62 | Where `1-5,7-11` matches a cores that Proxmox VE should not use. 63 | You really want to omit everything that is on CORE0. 64 | The above specification is valid for latest `i7-8700` CPUs: 65 | 66 | ```bash 67 | CPU NODE SOCKET CORE L1d:L1i:L2:L3 ONLINE MAXMHZ MINMHZ 68 | 0 0 0 0 0:0:0:0 yes 4600.0000 800.0000 69 | 1 0 0 1 1:1:1:0 yes 4600.0000 800.0000 70 | 2 0 0 2 2:2:2:0 yes 4600.0000 800.0000 71 | 3 0 0 3 3:3:3:0 yes 4600.0000 800.0000 72 | 4 0 0 4 4:4:4:0 yes 4600.0000 800.0000 73 | 5 0 0 5 5:5:5:0 yes 4600.0000 800.0000 74 | 6 0 0 0 0:0:0:0 yes 4600.0000 800.0000 75 | 7 0 0 1 1:1:1:0 yes 4600.0000 800.0000 76 | 8 0 0 2 2:2:2:0 yes 4600.0000 800.0000 77 | 9 0 0 3 3:3:3:0 yes 4600.0000 800.0000 78 | 10 0 0 4 4:4:4:0 yes 4600.0000 800.0000 79 | 11 0 0 5 5:5:5:0 yes 4600.0000 800.0000 80 | ``` 81 | 82 | For Ryzen CPUs you will rather see CORE0 to be assigned 83 | to CPU0 and CPU1, thus your specification will look `2-11`. 84 | 85 | After editing configuration `update-grub` and reboot Proxmox VE. 86 | 87 | ### 2. Suspend/resume 88 | 89 | There's a set of scripts that try to perform suspend of machines 90 | when Proxmox VE machine goes to sleep. 91 | 92 | First, you might be interested in doing `suspend` on power button. 93 | Edit the `/etc/systemd/logind.conf` to modify: 94 | 95 | ``` 96 | HandlePowerKey=suspend 97 | ``` 98 | 99 | Then `systemctl restart systemd-logind.service` or reboot Proxmox VE. 100 | 101 | After that every of your machines should suspend alongside with Proxmox VE 102 | suspend, thus be able to support suspend/resume on PCI passthrough devices, 103 | like GPU. 104 | 105 | **Ensure that each of your machines does support Qemu Guest Agent**. 106 | This function will not work if you don't have Qemu Guest Agent installed 107 | and running. 108 | 109 | ### 3. Run hooks on machine start and stop 110 | 111 | This allows you to add a script `/etc/qemu-server-hooks/VMID.up` that 112 | will be executed when machine starts. 113 | 114 | This allows you to add a script `/etc/qemu-server-hooks/VMID.down` that 115 | will be executed when machine stops. 
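For illustration, a hypothetical `/etc/qemu-server-hooks/204.up` hook could look like the sketch below; the VMID, the log path, and the commands inside are placeholders, not part of the package:

```bash
#!/bin/bash
# Example up-hook: runs right after VM 204 has been started
echo "$(date): VM 204 started" >> /var/log/qemu-server-hooks.log

# Example host-side tweak applied only while this VM is in use (adjust or drop as needed)
echo 0 > /sys/module/kvm/parameters/halt_poll_ns
```

The script has to be executable (`chmod +x`) and is matched purely by its file name, as `qemu-server-hooks.sh` invokes `/etc/qemu-server-hooks/VMID.up` and `VMID.down` directly.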
116 | 117 | ## Author, License 118 | 119 | Kamil Trzciński, 2019, MIT 120 | -------------------------------------------------------------------------------- /old-helpers/VERSION: -------------------------------------------------------------------------------- 1 | 0.2.0 2 | -------------------------------------------------------------------------------- /old-helpers/root/lib/systemd/system-sleep/suspend-resume-all-vms: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$1" == "pre" ]]; then 4 | /usr/lib/pve-helpers/suspend-all-vms.sh 5 | elif [[ "$1" == "post" ]]; then 6 | /usr/lib/pve-helpers/resume-all-vms.sh 7 | else 8 | echo "invalid: $@" 9 | exit 1 10 | fi 11 | -------------------------------------------------------------------------------- /old-helpers/root/usr/lib/pve-helpers/qemu-server-hooks.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | hooks=/etc/qemu-server-hooks 4 | watch=/var/run/qemu-server 5 | 6 | mkdir -p "$hooks" "$watch" 7 | 8 | pin_vcpus() { 9 | /usr/sbin/pin-vcpus.sh "$@" 10 | } 11 | 12 | while read file; do 13 | VMID=$(basename "$file" .pid) 14 | 15 | # ignore non-pid matches 16 | if [[ "$file" == "$VMID" ]]; then 17 | continue 18 | fi 19 | 20 | if [[ -e "$watch/$file" ]]; then 21 | echo "$VMID: Did start." 22 | [[ -f "$hooks/$VMID.up" ]] && "$hooks/$VMID.up" 23 | pin_vcpus "$VMID" & 24 | else 25 | echo "$VMID: Did stop." 26 | [[ -f "$hooks/$VMID.down" ]] && "$hooks/$VMID.down" 27 | fi 28 | done < <(/usr/bin/inotifywait -mq -e create,delete --format "%f" "$watch") 29 | -------------------------------------------------------------------------------- /old-helpers/root/usr/lib/pve-helpers/resume-all-vms.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | resume_vm() { 4 | local VMID="$1" 5 | 6 | local VMSTATUS=$(qm status "$VMID") 7 | local VMCONFIG=$(qm config "$VMID") 8 | 9 | # We need to reset only when hostpci.*: 10 | if grep -q ^hostpci <(echo "$VMCONFIG"); then 11 | if [[ "$VMSTATUS" == "status: running" ]]; then 12 | echo "$VMID: Resetting as it has 'hostpci*:' devices..." 13 | qm reset "$VMID" 14 | return 1 15 | fi 16 | fi 17 | 18 | if [[ ! -e "/var/run/qemu-server/$VMID.suspended" ]]; then 19 | echo "$VMID: Nothing to due, due to missing: $VMID.suspended." 20 | return 0 21 | fi 22 | 23 | rm -f "/var/run/qemu-server/$VMID.suspended" 24 | 25 | if [[ "$VMSTATUS" == "status: stopped" ]]; then 26 | echo "$VMID: Starting (stopped)..." 27 | qm start "$VMID" 28 | fi 29 | 30 | echo "$VMID: Resuming..." 31 | qm resume "$VMID" 32 | 33 | for i in $(seq 1 30); do 34 | VMSTATUS=$(qm status "$VMID") 35 | if [[ "$VMSTATUS" == "status: running" ]]; then 36 | echo "$VMID: Resumed." 37 | return 0 38 | fi 39 | 40 | echo "$VMID: Waiting for resume: $VMSTATUS..." 41 | sleep 1s 42 | done 43 | 44 | echo "$VMID: Failed to resume: $VMSTATUS." 45 | qm reset "$VMID" 46 | return 1 47 | } 48 | 49 | for i in /etc/pve/nodes/$(hostname)/qemu-server/*.conf; do 50 | VMID=$(basename "$i" .conf) 51 | resume_vm "$VMID" & 52 | done 53 | 54 | wait 55 | -------------------------------------------------------------------------------- /old-helpers/root/usr/lib/pve-helpers/suspend-all-vms.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | suspend_vm_action() { 4 | local VMID="$1" 5 | local ACTION="$2" 6 | 7 | if ! 
qm guest cmd "$VMID" ping; then 8 | return 1 9 | fi 10 | 11 | echo "$VMID: Suspending ($ACTION)..." 12 | qm guest cmd "$VMID" "$ACTION" 13 | 14 | for i in $(seq 1 30); do 15 | local VMSTATUS=$(qm status "$VMID") 16 | if [[ "$VMSTATUS" == "status: suspended" ]] || [[ "$VMSTATUS" == "status: stopped" ]]; then 17 | echo "$VMID: Suspended." 18 | touch "/var/run/qemu-server/$VMID.suspended" 19 | return 0 20 | fi 21 | 22 | echo "$VMID: Waiting for suspend: $VMSTATUS..." 23 | sleep 1s 24 | done 25 | 26 | echo "$VMID: Failed to suspend: $VMSTATUS." 27 | return 1 28 | } 29 | 30 | suspend_vm() { 31 | local VMID="$1" 32 | 33 | local VMSTATUS=$(qm status "$VMID") 34 | local VMCONFIG=$(qm config "$VMID") 35 | 36 | if [[ "$VMSTATUS" != "status: running" ]]; then 37 | echo "$VMID: Nothing to due, due to: $VMSTATUS." 38 | return 0 39 | fi 40 | 41 | if ! grep -q ^hostpci <(echo "$VMCONFIG"); then 42 | echo "$VMID: VM does not use PCI-passthrough" 43 | return 0 44 | fi 45 | 46 | # if suspend_vm_action "$VMID" suspend-disk; then 47 | # return 0 48 | # fi 49 | 50 | # echo "$VMID: VM does not support suspend-disk via Guest Agent, using shutdown." 51 | 52 | if qm shutdown "$VMID"; then 53 | touch "/var/run/qemu-server/$VMID.suspended" 54 | return 0 55 | fi 56 | 57 | echo "$VMID: Failed to suspend or shutdown." 58 | return 1 59 | } 60 | 61 | for i in /etc/pve/nodes/$(hostname)/qemu-server/*.conf; do 62 | VMID=$(basename "$i" .conf) 63 | suspend_vm "$VMID" & 64 | done 65 | 66 | wait 67 | -------------------------------------------------------------------------------- /old-helpers/root/usr/sbin/pin-vcpus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -eo pipefail 4 | 5 | if [[ $# -ne 1 ]]; then 6 | echo "Usage: $0 " 7 | exit 1 8 | fi 9 | 10 | VMID="$1" 11 | 12 | if ! VMCONFIG=$(qm config "$VMID"); then 13 | echo "$VMID: Does not exist." 14 | exit 1 15 | fi 16 | 17 | if ! grep -q CPUPIN <(echo "$VMCONFIG"); then 18 | echo "$VMID: Does not have CPUPIN defined." 19 | exit 1 20 | fi 21 | 22 | vm_cpu_tasks() { 23 | expect <" 26 | send "info cpus\r" 27 | expect ">" 28 | EOF 29 | } 30 | 31 | # this functions returns a list of CPU cores 32 | # in order as they have HT threads 33 | # mapping Intel cpus to Qemu emulated cpus 34 | cores() { 35 | # tail -n+2: ignore header 36 | # sort -n -k4: sort by core-index vs threads 37 | # ignore core-0: assuming that it is assigned to host with isolcpus 38 | while read CPU NODE SOCKET CORE REST; do 39 | if [[ "$CORE" == "0" ]]; then 40 | # We assume that $CORE is assigned to host (always) 41 | continue 42 | fi 43 | 44 | echo "$CPU" 45 | done < <(lscpu -e | tail -n+2 | sort -n -k4) 46 | } 47 | 48 | echo "$VMID: Checking..." 49 | 50 | for i in $(seq 1 10); do 51 | VMSTATUS=$(qm status $VMID) 52 | if [[ "$VMSTATUS" != "status: running" ]]; then 53 | echo "$VMID: VM is not running: $VMSTATUS" 54 | exit 1 55 | fi 56 | 57 | VCPUS=($(vm_cpu_tasks)) 58 | VCPU_COUNT="${#VCPUS[@]}" 59 | 60 | if [[ $VCPU_COUNT -gt 0 ]]; then 61 | break 62 | fi 63 | 64 | echo "* No VCPUS for $VMID" 65 | sleep 3s 66 | done 67 | 68 | if [[ $VCPU_COUNT -eq 0 ]]; then 69 | exit 1 70 | fi 71 | 72 | echo "$VMID: Detected VCPU ${#VCPUS[@]} threads..." 73 | 74 | for CPU_INDEX in "${!VCPUS[@]}"; do 75 | CPU_TASK="${VCPUS[$CPU_INDEX]}" 76 | if read CPU_INDEX; then 77 | echo "$VMID: Assigning $CPU_INDEX to $CPU_TASK..." 
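    # "$CPU_TASK" is the thread ID of one QEMU vCPU (taken from `info cpus` on the
    # monitor above), and "$CPU_INDEX" is the host CPU chosen by cores(); the
    # `taskset -pc` call below restricts that single thread to exactly that host CPU.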
78 | taskset -pc "$CPU_INDEX" "$CPU_TASK" 79 | else 80 | echo "$VMID: No CPU to assign to $CPU_TASK" 81 | fi 82 | done < <(cores) 83 | -------------------------------------------------------------------------------- /old-helpers/scripts/pve-qemu-hooks.service: -------------------------------------------------------------------------------- 1 | [Unit] 2 | Description = PVE Qemu Server Hooks 3 | 4 | [Service] 5 | Type = simple 6 | ExecStart = /usr/lib/pve-helpers/qemu-server-hooks.sh 7 | 8 | [Install] 9 | WantedBy = multi-user.target 10 | -------------------------------------------------------------------------------- /root/etc/systemd/system/pve-guests.service.d/manual-start.conf: -------------------------------------------------------------------------------- 1 | [Unit] 2 | RefuseManualStart=false 3 | RefuseManualStop=false 4 | 5 | -------------------------------------------------------------------------------- /root/lib/systemd/system-sleep/restart-vms: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [[ "$1" == "pre" ]]; then 4 | /bin/systemctl stop pve-guests.service 5 | elif [[ "$1" == "post" ]]; then 6 | /bin/systemctl start pve-guests.service 7 | else 8 | echo "invalid: $@" 9 | exit 1 10 | fi 11 | -------------------------------------------------------------------------------- /root/var/lib/vz/snippets/exec-cmds: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | VMID="$1" 4 | ACTION="$2" 5 | SLEPT="" 6 | 7 | vmpid() { 8 | cat "/var/run/qemu-server/$VMID.pid" 9 | } 10 | 11 | if_action() { 12 | if [[ "$ACTION" == "$1" ]]; then 13 | shift 14 | eval "$@" 15 | fi 16 | } 17 | 18 | sleep_once() { 19 | if [[ -z "$SLEPT" ]]; then 20 | sleep 1s 21 | SLEPT=1 22 | fi 23 | } 24 | 25 | hostpci_ids() { 26 | grep '^hostpci[0-9]:.*0000' "/etc/pve/qemu-server/$VMID.conf" | awk '{print $2}' | awk -F, '{print $1}' 27 | } 28 | 29 | exec_pci_rescan() { 30 | echo "Running PCI rescan for $VMID..." 31 | echo 1 > /sys/bus/pci/rescan 32 | } 33 | 34 | exec_set_haltpoll() { 35 | echo "Setting haltpoll for $VMID..." 36 | echo $1 > /sys/module/kvm/parameters/halt_poll_ns 37 | } 38 | 39 | exec_assign_interrupts() { 40 | local SLEEP="30s" 41 | if [[ $1 == --sleep=* ]]; then 42 | SLEEP="${1#--sleep=}" 43 | shift 44 | fi 45 | 46 | echo "Wating $SLEEP seconds for all vfio-gpu interrupts to show up..." 47 | sleep "$SLEEP" 48 | 49 | MASK="$1" 50 | shift 51 | 52 | if [[ "$1" == "--all" ]]; then 53 | set -- $(hostpci_ids) 54 | fi 55 | 56 | for interrupt; do 57 | interrupt=$(printf '%b' "${interrupt//%/\\x}") 58 | echo "Moving $interrupt interrupts to $MASK cpu cores $VMID..." 59 | grep "$interrupt" /proc/interrupts | cut -d ":" -f 1 | while read -r i; do 60 | echo "- IRQ: $(grep "^\s*$i:" /proc/interrupts)" 61 | echo "$MASK" > /proc/irq/$i/smp_affinity_list 62 | done 63 | done 64 | } 65 | 66 | exec_pci_unbind() { 67 | if [[ "$1" == "--all" ]]; then 68 | set -- $(hostpci_ids) 69 | else 70 | set -- "0000:$1:$2.$3" 71 | fi 72 | 73 | for devid; do 74 | if [[ -e "/sys/bus/pci/devices/$devid" ]]; then 75 | echo "Running PCI unbind of '$devid' for $VMID..." 76 | echo 1 > "/sys/bus/pci/devices/$devid/remove" 77 | elif [[ -e "/sys/bus/pci/devices/$devid.0" ]]; then 78 | echo "Running PCI unbind of '$devid.0' for $VMID..." 
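      # Writing 1 to the device's sysfs "remove" node detaches it from its driver and
      # drops it from the PCI tree; the later #pci_rebind step (exec_pci_rescan) writes
      # 1 to /sys/bus/pci/rescan so the host re-discovers and re-binds the device.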
      echo 1 > "/sys/bus/pci/devices/$devid.0/remove"
    else
      echo "The '$devid' was not found in '/sys/bus/pci/devices'"
    fi
  done
}

exec_cpu_taskset() {
  sleep_once

  echo "Running taskset with $1 for $(vmpid)..."
  taskset -a -p -c "$1" "$(vmpid)"
  echo ""
}

exec_cpu_chrt() {
  sleep_once

  echo "Running chrt with $1:$2 for $(vmpid)..."
  chrt -v "--$1" -a -p "$2" "$(vmpid)"
  echo ""
}

exec_qm_conflict() {
  echo "Conflicting with other VM$1, shutting it down just in case..."
  qm shutdown "$1"
}

exec_qm_depends() {
  echo "VM$1 is required, ensuring that it is started..."
  qm start "$1"
}

exec_cmds() {
  while read CMD ARG1 ARG2 ARG3 REST; do
    case "$CMD" in
      "#pci_rescan")
        if_action pre-start exec_pci_rescan
        ;;

      "#cpu_taskset")
        if_action post-start exec_cpu_taskset "$ARG1"
        ;;

      "#set_halt_poll")
        if_action post-start exec_set_haltpoll "$ARG1"
        ;;

      "#assign_interrupts")
        if_action post-start exec_assign_interrupts "$ARG1" "$ARG2" "$ARG3" $REST
        ;;

      "#cpu_chrt")
        if_action post-start exec_cpu_chrt "${ARG1:-fifo}" "${ARG2:-1}"
        ;;

      "#qm_depends")
        if_action post-start exec_qm_depends "$ARG1"
        ;;

      "#pci_unbind")
        if_action post-stop exec_pci_unbind "$ARG1" "$ARG2" "$ARG3"
        ;;

      "#pci_unbind_all")
        if_action post-stop exec_pci_unbind --all
        ;;

      "#pci_rebind")
        if_action post-stop exec_pci_rescan
        ;;

      "#qm_conflict")
        if_action pre-start exec_qm_conflict "$ARG1"
        ;;

      "#qm_"*|"#cpu_"*|"#pci_"*|"#set_"*|"#assign_"*)
        echo "exec-cmds: command is unknown '$CMD'"
        ;;
    esac
  done
}

echo "Running exec-cmds for $VMID on $ACTION..."

exec_cmds < "/etc/pve/qemu-server/$VMID.conf"

exit 0
--------------------------------------------------------------------------------